lua2hcb_compiler/
lua.rs

1use anyhow::{anyhow, bail, Context, Result};
2use regex::Regex;
3use std::collections::HashSet;
4use std::path::Path;
5
6#[derive(Clone, Debug)]
7pub struct Function {
8    pub name: String,
9    pub args_count: i8,
10    pub locals_count: i8,
11    pub body: Vec<Stmt>,
12    // Raw lines for scanning/inference
13    pub raw: Vec<String>,
14}
15
/// Storage class of a declared global.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GlobalKind {
    /// Declared with `global`; names follow the `gN` pattern.
    NonVolatile,
    /// Declared with `volatile global`; names follow the `vgN` pattern.
    Volatile,
}
21
22#[derive(Clone, Debug)]
23pub struct GlobalDecl {
24    pub name: String,
25    pub kind: GlobalKind,
26}
27
28#[derive(Clone, Debug)]
29pub struct Program {
30    pub globals: Vec<GlobalDecl>,
31    pub functions: Vec<Function>,
32}
33
/// A statement of the line-oriented Lua subset understood by the parser.
///
/// Conditions and expressions are kept as unparsed source text.
#[derive(Debug, Clone)]
pub enum Stmt {
    /// Any line that is not one of the structured forms below, stored as trimmed text
    /// with a leading `local ` removed.
    Simple(String),
    /// `return` (`None`) or `return <expr>` (`Some` with the expression text).
    Return(Option<String>),
    /// A bare `break` line.
    Break,
    /// An `if` / `elseif` / `else` chain.
    If {
        /// `(condition text, block)` pairs: the `if` arm first, then each `elseif` in order.
        arms: Vec<(String, Vec<Stmt>)>,
        /// Block of the trailing `else`, when present.
        else_arm: Option<Vec<Stmt>>,
    },
    /// A `while <cond> do ... end` loop.
    While {
        /// Condition text between `while` and `do`.
        cond: String,
        /// Statements of the loop body.
        body: Vec<Stmt>,
    },
}
48
/// Returns `true` when the line is blank or, after trimming, begins with the Lua comment
/// marker `--`.
fn is_comment_or_empty(s: &str) -> bool {
    match s.trim() {
        "" => true,
        text => text.starts_with("--"),
    }
}
53
/// Drops leading whitespace and, if present, a leading `local ` keyword (plus the
/// whitespace after it). Trailing whitespace is left untouched.
fn strip_local(stmt: &str) -> &str {
    let trimmed = stmt.trim_start();
    match trimmed.strip_prefix("local ") {
        Some(after_keyword) => after_keyword.trim_start(),
        None => trimmed,
    }
}
61
/// Returns `true` when the trimmed line opens a multi-line `if`: it begins with `if ` and
/// ends with ` then`.
fn is_if_start(s: &str) -> bool {
    let line = s.trim();
    if !line.starts_with("if ") {
        return false;
    }
    line.ends_with(" then")
}
66
/// Returns `true` when the trimmed line is an `elseif` header: it begins with `elseif `
/// and ends with ` then`.
fn is_elseif(s: &str) -> bool {
    let line = s.trim();
    let has_keyword = line.starts_with("elseif ");
    let has_then = line.ends_with(" then");
    has_keyword && has_then
}
71
/// Returns `true` when the line, ignoring surrounding whitespace, is exactly `else`.
fn is_else(s: &str) -> bool {
    matches!(s.trim(), "else")
}
75
/// Returns `true` when the trimmed line opens a `while` loop: it begins with `while ` and
/// ends with ` do`.
fn is_while_start(s: &str) -> bool {
    let line = s.trim();
    [line.starts_with("while "), line.ends_with(" do")]
        .iter()
        .all(|&ok| ok)
}
80
/// Returns `true` when the line, ignoring surrounding whitespace, is exactly `end`.
fn is_end(s: &str) -> bool {
    let line = s.trim();
    "end" == line
}
84
85fn extract_if_cond(line: &str) -> Result<String> {
86    let t = line.trim();
87    if !t.ends_with(" then") {
88        bail!("invalid if header: {t}");
89    }
90    let inner = t
91        .strip_prefix("if ")
92        .or_else(|| t.strip_prefix("elseif "))
93        .ok_or_else(|| anyhow!("invalid if header: {t}"))?;
94    let inner = inner.trim_end_matches(" then");
95    Ok(inner.trim().to_string())
96}
97
98fn extract_while_cond(line: &str) -> Result<String> {
99    let t = line.trim();
100    if !t.ends_with(" do") {
101        bail!("invalid while header: {t}");
102    }
103    let inner = t
104        .strip_prefix("while ")
105        .ok_or_else(|| anyhow!("invalid while header: {t}"))?;
106    let inner = inner.trim_end_matches(" do");
107    Ok(inner.trim().to_string())
108}
109
110fn parse_block(lines: &[String], i: &mut usize, stop_on: &[&str]) -> Result<Vec<Stmt>> {
111    let mut out: Vec<Stmt> = Vec::new();
112
113    while *i < lines.len() {
114        let line0 = lines[*i].clone();
115        let line = line0.trim();
116
117        if is_comment_or_empty(line) {
118            *i += 1;
119            continue;
120        }
121
122        if stop_on.iter().any(|tok| match *tok {
123            "elseif" => is_elseif(line),
124            "else" => is_else(line),
125            "end" => is_end(line),
126            _ => false,
127        }) {
128            break;
129        }
130
131        if is_if_start(line) {
132            let cond = extract_if_cond(line)?;
133            *i += 1;
134            let then_block = parse_block(lines, i, &["elseif", "else", "end"])?;
135
136            let mut arms: Vec<(String, Vec<Stmt>)> = vec![(cond, then_block)];
137            while *i < lines.len() && is_elseif(lines[*i].trim()) {
138                let c = extract_if_cond(lines[*i].trim())?;
139                *i += 1;
140                let b = parse_block(lines, i, &["elseif", "else", "end"])?;
141                arms.push((c, b));
142            }
143
144            let mut else_arm: Option<Vec<Stmt>> = None;
145            if *i < lines.len() && is_else(lines[*i].trim()) {
146                *i += 1;
147                let b = parse_block(lines, i, &["end"])?;
148                else_arm = Some(b);
149            }
150
151            if *i >= lines.len() || !is_end(lines[*i].trim()) {
152                bail!("if without closing end");
153            }
154            *i += 1;
155
156            out.push(Stmt::If { arms, else_arm });
157            continue;
158        }
159
160        if is_while_start(line) {
161            let cond = extract_while_cond(line)?;
162            *i += 1;
163            let body = parse_block(lines, i, &["end"])?;
164            if *i >= lines.len() || !is_end(lines[*i].trim()) {
165                bail!("while without closing end");
166            }
167            *i += 1;
168            out.push(Stmt::While { cond, body });
169            continue;
170        }
171
172        if line == "break" {
173            *i += 1;
174            out.push(Stmt::Break);
175            continue;
176        }
177
178        if line == "return" {
179            *i += 1;
180            out.push(Stmt::Return(None));
181            continue;
182        }
183
184        if let Some(rest) = line.strip_prefix("return ") {
185            *i += 1;
186            out.push(Stmt::Return(Some(rest.trim().to_string())));
187            continue;
188        }
189
190        // Local declarations are not semantic. Keep assignments.
191        if line.starts_with("local ") && !line.contains('=') {
192            *i += 1;
193            continue;
194        }
195
196        let simple = strip_local(line).to_string();
197        *i += 1;
198        out.push(Stmt::Simple(simple));
199    }
200
201    Ok(out)
202}
203
204fn split_functions(lines: &[String], start_idx: usize) -> Result<Vec<Vec<String>>> {
205    let head_re = Regex::new(r"^(?:local\s+)?function\s+").unwrap();
206
207    let mut out: Vec<Vec<String>> = Vec::new();
208    let mut i = start_idx;
209    while i < lines.len() {
210        if head_re.is_match(lines[i].trim()) {
211            let start = i;
212            let mut nest = 1i32;
213            i += 1;
214            while i < lines.len() && nest > 0 {
215                let t = lines[i].trim();
216                if head_re.is_match(t) {
217                    nest += 1;
218                } else if is_if_start(t) {
219                    nest += 1;
220                } else if is_while_start(t) {
221                    nest += 1;
222                } else if is_end(t) {
223                    nest -= 1;
224                }
225                i += 1;
226            }
227            out.push(lines[start..i].to_vec());
228        } else if is_comment_or_empty(lines[i].trim()) {
229            i += 1;
230        } else {
231            bail!("unsupported top-level statement: {}", lines[i].trim());
232        }
233    }
234
235    if out.is_empty() {
236        bail!("no functions found in Lua");
237    }
238
239    Ok(out)
240}
241
242fn parse_global_line(line: &str, seen: &mut HashSet<String>, out: &mut Vec<GlobalDecl>) -> Result<()> {
243    let t = line.trim();
244    let (kind, rest) = if let Some(rest) = t.strip_prefix("global ") {
245        (GlobalKind::NonVolatile, rest.trim())
246    } else if let Some(rest) = t.strip_prefix("volatile global ") {
247        (GlobalKind::Volatile, rest.trim())
248    } else {
249        bail!("unsupported top-level statement: {t}");
250    };
251
252    if rest.is_empty() {
253        bail!("empty global declaration: {t}");
254    }
255    if rest.contains('=') {
256        bail!("global initializers are not supported: {t}");
257    }
258
259    let re_g = Regex::new(r"^g\d+$").unwrap();
260    let re_vg = Regex::new(r"^vg\d+$").unwrap();
261
262    for raw_name in rest.split(',') {
263        let name = raw_name.trim();
264        if name.is_empty() {
265            bail!("empty global name in declaration: {t}");
266        }
267        match kind {
268            GlobalKind::NonVolatile => {
269                if !re_g.is_match(name) {
270                    bail!("non-volatile globals must be named gN: {name}");
271                }
272            }
273            GlobalKind::Volatile => {
274                if !re_vg.is_match(name) {
275                    bail!("volatile globals must be named vgN: {name}");
276                }
277            }
278        }
279        if !seen.insert(name.to_string()) {
280            bail!("duplicate global declaration: {name}");
281        }
282        out.push(GlobalDecl {
283            name: name.to_string(),
284            kind: kind.clone(),
285        });
286    }
287
288    Ok(())
289}
290
291pub fn parse_lua(path: &Path) -> Result<Program> {
292    let txt = std::fs::read_to_string(path).with_context(|| format!("read lua: {}", path.display()))?;
293    let lines: Vec<String> = txt.lines().map(|s| s.to_string()).collect();
294    let head_re = Regex::new(r"^(?:local\s+)?function\s+").unwrap();
295
296    let mut globals: Vec<GlobalDecl> = Vec::new();
297    let mut seen_globals: HashSet<String> = HashSet::new();
298    let mut first_fn_idx = None;
299
300    for (i, line) in lines.iter().enumerate() {
301        let t = line.trim();
302        if is_comment_or_empty(t) {
303            continue;
304        }
305        if head_re.is_match(t) {
306            first_fn_idx = Some(i);
307            break;
308        }
309        parse_global_line(t, &mut seen_globals, &mut globals)?;
310    }
311
312    let start_idx = first_fn_idx.ok_or_else(|| anyhow!("no functions found in Lua"))?;
313    let funcs_lines = split_functions(&lines, start_idx)?;
314
315    let head_re = Regex::new(r"^(?:local\s+)?function\s+([A-Za-z_]\w*)\s*\(([^)]*)\)\s*$").unwrap();
316    let re_a = Regex::new(r"\ba(\d+)\b").unwrap();
317    let re_l = Regex::new(r"\bl(\d+)\b").unwrap();
318
319    let mut funs: Vec<Function> = Vec::new();
320    for fl in funcs_lines {
321        if fl.is_empty() {
322            continue;
323        }
324        let head = fl[0].trim();
325        let caps = head_re
326            .captures(head)
327            .ok_or_else(|| anyhow!("unexpected function header: {head}"))?;
328        let name = caps.get(1).unwrap().as_str().to_string();
329        let args_s = caps.get(2).unwrap().as_str().trim();
330        let args: Vec<&str> = if args_s.is_empty() {
331            vec![]
332        } else {
333            args_s.split(',').map(|x| x.trim()).filter(|x| !x.is_empty()).collect()
334        };
335
336        let mut max_a: Option<u32> = None;
337        for a in &args {
338            if let Some(mm) = re_a.captures(a) {
339                let v: u32 = mm.get(1).unwrap().as_str().parse().unwrap_or(0);
340                max_a = Some(max_a.map(|x| x.max(v)).unwrap_or(v));
341            }
342        }
343        for ln in &fl {
344            for mm in re_a.captures_iter(ln) {
345                let v: u32 = mm.get(1).unwrap().as_str().parse().unwrap_or(0);
346                max_a = Some(max_a.map(|x| x.max(v)).unwrap_or(v));
347            }
348        }
349        let args_count = i8::try_from(max_a.map(|x| x + 1).unwrap_or(0))
350            .map_err(|_| anyhow!("args_count does not fit i8"))?;
351
352        let mut max_l: Option<u32> = None;
353        for ln in &fl {
354            for mm in re_l.captures_iter(ln) {
355                let v: u32 = mm.get(1).unwrap().as_str().parse().unwrap_or(0);
356                max_l = Some(max_l.map(|x| x.max(v)).unwrap_or(v));
357            }
358        }
359        let locals_count = i8::try_from(max_l.map(|x| x + 1).unwrap_or(0))
360            .map_err(|_| anyhow!("locals_count does not fit i8"))?;
361
362        if fl.len() < 2 {
363            bail!("function {name}: too short");
364        }
365        let body_lines: Vec<String> = fl[1..fl.len() - 1].to_vec();
366        let mut idx = 0usize;
367        let body = parse_block(&body_lines, &mut idx, &[])?;
368
369        funs.push(Function {
370            name,
371            args_count,
372            locals_count,
373            body,
374            raw: fl,
375        });
376    }
377
378    Ok(Program {
379        globals,
380        functions: funs,
381    })
382}