lua2hcb_compiler/
compile.rs

1use crate::ir::{Item, Label, OpKind};
2use crate::lua::{Function, GlobalDecl, GlobalKind, Program, Stmt};
3use crate::meta::Meta;
4use anyhow::{anyhow, bail, Result};
5use regex::Regex;
6use std::collections::{HashMap, HashSet};
7
/// Shape of an `if` condition string, as classified by `parse_cond`.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum CondKind {
    /// `S<n>` on its own, or `S<n> ~= 0`.
    NonZero,
    /// `S<n> == 0`.
    Zero,
    /// The literal `true`.
    AlwaysTrue,
    /// The literal `false` or `nil`.
    AlwaysFalse,
    /// Any other condition text.
    Generic,
}
16
17fn parse_cond(cond: &str) -> CondKind {
18    let c = cond.trim();
19    if c == "true" {
20        return CondKind::AlwaysTrue;
21    }
22    if c == "false" || c == "nil" {
23        return CondKind::AlwaysFalse;
24    }
25
26    let c = c.trim_start_matches('(').trim_end_matches(')').trim();
27
28    let re_ne0 = Regex::new(r"^S\d+\s*~=\s*0$").unwrap();
29    let re_eq0 = Regex::new(r"^S\d+\s*==\s*0$").unwrap();
30    let re_s = Regex::new(r"^S\d+$").unwrap();
31
32    if re_ne0.is_match(c) {
33        return CondKind::NonZero;
34    }
35    if re_eq0.is_match(c) {
36        return CondKind::Zero;
37    }
38    if re_s.is_match(c) {
39        return CondKind::NonZero;
40    }
41
42    CondKind::Generic
43}
44
45#[derive(Clone, Debug)]
46pub struct GlobalLayout {
47    pub non_volatile_count: u16,
48    pub volatile_count: u16,
49    name_to_idx: HashMap<String, u16>,
50    declared: HashSet<String>,
51}
52
53impl GlobalLayout {
54    fn from_globals(globals: &[GlobalDecl]) -> Result<Self> {
55        let mut max_g: Option<u16> = None;
56        let mut max_vg: Option<u16> = None;
57        let re_g = Regex::new(r"^g(\d+)$").unwrap();
58        let re_vg = Regex::new(r"^vg(\d+)$").unwrap();
59        let mut declared = HashSet::new();
60
61        for g in globals {
62            if !declared.insert(g.name.clone()) {
63                bail!("duplicate global declaration: {}", g.name);
64            }
65            match g.kind {
66                GlobalKind::NonVolatile => {
67                    let caps = re_g
68                        .captures(&g.name)
69                        .ok_or_else(|| anyhow!("invalid non-volatile global name: {}", g.name))?;
70                    let idx: u16 = caps.get(1).unwrap().as_str().parse()?;
71                    max_g = Some(max_g.map(|x| x.max(idx)).unwrap_or(idx));
72                }
73                GlobalKind::Volatile => {
74                    let caps = re_vg
75                        .captures(&g.name)
76                        .ok_or_else(|| anyhow!("invalid volatile global name: {}", g.name))?;
77                    let idx: u16 = caps.get(1).unwrap().as_str().parse()?;
78                    max_vg = Some(max_vg.map(|x| x.max(idx)).unwrap_or(idx));
79                }
80            }
81        }
82
83        let non_volatile_count = max_g.map(|x| x + 1).unwrap_or(0);
84        let volatile_count = max_vg.map(|x| x + 1).unwrap_or(0);
85        let mut name_to_idx = HashMap::new();
86        for g in globals {
87            let idx = match g.kind {
88                GlobalKind::NonVolatile => g.name[1..].parse::<u16>()?,
89                GlobalKind::Volatile => non_volatile_count + g.name[2..].parse::<u16>()?,
90            };
91            name_to_idx.insert(g.name.clone(), idx);
92        }
93
94        Ok(Self {
95            non_volatile_count,
96            volatile_count,
97            name_to_idx,
98            declared,
99        })
100    }
101
102    fn global_idx(&self, name: &str) -> Option<u16> {
103        self.name_to_idx.get(name).copied()
104    }
105
106    fn is_declared(&self, name: &str) -> bool {
107        self.declared.contains(name)
108    }
109}
110
111fn slot_to_stack_idx(var: &str, args_count: i8) -> Result<i8> {
112    let re_a = Regex::new(r"^a(\d+)$").unwrap();
113    let re_l = Regex::new(r"^l(\d+)$").unwrap();
114
115    if let Some(c) = re_a.captures(var) {
116        let v: i16 = c.get(1).unwrap().as_str().parse()?;
117        return Ok(i8::try_from(v).map_err(|_| anyhow!("stack index out of i8"))?);
118    }
119    if let Some(c) = re_l.captures(var) {
120        let v: i16 = c.get(1).unwrap().as_str().parse()?;
121        let idx: i16 = i16::from(args_count) + v;
122        return Ok(i8::try_from(idx).map_err(|_| anyhow!("stack index out of i8"))?);
123    }
124    bail!("not a frame slot: {var}")
125}
126
127fn push_int(v: i64) -> Result<OpKind> {
128    if v < i64::from(i32::MIN) || v > i64::from(i32::MAX) {
129        bail!("integer out of i32 range: {v}");
130    }
131    if v >= -128 && v <= 127 {
132        return Ok(OpKind::PushI8(v as i8));
133    }
134    if v >= -32768 && v <= 32767 {
135        return Ok(OpKind::PushI16(v as i16));
136    }
137    Ok(OpKind::PushI32(v as i32))
138}
139
/// Resolves backslash escapes in the body of a string literal.
///
/// `\n`, `\r` and `\t` become the corresponding control characters; a
/// backslash before any other character yields that character unchanged
/// (which covers `\\` and `\"`). A lone backslash at the very end of the
/// input is dropped.
fn lua_unescape_string(lit: &str) -> String {
    let mut result = String::new();
    let mut after_backslash = false;

    for c in lit.chars() {
        if after_backslash {
            result.push(match c {
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                other => other,
            });
            after_backslash = false;
        } else if c == '\\' {
            after_backslash = true;
        } else {
            result.push(c);
        }
    }

    result
}
160
/// Lexical token of the expression sub-language, produced by `tokenize_expr`.
#[derive(Debug, Clone, PartialEq)]
enum Tok {
    /// A name that is not one of the keywords below.
    Ident(String),
    /// Integer literal.
    Int(i64),
    /// Literal containing a decimal point.
    Float(f32),
    /// Double-quoted string literal with its escapes already resolved.
    Str(String),
    /// Keyword `nil`.
    Nil,
    /// Keyword `true`.
    True,
    /// Keyword `false`.
    False,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `,`
    Comma,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `%`
    Percent,
    /// `&`
    Amp,
    /// `==`
    EqEq,
    /// `~=`
    NotEq,
    /// `<`
    Lt,
    /// `<=`
    Le,
    /// `>`
    Gt,
    /// `>=`
    Ge,
    /// Keyword `and`.
    And,
    /// Keyword `or`.
    Or,
}
190
191fn tokenize_expr(s: &str) -> Result<Vec<Tok>> {
192    let mut toks = Vec::new();
193    let b = s.as_bytes();
194    let mut i = 0usize;
195    while i < b.len() {
196        let ch = b[i] as char;
197        if ch.is_ascii_whitespace() {
198            i += 1;
199            continue;
200        }
201        match ch {
202            '(' => {
203                toks.push(Tok::LParen);
204                i += 1;
205            }
206            ')' => {
207                toks.push(Tok::RParen);
208                i += 1;
209            }
210            '[' => {
211                toks.push(Tok::LBracket);
212                i += 1;
213            }
214            ']' => {
215                toks.push(Tok::RBracket);
216                i += 1;
217            }
218            ',' => {
219                toks.push(Tok::Comma);
220                i += 1;
221            }
222            '+' => {
223                toks.push(Tok::Plus);
224                i += 1;
225            }
226            '-' => {
227                if i + 1 < b.len() && (b[i + 1] as char).is_ascii_digit() {
228                    let start = i;
229                    i += 1;
230                    while i < b.len() && (b[i] as char).is_ascii_digit() {
231                        i += 1;
232                    }
233                    let mut is_float = false;
234                    if i < b.len() && b[i] as char == '.' {
235                        is_float = true;
236                        i += 1;
237                        while i < b.len() && (b[i] as char).is_ascii_digit() {
238                            i += 1;
239                        }
240                    }
241                    let lit = &s[start..i];
242                    if is_float {
243                        toks.push(Tok::Float(lit.parse()?));
244                    } else {
245                        toks.push(Tok::Int(lit.parse()?));
246                    }
247                } else {
248                    toks.push(Tok::Minus);
249                    i += 1;
250                }
251            }
252            '*' => {
253                toks.push(Tok::Star);
254                i += 1;
255            }
256            '/' => {
257                toks.push(Tok::Slash);
258                i += 1;
259            }
260            '%' => {
261                toks.push(Tok::Percent);
262                i += 1;
263            }
264            '&' => {
265                toks.push(Tok::Amp);
266                i += 1;
267            }
268            '=' => {
269                if i + 1 < b.len() && b[i + 1] as char == '=' {
270                    toks.push(Tok::EqEq);
271                    i += 2;
272                } else {
273                    bail!("unexpected '=' inside expression: {s}");
274                }
275            }
276            '~' => {
277                if i + 1 < b.len() && b[i + 1] as char == '=' {
278                    toks.push(Tok::NotEq);
279                    i += 2;
280                } else {
281                    bail!("unexpected '~' inside expression: {s}");
282                }
283            }
284            '<' => {
285                if i + 1 < b.len() && b[i + 1] as char == '=' {
286                    toks.push(Tok::Le);
287                    i += 2;
288                } else {
289                    toks.push(Tok::Lt);
290                    i += 1;
291                }
292            }
293            '>' => {
294                if i + 1 < b.len() && b[i + 1] as char == '=' {
295                    toks.push(Tok::Ge);
296                    i += 2;
297                } else {
298                    toks.push(Tok::Gt);
299                    i += 1;
300                }
301            }
302            '"' => {
303                i += 1;
304                let start = i;
305                let mut out = String::new();
306                while i < b.len() {
307                    let c = b[i] as char;
308                    if c == '\\' {
309                        if i + 1 >= b.len() {
310                            bail!("unterminated string literal");
311                        }
312                        let esc = b[i + 1] as char;
313                        match esc {
314                            'n' => out.push('\n'),
315                            'r' => out.push('\r'),
316                            't' => out.push('\t'),
317                            '\\' => out.push('\\'),
318                            '"' => out.push('"'),
319                            other => out.push(other),
320                        }
321                        i += 2;
322                        continue;
323                    }
324                    if c == '"' {
325                        break;
326                    }
327                    out.push(c);
328                    i += 1;
329                }
330                if i >= b.len() || b[i] as char != '"' {
331                    bail!("unterminated string literal starting at: {}", &s[start - 1..]);
332                }
333                i += 1;
334                toks.push(Tok::Str(out));
335            }
336            c if c.is_ascii_digit() => {
337                let start = i;
338                i += 1;
339                while i < b.len() && (b[i] as char).is_ascii_digit() {
340                    i += 1;
341                }
342                let mut is_float = false;
343                if i < b.len() && b[i] as char == '.' {
344                    is_float = true;
345                    i += 1;
346                    while i < b.len() && (b[i] as char).is_ascii_digit() {
347                        i += 1;
348                    }
349                }
350                let lit = &s[start..i];
351                if is_float {
352                    toks.push(Tok::Float(lit.parse()?));
353                } else {
354                    toks.push(Tok::Int(lit.parse()?));
355                }
356            }
357            c if c.is_ascii_alphabetic() || c == '_' => {
358                let start = i;
359                i += 1;
360                while i < b.len() {
361                    let c = b[i] as char;
362                    if c.is_ascii_alphanumeric() || c == '_' {
363                        i += 1;
364                    } else {
365                        break;
366                    }
367                }
368                let ident = &s[start..i];
369                match ident {
370                    "nil" => toks.push(Tok::Nil),
371                    "true" => toks.push(Tok::True),
372                    "false" => toks.push(Tok::False),
373                    "and" => toks.push(Tok::And),
374                    "or" => toks.push(Tok::Or),
375                    _ => toks.push(Tok::Ident(ident.to_string())),
376                }
377            }
378            _ => bail!("unsupported character in expression: {ch}"),
379        }
380    }
381    Ok(toks)
382}
383
/// Prefix operators of the expression sub-language.
#[derive(Debug, Clone)]
enum UnaryOp {
    /// Arithmetic negation (`-x`).
    Neg,
}
388
/// Infix operators of the expression sub-language.
#[derive(Debug, Clone, Eq, PartialEq)]
enum BinaryOp {
    /// `+`
    Add,
    /// `-`
    Sub,
    /// `*`
    Mul,
    /// `/`
    Div,
    /// `%`
    Mod,
    /// `&`
    BitAnd,
    /// `==`
    Eq,
    /// `~=`
    Ne,
    /// `<`
    Lt,
    /// `<=`
    Le,
    /// `>`
    Gt,
    /// `>=`
    Ge,
    /// `and`
    And,
    /// `or`
    Or,
}
406
407#[derive(Clone, Debug)]
408enum Expr {
409    Nil,
410    True,
411    False,
412    Int(i64),
413    Float(f32),
414    Str(String),
415    Var(String),
416    Call { name: String, args: Vec<Expr> },
417    GlobalTable { idx: u16, key: Box<Expr> },
418    LocalTable { idx: i8, key: Box<Expr> },
419    Unary { op: UnaryOp, expr: Box<Expr> },
420    Binary { op: BinaryOp, left: Box<Expr>, right: Box<Expr> },
421}
422
423struct ExprParser {
424    toks: Vec<Tok>,
425    pos: usize,
426}
427
428impl ExprParser {
429    fn new(toks: Vec<Tok>) -> Self {
430        Self { toks, pos: 0 }
431    }
432
433    fn peek(&self) -> Option<&Tok> {
434        self.toks.get(self.pos)
435    }
436
437    fn bump(&mut self) -> Option<Tok> {
438        let t = self.toks.get(self.pos).cloned();
439        if t.is_some() {
440            self.pos += 1;
441        }
442        t
443    }
444
445    fn eat(&mut self, tok: &Tok) -> bool {
446        if self.peek() == Some(tok) {
447            self.pos += 1;
448            true
449        } else {
450            false
451        }
452    }
453
454    fn parse(mut self) -> Result<Expr> {
455        let expr = self.parse_or()?;
456        if self.pos != self.toks.len() {
457            bail!("unexpected trailing tokens in expression");
458        }
459        Ok(expr)
460    }
461
462    fn parse_or(&mut self) -> Result<Expr> {
463        let mut expr = self.parse_and()?;
464        while self.eat(&Tok::Or) {
465            let rhs = self.parse_and()?;
466            expr = Expr::Binary { op: BinaryOp::Or, left: Box::new(expr), right: Box::new(rhs) };
467        }
468        Ok(expr)
469    }
470
471    fn parse_and(&mut self) -> Result<Expr> {
472        let mut expr = self.parse_cmp()?;
473        while self.eat(&Tok::And) {
474            let rhs = self.parse_cmp()?;
475            expr = Expr::Binary { op: BinaryOp::And, left: Box::new(expr), right: Box::new(rhs) };
476        }
477        Ok(expr)
478    }
479
480    fn parse_cmp(&mut self) -> Result<Expr> {
481        let mut expr = self.parse_add()?;
482        loop {
483            let op = match self.peek() {
484                Some(Tok::EqEq) => BinaryOp::Eq,
485                Some(Tok::NotEq) => BinaryOp::Ne,
486                Some(Tok::Lt) => BinaryOp::Lt,
487                Some(Tok::Le) => BinaryOp::Le,
488                Some(Tok::Gt) => BinaryOp::Gt,
489                Some(Tok::Ge) => BinaryOp::Ge,
490                _ => break,
491            };
492            self.bump();
493            let rhs = self.parse_add()?;
494            expr = Expr::Binary { op, left: Box::new(expr), right: Box::new(rhs) };
495        }
496        Ok(expr)
497    }
498
499    fn parse_add(&mut self) -> Result<Expr> {
500        let mut expr = self.parse_mul()?;
501        loop {
502            let op = match self.peek() {
503                Some(Tok::Plus) => BinaryOp::Add,
504                Some(Tok::Minus) => BinaryOp::Sub,
505                _ => break,
506            };
507            self.bump();
508            let rhs = self.parse_mul()?;
509            expr = Expr::Binary { op, left: Box::new(expr), right: Box::new(rhs) };
510        }
511        Ok(expr)
512    }
513
514    fn parse_mul(&mut self) -> Result<Expr> {
515        let mut expr = self.parse_bitand()?;
516        loop {
517            let op = match self.peek() {
518                Some(Tok::Star) => BinaryOp::Mul,
519                Some(Tok::Slash) => BinaryOp::Div,
520                Some(Tok::Percent) => BinaryOp::Mod,
521                _ => break,
522            };
523            self.bump();
524            let rhs = self.parse_bitand()?;
525            expr = Expr::Binary { op, left: Box::new(expr), right: Box::new(rhs) };
526        }
527        Ok(expr)
528    }
529
530    fn parse_bitand(&mut self) -> Result<Expr> {
531        let mut expr = self.parse_unary()?;
532        while self.eat(&Tok::Amp) {
533            let rhs = self.parse_unary()?;
534            expr = Expr::Binary { op: BinaryOp::BitAnd, left: Box::new(expr), right: Box::new(rhs) };
535        }
536        Ok(expr)
537    }
538
539    fn parse_unary(&mut self) -> Result<Expr> {
540        if self.eat(&Tok::Minus) {
541            let expr = self.parse_unary()?;
542            return Ok(Expr::Unary { op: UnaryOp::Neg, expr: Box::new(expr) });
543        }
544        self.parse_primary()
545    }
546
547    fn parse_primary(&mut self) -> Result<Expr> {
548        match self.bump().ok_or_else(|| anyhow!("unexpected end of expression"))? {
549            Tok::Nil => Ok(Expr::Nil),
550            Tok::True => Ok(Expr::True),
551            Tok::False => Ok(Expr::False),
552            Tok::Int(v) => Ok(Expr::Int(v)),
553            Tok::Float(v) => Ok(Expr::Float(v)),
554            Tok::Str(s) => Ok(Expr::Str(s)),
555            Tok::LParen => {
556                let e = self.parse_or()?;
557                if !self.eat(&Tok::RParen) {
558                    bail!("missing ')' in expression");
559                }
560                Ok(e)
561            }
562            Tok::Ident(name) => {
563                if self.eat(&Tok::LParen) {
564                    let mut args = Vec::new();
565                    if !self.eat(&Tok::RParen) {
566                        loop {
567                            args.push(self.parse_or()?);
568                            if self.eat(&Tok::Comma) {
569                                continue;
570                            }
571                            if !self.eat(&Tok::RParen) {
572                                bail!("missing ')' after call arguments");
573                            }
574                            break;
575                        }
576                    }
577                    return Ok(Expr::Call { name, args });
578                }
579
580                if (name == "GT" || name == "LT") && self.eat(&Tok::LBracket) {
581                    let idx = match self.bump() {
582                        Some(Tok::Int(v)) => v,
583                        Some(Tok::Minus) => match self.bump() {
584                            Some(Tok::Int(v)) => -v,
585                            _ => bail!("table index must be integer"),
586                        },
587                        _ => bail!("table index must be integer"),
588                    };
589                    if !self.eat(&Tok::RBracket) || !self.eat(&Tok::LBracket) {
590                        bail!("table access must be GT[idx][key] or LT[idx][key]");
591                    }
592                    let key = self.parse_or()?;
593                    if !self.eat(&Tok::RBracket) {
594                        bail!("table access missing closing ']'");
595                    }
596                    if name == "GT" {
597                        if idx < 0 || idx > i64::from(u16::MAX) {
598                            bail!("GT index out of range: {idx}");
599                        }
600                        return Ok(Expr::GlobalTable { idx: idx as u16, key: Box::new(key) });
601                    }
602                    if idx < i64::from(i8::MIN) || idx > i64::from(i8::MAX) {
603                        bail!("LT index out of range: {idx}");
604                    }
605                    return Ok(Expr::LocalTable { idx: idx as i8, key: Box::new(key) });
606                }
607
608                Ok(Expr::Var(name))
609            }
610            other => bail!("unexpected token in expression: {:?}", other),
611        }
612    }
613}
614
615fn parse_expr(expr: &str) -> Result<Expr> {
616    let toks = tokenize_expr(expr)?;
617    ExprParser::new(toks).parse()
618}
619
620fn emit_call(name: &str, args: &[Expr], meta: &Meta, user_fns: &HashSet<String>, layout: &GlobalLayout, args_count: i8, out: &mut Vec<Item>) -> Result<()> {
621    for arg in args {
622        compile_expr(arg, args_count, meta, user_fns, layout, out)?;
623    }
624
625    if let Some(sid) = meta.syscall_id_by_name(name) {
626        if let Some(expect) = meta.syscall_args_by_id(sid) {
627            if usize::from(expect) != args.len() {
628                bail!("syscall {name} expects {expect} args, got {}", args.len());
629            }
630        }
631        out.push(Item::Op(OpKind::Syscall { id: sid }));
632        return Ok(());
633    }
634
635    if name.starts_with("f_") || user_fns.contains(name) {
636        out.push(Item::Op(OpKind::CallFn { name: name.to_string() }));
637        return Ok(());
638    }
639
640    bail!("unknown callee: {name}")
641}
642
643fn compile_expr(expr: &Expr, args_count: i8, meta: &Meta, user_fns: &HashSet<String>, layout: &GlobalLayout, out: &mut Vec<Item>) -> Result<()> {
644    match expr {
645        Expr::Nil => out.push(Item::Op(OpKind::PushNil)),
646        Expr::True => out.push(Item::Op(OpKind::PushTrue)),
647        Expr::False => bail!("false is not supported as a runtime value in this compiler"),
648        Expr::Int(v) => out.push(Item::Op(push_int(*v)?)),
649        Expr::Float(v) => out.push(Item::Op(OpKind::PushF32(*v))),
650        Expr::Str(s) => out.push(Item::Op(OpKind::PushString(s.clone()))),
651        Expr::Var(name) => {
652            let re_s = Regex::new(r"^S\d+$").unwrap();
653            let re_slot = Regex::new(r"^(a\d+|l\d+)$").unwrap();
654            if name == "__ret" {
655                out.push(Item::Op(OpKind::PushReturn));
656            } else if let Some(idx) = layout.global_idx(name) {
657                out.push(Item::Op(OpKind::PushGlobal(idx)));
658            } else if re_slot.is_match(name) {
659                let idx = slot_to_stack_idx(name, args_count)?;
660                out.push(Item::Op(OpKind::PushStack(idx)));
661            } else if re_s.is_match(name) {
662                out.push(Item::Op(OpKind::PushTop));
663            } else {
664                bail!("unsupported variable reference: {name}");
665            }
666        }
667        Expr::Call { name, args } => {
668            emit_call(name, args, meta, user_fns, layout, args_count, out)?;
669            out.push(Item::Op(OpKind::PushReturn));
670        }
671        Expr::GlobalTable { idx, key } => {
672            compile_expr(key, args_count, meta, user_fns, layout, out)?;
673            out.push(Item::Op(OpKind::PushGlobalTable(*idx)));
674        }
675        Expr::LocalTable { idx, key } => {
676            compile_expr(key, args_count, meta, user_fns, layout, out)?;
677            out.push(Item::Op(OpKind::PushLocalTable(*idx)));
678        }
679        Expr::Unary { op: UnaryOp::Neg, expr } => {
680            compile_expr(expr, args_count, meta, user_fns, layout, out)?;
681            out.push(Item::Op(OpKind::Neg));
682        }
683        Expr::Binary { op, left, right } => {
684            if *op == BinaryOp::Ne {
685                if let Expr::Binary { op: BinaryOp::BitAnd, left: bleft, right: bright } = &**left {
686                    if matches!(&**right, Expr::Int(0)) {
687                        compile_expr(bleft, args_count, meta, user_fns, layout, out)?;
688                        compile_expr(bright, args_count, meta, user_fns, layout, out)?;
689                        out.push(Item::Op(OpKind::BitTest));
690                        return Ok(());
691                    }
692                }
693            }
694            if *op == BinaryOp::And || *op == BinaryOp::Or {
695                compile_expr(left, args_count, meta, user_fns, layout, out)?;
696                compile_expr(right, args_count, meta, user_fns, layout, out)?;
697                out.push(Item::Op(match op {
698                    BinaryOp::And => OpKind::And,
699                    BinaryOp::Or => OpKind::Or,
700                    _ => unreachable!(),
701                }));
702                return Ok(());
703            }
704            if *op == BinaryOp::BitAnd {
705                bail!("plain bitwise '&' values are not supported, use '(x & y) ~= 0'");
706            }
707            compile_expr(left, args_count, meta, user_fns, layout, out)?;
708            compile_expr(right, args_count, meta, user_fns, layout, out)?;
709            let inst = match op {
710                BinaryOp::Add => OpKind::Add,
711                BinaryOp::Sub => OpKind::Sub,
712                BinaryOp::Mul => OpKind::Mul,
713                BinaryOp::Div => OpKind::Div,
714                BinaryOp::Mod => OpKind::Mod,
715                BinaryOp::Eq => OpKind::SetE,
716                BinaryOp::Ne => OpKind::SetNe,
717                BinaryOp::Lt => OpKind::SetL,
718                BinaryOp::Le => OpKind::SetLe,
719                BinaryOp::Gt => OpKind::SetG,
720                BinaryOp::Ge => OpKind::SetGe,
721                BinaryOp::BitAnd | BinaryOp::And | BinaryOp::Or => unreachable!(),
722            };
723            out.push(Item::Op(inst));
724        }
725    }
726    Ok(())
727}
728
/// Splits `lhs = rhs` at the first top-level assignment `=`.
///
/// An `=` counts only when it is outside double-quoted strings, parentheses
/// and brackets, and is not part of `==`, `~=`, `<=` or `>=`. Both halves are
/// returned trimmed. Returns `None` when the statement contains no such `=`.
fn split_assignment(stmt: &str) -> Option<(String, String)> {
    let bytes = stmt.as_bytes();
    let mut depth_paren = 0i32;
    let mut depth_brack = 0i32;
    let mut in_string = false;
    let mut i = 0usize;

    while i < bytes.len() {
        let c = bytes[i];

        if in_string {
            match c {
                // Skip the escaped character so an escaped quote cannot end the string.
                b'\\' => {
                    i += 2;
                    continue;
                }
                b'"' => in_string = false,
                _ => {}
            }
            i += 1;
            continue;
        }

        match c {
            b'"' => in_string = true,
            b'(' => depth_paren += 1,
            b')' => depth_paren -= 1,
            b'[' => depth_brack += 1,
            b']' => depth_brack -= 1,
            b'=' if depth_paren == 0 && depth_brack == 0 => {
                let prev = i.checked_sub(1).map(|p| bytes[p]);
                let next = bytes.get(i + 1).copied();
                // '<' and '>' are included so that `<=` / `>=` are not
                // mistaken for an assignment.
                let part_of_operator =
                    matches!(prev, Some(b'=' | b'~' | b'<' | b'>')) || next == Some(b'=');
                if !part_of_operator {
                    // '=' is ASCII, so `i` is always a valid char boundary.
                    let lhs = stmt[..i].trim().to_string();
                    let rhs = stmt[i + 1..].trim().to_string();
                    return Some((lhs, rhs));
                }
            }
            _ => {}
        }
        i += 1;
    }

    None
}
769
770enum AssignTarget {
771    Stack(i8),
772    Global(u16),
773    StackTemp,
774    GlobalTable(u16, Expr),
775    LocalTable(i8, Expr),
776}
777
778fn parse_assign_target(lhs: &str, args_count: i8, layout: &GlobalLayout) -> Result<AssignTarget> {
779    let re_slot = Regex::new(r"^(a\d+|l\d+)$").unwrap();
780    let re_s = Regex::new(r"^S\d+$").unwrap();
781    if re_slot.is_match(lhs) {
782        return Ok(AssignTarget::Stack(slot_to_stack_idx(lhs, args_count)?));
783    }
784    if re_s.is_match(lhs) {
785        return Ok(AssignTarget::StackTemp);
786    }
787    if let Some(idx) = layout.global_idx(lhs) {
788        return Ok(AssignTarget::Global(idx));
789    }
790
791    let re_gt = Regex::new(r"^GT\[(\d+)\]\[(.+)\]$").unwrap();
792    if let Some(c) = re_gt.captures(lhs) {
793        let idx: u16 = c.get(1).unwrap().as_str().parse()?;
794        let key = parse_expr(c.get(2).unwrap().as_str().trim())?;
795        return Ok(AssignTarget::GlobalTable(idx, key));
796    }
797    let re_lt = Regex::new(r"^LT\[(-?\d+)\]\[(.+)\]$").unwrap();
798    if let Some(c) = re_lt.captures(lhs) {
799        let idx: i8 = c.get(1).unwrap().as_str().parse()?;
800        let key = parse_expr(c.get(2).unwrap().as_str().trim())?;
801        return Ok(AssignTarget::LocalTable(idx, key));
802    }
803
804    bail!("unsupported assignment target: {lhs}")
805}
806
807fn compile_simple_stmt(
808    stmt: &str,
809    args_count: i8,
810    meta: &Meta,
811    user_fns: &HashSet<String>,
812    layout: &GlobalLayout,
813    out: &mut Vec<Item>,
814) -> Result<()> {
815    let s = stmt.trim();
816    if s.is_empty() {
817        return Ok(());
818    }
819
820    let ignore_re = Regex::new(r#"^__ret\s*=\s*(nil|true|false|-?\d+(?:\.\d+)?|\"(?:\\.|[^\"])*\")\s*$"#).unwrap();
821    if ignore_re.is_match(s) {
822        return Ok(());
823    }
824
825    if let Some((lhs, rhs)) = split_assignment(s) {
826        if lhs == "__ret" {
827            let expr = parse_expr(&rhs)?;
828            if let Expr::Call { name, args } = expr {
829                emit_call(&name, &args, meta, user_fns, layout, args_count, out)?;
830                return Ok(());
831            }
832            bail!("__ret assignment requires a call: {s}");
833        }
834
835        let target = parse_assign_target(&lhs, args_count, layout)?;
836        let expr = parse_expr(&rhs)?;
837        match target {
838            AssignTarget::Stack(idx) => {
839                compile_expr(&expr, args_count, meta, user_fns, layout, out)?;
840                out.push(Item::Op(OpKind::PopStack(idx)));
841            }
842            AssignTarget::Global(idx) => {
843                compile_expr(&expr, args_count, meta, user_fns, layout, out)?;
844                out.push(Item::Op(OpKind::PopGlobal(idx)));
845            }
846            AssignTarget::StackTemp => {
847                compile_expr(&expr, args_count, meta, user_fns, layout, out)?;
848            }
849            AssignTarget::GlobalTable(idx, key) => {
850                compile_expr(&key, args_count, meta, user_fns, layout, out)?;
851                compile_expr(&expr, args_count, meta, user_fns, layout, out)?;
852                out.push(Item::Op(OpKind::PopGlobalTable(idx)));
853            }
854            AssignTarget::LocalTable(idx, key) => {
855                compile_expr(&key, args_count, meta, user_fns, layout, out)?;
856                compile_expr(&expr, args_count, meta, user_fns, layout, out)?;
857                out.push(Item::Op(OpKind::PopLocalTable(idx)));
858            }
859        }
860        return Ok(());
861    }
862
863    let expr = parse_expr(s)?;
864    if let Expr::Call { name, args } = expr {
865        emit_call(&name, &args, meta, user_fns, layout, args_count, out)?;
866        return Ok(());
867    }
868
869    bail!("unsupported statement: {s}")
870}
871
/// Generator of unique label names of the form `<prefix>:<kind>:<counter>`.
struct LabelGen {
    /// Text placed in front of every generated label.
    prefix: String,
    /// Counter value used by the next label; shared by all kinds.
    n: u32,
}

impl LabelGen {
    /// Creates a generator whose counter starts at 0.
    fn new(prefix: impl Into<String>) -> Self {
        LabelGen {
            n: 0,
            prefix: prefix.into(),
        }
    }

    /// Returns the next label for `kind` and advances the counter.
    fn fresh(&mut self, kind: &str) -> String {
        let label = format!("{}:{}:{}", self.prefix, kind, self.n);
        self.n += 1;
        label
    }
}
891
892fn compile_cond_generic(
893    cond: &str,
894    args_count: i8,
895    meta: &Meta,
896    user_fns: &HashSet<String>,
897    layout: &GlobalLayout,
898    out: &mut Vec<Item>,
899) -> Result<()> {
900    let expr = parse_expr(cond)?;
901    compile_expr(&expr, args_count, meta, user_fns, layout, out)
902}
903
904fn compile_stmts(
905    stmts: &[Stmt],
906    args_count: i8,
907    meta: &Meta,
908    user_fns: &HashSet<String>,
909    layout: &GlobalLayout,
910    out: &mut Vec<Item>,
911    lg: &mut LabelGen,
912    break_stack: &mut Vec<String>,
913) -> Result<()> {
914    for st in stmts {
915        match st {
916            Stmt::Simple(s) => compile_simple_stmt(s, args_count, meta, user_fns, layout, out)?,
917            Stmt::Return(None) => out.push(Item::Op(OpKind::Ret)),
918            Stmt::Return(Some(expr)) => {
919                let e = parse_expr(expr)?;
920                compile_expr(&e, args_count, meta, user_fns, layout, out)?;
921                out.push(Item::Op(OpKind::Retv));
922            }
923            Stmt::Break => {
924                let tgt = break_stack
925                    .last()
926                    .ok_or_else(|| anyhow!("break outside of loop"))?
927                    .clone();
928                out.push(Item::Op(OpKind::JmpLabel { label: tgt }));
929            }
930            Stmt::If { arms, else_arm } => {
931                let end_lbl = lg.fresh("if_end");
932                for (idx, (cond, body)) in arms.iter().enumerate() {
933                    let after_lbl = lg.fresh(&format!("if_next_{idx}"));
934                    match parse_cond(cond) {
935                        CondKind::AlwaysTrue => {
936                            compile_stmts(body, args_count, meta, user_fns, layout, out, lg, break_stack)?;
937                            out.push(Item::Op(OpKind::JmpLabel { label: end_lbl.clone() }));
938                            break;
939                        }
940                        CondKind::AlwaysFalse => {
941                            out.push(Item::Label(Label::new(after_lbl.clone())));
942                        }
943                        CondKind::NonZero => {
944                            out.push(Item::Op(OpKind::JzLabel { label: after_lbl.clone() }));
945                            compile_stmts(body, args_count, meta, user_fns, layout, out, lg, break_stack)?;
946                            out.push(Item::Op(OpKind::JmpLabel { label: end_lbl.clone() }));
947                            out.push(Item::Label(Label::new(after_lbl)));
948                        }
949                        CondKind::Zero => {
950                            let body_lbl = lg.fresh(&format!("if_body_{idx}"));
951                            out.push(Item::Op(OpKind::JzLabel { label: body_lbl.clone() }));
952                            out.push(Item::Op(OpKind::JmpLabel { label: after_lbl.clone() }));
953                            out.push(Item::Label(Label::new(body_lbl)));
954                            compile_stmts(body, args_count, meta, user_fns, layout, out, lg, break_stack)?;
955                            out.push(Item::Op(OpKind::JmpLabel { label: end_lbl.clone() }));
956                            out.push(Item::Label(Label::new(after_lbl)));
957                        }
958                        CondKind::Generic => {
959                            compile_cond_generic(cond, args_count, meta, user_fns, layout, out)?;
960                            out.push(Item::Op(OpKind::JzLabel { label: after_lbl.clone() }));
961                            compile_stmts(body, args_count, meta, user_fns, layout, out, lg, break_stack)?;
962                            out.push(Item::Op(OpKind::JmpLabel { label: end_lbl.clone() }));
963                            out.push(Item::Label(Label::new(after_lbl)));
964                        }
965                    }
966                }
967                if let Some(eb) = else_arm {
968                    compile_stmts(eb, args_count, meta, user_fns, layout, out, lg, break_stack)?;
969                }
970                out.push(Item::Label(Label::new(end_lbl)));
971            }
972            Stmt::While { cond, body } => {
973                let head = lg.fresh("while_head");
974                let end = lg.fresh("while_end");
975                let body_lbl = lg.fresh("while_body");
976
977                out.push(Item::Label(Label::new(head.clone())));
978                break_stack.push(end.clone());
979
980                match parse_cond(cond) {
981                    CondKind::AlwaysTrue => {
982                        compile_stmts(body, args_count, meta, user_fns, layout, out, lg, break_stack)?;
983                        out.push(Item::Op(OpKind::JmpLabel { label: head }));
984                    }
985                    CondKind::AlwaysFalse => {
986                        out.push(Item::Op(OpKind::JmpLabel { label: end.clone() }));
987                    }
988                    CondKind::NonZero => {
989                        out.push(Item::Op(OpKind::JzLabel { label: end.clone() }));
990                        compile_stmts(body, args_count, meta, user_fns, layout, out, lg, break_stack)?;
991                        out.push(Item::Op(OpKind::JmpLabel { label: head }));
992                    }
993                    CondKind::Zero => {
994                        out.push(Item::Op(OpKind::JzLabel { label: body_lbl.clone() }));
995                        out.push(Item::Op(OpKind::JmpLabel { label: end.clone() }));
996                        out.push(Item::Label(Label::new(body_lbl)));
997                        compile_stmts(body, args_count, meta, user_fns, layout, out, lg, break_stack)?;
998                        out.push(Item::Op(OpKind::JmpLabel { label: head }));
999                    }
1000                    CondKind::Generic => {
1001                        compile_cond_generic(cond, args_count, meta, user_fns, layout, out)?;
1002                        out.push(Item::Op(OpKind::JzLabel { label: end.clone() }));
1003                        compile_stmts(body, args_count, meta, user_fns, layout, out, lg, break_stack)?;
1004                        out.push(Item::Op(OpKind::JmpLabel { label: head }));
1005                    }
1006                }
1007
1008                break_stack.pop();
1009                out.push(Item::Label(Label::new(end)));
1010            }
1011        }
1012    }
1013    Ok(())
1014}
1015
1016// ----------------------------
1017// __pc dispatcher mode
1018// ----------------------------
1019
/// Heuristically decides whether a function's raw lines form a `__pc` dispatcher.
///
/// Returns `true` only when all three markers occur somewhere in `raw`:
/// a line mentioning `__pc`, a line that is exactly `while true do` after trimming,
/// and a line starting with `if __pc ==` or `elseif __pc ==` after trimming.
fn looks_like_pc_dispatcher(raw: &[String]) -> bool {
    let trimmed = || raw.iter().map(|line| line.trim());

    let mentions_pc = trimmed().any(|t| t.contains("__pc"));
    let has_loop = trimmed().any(|t| t == "while true do");
    let has_case = trimmed().any(|t| t.starts_with("if __pc ==") || t.starts_with("elseif __pc =="));

    mentions_pc && has_loop && has_case
}
1038
/// Returns `true` when the line, ignoring surrounding whitespace, is empty or starts with `--`.
fn is_comment_or_empty_line(t: &str) -> bool {
    match t.trim() {
        "" => true,
        rest => rest.starts_with("--"),
    }
}
1043
/// Returns `true` when the trimmed line starts with `if ` and ends with ` then`.
///
/// Both checks are made against the same trimmed text, so `elseif ...` lines and
/// single-line `if ... then ... end` statements are not matched.
fn is_if_start_line(t: &str) -> bool {
    let line = t.trim();
    line.strip_prefix("if ").is_some() && line.strip_suffix(" then").is_some()
}
1048
/// Returns `true` when the trimmed line starts with `while ` and ends with ` do`.
fn is_while_start_line(t: &str) -> bool {
    let line = t.trim();
    if !line.starts_with("while ") {
        return false;
    }
    line.ends_with(" do")
}
1053
/// Returns `true` when the trimmed line starts with `for ` and ends with ` do`.
fn is_for_start_line(t: &str) -> bool {
    let line = t.trim();
    let opens_with_for = line.starts_with("for ");
    let closes_with_do = line.ends_with(" do");
    opens_with_for && closes_with_do
}
1058
/// Returns `true` when the line consists solely of the keyword `repeat` (whitespace ignored).
fn is_repeat_start_line(t: &str) -> bool {
    matches!(t.trim(), "repeat")
}
1062
/// Returns `true` when the trimmed line starts with `until ` (keyword followed by a space).
fn is_until_line(t: &str) -> bool {
    t.trim().strip_prefix("until ").is_some()
}
1066
/// Returns `true` when the line consists solely of the keyword `end` (whitespace ignored).
fn is_end_line(t: &str) -> bool {
    matches!(t.trim(), "end")
}
1070
/// Builds the label name of basic block `pc` inside function `fn_name`: `bb:<fn_name>:<pc>`.
fn bb_label(fn_name: &str, pc: u32) -> String {
    let pc_text = pc.to_string();
    ["bb", fn_name, pc_text.as_str()].join(":")
}
1074
1075fn parse_entry_pc(body: &[String]) -> u32 {
1076    let re = Regex::new(r"^(?:local\s+)?__pc\s*=\s*(\d+)\s*$").unwrap();
1077    for ln in body {
1078        let t = ln.trim();
1079        if let Some(c) = re.captures(t) {
1080            if let Ok(v) = c.get(1).unwrap().as_str().parse::<u32>() {
1081                return v;
1082            }
1083        }
1084        if t == "while true do" {
1085            break;
1086        }
1087    }
1088    0
1089}
1090
1091fn collect_case_body(body: &[String], mut i: usize, re_case: &Regex) -> (Vec<String>, usize) {
1092    let mut out: Vec<String> = Vec::new();
1093    let mut nest: i32 = 0;
1094
1095    while i < body.len() {
1096        let t = body[i].trim();
1097        if nest == 0 {
1098            if re_case.is_match(t) || t == "else" {
1099                break;
1100            }
1101        }
1102
1103        out.push(body[i].clone());
1104
1105        if is_if_start_line(t) || is_while_start_line(t) || is_for_start_line(t) || is_repeat_start_line(t) {
1106            nest += 1;
1107        } else if is_end_line(t) {
1108            nest -= 1;
1109        } else if is_until_line(t) {
1110            nest -= 1;
1111        }
1112
1113        i += 1;
1114    }
1115
1116    (out, i)
1117}
1118
1119fn compile_pc_case(
1120    pc: u32,
1121    lines: &[String],
1122    fn_name: &str,
1123    args_count: i8,
1124    meta: &Meta,
1125    user_fns: &HashSet<String>,
1126    layout: &GlobalLayout,
1127    out: &mut Vec<Item>,
1128) -> Result<()> {
1129    out.push(Item::Label(Label::new(bb_label(fn_name, pc))));
1130
1131    let re_pc_set = Regex::new(r"^__pc\s*=\s*(\d+)\s*$").unwrap();
1132    let re_term_if = Regex::new(r"^if\s+S\d+\s*(==|~=)\s*0\s+then\s*$").unwrap();
1133
1134    let mut i = 0usize;
1135    while i < lines.len() {
1136        let mut t = lines[i].trim().to_string();
1137        if is_comment_or_empty_line(&t) {
1138            i += 1;
1139            continue;
1140        }
1141
1142        if t.starts_with("local ") && !t.contains('=') {
1143            i += 1;
1144            continue;
1145        }
1146        if let Some(rest) = t.strip_prefix("local ") {
1147            t = rest.trim().to_string();
1148        }
1149
1150        if t == "return" {
1151            out.push(Item::Op(OpKind::Ret));
1152            return Ok(());
1153        }
1154
1155        if let Some(rest) = t.strip_prefix("return ") {
1156            let e = parse_expr(rest.trim())?;
1157            compile_expr(&e, args_count, meta, user_fns, layout, out)?;
1158            out.push(Item::Op(OpKind::Retv));
1159            return Ok(());
1160        }
1161
1162        if let Some(c) = re_pc_set.captures(&t) {
1163            let target: u32 = c.get(1).unwrap().as_str().parse()?;
1164            out.push(Item::Op(OpKind::JmpLabel {
1165                label: bb_label(fn_name, target),
1166            }));
1167            return Ok(());
1168        }
1169
1170        if let Some(c) = re_term_if.captures(&t) {
1171            let op = c.get(1).unwrap().as_str();
1172            let mut j = i + 1;
1173            while j < lines.len() && is_comment_or_empty_line(lines[j].trim()) {
1174                j += 1;
1175            }
1176            if j >= lines.len() {
1177                bail!("unterminated pc-if in bb {pc}");
1178            }
1179            let then_line = lines[j].trim();
1180            let then_pc: u32 = re_pc_set
1181                .captures(then_line)
1182                .ok_or_else(|| anyhow!("pc-if then arm must set __pc in bb {pc}"))?
1183                .get(1)
1184                .unwrap()
1185                .as_str()
1186                .parse()?;
1187
1188            j += 1;
1189            while j < lines.len() && is_comment_or_empty_line(lines[j].trim()) {
1190                j += 1;
1191            }
1192            if j >= lines.len() || lines[j].trim() != "else" {
1193                bail!("pc-if missing else in bb {pc}");
1194            }
1195
1196            j += 1;
1197            while j < lines.len() && is_comment_or_empty_line(lines[j].trim()) {
1198                j += 1;
1199            }
1200            if j >= lines.len() {
1201                bail!("pc-if missing else pc assignment in bb {pc}");
1202            }
1203            let else_line = lines[j].trim();
1204            let else_pc: u32 = re_pc_set
1205                .captures(else_line)
1206                .ok_or_else(|| anyhow!("pc-if else arm must set __pc in bb {pc}"))?
1207                .get(1)
1208                .unwrap()
1209                .as_str()
1210                .parse()?;
1211
1212            j += 1;
1213            while j < lines.len() && is_comment_or_empty_line(lines[j].trim()) {
1214                j += 1;
1215            }
1216            if j >= lines.len() || lines[j].trim() != "end" {
1217                bail!("pc-if missing end in bb {pc}");
1218            }
1219
1220            let (zero_target, nonzero_target) = if op == "==" {
1221                (then_pc, else_pc)
1222            } else {
1223                (else_pc, then_pc)
1224            };
1225            out.push(Item::Op(OpKind::JzLabel {
1226                label: bb_label(fn_name, zero_target),
1227            }));
1228            out.push(Item::Op(OpKind::JmpLabel {
1229                label: bb_label(fn_name, nonzero_target),
1230            }));
1231            return Ok(());
1232        }
1233
1234        compile_simple_stmt(&t, args_count, meta, user_fns, layout, out)?;
1235        i += 1;
1236    }
1237
1238    out.push(Item::Op(OpKind::Ret));
1239    Ok(())
1240}
1241
1242fn compile_pc_dispatcher_function(
1243    f: &Function,
1244    meta: &Meta,
1245    user_fns: &HashSet<String>,
1246    layout: &GlobalLayout,
1247    out: &mut Vec<Item>,
1248) -> Result<()> {
1249    if f.raw.len() < 2 {
1250        bail!("function {}: too short", f.name);
1251    }
1252    let body: Vec<String> = f.raw[1..f.raw.len() - 1].to_vec();
1253
1254    let entry_pc = parse_entry_pc(&body);
1255
1256    let re_case = Regex::new(r"^(if|elseif)\s+__pc\s*==\s*(\d+)\s+then\s*$").unwrap();
1257
1258    let mut i = 0usize;
1259    while i < body.len() {
1260        if re_case.is_match(body[i].trim()) {
1261            break;
1262        }
1263        i += 1;
1264    }
1265    if i >= body.len() {
1266        bail!("function {}: pc-dispatcher header not found", f.name);
1267    }
1268
1269    let mut cases: Vec<(u32, Vec<String>)> = Vec::new();
1270    while i < body.len() {
1271        let t = body[i].trim();
1272        if t == "else" {
1273            break;
1274        }
1275        if let Some(c) = re_case.captures(t) {
1276            let pc: u32 = c.get(2).unwrap().as_str().parse()?;
1277            i += 1;
1278            let (case_lines, next_i) = collect_case_body(&body, i, &re_case);
1279            cases.push((pc, case_lines));
1280            i = next_i;
1281            continue;
1282        }
1283        i += 1;
1284    }
1285
1286    if cases.is_empty() {
1287        bail!("function {}: no pc-dispatcher cases found", f.name);
1288    }
1289
1290    if let Some(pos) = cases.iter().position(|(pc, _)| *pc == entry_pc) {
1291        if pos != 0 {
1292            let entry = cases.remove(pos);
1293            cases.insert(0, entry);
1294        }
1295    }
1296
1297    for (pc, lines) in cases {
1298        compile_pc_case(pc, &lines, &f.name, f.args_count, meta, user_fns, layout, out)?;
1299    }
1300
1301    Ok(())
1302}
1303
1304pub fn compile_program(meta: &Meta, program: &Program) -> Result<(Vec<Item>, GlobalLayout)> {
1305    let mut items: Vec<Item> = Vec::new();
1306    let layout = GlobalLayout::from_globals(&program.globals)?;
1307    let user_fns: HashSet<String> = program.functions.iter().map(|f| f.name.clone()).collect();
1308
1309    for f in &program.functions {
1310        items.push(Item::Label(Label::new(format!("fn:{}", f.name))));
1311        items.push(Item::Op(OpKind::InitStack {
1312            args: f.args_count,
1313            locals: f.locals_count,
1314        }));
1315
1316        if looks_like_pc_dispatcher(&f.raw) {
1317            compile_pc_dispatcher_function(f, meta, &user_fns, &layout, &mut items)?;
1318        } else {
1319            let mut lg = LabelGen::new(format!("fn:{}", f.name));
1320            let mut break_stack: Vec<String> = Vec::new();
1321            compile_stmts(
1322                &f.body,
1323                f.args_count,
1324                meta,
1325                &user_fns,
1326                &layout,
1327                &mut items,
1328                &mut lg,
1329                &mut break_stack,
1330            )?;
1331        }
1332
1333        if !matches!(items.last(), Some(Item::Op(OpKind::Ret | OpKind::Retv))) {
1334            items.push(Item::Op(OpKind::Ret));
1335        }
1336    }
1337
1338    Ok((items, layout))
1339}