Skip to main content

siglus_cfx_decompiler/
names.rs

1use std::collections::BTreeSet;
2use std::path::Path;
3
4use crate::cfx::ShaderBlob;
5
/// Text encoding in which a string was found while scanning raw bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StringEncoding {
    /// One byte per character.
    Ascii,
    /// Two bytes per character, low byte first.
    Utf16Le,
}

impl StringEncoding {
    /// Returns the short lowercase label for this encoding
    /// (`"ascii"` or `"utf16le"`).
    pub fn as_str(self) -> &'static str {
        match self {
            StringEncoding::Ascii => "ascii",
            StringEncoding::Utf16Le => "utf16le",
        }
    }
}
20
21#[derive(Debug, Clone)]
22pub struct ExtractedString {
23    pub offset: usize,
24    pub encoding: StringEncoding,
25    pub text: String,
26}
27
28pub fn extract_strings(data: &[u8]) -> Vec<ExtractedString> {
29    let mut out = Vec::new();
30    out.extend(extract_ascii_strings(data, 3));
31    out.extend(extract_utf16le_strings(data, 3));
32    out.sort_by_key(|s| (s.offset, match s.encoding { StringEncoding::Ascii => 0u8, StringEncoding::Utf16Le => 1u8 }));
33    out.dedup_by(|a, b| a.offset == b.offset && a.encoding == b.encoding && a.text == b.text);
34    out
35}
36
37fn extract_ascii_strings(data: &[u8], min_len: usize) -> Vec<ExtractedString> {
38    let mut out = Vec::new();
39    let mut i = 0usize;
40    while i < data.len() {
41        while i < data.len() && !is_printable_ascii(data[i]) {
42            i += 1;
43        }
44        let start = i;
45        while i < data.len() && is_printable_ascii(data[i]) {
46            i += 1;
47        }
48        if i.saturating_sub(start) >= min_len {
49            let text = String::from_utf8_lossy(&data[start..i]).to_string();
50            out.push(ExtractedString { offset: start, encoding: StringEncoding::Ascii, text });
51        }
52    }
53    out
54}
55
56fn extract_utf16le_strings(data: &[u8], min_chars: usize) -> Vec<ExtractedString> {
57    let mut out = Vec::new();
58    let mut i = 0usize;
59    while i + 1 < data.len() {
60        while i + 1 < data.len() && !is_printable_utf16le_at(data, i) {
61            i += 2;
62        }
63        let start = i;
64        let mut chars = Vec::new();
65        while i + 1 < data.len() && is_printable_utf16le_at(data, i) {
66            chars.push(u16::from_le_bytes([data[i], data[i + 1]]));
67            i += 2;
68        }
69        if chars.len() >= min_chars {
70            if let Ok(text) = String::from_utf16(&chars) {
71                out.push(ExtractedString { offset: start, encoding: StringEncoding::Utf16Le, text });
72            }
73        }
74        if i == start {
75            i += 2;
76        }
77    }
78    out
79}
80
/// Returns `true` for the space character and every visible ASCII character
/// (byte values `0x20` through `0x7e`).
fn is_printable_ascii(b: u8) -> bool {
    b == b' ' || b.is_ascii_graphic()
}
84
/// Returns `true` when the two bytes at `off` and `off + 1` both lie inside
/// `data` and, read as a little-endian 16-bit value, fall in `0x20..=0x7e`.
fn is_printable_utf16le_at(data: &[u8], off: usize) -> bool {
    match (data.get(off), data.get(off + 1)) {
        (Some(&lo), Some(&hi)) => (0x20..=0x7e).contains(&u16::from_le_bytes([lo, hi])),
        // The code unit would extend past the end of the buffer.
        _ => false,
    }
}
92
/// Returns `true` when `s` is non-empty, begins with an ASCII letter or `_`,
/// and every following character is an ASCII letter, ASCII digit or `_`.
pub fn is_hlsl_identifier(s: &str) -> bool {
    // Working on bytes is sufficient: every byte of a non-ASCII character is
    // >= 0x80 and therefore fails both checks below.
    match s.as_bytes().split_first() {
        None => false,
        Some((&head, tail)) => {
            let head_ok = head == b'_' || head.is_ascii_alphabetic();
            head_ok && tail.iter().all(|&b| b == b'_' || b.is_ascii_alphanumeric())
        }
    }
}
101
/// Returns `true` when `s` begins with `tec_`, `tech_` or `technique`.
///
/// The comparison is case-sensitive and the remainder of `s` is not
/// inspected, so `s` does not have to be a valid identifier.
pub fn looks_like_technique_name(s: &str) -> bool {
    const PREFIXES: [&str; 3] = ["tec_", "tech_", "technique"];
    PREFIXES.iter().any(|prefix| s.starts_with(*prefix))
}
105
106pub fn looks_like_shader_function_name(s: &str) -> bool {
107    if !is_hlsl_identifier(s) {
108        return false;
109    }
110    let lower = s.to_ascii_lowercase();
111    lower.starts_with("vs_")
112        || lower.starts_with("ps_")
113        || lower.starts_with("v_")
114        || lower.starts_with("p_")
115        || lower.starts_with("vertex")
116        || lower.starts_with("pixel")
117}
118
119pub fn looks_like_interface_or_struct_name(s: &str) -> bool {
120    if !is_hlsl_identifier(s) {
121        return false;
122    }
123    let lower = s.to_ascii_lowercase();
124    lower.contains("input")
125        || lower.contains("output")
126        || lower.ends_with("_in")
127        || lower.ends_with("_out")
128        || lower.ends_with("interface")
129        || lower.ends_with("struct")
130}
131
132pub fn format_original_name_report(input: &Path, data: &[u8], shaders: &[ShaderBlob]) -> String {
133    let strings = extract_strings(data);
134    let mut identifiers = BTreeSet::new();
135    let mut techniques = BTreeSet::new();
136    let mut shader_function_candidates = BTreeSet::new();
137    let mut interface_candidates = BTreeSet::new();
138
139    for s in &strings {
140        if is_hlsl_identifier(&s.text) {
141            identifiers.insert(s.text.clone());
142        }
143        if looks_like_technique_name(&s.text) {
144            techniques.insert(s.text.clone());
145        }
146        if looks_like_shader_function_name(&s.text) {
147            shader_function_candidates.insert(s.text.clone());
148        }
149        if looks_like_interface_or_struct_name(&s.text) {
150            interface_candidates.insert(s.text.clone());
151        }
152    }
153
154    let mut out = String::new();
155    out.push_str(&format!("input: {}\n", input.display()));
156    out.push_str("\n");
157    out.push_str("name_recovery_policy:\n");
158    out.push_str("  exact_shader_entry_function_name: only used if explicitly stored in source/debug/effect metadata; SM2 bytecode CTAB does not carry it.\n");
159    out.push_str("  exact_interface_type_name: only used if explicitly stored in source/debug/effect metadata; SM2 bytecode declarations carry semantics, not original HLSL struct/interface type names.\n");
160    out.push_str("  uniforms_samplers_and_struct_members: recovered from CTAB when present.\n");
161    out.push_str("  technique_pass_names: recovered only after effect-container parsing; raw shader scanning alone cannot map them to a shader safely.\n");
162    out.push_str("\n");
163
164    out.push_str("shader_ctab_names:\n");
165    for shader in shaders {
166        out.push_str(&format!("  {} offset=0x{:08x} profile={}\n", shader.file_prefix(), shader.offset, shader.profile()));
167        if let Some(ctab) = &shader.ctab {
168            if let Some(creator) = &ctab.creator {
169                out.push_str(&format!("    creator: {}\n", creator));
170            }
171            if let Some(target) = &ctab.target {
172                out.push_str(&format!("    target: {}\n", target));
173            }
174            for c in &ctab.constants {
175                out.push_str(&format!("    {} {} {} count={}\n", c.register_name(), c.hlsl_decl_type(), c.name, c.register_count));
176                if let Some(t) = &c.type_info {
177                    for m in &t.members {
178                        out.push_str(&format!("      member {} {}\n", m.type_info.hlsl_type_name(), m.name));
179                    }
180                }
181            }
182        } else {
183            out.push_str("    no CTAB\n");
184        }
185    }
186
187    out.push_str("\ntechnique_name_candidates_from_container_strings:\n");
188    for name in techniques.iter().take(2000) {
189        out.push_str(&format!("  {}\n", name));
190    }
191    if techniques.len() > 2000 {
192        out.push_str(&format!("  ... {} more\n", techniques.len() - 2000));
193    }
194
195    out.push_str("\nshader_function_name_candidates_from_container_strings:\n");
196    for name in shader_function_candidates.iter().take(2000) {
197        out.push_str(&format!("  {}\n", name));
198    }
199    if shader_function_candidates.len() > 2000 {
200        out.push_str(&format!("  ... {} more\n", shader_function_candidates.len() - 2000));
201    }
202
203    out.push_str("\ninterface_or_struct_name_candidates_from_container_strings:\n");
204    for name in interface_candidates.iter().take(2000) {
205        out.push_str(&format!("  {}\n", name));
206    }
207    if interface_candidates.len() > 2000 {
208        out.push_str(&format!("  ... {} more\n", interface_candidates.len() - 2000));
209    }
210
211    out.push_str("\nall_identifier_strings_from_container:\n");
212    for name in identifiers.iter().take(5000) {
213        out.push_str(&format!("  {}\n", name));
214    }
215    if identifiers.len() > 5000 {
216        out.push_str(&format!("  ... {} more\n", identifiers.len() - 5000));
217    }
218
219    out.push_str("\nall_strings_with_offsets:\n");
220    for s in strings.iter().take(10000) {
221        out.push_str(&format!("  0x{:08x} {} {}\n", s.offset, s.encoding.as_str(), s.text));
222    }
223    if strings.len() > 10000 {
224        out.push_str(&format!("  ... {} more\n", strings.len() - 10000));
225    }
226
227    out
228}