siglus_cfx_decompiler/
names.rs1use std::collections::BTreeSet;
2use std::path::Path;
3
4use crate::cfx::ShaderBlob;
5
/// Encoding in which a recovered string was stored in the scanned bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StringEncoding {
    /// Single-byte ASCII text.
    Ascii,
    /// UTF-16 text stored as little-endian 16-bit code units.
    Utf16Le,
}

impl StringEncoding {
    /// Returns the short lowercase label for this encoding
    /// (`"ascii"` or `"utf16le"`).
    pub fn as_str(self) -> &'static str {
        match self {
            StringEncoding::Utf16Le => "utf16le",
            StringEncoding::Ascii => "ascii",
        }
    }
}
20
21#[derive(Debug, Clone)]
22pub struct ExtractedString {
23 pub offset: usize,
24 pub encoding: StringEncoding,
25 pub text: String,
26}
27
28pub fn extract_strings(data: &[u8]) -> Vec<ExtractedString> {
29 let mut out = Vec::new();
30 out.extend(extract_ascii_strings(data, 3));
31 out.extend(extract_utf16le_strings(data, 3));
32 out.sort_by_key(|s| (s.offset, match s.encoding { StringEncoding::Ascii => 0u8, StringEncoding::Utf16Le => 1u8 }));
33 out.dedup_by(|a, b| a.offset == b.offset && a.encoding == b.encoding && a.text == b.text);
34 out
35}
36
37fn extract_ascii_strings(data: &[u8], min_len: usize) -> Vec<ExtractedString> {
38 let mut out = Vec::new();
39 let mut i = 0usize;
40 while i < data.len() {
41 while i < data.len() && !is_printable_ascii(data[i]) {
42 i += 1;
43 }
44 let start = i;
45 while i < data.len() && is_printable_ascii(data[i]) {
46 i += 1;
47 }
48 if i.saturating_sub(start) >= min_len {
49 let text = String::from_utf8_lossy(&data[start..i]).to_string();
50 out.push(ExtractedString { offset: start, encoding: StringEncoding::Ascii, text });
51 }
52 }
53 out
54}
55
56fn extract_utf16le_strings(data: &[u8], min_chars: usize) -> Vec<ExtractedString> {
57 let mut out = Vec::new();
58 let mut i = 0usize;
59 while i + 1 < data.len() {
60 while i + 1 < data.len() && !is_printable_utf16le_at(data, i) {
61 i += 2;
62 }
63 let start = i;
64 let mut chars = Vec::new();
65 while i + 1 < data.len() && is_printable_utf16le_at(data, i) {
66 chars.push(u16::from_le_bytes([data[i], data[i + 1]]));
67 i += 2;
68 }
69 if chars.len() >= min_chars {
70 if let Ok(text) = String::from_utf16(&chars) {
71 out.push(ExtractedString { offset: start, encoding: StringEncoding::Utf16Le, text });
72 }
73 }
74 if i == start {
75 i += 2;
76 }
77 }
78 out
79}
80
/// Returns `true` for the space character and every visible ASCII character,
/// i.e. bytes in the range `0x20..=0x7e`.
fn is_printable_ascii(b: u8) -> bool {
    // `is_ascii_graphic` covers 0x21..=0x7e; the space (0x20) is added on top.
    b == b' ' || b.is_ascii_graphic()
}
84
/// Returns `true` when the two bytes at `data[off]` and `data[off + 1]`,
/// read as one little-endian 16-bit code unit, fall in the printable ASCII
/// range `0x20..=0x7e`.
///
/// Returns `false` when fewer than two bytes are available at `off`; this
/// includes any `off` at or beyond the end of `data`. No arithmetic is done on
/// `off`, so even `usize::MAX` is handled without overflow.
fn is_printable_utf16le_at(data: &[u8], off: usize) -> bool {
    match data.get(off..) {
        Some([lo, hi, ..]) => matches!(u16::from_le_bytes([*lo, *hi]), 0x20..=0x7e),
        _ => false,
    }
}
92
/// Checks whether `s` has the shape of an identifier: a leading ASCII letter
/// or underscore followed by any number of ASCII letters, digits or
/// underscores.
///
/// The empty string and any string containing a non-ASCII character are
/// rejected.
pub fn is_hlsl_identifier(s: &str) -> bool {
    // Working on bytes is equivalent to working on chars here: every byte of a
    // multi-byte UTF-8 sequence is >= 0x80 and therefore fails both checks.
    match s.as_bytes().split_first() {
        None => false,
        Some((&head, tail)) => {
            let head_ok = head == b'_' || head.is_ascii_alphabetic();
            head_ok
                && tail
                    .iter()
                    .all(|&byte| byte == b'_' || byte.is_ascii_alphanumeric())
        }
    }
}
101
/// Heuristic: does `s` start like a technique name?
///
/// Returns `true` when `s` begins with `tec_`, `tech_` or `technique`,
/// compared ASCII case-insensitively (so `Tec_Blur` and `TECHNIQUE0` match).
/// No other validation is performed: any string with a matching prefix
/// qualifies, whatever follows the prefix.
pub fn looks_like_technique_name(s: &str) -> bool {
    const PREFIXES: [&str; 3] = ["tec_", "tech_", "technique"];
    let lower = s.to_ascii_lowercase();
    PREFIXES.iter().any(|&prefix| lower.starts_with(prefix))
}
105
106pub fn looks_like_shader_function_name(s: &str) -> bool {
107 if !is_hlsl_identifier(s) {
108 return false;
109 }
110 let lower = s.to_ascii_lowercase();
111 lower.starts_with("vs_")
112 || lower.starts_with("ps_")
113 || lower.starts_with("v_")
114 || lower.starts_with("p_")
115 || lower.starts_with("vertex")
116 || lower.starts_with("pixel")
117}
118
119pub fn looks_like_interface_or_struct_name(s: &str) -> bool {
120 if !is_hlsl_identifier(s) {
121 return false;
122 }
123 let lower = s.to_ascii_lowercase();
124 lower.contains("input")
125 || lower.contains("output")
126 || lower.ends_with("_in")
127 || lower.ends_with("_out")
128 || lower.ends_with("interface")
129 || lower.ends_with("struct")
130}
131
132pub fn format_original_name_report(input: &Path, data: &[u8], shaders: &[ShaderBlob]) -> String {
133 let strings = extract_strings(data);
134 let mut identifiers = BTreeSet::new();
135 let mut techniques = BTreeSet::new();
136 let mut shader_function_candidates = BTreeSet::new();
137 let mut interface_candidates = BTreeSet::new();
138
139 for s in &strings {
140 if is_hlsl_identifier(&s.text) {
141 identifiers.insert(s.text.clone());
142 }
143 if looks_like_technique_name(&s.text) {
144 techniques.insert(s.text.clone());
145 }
146 if looks_like_shader_function_name(&s.text) {
147 shader_function_candidates.insert(s.text.clone());
148 }
149 if looks_like_interface_or_struct_name(&s.text) {
150 interface_candidates.insert(s.text.clone());
151 }
152 }
153
154 let mut out = String::new();
155 out.push_str(&format!("input: {}\n", input.display()));
156 out.push_str("\n");
157 out.push_str("name_recovery_policy:\n");
158 out.push_str(" exact_shader_entry_function_name: only used if explicitly stored in source/debug/effect metadata; SM2 bytecode CTAB does not carry it.\n");
159 out.push_str(" exact_interface_type_name: only used if explicitly stored in source/debug/effect metadata; SM2 bytecode declarations carry semantics, not original HLSL struct/interface type names.\n");
160 out.push_str(" uniforms_samplers_and_struct_members: recovered from CTAB when present.\n");
161 out.push_str(" technique_pass_names: recovered only after effect-container parsing; raw shader scanning alone cannot map them to a shader safely.\n");
162 out.push_str("\n");
163
164 out.push_str("shader_ctab_names:\n");
165 for shader in shaders {
166 out.push_str(&format!(" {} offset=0x{:08x} profile={}\n", shader.file_prefix(), shader.offset, shader.profile()));
167 if let Some(ctab) = &shader.ctab {
168 if let Some(creator) = &ctab.creator {
169 out.push_str(&format!(" creator: {}\n", creator));
170 }
171 if let Some(target) = &ctab.target {
172 out.push_str(&format!(" target: {}\n", target));
173 }
174 for c in &ctab.constants {
175 out.push_str(&format!(" {} {} {} count={}\n", c.register_name(), c.hlsl_decl_type(), c.name, c.register_count));
176 if let Some(t) = &c.type_info {
177 for m in &t.members {
178 out.push_str(&format!(" member {} {}\n", m.type_info.hlsl_type_name(), m.name));
179 }
180 }
181 }
182 } else {
183 out.push_str(" no CTAB\n");
184 }
185 }
186
187 out.push_str("\ntechnique_name_candidates_from_container_strings:\n");
188 for name in techniques.iter().take(2000) {
189 out.push_str(&format!(" {}\n", name));
190 }
191 if techniques.len() > 2000 {
192 out.push_str(&format!(" ... {} more\n", techniques.len() - 2000));
193 }
194
195 out.push_str("\nshader_function_name_candidates_from_container_strings:\n");
196 for name in shader_function_candidates.iter().take(2000) {
197 out.push_str(&format!(" {}\n", name));
198 }
199 if shader_function_candidates.len() > 2000 {
200 out.push_str(&format!(" ... {} more\n", shader_function_candidates.len() - 2000));
201 }
202
203 out.push_str("\ninterface_or_struct_name_candidates_from_container_strings:\n");
204 for name in interface_candidates.iter().take(2000) {
205 out.push_str(&format!(" {}\n", name));
206 }
207 if interface_candidates.len() > 2000 {
208 out.push_str(&format!(" ... {} more\n", interface_candidates.len() - 2000));
209 }
210
211 out.push_str("\nall_identifier_strings_from_container:\n");
212 for name in identifiers.iter().take(5000) {
213 out.push_str(&format!(" {}\n", name));
214 }
215 if identifiers.len() > 5000 {
216 out.push_str(&format!(" ... {} more\n", identifiers.len() - 5000));
217 }
218
219 out.push_str("\nall_strings_with_offsets:\n");
220 for s in strings.iter().take(10000) {
221 out.push_str(&format!(" 0x{:08x} {} {}\n", s.offset, s.encoding.as_str(), s.text));
222 }
223 if strings.len() > 10000 {
224 out.push_str(&format!(" ... {} more\n", strings.len() - 10000));
225 }
226
227 out
228}