// wmv_decoder/na_simple_idct.rs

/// Saturates a signed sample to the unsigned 8-bit pixel range `0..=255`.
#[inline(always)]
fn clip_u8(x: i32) -> u8 {
    // After clamping, the value is known to fit in a byte, so the cast cannot truncate.
    x.clamp(0, 255) as u8
}
5
// --- Fixed-point parameters of the 8-bit simple IDCT (BIT_DEPTH=8) ---
//
// W1..W7 are the integer butterfly weights. Wk is close to cos(k*pi/16) * sqrt(2) * 2^14;
// note that W4 is stored as 16383 rather than 16384.
const W1: i64 = 22725;
const W2: i64 = 21407;
const W3: i64 = 19266;
const W4: i64 = 16383;
const W5: i64 = 12873;
const W6: i64 = 8867;
const W7: i64 = 4520;

// Right shifts that scale the accumulators back down after the row pass and the column pass.
const ROW_SHIFT: i32 = 11;
const COL_SHIFT: i32 = 20;
// Left shift applied to the DC coefficient by the DC-only row shortcut.
const DC_SHIFT: i32 = 3;

// Column rounding term, pre-divided by W4 so it can be added to the DC input before the
// multiplication by W4. Upstream spells it ((1<<(COL_SHIFT-1))/W4) with integer division.
const COL_RND_W4_DIV: i16 = {
    let half_unit = 1i64 << (COL_SHIFT - 1);
    (half_unit / W4) as i16
};
21
22#[inline(always)]
23fn idct_row_cond_dc_int16_8bit(row: &mut [i16; 8]) {
24    // DC-only shortcut (matches upstream's int16 path semantics).
25    if row[1] == 0
26        && row[2] == 0
27        && row[3] == 0
28        && row[4] == 0
29        && row[5] == 0
30        && row[6] == 0
31        && row[7] == 0
32    {
33        let t: i16 = (((row[0] as i32) << DC_SHIFT) as i16);
34        *row = [t; 8];
35        return;
36    }
37
38    // Use i64 for safety; upstream uses carefully-sized unsigned intermediates.
39    let r0 = row[0] as i64;
40    let r1 = row[1] as i64;
41    let r2 = row[2] as i64;
42    let r3 = row[3] as i64;
43    let r4 = row[4] as i64;
44    let r5 = row[5] as i64;
45    let r6 = row[6] as i64;
46    let r7 = row[7] as i64;
47
48    let mut a0 = W4 * r0 + (1i64 << (ROW_SHIFT - 1));
49    let mut a1 = a0;
50    let mut a2 = a0;
51    let mut a3 = a0;
52
53    a0 += W2 * r2;
54    a1 += W6 * r2;
55    a2 -= W6 * r2;
56    a3 -= W2 * r2;
57
58    let mut b0 = W1 * r1 + W3 * r3;
59    let mut b1 = W3 * r1 - W7 * r3;
60    let mut b2 = W5 * r1 - W1 * r3;
61    let mut b3 = W7 * r1 - W5 * r3;
62
63    if r4 != 0 || r5 != 0 || r6 != 0 || r7 != 0 {
64        a0 += W4 * r4 + W6 * r6;
65        a1 += -W4 * r4 - W2 * r6;
66        a2 += -W4 * r4 + W2 * r6;
67        a3 += W4 * r4 - W6 * r6;
68
69        b0 += W5 * r5 + W7 * r7;
70        b1 += -W1 * r5 - W5 * r7;
71        b2 += W7 * r5 + W3 * r7;
72        b3 += W3 * r5 - W1 * r7;
73    }
74
75    let rs = ROW_SHIFT as i64;
76    row[0] = ((a0 + b0) >> rs) as i16;
77    row[7] = ((a0 - b0) >> rs) as i16;
78    row[1] = ((a1 + b1) >> rs) as i16;
79    row[6] = ((a1 - b1) >> rs) as i16;
80    row[2] = ((a2 + b2) >> rs) as i16;
81    row[5] = ((a2 - b2) >> rs) as i16;
82    row[3] = ((a3 + b3) >> rs) as i16;
83    row[4] = ((a3 - b3) >> rs) as i16;
84}
85
86#[inline(always)]
87fn idct_sparse_col_int16_8bit(block: &mut [i16; 64], col: usize) {
88    // Column elements are block[col + 8*r]
89    let c0 = block[col + 8 * 0] as i64;
90    let c1 = block[col + 8 * 1] as i64;
91    let c2 = block[col + 8 * 2] as i64;
92    let c3 = block[col + 8 * 3] as i64;
93    let c4 = block[col + 8 * 4] as i64;
94    let c5 = block[col + 8 * 5] as i64;
95    let c6 = block[col + 8 * 6] as i64;
96    let c7 = block[col + 8 * 7] as i64;
97
98    let mut a0 = W4 * (c0 + COL_RND_W4_DIV as i64);
99    let mut a1 = a0;
100    let mut a2 = a0;
101    let mut a3 = a0;
102
103    a0 += W2 * c2;
104    a1 += W6 * c2;
105    a2 -= W6 * c2;
106    a3 -= W2 * c2;
107
108    let mut b0 = W1 * c1 + W3 * c3;
109    let mut b1 = W3 * c1 - W7 * c3;
110    let mut b2 = W5 * c1 - W1 * c3;
111    let mut b3 = W7 * c1 - W5 * c3;
112
113    if c4 != 0 {
114        a0 += W4 * c4;
115        a1 += -W4 * c4;
116        a2 += -W4 * c4;
117        a3 += W4 * c4;
118    }
119    if c5 != 0 {
120        b0 += W5 * c5;
121        b1 += -W1 * c5;
122        b2 += W7 * c5;
123        b3 += W3 * c5;
124    }
125    if c6 != 0 {
126        a0 += W6 * c6;
127        a1 += -W2 * c6;
128        a2 += W2 * c6;
129        a3 += -W6 * c6;
130    }
131    if c7 != 0 {
132        b0 += W7 * c7;
133        b1 += -W5 * c7;
134        b2 += W3 * c7;
135        b3 += -W1 * c7;
136    }
137
138    let cs = COL_SHIFT as i64;
139    block[col + 8 * 0] = ((a0 + b0) >> cs) as i16;
140    block[col + 8 * 1] = ((a1 + b1) >> cs) as i16;
141    block[col + 8 * 2] = ((a2 + b2) >> cs) as i16;
142    block[col + 8 * 3] = ((a3 + b3) >> cs) as i16;
143    block[col + 8 * 4] = ((a3 - b3) >> cs) as i16;
144    block[col + 8 * 5] = ((a2 - b2) >> cs) as i16;
145    block[col + 8 * 6] = ((a1 - b1) >> cs) as i16;
146    block[col + 8 * 7] = ((a0 - b0) >> cs) as i16;
147}
148
149#[inline(always)]
150fn idct_sparse_col_add_int16_8bit(dest: &mut [u8], dest_off: usize, line_size: usize, block: &[i16; 64], col: usize) {
151    let c0 = block[col + 8 * 0] as i64;
152    let c1 = block[col + 8 * 1] as i64;
153    let c2 = block[col + 8 * 2] as i64;
154    let c3 = block[col + 8 * 3] as i64;
155    let c4 = block[col + 8 * 4] as i64;
156    let c5 = block[col + 8 * 5] as i64;
157    let c6 = block[col + 8 * 6] as i64;
158    let c7 = block[col + 8 * 7] as i64;
159
160    let mut a0 = W4 * (c0 + COL_RND_W4_DIV as i64);
161    let mut a1 = a0;
162    let mut a2 = a0;
163    let mut a3 = a0;
164
165    a0 += W2 * c2;
166    a1 += W6 * c2;
167    a2 -= W6 * c2;
168    a3 -= W2 * c2;
169
170    let mut b0 = W1 * c1 + W3 * c3;
171    let mut b1 = W3 * c1 - W7 * c3;
172    let mut b2 = W5 * c1 - W1 * c3;
173    let mut b3 = W7 * c1 - W5 * c3;
174
175    if c4 != 0 {
176        a0 += W4 * c4;
177        a1 += -W4 * c4;
178        a2 += -W4 * c4;
179        a3 += W4 * c4;
180    }
181    if c5 != 0 {
182        b0 += W5 * c5;
183        b1 += -W1 * c5;
184        b2 += W7 * c5;
185        b3 += W3 * c5;
186    }
187    if c6 != 0 {
188        a0 += W6 * c6;
189        a1 += -W2 * c6;
190        a2 += W2 * c6;
191        a3 += -W6 * c6;
192    }
193    if c7 != 0 {
194        b0 += W7 * c7;
195        b1 += -W5 * c7;
196        b2 += W3 * c7;
197        b3 += -W1 * c7;
198    }
199
200    let cs = COL_SHIFT as i64;
201    let vals = [
202        ((a0 + b0) >> cs) as i32,
203        ((a1 + b1) >> cs) as i32,
204        ((a2 + b2) >> cs) as i32,
205        ((a3 + b3) >> cs) as i32,
206        ((a3 - b3) >> cs) as i32,
207        ((a2 - b2) >> cs) as i32,
208        ((a1 - b1) >> cs) as i32,
209        ((a0 - b0) >> cs) as i32,
210    ];
211
212    for r in 0..8usize {
213        let off = dest_off + r * line_size + col;
214        if off < dest.len() {
215            let cur = dest[off] as i32;
216            dest[off] = clip_u8(cur + vals[r]);
217        }
218    }
219}
220
221/// In-place 8x8 IDCT: equivalent to upstream `ff_simple_idct_int16_8bit`.
222pub fn ff_simple_idct_int16_8bit(block: &mut [i16; 64]) {
223    for r in 0..8usize {
224        let mut row = [0i16; 8];
225        for c in 0..8usize {
226            row[c] = block[r * 8 + c];
227        }
228        idct_row_cond_dc_int16_8bit(&mut row);
229        for c in 0..8usize {
230            block[r * 8 + c] = row[c];
231        }
232    }
233    for c in 0..8usize {
234        idct_sparse_col_int16_8bit(block, c);
235    }
236}
237
238/// Add an 8x8 IDCT block into destination: equivalent to upstream `ff_simple_idct_add_int16_8bit`.
239pub fn ff_simple_idct_add_int16_8bit(dest: &mut [u8], dest_off: usize, line_size: usize, block: &mut [i16; 64]) {
240    // Row transform in-place
241    for r in 0..8usize {
242        let mut row = [0i16; 8];
243        for c in 0..8usize {
244            row[c] = block[r * 8 + c];
245        }
246        idct_row_cond_dc_int16_8bit(&mut row);
247        for c in 0..8usize {
248            block[r * 8 + c] = row[c];
249        }
250    }
251    // Column add (without overwriting block, like upstream idctSparseColAdd)
252    let tmp = *block;
253    for c in 0..8usize {
254        idct_sparse_col_add_int16_8bit(dest, dest_off, line_size, &tmp, c);
255    }
256}
257
// --- WMV2 ABT helpers (ported from upstream simple_idct.c) ---

// Number of fractional bits in the column (C*) and row (R*) fixed-point coefficients.
const CN_SHIFT: i32 = 12;
const RN_SHIFT: i32 = 15;
// Right shifts applied after the 4-point column pass and the 4-point row pass.
const C_SHIFT: i32 = 17; // (4+1+12)
const R_SHIFT: i32 = 11;

// Scale factors of upstream's C_FIX/R_FIX macros: sqrt(2) * 2^shift.
const C_FIX_SCALE: f64 = std::f64::consts::SQRT_2 * (1i64 << CN_SHIFT) as f64;
const R_FIX_SCALE: f64 = std::f64::consts::SQRT_2 * (1i64 << RN_SHIFT) as f64;

// Coefficients computed exactly as upstream C_FIX/R_FIX: x * M_SQRT2 * 2^shift + 0.5,
// truncated toward zero.
const C1: i64 = (0.6532814824 * C_FIX_SCALE + 0.5) as i64;
const C2: i64 = (0.2705980501 * C_FIX_SCALE + 0.5) as i64;
const C3: i64 = (0.5 * C_FIX_SCALE + 0.5) as i64;

const R1: i64 = (0.6532814824 * R_FIX_SCALE + 0.5) as i64;
const R2: i64 = (0.2705980501 * R_FIX_SCALE + 0.5) as i64;
const R3: i64 = (0.5 * R_FIX_SCALE + 0.5) as i64;

// The derived values must stay bit-exact with the integers used by upstream.
const _: () = assert!(
    C1 == 3784 && C2 == 1567 && C3 == 2896 && R1 == 30274 && R2 == 12540 && R3 == 23170
);
273
274#[inline(always)]
275fn idct4col_add(dest: &mut [u8], dest_off: usize, line_size: usize, col: &[i16; 64], col_idx: usize) {
276    // col points to block + i (column i), but in upstream idct4col_add reads col[8*0..8*3]
277    let a0 = col[col_idx + 8 * 0] as i64;
278    let a1 = col[col_idx + 8 * 1] as i64;
279    let a2 = col[col_idx + 8 * 2] as i64;
280    let a3 = col[col_idx + 8 * 3] as i64;
281
282    let c0 = (a0 + a2) * C3 + (1i64 << (C_SHIFT - 1));
283    let c2 = (a0 - a2) * C3 + (1i64 << (C_SHIFT - 1));
284    let c1 = a1 * C1 + a3 * C2;
285    let c3 = a1 * C2 - a3 * C1;
286
287    let out = [
288        ((c0 + c1) >> C_SHIFT) as i32,
289        ((c2 + c3) >> C_SHIFT) as i32,
290        ((c2 - c3) >> C_SHIFT) as i32,
291        ((c0 - c1) >> C_SHIFT) as i32,
292    ];
293
294    for r in 0..4usize {
295        let off = dest_off + r * line_size;
296        if off < dest.len() {
297            let cur = dest[off] as i32;
298            dest[off] = clip_u8(cur + out[r]);
299        }
300    }
301}
302
303#[inline(always)]
304fn idct4row(row: &mut [i16; 8]) {
305    // Operates on row[0..3] only (upstream's idct4row)
306    let a0 = row[0] as i64;
307    let a1 = row[1] as i64;
308    let a2 = row[2] as i64;
309    let a3 = row[3] as i64;
310
311    let c0 = (a0 + a2) * R3 + (1i64 << (R_SHIFT - 1));
312    let c2 = (a0 - a2) * R3 + (1i64 << (R_SHIFT - 1));
313    let c1 = a1 * R1 + a3 * R2;
314    let c3 = a1 * R2 - a3 * R1;
315
316    row[0] = ((c0 + c1) >> R_SHIFT) as i16;
317    row[1] = ((c2 + c3) >> R_SHIFT) as i16;
318    row[2] = ((c2 - c3) >> R_SHIFT) as i16;
319    row[3] = ((c0 - c1) >> R_SHIFT) as i16;
320}
321
322/// WMV2 ABT: add an 8x4 IDCT block (top or bottom half). Equivalent to upstream `ff_simple_idct84_add`.
323pub fn ff_simple_idct84_add(dest: &mut [u8], dest_off: usize, line_size: usize, block: &mut [i16; 64]) {
324    // IDCT8 on each of the first 4 rows
325    for r in 0..4usize {
326        let mut row = [0i16; 8];
327        for c in 0..8usize {
328            row[c] = block[r * 8 + c];
329        }
330        idct_row_cond_dc_int16_8bit(&mut row);
331        for c in 0..8usize {
332            block[r * 8 + c] = row[c];
333        }
334    }
335    let snap = *block;
336    for c in 0..8usize {
337        idct4col_add(dest, dest_off + c, line_size, &snap, c);
338    }
339}
340
341/// WMV2 ABT: add a 4x8 IDCT block (left or right half). Equivalent to upstream `ff_simple_idct48_add`.
342pub fn ff_simple_idct48_add(dest: &mut [u8], dest_off: usize, line_size: usize, block: &mut [i16; 64]) {
343    // IDCT4 on each line (8 rows)
344    for r in 0..8usize {
345        let mut row = [0i16; 8];
346        for c in 0..8usize {
347            row[c] = block[r * 8 + c];
348        }
349        idct4row(&mut row);
350        for c in 0..8usize {
351            block[r * 8 + c] = row[c];
352        }
353    }
354    // IDCT8 and store for first 4 columns
355    let snap = *block;
356    for c in 0..4usize {
357        idct_sparse_col_add_int16_8bit(dest, dest_off, line_size, &snap, c);
358    }
359}