wmv_decoder/
na_wmv2dsp.rs

1
2
/// Saturates a signed sample to the `0..=255` range and returns it as a byte.
///
/// Values below 0 become 0, values above 255 become 255, and everything in
/// between is returned unchanged.
#[inline(always)]
fn clip_u8(v: i32) -> u8 {
    // After clamping the value is guaranteed to fit, so the cast is lossless.
    v.clamp(0, 255) as u8
}
13
// Fixed-point multipliers used by the row and column transform passes.
//
// `W0` is the plain scale factor 2048 (2^11) applied to coefficients 0 and 4.
// `W1`..`W7` equal round(2048 * sqrt(2) * cos(k * PI / 16)) for k = 1..=7;
// note that this makes `W4` numerically identical to `W0`.
const W0: i32 = 2048;
const W1: i32 = 2841;
const W2: i32 = 2676;
const W3: i32 = 2408;
const W4: i32 = 2048;
const W5: i32 = 1609;
const W6: i32 = 1108;
const W7: i32 = 565;
22
23#[inline(always)]
24fn wmv2_idct_row(b: &mut [i16]) {
25    debug_assert!(b.len() == 8);
26    let (b0, b1, b2, b3, b4, b5, b6, b7) = (
27        b[0] as i32,
28        b[1] as i32,
29        b[2] as i32,
30        b[3] as i32,
31        b[4] as i32,
32        b[5] as i32,
33        b[6] as i32,
34        b[7] as i32,
35    );
36
37    // step 1
38    let a1 = W1 * b1 + W7 * b7;
39    let a7 = W7 * b1 - W1 * b7;
40    let a5 = W5 * b5 + W3 * b3;
41    let a3 = W3 * b5 - W5 * b3;
42    let a2 = W2 * b2 + W6 * b6;
43    let a6 = W6 * b2 - W2 * b6;
44    let a0 = W0 * b0 + W0 * b4;
45    let a4 = W0 * b0 - W0 * b4;
46
47    // step 2
48    let s1 = ((181i32 * (a1 - a5 + a7 - a3) + 128) >> 8) as i32;
49    let s2 = ((181i32 * (a1 - a5 - a7 + a3) + 128) >> 8) as i32;
50
51    // step 3
52    b[0] = ((a0 + a2 + a1 + a5 + (1 << 7)) >> 8) as i16;
53    b[1] = ((a4 + a6 + s1 + (1 << 7)) >> 8) as i16;
54    b[2] = ((a4 - a6 + s2 + (1 << 7)) >> 8) as i16;
55    b[3] = ((a0 - a2 + a7 + a3 + (1 << 7)) >> 8) as i16;
56    b[4] = ((a0 - a2 - a7 - a3 + (1 << 7)) >> 8) as i16;
57    b[5] = ((a4 - a6 - s2 + (1 << 7)) >> 8) as i16;
58    b[6] = ((a4 + a6 - s1 + (1 << 7)) >> 8) as i16;
59    b[7] = ((a0 + a2 - a1 - a5 + (1 << 7)) >> 8) as i16;
60}
61
62#[inline(always)]
63fn wmv2_idct_col(block: &mut [i16; 64], col: usize) {
64    // step 1, with extended precision
65    let b1 = block[8 * 1 + col] as i32;
66    let b7 = block[8 * 7 + col] as i32;
67    let b5 = block[8 * 5 + col] as i32;
68    let b3 = block[8 * 3 + col] as i32;
69    let b2 = block[8 * 2 + col] as i32;
70    let b6 = block[8 * 6 + col] as i32;
71    let b0 = block[8 * 0 + col] as i32;
72    let b4 = block[8 * 4 + col] as i32;
73
74    let a1 = (W1 * b1 + W7 * b7 + 4) >> 3;
75    let a7 = (W7 * b1 - W1 * b7 + 4) >> 3;
76    let a5 = (W5 * b5 + W3 * b3 + 4) >> 3;
77    let a3 = (W3 * b5 - W5 * b3 + 4) >> 3;
78    let a2 = (W2 * b2 + W6 * b6 + 4) >> 3;
79    let a6 = (W6 * b2 - W2 * b6 + 4) >> 3;
80    let a0 = (W0 * b0 + W0 * b4) >> 3;
81    let a4 = (W0 * b0 - W0 * b4) >> 3;
82
83    // step 2
84    let s1 = (181i32 * (a1 - a5 + a7 - a3) + 128) >> 8;
85    let s2 = (181i32 * (a1 - a5 - a7 + a3) + 128) >> 8;
86
87    // step 3
88    block[8 * 0 + col] = ((a0 + a2 + a1 + a5 + (1 << 13)) >> 14) as i16;
89    block[8 * 1 + col] = ((a4 + a6 + s1 + (1 << 13)) >> 14) as i16;
90    block[8 * 2 + col] = ((a4 - a6 + s2 + (1 << 13)) >> 14) as i16;
91    block[8 * 3 + col] = ((a0 - a2 + a7 + a3 + (1 << 13)) >> 14) as i16;
92
93    block[8 * 4 + col] = ((a0 - a2 - a7 - a3 + (1 << 13)) >> 14) as i16;
94    block[8 * 5 + col] = ((a4 - a6 - s2 + (1 << 13)) >> 14) as i16;
95    block[8 * 6 + col] = ((a4 + a6 - s1 + (1 << 13)) >> 14) as i16;
96    block[8 * 7 + col] = ((a0 + a2 - a1 - a5 + (1 << 13)) >> 14) as i16;
97}
98
99pub fn wmv2_idct_add(dest: &mut [u8], dest_off: usize, stride: usize, block: &mut [i16; 64]) {
100    // row pass
101    for i in (0..64).step_by(8) {
102        wmv2_idct_row(&mut block[i..i + 8]);
103    }
104    // col pass
105    for c in 0..8 {
106        wmv2_idct_col(block, c);
107    }
108
109    // add
110    for r in 0..8 {
111        let d = dest_off + r * stride;
112        let b = r * 8;
113        for c in 0..8 {
114            let idx = d + c;
115            if idx >= dest.len() {
116                continue;
117            }
118            let v = dest[idx] as i32 + block[b + c] as i32;
119            dest[idx] = clip_u8(v);
120        }
121    }
122}
123
124pub fn wmv2_idct_put(dest: &mut [u8], dest_off: usize, stride: usize, block: &mut [i16; 64]) {
125    // row pass
126    for i in (0..64).step_by(8) {
127        wmv2_idct_row(&mut block[i..i + 8]);
128    }
129    // col pass
130    for c in 0..8 {
131        wmv2_idct_col(block, c);
132    }
133
134    // put
135    for r in 0..8 {
136        let d = dest_off + r * stride;
137        let b = r * 8;
138        for c in 0..8 {
139            let idx = d + c;
140            if idx >= dest.len() {
141                continue;
142            }
143            dest[idx] = clip_u8(block[b + c] as i32);
144        }
145    }
146}