wmv_decoder/
decoder.rs

1//! VC-1 / WMV9 Macroblock Decoder
2//!
3//! Full Simple/Main-profile decode path:
4//!   • Proper VLC coefficient decoding (intra + inter TCOEF tables)
5//!   • Uniform / non-uniform inverse quantization
6//!   • VC-1 integer IDCT (8×8, 8×4, 4×8, 4×4)
7//!   • Half-pixel motion compensation with bilinear filter
8//!   • Overlap smoothing filter (Main profile)
9//!   • Reference frame buffer for P/B frames
10
11use crate::bitreader::BitReader;
12use crate::error::{DecoderError, Result};
13use crate::vc1::{FrameType, PictureHeader, SequenceHeader};
14use crate::vlc::{
15    cbpcy_i_vlc, cbpcy_p_vlc, dc_chroma_vlc, dc_luma_vlc, inter_tcoef_vlc, intra_tcoef_vlc,
16    mv_diff_vlc, ttblk_vlc, ttmb_vlc, unpack_rl, VlcTable, VLC_ESCAPE,
17    SCAN_INTRA, SCAN_VERT, ZIGZAG,
18    wmv2_tcoef_inter_vlc, wmv2_tcoef_intra_vlc, wmv2_cbpy_vlc, wmv2_cbpc_p_vlc,
19};
20use crate::wmv2::{Wmv2FrameHeader, Wmv2FrameType, Wmv2Params};
21use crate::vlc_tree::VlcTree;
22use crate::na_wmv2_tables::{FF_MSMP4_MB_I_TABLE, FF_MSMP4_DC_TABLES};
23use crate::na_msmpeg4_tables::{FF_MB_NON_INTRA_TABLES};
24use crate::na_msmpeg4_mv_tables::{
25    FF_MSMP4_MV_TABLE0, FF_MSMP4_MV_TABLE0_LENS,
26    FF_MSMP4_MV_TABLE1, FF_MSMP4_MV_TABLE1_LENS,
27};
28use crate::na_rl_tables::{FF_RL_BASES, FF_WMV1_SCANTABLE, FF_WMV2_SCANTABLE_A, FF_WMV2_SCANTABLE_B};
29use crate::na_simple_idct as ffidct;
30use crate::na_wmv2dsp as wmv2dsp;
31
32// ─── Frame buffer ────────────────────────────────────────────────────────────
33
/// Planar YUV 4:2:0 picture: one full-resolution luma plane plus two chroma
/// planes subsampled by two in each direction.
#[derive(Clone, Debug)]
pub struct YuvFrame {
    /// Luma width in pixels.
    pub width:  u32,
    /// Luma height in pixels.
    pub height: u32,
    /// Luma samples; `new` allocates `width * height` bytes.
    pub y:      Vec<u8>,
    /// Cb samples; `new` allocates `ceil(width / 2) * ceil(height / 2)` bytes.
    pub cb:     Vec<u8>,
    /// Cr samples; same size as `cb`.
    pub cr:     Vec<u8>,
}

impl YuvFrame {
    /// Luma fill value used by `new` and `clear`.
    const BLANK_LUMA: u8 = 16;
    /// Chroma fill value used by `new` and `clear`.
    const BLANK_CHROMA: u8 = 128;

    /// Allocates a frame with luma filled with 16 and chroma filled with 128.
    ///
    /// For even dimensions each chroma plane is exactly a quarter of the luma
    /// plane.  For odd dimensions each chroma dimension is rounded up, which
    /// matches the `(w + 1) / 2` plane dimensions that `apply_loop_filter`
    /// uses when it walks `cb` / `cr`.
    pub fn new(width: u32, height: u32) -> Self {
        // Widen before multiplying: the product of two u32 values can exceed
        // u32::MAX for large frames.
        let (w, h) = (width as usize, height as usize);
        let y_sz = w * h;
        let uv_sz = ((w + 1) / 2) * ((h + 1) / 2);
        YuvFrame {
            width,
            height,
            y:  vec![Self::BLANK_LUMA; y_sz],
            cb: vec![Self::BLANK_CHROMA; uv_sz],
            cr: vec![Self::BLANK_CHROMA; uv_sz],
        }
    }

    /// Returns the three planes concatenated in Y, Cb, Cr order.
    pub fn to_planar_u8(&self) -> Vec<u8> {
        let mut out = Vec::with_capacity(self.y.len() + self.cb.len() + self.cr.len());
        out.extend_from_slice(&self.y);
        out.extend_from_slice(&self.cb);
        out.extend_from_slice(&self.cr);
        out
    }

    /// Resets every sample to the same values `new` starts with, keeping the
    /// existing allocations.
    pub fn clear(&mut self) {
        self.y.fill(Self::BLANK_LUMA);
        self.cb.fill(Self::BLANK_CHROMA);
        self.cr.fill(Self::BLANK_CHROMA);
    }
}
69
70// ─── VC-1 IDCT ───────────────────────────────────────────────────────────────
71// SMPTE 421M §4.4.1 — Simple/Main profile integer IDCT.
72//
73// Exact butterfly constants: 12, 6, 16, 15, 9, 4  (no floating point).
74// Row pass produces values × 8; column pass divides by 128 (>>7).
75// Total normalization: ×8 / 128 = 1/16  per spatial pixel per coefficient.
76//
77
/// 8-point one-dimensional inverse transform kernel, applied in place.
///
/// No final normalisation happens here: the 2-D wrappers add the rounding
/// term and shift after their second pass.
#[inline(always)]
fn idct_row8(b: &mut [i32; 8]) {
    let [x0, x1, x2, x3, x4, x5, x6, x7] = *b;

    // Even half: butterfly on (x0, x4) combined with the rotated (x2, x6) pair.
    let rot_a = (12 * x2 + 6 * x6) >> 3;
    let rot_b = (6 * x2 - 12 * x6) >> 3;
    let sum = x0 + x4;
    let diff = x0 - x4;
    let even = [sum + rot_a, diff + rot_b, diff - rot_b, sum - rot_a];

    // Odd half: one weighted sum of (x1, x3, x5, x7) per mirrored output pair.
    let odd = [
        (16 * x1 + 15 * x3 + 9 * x5 + 4 * x7) >> 3,
        (9 * x1 - 16 * x3 + 4 * x5 + 15 * x7) >> 3,
        (4 * x1 - 9 * x3 + 15 * x5 - 16 * x7) >> 3,
        (15 * x1 - 4 * x3 - 16 * x5 - 9 * x7) >> 3,
    ];

    // Output k and its mirror 7-k share the same even/odd terms.
    for k in 0..4 {
        b[k] = even[k] + odd[k];
        b[7 - k] = even[k] - odd[k];
    }
}
107
/// 4-point one-dimensional inverse transform kernel (SMPTE 421M §4.4.2).
///
/// Returns the four transformed samples; the input is left untouched and no
/// normalising shift is applied.
#[inline(always)]
fn idct_row4(b: &[i32; 4]) -> [i32; 4] {
    let &[c0, c1, c2, c3] = b;

    // Even coefficients contribute symmetrically, odd ones antisymmetrically.
    let even_sum = 17 * c0 + 17 * c2;
    let even_diff = 17 * c0 - 17 * c2;
    let odd_outer = 22 * c1 + 10 * c3;
    let odd_inner = 10 * c1 - 22 * c3;

    [
        even_sum + odd_outer,
        even_diff + odd_inner,
        even_diff - odd_inner,
        even_sum - odd_outer,
    ]
}
122
123pub fn idct8x8(blk: &mut [i32; 64]) {
124    // Row pass (no shift — values grow by ×8 nominal)
125    for r in 0..8 {
126        let o = r * 8;
127        let mut row = [blk[o], blk[o+1], blk[o+2], blk[o+3],
128                       blk[o+4], blk[o+5], blk[o+6], blk[o+7]];
129        idct_row8(&mut row);
130        blk[o..o+8].copy_from_slice(&row);
131    }
132    // Column pass + final >>7 rounding shift
133    for c in 0..8 {
134        let mut col = [
135            blk[c],    blk[c+8],  blk[c+16], blk[c+24],
136            blk[c+32], blk[c+40], blk[c+48], blk[c+56],
137        ];
138        idct_row8(&mut col);
139        for r in 0..8 {
140            blk[c + r*8] = (col[r] + 64) >> 7;
141        }
142    }
143}
144
145fn idct8x4(blk: &mut [i32; 64]) {
146    // Row pass (8 wide, 4 high)
147    for r in 0..4 {
148        let o = r * 8;
149        let mut row = [blk[o], blk[o+1], blk[o+2], blk[o+3],
150                       blk[o+4], blk[o+5], blk[o+6], blk[o+7]];
151        idct_row8(&mut row);
152        blk[o..o+8].copy_from_slice(&row);
153    }
154    // Column pass (only 4 rows), with >>7
155    for c in 0..8 {
156        let col4 = [blk[c], blk[c+8], blk[c+16], blk[c+24]];
157        let out = idct_row4(&col4);
158        for r in 0..4 {
159            blk[c + r*8] = (out[r] + 64) >> 7;
160        }
161        let _ = col4[0]; // suppress unused warning
162    }
163}
164
165fn idct4x8(blk: &mut [i32; 64]) {
166    // Row pass (only 4 wide)
167    for r in 0..8 {
168        let o = r * 8;
169        let col4 = [blk[o], blk[o+1], blk[o+2], blk[o+3]];
170        let out = idct_row4(&col4);
171        for c in 0..4 { blk[o+c] = out[c]; }
172    }
173    // Column pass (8 rows), with >>7
174    for c in 0..4 {
175        let mut col = [
176            blk[c],    blk[c+8],  blk[c+16], blk[c+24],
177            blk[c+32], blk[c+40], blk[c+48], blk[c+56],
178        ];
179        idct_row8(&mut col);
180        for r in 0..8 {
181            blk[c + r*8] = (col[r] + 64) >> 7;
182        }
183    }
184}
185
186fn idct4x4(blk: &mut [i32; 64]) {
187    // Row pass (4 wide)
188    for r in 0..4 {
189        let o = r * 8;
190        let col4 = [blk[o], blk[o+1], blk[o+2], blk[o+3]];
191        let out = idct_row4(&col4);
192        for c in 0..4 { blk[o+c] = out[c]; }
193    }
194    // Column pass (4 rows), with >>7
195    for c in 0..4 {
196        let col4 = [blk[c], blk[c+8], blk[c+16], blk[c+24]];
197        let out = idct_row4(&col4);
198        for r in 0..4 {
199            blk[c + r*8] = (out[r] + 64) >> 7;
200        }
201    }
202}
203
204/// Apply IDCT according to transform type.
205/// tt: 0=8x8, 1=8x4_top, 2=8x4_bot, 3=4x8_left, 4=4x8_right, 5=4x4, 6=per_block
206pub fn apply_idct(blk: &mut [i32; 64], tt: u8) {
207    match tt {
208        0 => idct8x8(blk),
209        1 | 2 => idct8x4(blk),
210        3 | 4 => idct4x8(blk),
211        5 | 6 => idct4x4(blk),
212        _ => idct8x8(blk),
213    }
214}
215
216// ─── Inverse quantization ────────────────────────────────────────────────────
217// SMPTE 421M §8.1.4.  Two modes: uniform and non-uniform.
218
/// Uniform inverse quantiser.
///
/// A zero `level` stays zero.  Otherwise the magnitude is
/// `2 * pquant * |level| + pquant`, plus another `pquant` when `halfqp` is
/// set; the sign of `level` is restored and the result is clamped to
/// `-2048..=2047`.
fn iquant_uniform(level: i32, pquant: i32, halfqp: bool) -> i32 {
    if level == 0 {
        return 0;
    }
    let half_step = if halfqp { pquant } else { 0 };
    let magnitude = 2 * pquant * level.abs() + pquant + half_step;
    // `level` is non-zero here, so `signum` is exactly ±1.
    (level.signum() * magnitude).clamp(-2048, 2047)
}
227
/// Non-uniform inverse quantiser.
///
/// A zero `level` stays zero.  Otherwise the magnitude is
/// `STEP[pquant] * |level| + pquant` (table index capped at 31); the sign of
/// `level` is restored and the result is clamped to `-2048..=2047`.
fn iquant_nonuniform(level: i32, pquant: i32) -> i32 {
    // Step size per `pquant`; linear up to 24, then coarser.
    const STEP: [i32; 32] = [
        1, 2, 3, 4, 5, 6, 7, 8,
        9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24,
        25, 27, 29, 31, 33, 35, 37, 63,
    ];

    if level == 0 {
        return 0;
    }
    // A negative `pquant` casts to a huge usize and therefore also lands on
    // the last entry.
    let idx = (pquant as usize).min(31);
    let magnitude = STEP[idx] * level.abs() + pquant;
    // `level` is non-zero here, so `signum` is exactly ±1.
    (level.signum() * magnitude).clamp(-2048, 2047)
}
240
241// ─── DC step-size tables (SMPTE 421M Table 3) ───────────────────────────────
242// Indexed by pquant (0 unused, 1..31).
243// Luma and chroma have separate tables.
244// The value is multiplied by 128 here to match the IDCT normalization domain
245// (IDCT output = input / 128, so DC_recon must be in the ×128 domain).
246
/// Multiplies every entry of a per-`pquant` step table by 128 at compile time.
const fn dc_steps_x128(steps: [i32; 32]) -> [i32; 32] {
    let mut scaled = [0i32; 32];
    let mut i = 0;
    while i < 32 {
        scaled[i] = steps[i] * 128;
        i += 1;
    }
    scaled
}

/// Luma DC step, pre-multiplied by 128, indexed by `pquant` (entry 0 unused).
const DC_STEP_LUMA: [i32; 32] = dc_steps_x128([
    0,                                  // 0: unused
    1, 2, 3, 4, 5, 6, 7, 8,             // pquant 1-8: step = pquant
    9, 10, 11, 12, 13, 14, 15, 16,      // 9-16
    17, 18, 19, 20, 21, 22, 23, 24,     // 17-24
    26, 28, 30, 32, 34, 36, 64,         // 25-31
]);

/// Chroma DC step, pre-multiplied by 128, indexed by `pquant` (entry 0 unused).
const DC_STEP_CHROMA: [i32; 32] = dc_steps_x128([
    0,                                  // 0: unused
    1, 1, 1, 2, 2, 3, 3, 4,             // pquant 1-8
    4, 5, 5, 6, 6, 7, 7, 8,             // 9-16
    8, 9, 9, 10, 10, 11, 11, 12,        // 17-24
    13, 14, 15, 16, 17, 18, 32,         // 25-31
]);

/// Looks up the ×128 DC step for `pquant`, which is clamped into `1..=31`
/// before indexing.  `is_luma` selects the luma table, otherwise chroma.
#[inline]
fn dc_step(pquant: i32, is_luma: bool) -> i32 {
    let table = if is_luma { &DC_STEP_LUMA } else { &DC_STEP_CHROMA };
    table[pquant.clamp(1, 31) as usize]
}
268
269
270// ─── Loop filter (deblocking) ────────────────────────────────────────────────
271// SMPTE 421M §8.6 — Simple/Main profile deblocking filter.
272//
273// Applied at every 8-pixel block boundary in the decoded frame.
274// Modifies the two pixels straddling each boundary to reduce blocking artefacts.
275//
276//   d   = (p1 - 2*p2 + 2*p3 - p4 + 4) >> 3
277//   d   = clamp(d, -p2, 255 - p3)
278//   p2 += d;  p3 -= d
279
/// Filters one line of four samples across a block edge.
///
/// `a`, `b`, `c`, `d` index the four samples in order across the edge, which
/// lies between `b` and `c`.  Only `p[b]` and `p[c]` are modified.
#[inline(always)]
fn lf_filter4(p: &mut [u8], a: usize, b: usize, c: usize, d: usize) {
    let (p1, p2, p3, p4) = (
        i32::from(p[a]),
        i32::from(p[b]),
        i32::from(p[c]),
        i32::from(p[d]),
    );
    let delta = ((p1 - 2 * p2 + 2 * p3 - p4 + 4) >> 3).clamp(-p2, 255 - p3);
    // The clamp above bounds `delta` but does not by itself keep `p2 + delta`
    // and `p3 - delta` at or below 255 (e.g. p = [255, 250, 200, 0] gives
    // 269), so saturate explicitly instead of letting `as u8` wrap.
    p[b] = (p2 + delta).clamp(0, 255) as u8;
    p[c] = (p3 - delta).clamp(0, 255) as u8;
}

/// Applies the deblocking filter to one plane, in place.
///
/// `stride` is the number of samples per row and `height` the number of rows.
/// Edges are filtered at every multiple of 8 in both directions: first the
/// vertical edges (filtering along each row), then the horizontal edges
/// (filtering down each column).
///
/// Nothing is done when either dimension is below 16, or when `plane` holds
/// fewer than `stride * height` samples (filtering would otherwise index past
/// the end of the buffer).
fn loop_filter_plane(plane: &mut [u8], stride: usize, height: usize) {
    if stride < 16 || height < 16 {
        return;
    }
    let needed = match stride.checked_mul(height) {
        Some(n) if n <= plane.len() => n,
        _ => return,
    };
    let plane = &mut plane[..needed];

    // Vertical edges at x = 8, 16, ...: samples x-2, x-1 | x, x+1 of each row.
    // The range stops at `stride - 1`, so `x + 1` is always inside the row.
    for row in plane.chunks_exact_mut(stride) {
        for x in (8..stride - 1).step_by(8) {
            lf_filter4(row, x - 2, x - 1, x, x + 1);
        }
    }

    // Horizontal edges at y = 8, 16, ...: rows y-2, y-1 | y, y+1 of each column.
    for y in (8..height - 1).step_by(8) {
        let first_row = (y - 2) * stride;
        for x in 0..stride {
            let a = first_row + x;
            lf_filter4(plane, a, a + stride, a + 2 * stride, a + 3 * stride);
        }
    }
}
325
326/// Apply loop filter to a decoded YUV frame (luma + both chroma planes).
327pub fn apply_loop_filter(frame: &mut YuvFrame) {
328    let w  = frame.width  as usize;
329    let h  = frame.height as usize;
330    let cw = (w + 1) / 2;
331    let ch = (h + 1) / 2;
332    loop_filter_plane(&mut frame.y,  w,  h);
333    loop_filter_plane(&mut frame.cb, cw, ch);
334    loop_filter_plane(&mut frame.cr, cw, ch);
335}
336
337// ─── Coefficient decoder ─────────────────────────────────────────────────────
338
339/// Read raw DC differential (before prediction).
340/// Returns the signed differential value (NOT yet scaled / predicted).
341fn read_dc_diff(br: &mut BitReader<'_>, dc_vlc: &VlcTable) -> i32 {
342    let dc_size = match dc_vlc.decode(br) {
343        Some(s) if s >= 0 => s as u8,
344        _ => return 0,
345    };
346    if dc_size == 0 { return 0; }
347    let raw = br.read_bits(dc_size).unwrap_or(0) as i32;
348    // MSB=0 → negative (one's complement offset per SMPTE 421M §8.1.4.4)
349    if raw & (1 << (dc_size - 1)) != 0 { raw } else { raw - (1 << dc_size) + 1 }
350}
351
352// ─── DC Prediction buffer ────────────────────────────────────────────────────
353// SMPTE 421M §8.1.4.6.
354//
355// For each macroblock position we store the reconstructed (post-IDCT) DC
356// value for each of the 6 blocks (Y0 Y1 Y2 Y3 Cb Cr) in the "DC scale"
357// domain (i.e. the integer value before the final /8 normalisation step).
358//
359// Prediction direction is chosen per-block by comparing the gradient
360// magnitudes of the left and top neighbours.
361
/// Per-macroblock store of DC values used for intra DC prediction.
#[derive(Clone)]
pub struct DcPredBuffer {
    /// Picture width in macroblocks (row stride of `dc`).
    mb_w:  usize,
    /// One entry per macroblock, indexed `mb_row * mb_w + mb_col`, holding
    /// the value last passed to `store` for each of its six blocks
    /// (Y0 Y1 Y2 Y3 Cb Cr).
    dc: Vec<[i32; 6]>,
}

impl DcPredBuffer {
    /// Value reported for neighbours outside the picture and used to
    /// initialise every slot.
    const DEFAULT_DC: i32 = 1024;

    /// Creates a buffer for an `mb_w` × `mb_h` macroblock grid with every
    /// slot set to 1024.
    pub fn new(mb_w: usize, mb_h: usize) -> Self {
        DcPredBuffer { mb_w, dc: vec![[Self::DEFAULT_DC; 6]; mb_w * mb_h] }
    }

    /// Returns the predicted DC for block `blk` at (`mb_row`, `mb_col`) as
    /// `(pred_value, use_left)`.
    ///
    /// With A = left, B = top-left and C = top neighbour, the rule is the
    /// same one `Wmv2DcPredBuffer::predict` applies: when the top row is at
    /// least as flat as the left column (`|C - B| <= |A - B|`) the left
    /// neighbour is the predictor, otherwise the top neighbour is.  The
    /// original code had the two outcomes swapped.
    pub fn predict(&self, mb_row: usize, mb_col: usize, blk: usize) -> (i32, bool) {
        let (dc_left, dc_top, dc_topleft) = self.dc_neighbours(mb_row, mb_col, blk);

        // Change along the row above (top-left -> top) and down the column
        // to the left (top-left -> left).
        let top_row_delta  = (dc_top  - dc_topleft).unsigned_abs();
        let left_col_delta = (dc_left - dc_topleft).unsigned_abs();

        if top_row_delta <= left_col_delta {
            // Little change along the row above: continue horizontally.
            (dc_left, true)
        } else {
            // Little change down the left column: continue vertically.
            (dc_top, false)
        }
    }

    /// Returns `(left, top, top_left)` stored DC values for block `blk`.
    ///
    /// Luma blocks are laid out inside a macroblock as
    /// ```text
    ///   0 1
    ///   2 3
    /// ```
    /// so neighbours are taken on the 8×8 block grid and may belong to the
    /// same macroblock, the one to the left, the one above, or the one
    /// above-left.  Chroma blocks (4, 5) use the same block index of the
    /// neighbouring macroblocks.  Positions outside the picture yield 1024.
    fn dc_neighbours(&self, mb_row: usize, mb_col: usize, blk: usize)
        -> (i32, i32, i32)
    {
        let get = |r: isize, c: isize, b: usize| -> i32 {
            if r < 0 || c < 0 || c as usize >= self.mb_w {
                return Self::DEFAULT_DC;
            }
            let idx = r as usize * self.mb_w + c as usize;
            self.dc.get(idx).map_or(Self::DEFAULT_DC, |mb| mb[b])
        };

        let r = mb_row as isize;
        let c = mb_col as isize;

        match blk {
            // Top-left luma block: all three neighbours are in other MBs.
            0 => (get(r, c - 1, 1), get(r - 1, c, 2), get(r - 1, c - 1, 3)),
            // Top-right luma block: left is block 0 of this MB, top is block 3
            // of the MB above, and the block diagonally up-left of it is
            // block 2 of that same MB above (not the top-left MB).
            1 => (get(r, c, 0), get(r - 1, c, 3), get(r - 1, c, 2)),
            // Bottom-left luma block: top is block 0 of this MB.
            2 => (get(r, c - 1, 3), get(r, c, 0), get(r, c - 1, 1)),
            // Bottom-right luma block: everything is inside this MB.
            3 => (get(r, c, 2), get(r, c, 1), get(r, c, 0)),
            // Chroma: one block per MB, so neighbours are neighbouring MBs.
            _ => (get(r, c - 1, blk), get(r - 1, c, blk), get(r - 1, c - 1, blk)),
        }
    }

    /// Records `dc_recon` for block `blk` of macroblock (`mb_row`, `mb_col`)
    /// so later blocks can predict from it.  Out-of-range macroblock
    /// positions are ignored.
    pub fn store(&mut self, mb_row: usize, mb_col: usize, blk: usize, dc_recon: i32) {
        let idx = mb_row * self.mb_w + mb_col;
        if let Some(mb) = self.dc.get_mut(idx) {
            mb[blk] = dc_recon;
        }
    }
}
463
464// ─── WMV2/MSMPEG4 DC predictor (upstream ff_msmpeg4_pred_dc logic) ─────────────
465
/// DC predictor state for the WMV2 / MSMPEG4 path.
///
/// Values are kept in the scaled-DC domain (level × dc_scale).  Every slot
/// starts at 1024, i.e. mid-gray (128) at scale 8.
pub struct Wmv2DcPredBuffer {
    mb_w: usize,
    dc: Vec<[i32; 6]>,
}

impl Wmv2DcPredBuffer {
    /// Creates a buffer for an `mb_w` × `mb_h` macroblock grid with every
    /// slot set to 1024.
    pub fn new(mb_w: usize, mb_h: usize) -> Self {
        Wmv2DcPredBuffer { mb_w, dc: vec![[1024i32; 6]; mb_w * mb_h] }
    }

    /// Stored value of block `blk` in macroblock (`mb_x`, `mb_y`), or 1024
    /// when the position has a negative coordinate or lies past the end of
    /// the buffer.
    fn stored_or_default(&self, mb_x: isize, mb_y: isize, blk: usize) -> i32 {
        if mb_x < 0 || mb_y < 0 {
            return 1024;
        }
        let idx = (mb_y as usize) * self.mb_w + (mb_x as usize);
        match self.dc.get(idx) {
            Some(mb) => mb[blk],
            None => 1024,
        }
    }

    /// Returns the `(A, B, C)` neighbours of block `blk`, laid out as
    /// ```text
    ///   B C
    ///   A X
    /// ```
    /// Luma blocks (0..4) are addressed on the 8×8 block grid, so a neighbour
    /// may be another block of the same macroblock; chroma blocks (4, 5) are
    /// addressed on the macroblock grid.
    fn neighbours(&self, mb_row: usize, mb_col: usize, blk: usize) -> (i32, i32, i32) {
        let (col, row) = (mb_col as isize, mb_row as isize);

        if blk >= 4 {
            return (
                self.stored_or_default(col - 1, row, blk),
                self.stored_or_default(col - 1, row - 1, blk),
                self.stored_or_default(col, row - 1, blk),
            );
        }

        // Position of this block on the 8×8 luma block grid.
        let bx = col * 2 + (blk & 1) as isize;
        let by = row * 2 + (blk >> 1) as isize;

        // Map a block-grid position back to (macroblock, sub-block 0..3).
        // The arithmetic shift keeps negative coordinates negative, so
        // off-picture positions still fall back to the default.
        let luma = |x: isize, y: isize| -> i32 {
            let sub = (((y & 1) << 1) | (x & 1)) as usize;
            self.stored_or_default(x >> 1, y >> 1, sub)
        };

        (luma(bx - 1, by), luma(bx - 1, by - 1), luma(bx, by - 1))
    }

    /// Returns `(pred_level, dir)`; `dir` is 0 for the left neighbour and 1
    /// for the top neighbour.
    ///
    /// The stored neighbours are first converted to the level domain with
    /// `(x + scale / 2) / scale`.  The top neighbour is chosen only when
    /// `|A - B|` is strictly smaller than `|B - C|`.
    pub fn predict(&self, mb_row: usize, mb_col: usize, blk: usize, scale: i32) -> (i32, i32) {
        let (left_raw, diag_raw, top_raw) = self.neighbours(mb_row, mb_col, blk);

        let half = scale >> 1;
        let left = (left_raw + half) / scale;
        let diag = (diag_raw + half) / scale;
        let top = (top_raw + half) / scale;

        let prefer_top = (left - diag).abs() < (diag - top).abs();
        if prefer_top { (top, 1) } else { (left, 0) }
    }

    /// Records `dc_coeff_scaled` for block `blk` of macroblock
    /// (`mb_row`, `mb_col`).  Out-of-range macroblock positions are ignored.
    pub fn store(&mut self, mb_row: usize, mb_col: usize, blk: usize, dc_coeff_scaled: i32) {
        let idx = mb_row * self.mb_w + mb_col;
        if let Some(mb) = self.dc.get_mut(idx) {
            mb[blk] = dc_coeff_scaled;
        }
    }
}
556
557// ─── AC escape decoder ───────────────────────────────────────────────────────
558// SMPTE 421M §8.1.4.5 — Three escape modes following VLC_ESCAPE sentinel.
559//
560//   After VLC_ESCAPE, read mode bits:
561//     "0"  → Mode 1: level offset
562//     "10" → Mode 2: run offset
563//     "11" → Mode 3: absolute fixed-length
564//
565// Returns (run, signed_level, last).
566#[inline]
567fn decode_escape_coeff(
568    br:     &mut BitReader<'_>,
569    ac_vlc: &VlcTable,
570) -> (u8, i32, bool) {
571    let mode = {
572        let b0 = br.read_bit().unwrap_or(false);
573        if !b0 { 1u8 } else {
574            let b1 = br.read_bit().unwrap_or(false);
575            if b1 { 3 } else { 2 }
576        }
577    };
578    match mode {
579        1 => {
580            // Mode 1: level offset — VLC gives (run, base_level, last)
581            let sym = ac_vlc.decode(br).unwrap_or(0);
582            if sym == VLC_ESCAPE { return (0, 0, true); }
583            let (run, base_level, last) = unpack_rl(sym);
584            let sign   = br.read_bit().unwrap_or(false);
585            let offset = ac_vlc.max_level(run as usize, last) as i32 + 1;
586            let level  = base_level as i32 + offset;
587            (run, if sign { -level } else { level }, last)
588        }
589        2 => {
590            // Mode 2: run offset — VLC gives (base_run, level, last)
591            let sym = ac_vlc.decode(br).unwrap_or(0);
592            if sym == VLC_ESCAPE { return (0, 0, true); }
593            let (base_run, level, last) = unpack_rl(sym);
594            let sign   = br.read_bit().unwrap_or(false);
595            let offset = ac_vlc.max_run(level as usize, last) as i32 + 1;
596            let run    = (base_run as i32 + offset).min(63) as u8;
597            let sl     = level as i32;
598            (run, if sign { -sl } else { sl }, last)
599        }
600        _ => {
601            // Mode 3: absolute — 1-bit LAST + 6-bit RUN + 8-bit |LEVEL| + 1-bit SIGN
602            let last  = br.read_bit().unwrap_or(false);
603            let run   = br.read_bits(6).unwrap_or(0) as u8;
604            let level = br.read_bits(8).unwrap_or(1).max(1) as i32;
605            let sign  = br.read_bit().unwrap_or(false);
606            (run, if sign { -level } else { level }, last)
607        }
608    }
609}
610
611/// Decode one 8×8 block of AC+DC coefficients.
612/// `is_intra`: use intra table / scan, otherwise inter.
613/// `is_luma`:  use luma DC VLC.
614/// Returns filled `[i32; 64]` in natural order (not zigzag).
615fn decode_block(
616    br:       &mut BitReader<'_>,
617    is_intra: bool,
618    is_luma:  bool,
619    pquant:   i32,
620    halfqp:   bool,
621    uniform:  bool,
622    tt:       u8,
623    dc_luma:  &VlcTable,
624    dc_chroma:&VlcTable,
625    ac_intra: &VlcTable,
626    ac_inter: &VlcTable,
627) -> [i32; 64] {
628    let mut blk = [0i32; 64];
629    let scan: &[usize; 64] = if is_intra { &SCAN_INTRA } else { &ZIGZAG };
630
631    // DC coefficient (intra only)
632    if is_intra {
633        let dc_vlc = if is_luma { dc_luma } else { dc_chroma };
634        blk[0] = read_dc_diff(br, dc_vlc);
635    }
636
637    // AC coefficients
638    let ac_vlc = if is_intra { ac_intra } else { ac_inter };
639    let mut idx = if is_intra { 1usize } else { 0 };
640
641    loop {
642        let sym = match ac_vlc.decode(br) {
643            Some(s) => s,
644            None    => break,
645        };
646
647        let (run, signed_level, last) = if sym == VLC_ESCAPE {
648            decode_escape_coeff(br, ac_vlc)
649        } else {
650            let (r, l, last) = unpack_rl(sym);
651            let sign = br.read_bit().unwrap_or(false);
652            (r, if sign { -(l as i32) } else { l as i32 }, last)
653        };
654
655        idx += run as usize;
656        if idx >= 64 { break; }
657
658        let mag   = signed_level.abs();
659        let qval  = if uniform {
660            iquant_uniform(mag, pquant, halfqp)
661        } else {
662            iquant_nonuniform(mag, pquant)
663        };
664        let signed_val = if signed_level < 0 { -qval } else { qval };
665
666        // Use appropriate scan based on transform type
667        let scan_order: &[usize; 64] = match tt {
668            3 | 4 => &SCAN_VERT,
669            _     => scan,
670        };
671        let pos = scan_order.get(idx).copied().unwrap_or(idx);
672        blk[pos] = signed_val;
673        idx += 1;
674
675        if last || br.is_empty() { break; }
676    }
677
678    blk
679}
680
681/// Decode only the AC coefficients of one intra block (DC is handled separately).
682fn decode_block_ac(
683    br:       &mut BitReader<'_>,
684    _is_luma: bool,
685    pquant:   i32,
686    halfqp:   bool,
687    uniform:  bool,
688    tt:       u8,
689    ac_vlc:   &VlcTable,
690) -> [i32; 64] {
691    let mut blk   = [0i32; 64];
692    let mut idx   = 1usize; // start at 1, skip DC slot
693
694    loop {
695        let sym = match ac_vlc.decode(br) {
696            Some(s) => s,
697            None    => break,
698        };
699
700        let (run, signed_level, last) = if sym == VLC_ESCAPE {
701            decode_escape_coeff(br, ac_vlc)
702        } else {
703            let (r, l, last) = unpack_rl(sym);
704            let sign = br.read_bit().unwrap_or(false);
705            (r, if sign { -(l as i32) } else { l as i32 }, last)
706        };
707
708        idx += run as usize;
709        if idx >= 64 { break; }
710
711        let mag  = signed_level.abs();
712        let qval = if uniform { iquant_uniform(mag, pquant, halfqp) }
713                   else       { iquant_nonuniform(mag, pquant)       };
714        let sval = if signed_level < 0 { -qval } else { qval };
715
716        let scan_order: &[usize; 64] = match tt {
717            3 | 4 => &SCAN_VERT,
718            _     => &SCAN_INTRA,
719        };
720        let pos = scan_order.get(idx).copied().unwrap_or(idx);
721        blk[pos] = sval;
722        idx += 1;
723
724        if last || br.is_empty() { break; }
725    }
726    blk
727}
728
729// ─── AC Prediction buffer ────────────────────────────────────────────────────
730// SMPTE 421M §8.1.4.7.
731//
732// For each macroblock/block we cache the first row (AC[1..7]) and first
733// column (AC[8,16,24,32,40,48,56]) of reconstructed coefficients (pre-IDCT,
734// post-IQ) so they can be used as predictors for neighbouring blocks.
735
/// Per-macroblock cache of seven-coefficient rows and columns used as AC
/// predictors for neighbouring blocks.
#[derive(Clone)]
pub struct AcPredBuffer {
    /// Picture width in macroblocks (row stride of both tables).
    mb_w: usize,
    /// Cached rows: `[mb_idx][blk]` → seven coefficients.
    row: Vec<[[i32; 7]; 6]>,
    /// Cached columns: `[mb_idx][blk]` → seven coefficients.
    col: Vec<[[i32; 7]; 6]>,
}

impl AcPredBuffer {
    /// Creates a zero-filled buffer for an `mb_w` × `mb_h` macroblock grid.
    pub fn new(mb_w: usize, mb_h: usize) -> Self {
        let mb_count = mb_w * mb_h;
        AcPredBuffer {
            mb_w,
            row: vec![[[0i32; 7]; 6]; mb_count],
            col: vec![[[0i32; 7]; 6]; mb_count],
        }
    }

    /// Reads entry `(mb_row, mb_col, blk)` from `table`, or all zeros when
    /// the position is off the picture.
    fn fetch(
        table: &[[[i32; 7]; 6]],
        mb_w: usize,
        (src_row, src_col, src_blk): (usize, usize, usize),
    ) -> [i32; 7] {
        // The neighbour helpers use `wrapping_sub(1)`, so "one before index
        // 0" arrives here as a value that is negative when viewed as isize.
        let on_picture = |v: usize| v as isize >= 0;
        if !(on_picture(src_row) && on_picture(src_col)) {
            return [0i32; 7];
        }
        table
            .get(src_row * mb_w + src_col)
            .map_or([0i32; 7], |mb| mb[src_blk])
    }

    /// Returns the row cached for the left neighbour of block `blk`, or zeros
    /// when there is no such neighbour.
    pub fn pred_row(&self, mb_row: usize, mb_col: usize, blk: usize) -> [i32; 7] {
        Self::fetch(&self.row, self.mb_w, Self::left_neighbour(mb_row, mb_col, blk))
    }

    /// Returns the column cached for the top neighbour of block `blk`, or
    /// zeros when there is no such neighbour.
    pub fn pred_col(&self, mb_row: usize, mb_col: usize, blk: usize) -> [i32; 7] {
        Self::fetch(&self.col, self.mb_w, Self::top_neighbour(mb_row, mb_col, blk))
    }

    /// Caches `row` for block `blk` of macroblock (`mb_row`, `mb_col`).
    /// Out-of-range macroblock positions are ignored.
    pub fn store_row(&mut self, mb_row: usize, mb_col: usize, blk: usize, row: [i32; 7]) {
        if let Some(mb) = self.row.get_mut(mb_row * self.mb_w + mb_col) {
            mb[blk] = row;
        }
    }

    /// Caches `col` for block `blk` of macroblock (`mb_row`, `mb_col`).
    /// Out-of-range macroblock positions are ignored.
    pub fn store_col(&mut self, mb_row: usize, mb_col: usize, blk: usize, col: [i32; 7]) {
        if let Some(mb) = self.col.get_mut(mb_row * self.mb_w + mb_col) {
            mb[blk] = col;
        }
    }

    /// `(mb_row, mb_col, blk)` of the block to the left of `blk`.  Blocks 1
    /// and 3 have their left neighbour inside the same macroblock; all others
    /// look into the macroblock to the left (column wraps below zero).
    fn left_neighbour(mb_row: usize, mb_col: usize, blk: usize)
        -> (usize, usize, usize)
    {
        let prev_col = mb_col.wrapping_sub(1);
        match blk {
            1 => (mb_row, mb_col, 0),
            3 => (mb_row, mb_col, 2),
            0 => (mb_row, prev_col, 1),
            2 => (mb_row, prev_col, 3),
            chroma => (mb_row, prev_col, chroma),
        }
    }

    /// `(mb_row, mb_col, blk)` of the block above `blk`.  Blocks 2 and 3 have
    /// their top neighbour inside the same macroblock; all others look into
    /// the macroblock above (row wraps below zero).
    fn top_neighbour(mb_row: usize, mb_col: usize, blk: usize)
        -> (usize, usize, usize)
    {
        let prev_row = mb_row.wrapping_sub(1);
        match blk {
            2 => (mb_row, mb_col, 0),
            3 => (mb_row, mb_col, 1),
            0 => (prev_row, mb_col, 2),
            1 => (prev_row, mb_col, 3),
            chroma => (prev_row, mb_col, chroma),
        }
    }
}
812
813// ─── MV Predictor ────────────────────────────────────────────────────────────
814// SMPTE 421M §8.3.5.3.
815//
816// The MV predictor for 1-MV macroblocks is the median of three neighbouring
817// MVs: left (A), top (B), and top-right (C).  When a neighbour is out-of-
818// frame or skipped, its MV is treated as (0,0).
819
820#[derive(Clone, Default)]
821pub struct MvPredictor {
822    mb_w:  usize,
823    /// Stored MVs per MB: (mvx, mvy) in half-pixel units
824    mvs:   Vec<(i32, i32)>,
825    /// Whether each MB was skipped (skipped MBs propagate MV=0)
826    skipped: Vec<bool>,
827}
828
829impl MvPredictor {
830    pub fn new(mb_w: usize, mb_h: usize) -> Self {
831        let n = mb_w * mb_h;
832        MvPredictor { mb_w, mvs: vec![(0,0); n], skipped: vec![true; n] }
833    }
834
835    /// Compute the predicted MV for (mb_row, mb_col) from three neighbours.
836    pub fn predict(&self, mb_row: usize, mb_col: usize) -> (i32, i32) {
837        let get = |r: isize, c: isize| -> (i32, i32) {
838            if r < 0 || c < 0 { return (0, 0); }
839            let idx = r as usize * self.mb_w + c as usize;
840            if idx >= self.mvs.len() || self.skipped[idx] { return (0, 0); }
841            self.mvs[idx]
842        };
843
844        let r = mb_row as isize;
845        let c = mb_col as isize;
846
847        let (ax, ay) = get(r,   c-1);   // left
848        let (bx, by) = get(r-1, c  );   // top
849        let (cx, cy) = get(r-1, c+1);   // top-right (or top-left if rightmost)
850        // If top-right is out of bounds, use top-left instead (per spec)
851        let (cx, cy) = if c + 1 >= self.mb_w as isize {
852            get(r-1, c-1)
853        } else {
854            (cx, cy)
855        };
856
857        (median3(ax, bx, cx), median3(ay, by, cy))
858    }
859
860    pub fn store(&mut self, mb_row: usize, mb_col: usize, mv: (i32, i32), skipped: bool) {
861        let idx = mb_row * self.mb_w + mb_col;
862        if idx < self.mvs.len() {
863            self.mvs[idx]    = mv;
864            self.skipped[idx] = skipped;
865        }
866    }
867}
868
/// Returns the median (middle value) of `a`, `b` and `c`.
#[inline]
fn median3(a: i32, b: i32, c: i32) -> i32 {
    // Order the first two values, then squeeze the third into that interval:
    // whatever comes out is the middle one of the three.
    let (lo, hi) = if a <= b { (a, b) } else { (b, a) };
    c.clamp(lo, hi)
}
876
/// Median of three values, under the name used by the upstream
/// `mid_pred()` helper.
#[inline(always)]
fn mid_pred(a: i32, b: i32, c: i32) -> i32 {
    // median = min(max(a, b), max(min(a, b), c))
    a.max(b).min(a.min(b).max(c))
}
882
883// ─── Overlap smoothing filter (Main profile) ─────────────────────────────────
884// SMPTE 421M §6.2.1.  Applied at 8×8 block boundaries post-IDCT.
885
/// Smooths up to four sample pairs that face each other across a block
/// boundary: `a[i]` is blended with `b[i]` for `i` in `0..4`.
///
/// Each output keeps three quarters of its own sample and takes one quarter
/// from the opposite side.  The weights sum to one, so a flat region is left
/// untouched (the previous `(9*x0 + 3*x1 + 8) >> 4` form scaled every sample
/// by 12/16).  Slices shorter than four samples are filtered as far as both
/// reach instead of panicking.
// Currently unused by the decode path; kept so the helper stays available.
#[allow(dead_code)]
fn overlap_filter_h(a: &mut [i16], b: &mut [i16]) {
    for (left, right) in a.iter_mut().zip(b.iter_mut()).take(4) {
        let x0 = i32::from(*left);
        let x1 = i32::from(*right);
        *left = ((3 * x0 + x1 + 2) >> 2) as i16;
        *right = ((x0 + 3 * x1 + 2) >> 2) as i16;
    }
}
897
/// Smooths one sample pair facing each other across a block boundary.
///
/// Each output keeps three quarters of its own sample and takes one quarter
/// from the opposite side.  The weights sum to one, so equal inputs are
/// returned unchanged (the previous `(9*x0 + 3*x1 + 8) >> 4` form scaled
/// every sample by 12/16).
// Currently unused by the decode path; kept so the helper stays available.
#[allow(dead_code)]
fn overlap_filter_v(a: &mut i16, b: &mut i16) {
    let x0 = i32::from(*a);
    let x1 = i32::from(*b);
    *a = ((3 * x0 + x1 + 2) >> 2) as i16;
    *b = ((x0 + 3 * x1 + 2) >> 2) as i16;
}
905
906pub fn apply_overlap_filter(frame: &mut YuvFrame) {
907    let w  = frame.width  as usize;
908    let h  = frame.height as usize;
909    let cw = w / 2;
910    let ch = h / 2;
911
912    // Convert to i16 for filtering, then back to u8
913    let mut y_i16: Vec<i16> = frame.y.iter().map(|&v| v as i16).collect();
914
915    // Horizontal block boundaries (every 8 columns)
916    for row in 0..h {
917        for col in (8..w).step_by(8) {
918            for k in 0..4 {
919                let ia = row*w + col - 4 + k;
920                let ib = row*w + col     + k;
921                if ib < y_i16.len() {
922                    let a = y_i16[ia] as i32;
923                    let b = y_i16[ib] as i32;
924                    y_i16[ia] = ((9*a + 3*b + 8) >> 4) as i16;
925                    y_i16[ib] = ((3*a + 9*b + 8) >> 4) as i16;
926                }
927            }
928        }
929    }
930
931    // Vertical block boundaries
932    for row in (8..h).step_by(8) {
933        for col in 0..w {
934            let a = y_i16[(row-1)*w + col] as i32;
935            let b = y_i16[row    *w + col] as i32;
936            y_i16[(row-1)*w + col] = ((9*a + 3*b + 8) >> 4) as i16;
937            y_i16[row    *w + col] = ((3*a + 9*b + 8) >> 4) as i16;
938        }
939    }
940
941    // Write back
942    for (dst, src) in frame.y.iter_mut().zip(y_i16.iter()) {
943        *dst = (*src).clamp(0, 255) as u8;
944    }
945
946    // Chroma (same logic, half size)
947    let mut cb_i16: Vec<i16> = frame.cb.iter().map(|&v| v as i16).collect();
948    let mut cr_i16: Vec<i16> = frame.cr.iter().map(|&v| v as i16).collect();
949    for plane in [&mut cb_i16, &mut cr_i16] {
950        for row in 0..ch {
951            for col in (8..cw).step_by(8) {
952                for k in 0..4 {
953                    let a = plane[row*cw + col - 4 + k] as i32;
954                    let b = plane[row*cw + col     + k] as i32;
955                    plane[row*cw + col - 4 + k] = ((9*a + 3*b + 8) >> 4) as i16;
956                    plane[row*cw + col     + k] = ((3*a + 9*b + 8) >> 4) as i16;
957                }
958            }
959        }
960        for row in (8..ch).step_by(8) {
961            for col in 0..cw {
962                let a = plane[(row-1)*cw + col] as i32;
963                let b = plane[row    *cw + col] as i32;
964                plane[(row-1)*cw + col] = ((9*a + 3*b + 8) >> 4) as i16;
965                plane[row    *cw + col] = ((3*a + 9*b + 8) >> 4) as i16;
966            }
967        }
968    }
969    for (dst, src) in frame.cb.iter_mut().zip(cb_i16.iter()) { *dst = (*src).clamp(0, 255) as u8; }
970    for (dst, src) in frame.cr.iter_mut().zip(cr_i16.iter()) { *dst = (*src).clamp(0, 255) as u8; }
971}
972
973// ─── Motion compensation ─────────────────────────────────────────────────────
974// Half-pixel bilinear interpolation per SMPTE 421M §7.3.
975
/// Half-pixel bilinear prediction of a `w` × `h` block.
///
/// * `dst`, `dst_stride` – output block, written row by row from index 0.
/// * `src`, `src_stride` – reference plane; `src_w` × `src_h` is its valid
///   area.
/// * `x`, `y` – top-left source position in half-pixel units; the low bit
///   selects the half-sample phase, the remaining bits the integer position
///   (arithmetic shift, so negative positions round towards −∞).
///
/// Positions outside the valid area replicate the nearest edge sample.  Both
/// taps of an interpolated sample are clamped independently, so a position
/// left of / above the plane yields the edge sample itself rather than a mix
/// of samples 0 and 1.  An empty reference (`src_w == 0` or `src_h == 0`)
/// leaves `dst` untouched.
fn mc_luma(
    dst: &mut [u8], dst_stride: usize,
    src: &[u8],     src_stride: usize,
    src_w: usize, src_h: usize,
    x: i32, y: i32,
    w: usize, h: usize,
) {
    // Nothing to replicate from; also keeps the clamps below well-formed.
    if src_w == 0 || src_h == 0 {
        return;
    }

    let half_x = x & 1 != 0;
    let half_y = y & 1 != 0;
    let x0 = (x >> 1) as isize;
    let y0 = (y >> 1) as isize;
    let max_x = src_w as isize - 1;
    let max_y = src_h as isize - 1;

    for dy in 0..h {
        let fy = y0 + dy as isize;
        let row0 = fy.clamp(0, max_y) as usize * src_stride;
        let row1 = (fy + 1).clamp(0, max_y) as usize * src_stride;
        let out = dy * dst_stride;

        for dx in 0..w {
            let fx = x0 + dx as isize;
            let sx0 = fx.clamp(0, max_x) as usize;
            let sx1 = (fx + 1).clamp(0, max_x) as usize;

            let p00 = i32::from(src[row0 + sx0]);
            let val = match (half_x, half_y) {
                (false, false) => p00,
                (true, false) => (p00 + i32::from(src[row0 + sx1]) + 1) >> 1,
                (false, true) => (p00 + i32::from(src[row1 + sx0]) + 1) >> 1,
                (true, true) => {
                    let p10 = i32::from(src[row0 + sx1]);
                    let p01 = i32::from(src[row1 + sx0]);
                    let p11 = i32::from(src[row1 + sx1]);
                    (p00 + p10 + p01 + p11 + 2) >> 2
                }
            };
            // A rounded mean of bytes always fits in a byte.
            dst[out + dx] = val as u8;
        }
    }
}
1011
1012// ─── DQUANT: macroblock-level differential quantizer ────────────────────────
1013// SMPTE 421M §8.1.4.10 / §8.3.7.
1014//
1015// When seq.dquant != 0, each macroblock may override the frame-level PQUANT.
// dquant=1: 1-bit flag; if set, read a 2-bit MQDIFF code selecting a step of +2, -2, +4 or -4 from PQUANT.
// dquant=2: always-present 3-bit MQDIFF added to PQUANT; if == 7, read a 5-bit absolute MQUANT instead.
1018
1019fn read_mquant(br: &mut BitReader<'_>, dquant: u8, pquant: i32) -> i32 {
1020    match dquant {
1021        0 => pquant, // no per-MB quant
1022        1 => {
1023            // 1-bit DQUANT flag; if 1, read 2-bit delta
1024            if br.read_bit().unwrap_or(false) {
1025                let mqdiff = br.read_bits(2).unwrap_or(0) as i32;
1026                // mqdiff: 0=+2, 1=-2, 2=+4, 3=-4 relative to pquant
1027                let delta = match mqdiff {
1028                    0 => 2, 1 => -2, 2 => 4, _ => -4,
1029                };
1030                (pquant + delta).clamp(1, 31)
1031            } else {
1032                pquant
1033            }
1034        }
1035        _ => {
1036            // dquant==2: always read 3-bit MQDIFF
1037            let mqdiff = br.read_bits(3).unwrap_or(0);
1038            if mqdiff == 7 {
1039                // escape: read 5-bit absolute MQUANT
1040                br.read_bits(5).unwrap_or(pquant as u32) as i32
1041            } else {
1042                // relative to pquant: +1..+6
1043                (pquant + mqdiff as i32).clamp(1, 31)
1044            }
1045        }
1046    }
1047}
1048
1049// ─── Range Reduction / Expansion ─────────────────────────────────────────────
1050// SMPTE 421M §7.1.1.9.
1051//
1052// RANGEREDFRM=1 means the encoder reduced the dynamic range before coding.
1053// The decoder must expand it back.  Applied to the reconstructed frame.
1054
1055pub fn apply_rangered_expand(frame: &mut YuvFrame) {
1056    // Expand: x' = (x - 128) * 2 + 128  (clamp 0..255)
1057    for p in frame.y.iter_mut() {
1058        *p = ((*p as i32 - 128) * 2 + 128).clamp(0, 255) as u8;
1059    }
1060    for p in frame.cb.iter_mut() {
1061        *p = ((*p as i32 - 128) * 2 + 128).clamp(0, 255) as u8;
1062    }
1063    for p in frame.cr.iter_mut() {
1064        *p = ((*p as i32 - 128) * 2 + 128).clamp(0, 255) as u8;
1065    }
1066}
1067
1068/// Compress: applied to reference frame before motion compensation when
1069/// the current frame does NOT have RANGEREDFRM but the reference did.
1070pub fn apply_rangered_compress(frame: &mut YuvFrame) {
1071    for p in frame.y.iter_mut() {
1072        *p = ((*p as i32 - 128).div_euclid(2) + 128).clamp(0, 255) as u8;
1073    }
1074    for p in frame.cb.iter_mut() {
1075        *p = ((*p as i32 - 128).div_euclid(2) + 128).clamp(0, 255) as u8;
1076    }
1077    for p in frame.cr.iter_mut() {
1078        *p = ((*p as i32 - 128).div_euclid(2) + 128).clamp(0, 255) as u8;
1079    }
1080}
1081
1082// ─── Write helpers ───────────────────────────────────────────────────────────
1083
/// Maps block `blk` of macroblock (`mb_row`, `mb_col`) to its position in a
/// `width` × `height` frame.
///
/// Returns `(is_luma, px, py, stride, plane_height)`.  Blocks 0–3 are the
/// four 8×8 luma quadrants of the 16×16 macroblock (left/right, then
/// top/bottom); every other index addresses an 8×8 block in a half-width,
/// half-height chroma plane.
fn block_coords(mb_row: u32, mb_col: u32, blk: usize, width: u32, height: u32)
    -> (bool, usize, usize, usize, usize)
{
    let is_luma = blk < 4;
    let (px, py, stride, plane_h) = if is_luma {
        // Bit 0 selects the right half, bit 1 the bottom half.
        let dx = if blk & 1 != 0 { 8 } else { 0 };
        let dy = if blk & 2 != 0 { 8 } else { 0 };
        (mb_col * 16 + dx, mb_row * 16 + dy, width, height)
    } else {
        (mb_col * 8, mb_row * 8, width / 2, height / 2)
    };
    (is_luma, px as usize, py as usize, stride as usize, plane_h as usize)
}
1100
1101fn write_intra_block(frame: &mut YuvFrame, mb_row: u32, mb_col: u32, blk: usize,
1102                     coeff: &[i32; 64]) {
1103    let (is_luma, bx, by, stride, ph) =
1104        block_coords(mb_row, mb_col, blk, frame.width, frame.height);
1105    let plane: &mut Vec<u8> = if is_luma { &mut frame.y }
1106                               else if blk == 4 { &mut frame.cb }
1107                               else { &mut frame.cr };
1108    for r in 0..8 {
1109        if by + r >= ph { break; }
1110        for c in 0..8 {
1111            if bx + c >= stride { break; }
1112            let idx = (by + r) * stride + (bx + c);
1113            plane[idx] = (128 + coeff[r*8 + c]).clamp(0, 255) as u8;
1114        }
1115    }
1116}
1117
1118
1119#[inline]
1120fn write_block_to_frame(
1121    frame: &mut YuvFrame,
1122    mb_row: usize,
1123    mb_col: usize,
1124    blk: usize,
1125    coeff: &[i32; 64],
1126) {
1127    write_intra_block(frame, mb_row as u32, mb_col as u32, blk, coeff);
1128}
1129
1130// WMV2 path uses upstream's Simple IDCT (int16). Provide i16 write/add helpers.
1131fn write_intra_block_i16(frame: &mut YuvFrame, mb_row: u32, mb_col: u32, blk: usize, coeff: &[i16; 64]) {
1132    let (is_luma, bx, by, stride, ph) = block_coords(mb_row, mb_col, blk, frame.width, frame.height);
1133    let plane: &mut Vec<u8> = if is_luma { &mut frame.y } else if blk == 4 { &mut frame.cb } else { &mut frame.cr };
1134    for r in 0..8usize {
1135        if by + r >= ph { break; }
1136        for c in 0..8usize {
1137            if bx + c >= stride { break; }
1138            let idx = (by + r) * stride + (bx + c);
1139            let v = coeff[r * 8 + c] as i32;
1140            plane[idx] = (v + 128).clamp(0, 255) as u8;
1141        }
1142    }
1143}
1144
1145fn add_residual_block_i16(frame: &mut YuvFrame, mb_row: u32, mb_col: u32, blk: usize, coeff: &[i16; 64]) {
1146    let (is_luma, bx, by, stride, ph) = block_coords(mb_row, mb_col, blk, frame.width, frame.height);
1147    let plane: &mut Vec<u8> = if is_luma { &mut frame.y } else if blk == 4 { &mut frame.cb } else { &mut frame.cr };
1148    for r in 0..8usize {
1149        if by + r >= ph { break; }
1150        for c in 0..8usize {
1151            if bx + c >= stride { break; }
1152            let idx = (by + r) * stride + (bx + c);
1153            let v = plane[idx] as i32 + coeff[r * 8 + c] as i32;
1154            plane[idx] = v.clamp(0, 255) as u8;
1155        }
1156    }
1157}
1158
1159/// Motion compensate one 16×16 macroblock from `reference` into `dst`.
1160/// Motion vectors are in half-pel units (like H.263/MSMPEG4/WMV2).
1161fn motion_compensate_mb(
1162    dst:       &mut YuvFrame,
1163    reference: &YuvFrame,
1164    mb_row:    usize,
1165    mb_col:    usize,
1166    mvx:       i32,
1167    mvy:       i32,
1168) {
1169    let fw = dst.width as usize;
1170    let fh = dst.height as usize;
1171    if fw == 0 || fh == 0 { return; }
1172    let cw = fw / 2;
1173    let ch = fh / 2;
1174
1175    // ── Luma (16×16) ────────────────────────────────────────────────────────
1176    let dst_x = mb_col * 16;
1177    let dst_y = mb_row * 16;
1178    let src_x = dst_x as i32 * 2 + mvx; // half-pel coordinate
1179    let src_y = dst_y as i32 * 2 + mvy;
1180
1181    if reference.y.len() == fw * fh && dst.y.len() == fw * fh {
1182        let mut tmp = [0u8; 256];
1183        mc_luma(&mut tmp, 16, &reference.y, fw, fw, fh, src_x, src_y, 16, 16);
1184        for r in 0..16 {
1185            if dst_y + r >= fh { break; }
1186            if dst_x >= fw { break; }
1187            let d_off = (dst_y + r) * fw + dst_x;
1188            let s_off = r * 16;
1189            let max = (fw - dst_x).min(16);
1190            dst.y[d_off..d_off + max].copy_from_slice(&tmp[s_off..s_off + max]);
1191        }
1192    }
1193
1194    // ── Chroma (8×8) ───────────────────────────────────────────────────────
1195    // upstream (ff_mspel_motion): motion vectors are in half-luma-pel units.
1196    // For 4:2:0 chroma, 1 chroma pixel = 2 luma pixels, so the same MV value
1197    // corresponds to quarter-chroma-pel units.
1198    // upstream collapses the 2-bit chroma fraction to a boolean (any non-zero
1199    // fractional part triggers half-chroma interpolation):
1200    //   dxy |= (motion_x & 3) != 0
1201    //   mx  = motion_x >> 2
1202    // We reproduce that mapping here by converting to half-chroma-pel coords.
1203    if reference.cb.len() == cw * ch && dst.cb.len() == cw * ch {
1204        let dst_xc = mb_col * 8;
1205        let dst_yc = mb_row * 8;
1206
1207        let mx = mvx >> 2;
1208        let my = mvy >> 2;
1209        let xh = (mvx & 3) != 0;
1210        let yh = (mvy & 3) != 0;
1211
1212        // Half-chroma-pel coordinate for mc_luma().
1213        let src_xc = (dst_xc as i32 + mx) * 2 + if xh { 1 } else { 0 };
1214        let src_yc = (dst_yc as i32 + my) * 2 + if yh { 1 } else { 0 };
1215
1216        let mut tmp_cb = [0u8; 64];
1217        let mut tmp_cr = [0u8; 64];
1218        mc_luma(&mut tmp_cb, 8, &reference.cb, cw, cw, ch, src_xc, src_yc, 8, 8);
1219        mc_luma(&mut tmp_cr, 8, &reference.cr, cw, cw, ch, src_xc, src_yc, 8, 8);
1220
1221        for r in 0..8 {
1222            if dst_yc + r >= ch { break; }
1223            if dst_xc >= cw { break; }
1224            let d_off = (dst_yc + r) * cw + dst_xc;
1225            let s_off = r * 8;
1226            let max = (cw - dst_xc).min(8);
1227            dst.cb[d_off..d_off + max].copy_from_slice(&tmp_cb[s_off..s_off + max]);
1228            dst.cr[d_off..d_off + max].copy_from_slice(&tmp_cr[s_off..s_off + max]);
1229        }
1230    }
1231}
1232
1233
1234// ── WMV2 MSPEL motion compensation (direct port of upstream ff_mspel_motion + wmv2_mspel_init) ──
1235
/// Saturates `v` to the `0..=255` range of a byte.
#[inline]
fn clip_u8(v: i32) -> u8 {
    v.clamp(0, 255) as u8
}
1240
/// Average of two bytes, rounded up: `(a + b + 1) >> 1`.
#[inline]
fn rnd_avg_u8(a: u8, b: u8) -> u8 {
    // a + b == 2 * (a | b) - (a ^ b), so the rounded-up half is
    // (a | b) - ((a ^ b) >> 1); this never leaves the u8 range.
    (a | b) - ((a ^ b) >> 1)
}
1245
/// Average of two bytes, rounded down: `(a + b) >> 1`.
#[inline]
fn no_rnd_avg_u8(a: u8, b: u8) -> u8 {
    // a + b == 2 * (a & b) + (a ^ b), so the truncated half is
    // (a & b) + ((a ^ b) >> 1); this never leaves the u8 range.
    (a & b) + ((a ^ b) >> 1)
}
1250
1251fn wmv2_mspel8_h_lowpass(dst: &mut [u8], dst_off: usize, dst_stride: usize,
1252                         src: &[u8], src_off: usize, src_stride: usize, h: usize) {
1253    for i in 0..h {
1254        let so = src_off + i * src_stride;
1255        let doff = dst_off + i * dst_stride;
1256        // dst[0..8]
1257        dst[doff + 0] = clip_u8(((9 * (src[so + 0] as i32 + src[so + 1] as i32)
1258            - (src[so - 1] as i32 + src[so + 2] as i32) + 8) >> 4));
1259        dst[doff + 1] = clip_u8(((9 * (src[so + 1] as i32 + src[so + 2] as i32)
1260            - (src[so + 0] as i32 + src[so + 3] as i32) + 8) >> 4));
1261        dst[doff + 2] = clip_u8(((9 * (src[so + 2] as i32 + src[so + 3] as i32)
1262            - (src[so + 1] as i32 + src[so + 4] as i32) + 8) >> 4));
1263        dst[doff + 3] = clip_u8(((9 * (src[so + 3] as i32 + src[so + 4] as i32)
1264            - (src[so + 2] as i32 + src[so + 5] as i32) + 8) >> 4));
1265        dst[doff + 4] = clip_u8(((9 * (src[so + 4] as i32 + src[so + 5] as i32)
1266            - (src[so + 3] as i32 + src[so + 6] as i32) + 8) >> 4));
1267        dst[doff + 5] = clip_u8(((9 * (src[so + 5] as i32 + src[so + 6] as i32)
1268            - (src[so + 4] as i32 + src[so + 7] as i32) + 8) >> 4));
1269        dst[doff + 6] = clip_u8(((9 * (src[so + 6] as i32 + src[so + 7] as i32)
1270            - (src[so + 5] as i32 + src[so + 8] as i32) + 8) >> 4));
1271        dst[doff + 7] = clip_u8(((9 * (src[so + 7] as i32 + src[so + 8] as i32)
1272            - (src[so + 6] as i32 + src[so + 9] as i32) + 8) >> 4));
1273    }
1274}
1275
1276fn wmv2_mspel8_v_lowpass(dst: &mut [u8], dst_off: usize, dst_stride: usize,
1277                         src: &[u8], src_off: usize, src_stride: usize, w: usize) {
1278    for i in 0..w {
1279        let so = src_off + i;
1280        let s_1 = src[so - src_stride] as i32;
1281        let s0  = src[so] as i32;
1282        let s1  = src[so + src_stride] as i32;
1283        let s2  = src[so + 2 * src_stride] as i32;
1284        let s3  = src[so + 3 * src_stride] as i32;
1285        let s4  = src[so + 4 * src_stride] as i32;
1286        let s5  = src[so + 5 * src_stride] as i32;
1287        let s6  = src[so + 6 * src_stride] as i32;
1288        let s7  = src[so + 7 * src_stride] as i32;
1289        let s8  = src[so + 8 * src_stride] as i32;
1290        let s9  = src[so + 9 * src_stride] as i32;
1291
1292        let do0 = dst_off + i + 0 * dst_stride;
1293        let do1 = dst_off + i + 1 * dst_stride;
1294        let do2 = dst_off + i + 2 * dst_stride;
1295        let do3 = dst_off + i + 3 * dst_stride;
1296        let do4 = dst_off + i + 4 * dst_stride;
1297        let do5 = dst_off + i + 5 * dst_stride;
1298        let do6 = dst_off + i + 6 * dst_stride;
1299        let do7 = dst_off + i + 7 * dst_stride;
1300
1301        dst[do0] = clip_u8(((9 * (s0 + s1) - (s_1 + s2) + 8) >> 4));
1302        dst[do1] = clip_u8(((9 * (s1 + s2) - (s0  + s3) + 8) >> 4));
1303        dst[do2] = clip_u8(((9 * (s2 + s3) - (s1  + s4) + 8) >> 4));
1304        dst[do3] = clip_u8(((9 * (s3 + s4) - (s2  + s5) + 8) >> 4));
1305        dst[do4] = clip_u8(((9 * (s4 + s5) - (s3  + s6) + 8) >> 4));
1306        dst[do5] = clip_u8(((9 * (s5 + s6) - (s4  + s7) + 8) >> 4));
1307        dst[do6] = clip_u8(((9 * (s6 + s7) - (s5  + s8) + 8) >> 4));
1308        dst[do7] = clip_u8(((9 * (s7 + s8) - (s6  + s9) + 8) >> 4));
1309    }
1310}
1311
/// Copies an 8×8 block from `src` (starting at `src_off`) to `dst`
/// (starting at `dst_off`); both buffers use the same `stride`.
#[inline]
fn put_pixels8x8(dst: &mut [u8], dst_off: usize, src: &[u8], src_off: usize, stride: usize) {
    for line in 0..8 {
        let skip = line * stride;
        let from = &src[src_off + skip..][..8];
        dst[dst_off + skip..][..8].copy_from_slice(from);
    }
}
1320
1321#[inline]
1322fn put_pixels8_l2_8_no_rnd(dst: &mut [u8], dst_off: usize,
1323                           src1: &[u8], src1_off: usize,
1324                           src2: &[u8], src2_off: usize,
1325                           dst_stride: usize, src1_stride: usize, src2_stride: usize,
1326                           h: usize) {
1327    for y in 0..h {
1328        let d = dst_off + y * dst_stride;
1329        let s1 = src1_off + y * src1_stride;
1330        let s2 = src2_off + y * src2_stride;
1331        for x in 0..8 {
1332            dst[d + x] = no_rnd_avg_u8(src1[s1 + x], src2[s2 + x]);
1333        }
1334    }
1335}
1336
1337fn put_mspel8_mc10(dst: &mut [u8], dst_off: usize, src: &[u8], src_off: usize, stride: usize) {
1338    let mut half = [0u8; 64];
1339    wmv2_mspel8_h_lowpass(&mut half, 0, 8, src, src_off, stride, 8);
1340    put_pixels8_l2_8_no_rnd(dst, dst_off, src, src_off, &half, 0, stride, stride, 8, 8);
1341}
1342
1343fn put_mspel8_mc20(dst: &mut [u8], dst_off: usize, src: &[u8], src_off: usize, stride: usize) {
1344    wmv2_mspel8_h_lowpass(dst, dst_off, stride, src, src_off, stride, 8);
1345}
1346
1347fn put_mspel8_mc30(dst: &mut [u8], dst_off: usize, src: &[u8], src_off: usize, stride: usize) {
1348    let mut half = [0u8; 64];
1349    wmv2_mspel8_h_lowpass(&mut half, 0, 8, src, src_off, stride, 8);
1350    put_pixels8_l2_8_no_rnd(dst, dst_off, src, src_off + 1, &half, 0, stride, stride, 8, 8);
1351}
1352
1353fn put_mspel8_mc02(dst: &mut [u8], dst_off: usize, src: &[u8], src_off: usize, stride: usize) {
1354    wmv2_mspel8_v_lowpass(dst, dst_off, stride, src, src_off, stride, 8);
1355}
1356
1357fn put_mspel8_mc12(dst: &mut [u8], dst_off: usize, src: &[u8], src_off: usize, stride: usize) {
1358    let mut half_h = [0u8; 88];
1359    let mut half_v = [0u8; 64];
1360    let mut half_hv = [0u8; 64];
1361    // h_lowpass(halfH, src - stride, 8, stride, 11)
1362    wmv2_mspel8_h_lowpass(&mut half_h, 0, 8, src, src_off - stride, stride, 11);
1363    // v_lowpass(halfV, src, 8, stride, 8)
1364    wmv2_mspel8_v_lowpass(&mut half_v, 0, 8, src, src_off, stride, 8);
1365    // v_lowpass(halfHV, halfH + 8, 8, 8, 8)
1366    wmv2_mspel8_v_lowpass(&mut half_hv, 0, 8, &half_h, 8, 8, 8);
1367    put_pixels8_l2_8_no_rnd(dst, dst_off, &half_v, 0, &half_hv, 0, stride, 8, 8, 8);
1368}
1369
1370fn put_mspel8_mc22(dst: &mut [u8], dst_off: usize, src: &[u8], src_off: usize, stride: usize) {
1371    let mut half_h = [0u8; 88];
1372    wmv2_mspel8_h_lowpass(&mut half_h, 0, 8, src, src_off - stride, stride, 11);
1373    wmv2_mspel8_v_lowpass(dst, dst_off, stride, &half_h, 8, 8, 8);
1374}
1375
1376fn put_mspel8_mc32(dst: &mut [u8], dst_off: usize, src: &[u8], src_off: usize, stride: usize) {
1377    let mut half_h = [0u8; 88];
1378    let mut half_v = [0u8; 64];
1379    let mut half_hv = [0u8; 64];
1380    wmv2_mspel8_h_lowpass(&mut half_h, 0, 8, src, src_off - stride, stride, 11);
1381    wmv2_mspel8_v_lowpass(&mut half_v, 0, 8, src, src_off + 1, stride, 8);
1382    wmv2_mspel8_v_lowpass(&mut half_hv, 0, 8, &half_h, 8, 8, 8);
1383    put_pixels8_l2_8_no_rnd(dst, dst_off, &half_v, 0, &half_hv, 0, stride, 8, 8, 8);
1384}
1385
1386#[inline]
1387fn wmv2_put_mspel_pixels(dxy: usize,
1388                         dst: &mut [u8], dst_off: usize,
1389                         src: &[u8], src_off: usize,
1390                         stride: usize) {
1391    match dxy {
1392        0 => put_pixels8x8(dst, dst_off, src, src_off, stride),
1393        1 => put_mspel8_mc10(dst, dst_off, src, src_off, stride),
1394        2 => put_mspel8_mc20(dst, dst_off, src, src_off, stride),
1395        3 => put_mspel8_mc30(dst, dst_off, src, src_off, stride),
1396        4 => put_mspel8_mc02(dst, dst_off, src, src_off, stride),
1397        5 => put_mspel8_mc12(dst, dst_off, src, src_off, stride),
1398        6 => put_mspel8_mc22(dst, dst_off, src, src_off, stride),
1399        7 => put_mspel8_mc32(dst, dst_off, src, src_off, stride),
1400        _ => put_pixels8x8(dst, dst_off, src, src_off, stride),
1401    }
1402}
1403
/// Copies a `block_w` × `block_h` window whose top-left corner is at
/// (`src_x`, `src_y`) of `src` into `buf`, replicating the border samples
/// for positions outside the `h_edge` × `v_edge` valid area.
///
/// `buf` is written from index 0 with row stride `buf_stride`; `src` is
/// read with row stride `src_stride`.
fn emulated_edge_mc(buf: &mut [u8], buf_stride: usize,
                    src: &[u8], src_stride: usize,
                    block_w: usize, block_h: usize,
                    src_x: i32, src_y: i32,
                    h_edge: usize, v_edge: usize) {
    let last_col = (h_edge as i32 - 1).max(0);
    let last_row = (v_edge as i32 - 1).max(0);
    // Clamp a coordinate into the valid area.
    let clamp_to = |pos: i32, last: i32| pos.clamp(0, last) as usize;

    for row in 0..block_h {
        let src_base = clamp_to(src_y + row as i32, last_row) * src_stride;
        let out_base = row * buf_stride;
        for col in 0..block_w {
            let sx = clamp_to(src_x + col as i32, last_col);
            buf[out_base + col] = src[src_base + sx];
        }
    }
}
1421
/// Copies an 8-sample-wide block of `h` rows from `src` to `dst`; both
/// buffers use the same `stride`.
#[inline]
fn chroma_put_pixels(dst: &mut [u8], dst_off: usize,
                     src: &[u8], src_off: usize,
                     stride: usize, h: usize) {
    for line in 0..h {
        let skip = line * stride;
        let from = &src[src_off + skip..][..8];
        dst[dst_off + skip..][..8].copy_from_slice(from);
    }
}
1432
1433#[inline]
1434fn chroma_put_x2(dst: &mut [u8], dst_off: usize,
1435                 src: &[u8], src_off: usize,
1436                 stride: usize, h: usize) {
1437    for y in 0..h {
1438        let d = dst_off + y * stride;
1439        let s = src_off + y * stride;
1440        for x in 0..8 {
1441            dst[d + x] = rnd_avg_u8(src[s + x], src[s + x + 1]);
1442        }
1443    }
1444}
1445
1446#[inline]
1447fn chroma_put_y2(dst: &mut [u8], dst_off: usize,
1448                 src: &[u8], src_off: usize,
1449                 stride: usize, h: usize) {
1450    for y in 0..h {
1451        let d = dst_off + y * stride;
1452        let s = src_off + y * stride;
1453        let s2 = s + stride;
1454        for x in 0..8 {
1455            dst[d + x] = rnd_avg_u8(src[s + x], src[s2 + x]);
1456        }
1457    }
1458}
1459
/// Diagonal half-sample interpolation of an 8-sample-wide block of `h`
/// rows: each output is `(a + b + c + e + 2) >> 2` over the 2×2
/// neighbourhood starting at the corresponding source sample.  Both buffers
/// use the same `stride`.
#[inline]
fn chroma_put_xy2(dst: &mut [u8], dst_off: usize,
                  src: &[u8], src_off: usize,
                  stride: usize, h: usize) {
    for line in 0..h {
        let d = dst_off + line * stride;
        let s = src_off + line * stride;
        // Nine samples per row give eight overlapping horizontal pairs.
        let upper = src[s..s + 9].windows(2);
        let lower = src[s + stride..s + stride + 9].windows(2);
        for (x, (top, bottom)) in upper.zip(lower).enumerate() {
            let sum = u16::from(top[0])
                + u16::from(top[1])
                + u16::from(bottom[0])
                + u16::from(bottom[1]);
            dst[d + x] = ((sum + 2) >> 2) as u8;
        }
    }
}
1477
1478/// Direct port of upstream `ff_mspel_motion` for WMV2 (MV in half-luma-pel units).
1479fn wmv2_mspel_motion_mb(
1480    dst: &mut YuvFrame,
1481    reference: &YuvFrame,
1482    mb_row: usize,
1483    mb_col: usize,
1484    motion_x: i32,
1485    motion_y: i32,
1486    hshift: u8,
1487) {
1488    let fw = dst.width as usize;
1489    let fh = dst.height as usize;
1490    if fw == 0 || fh == 0 { return; }
1491    let cw = fw / 2;
1492    let ch = fh / 2;
1493
1494    // ---- Luma ----
1495    let mut dxy = (((motion_y & 1) << 1) | (motion_x & 1)) as i32;
1496    dxy = 2 * dxy + hshift as i32;
1497
1498    let mut src_x = mb_col as i32 * 16 + (motion_x >> 1);
1499    let mut src_y = mb_row as i32 * 16 + (motion_y >> 1);
1500
1501    // clip to [-16, width] / [-16, height]
1502    if src_x < -16 { src_x = -16; }
1503    if src_x > dst.width as i32 { src_x = dst.width as i32; }
1504    if src_y < -16 { src_y = -16; }
1505    if src_y > dst.height as i32 { src_y = dst.height as i32; }
1506
1507    if src_x <= -16 || src_x >= dst.width as i32 { dxy &= !3; }
1508    if src_y <= -16 || src_y >= dst.height as i32 { dxy &= !4; }
1509
1510    let linesize = fw;
1511    let mut src_plane: &[u8] = &reference.y;
1512    let mut src_off: usize;
1513
1514    // edge condition: same as upstream (using h_edge_pos=width, v_edge_pos=height)
1515    if src_x < 1 || src_y < 1 || src_x + 17 >= dst.width as i32 || src_y + 16 + 1 >= dst.height as i32 {
1516        let mut edge = vec![0u8; linesize * 19];
1517        emulated_edge_mc(
1518            &mut edge, linesize,
1519            &reference.y, linesize,
1520            19, 19,
1521            src_x - 1, src_y - 1,
1522            fw, fh,
1523        );
1524        src_plane = edge.as_slice();
1525        src_off = 1 + linesize;
1526        // keep edge alive via scope capture
1527        // (we rebind below for actual reads)
1528        // NOTE: src_plane points into `edge` which must live for the rest of this function.
1529        // Rust ensures this because `edge` is in this scope.
1530
1531        // Use the edge buffer for the remainder of this luma section.
1532        let dst_x = mb_col * 16;
1533        let dst_y = mb_row * 16;
1534        let dxyu = (dxy as usize).min(7);
1535
1536        // 4x 8x8 blocks
1537        let dst00 = dst_y * linesize + dst_x;
1538        let src00 = src_off;
1539        wmv2_put_mspel_pixels(dxyu, &mut dst.y, dst00, src_plane, src00, linesize);
1540        wmv2_put_mspel_pixels(dxyu, &mut dst.y, dst00 + 8, src_plane, src00 + 8, linesize);
1541        wmv2_put_mspel_pixels(dxyu, &mut dst.y, dst00 + 8 * linesize, src_plane, src00 + 8 * linesize, linesize);
1542        wmv2_put_mspel_pixels(dxyu, &mut dst.y, dst00 + 8 + 8 * linesize, src_plane, src00 + 8 + 8 * linesize, linesize);
1543
1544        // ---- Chroma (still within edge scope) ----
1545        if dst.cb.is_empty() || reference.cb.is_empty() { return; }
1546
1547        let mut cdxy = 0usize;
1548        if (motion_x & 3) != 0 { cdxy |= 1; }
1549        if (motion_y & 3) != 0 { cdxy |= 2; }
1550        let mx = motion_x >> 2;
1551        let my = motion_y >> 2;
1552
1553        let mut csrc_x = mb_col as i32 * 8 + mx;
1554        let mut csrc_y = mb_row as i32 * 8 + my;
1555
1556        if csrc_x < -8 { csrc_x = -8; }
1557        if csrc_x > (dst.width as i32 >> 1) { csrc_x = dst.width as i32 >> 1; }
1558        if csrc_y < -8 { csrc_y = -8; }
1559        if csrc_y > (dst.height as i32 >> 1) { csrc_y = dst.height as i32 >> 1; }
1560
1561        if csrc_x == (dst.width as i32 >> 1) { cdxy &= !1; }
1562        if csrc_y == (dst.height as i32 >> 1) { cdxy &= !2; }
1563
1564        let uvlinesize = cw;
1565
1566        let mut edge_uv = vec![0u8; uvlinesize * 9];
1567        // cb
1568        emulated_edge_mc(
1569            &mut edge_uv, uvlinesize,
1570            &reference.cb, uvlinesize,
1571            9, 9,
1572            csrc_x, csrc_y,
1573            cw, ch,
1574        );
1575        let dst_xc = mb_col * 8;
1576        let dst_yc = mb_row * 8;
1577        let dst_cb_off = dst_yc * uvlinesize + dst_xc;
1578        match cdxy {
1579            0 => chroma_put_pixels(&mut dst.cb, dst_cb_off, &edge_uv, 0, uvlinesize, 8),
1580            1 => chroma_put_x2(&mut dst.cb, dst_cb_off, &edge_uv, 0, uvlinesize, 8),
1581            2 => chroma_put_y2(&mut dst.cb, dst_cb_off, &edge_uv, 0, uvlinesize, 8),
1582            _ => chroma_put_xy2(&mut dst.cb, dst_cb_off, &edge_uv, 0, uvlinesize, 8),
1583        }
1584
1585        // cr
1586        emulated_edge_mc(
1587            &mut edge_uv, uvlinesize,
1588            &reference.cr, uvlinesize,
1589            9, 9,
1590            csrc_x, csrc_y,
1591            cw, ch,
1592        );
1593        let dst_cr_off = dst_yc * uvlinesize + dst_xc;
1594        match cdxy {
1595            0 => chroma_put_pixels(&mut dst.cr, dst_cr_off, &edge_uv, 0, uvlinesize, 8),
1596            1 => chroma_put_x2(&mut dst.cr, dst_cr_off, &edge_uv, 0, uvlinesize, 8),
1597            2 => chroma_put_y2(&mut dst.cr, dst_cr_off, &edge_uv, 0, uvlinesize, 8),
1598            _ => chroma_put_xy2(&mut dst.cr, dst_cr_off, &edge_uv, 0, uvlinesize, 8),
1599        }
1600        return;
1601    }
1602
1603    // non-emu luma
1604    src_off = (src_y as usize) * linesize + (src_x as usize);
1605    let dst_x = mb_col * 16;
1606    let dst_y = mb_row * 16;
1607    let dxyu = (dxy as usize).min(7);
1608
1609    let dst00 = dst_y * linesize + dst_x;
1610    wmv2_put_mspel_pixels(dxyu, &mut dst.y, dst00, src_plane, src_off, linesize);
1611    wmv2_put_mspel_pixels(dxyu, &mut dst.y, dst00 + 8, src_plane, src_off + 8, linesize);
1612    wmv2_put_mspel_pixels(dxyu, &mut dst.y, dst00 + 8 * linesize, src_plane, src_off + 8 * linesize, linesize);
1613    wmv2_put_mspel_pixels(dxyu, &mut dst.y, dst00 + 8 + 8 * linesize, src_plane, src_off + 8 + 8 * linesize, linesize);
1614
1615    // ---- Chroma ----
1616    if dst.cb.is_empty() || reference.cb.is_empty() { return; }
1617
1618    let mut cdxy = 0usize;
1619    if (motion_x & 3) != 0 { cdxy |= 1; }
1620    if (motion_y & 3) != 0 { cdxy |= 2; }
1621    let mx = motion_x >> 2;
1622    let my = motion_y >> 2;
1623
1624    let mut csrc_x = mb_col as i32 * 8 + mx;
1625    let mut csrc_y = mb_row as i32 * 8 + my;
1626
1627    if csrc_x < -8 { csrc_x = -8; }
1628    if csrc_x > (dst.width as i32 >> 1) { csrc_x = dst.width as i32 >> 1; }
1629    if csrc_y < -8 { csrc_y = -8; }
1630    if csrc_y > (dst.height as i32 >> 1) { csrc_y = dst.height as i32 >> 1; }
1631
1632    if csrc_x == (dst.width as i32 >> 1) { cdxy &= !1; }
1633    if csrc_y == (dst.height as i32 >> 1) { cdxy &= !2; }
1634
1635    let uvlinesize = cw;
1636    let need_emu_uv = csrc_x < 0 || csrc_y < 0 || csrc_x + 9 >= cw as i32 || csrc_y + 9 >= ch as i32;
1637    if need_emu_uv {
1638        let mut edge_uv = vec![0u8; uvlinesize * 9];
1639        let dst_xc = mb_col * 8;
1640        let dst_yc = mb_row * 8;
1641        let dst_cb_off = dst_yc * uvlinesize + dst_xc;
1642        emulated_edge_mc(
1643            &mut edge_uv, uvlinesize,
1644            &reference.cb, uvlinesize,
1645            9, 9,
1646            csrc_x, csrc_y,
1647            cw, ch,
1648        );
1649        match cdxy {
1650            0 => chroma_put_pixels(&mut dst.cb, dst_cb_off, &edge_uv, 0, uvlinesize, 8),
1651            1 => chroma_put_x2(&mut dst.cb, dst_cb_off, &edge_uv, 0, uvlinesize, 8),
1652            2 => chroma_put_y2(&mut dst.cb, dst_cb_off, &edge_uv, 0, uvlinesize, 8),
1653            _ => chroma_put_xy2(&mut dst.cb, dst_cb_off, &edge_uv, 0, uvlinesize, 8),
1654        }
1655
1656        let dst_cr_off = dst_yc * uvlinesize + dst_xc;
1657        emulated_edge_mc(
1658            &mut edge_uv, uvlinesize,
1659            &reference.cr, uvlinesize,
1660            9, 9,
1661            csrc_x, csrc_y,
1662            cw, ch,
1663        );
1664        match cdxy {
1665            0 => chroma_put_pixels(&mut dst.cr, dst_cr_off, &edge_uv, 0, uvlinesize, 8),
1666            1 => chroma_put_x2(&mut dst.cr, dst_cr_off, &edge_uv, 0, uvlinesize, 8),
1667            2 => chroma_put_y2(&mut dst.cr, dst_cr_off, &edge_uv, 0, uvlinesize, 8),
1668            _ => chroma_put_xy2(&mut dst.cr, dst_cr_off, &edge_uv, 0, uvlinesize, 8),
1669        }
1670        return;
1671    }
1672    let coff = (csrc_y as usize) * uvlinesize + (csrc_x as usize);
1673    let dst_xc = mb_col * 8;
1674    let dst_yc = mb_row * 8;
1675    let dst_cb_off = dst_yc * uvlinesize + dst_xc;
1676
1677    match cdxy {
1678        0 => chroma_put_pixels(&mut dst.cb, dst_cb_off, &reference.cb, coff, uvlinesize, 8),
1679        1 => chroma_put_x2(&mut dst.cb, dst_cb_off, &reference.cb, coff, uvlinesize, 8),
1680        2 => chroma_put_y2(&mut dst.cb, dst_cb_off, &reference.cb, coff, uvlinesize, 8),
1681        _ => chroma_put_xy2(&mut dst.cb, dst_cb_off, &reference.cb, coff, uvlinesize, 8),
1682    }
1683
1684    let dst_cr_off = dst_yc * uvlinesize + dst_xc;
1685    match cdxy {
1686        0 => chroma_put_pixels(&mut dst.cr, dst_cr_off, &reference.cr, coff, uvlinesize, 8),
1687        1 => chroma_put_x2(&mut dst.cr, dst_cr_off, &reference.cr, coff, uvlinesize, 8),
1688        2 => chroma_put_y2(&mut dst.cr, dst_cr_off, &reference.cr, coff, uvlinesize, 8),
1689        _ => chroma_put_xy2(&mut dst.cr, dst_cr_off, &reference.cr, coff, uvlinesize, 8),
1690    }
1691}
1692
1693fn add_residual_block(frame: &mut YuvFrame, mb_row: u32, mb_col: u32, blk: usize,
1694                      coeff: &[i32; 64]) {
1695    let (is_luma, bx, by, stride, ph) =
1696        block_coords(mb_row, mb_col, blk, frame.width, frame.height);
1697    let plane: &mut Vec<u8> = if is_luma { &mut frame.y }
1698                               else if blk == 4 { &mut frame.cb }
1699                               else { &mut frame.cr };
1700    for r in 0..8 {
1701        if by + r >= ph { break; }
1702        for c in 0..8 {
1703            if bx + c >= stride { break; }
1704            let idx = (by + r) * stride + (bx + c);
1705            plane[idx] = (plane[idx] as i32 + coeff[r*8 + c]).clamp(0, 255) as u8;
1706        }
1707    }
1708}
1709
1710// ─── Macroblock Decoder ───────────────────────────────────────────────────────
1711
1712
1713// ─── upstream RLTable (WMV1/2/MSMPEG4) ───────────────────────────────────
1714
/// Largest run value indexed by `Wmv2Rl::max_level`; that table holds
/// `FF_RL_MAX_RUN + 1` entries per "last" flag.
const FF_RL_MAX_RUN: usize = 64;
/// Largest level value indexed by `Wmv2Rl::max_run`; that table holds
/// `FF_RL_MAX_LEVEL + 1` entries per "last" flag.
const FF_RL_MAX_LEVEL: usize = 64;
1717
/// Run/level table for WMV2/MSMPEG4 coefficient decoding, built from a
/// static `RlBase` by `Wmv2Rl::new`.
#[derive(Clone)]
struct Wmv2Rl {
    /// Number of regular run/level entries; a decoded index equal to `n`
    /// is treated as the escape symbol by `decode_sym`.
    n: usize,
    /// First index of the "last coefficient" entries: indices `>= last`
    /// get 192 added to their run by `decode_sym`.
    last: usize,
    /// VLC tree mapping bitstream codes to table indices.
    vlc: VlcTree,
    /// Run value per table index.
    run: &'static [u8],
    /// Level value per table index.
    level: &'static [u8],
    /// `max_level[last][run]`: largest level found for that run.
    max_level: [[u8; FF_RL_MAX_RUN + 1]; 2],
    /// `max_run[last][level]`: largest run found for that level.
    max_run: [[u8; FF_RL_MAX_LEVEL + 1]; 2],
}
1728
1729impl Wmv2Rl {
1730    fn new(base: &crate::na_rl_tables::RlBase) -> Self {
1731        let mut t = VlcTree::new();
1732        for (idx, (code, len)) in base.vlc.iter().enumerate() {
1733            if *len != 0 {
1734                t.insert(*code, *len, idx as i32);
1735            }
1736        }
1737
1738        let mut max_level = [[0u8; FF_RL_MAX_RUN + 1]; 2];
1739        let mut max_run = [[0u8; FF_RL_MAX_LEVEL + 1]; 2];
1740
1741        for last_flag in 0..2usize {
1742            let (start, end) = if last_flag == 0 { (0usize, base.last) } else { (base.last, base.n) };
1743            for i in start..end {
1744                let r = base.run[i] as usize;
1745                let l = base.level[i] as usize;
1746                if r <= FF_RL_MAX_RUN && l <= FF_RL_MAX_LEVEL {
1747                    if base.level[i] > max_level[last_flag][r] {
1748                        max_level[last_flag][r] = base.level[i];
1749                    }
1750                    if base.run[i] > max_run[last_flag][l] {
1751                        max_run[last_flag][l] = base.run[i];
1752                    }
1753                }
1754            }
1755        }
1756
1757        Wmv2Rl {
1758            n: base.n,
1759            last: base.last,
1760            vlc: t,
1761            run: base.run,
1762            level: base.level,
1763            max_level,
1764            max_run,
1765        }
1766    }
1767
1768    #[inline(always)]
1769    fn decode_sym(&self, br: &mut BitReader<'_>, qscale: i32) -> Option<(i32, i32)> {
1770        let idx = self.vlc.decode(br)? as usize;
1771        if idx == self.n {
1772            // Match upstream ff_rl_init_vlc(): escape maps to level==0, run==66.
1773            // Using run==0 can underflow i (starts at -1) and trigger OOB.
1774            return Some((0, 66));
1775        }
1776        let (qmul, qadd) = if qscale == 0 { (1i32, 0i32) } else { (qscale * 2, (qscale - 1) | 1) };
1777        let mut run = (self.run[idx] as i32) + 1;
1778        let level = (self.level[idx] as i32) * qmul + qadd;
1779        if idx >= self.last {
1780            run += 192;
1781        }
1782        Some((level, run))
1783    }
1784
1785    #[inline(always)]
1786    fn max_level_for(&self, last: usize, run: usize) -> i32 {
1787        self.max_level[last.min(1)][run.min(FF_RL_MAX_RUN)] as i32
1788    }
1789
1790    #[inline(always)]
1791    fn max_run_for(&self, last: usize, level: usize) -> i32 {
1792        self.max_run[last.min(1)][level.min(FF_RL_MAX_LEVEL)] as i32
1793    }
1794}
1795
/// Macroblock-layer decoder state: frame geometry, reference frames, VLC
/// tables and prediction buffers for the VC-1 path, plus the tables and
/// per-picture state of the WMV2/MSMPEG4 path.
pub struct MacroblockDecoder {
    /// Frame width in pixels.
    pub width:     u32,
    /// Frame height in pixels.
    pub height:    u32,
    /// Frame width in macroblocks (`width` rounded up to a multiple of 16, divided by 16).
    pub width_mb:  u32,
    /// Frame height in macroblocks (`height` rounded up to a multiple of 16, divided by 16).
    pub height_mb: u32,
    /// Most recently stored anchor frame; the reference for P and skipped frames
    pub ref_frame: Option<YuvFrame>,
    // VC-1 VLC tables (constructed in `new`)
    dc_luma:    VlcTable,
    dc_chroma:  VlcTable,
    ac_inter:   [VlcTable; 4],
    ac_intra:   [VlcTable; 4],
    cbpcy_i:    VlcTable,
    cbpcy_p:    [VlcTable; 2],   // CBPTAB 0-1
    ttmb:       VlcTable,
    ttblk:      VlcTable,
    mv_vlc:     [VlcTable; 4],   // MVTAB 0-3
    /// DC predictor storage, re-created at the start of every intra frame.
    dc_pred:    DcPredBuffer,
    /// Motion-vector predictor storage, re-created at the start of every P/B frame.
    mv_pred:    MvPredictor,
    // ── WMV2 VLC tables (constructed in `new`; shared with VC-1 decode machinery) ─────
    wmv2_inter: [VlcTable; 2],  // ttcoef 0-1
    wmv2_intra: [VlcTable; 2],
    wmv2_cbpy:  VlcTable,
    wmv2_cbpc:  VlcTable,
    /// WMV2 reference frame (single-reference; no B-frame support)
    wmv2_ref:   Option<YuvFrame>,
    // ── WMV2/MSMPEG4 (upstream-aligned) VLCs / state ─────────────────────────
    wmv2_mb_i_vlc: VlcTree,
    wmv2_dc_vlc:   [[VlcTree; 2]; 2], // [dc_table_index][is_chroma]
    wmv2_coded_block: Vec<u8>,        // coded_block predictor grid (luma 8×8)
    wmv2_dc_pred: Wmv2DcPredBuffer,
    // ext-header flags (decode_ext_header)
    wmv2_mspel_bit: bool,
    wmv2_abt_flag: bool,
    wmv2_j_type_bit: bool,
    wmv2_top_left_mv_flag: bool,
    wmv2_per_mb_rl_bit: bool,
    // per-picture derived state (secondary picture header)
    wmv2_j_type: bool,
    wmv2_per_mb_rl_table: bool,
    wmv2_rl_table_index: u8,
    wmv2_rl_chroma_table_index: u8,
    wmv2_dc_table_index: usize,

    // P-picture secondary header state (upstream wmv2dec.c)
    wmv2_cbp_table_index: usize,
    wmv2_mv_table_index: usize,
    wmv2_mspel: bool,
    wmv2_hshift: u8,
    wmv2_per_mb_abt: bool,
    wmv2_abt_type: u8,
    wmv2_skip_type: u8,
    wmv2_slice_height: usize,
    wmv2_mb_skip: Vec<bool>,
    wmv2_motion: Vec<(i32, i32)>,

    // upstream MB and MV VLC tables
    wmv2_mb_non_intra_vlc: [VlcTree; 4],
    wmv2_mv_vlc: [VlcTree; 2],
    // upstream RL tables (run/level)
    wmv2_rl: [Wmv2Rl; 6],
    // WMV2 escape-3 adaptive lengths (reset each picture)
    wmv2_esc3_level_length: u8,
    wmv2_esc3_run_length: u8,
    // AC prediction buffer (16 values per block: [1..7] left, [9..15] top)
    wmv2_ac_val: Vec<[i16; 16]>,
    /// Whether the last stored reference frame had RANGEREDFRM applied
    ref_rangeredfrm: bool,
    /// AC predictor storage, re-created at the start of every intra frame.
    ac_pred:    AcPredBuffer,
    /// Forward reference (anchor before B-frames in display order)
    fwd_ref: Option<YuvFrame>,
    /// Backward reference (anchor after B-frames in display order)
    bwd_ref: Option<YuvFrame>,
}
1870
1871impl MacroblockDecoder {
1872    pub fn new(width: u32, height: u32) -> Self {
1873        let mb_w = ((width  + 15) / 16) as usize;
1874        let mb_h = ((height + 15) / 16) as usize;
1875
1876        // Build upstream MSMPEG4/WMV2 VLCs (MB I-table + DC tables).
1877        let wmv2_mb_i_vlc: VlcTree = {
1878            let mut t = VlcTree::new();
1879            for (sym, (code, len)) in FF_MSMP4_MB_I_TABLE.iter().enumerate() {
1880                t.insert(*code, *len, sym as i32);
1881            }
1882            t
1883        };
1884
1885        let wmv2_dc_vlc: [[VlcTree; 2]; 2] = std::array::from_fn(|ti| {
1886            std::array::from_fn(|ch| {
1887                let mut t = VlcTree::new();
1888                for (sym, (code, len)) in FF_MSMP4_DC_TABLES[ti][ch].iter().enumerate() {
1889                    t.insert(*code, *len, sym as i32);
1890                }
1891                t
1892            })
1893        });
1894
1895        // upstream mb_non_intra VLC tables (4 variants)
1896        let wmv2_mb_non_intra_vlc: [VlcTree; 4] = std::array::from_fn(|ti| {
1897            let mut t = VlcTree::new();
1898            for (sym, (code, len)) in FF_MB_NON_INTRA_TABLES[ti].iter().enumerate() {
1899                if *len != 0 {
1900                    t.insert(*code, *len, sym as i32);
1901                }
1902            }
1903            t
1904        });
1905
1906        // upstream motion vector VLC tables (2 variants)
1907        // Built exactly like ff_vlc_init_tables_from_lengths() + ff_vlc_init_from_lengths()
1908        // (msmpeg4dec.c msmpeg4_decode_init_static).
1909        let build_mv_from_lengths = |lens: &[u8; 1100], syms: &[u16; 1100]| -> VlcTree {
1910            let mut t = VlcTree::new();
1911            let mut code: u32 = 0;
1912            for i in 0..1100usize {
1913                let len = lens[i] as i32;
1914                if len == 0 {
1915                    continue;
1916                }
1917                let l = len.abs() as u8;
1918                // upstream stores code left-aligned in a 32-bit word.
1919                let right_aligned = if l == 0 { 0 } else { code >> (32 - l) };
1920                if len > 0 {
1921                    t.insert(right_aligned, l, syms[i] as i32);
1922                }
1923                code = code.wrapping_add(1u32 << (32 - l));
1924            }
1925            t
1926        };
1927        let wmv2_mv_vlc: [VlcTree; 2] = [
1928            build_mv_from_lengths(&FF_MSMP4_MV_TABLE0_LENS, &FF_MSMP4_MV_TABLE0),
1929            build_mv_from_lengths(&FF_MSMP4_MV_TABLE1_LENS, &FF_MSMP4_MV_TABLE1),
1930        ];
1931
1932// upstream RL tables (run/level)
1933        let wmv2_rl: [Wmv2Rl; 6] = std::array::from_fn(|i| Wmv2Rl::new(&FF_RL_BASES[i]));
1934        let wmv2_ac_val: Vec<[i16; 16]> = vec![[0i16; 16]; mb_w * mb_h * 6];
1935
1936        MacroblockDecoder {
1937            width,
1938            height,
1939            width_mb:  (width  + 15) / 16,
1940            height_mb: (height + 15) / 16,
1941            ref_frame:  None,
1942            dc_luma:   dc_luma_vlc(),
1943            dc_chroma: dc_chroma_vlc(),
1944            ac_inter:  [
1945                inter_tcoef_vlc(0), inter_tcoef_vlc(1),
1946                inter_tcoef_vlc(2), inter_tcoef_vlc(3),
1947            ],
1948            ac_intra:  [
1949                intra_tcoef_vlc(0), intra_tcoef_vlc(1),
1950                intra_tcoef_vlc(2), intra_tcoef_vlc(3),
1951            ],
1952            cbpcy_i:   cbpcy_i_vlc(),
1953            cbpcy_p:   [cbpcy_p_vlc(0), cbpcy_p_vlc(1)],
1954            ttmb:      ttmb_vlc(),
1955            ttblk:     ttblk_vlc(),
1956            mv_vlc:    [
1957                mv_diff_vlc(0), mv_diff_vlc(1),
1958                mv_diff_vlc(2), mv_diff_vlc(3),
1959            ],
1960            dc_pred:   DcPredBuffer::new(mb_w, mb_h),
1961            mv_pred:   MvPredictor::new(mb_w, mb_h),
1962            ref_rangeredfrm: false,
1963            ac_pred: AcPredBuffer::new(mb_w, mb_h),
1964            fwd_ref: None,
1965            bwd_ref: None,
1966            wmv2_inter: [wmv2_tcoef_inter_vlc(0), wmv2_tcoef_inter_vlc(1)],
1967            wmv2_intra: [wmv2_tcoef_intra_vlc(0), wmv2_tcoef_intra_vlc(1)],
1968            wmv2_cbpy:  wmv2_cbpy_vlc(),
1969            wmv2_cbpc:  wmv2_cbpc_p_vlc(),
1970            wmv2_ref:   None,
1971
1972            // upstream-aligned WMV2/MSMPEG4 state
1973            wmv2_mb_i_vlc,
1974            wmv2_dc_vlc,
1975            wmv2_coded_block: vec![0u8; (2 * mb_w) * (2 * mb_h)],
1976            wmv2_dc_pred: Wmv2DcPredBuffer::new(mb_w, mb_h),
1977
1978            // ext-header flags (default false until set_extradata)
1979            wmv2_mspel_bit: false,
1980            wmv2_abt_flag: false,
1981            wmv2_j_type_bit: false,
1982            wmv2_top_left_mv_flag: false,
1983            wmv2_per_mb_rl_bit: false,
1984
1985            // per-picture derived state
1986            wmv2_j_type: false,
1987            wmv2_per_mb_rl_table: false,
1988            wmv2_rl_table_index: 0,
1989            wmv2_rl_chroma_table_index: 0,
1990            wmv2_dc_table_index: 0,
1991
1992            wmv2_cbp_table_index: 0,
1993            wmv2_mv_table_index: 0,
1994            wmv2_mspel: false,
1995            wmv2_hshift: 0,
1996            wmv2_per_mb_abt: false,
1997            wmv2_abt_type: 0,
1998            wmv2_skip_type: 0,
1999            wmv2_slice_height: mb_h.max(1),
2000            wmv2_mb_skip: vec![false; mb_w * mb_h],
2001            wmv2_motion: vec![(0, 0); mb_w * mb_h],
2002
2003            wmv2_mb_non_intra_vlc,
2004            wmv2_mv_vlc,
2005
2006            wmv2_rl,
2007            wmv2_esc3_level_length: 0,
2008            wmv2_esc3_run_length: 0,
2009            wmv2_ac_val,
2010        }
2011    }
2012
2013    pub fn decode_frame(
2014        &mut self,
2015        payload:  &[u8],
2016        pic_hdr:  &PictureHeader,
2017        seq:      &SequenceHeader,
2018        frame:    &mut YuvFrame,
2019    ) -> Result<()> {
2020        match pic_hdr.frame_type {
2021            FrameType::I | FrameType::BI => {
2022                self.decode_intra(payload, pic_hdr, seq, frame)?;
2023                if seq.overlap && pic_hdr.pquant >= 9 {
2024                    apply_overlap_filter(frame);
2025                }
2026                if seq.loop_filter { apply_loop_filter(frame); }
2027            }
2028            FrameType::P => {
2029                if seq.rangered {
2030                    let cur_rr   = pic_hdr.rangeredfrm;
2031                    let ref_rr   = self.ref_rangeredfrm;
2032                    if ref_rr && !cur_rr {
2033                        if let Some(ref mut rf) = self.ref_frame {
2034                            apply_rangered_compress(rf);
2035                        }
2036                    }
2037                }
2038                self.decode_p(payload, pic_hdr, seq, frame)?;
2039                if seq.loop_filter { apply_loop_filter(frame); }
2040            }
2041            FrameType::B => {
2042                self.decode_b(payload, pic_hdr, seq, frame)?;
2043                if seq.loop_filter { apply_loop_filter(frame); }
2044            }
2045            FrameType::Skipped => {
2046                if let Some(ref rf) = self.ref_frame {
2047                    frame.y.copy_from_slice(&rf.y);
2048                    frame.cb.copy_from_slice(&rf.cb);
2049                    frame.cr.copy_from_slice(&rf.cr);
2050                }
2051            }
2052        }
2053
2054        // Post-decode: expand range if RANGEREDFRM
2055        if seq.rangered && pic_hdr.rangeredfrm {
2056            apply_rangered_expand(frame);
2057        }
2058        self.ref_rangeredfrm = pic_hdr.rangeredfrm;
2059
2060        // Update reference frame chain.
2061        // Anchor frames (I/P) become forward reference for upcoming B-frames
2062        // and also get stored as the backward reference.
2063        match pic_hdr.frame_type {
2064            FrameType::B | FrameType::BI => {
2065                // B-frames don't update the anchor chain
2066            }
2067            _ => {
2068                // Current forward becomes previous, new frame becomes forward anchor
2069                self.fwd_ref = self.bwd_ref.take();
2070                self.bwd_ref = Some(frame.clone());
2071                self.ref_frame = Some(frame.clone());
2072            }
2073        }
2074        Ok(())
2075    }
2076
2077    // ─── Intra frame ─────────────────────────────────────────────────────────
2078
2079    fn decode_intra(
2080        &mut self,
2081        payload:  &[u8],
2082        pic_hdr:  &PictureHeader,
2083        seq:      &SequenceHeader,
2084        frame:    &mut YuvFrame,
2085    ) -> Result<()> {
2086        // The ASF payload includes the picture header and bitplanes.
2087        // Start macroblock decoding exactly at the macroblock layer.
2088        let mut br   = BitReader::new_at(payload, pic_hdr.header_bits);
2089        let pquant   = pic_hdr.pquant as i32;
2090        let halfqp   = pic_hdr.halfqp;
2091        let uniform  = seq.quantizer_mode != crate::vc1::QuantizerMode::NonUniform;
2092
2093        // Reset DC and AC prediction buffers for this frame
2094        let mb_w = self.width_mb as usize;
2095        let mb_h = self.height_mb as usize;
2096        self.dc_pred = DcPredBuffer::new(mb_w, mb_h);
2097        self.ac_pred = AcPredBuffer::new(mb_w, mb_h);
2098
2099        for mb_row in 0..self.height_mb {
2100            for mb_col in 0..self.width_mb {
2101                if br.is_empty() { return Ok(()); }
2102
2103                // CBPCY: 6-bit coded-block pattern
2104                let cbp = self.cbpcy_i.decode(&mut br).unwrap_or(0) as u8;
2105
2106                // Per-MB quantizer override (DQUANT)
2107                let mb_pquant = read_mquant(&mut br, seq.dquant, pquant);
2108
2109                // Transform type for this MB
2110                let mb_tt = if seq.vstransform {
2111                    self.ttmb.decode(&mut br).unwrap_or(0) as u8
2112                } else { 0 };
2113
2114                for blk in 0..6usize {
2115                    let is_luma = blk < 4;
2116
2117                    // ── DC prediction (SMPTE 421M §8.1.4.6) ──────────────────
2118                    let dc_vlc   = if is_luma { &self.dc_luma } else { &self.dc_chroma };
2119                    let dc_diff  = read_dc_diff(&mut br, dc_vlc);
2120
2121                    // DC step from SMPTE 421M Table 3 (×128 domain for IDCT)
2122                    let dc_scale = dc_step(mb_pquant, is_luma);
2123                    let (dc_pred_val, _use_left) =
2124                        self.dc_pred.predict(mb_row as usize, mb_col as usize, blk);
2125
2126                    let dc_recon = dc_pred_val + dc_diff * dc_scale;
2127                    self.dc_pred.store(mb_row as usize, mb_col as usize, blk, dc_recon);
2128
2129                    let coded = (cbp >> (5 - blk)) & 1 != 0;
2130                    let blk_tt = if coded && mb_tt == 6 {
2131                        self.ttblk.decode(&mut br).unwrap_or(0) as u8
2132                    } else { mb_tt };
2133
2134                    let mut coeff = if coded {
2135                        decode_block_ac(
2136                            &mut br, is_luma,
2137                            mb_pquant, halfqp, uniform, blk_tt,
2138                            &self.ac_intra[seq.transacfrm2 as usize],
2139                        )
2140                    } else {
2141                        [0i32; 64]
2142                    };
2143
2144                    // Place reconstructed DC into coeff[0]
2145                    coeff[0] = dc_recon;
2146
2147                    // ── AC Prediction (SMPTE 421M §8.1.4.7) ──────────────────
2148                    // The direction follows the DC predictor choice:
2149                    // use_left=true  → horizontal: add pred_row to coeff[1..7]
2150                    // use_left=false → vertical:   add pred_col to coeff[8,16,..56]
2151                    if _use_left {
2152                        // Horizontal: predictor is first row of left neighbour
2153                        let pred = self.ac_pred.pred_row(mb_row as usize, mb_col as usize, blk);
2154                        for i in 0..7 { coeff[i + 1] += pred[i]; }
2155                        // Store our first row for future left→right neighbours
2156                        let our_row = [coeff[1], coeff[2], coeff[3], coeff[4],
2157                                       coeff[5], coeff[6], coeff[7]];
2158                        self.ac_pred.store_row(mb_row as usize, mb_col as usize, blk, our_row);
2159                    } else {
2160                        // Vertical: predictor is first column of top neighbour
2161                        let pred = self.ac_pred.pred_col(mb_row as usize, mb_col as usize, blk);
2162                        for i in 0..7 { coeff[(i + 1) * 8] += pred[i]; }
2163                        let our_col = [coeff[8], coeff[16], coeff[24], coeff[32],
2164                                       coeff[40], coeff[48], coeff[56]];
2165                        self.ac_pred.store_col(mb_row as usize, mb_col as usize, blk, our_col);
2166                    }
2167
2168                    apply_idct(&mut coeff, blk_tt);
2169                    write_intra_block(frame, mb_row, mb_col, blk, &coeff);
2170                }
2171            }
2172        }
2173        Ok(())
2174    }
2175
2176    // ─── P frame ─────────────────────────────────────────────────────────────
2177
2178    fn decode_p(
2179        &mut self,
2180        payload:  &[u8],
2181        pic_hdr:  &PictureHeader,
2182        seq:      &SequenceHeader,
2183        frame:    &mut YuvFrame,
2184    ) -> Result<()> {
2185        // Start from reference frame copy
2186        if let Some(ref rf) = self.ref_frame {
2187            frame.y.copy_from_slice(&rf.y);
2188            frame.cb.copy_from_slice(&rf.cb);
2189            frame.cr.copy_from_slice(&rf.cr);
2190        }
2191
2192        let mut br  = BitReader::new_at(payload, pic_hdr.header_bits);
2193        let pquant  = pic_hdr.pquant as i32;
2194        let halfqp  = pic_hdr.halfqp;
2195        let uniform = seq.quantizer_mode != crate::vc1::QuantizerMode::NonUniform;
2196        let mv_vlc = &self.mv_vlc[seq.mvtab as usize];
2197
2198        // MV range (quarter-pel)
2199        let mv_scale = 1i32 << (pic_hdr.mvrange as i32 + 1);
2200
2201        let ref_y   = self.ref_frame.as_ref().map(|f| f.y.clone())  .unwrap_or_default();
2202        let ref_cb  = self.ref_frame.as_ref().map(|f| f.cb.clone()) .unwrap_or_default();
2203        let ref_cr  = self.ref_frame.as_ref().map(|f| f.cr.clone()) .unwrap_or_default();
2204        let fw = self.width as usize;
2205        let fh = self.height as usize;
2206
2207        // Reset MV predictor for this frame
2208        let mb_w = self.width_mb  as usize;
2209        let mb_h = self.height_mb as usize;
2210        self.mv_pred = MvPredictor::new(mb_w, mb_h);
2211
2212        // Read skipped-MB bitplane from picture header
2213        let skip_plane = pic_hdr.skipmb_plane.clone().unwrap_or_default();
2214
2215        for mb_row in 0..self.height_mb {
2216            for mb_col in 0..self.width_mb {
2217                if br.is_empty() { return Ok(()); }
2218
2219                let mb_idx = mb_row as usize * mb_w + mb_col as usize;
2220
2221                // Skipped macroblock (from bitplane or inline 1-bit flag)
2222                let skipped = if skip_plane.is_empty() {
2223                    br.read_bit().unwrap_or(false)
2224                } else {
2225                    skip_plane.get(mb_idx).copied().unwrap_or(0) != 0
2226                };
2227
2228                if skipped {
2229                    // Use predicted MV for skipped MB (copy from reference)
2230                    let (pvx, pvy) = self.mv_pred.predict(mb_row as usize, mb_col as usize);
2231                    self.mv_pred.store(mb_row as usize, mb_col as usize, (pvx, pvy), true);
2232                    continue;
2233                }
2234
2235                // Median MV predictor from A (left), B (top), C (top-right)
2236                let (pvx, pvy) = self.mv_pred.predict(mb_row as usize, mb_col as usize);
2237
2238                // Motion vector differential
2239                let dmvx = Self::read_mv_diff(&mut br, mv_vlc, mv_scale);
2240                let dmvy = Self::read_mv_diff(&mut br, mv_vlc, mv_scale);
2241                let mvx  = pvx + dmvx;
2242                let mvy  = pvy + dmvy;
2243
2244                // Store for future neighbours
2245                self.mv_pred.store(mb_row as usize, mb_col as usize, (mvx, mvy), false);
2246
2247                // Luma motion compensation (16×16)
2248                {
2249                    let dst_x = (mb_col * 16) as usize;
2250                    let dst_y = (mb_row * 16) as usize;
2251                    let src_x = (dst_x as i32 * 2 + mvx) as i32; // half-pel
2252                    let src_y = (dst_y as i32 * 2 + mvy) as i32;
2253                    let mut tmp = [0u8; 256];
2254                    if ref_y.len() == fw * fh {
2255                        mc_luma(&mut tmp, 16, &ref_y, fw, fw, fh,
2256                                src_x, src_y, 16, 16);
2257                        let dst = &mut frame.y[dst_y * fw + dst_x..];
2258                        for r in 0..16 {
2259                            dst[r * fw..r * fw + 16].copy_from_slice(&tmp[r*16..r*16+16]);
2260                        }
2261                    }
2262                }
2263
2264                // Chroma MC (8×8, MV /2 with optional FASTUVMC rounding)
2265                {
2266                    let cw = fw / 2;
2267                    let ch = fh / 2;
2268                    let dst_x = (mb_col * 8) as usize;
2269                    let dst_y = (mb_row * 8) as usize;
2270                    // Chroma MV is luma MV / 2 (half-pel chroma = quarter-pel luma)
2271                    let cmvx_raw = mvx / 2;
2272                    let cmvy_raw = mvy / 2;
2273                    // FASTUVMC: round chroma MVs so that fractional part is 0 or ½ pel
2274                    // i.e. strip the quarter-pel bit, rounding toward zero.
2275                    let (cmvx, cmvy) = if seq.fastuvmc {
2276                        // round: remove lowest half-pel bit, biased toward zero
2277                        let round = |v: i32| -> i32 {
2278                            if v >= 0 { v & !1 } else { -((-v) & !1) }
2279                        };
2280                        (round(cmvx_raw), round(cmvy_raw))
2281                    } else {
2282                        (cmvx_raw, cmvy_raw)
2283                    };
2284                    let src_x = (dst_x as i32 * 2 + cmvx) as i32;
2285                    let src_y = (dst_y as i32 * 2 + cmvy) as i32;
2286                    let mut tmp_cb = [0u8; 64];
2287                    let mut tmp_cr = [0u8; 64];
2288                    if ref_cb.len() == cw * ch {
2289                        mc_luma(&mut tmp_cb, 8, &ref_cb, cw, cw, ch, src_x, src_y, 8, 8);
2290                        mc_luma(&mut tmp_cr, 8, &ref_cr, cw, cw, ch, src_x, src_y, 8, 8);
2291                        let dcb = &mut frame.cb[dst_y * cw + dst_x..];
2292                        let dcr = &mut frame.cr[dst_y * cw + dst_x..];
2293                        for r in 0..8 {
2294                            dcb[r * cw..r * cw + 8].copy_from_slice(&tmp_cb[r*8..r*8+8]);
2295                            dcr[r * cw..r * cw + 8].copy_from_slice(&tmp_cr[r*8..r*8+8]);
2296                        }
2297                    }
2298                }
2299
2300                // Residual (CBPCY + coefficients)
2301                let cbp = self.cbpcy_p[(seq.cbptab as usize).min(1)].decode(&mut br).unwrap_or(0) as u8;
2302                // Per-MB quantizer (DQUANT)
2303                let mb_pquant = read_mquant(&mut br, seq.dquant, pquant);
2304                let mb_tt = if seq.vstransform {
2305                    self.ttmb.decode(&mut br).unwrap_or(0) as u8
2306                } else { 0 };
2307
2308                for blk in 0..6usize {
2309                    if (cbp >> (5 - blk)) & 1 == 0 { continue; }
2310                    let is_luma = blk < 4;
2311                    let blk_tt = if mb_tt == 6 {
2312                        self.ttblk.decode(&mut br).unwrap_or(0) as u8
2313                    } else { mb_tt };
2314
2315                    let mut coeff = decode_block(
2316                        &mut br, false, is_luma,
2317                        mb_pquant, halfqp, uniform, blk_tt,
2318                        &self.dc_luma, &self.dc_chroma,
2319                        &self.ac_intra[seq.transacfrm2 as usize],
2320                        &self.ac_inter[seq.transacfrm  as usize],
2321                    );
2322                    apply_idct(&mut coeff, blk_tt);
2323                    add_residual_block(frame, mb_row as u32, mb_col as u32, blk, &coeff);
2324                }
2325            }
2326        }
2327        Ok(())
2328    }
2329
2330    // ─── B frame ─────────────────────────────────────────────────────────────
2331    // SMPTE 421M §8.4.
2332    // Each MB can be: direct (interpolated from fwd+bwd), forward, backward,
2333    // bidirectional, or intra.  We decode the MB type and both MV differentials
2334    // then blend fwd and bwd MC results.
2335
2336    fn decode_b(
2337        &mut self,
2338        payload:  &[u8],
2339        pic_hdr:  &PictureHeader,
2340        seq:      &SequenceHeader,
2341        frame:    &mut YuvFrame,
2342    ) -> Result<()> {
2343        let fwd = match &self.fwd_ref {
2344            Some(f) => f.clone(),
2345            None    => return Ok(()), // no anchor yet
2346        };
2347        let bwd = match &self.bwd_ref {
2348            Some(f) => f.clone(),
2349            None    => return Ok(()), // no backward anchor
2350        };
2351
2352        let mut br  = BitReader::new(payload);
2353        let pquant  = pic_hdr.pquant as i32;
2354        let halfqp  = pic_hdr.halfqp;
2355        let uniform = seq.quantizer_mode != crate::vc1::QuantizerMode::NonUniform;
2356        let mv_vlc = &self.mv_vlc[seq.mvtab as usize];
2357        let mv_scale = 1i32 << (pic_hdr.mvrange as i32 + 1);
2358
2359        let fw = self.width  as usize;
2360        let fh = self.height as usize;
2361        let mb_w = self.width_mb  as usize;
2362        let mb_h = self.height_mb as usize;
2363        self.mv_pred = MvPredictor::new(mb_w, mb_h);
2364
2365        // Direct-mode and skip bitplanes
2366        let direct_plane = pic_hdr.directmb_plane.clone().unwrap_or_default();
2367        let skip_plane   = pic_hdr.skipmb_plane.clone().unwrap_or_default();
2368
2369        // Temporal MV scaling from BFRACTION (SMPTE 421M §8.4.1.3).
2370        let direct_scale_num = pic_hdr.bfrac_num;
2371        let direct_scale_den = pic_hdr.bfrac_den;
2372
2373        for mb_row in 0..self.height_mb {
2374            for mb_col in 0..self.width_mb {
2375                if br.is_empty() { return Ok(()); }
2376
2377                let mb_idx = mb_row as usize * mb_w + mb_col as usize;
2378                let is_direct = direct_plane.get(mb_idx).copied().unwrap_or(0) != 0;
2379                let is_skip   = skip_plane.get(mb_idx).copied().unwrap_or(0) != 0;
2380
2381                if is_skip || is_direct {
2382                    // Direct / skip: interpolate fwd + bwd with equal weight
2383                    let (pvx, pvy) = self.mv_pred.predict(mb_row as usize, mb_col as usize);
2384                    // Scale fwd MV and derive bwd MV
2385                    let fvx = pvx * direct_scale_num / direct_scale_den;
2386                    let fvy = pvy * direct_scale_num / direct_scale_den;
2387                    let bvx = fvx - pvx;
2388                    let bvy = fvy - pvy;
2389
2390                    self.mc_blend_mb(frame, mb_row, mb_col, &fwd, &bwd,
2391                                     fvx, fvy, bvx, bvy, fw, fh);
2392                    self.mv_pred.store(mb_row as usize, mb_col as usize, (pvx, pvy), is_skip);
2393                    continue;
2394                }
2395
2396                // Read MB type: 2 bits
2397                // 00=intra, 01=backward, 10=forward, 11=bidirectional
2398                let mb_type = br.read_bits(2).unwrap_or(2);
2399
2400                if mb_type == 0 {
2401                    // Intra MB in B-frame (rare)
2402                    let cbp = self.cbpcy_i.decode(&mut br).unwrap_or(0) as u8;
2403                    let mb_pquant = read_mquant(&mut br, seq.dquant, pquant);
2404                    let mb_tt = if seq.vstransform {
2405                        self.ttmb.decode(&mut br).unwrap_or(0) as u8
2406                    } else { 0 };
2407                    for blk in 0..6usize {
2408                        let coded = (cbp >> (5 - blk)) & 1 != 0;
2409                        if coded {
2410                            let blk_tt = if mb_tt == 6 {
2411                                self.ttblk.decode(&mut br).unwrap_or(0) as u8
2412                            } else { mb_tt };
2413                            let mut coeff = decode_block(
2414                                &mut br, true, blk < 4,
2415                                mb_pquant, halfqp, uniform, blk_tt,
2416                                &self.dc_luma, &self.dc_chroma,
2417                                &self.ac_intra[seq.transacfrm2 as usize],
2418                                &self.ac_inter[seq.transacfrm  as usize],
2419                            );
2420                            apply_idct(&mut coeff, blk_tt);
2421                            write_intra_block(frame, mb_row, mb_col, blk, &coeff);
2422                        }
2423                    }
2424                    self.mv_pred.store(mb_row as usize, mb_col as usize, (0,0), false);
2425                    continue;
2426                }
2427
2428                // Motion vectors
2429                let use_fwd = mb_type == 2 || mb_type == 3;
2430                let use_bwd = mb_type == 1 || mb_type == 3;
2431
2432                let (pvx, pvy) = self.mv_pred.predict(mb_row as usize, mb_col as usize);
2433
2434                let (fvx, fvy) = if use_fwd {
2435                    let dx = Self::read_mv_diff(&mut br, mv_vlc, mv_scale);
2436                    let dy = Self::read_mv_diff(&mut br, mv_vlc, mv_scale);
2437                    (pvx + dx, pvy + dy)
2438                } else { (0, 0) };
2439
2440                let (bvx, bvy) = if use_bwd {
2441                    let dx = Self::read_mv_diff(&mut br, mv_vlc, mv_scale);
2442                    let dy = Self::read_mv_diff(&mut br, mv_vlc, mv_scale);
2443                    (pvx + dx, pvy + dy)
2444                } else { (0, 0) };
2445
2446                self.mv_pred.store(mb_row as usize, mb_col as usize,
2447                                   if use_fwd { (fvx, fvy) } else { (bvx, bvy) }, false);
2448
2449                if use_fwd && use_bwd {
2450                    self.mc_blend_mb(frame, mb_row, mb_col, &fwd, &bwd,
2451                                     fvx, fvy, bvx, bvy, fw, fh);
2452                } else if use_fwd {
2453                    self.mc_single_mb(frame, mb_row, mb_col, &fwd, fvx, fvy, fw, fh);
2454                } else {
2455                    self.mc_single_mb(frame, mb_row, mb_col, &bwd, bvx, bvy, fw, fh);
2456                }
2457
2458                // Residual
2459                let cbp = self.cbpcy_p[(seq.cbptab as usize).min(1)].decode(&mut br).unwrap_or(0) as u8;
2460                let mb_pquant = read_mquant(&mut br, seq.dquant, pquant);
2461                let mb_tt = if seq.vstransform {
2462                    self.ttmb.decode(&mut br).unwrap_or(0) as u8
2463                } else { 0 };
2464                for blk in 0..6usize {
2465                    if (cbp >> (5 - blk)) & 1 == 0 { continue; }
2466                    let blk_tt = if mb_tt == 6 {
2467                        self.ttblk.decode(&mut br).unwrap_or(0) as u8
2468                    } else { mb_tt };
2469                    let mut coeff = decode_block(
2470                        &mut br, false, blk < 4,
2471                        mb_pquant, halfqp, uniform, blk_tt,
2472                        &self.dc_luma, &self.dc_chroma,
2473                        &self.ac_intra[seq.transacfrm2 as usize],
2474                        &self.ac_inter[seq.transacfrm  as usize],
2475                    );
2476                    apply_idct(&mut coeff, blk_tt);
2477                    add_residual_block(frame, mb_row as u32, mb_col as u32, blk, &coeff);
2478                }
2479            }
2480        }
2481        Ok(())
2482    }
2483
2484    /// Copy one 16×16 luma + 8×8 chroma macroblock from ref with given MV.
2485    fn mc_single_mb(&self, frame: &mut YuvFrame, mb_row: u32, mb_col: u32,
2486                    refp: &YuvFrame, mvx: i32, mvy: i32, fw: usize, fh: usize) {
2487        let dst_x = (mb_col * 16) as usize;
2488        let dst_y = (mb_row * 16) as usize;
2489        let src_x = dst_x as i32 * 2 + mvx;
2490        let src_y = dst_y as i32 * 2 + mvy;
2491        let mut tmp = [0u8; 256];
2492        mc_luma(&mut tmp, 16, &refp.y, fw, fw, fh, src_x, src_y, 16, 16);
2493        let dst = &mut frame.y[dst_y * fw + dst_x..];
2494        for r in 0..16 { dst[r * fw..r * fw + 16].copy_from_slice(&tmp[r*16..r*16+16]); }
2495
2496        let cw = fw / 2;
2497        let ch = fh / 2;
2498        let cdst_x = (mb_col * 8) as usize;
2499        let cdst_y = (mb_row * 8) as usize;
2500        let csrc_x = cdst_x as i32 * 2 + mvx / 2;
2501        let csrc_y = cdst_y as i32 * 2 + mvy / 2;
2502        let mut tmp_cb = [0u8; 64];
2503        let mut tmp_cr = [0u8; 64];
2504        mc_luma(&mut tmp_cb, 8, &refp.cb, cw, cw, ch, csrc_x, csrc_y, 8, 8);
2505        mc_luma(&mut tmp_cr, 8, &refp.cr, cw, cw, ch, csrc_x, csrc_y, 8, 8);
2506        let dcb = &mut frame.cb[cdst_y * cw + cdst_x..];
2507        let dcr = &mut frame.cr[cdst_y * cw + cdst_x..];
2508        for r in 0..8 {
2509            dcb[r * cw..r * cw + 8].copy_from_slice(&tmp_cb[r*8..r*8+8]);
2510            dcr[r * cw..r * cw + 8].copy_from_slice(&tmp_cr[r*8..r*8+8]);
2511        }
2512    }
2513
2514    /// Bidirectional blend: average of forward and backward MC.
2515    fn mc_blend_mb(&self, frame: &mut YuvFrame, mb_row: u32, mb_col: u32,
2516                   fwd: &YuvFrame, bwd: &YuvFrame,
2517                   fvx: i32, fvy: i32, bvx: i32, bvy: i32,
2518                   fw: usize, fh: usize) {
2519        let dst_x  = (mb_col * 16) as usize;
2520        let dst_y  = (mb_row * 16) as usize;
2521        let fsrc_x = dst_x as i32 * 2 + fvx;
2522        let fsrc_y = dst_y as i32 * 2 + fvy;
2523        let bsrc_x = dst_x as i32 * 2 + bvx;
2524        let bsrc_y = dst_y as i32 * 2 + bvy;
2525
2526        let mut ftmp = [0u8; 256];
2527        let mut btmp = [0u8; 256];
2528        mc_luma(&mut ftmp, 16, &fwd.y, fw, fw, fh, fsrc_x, fsrc_y, 16, 16);
2529        mc_luma(&mut btmp, 16, &bwd.y, fw, fw, fh, bsrc_x, bsrc_y, 16, 16);
2530        let dst = &mut frame.y[dst_y * fw + dst_x..];
2531        for r in 0..16 {
2532            for c in 0..16 {
2533                dst[r * fw + c] = ((ftmp[r*16+c] as u16 + btmp[r*16+c] as u16 + 1) >> 1) as u8;
2534            }
2535        }
2536
2537        let cw = fw / 2;
2538        let ch = fh / 2;
2539        let cdst_x  = (mb_col * 8) as usize;
2540        let cdst_y  = (mb_row * 8) as usize;
2541        let cfsrc_x = cdst_x as i32 * 2 + fvx / 2;
2542        let cfsrc_y = cdst_y as i32 * 2 + fvy / 2;
2543        let cbsrc_x = cdst_x as i32 * 2 + bvx / 2;
2544        let cbsrc_y = cdst_y as i32 * 2 + bvy / 2;
2545        let mut fcb = [0u8; 64]; let mut fcrb = [0u8; 64];
2546        let mut bcb = [0u8; 64]; let mut bcrb = [0u8; 64];
2547        mc_luma(&mut fcb,  8, &fwd.cb, cw, cw, ch, cfsrc_x, cfsrc_y, 8, 8);
2548        mc_luma(&mut fcrb, 8, &fwd.cr, cw, cw, ch, cfsrc_x, cfsrc_y, 8, 8);
2549        mc_luma(&mut bcb,  8, &bwd.cb, cw, cw, ch, cbsrc_x, cbsrc_y, 8, 8);
2550        mc_luma(&mut bcrb, 8, &bwd.cr, cw, cw, ch, cbsrc_x, cbsrc_y, 8, 8);
2551        let dcb = &mut frame.cb[cdst_y * cw + cdst_x..];
2552        let dcr = &mut frame.cr[cdst_y * cw + cdst_x..];
2553        for r in 0..8 {
2554            for c in 0..8 {
2555                dcb[r * cw + c] = ((fcb[r*8+c]  as u16 + bcb[r*8+c]  as u16 + 1) >> 1) as u8;
2556                dcr[r * cw + c] = ((fcrb[r*8+c] as u16 + bcrb[r*8+c] as u16 + 1) >> 1) as u8;
2557            }
2558        }
2559    }
2560
2561        fn read_mv_diff(br: &mut BitReader<'_>, mv_vlc: &VlcTable, scale: i32) -> i32 {
2562        let sym = mv_vlc.decode(br).unwrap_or(0);
2563        if sym == i32::MIN {
2564            // Fixed-length escape
2565            br.read_bits_signed(17).unwrap_or(0)
2566        } else {
2567            sym * scale / 4  // convert to quarter-pel
2568        }
2569    }
2570}
2571
2572// ═══════════════════════════════════════════════════════════════════════════════
2573// WMV2 (MS-MPEG4 V8) Decode Entry Points
2574// ═══════════════════════════════════════════════════════════════════════════════
2575//
2576// Public interface: MacroblockDecoder::decode_wmv2_frame()
2577//
2578// WMV2 simplifications vs VC-1:
2579//   • No B-frames, no BFRACTION, no overlap filter, no loop filter flag
2580//   • No TRANSACFRM/CBPTAB/MVTAB in seqhdr; ttcoef from frame header
2581//   • DC: 8-bit absolute (no VLC), sign separate
2582//   • AC escape: Mode-3 only (1-bit last, 6-bit run, 8-bit level, 1-bit sign)
2583//   • IDCT: same VC-1 integer transform reused
2584//   • Motion: half-pel bilinear (same MC as VC-1)
2585
2586// ─── WMV2 DC scale tables ───────────────────────────────────────────────────
2587// WMV2 uses MPEG-4 style DC scaling tables (much smaller than VC-1's ×128 domain
2588// tables). Using VC-1 DC step tables here will massively over-scale DC and
2589// saturate the reconstructed picture.
2590//
// These are the WMV1-family DC scale tables (shared by MSMPEG4-derived
// WMV1/WMV2 decoders). They are NOT the standard MPEG-4 Part 2 dc_scaler:
// e.g. luma stays at 8 through QP 5 here, where MPEG-4 Part 2 already uses 10.
/// DC scale factor for a WMV2 intra block.
///
/// `pquant` is clamped to `1..=31` before the lookup, so out-of-range
/// quantizers map to the nearest valid entry. `is_luma` selects the luma
/// table; otherwise the chroma table is used.
#[inline(always)]
fn wmv2_dc_scale(pquant: i32, is_luma: bool) -> i32 {
    // upstream: ff_wmv1_y_dc_scale_table / ff_wmv1_c_dc_scale_table (used for WMV1/WMV2).
    // Entry 0 is a placeholder; the clamp below never selects it.
    const LUMA_SCALE: [i32; 32] = [
         0,  8,  8,  8,  8,  8,  9,  9,
        10, 10, 11, 11, 12, 12, 13, 13,
        14, 14, 15, 15, 16, 16, 17, 17,
        18, 18, 19, 19, 20, 20, 21, 21,
    ];
    const CHROMA_SCALE: [i32; 32] = [
         0,  8,  8,  8,  8,  9,  9, 10,
        10, 11, 11, 12, 12, 13, 13, 14,
        14, 15, 15, 16, 16, 17, 17, 18,
        18, 19, 19, 20, 20, 21, 21, 22,
    ];
    let table = if is_luma { &LUMA_SCALE } else { &CHROMA_SCALE };
    table[pquant.clamp(1, 31) as usize]
}
2613
2614#[inline(always)]
2615fn decode012(br: &mut BitReader<'_>) -> u8 {
2616    // upstream get_bits.h: n=get_bits1(); if n==0 return 0; else return get_bits1()+1;
2617    match br.read_bit() {
2618        Some(false) => 0,
2619        Some(true) => br.read_bit().map(|b| if b { 2 } else { 1 }).unwrap_or(0),
2620        None => 0,
2621    }
2622}
2623
/// Map the coded `cbp_index` to a CBP table index, depending on `qscale`.
///
/// The quantizer picks one of three rows (`<= 10`, `11..=20`, `> 20`).
/// `cbp_index` picks the column and is clamped to 2.
#[inline(always)]
fn wmv2_get_cbp_table_index(qscale: i32, cbp_index: u8) -> usize {
    // upstream wmv2.h wmv2_get_cbp_table_index
    const REMAP: [[usize; 3]; 3] = [
        [0, 2, 1],
        [1, 0, 2],
        [2, 1, 0],
    ];
    let band = match qscale {
        q if q > 20 => 2,
        q if q > 10 => 1,
        _ => 0,
    };
    let column = usize::from(cbp_index).min(2);
    REMAP[band][column]
}
2637
2638
2639
2640impl MacroblockDecoder {
2641    /// Decode one WMV2 frame. `hdr` is the already-parsed per-frame header.
2642    /// This is the public entry point called from main.rs.
2643    /// Parse WMV2 ext-header from ASF extradata (upstream decode_ext_header).
2644    ///
2645    /// If extradata is missing/short, we keep all flags at default false.
2646    pub fn wmv2_set_extradata(&mut self, extradata: &[u8]) {
2647        if extradata.len() < 4 {
2648            return;
2649        }
2650        let mut br = BitReader::new(&extradata[..4]);
2651        let _fps = br.read_bits(5).unwrap_or(0);
2652        let _bit_rate = br.read_bits(11).unwrap_or(0) * 1024;
2653        self.wmv2_mspel_bit = br.read_bit().unwrap_or(false);
2654        let _loop_filter = br.read_bit().unwrap_or(false);
2655        self.wmv2_abt_flag = br.read_bit().unwrap_or(false);
2656        self.wmv2_j_type_bit = br.read_bit().unwrap_or(false);
2657        self.wmv2_top_left_mv_flag = br.read_bit().unwrap_or(false);
2658        self.wmv2_per_mb_rl_bit = br.read_bit().unwrap_or(false);
2659        let code = br.read_bits(3).unwrap_or(0) as usize;
2660        if code == 0 {
2661            return;
2662        }
2663        let mb_h = self.height_mb as usize;
2664        self.wmv2_slice_height = mb_h / code;
2665    }
2666
2667    pub fn wmv2_copy_ref(&self, out: &mut YuvFrame) -> bool {
2668        let Some(r) = self.wmv2_ref.as_ref() else { return false; };
2669        if out.width != r.width || out.height != r.height {
2670            *out = r.clone();
2671            return true;
2672        }
2673        if out.y.len() == r.y.len() { out.y.copy_from_slice(&r.y); } else { out.y = r.y.clone(); }
2674        if out.cb.len() == r.cb.len() { out.cb.copy_from_slice(&r.cb); } else { out.cb = r.cb.clone(); }
2675        if out.cr.len() == r.cr.len() { out.cr.copy_from_slice(&r.cr); } else { out.cr = r.cr.clone(); }
2676        true
2677    }
2678
2679    pub fn decode_wmv2_frame(
2680        &mut self,
2681        payload: &[u8],
2682        hdr:     &Wmv2FrameHeader,
2683        params:  &Wmv2Params,
2684        frame:   &mut YuvFrame,
2685    ) -> Result<()> {
2686        // resize if needed
2687        if self.width != params.width || self.height != params.height {
2688            *self = MacroblockDecoder::new(params.width, params.height);
2689        }
2690
2691        if hdr.frame_skipped {
2692            let _ = self.wmv2_copy_ref(frame);
2693            return Ok(());
2694        }
2695        match hdr.frame_type {
2696            Wmv2FrameType::I => self.wmv2_decode_intra(payload, hdr, frame),
2697            Wmv2FrameType::P => self.wmv2_decode_p(payload, hdr, frame),
2698        }
2699    }
2700
2701
2702/// Heuristic probe: try to parse a few macroblock headers after `hdr.header_bits`.
2703/// Used to disambiguate ASF framing-byte offsets when the picture header can be
2704/// (mis-)parsed at multiple byte offsets.
2705///
2706/// Returns a "score" = number of MB headers successfully parsed (higher is better).
2707    pub fn probe_wmv2_payload(&self, payload: &[u8], hdr: &Wmv2FrameHeader) -> usize {
2708    let mut br = BitReader::new_at(payload, hdr.header_bits);
2709
2710    // upstream-aligned quick probe for I-frames: only consume secondary header + MB header + 6×DC.
2711    if hdr.frame_type == Wmv2FrameType::I {
2712        let mut br = BitReader::new_at(payload, hdr.header_bits);
2713        // secondary picture header (I branch)
2714        let j_type = if self.wmv2_j_type_bit { br.read_bit().unwrap_or(false) } else { false };
2715        if j_type { return 1; }
2716        let per_mb_rl_table = if self.wmv2_per_mb_rl_bit { br.read_bit().unwrap_or(false) } else { false };
2717        if !per_mb_rl_table {
2718            let _ = decode012(&mut br);
2719            let _ = decode012(&mut br);
2720        }
2721        let dc_table_index = br.read_bit().unwrap_or(false) as usize;
2722        let code = match self.wmv2_mb_i_vlc.decode(&mut br) { Some(v) => v as u32, None => return 0 };
2723        let _ = code;
2724        let _ac_pred = br.read_bit().unwrap_or(false);
2725        let _ = _ac_pred;
2726        if per_mb_rl_table && code != 0 {
2727            let _ = decode012(&mut br);
2728        }
2729        // DCs
2730        const DC_MAX: i32 = 119;
2731        for blk in 0..6usize {
2732            let is_chroma = blk >= 4;
2733            let tbl = &self.wmv2_dc_vlc[dc_table_index][if is_chroma { 1 } else { 0 }];
2734            let mut level = match tbl.decode(&mut br) { Some(v) => v, None => return 0 };
2735            if level == DC_MAX {
2736                let _ = br.read_bits(8);
2737                let _ = br.read_bit();
2738            } else if level != 0 {
2739                let _ = br.read_bit();
2740            }
2741        }
2742        return 1;
2743    }
2744
2745    // upstream-aligned quick probe for P-frames: consume secondary header + first MB header.
2746    if hdr.frame_type == Wmv2FrameType::P {
2747        let mut br = BitReader::new_at(payload, hdr.header_bits);
2748        let mb_w = self.width_mb as usize;
2749        let mb_h = self.height_mb as usize;
2750        let qscale = hdr.pquant as i32;
2751
2752        // skip map (only check first MB skip flag)
2753        let skip_type = br.read_bits(2).unwrap_or(0) as u8;
2754        let first_skip = match skip_type {
2755            0 => false,
2756            1 => br.read_bit().unwrap_or(false),
2757            2 => {
2758                let all = br.read_bit().unwrap_or(false);
2759                if all { true } else { br.read_bit().unwrap_or(false) }
2760            }
2761            3 => {
2762                let all = br.read_bit().unwrap_or(false);
2763                if all { true } else { br.read_bit().unwrap_or(false) }
2764            }
2765            _ => false,
2766        };
2767
2768        // Drain remaining skip bits quickly (best-effort) to reach cbp_index.
2769        // We only do a lightweight skip consumption to keep probe cheap.
2770        if skip_type == 1 {
2771            let _ = mb_w * mb_h;
2772        }
2773
2774        let cbp_index = decode012(&mut br);
2775        let cbp_table_index = wmv2_get_cbp_table_index(qscale, cbp_index);
2776
2777        let _mspel = if self.wmv2_mspel_bit { br.read_bit().unwrap_or(false) } else { false };
2778        if self.wmv2_abt_flag {
2779            let per_mb_abt = br.read_bit().unwrap_or(false) ^ true;
2780            if !per_mb_abt {
2781                let _ = decode012(&mut br);
2782            }
2783        }
2784        let per_mb_rl_table = if self.wmv2_per_mb_rl_bit { br.read_bit().unwrap_or(false) } else { false };
2785        if !per_mb_rl_table {
2786            let _ = decode012(&mut br);
2787        }
2788        let dc_table_index = br.read_bit().unwrap_or(false) as usize;
2789        let mv_table_index = br.read_bit().unwrap_or(false) as usize;
2790
2791        if first_skip {
2792            return 1;
2793        }
2794
2795        let code = match self.wmv2_mb_non_intra_vlc[cbp_table_index.min(3)].decode(&mut br) {
2796            Some(v) => v as i32,
2797            None => return 0,
2798        };
2799        let mb_intra = (code & 0x40) == 0;
2800        let cbp = (code & 0x3f) as u8;
2801
2802        if mb_intra {
2803            let _ac_pred = br.read_bit().unwrap_or(false);
2804            if per_mb_rl_table && cbp != 0 {
2805                let _ = decode012(&mut br);
2806            }
2807            // Decode one DC to validate DC VLC table.
2808            const DC_MAX: i32 = 119;
2809            let tbl = &self.wmv2_dc_vlc[dc_table_index][0];
2810            let mut level = match tbl.decode(&mut br) { Some(v) => v, None => return 0 };
2811            if level == DC_MAX {
2812                let _ = br.read_bits(8);
2813                let _ = br.read_bit();
2814            } else if level != 0 {
2815                let _ = br.read_bit();
2816            }
2817        } else {
2818            // Decode one MV symbol.
2819            let tbl = &self.wmv2_mv_vlc[mv_table_index.min(1)];
2820            let sym = match tbl.decode(&mut br) { Some(v) => v as u16, None => return 0 };
2821            if sym == 0 {
2822                let _ = br.read_bits(12);
2823            }
2824        }
2825        return 1;
2826    }
2827
2828
2829    let max_mb = (self.width_mb as usize * self.height_mb as usize).min(64);
2830    let mut score: usize = 0;
2831
2832    // Use ttcoef=0 tables for probing; this is only a syntactic plausibility check.
2833    let ac_intra = &self.wmv2_intra[0];
2834    let ac_inter = &self.wmv2_inter[0];
2835
2836    for _ in 0..max_mb {
2837        if br.is_empty() { break; }
2838
2839        let cbpc_sym = match self.wmv2_cbpc.decode(&mut br) {
2840            Some(v) => v,
2841            None => break,
2842        };
2843        if cbpc_sym == -1 {
2844            score += 1;
2845            continue;
2846        }
2847        if cbpc_sym < 0 || cbpc_sym > 3 {
2848            break;
2849        }
2850
2851        let is_intra = match br.read_bit() {
2852            Some(b) => b,
2853            None => break,
2854        };
2855
2856        let cbpy_raw = match self.wmv2_cbpy.decode(&mut br) {
2857            Some(v) if v >= 0 && v <= 15 => v as u8,
2858            _ => break,
2859        };
2860
2861        let cbpy = if is_intra { cbpy_raw } else { cbpy_raw ^ 0x0F };
2862        let cbp: u8 = (cbpy << 2) | (cbpc_sym as u8 & 0x03);
2863
2864        if cbp != 0 {
2865            let vlc = if is_intra { ac_intra } else { ac_inter };
2866            let sym = match vlc.decode(&mut br) {
2867                Some(s) => s,
2868                None => break,
2869            };
2870            if sym == VLC_ESCAPE {
2871                // Consume escape payload (mode 1/2/3) so probing stays in sync.
2872                let _ = decode_escape_coeff(&mut br, vlc);
2873            } else {
2874                // Normal coefficient: single sign bit follows.
2875                let _ = br.read_bit();
2876            }
2877        }
2878
2879        score += 1;
2880    }
2881
2882    score
2883}
2884
2885    // ── WMV2/MSMPEG4 helpers (upstream-aligned) ─────────────────────────────
2886
2887    #[inline(always)]
2888    fn wmv2_coded_block_pred(&self, mb_row: usize, mb_col: usize, blk: usize) -> u8 {
2889        // Equivalent to upstream ff_msmpeg4_coded_block_pred(), but on a compact grid.
2890        let bw = (self.width_mb as usize) * 2;
2891        let bx = mb_col * 2 + (blk & 1);
2892        let by = mb_row * 2 + (blk >> 1);
2893        let idx = by * bw + bx;
2894        let a = if bx > 0 { self.wmv2_coded_block[idx - 1] } else { 0 };
2895        let b = if bx > 0 && by > 0 { self.wmv2_coded_block[idx - 1 - bw] } else { 0 };
2896        let c = if by > 0 { self.wmv2_coded_block[idx - bw] } else { 0 };
2897        if b == c { a } else { c }
2898    }
2899
2900    #[inline(always)]
2901    fn wmv2_coded_block_store(&mut self, mb_row: usize, mb_col: usize, blk: usize, v: u8) {
2902        let bw = (self.width_mb as usize) * 2;
2903        let bx = mb_col * 2 + (blk & 1);
2904        let by = mb_row * 2 + (blk >> 1);
2905        let idx = by * bw + bx;
2906        if idx < self.wmv2_coded_block.len() {
2907            self.wmv2_coded_block[idx] = v;
2908        }
2909    }
2910
2911    #[inline(always)]
2912    fn wmv2_decode_dc_diff(&self, br: &mut BitReader<'_>, is_chroma: bool) -> i32 {
2913        // upstream msmpeg4_decode_dc() for v3+/WMV2: VLC magnitude + optional sign; DC_MAX escape.
2914        const DC_MAX: i32 = 119;
2915        let tbl = &self.wmv2_dc_vlc[self.wmv2_dc_table_index][if is_chroma { 1 } else { 0 }];
2916        let mut level = tbl.decode(br).unwrap_or(0);
2917        if level == DC_MAX {
2918            let v = br.read_bits(8).unwrap_or(0) as i32;
2919            let sign = br.read_bit().unwrap_or(false);
2920            return if sign { -v } else { v };
2921        }
2922        if level != 0 {
2923            let sign = br.read_bit().unwrap_or(false);
2924            if sign { level = -level; }
2925        }
2926        level
2927    }
2928
2929
2930
2931    #[inline(always)]
2932    fn wmv2_reset_picture_state(&mut self) {
2933        self.wmv2_esc3_level_length = 0;
2934        self.wmv2_esc3_run_length = 0;
2935        for v in self.wmv2_ac_val.iter_mut() {
2936            *v = [0i16; 16];
2937        }
2938    }
2939
2940    #[inline(always)]
2941    fn wmv2_ac_val_idx(&self, mb_row: usize, mb_col: usize, blk: usize) -> usize {
2942        let mb_w = self.width_mb as usize;
2943        (mb_row * mb_w + mb_col) * 6 + blk
2944    }
2945
2946    #[inline(always)]
2947    fn wmv2_get_ac_val(&self, mb_row: usize, mb_col: usize, blk: usize) -> [i16; 16] {
2948        let idx = self.wmv2_ac_val_idx(mb_row, mb_col, blk);
2949        if idx < self.wmv2_ac_val.len() {
2950            self.wmv2_ac_val[idx]
2951        } else {
2952            [0i16; 16]
2953        }
2954    }
2955
2956    #[inline(always)]
2957    fn wmv2_set_ac_val(&mut self, mb_row: usize, mb_col: usize, blk: usize, v: [i16; 16]) {
2958        let idx = self.wmv2_ac_val_idx(mb_row, mb_col, blk);
2959        if idx < self.wmv2_ac_val.len() {
2960            self.wmv2_ac_val[idx] = v;
2961        }
2962    }
2963
2964    #[inline(always)]
2965    fn wmv2_pred_ac(
2966        &mut self,
2967        mb_row: usize,
2968        mb_col: usize,
2969        blk: usize,
2970        dc_pred_dir: i32,
2971        ac_pred: bool,
2972        block: &mut [i16; 64],
2973    ) {
2974        // Direct port of upstream ff_mpeg4_pred_ac() behavior for MSMPEG4/WMV2.
2975        // We keep identity idct_permutation (our scan tables are already permutated).
2976        // ac_val stores 16 values per block: [1..7] left column, [9..15] top row.
2977
2978        let mut cur = self.wmv2_get_ac_val(mb_row, mb_col, blk);
2979
2980        if ac_pred {
2981            if dc_pred_dir == 0 {
2982                // Left prediction: add first column from left neighbor.
2983                let (src_r, src_c, src_b) = match blk {
2984                    1 => (mb_row, mb_col, 0),
2985                    3 => (mb_row, mb_col, 2),
2986                    0 => (mb_row, mb_col.saturating_sub(1), 1),
2987                    2 => (mb_row, mb_col.saturating_sub(1), 3),
2988                    4 | 5 => (mb_row, mb_col.saturating_sub(1), blk),
2989                    _ => (mb_row, mb_col.saturating_sub(1), blk),
2990                };
2991                if (blk == 1 || blk == 3) || mb_col > 0 {
2992                    let src = self.wmv2_get_ac_val(src_r, src_c, src_b);
2993                    for i in 1..8usize {
2994                        let idx = i << 3;
2995                        block[idx] = block[idx].wrapping_add(src[i]);
2996                    }
2997                }
2998            } else {
2999                // Top prediction: add first row from top neighbor.
3000                let (src_r, src_c, src_b) = match blk {
3001                    2 => (mb_row, mb_col, 0),
3002                    3 => (mb_row, mb_col, 1),
3003                    0 => (mb_row.saturating_sub(1), mb_col, 2),
3004                    1 => (mb_row.saturating_sub(1), mb_col, 3),
3005                    4 | 5 => (mb_row.saturating_sub(1), mb_col, blk),
3006                    _ => (mb_row.saturating_sub(1), mb_col, blk),
3007                };
3008                if (blk == 2 || blk == 3) || mb_row > 0 {
3009                    let src = self.wmv2_get_ac_val(src_r, src_c, src_b);
3010                    for i in 1..8usize {
3011                        block[i] = block[i].wrapping_add(src[8 + i]);
3012                    }
3013                }
3014            }
3015        }
3016
3017        // Store our AC predictors for future blocks.
3018        for i in 1..8usize {
3019            cur[i] = block[i << 3];
3020        }
3021        for i in 1..8usize {
3022            cur[8 + i] = block[i];
3023        }
3024        self.wmv2_set_ac_val(mb_row, mb_col, blk, cur);
3025    }
3026
3027    #[inline(always)]
3028    fn wmv2_unquantize_h263_intra(&self, block: &mut [i16; 64], qscale: i32, dc_scale: i32) {
3029        // Direct port of upstream dct_unquantize_h263_intra_c().
3030        let qmul = qscale << 1;
3031        let qadd = (qscale - 1) | 1;
3032
3033        block[0] = ((block[0] as i32) * dc_scale) as i16;
3034        for i in 1..64usize {
3035            let mut level = block[i] as i32;
3036            if level != 0 {
3037                if level < 0 {
3038                    level = level * qmul - qadd;
3039                } else {
3040                    level = level * qmul + qadd;
3041                }
3042                block[i] = level as i16;
3043            }
3044        }
3045    }
3046
3047    fn wmv2_decode_block_intra_ref(
3048        &mut self,
3049        br: &mut BitReader<'_>,
3050        mb_row: usize,
3051        mb_col: usize,
3052        blk: usize,
3053        coded: bool,
3054        qscale: i32,
3055        ac_pred: bool,
3056    ) -> Result<[i16; 64]> {
3057        let is_luma = blk < 4;
3058        let dc_scale = wmv2_dc_scale(qscale, is_luma);
3059
3060        // DC diff VLC + sign, predictor in DC level domain.
3061        let diff = self.wmv2_decode_dc_diff(br, !is_luma);
3062        let (pred_level, dir) = self.wmv2_dc_pred.predict(mb_row, mb_col, blk, dc_scale);
3063        let level = pred_level + diff;
3064        self.wmv2_dc_pred.store(mb_row, mb_col, blk, level * dc_scale);
3065
3066        let mut block = [0i16; 64];
3067        block[0] = level as i16;
3068
3069        // Choose RL table.
3070        let rl = if is_luma {
3071            &self.wmv2_rl[(self.wmv2_rl_table_index as usize).min(2)]
3072        } else {
3073            &self.wmv2_rl[3 + (self.wmv2_rl_chroma_table_index as usize).min(2)]
3074        };
3075
3076        // Scan table selection.
3077        let scan = if ac_pred {
3078            if dir == 0 {
3079                &FF_WMV1_SCANTABLE[3] // intra_v
3080            } else {
3081                &FF_WMV1_SCANTABLE[2] // intra_h
3082            }
3083        } else {
3084            &FF_WMV1_SCANTABLE[1] // intra default
3085        };
3086
3087        let mut i: i32 = 0;
3088        let qmul: i32 = 1;
3089        let run_diff: i32 = 1; // msmpeg4_version >= WMV1
3090
3091        if coded {
3092            loop {
3093                let (mut level_uq, mut run) = rl
3094                    .decode_sym(br, 0)
3095                    .ok_or_else(|| DecoderError::InvalidData("WMV2: tcoeff VLC underrun".into()))?;
3096
3097                if level_uq == 0 {
3098                    // escape: prefix bits decide which escape.
3099                    let b0 = br.peek_bits(1).unwrap_or(0);
3100                    if b0 == 1 {
3101                        // escape1: prefix '1'
3102                        br.skip_bits(1);
3103                        let (lvl2, run2) = rl
3104                            .decode_sym(br, 0)
3105                            .ok_or_else(|| DecoderError::InvalidData("WMV2: escape1 VLC underrun".into()))?;
3106                        level_uq = lvl2;
3107                        run = run2;
3108                        i += run;
3109                        let last = ((run >> 7) & 1) as usize;
3110                        let base_run = ((run - 1) & 63) as usize;
3111                        level_uq += rl.max_level_for(last, base_run) * qmul;
3112                        let sign = br.read_bit().unwrap_or(false);
3113                        if sign {
3114                            level_uq = -level_uq;
3115                        }
3116                    } else {
3117                        let b1 = br.peek_bits(2).unwrap_or(0) & 1;
3118                        if b1 == 1 {
3119                            // escape2: prefix '01'
3120                            br.skip_bits(2);
3121                            let (lvl2, run2) = rl
3122                                .decode_sym(br, 0)
3123                                .ok_or_else(|| DecoderError::InvalidData("WMV2: escape2 VLC underrun".into()))?;
3124                            level_uq = lvl2;
3125                            run = run2;
3126                            let last = ((run >> 7) & 1) as usize;
3127                            let base_level = (level_uq / qmul).abs() as usize;
3128                            i += run + rl.max_run_for(last, base_level) + run_diff;
3129                            let sign = br.read_bit().unwrap_or(false);
3130                            if sign {
3131                                level_uq = -level_uq;
3132                            }
3133                        } else {
3134                            // escape3: prefix '00'
3135                            br.skip_bits(2);
3136                            let last = br.read_bit().unwrap_or(false);
3137                            if self.wmv2_esc3_level_length == 0 {
3138                                // derive esc3 lengths (WMV2: msmpeg4_version > V3)
3139                                let ll: u8 = if qscale < 8 {
3140                                    let mut x = br.read_bits(3).unwrap_or(0) as u8;
3141                                    if x == 0 {
3142                                        x = 8 + br.read_bits(1).unwrap_or(0) as u8;
3143                                    }
3144                                    x
3145                                } else {
3146                                    let mut x: u8 = 2;
3147                                    while x < 8 && br.peek_bits(1).unwrap_or(1) == 0 {
3148                                        br.skip_bits(1);
3149                                        x += 1;
3150                                    }
3151                                    if x < 8 {
3152                                        br.skip_bits(1);
3153                                    }
3154                                    x
3155                                };
3156                                self.wmv2_esc3_level_length = ll;
3157                                self.wmv2_esc3_run_length = (br.read_bits(2).unwrap_or(0) as u8) + 3;
3158                            }
3159                            let run_abs = br.read_bits(self.wmv2_esc3_run_length).unwrap_or(0) as i32;
3160                            let sign = br.read_bit().unwrap_or(false);
3161                            let mut lvl_abs = br.read_bits(self.wmv2_esc3_level_length).unwrap_or(0) as i32;
3162                            if sign {
3163                                lvl_abs = -lvl_abs;
3164                            }
3165                            level_uq = lvl_abs;
3166                            i += run_abs + 1;
3167                            if last {
3168                                i += 192;
3169                            }
3170                        }
3171                    }
3172                } else {
3173                    i += run;
3174                    let sign = br.read_bit().unwrap_or(false);
3175                    if sign {
3176                        level_uq = -level_uq;
3177                    }
3178                }
3179
3180                if i > 62 {
3181                    i -= 192;
3182                    if (i & !63) != 0 {
3183                        i = 63;
3184                    }
3185                    if i < 0 {
3186                        return Err(DecoderError::InvalidData("WMV2: negative coeff index (bitstream damaged)".into()));
3187                    }
3188                    let pos = scan[i as usize] as usize;
3189                    if pos < 64 {
3190                        block[pos] = level_uq as i16;
3191                    }
3192                    break;
3193                }
3194
3195                if i < 0 {
3196                    return Err(DecoderError::InvalidData("WMV2: negative coeff index (bitstream damaged)".into()));
3197                }
3198                let pos = scan[i as usize] as usize;
3199                if pos < 64 {
3200                    block[pos] = level_uq as i16;
3201                }
3202            }
3203        }
3204
3205        // AC prediction always runs (even if not coded).
3206        self.wmv2_pred_ac(mb_row, mb_col, blk, dir, ac_pred, &mut block);
3207
3208        // H.263 intra unquantization to match upstream pipeline.
3209        self.wmv2_unquantize_h263_intra(&mut block, qscale, dc_scale);
3210
3211        Ok(block)
3212    }
3213    fn wmv2_decode_block_inter_ref(
3214        &mut self,
3215        br: &mut BitReader<'_>,
3216        blk: usize,
3217        coded: bool,
3218        qscale: i32,
3219        scan: &[usize; 64],
3220    ) -> Result<[i16; 64]> {
3221        let mut block = [0i16; 64];
3222        if !coded {
3223            return Ok(block);
3224        }
3225
3226        let rl = &self.wmv2_rl[3 + (self.wmv2_rl_table_index as usize).min(2)];
3227
3228        let qmul = qscale << 1;
3229        let qadd = (qscale - 1) | 1;
3230        let run_diff: i32 = 1; // wmv2 != v2
3231
3232        let mut i: i32 = -1;
3233
3234        loop {
3235            let (mut level_uq, mut run) = rl
3236                .decode_sym(br, qscale)
3237                .ok_or_else(|| DecoderError::InvalidData("WMV2: inter tcoeff VLC underrun".into()))?;
3238
3239            if level_uq == 0 {
3240                // escape
3241                let b0 = br.peek_bits(1).unwrap_or(0);
3242                if b0 == 1 {
3243                    // escape1
3244                    br.skip_bits(1);
3245                    let (lvl2, run2) = rl
3246                        .decode_sym(br, qscale)
3247                        .ok_or_else(|| DecoderError::InvalidData("WMV2: inter escape1 VLC underrun".into()))?;
3248                    level_uq = lvl2;
3249                    run = run2;
3250                    i += run;
3251                    let last = ((run >> 7) & 1) as usize;
3252                    let base_run = ((run - 1) & 63) as usize;
3253                    level_uq += rl.max_level_for(last, base_run) * qmul;
3254                    let sign = br.read_bit().unwrap_or(false);
3255                    if sign {
3256                        level_uq = -level_uq;
3257                    }
3258                } else {
3259                    let b1 = br.peek_bits(2).unwrap_or(0) & 1;
3260                    if b1 == 1 {
3261                        // escape2
3262                        br.skip_bits(2);
3263                        let (lvl2, run2) = rl
3264                            .decode_sym(br, qscale)
3265                            .ok_or_else(|| DecoderError::InvalidData("WMV2: inter escape2 VLC underrun".into()))?;
3266                        level_uq = lvl2;
3267                        run = run2;
3268                        let last = ((run >> 7) & 1) as usize;
3269                        let base_level = (level_uq / qmul).abs() as usize;
3270                        i += run + rl.max_run_for(last, base_level) + run_diff;
3271                        let sign = br.read_bit().unwrap_or(false);
3272                        if sign {
3273                            level_uq = -level_uq;
3274                        }
3275                    } else {
3276                        // escape3
3277                        br.skip_bits(2);
3278                        let last = br.read_bit().unwrap_or(false);
3279                        if self.wmv2_esc3_level_length == 0 {
3280                            let ll: u8 = if qscale < 8 {
3281                                let mut x = br.read_bits(3).unwrap_or(0) as u8;
3282                                if x == 0 {
3283                                    x = 8 + br.read_bits(1).unwrap_or(0) as u8;
3284                                }
3285                                x
3286                            } else {
3287                                let mut x: u8 = 2;
3288                                while x < 8 && br.peek_bits(1).unwrap_or(1) == 0 {
3289                                    br.skip_bits(1);
3290                                    x += 1;
3291                                }
3292                                if x < 8 {
3293                                    br.skip_bits(1);
3294                                }
3295                                x
3296                            };
3297                            self.wmv2_esc3_level_length = ll;
3298                            self.wmv2_esc3_run_length = (br.read_bits(2).unwrap_or(0) as u8) + 3;
3299                        }
3300                        let run_abs = br.read_bits(self.wmv2_esc3_run_length).unwrap_or(0) as i32;
3301                        let sign = br.read_bit().unwrap_or(false);
3302                        let mut lvl_abs = br.read_bits(self.wmv2_esc3_level_length).unwrap_or(0) as i32;
3303                        if sign {
3304                            lvl_abs = -lvl_abs;
3305                        }
3306                        if lvl_abs > 0 {
3307                            level_uq = lvl_abs * qmul + qadd;
3308                        } else {
3309                            level_uq = lvl_abs * qmul - qadd;
3310                        }
3311                        i += run_abs + 1;
3312                        if last {
3313                            i += 192;
3314                        }
3315                    }
3316                }
3317            } else {
3318                i += run;
3319                let sign = br.read_bit().unwrap_or(false);
3320                if sign {
3321                    level_uq = -level_uq;
3322                }
3323            }
3324
3325            if i > 62 {
3326                i -= 192;
3327                if (i & !63) != 0 {
3328                    i = 63;
3329                }
3330                if i < 0 {
3331                    return Err(DecoderError::InvalidData("WMV2: negative coeff index (bitstream damaged)".into()));
3332                }
3333                let pos = scan[i as usize] as usize;
3334                if pos < 64 {
3335                    block[pos] = level_uq as i16;
3336                }
3337                break;
3338            }
3339
3340            if i < 0 {
3341                return Err(DecoderError::InvalidData("WMV2: negative coeff index (bitstream damaged)".into()));
3342            }
3343            let pos = scan[i as usize] as usize;
3344            if pos < 64 {
3345                block[pos] = level_uq as i16;
3346            }
3347        }
3348
3349        let _ = blk;
3350        Ok(block)
3351    }
3352    fn wmv2_parse_mb_skip(&mut self, br: &mut BitReader<'_>, mb_w: usize, mb_h: usize) -> Result<()> {
3353        // upstream wmv2dec.c parse_mb_skip
3354        let skip_type = br.read_bits(2).ok_or_else(|| DecoderError::InvalidData("WMV2: missing skip_type".into()))? as u8;
3355        self.wmv2_skip_type = skip_type;
3356        if self.wmv2_mb_skip.len() != mb_w * mb_h {
3357            self.wmv2_mb_skip.resize(mb_w * mb_h, false);
3358        }
3359        for v in self.wmv2_mb_skip.iter_mut() { *v = false; }
3360
3361        match skip_type {
3362            0 => {
3363                // SKIP_TYPE_NONE
3364            }
3365            1 => {
3366                // SKIP_TYPE_MPEG: 1 bit per MB
3367                if br.bits_left() < (mb_w * mb_h) as isize {
3368                    return Err(DecoderError::InvalidData("WMV2: skip map truncated".into()));
3369                }
3370                for y in 0..mb_h {
3371                    for x in 0..mb_w {
3372                        let b = br.read_bit().unwrap_or(false);
3373                        self.wmv2_mb_skip[y * mb_w + x] = b;
3374                    }
3375                }
3376            }
3377            2 => {
3378                // SKIP_TYPE_ROW
3379                for y in 0..mb_h {
3380                    let all = br.read_bit().ok_or_else(|| DecoderError::InvalidData("WMV2: skip row flag missing".into()))?;
3381                    if all {
3382                        for x in 0..mb_w {
3383                            self.wmv2_mb_skip[y * mb_w + x] = true;
3384                        }
3385                    } else {
3386                        for x in 0..mb_w {
3387                            let b = br.read_bit().unwrap_or(false);
3388                            self.wmv2_mb_skip[y * mb_w + x] = b;
3389                        }
3390                    }
3391                }
3392            }
3393            3 => {
3394                // SKIP_TYPE_COL
3395                for x in 0..mb_w {
3396                    let all = br.read_bit().ok_or_else(|| DecoderError::InvalidData("WMV2: skip col flag missing".into()))?;
3397                    if all {
3398                        for y in 0..mb_h {
3399                            self.wmv2_mb_skip[y * mb_w + x] = true;
3400                        }
3401                    } else {
3402                        for y in 0..mb_h {
3403                            let b = br.read_bit().unwrap_or(false);
3404                            self.wmv2_mb_skip[y * mb_w + x] = b;
3405                        }
3406                    }
3407                }
3408            }
3409            _ => {}
3410        }
3411
3412        // upstream also checks coded_mb_count against bits_left; keep a light version.
3413        let coded = self.wmv2_mb_skip.iter().filter(|s| !**s).count();
3414        if coded as isize > br.bits_left() {
3415            return Err(DecoderError::InvalidData("WMV2: coded MB count exceeds remaining bits".into()));
3416        }
3417        Ok(())
3418    }
3419
3420    #[inline(always)]
3421    fn wmv2_motion_get(&self, mb_row: isize, mb_col: isize) -> (i32, i32) {
3422        if mb_row < 0 || mb_col < 0 {
3423            return (0, 0);
3424        }
3425        let mb_w = self.width_mb as isize;
3426        let mb_h = self.height_mb as isize;
3427        if mb_row >= mb_h || mb_col >= mb_w {
3428            return (0, 0);
3429        }
3430        let idx = (mb_row as usize) * (mb_w as usize) + (mb_col as usize);
3431        if idx < self.wmv2_motion.len() { self.wmv2_motion[idx] } else { (0, 0) }
3432    }
3433
3434    #[inline(always)]
3435    fn wmv2_motion_set(&mut self, mb_row: usize, mb_col: usize, mv: (i32, i32)) {
3436        let mb_w = self.width_mb as usize;
3437        let idx = mb_row * mb_w + mb_col;
3438        if self.wmv2_motion.len() != mb_w * (self.height_mb as usize) {
3439            self.wmv2_motion.resize(mb_w * (self.height_mb as usize), (0, 0));
3440        }
3441        if idx < self.wmv2_motion.len() {
3442            self.wmv2_motion[idx] = mv;
3443        }
3444    }
3445
3446    #[inline(always)]
3447    fn wmv2_pred_motion(&self, br: &mut BitReader<'_>, mb_row: usize, mb_col: usize, first_slice_line: bool) -> (i32, i32) {
3448        // upstream wmv2dec.c wmv2_pred_motion (MB-level approximation).
3449        let a = self.wmv2_motion_get(mb_row as isize, mb_col as isize - 1);
3450        let b = self.wmv2_motion_get(mb_row as isize - 1, mb_col as isize);
3451        let c = self.wmv2_motion_get(mb_row as isize - 1, mb_col as isize + 1);
3452
3453        let diff = if mb_col != 0 && !first_slice_line && !self.wmv2_mspel && self.wmv2_top_left_mv_flag {
3454            let dx = (a.0 - b.0).abs();
3455            let dy = (a.1 - b.1).abs();
3456            dx.max(dy)
3457        } else {
3458            0
3459        };
3460
3461        let t = if diff >= 8 {
3462            if br.read_bit().unwrap_or(false) { 1 } else { 0 }
3463        } else {
3464            2
3465        };
3466
3467        match t {
3468            0 => a,
3469            1 => b,
3470            _ => {
3471                if first_slice_line {
3472                    a
3473                } else {
3474                    (mid_pred(a.0, b.0, c.0), mid_pred(a.1, b.1, c.1))
3475                }
3476            }
3477        }
3478    }
3479
3480    #[inline(always)]
3481    fn wmv2_decode_motion_ref(&self, br: &mut BitReader<'_>, pred: (i32, i32)) -> (i32, i32) {
3482        // Direct port of upstream msmpeg4dec.c ff_msmpeg4_decode_motion.
3483        let tbl = &self.wmv2_mv_vlc[self.wmv2_mv_table_index.min(1)];
3484        let sym = tbl.decode(br).unwrap_or(0) as u16;
3485        let (mut mx, mut my) = if sym != 0 {
3486            ((sym >> 8) as i32, (sym & 0xff) as i32)
3487        } else {
3488            // Escape: 6-bit mx + 6-bit my.
3489            (br.read_bits(6).unwrap_or(0) as i32, br.read_bits(6).unwrap_or(0) as i32)
3490        };
3491
3492        mx += pred.0 - 32;
3493        my += pred.1 - 32;
3494        // WARNING: they do not do exactly modulo encoding.
3495        if mx <= -64 { mx += 64; } else if mx >= 64 { mx -= 64; }
3496        if my <= -64 { my += 64; } else if my >= 64 { my -= 64; }
3497        (mx, my)
3498    }
3499
3500    // ── WMV2 I-frame ──────────────────────────────────────────────────────────
3501
3502    fn wmv2_decode_intra(
3503        &mut self,
3504        payload: &[u8],
3505        hdr:     &Wmv2FrameHeader,
3506        frame:   &mut YuvFrame,
3507    ) -> Result<()> {
3508        // Start at picture header end.
3509        let mut br = BitReader::new_at(payload, hdr.header_bits);
3510
3511        // upstream: ff_wmv2_decode_secondary_picture_header() (I-picture branch).
3512        // We parse/consume the fields that affect alignment and DC VLC selection.
3513        self.wmv2_j_type = if self.wmv2_j_type_bit { br.read_bit().unwrap_or(false) } else { false };
3514        if self.wmv2_j_type {
3515            // IntraX8 (j_type) is not handled in this A build.
3516            return Ok(());
3517        }
3518
3519        self.wmv2_per_mb_rl_table = if self.wmv2_per_mb_rl_bit { br.read_bit().unwrap_or(false) } else { false };
3520        if !self.wmv2_per_mb_rl_table {
3521            self.wmv2_rl_chroma_table_index = decode012(&mut br);
3522            self.wmv2_rl_table_index        = decode012(&mut br);
3523        }
3524        self.wmv2_dc_table_index = br.read_bit().unwrap_or(false) as usize;
3525
3526        let mb_w = self.width_mb  as usize;
3527        let mb_h = self.height_mb as usize;
3528
3529        // Reset predictors.
3530        self.wmv2_dc_pred = Wmv2DcPredBuffer::new(mb_w, mb_h);
3531        for v in self.wmv2_coded_block.iter_mut() { *v = 0; }
3532
3533        self.wmv2_reset_picture_state();
3534
3535        let qscale = hdr.pquant as i32;
3536        // WMV2 picture header variant used here (upstream-min) does not carry ttcoef;
3537        // keep using intra VLC set 0 to get the stream back in sync.
3538
3539        for mb_row in 0..mb_h {
3540            for mb_col in 0..mb_w {
3541                if br.is_empty() { break; }
3542
3543                // upstream: code = get_vlc2(ff_msmp4_mb_i_vlc)
3544                let code = self.wmv2_mb_i_vlc.decode(&mut br).unwrap_or(0) as u32;
3545
3546                // Predict coded block pattern.
3547                let mut cbp: u8 = 0;
3548                for i in 0..6usize {
3549                    let mut val = ((code >> (5 - i)) & 1) as u8;
3550                    if i < 4 {
3551                        let pred = self.wmv2_coded_block_pred(mb_row, mb_col, i);
3552                        val ^= pred;
3553                        self.wmv2_coded_block_store(mb_row, mb_col, i, val);
3554                    }
3555                    cbp |= val << (5 - i);
3556                }
3557
3558                // upstream: h->c.ac_pred = get_bits1();
3559                let ac_pred = br.read_bit().unwrap_or(false);
3560
3561                // upstream: if (per_mb_rl_table && cbp) rl_table_index = decode012();
3562                if self.wmv2_per_mb_rl_table && cbp != 0 {
3563                    let rl_idx = decode012(&mut br);
3564                    self.wmv2_rl_table_index = rl_idx;
3565                    self.wmv2_rl_chroma_table_index = rl_idx;
3566                }
3567
3568                for blk in 0..6usize {
3569                    let coded = ((cbp >> (5 - blk)) & 1) != 0;
3570                    let mut block = self.wmv2_decode_block_intra_ref(&mut br, mb_row, mb_col, blk, coded, qscale, ac_pred)?;
3571
3572                    let (is_luma, bx, by, stride, _ph) = block_coords(mb_row as u32, mb_col as u32, blk, frame.width, frame.height);
3573                    let plane: &mut Vec<u8> = if is_luma { &mut frame.y } else if blk == 4 { &mut frame.cb } else { &mut frame.cr };
3574                    let dst_off = by * stride + bx;
3575                    wmv2dsp::wmv2_idct_put(plane, dst_off, stride, &mut block);
3576                }
3577            }
3578        }
3579
3580        self.wmv2_ref = Some(frame.clone());
3581        Ok(())
3582    }
3583    // ── WMV2 P-frame ──────────────────────────────────────────────────────────
3584
3585    fn wmv2_decode_p(
3586        &mut self,
3587        payload: &[u8],
3588        hdr:     &Wmv2FrameHeader,
3589        frame:   &mut YuvFrame,
3590    ) -> Result<()> {
3591        // Start at picture header end.
3592        let mut br = BitReader::new_at(payload, hdr.header_bits);
3593
3594        let mb_w = self.width_mb as usize;
3595        let mb_h = self.height_mb as usize;
3596        let qscale = hdr.pquant as i32;
3597
3598        // upstream: ff_wmv2_decode_secondary_picture_header() (P-picture branch).
3599        self.wmv2_j_type = false;
3600        self.wmv2_parse_mb_skip(&mut br, mb_w, mb_h)?;
3601        let cbp_index = decode012(&mut br);
3602        self.wmv2_cbp_table_index = wmv2_get_cbp_table_index(qscale, cbp_index);
3603
3604        self.wmv2_mspel = if self.wmv2_mspel_bit { br.read_bit().unwrap_or(false) } else { false };
3605
3606        if self.wmv2_abt_flag {
3607            self.wmv2_per_mb_abt = br.read_bit().unwrap_or(false) ^ true;
3608            if !self.wmv2_per_mb_abt {
3609                self.wmv2_abt_type = decode012(&mut br);
3610            }
3611        } else {
3612            self.wmv2_per_mb_abt = false;
3613            self.wmv2_abt_type = 0;
3614        }
3615
3616        self.wmv2_per_mb_rl_table = if self.wmv2_per_mb_rl_bit { br.read_bit().unwrap_or(false) } else { false };
3617        if !self.wmv2_per_mb_rl_table {
3618            self.wmv2_rl_table_index = decode012(&mut br);
3619            self.wmv2_rl_chroma_table_index = self.wmv2_rl_table_index;
3620        }
3621        if br.bits_left() < 2 {
3622            return Err(DecoderError::InvalidData("WMV2: truncated secondary header".into()));
3623        }
3624        self.wmv2_dc_table_index = br.read_bit().unwrap_or(false) as usize;
3625        self.wmv2_mv_table_index = br.read_bit().unwrap_or(false) as usize;
3626
3627        // Reset predictors for this picture.
3628        self.wmv2_dc_pred = Wmv2DcPredBuffer::new(mb_w, mb_h);
3629        if self.wmv2_motion.len() != mb_w * mb_h {
3630            self.wmv2_motion.resize(mb_w * mb_h, (0, 0));
3631        }
3632        for v in self.wmv2_motion.iter_mut() { *v = (0, 0); }
3633
3634        self.wmv2_reset_picture_state();
3635
3636        let reference = match &self.wmv2_ref {
3637            Some(r) => r.clone(),
3638            None    => YuvFrame::new(frame.width, frame.height),
3639        };
3640
3641        for mb_row in 0..mb_h {
3642            let first_slice_line = self.wmv2_slice_height != 0 && (mb_row % self.wmv2_slice_height == 0);
3643            for mb_col in 0..mb_w {
3644                if br.bits_left() <= 0 { break; }
3645                let mi = mb_row * mb_w + mb_col;
3646                if mi < self.wmv2_mb_skip.len() && self.wmv2_mb_skip[mi] {
3647                    if self.wmv2_mspel { wmv2_mspel_motion_mb(frame, &reference, mb_row, mb_col, 0, 0, 0); } else { motion_compensate_mb(frame, &reference, mb_row, mb_col, 0, 0); }
3648                    self.wmv2_motion_set(mb_row, mb_col, (0, 0));
3649                    continue;
3650                }
3651
3652                let code = self.wmv2_mb_non_intra_vlc[self.wmv2_cbp_table_index.min(3)]
3653                    .decode(&mut br)
3654                    .ok_or_else(|| DecoderError::InvalidData("WMV2: MB header VLC underrun".into()))? as i32;
3655
3656                let mb_intra = (code & 0x40) == 0;
3657                let cbp = (code & 0x3f) as u8;
3658
3659                if !mb_intra {
3660                    let pred = self.wmv2_pred_motion(&mut br, mb_row, mb_col, first_slice_line);
3661
3662                    if cbp != 0 {
3663                        if self.wmv2_per_mb_rl_table {
3664                            self.wmv2_rl_table_index = decode012(&mut br);
3665                            self.wmv2_rl_chroma_table_index = self.wmv2_rl_table_index;
3666                        }
3667                    }
3668
3669                    let mut per_block_abt = false;
3670                    let mut abt_type = self.wmv2_abt_type;
3671                    if cbp != 0 && self.wmv2_abt_flag && self.wmv2_per_mb_abt {
3672                        per_block_abt = br.read_bit().unwrap_or(false);
3673                        if !per_block_abt {
3674                            abt_type = decode012(&mut br);
3675                        }
3676                    }
3677
3678                    let (mx, my) = self.wmv2_decode_motion_ref(&mut br, pred);
3679                    self.wmv2_hshift = if (((mx | my) & 1) != 0) && self.wmv2_mspel {
3680                        br.read_bit().unwrap_or(false) as u8
3681                    } else {
3682                        0
3683                    };
3684                    self.wmv2_motion_set(mb_row, mb_col, (mx, my));
3685
3686                    if self.wmv2_mspel { wmv2_mspel_motion_mb(frame, &reference, mb_row, mb_col, mx, my, self.wmv2_hshift); } else { motion_compensate_mb(frame, &reference, mb_row, mb_col, mx, my); }
3687
3688                    for blk in 0..6usize {
3689                        if (cbp >> (5 - blk)) & 1 == 0 { continue; }
3690
3691                        let mut cur_abt = abt_type;
3692                        if per_block_abt {
3693                            cur_abt = decode012(&mut br);
3694                        }
3695
3696                        // upstream: wmv2_decode_inter_block + wmv2_add_block
3697
3698                        if cur_abt == 0 {
3699
3700                            let scan = &FF_WMV1_SCANTABLE[0];
3701
3702                            let mut block = self.wmv2_decode_block_inter_ref(&mut br, blk, true, qscale, scan)?;
3703
3704                            let (is_luma, bx, by, stride, _ph) = block_coords(mb_row as u32, mb_col as u32, blk, frame.width, frame.height);
3705
3706                            let plane: &mut Vec<u8> = if is_luma { &mut frame.y } else if blk == 4 { &mut frame.cb } else { &mut frame.cr };
3707
3708                            let dst_off = by * stride + bx;
3709
3710                            wmv2dsp::wmv2_idct_add(plane, dst_off, stride, &mut block);
3711
3712                        } else {
3713
3714                            const SUB_CBP_TABLE: [u8; 3] = [2, 3, 1];
3715
3716                            let scantable = if cur_abt == 1 { &FF_WMV2_SCANTABLE_A } else { &FF_WMV2_SCANTABLE_B };
3717
3718                            let sub_cbp = SUB_CBP_TABLE[decode012(&mut br) as usize];
3719
3720                        
3721
3722                            let mut block1 = [0i16; 64];
3723
3724                            let mut block2 = [0i16; 64];
3725
3726                            if (sub_cbp & 1) != 0 {
3727
3728                                block1 = self.wmv2_decode_block_inter_ref(&mut br, blk, true, qscale, scantable)?;
3729
3730                            }
3731
3732                            if (sub_cbp & 2) != 0 {
3733
3734                                block2 = self.wmv2_decode_block_inter_ref(&mut br, blk, true, qscale, scantable)?;
3735
3736                            }
3737
3738                        
3739
3740                            let (is_luma, bx, by, stride, _ph) = block_coords(mb_row as u32, mb_col as u32, blk, frame.width, frame.height);
3741
3742                            let plane: &mut Vec<u8> = if is_luma { &mut frame.y } else if blk == 4 { &mut frame.cb } else { &mut frame.cr };
3743
3744                            let dst_off = by * stride + bx;
3745
3746                        
3747
3748                            match cur_abt {
3749
3750                                1 => {
3751
3752                                    // 8x4 + 8x4 (top/bottom)
3753
3754                                    ffidct::ff_simple_idct84_add(plane, dst_off, stride, &mut block1);
3755
3756                                    ffidct::ff_simple_idct84_add(plane, dst_off + 4 * stride, stride, &mut block2);
3757
3758                                }
3759
3760                                2 => {
3761
3762                                    // 4x8 + 4x8 (left/right)
3763
3764                                    ffidct::ff_simple_idct48_add(plane, dst_off, stride, &mut block1);
3765
3766                                    ffidct::ff_simple_idct48_add(plane, dst_off + 4, stride, &mut block2);
3767
3768                                }
3769
3770                                _ => {}
3771
3772                            }
3773
3774                        }
3775                    }
3776                } else {
3777                    // Intra MB in P-picture.
3778                    let ac_pred = br.read_bit().unwrap_or(false);
3779                    if self.wmv2_per_mb_rl_table && cbp != 0 {
3780                        let rl_idx = decode012(&mut br);
3781                        self.wmv2_rl_table_index = rl_idx;
3782                        self.wmv2_rl_chroma_table_index = rl_idx;
3783                    }
3784
3785                    for blk in 0..6usize {
3786                        let coded = ((cbp >> (5 - blk)) & 1) != 0;
3787                        let mut block = self.wmv2_decode_block_intra_ref(&mut br, mb_row, mb_col, blk, coded, qscale, ac_pred)?;
3788
3789                        let (is_luma, bx, by, stride, _ph) = block_coords(mb_row as u32, mb_col as u32, blk, frame.width, frame.height);
3790                        let plane: &mut Vec<u8> = if is_luma { &mut frame.y } else if blk == 4 { &mut frame.cb } else { &mut frame.cr };
3791                        let dst_off = by * stride + bx;
3792                        wmv2dsp::wmv2_idct_put(plane, dst_off, stride, &mut block);
3793                    }
3794                    self.wmv2_motion_set(mb_row, mb_col, (0, 0));
3795                }
3796            }
3797        }
3798
3799        self.wmv2_ref = Some(frame.clone());
3800        Ok(())
3801    }
3802}
3803
3804// ─── WMV2 AC block decoder ────────────────────────────────────────────────────
3805// Decodes AC coefficients using WMV2 TCOEF VLC.
3806// For intra: fills coeff[1..63] (coeff[0] is DC, already set by caller).
3807// For inter: fills coeff[0..63] (all AC).
// Escape symbols are delegated to `decode_escape_coeff` (3-mode escape:
// level offset, run offset, or absolute run/level).
3809
3810fn wmv2_decode_ac_block(
3811    br:      &mut BitReader<'_>,
3812    ac_vlc:  &VlcTable,
3813    pquant:  i32,
3814    coeff:   &mut [i32; 64],
3815    is_intra: bool,
3816) {
3817    // WMV2/MSMPEG4 uses the standard zig-zag scan by default.
3818    // (AC prediction, if implemented, switches to horizontal/vertical scans.)
3819    let scan = &ZIGZAG;
3820    let mut idx = if is_intra { 1usize } else { 0 };
3821
3822    loop {
3823        let sym = match ac_vlc.decode(br) {
3824            Some(s) => s,
3825            None    => break,
3826        };
3827
3828        let (run, signed_level, last) = if sym == VLC_ESCAPE {
3829            // WMV2/MSMPEG4 uses the same 3-mode escape structure as VC-1:
3830            //   0  -> mode1 (level offset)
3831            //   10 -> mode2 (run offset)
3832            //   11 -> mode3 (absolute)
3833            decode_escape_coeff(br, ac_vlc)
3834        } else {
3835            let (r, l, last) = unpack_rl(sym);
3836            let sign = br.read_bit().unwrap_or(false);
3837            (r, if sign { -(l as i32) } else { l as i32 }, last)
3838        };
3839
3840        idx = idx.saturating_add(run as usize);
3841        if idx >= 64 { break; }
3842
3843        // Uniform quantization.
3844        let q = iquant_uniform(signed_level, pquant, false);
3845        coeff[scan[idx]] = q;
3846
3847        idx += 1;
3848        if last || br.is_empty() { break; }
3849    }
3850}
3851
3852// ─── WMV2 MV reader ───────────────────────────────────────────────────────────
// Reads a differential MV: two components, each coded as a unary magnitude
// followed by a sign bit when the magnitude is non-zero (a simplification of
// the H.263 MVD table), then adds the predictor from `MvPredictor::predict`.
3855
3856fn wmv2_read_mv(
3857    br:      &mut BitReader<'_>,
3858    mv_pred: &MvPredictor,
3859    mb_row:  usize,
3860    mb_col:  usize,
3861    mv_range: i32,
3862) -> (i32, i32) {
3863    let (px, py) = mv_pred.predict(mb_row, mb_col);
3864    let dx = wmv2_read_mv_component(br, mv_range);
3865    let dy = wmv2_read_mv_component(br, mv_range);
3866    (px + dx, py + dy)
3867}
3868
3869/// Read one MV component using H.263-style VLC differential coding.
3870/// Values are half-pel units in range [-mv_range, mv_range-1].
3871fn wmv2_read_mv_component(br: &mut BitReader<'_>, mv_range: i32) -> i32 {
3872    // H.263 MVD VLC: unary + suffix
3873    // Code for 0:     "1"         (1 bit)
3874    // Code for ±1:    "010"/"011" (3 bits)
3875    // Code for ±2:    "00110"/"00111"
3876    // etc.  — this is a simple magnitude + sign scheme
3877    let mag = {
3878        let mut m = 0i32;
3879        loop {
3880            if br.read_bit().unwrap_or(true) { break; }
3881            m += 1;
3882            if m >= mv_range { break; }
3883        }
3884        m
3885    };
3886    if mag == 0 { return 0; }
3887    let sign = br.read_bit().unwrap_or(false);
3888    if sign { -mag } else { mag }
3889}