OpenJPH
Open-source implementation of JPEG2000 Part-15
ojph_block_decoder.cpp
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2019, Aous Naman
6// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2019, The University of New South Wales, Australia
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// 1. Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// 2. Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//***************************************************************************/
32// This file is part of the OpenJPH software implementation.
33// File: ojph_block_decoder.cpp
34// Author: Aous Naman
35// Date: 13 May 2022
36//***************************************************************************/
37
38//***************************************************************************/
43#include <string>
44#include <iostream>
45
46#include <cassert>
47#include <cstring>
48#include "ojph_block_common.h"
49#include "ojph_block_decoder.h"
50#include "ojph_arch.h"
51#include "ojph_message.h"
52
53namespace ojph {
54 namespace local {
55
56 //************************************************************************/
63 struct dec_mel_st {
64 dec_mel_st() : data(NULL), tmp(0), bits(0), size(0), unstuff(false),
65 k(0), num_runs(0), runs(0)
66 {}
67 // data decoding machinary
70 int bits;
71 int size;
72 bool unstuff;
73 int k;
74
75 // queue of decoded runs
78 };
79
80 //************************************************************************/
92 static inline
93 void mel_read(dec_mel_st *melp)
94 {
95 if (melp->bits > 32) //there are enough bits in the tmp variable
96 return; // return without reading new data
97
98 ui32 val = 0xFFFFFFFF; // feed in 0xFF if buffer is exhausted
99 if (melp->size > 4) { // if there is data in the MEL segment
100 val = *(ui32*)melp->data; // read 32 bits from MEL data
101 melp->data += 4; // advance pointer
102 melp->size -= 4; // reduce counter
103 }
104 else if (melp->size > 0)
105 { // 4 or less
106 int i = 0;
107 while (melp->size > 1) {
108 ui32 v = *melp->data++; // read one byte at a time
109 ui32 m = ~(0xFFu << i); // mask of location
110 val = (val & m) | (v << i);// put one byte in its correct location
111 --melp->size;
112 i += 8;
113 }
114 // size equal to 1
115 ui32 v = *melp->data++; // the one before the last is different
116 v |= 0xF; // MEL and VLC segments can overlap
117 ui32 m = ~(0xFFu << i);
118 val = (val & m) | (v << i);
119 --melp->size;
120 }
121
122 // next we unstuff them before adding them to the buffer
123 int bits = 32 - melp->unstuff; // number of bits in val, subtract 1 if
124 // the previously read byte requires
125 // unstuffing
126
127 // data is unstuffed and accumulated in t
128 // bits has the number of bits in t
129 ui32 t = val & 0xFF;
130 bool unstuff = ((val & 0xFF) == 0xFF); // true if we need unstuffing
131 bits -= unstuff; // there is one less bit in t if unstuffing is needed
132 t = t << (8 - unstuff); // move up to make room for the next byte
133
134 //this is a repeat of the above
135 t |= (val>>8) & 0xFF;
136 unstuff = (((val >> 8) & 0xFF) == 0xFF);
137 bits -= unstuff;
138 t = t << (8 - unstuff);
139
140 t |= (val>>16) & 0xFF;
141 unstuff = (((val >> 16) & 0xFF) == 0xFF);
142 bits -= unstuff;
143 t = t << (8 - unstuff);
144
145 t |= (val>>24) & 0xFF;
146 melp->unstuff = (((val >> 24) & 0xFF) == 0xFF);
147
148 // move t to tmp, and push the result all the way up, so we read from
149 // the MSB
150 melp->tmp |= ((ui64)t) << (64 - bits - melp->bits);
151 melp->bits += bits; //increment the number of bits in tmp
152 }
153
154 //************************************************************************/
169 static inline
171 {
172 static const int mel_exp[13] = { //MEL exponents
173 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5
174 };
175
176 if (melp->bits < 6) // if there are less than 6 bits in tmp
177 mel_read(melp); // then read from the MEL bitstream
178 // 6 bits is the largest decodable MEL cwd
179
180 //repeat so long that there is enough decodable bits in tmp,
181 // and the runs store is not full (num_runs < 8)
182 while (melp->bits >= 6 && melp->num_runs < 8)
183 {
184 int eval = mel_exp[melp->k]; // number of bits associated with state
185 int run = 0;
186 if (melp->tmp & (1ull<<63)) //The next bit to decode (stored in MSB)
187 { //one is found
188 run = 1 << eval;
189 run--; // consecutive runs of 0 events - 1
190 melp->k = melp->k + 1 < 12 ? melp->k + 1 : 12;//increment, max is 12
191 melp->tmp <<= 1; // consume one bit from tmp
192 melp->bits -= 1;
193 run = run << 1; // a stretch of zeros not terminating in one
194 }
195 else
196 { //0 is found
197 run = (int)(melp->tmp >> (63 - eval)) & ((1 << eval) - 1);
198 melp->k = melp->k - 1 > 0 ? melp->k - 1 : 0; //decrement, min is 0
199 melp->tmp <<= eval + 1; //consume eval + 1 bits (max is 6)
200 melp->bits -= eval + 1;
201 run = (run << 1) + 1; // a stretch of zeros terminating with one
202 }
203 eval = melp->num_runs * 7; // 7 bits per run
204 melp->runs &= ~((ui64)0x3F << eval); // 6 bits are sufficient
205 melp->runs |= ((ui64)run) << eval; // store the value in runs
206 melp->num_runs++; // increment count
207 }
208 }
209
210 //************************************************************************/
220 static inline
221 void mel_init(dec_mel_st *melp, ui8* bbuf, int lcup, int scup)
222 {
223 melp->data = bbuf + lcup - scup; // move the pointer to the start of MEL
224 melp->bits = 0; // 0 bits in tmp
225 melp->tmp = 0; //
226 melp->unstuff = false; // no unstuffing
227 melp->size = scup - 1; // size is the length of MEL+VLC-1
228 melp->k = 0; // 0 for state
229 melp->num_runs = 0; // num_runs is 0
230 melp->runs = 0; //
231
232 //This code is borrowed; original is for a different architecture
233 //These few lines take care of the case where data is not at a multiple
234 // of 4 boundary. It reads 1,2,3 up to 4 bytes from the MEL segment
235 int num = 4 - (int)(intptr_t(melp->data) & 0x3);
236 for (int i = 0; i < num; ++i) { // this code is similar to mel_read
237 assert(melp->unstuff == false || melp->data[0] <= 0x8F);
238 ui64 d = (melp->size > 0) ? *melp->data : 0xFF;//if buffer is consumed
239 //set data to 0xFF
240 if (melp->size == 1) d |= 0xF; //if this is MEL+VLC-1, set LSBs to 0xF
241 // see the standard
242 melp->data += melp->size-- > 0; //increment if the end is not reached
243 int d_bits = 8 - melp->unstuff; //if unstuffing is needed, reduce by 1
244 melp->tmp = (melp->tmp << d_bits) | d; //store bits in tmp
245 melp->bits += d_bits; //increment tmp by number of bits
246 melp->unstuff = ((d & 0xFF) == 0xFF); //true of next byte needs
247 //unstuffing
248 }
249 melp->tmp <<= (64 - melp->bits); //push all the way up so the first bit
250 // is the MSB
251 }
252
253 //************************************************************************/
259 static inline
261 {
262 if (melp->num_runs == 0) //if no runs, decode more bit from MEL segment
263 mel_decode(melp);
264
265 int t = melp->runs & 0x7F; //retrieve one run
266 melp->runs >>= 7; // remove the retrieved run
267 melp->num_runs--;
268 return t; // return run
269 }
270
271 //************************************************************************/
275 struct rev_struct {
276 rev_struct() : data(NULL), tmp(0), bits(0), size(0), unstuff(false)
277 {}
278 //storage
282 int size;
283 bool unstuff;
285 };
286
287 //************************************************************************/
307 static inline
309 {
310 //process 4 bytes at a time
311 if (vlcp->bits > 32) // if there are more than 32 bits in tmp, then
312 return; // reading 32 bits can overflow vlcp->tmp
313 ui32 val = 0;
314 //the next line (the if statement) needs to be tested first
315 if (vlcp->size > 3) // if there are more than 3 bytes left in VLC
316 {
317 // (vlcp->data - 3) move pointer back to read 32 bits at once
318 val = *(ui32*)(vlcp->data - 3); // then read 32 bits
319 vlcp->data -= 4; // move data pointer back by 4
320 vlcp->size -= 4; // reduce available byte by 4
321 }
322 else if (vlcp->size > 0)
323 { // 4 or less
324 int i = 24;
325 while (vlcp->size > 0) {
326 ui32 v = *vlcp->data--; // read one byte at a time
327 val |= (v << i); // put byte in its correct location
328 --vlcp->size;
329 i -= 8;
330 }
331 }
332
333 //accumulate in tmp, number of bits in tmp are stored in bits
334 ui32 tmp = val >> 24; //start with the MSB byte
335 ui32 bits;
336
337 // test unstuff (previous byte is >0x8F), and this byte is 0x7F
338 bits = 8 - ((vlcp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1 : 0);
339 bool unstuff = (val >> 24) > 0x8F; //this is for the next byte
340
341 tmp |= ((val >> 16) & 0xFF) << bits; //process the next byte
342 bits += 8 - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1 : 0);
343 unstuff = ((val >> 16) & 0xFF) > 0x8F;
344
345 tmp |= ((val >> 8) & 0xFF) << bits;
346 bits += 8 - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1 : 0);
347 unstuff = ((val >> 8) & 0xFF) > 0x8F;
348
349 tmp |= (val & 0xFF) << bits;
350 bits += 8 - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1 : 0);
351 unstuff = (val & 0xFF) > 0x8F;
352
353 // now move the read and unstuffed bits into vlcp->tmp
354 vlcp->tmp |= (ui64)tmp << vlcp->bits;
355 vlcp->bits += bits;
356 vlcp->unstuff = unstuff; // this for the next read
357 }
358
359 //************************************************************************/
373 static inline
374 void rev_init(rev_struct *vlcp, ui8* data, int lcup, int scup)
375 {
376 //first byte has only the upper 4 bits
377 vlcp->data = data + lcup - 2;
378
379 //size can not be larger than this, in fact it should be smaller
380 vlcp->size = scup - 2;
381
382 ui32 d = *vlcp->data--; // read one byte (this is a half byte)
383 vlcp->tmp = d >> 4; // both initialize and set
384 vlcp->bits = 4 - ((vlcp->tmp & 7) == 7); //check standard
385 vlcp->unstuff = (d | 0xF) > 0x8F; //this is useful for the next byte
386
387 //This code is designed for an architecture that read address should
388 // align to the read size (address multiple of 4 if read size is 4)
389 //These few lines take care of the case where data is not at a multiple
390 // of 4 boundary. It reads 1,2,3 up to 4 bytes from the VLC bitstream.
391 // To read 32 bits, read from (vlcp->data - 3)
392 int num = 1 + (int)(intptr_t(vlcp->data) & 0x3);
393 int tnum = num < vlcp->size ? num : vlcp->size;
394 for (int i = 0; i < tnum; ++i) {
395 ui64 d;
396 d = *vlcp->data--; // read one byte and move read pointer
397 //check if the last byte was >0x8F (unstuff == true) and this is 0x7F
398 ui32 d_bits = 8 - ((vlcp->unstuff && ((d & 0x7F) == 0x7F)) ? 1 : 0);
399 vlcp->tmp |= d << vlcp->bits; // move data to vlcp->tmp
400 vlcp->bits += d_bits;
401 vlcp->unstuff = d > 0x8F; // for next byte
402 }
403 vlcp->size -= tnum;
404 rev_read(vlcp); // read another 32 buts
405 }
406
407 //************************************************************************/
414 static inline
416 {
417 if (vlcp->bits < 32) // if there are less then 32 bits, read more
418 {
419 rev_read(vlcp); // read 32 bits, but unstuffing might reduce this
420 if (vlcp->bits < 32)// if there is still space in vlcp->tmp for 32 bits
421 rev_read(vlcp); // read another 32
422 }
423 return (ui32)vlcp->tmp; // return the head (bottom-most) of vlcp->tmp
424 }
425
426 //************************************************************************/
432 static inline
434 {
435 assert(num_bits <= vlcp->bits); // vlcp->tmp must have more than num_bits
436 vlcp->tmp >>= num_bits; // remove bits
437 vlcp->bits -= num_bits; // decrement the number of bits
438 return (ui32)vlcp->tmp;
439 }
440
441 //************************************************************************/
452 static inline
454 {
455 //process 4 bytes at a time
456 if (mrp->bits > 32)
457 return;
458 ui32 val = 0;
459 if (mrp->size > 3) // If there are 3 byte or more
460 { // (mrp->data - 3) move pointer back to read 32 bits at once
461 val = *(ui32*)(mrp->data - 3); // read 32 bits
462 mrp->data -= 4; // move back pointer
463 mrp->size -= 4; // reduce count
464 }
465 else if (mrp->size > 0)
466 {
467 int i = 24;
468 while (mrp->size > 0) {
469 ui32 v = *mrp->data--; // read one byte at a time
470 val |= (v << i); // put byte in its correct location
471 --mrp->size;
472 i -= 8;
473 }
474 }
475
476 //accumulate in tmp, and keep count in bits
477 ui32 bits, tmp = val >> 24;
478
479 //test if the last byte > 0x8F (unstuff must be true) and this is 0x7F
480 bits = 8 - ((mrp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1 : 0);
481 bool unstuff = (val >> 24) > 0x8F;
482
483 //process the next byte
484 tmp |= ((val >> 16) & 0xFF) << bits;
485 bits += 8 - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1 : 0);
486 unstuff = ((val >> 16) & 0xFF) > 0x8F;
487
488 tmp |= ((val >> 8) & 0xFF) << bits;
489 bits += 8 - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1 : 0);
490 unstuff = ((val >> 8) & 0xFF) > 0x8F;
491
492 tmp |= (val & 0xFF) << bits;
493 bits += 8 - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1 : 0);
494 unstuff = (val & 0xFF) > 0x8F;
495
496 mrp->tmp |= (ui64)tmp << mrp->bits; // move data to mrp pointer
497 mrp->bits += bits;
498 mrp->unstuff = unstuff; // next byte
499 }
500
501 //************************************************************************/
516 static inline
517 void rev_init_mrp(rev_struct *mrp, ui8* data, int lcup, int len2)
518 {
519 mrp->data = data + lcup + len2 - 1;
520 mrp->size = len2;
521 mrp->unstuff = true;
522 mrp->bits = 0;
523 mrp->tmp = 0;
524
525 //This code is designed for an architecture that read address should
526 // align to the read size (address multiple of 4 if read size is 4)
527 //These few lines take care of the case where data is not at a multiple
528 // of 4 boundary. It reads 1,2,3 up to 4 bytes from the MRP stream
529 int num = 1 + (int)(intptr_t(mrp->data) & 0x3);
530 for (int i = 0; i < num; ++i) {
531 ui64 d;
532 //read a byte, 0 if no more data
533 d = (mrp->size-- > 0) ? *mrp->data-- : 0;
534 //check if unstuffing is needed
535 ui32 d_bits = 8 - ((mrp->unstuff && ((d & 0x7F) == 0x7F)) ? 1 : 0);
536 mrp->tmp |= d << mrp->bits; // move data to vlcp->tmp
537 mrp->bits += d_bits;
538 mrp->unstuff = d > 0x8F; // for next byte
539 }
540 rev_read_mrp(mrp);
541 }
542
543 //************************************************************************/
550 static inline
552 {
553 if (mrp->bits < 32) // if there are less than 32 bits in mrp->tmp
554 {
555 rev_read_mrp(mrp); // read 30-32 bits from mrp
556 if (mrp->bits < 32) // if there is a space of 32 bits
557 rev_read_mrp(mrp); // read more
558 }
559 return (ui32)mrp->tmp; // return the head of mrp->tmp
560 }
561
562 //************************************************************************/
568 static inline
570 {
571 assert(num_bits <= mrp->bits); // we must not consume more than mrp->bits
572 mrp->tmp >>= num_bits; // discard the lowest num_bits bits
573 mrp->bits -= num_bits;
574 return (ui32)mrp->tmp; // return data after consumption
575 }
576
577 //************************************************************************/
581 struct frwd_struct {
582 const ui8* data;
586 int size;
587 };
588
589 //************************************************************************/
607 template<int X>
608 static inline
610 {
611 assert(msp->bits <= 32); // assert that there is a space for 32 bits
612
613 ui32 val = 0;
614 if (msp->size > 3) {
615 val = *(ui32*)msp->data; // read 32 bits
616 msp->data += 4; // increment pointer
617 msp->size -= 4; // reduce size
618 }
619 else if (msp->size > 0)
620 {
621 int i = 0;
622 val = X != 0 ? 0xFFFFFFFFu : 0;
623 while (msp->size > 0) {
624 ui32 v = *msp->data++; // read one byte at a time
625 ui32 m = ~(0xFFu << i); // mask of location
626 val = (val & m) | (v << i);// put one byte in its correct location
627 --msp->size;
628 i += 8;
629 }
630 }
631 else
632 val = X != 0 ? 0xFFFFFFFFu : 0;
633
634 // we accumulate in t and keep a count of the number of bits in bits
635 ui32 bits = 8 - msp->unstuff;
636 ui32 t = val & 0xFF;
637 bool unstuff = ((val & 0xFF) == 0xFF); // Do we need unstuffing next?
638
639 t |= ((val >> 8) & 0xFF) << bits;
640 bits += 8 - unstuff;
641 unstuff = (((val >> 8) & 0xFF) == 0xFF);
642
643 t |= ((val >> 16) & 0xFF) << bits;
644 bits += 8 - unstuff;
645 unstuff = (((val >> 16) & 0xFF) == 0xFF);
646
647 t |= ((val >> 24) & 0xFF) << bits;
648 bits += 8 - unstuff;
649 msp->unstuff = (((val >> 24) & 0xFF) == 0xFF); // for next byte
650
651 msp->tmp |= ((ui64)t) << msp->bits; // move data to msp->tmp
652 msp->bits += bits;
653 }
654
655 //************************************************************************/
664 template<int X>
665 static inline
666 void frwd_init(frwd_struct *msp, const ui8* data, int size)
667 {
668 msp->data = data;
669 msp->tmp = 0;
670 msp->bits = 0;
671 msp->unstuff = 0;
672 msp->size = size;
673
674 //This code is designed for an architecture that read address should
675 // align to the read size (address multiple of 4 if read size is 4)
676 //These few lines take care of the case where data is not at a multiple
677 // of 4 boundary. It reads 1,2,3 up to 4 bytes from the bitstream
678 int num = 4 - (int)(intptr_t(msp->data) & 0x3);
679 for (int i = 0; i < num; ++i)
680 {
681 ui64 d;
682 //read a byte if the buffer is not exhausted, otherwise set it to X
683 d = msp->size-- > 0 ? *msp->data++ : X;
684 msp->tmp |= (d << msp->bits); // store data in msp->tmp
685 msp->bits += 8 - msp->unstuff; // number of bits added to msp->tmp
686 msp->unstuff = ((d & 0xFF) == 0xFF); // unstuffing for next byte
687 }
688 frwd_read<X>(msp); // read 32 bits more
689 }
690
691 //************************************************************************/
697 static inline
698 void frwd_advance(frwd_struct *msp, ui32 num_bits)
699 {
700 assert(num_bits <= msp->bits);
701 msp->tmp >>= num_bits; // consume num_bits
702 msp->bits -= num_bits;
703 }
704
705 //************************************************************************/
712 template<int X>
713 static inline
715 {
716 if (msp->bits < 32)
717 {
718 frwd_read<X>(msp);
719 if (msp->bits < 32) //need to test
720 frwd_read<X>(msp);
721 }
722 return (ui32)msp->tmp;
723 }
724
725 //************************************************************************/
742 bool ojph_decode_codeblock(ui8* coded_data, ui32* decoded_data,
743 ui32 missing_msbs, ui32 num_passes,
744 ui32 lengths1, ui32 lengths2,
745 ui32 width, ui32 height, ui32 stride,
746 bool stripe_causal)
747 {
748 static bool insufficient_precision = false;
749 static bool modify_code = false;
750 static bool truncate_spp_mrp = false;
751
752 if (num_passes > 1 && lengths2 == 0)
753 {
754 OJPH_WARN(0x00010001, "A malformed codeblock that has more than "
755 "one coding pass, but zero length for "
756 "2nd and potential 3rd pass.\n");
757 num_passes = 1;
758 }
759
760 if (num_passes > 3)
761 {
762 OJPH_WARN(0x00010002, "We do not support more than 3 coding passes; "
763 "This codeblocks has %d passes.\n",
764 num_passes);
765 return false;
766 }
767
768 if (missing_msbs > 30) // p < 0
769 {
770 if (insufficient_precision == false)
771 {
772 insufficient_precision = true;
773 OJPH_WARN(0x00010003, "32 bits are not enough to decode this "
774 "codeblock. This message will not be "
775 "displayed again.\n");
776 }
777 return false;
778 }
779 else if (missing_msbs == 30) // p == 0
780 { // not enough precision to decode and set the bin center to 1
781 if (modify_code == false) {
782 modify_code = true;
783 OJPH_WARN(0x00010004, "Not enough precision to decode the cleanup "
784 "pass. The code can be modified to support "
785 "this case. This message will not be "
786 "displayed again.\n");
787 }
788 return false; // 32 bits are not enough to decode this
789 }
790 else if (missing_msbs == 29) // if p is 1, then num_passes must be 1
791 {
792 if (num_passes > 1) {
793 num_passes = 1;
794 if (truncate_spp_mrp == false) {
795 truncate_spp_mrp = true;
796 OJPH_WARN(0x00010005, "Not enough precision to decode the SgnProp "
797 "nor MagRef passes; both will be skipped. "
798 "This message will not be displayed "
799 "again.\n");
800 }
801 }
802 }
803 ui32 p = 30 - missing_msbs; // The least significant bitplane for CUP
804 // There is a way to handle the case of p == 0, but a different path
805 // is required
806
807 if (lengths1 < 2)
808 {
809 OJPH_WARN(0x00010006, "Wrong codeblock length.\n");
810 return false;
811 }
812
813 // read scup and fix the bytes there
814 int lcup, scup;
815 lcup = (int)lengths1; // length of CUP
816 //scup is the length of MEL + VLC
817 scup = (((int)coded_data[lcup-1]) << 4) + (coded_data[lcup-2] & 0xF);
818 if (scup < 2 || scup > lcup || scup > 4079) //something is wrong
819 return false;
820
821 // The temporary storage scratch holds two types of data in an
822 // interleaved fashion. The interleaving allows us to use one
823 // memory pointer.
824 // We have one entry for a decoded VLC code, and one entry for UVLC.
825 // Entries are 16 bits each, corresponding to one quad,
826 // but since we want to use XMM registers of the SSE family
827 // of SIMD; we allocated 16 bytes or more per quad row; that is,
828 // the width is no smaller than 16 bytes (or 8 entries), and the
829 // height is 512 quads
830 // Each VLC entry contains, in the following order, starting
831 // from MSB
832 // e_k (4bits), e_1 (4bits), rho (4bits), useless for step 2 (4bits)
833 // Each entry in UVLC contains u_q
834 // One extra row to handle the case of SPP propagating downwards
835 // when codeblock width is 4
836 ui16 scratch[8 * 513] = {0}; // 8 kB
837
838 // We need an extra two entries (one inf and one u_q) beyond
839 // the last column.
840 // If the block width is 4 (2 quads), then we use sstr of 8
841 // (enough for 4 quads). If width is 8 (4 quads) we use
842 // sstr is 16 (enough for 8 quads). For a width of 16 (8
843 // quads), we use 24 (enough for 12 quads).
844 ui32 sstr = ((width + 2u) + 7u) & ~7u; // multiples of 8
845
846 ui32 mmsbp2 = missing_msbs + 2;
847
848 // The cleanup pass is decoded in two steps; in step one,
849 // the VLC and MEL segments are decoded, generating a record that
850 // has 2 bytes per quad. The 2 bytes contain, u, rho, e^1 & e^k.
851 // This information should be sufficient for the next step.
852 // In step 2, we decode the MagSgn segment.
853
854 // step 1 decoding VLC and MEL segments
855 {
856 // init structures
857 dec_mel_st mel;
858 mel_init(&mel, coded_data, lcup, scup);
859 rev_struct vlc;
860 rev_init(&vlc, coded_data, lcup, scup);
861
862 int run = mel_get_run(&mel); // decode runs of events from MEL bitstrm
863 // data represented as runs of 0 events
864 // See mel_decode description
865
866 ui32 vlc_val;
867 ui32 c_q = 0;
868 ui16 *sp = scratch;
869 //initial quad row
870 for (ui32 x = 0; x < width; sp += 4)
871 {
872 // decode VLC
874
875 // first quad
876 vlc_val = rev_fetch(&vlc);
877
878 //decode VLC using the context c_q and the head of VLC bitstream
879 ui16 t0 = vlc_tbl0[ c_q + (vlc_val & 0x7F) ];
880
881 // if context is zero, use one MEL event
882 if (c_q == 0) //zero context
883 {
884 run -= 2; //subtract 2, since events number if multiplied by 2
885
886 // Is the run terminated in 1? if so, use decoded VLC code,
887 // otherwise, discard decoded data, since we will decoded again
888 // using a different context
889 t0 = (run == -1) ? t0 : 0;
890
891 // is run -1 or -2? this means a run has been consumed
892 if (run < 0)
893 run = mel_get_run(&mel); // get another run
894 }
895 //run -= (c_q == 0) ? 2 : 0;
896 //t0 = (c_q != 0 || run == -1) ? t0 : 0;
897 //if (run < 0)
898 // run = mel_get_run(&mel); // get another run
899 sp[0] = t0;
900 x += 2;
901
902 // prepare context for the next quad; eqn. 1 in ITU T.814
903 c_q = ((t0 & 0x10U) << 3) | ((t0 & 0xE0U) << 2);
904
905 //remove data from vlc stream (0 bits are removed if vlc is not used)
906 vlc_val = rev_advance(&vlc, t0 & 0x7);
907
908 //second quad
909 ui16 t1 = 0;
910
911 //decode VLC using the context c_q and the head of VLC bitstream
912 t1 = vlc_tbl0[c_q + (vlc_val & 0x7F)];
913
914 // if context is zero, use one MEL event
915 if (c_q == 0 && x < width) //zero context
916 {
917 run -= 2; //subtract 2, since events number if multiplied by 2
918
919 // if event is 0, discard decoded t1
920 t1 = (run == -1) ? t1 : 0;
921
922 if (run < 0) // have we consumed all events in a run
923 run = mel_get_run(&mel); // if yes, then get another run
924 }
925 t1 = x < width ? t1 : 0;
926 //run -= (c_q == 0 && x < width) ? 2 : 0;
927 //t1 = (c_q != 0 || run == -1) ? t1 : 0;
928 //if (run < 0)
929 // run = mel_get_run(&mel); // get another run
930 sp[2] = t1;
931 x += 2;
932
933 //prepare context for the next quad, eqn. 1 in ITU T.814
934 c_q = ((t1 & 0x10U) << 3) | ((t1 & 0xE0U) << 2);
935
936 //remove data from vlc stream, if qinf is not used, cwdlen is 0
937 vlc_val = rev_advance(&vlc, t1 & 0x7);
938
939 // decode u
941 // uvlc_mode is made up of u_offset bits from the quad pair
942 ui32 uvlc_mode = ((t0 & 0x8U) << 3) | ((t1 & 0x8U) << 4);
943 if (uvlc_mode == 0xc0)// if both u_offset are set, get an event from
944 { // the MEL run of events
945 run -= 2; //subtract 2, since events number if multiplied by 2
946
947 uvlc_mode += (run == -1) ? 0x40 : 0; // increment uvlc_mode by
948 // is 0x40
949
950 if (run < 0)//if run is consumed (run is -1 or -2), get another run
951 run = mel_get_run(&mel);
952 }
953 //run -= (uvlc_mode == 0xc0) ? 2 : 0;
954 //uvlc_mode += (uvlc_mode == 0xc0 && run == -1) ? 0x40 : 0;
955 //if (run < 0)
956 // run = mel_get_run(&mel); // get another run
957
958 //decode uvlc_mode to get u for both quads
959 ui32 uvlc_entry = uvlc_tbl0[uvlc_mode + (vlc_val & 0x3F)];
960 //remove total prefix length
961 vlc_val = rev_advance(&vlc, uvlc_entry & 0x7);
962 uvlc_entry >>= 3;
963 //extract suffixes for quad 0 and 1
964 ui32 len = uvlc_entry & 0xF; //suffix length for 2 quads
965 ui32 tmp = vlc_val & ((1 << len) - 1); //suffix value for 2 quads
966 vlc_val = rev_advance(&vlc, len);
967 uvlc_entry >>= 4;
968 // quad 0 length
969 len = uvlc_entry & 0x7; // quad 0 suffix length
970 uvlc_entry >>= 3;
971 ui16 u_q = (ui16)(1 + (uvlc_entry&7) + (tmp&~(0xFFU<<len)));//kap. 1
972 sp[1] = u_q;
973 u_q = (ui16)(1 + (uvlc_entry >> 3) + (tmp >> len)); //kappa == 1
974 sp[3]= u_q;
975 }
976 sp[0] = sp[1] = 0;
977
978 //non initial quad rows
979 for (ui32 y = 2; y < height; y += 2)
980 {
981 c_q = 0; // context
982 ui16 *sp = scratch + (y >> 1) * sstr; // this row of quads
983
984 for (ui32 x = 0; x < width; sp += 4)
985 {
986 // decode VLC
988
989 // sigma_q (n, ne, nf)
990 c_q |= ((sp[0 - (si32)sstr] & 0xA0U) << 2);
991 c_q |= ((sp[2 - (si32)sstr] & 0x20U) << 4);
992
993 // first quad
994 vlc_val = rev_fetch(&vlc);
995
996 //decode VLC using the context c_q and the head of VLC bitstream
997 ui16 t0 = vlc_tbl1[ c_q + (vlc_val & 0x7F) ];
998
999 // if context is zero, use one MEL event
1000 if (c_q == 0) //zero context
1001 {
1002 run -= 2; //subtract 2, since events number is multiplied by 2
1003
1004 // Is the run terminated in 1? if so, use decoded VLC code,
1005 // otherwise, discard decoded data, since we will decoded again
1006 // using a different context
1007 t0 = (run == -1) ? t0 : 0;
1008
1009 // is run -1 or -2? this means a run has been consumed
1010 if (run < 0)
1011 run = mel_get_run(&mel); // get another run
1012 }
1013 //run -= (c_q == 0) ? 2 : 0;
1014 //t0 = (c_q != 0 || run == -1) ? t0 : 0;
1015 //if (run < 0)
1016 // run = mel_get_run(&mel); // get another run
1017 sp[0] = t0;
1018 x += 2;
1019
1020 // prepare context for the next quad; eqn. 2 in ITU T.814
1021 // sigma_q (w, sw)
1022 c_q = ((t0 & 0x40U) << 2) | ((t0 & 0x80U) << 1);
1023 // sigma_q (nw)
1024 c_q |= sp[0 - (si32)sstr] & 0x80;
1025 // sigma_q (n, ne, nf)
1026 c_q |= ((sp[2 - (si32)sstr] & 0xA0U) << 2);
1027 c_q |= ((sp[4 - (si32)sstr] & 0x20U) << 4);
1028
1029 //remove data from vlc stream (0 bits are removed if vlc is unused)
1030 vlc_val = rev_advance(&vlc, t0 & 0x7);
1031
1032 //second quad
1033 ui16 t1 = 0;
1034
1035 //decode VLC using the context c_q and the head of VLC bitstream
1036 t1 = vlc_tbl1[ c_q + (vlc_val & 0x7F)];
1037
1038 // if context is zero, use one MEL event
1039 if (c_q == 0 && x < width) //zero context
1040 {
1041 run -= 2; //subtract 2, since events number if multiplied by 2
1042
1043 // if event is 0, discard decoded t1
1044 t1 = (run == -1) ? t1 : 0;
1045
1046 if (run < 0) // have we consumed all events in a run
1047 run = mel_get_run(&mel); // if yes, then get another run
1048 }
1049 t1 = x < width ? t1 : 0;
1050 //run -= (c_q == 0 && x < width) ? 2 : 0;
1051 //t1 = (c_q != 0 || run == -1) ? t1 : 0;
1052 //if (run < 0)
1053 // run = mel_get_run(&mel); // get another run
1054 sp[2] = t1;
1055 x += 2;
1056
1057 // partial c_q, will be completed when we process the next quad
1058 // sigma_q (w, sw)
1059 c_q = ((t1 & 0x40U) << 2) | ((t1 & 0x80U) << 1);
1060 // sigma_q (nw)
1061 c_q |= sp[2 - (si32)sstr] & 0x80;
1062
1063 //remove data from vlc stream, if qinf is not used, cwdlen is 0
1064 vlc_val = rev_advance(&vlc, t1 & 0x7);
1065
1066 // decode u
1068 // uvlc_mode is made up of u_offset bits from the quad pair
1069 ui32 uvlc_mode = ((t0 & 0x8U) << 3) | ((t1 & 0x8U) << 4);
1070 ui32 uvlc_entry = uvlc_tbl1[uvlc_mode + (vlc_val & 0x3F)];
1071 //remove total prefix length
1072 vlc_val = rev_advance(&vlc, uvlc_entry & 0x7);
1073 uvlc_entry >>= 3;
1074 //extract suffixes for quad 0 and 1
1075 ui32 len = uvlc_entry & 0xF; //suffix length for 2 quads
1076 ui32 tmp = vlc_val & ((1 << len) - 1); //suffix value for 2 quads
1077 vlc_val = rev_advance(&vlc, len);
1078 uvlc_entry >>= 4;
1079 // quad 0 length
1080 len = uvlc_entry & 0x7; // quad 0 suffix length
1081 uvlc_entry >>= 3;
1082 ui16 u_q = (ui16)((uvlc_entry & 7) + (tmp & ~(0xFU << len))); //u_q
1083 sp[1] = u_q;
1084 u_q = (ui16)((uvlc_entry >> 3) + (tmp >> len)); // u_q
1085 sp[3] = u_q;
1086 }
1087 sp[0] = sp[1] = 0;
1088 }
1089 }
1090
1091 // step2 we decode magsgn
1092 {
1093 // We allocate a scratch row for storing v_n values.
1094 // We have 512 quads horizontally.
1095 // We need an extra entry to handle the case of vp[1]
1096 // when vp is at the last column.
1097 // Here, we allocate 4 instead of 1 to make the buffer size
1098 // a multipled of 16 bytes.
1099 const int v_n_size = 512 + 4;
1100 ui32 v_n_scratch[v_n_size] = {0}; // 2+ kB
1101
1102 frwd_struct magsgn;
1103 frwd_init<0xFF>(&magsgn, coded_data, lcup - scup);
1104
1105 ui16 *sp = scratch;
1106 ui32 *vp = v_n_scratch;
1107 ui32 *dp = decoded_data;
1108
1109 ui32 prev_v_n = 0;
1110 for (ui32 x = 0; x < width; sp += 2, ++vp)
1111 {
1112 ui32 inf = sp[0];
1113 ui32 U_q = sp[1];
1114 if (U_q > mmsbp2)
1115 return false;
1116
1117 ui32 v_n;
1118 ui32 val = 0;
1119 ui32 bit = 0;
1120 if (inf & (1 << (4 + bit)))
1121 {
1122 //get 32 bits of magsgn data
1123 ui32 ms_val = frwd_fetch<0xFF>(&magsgn);
1124 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1125 frwd_advance(&magsgn, m_n); //consume m_n
1126
1127 val = ms_val << 31; // get sign bit
1128 v_n = ms_val & ((1 << m_n) - 1); // keep only m_n bits
1129 v_n |= ((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1130 v_n |= 1; // add center of bin
1131 //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1132 //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1133 val |= (v_n + 2) << (p - 1);
1134 }
1135 dp[0] = val;
1136
1137 v_n = 0;
1138 val = 0;
1139 bit = 1;
1140 if (inf & (1 << (4 + bit)))
1141 {
1142 //get 32 bits of magsgn data
1143 ui32 ms_val = frwd_fetch<0xFF>(&magsgn);
1144 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1145 frwd_advance(&magsgn, m_n); //consume m_n
1146
1147 val = ms_val << 31; // get sign bit
1148 v_n = ms_val & ((1 << m_n) - 1); // keep only m_n bits
1149 v_n |= ((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1150 v_n |= 1; // add center of bin
1151 //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1152 //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1153 val |= (v_n + 2) << (p - 1);
1154 }
1155 dp[stride] = val;
1156 vp[0] = prev_v_n | v_n;
1157 prev_v_n = 0;
1158 ++dp;
1159 if (++x >= width)
1160 { ++vp; break; }
1161
1162 val = 0;
1163 bit = 2;
1164 if (inf & (1 << (4 + bit)))
1165 {
1166 //get 32 bits of magsgn data
1167 ui32 ms_val = frwd_fetch<0xFF>(&magsgn);
1168 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1169 frwd_advance(&magsgn, m_n); //consume m_n
1170
1171 val = ms_val << 31; // get sign bit
1172 v_n = ms_val & ((1 << m_n) - 1); // keep only m_n bits
1173 v_n |= ((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1174 v_n |= 1; // add center of bin
1175 //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1176 //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1177 val |= (v_n + 2) << (p - 1);
1178 }
1179 dp[0] = val;
1180
1181 v_n = 0;
1182 val = 0;
1183 bit = 3;
1184 if (inf & (1 << (4 + bit)))
1185 {
1186 //get 32 bits of magsgn data
1187 ui32 ms_val = frwd_fetch<0xFF>(&magsgn);
1188 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1189 frwd_advance(&magsgn, m_n); //consume m_n
1190
1191 val = ms_val << 31; // get sign bit
1192 v_n = ms_val & ((1 << m_n) - 1); // keep only m_n bits
1193 v_n |= ((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1194 v_n |= 1; // add center of bin
1195 //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1196 //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1197 val |= (v_n + 2) << (p - 1);
1198 }
1199 dp[stride] = val;
1200 prev_v_n = v_n;
1201 ++dp;
1202 ++x;
1203 }
1204 vp[0] = prev_v_n;
1205
1206 for (ui32 y = 2; y < height; y += 2)
1207 {
1208 ui16 *sp = scratch + (y >> 1) * sstr;
1209 ui32 *vp = v_n_scratch;
1210 ui32 *dp = decoded_data + y * stride;
1211
1212 prev_v_n = 0;
1213 for (ui32 x = 0; x < width; sp += 2, ++vp)
1214 {
1215 ui32 inf = sp[0];
1216 ui32 u_q = sp[1];
1217
1218 ui32 gamma = inf & 0xF0; gamma &= gamma - 0x10; //is gamma_q 1?
1219 ui32 emax = vp[0] | vp[1];
1220 emax = 31 - count_leading_zeros(emax | 2); // emax - 1
1221 ui32 kappa = gamma ? emax : 1;
1222
1223 ui32 U_q = u_q + kappa;
1224 if (U_q > mmsbp2)
1225 return false;
1226
1227 ui32 v_n;
1228 ui32 val = 0;
1229 ui32 bit = 0;
1230 if (inf & (1 << (4 + bit)))
1231 {
1232 //get 32 bits of magsgn data
1233 ui32 ms_val = frwd_fetch<0xFF>(&magsgn);
1234 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1235 frwd_advance(&magsgn, m_n); //consume m_n
1236
1237 val = ms_val << 31; // get sign bit
1238 v_n = ms_val & ((1 << m_n) - 1); // keep only m_n bits
1239 v_n |= ((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1240 v_n |= 1; // add center of bin
1241 //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1242 //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1243 val |= (v_n + 2) << (p - 1);
1244 }
1245 dp[0] = val;
1246
1247 v_n = 0;
1248 val = 0;
1249 bit = 1;
1250 if (inf & (1 << (4 + bit)))
1251 {
1252 //get 32 bits of magsgn data
1253 ui32 ms_val = frwd_fetch<0xFF>(&magsgn);
1254 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1255 frwd_advance(&magsgn, m_n); //consume m_n
1256
1257 val = ms_val << 31; // get sign bit
1258 v_n = ms_val & ((1 << m_n) - 1); // keep only m_n bits
1259 v_n |= ((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1260 v_n |= 1; // add center of bin
1261 //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1262 //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1263 val |= (v_n + 2) << (p - 1);
1264 }
1265 dp[stride] = val;
1266 vp[0] = prev_v_n | v_n;
1267 prev_v_n = 0;
1268 ++dp;
1269 if (++x >= width)
1270 { ++vp; break; }
1271
1272 val = 0;
1273 bit = 2;
1274 if (inf & (1 << (4 + bit)))
1275 {
1276 //get 32 bits of magsgn data
1277 ui32 ms_val = frwd_fetch<0xFF>(&magsgn);
1278 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1279 frwd_advance(&magsgn, m_n); //consume m_n
1280
1281 val = ms_val << 31; // get sign bit
1282 v_n = ms_val & ((1 << m_n) - 1); // keep only m_n bits
1283 v_n |= ((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1284 v_n |= 1; // add center of bin
1285 //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1286 //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1287 val |= (v_n + 2) << (p - 1);
1288 }
1289 dp[0] = val;
1290
1291 v_n = 0;
1292 val = 0;
1293 bit = 3;
1294 if (inf & (1 << (4 + bit)))
1295 {
1296 //get 32 bits of magsgn data
1297 ui32 ms_val = frwd_fetch<0xFF>(&magsgn);
1298 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1299 frwd_advance(&magsgn, m_n); //consume m_n
1300
1301 val = ms_val << 31; // get sign bit
1302 v_n = ms_val & ((1 << m_n) - 1); // keep only m_n bits
1303 v_n |= ((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1304 v_n |= 1; // add center of bin
1305 //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1306 //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1307 val |= (v_n + 2) << (p - 1);
1308 }
1309 dp[stride] = val;
1310 prev_v_n = v_n;
1311 ++dp;
1312 ++x;
1313 }
1314 vp[0] = prev_v_n;
1315 }
1316 }
1317
1318 if (num_passes > 1)
1319 {
1320 // We use scratch again, we can divide it into multiple regions
1321 // sigma holds all the significant samples, and it cannot
1322 // be modified after it is set. it will be used during the
1323 // Magnitude Refinement Pass
1324 ui16* const sigma = scratch;
1325
1326 ui32 mstr = (width + 3u) >> 2; // divide by 4, since each
1327 // ui16 contains 4 columns
1328 mstr = ((mstr + 2u) + 7u) & ~7u; // multiples of 8
1329
1330 // We re-arrange quad significance, where each 4 consecutive
1331 // bits represent one quad, into column significance, where,
1332 // each 4 consecutive bits represent one column of 4 rows
1333 {
1334 ui32 y;
1335 for (y = 0; y < height; y += 4)
1336 {
1337 ui16* sp = scratch + (y >> 1) * sstr;
1338 ui16* dp = sigma + (y >> 2) * mstr;
1339 for (ui32 x = 0; x < width; x += 4, sp += 4, ++dp) {
1340 ui32 t0 = 0, t1 = 0;
1341 t0 = ((sp[0 ] & 0x30u) >> 4) | ((sp[0 ] & 0xC0u) >> 2);
1342 t0 |= ((sp[2 ] & 0x30u) << 4) | ((sp[2 ] & 0xC0u) << 6);
1343 t1 = ((sp[0+sstr] & 0x30u) >> 2) | ((sp[0+sstr] & 0xC0u) );
1344 t1 |= ((sp[2+sstr] & 0x30u) << 6) | ((sp[2+sstr] & 0xC0u) << 8);
1345 dp[0] = (ui16)(t0 | t1);
1346 }
1347 dp[0] = 0; // set an extra entry on the right with 0
1348 }
1349 {
1350 // reset one row after the codeblock
1351 ui16* dp = sigma + (y >> 2) * mstr;
1352 for (ui32 x = 0; x < width; x += 4, ++dp)
1353 dp[0] = 0;
1354 dp[0] = 0; // set an extra entry on the right with 0
1355 }
1356 }
1357
1358 // We perform Significance Propagation Pass here
1359 {
1360 // This stores significance information of the previous
1361 // 4 rows. Significance information in this array includes
1362 // all significant samples in bitplane p - 1; that is,
1363 // significant samples for bitplane p (discovered during the
1364 // cleanup pass and stored in sigma) and samples that have recently
1365 // become significant (during the SPP) in bitplane p-1.
1366 // We store enough for the widest row, containing 1024 columns,
1367 // which is equivalent to 256 of ui16, since each stores 4 columns.
1368 // We add an extra 8 entries, just in case we need more
1369 ui16 prev_row_sig[256 + 8] = {0}; // 528 Bytes
1370
1371 frwd_struct sigprop;
1372 frwd_init<0>(&sigprop, coded_data + lengths1, (int)lengths2);
1373
1374 for (ui32 y = 0; y < height; y += 4)
1375 {
1376 ui32 pattern = 0xFFFFu; // a pattern needed samples
1377 if (height - y < 4) {
1378 pattern = 0x7777u;
1379 if (height - y < 3) {
1380 pattern = 0x3333u;
1381 if (height - y < 2)
1382 pattern = 0x1111u;
1383 }
1384 }
1385
1386 // prev holds sign. info. for the previous quad, together
1387 // with the rows on top of it and below it.
1388 ui32 prev = 0;
1389 ui16 *prev_sig = prev_row_sig;
1390 ui16 *cur_sig = sigma + (y >> 2) * mstr;
1391 ui32 *dpp = decoded_data + y * stride;
1392 for (ui32 x = 0; x < width; x += 4, ++cur_sig, ++prev_sig)
1393 {
1394 // only rows and columns inside the stripe are included
1395 si32 s = (si32)x + 4 - (si32)width;
1396 s = ojph_max(s, 0);
1397 pattern = pattern >> (s * 4);
1398
1399 // We first find locations that need to be tested (potential
1400 // SPP members); these locations will end up in mbr
1401 // In each iteration, we produce 16 bits because cwd can have
1402 // up to 16 bits of significance information, followed by the
1403 // corresponding 16 bits of sign information; therefore, it is
1404 // sufficient to fetch 32 bit data per loop.
1405
1406 // Although we are interested in 16 bits only, we load 32 bits.
1407 // For the 16 bits we are producing, we need the next 4 bits --
1408 // We need data for at least 5 columns out of 8.
1409 // Therefore loading 32 bits is easier than loading 16 bits
1410 // twice.
1411 ui32 ps = *(ui32*)prev_sig;
1412 ui32 ns = *(ui32*)(cur_sig + mstr);
1413 ui32 u = (ps & 0x88888888) >> 3; // the row on top
1414 if (!stripe_causal)
1415 u |= (ns & 0x11111111) << 3; // the row below
1416
1417 ui32 cs = *(ui32*)cur_sig;
1418 // vertical integration
1419 ui32 mbr = cs; // this sig. info.
1420 mbr |= (cs & 0x77777777) << 1; //above neighbors
1421 mbr |= (cs & 0xEEEEEEEE) >> 1; //below neighbors
1422 mbr |= u;
1423 // horizontal integration
1424 ui32 t = mbr;
1425 mbr |= t << 4; // neighbors on the left
1426 mbr |= t >> 4; // neighbors on the right
1427 mbr |= prev >> 12; // significance of previous group
1428
1429 // remove outside samples, and already significant samples
1430 mbr &= pattern;
1431 mbr &= ~cs;
1432
1433 // find samples that become significant during the SPP
1434 ui32 new_sig = mbr;
1435 if (new_sig)
1436 {
1437 ui32 cwd = frwd_fetch<0>(&sigprop);
1438
1439 ui32 cnt = 0;
1440 ui32 col_mask = 0xFu;
1441 ui32 inv_sig = ~cs & pattern;
1442 for (int i = 0; i < 16; i += 4, col_mask <<= 4)
1443 {
1444 if ((col_mask & new_sig) == 0)
1445 continue;
1446
1447 //scan one column
1448 ui32 sample_mask = 0x1111u & col_mask;
1449 if (new_sig & sample_mask)
1450 {
1451 new_sig &= ~sample_mask;
1452 if (cwd & 1)
1453 {
1454 ui32 t = 0x33u << i;
1455 new_sig |= t & inv_sig;
1456 }
1457 cwd >>= 1; ++cnt;
1458 }
1459
1460 sample_mask <<= 1;
1461 if (new_sig & sample_mask)
1462 {
1463 new_sig &= ~sample_mask;
1464 if (cwd & 1)
1465 {
1466 ui32 t = 0x76u << i;
1467 new_sig |= t & inv_sig;
1468 }
1469 cwd >>= 1; ++cnt;
1470 }
1471
1472 sample_mask <<= 1;
1473 if (new_sig & sample_mask)
1474 {
1475 new_sig &= ~sample_mask;
1476 if (cwd & 1)
1477 {
1478 ui32 t = 0xECu << i;
1479 new_sig |= t & inv_sig;
1480 }
1481 cwd >>= 1; ++cnt;
1482 }
1483
1484 sample_mask <<= 1;
1485 if (new_sig & sample_mask)
1486 {
1487 new_sig &= ~sample_mask;
1488 if (cwd & 1)
1489 {
1490 ui32 t = 0xC8u << i;
1491 new_sig |= t & inv_sig;
1492 }
1493 cwd >>= 1; ++cnt;
1494 }
1495 }
1496
1497 if (new_sig)
1498 {
1499 // new_sig has newly-discovered sig. samples during SPP
1500 // find the signs and update decoded_data
1501 ui32 *dp = dpp + x;
1502 ui32 val = 3u << (p - 2);
1503 col_mask = 0xFu;
1504 for (int i = 0; i < 4; ++i, ++dp, col_mask <<= 4)
1505 {
1506 if ((col_mask & new_sig) == 0)
1507 continue;
1508
1509 //scan 4 signs
1510 ui32 sample_mask = 0x1111u & col_mask;
1511 if (new_sig & sample_mask)
1512 {
1513 assert(dp[0] == 0);
1514 dp[0] = (cwd << 31) | val;
1515 cwd >>= 1; ++cnt;
1516 }
1517
1518 sample_mask += sample_mask;
1519 if (new_sig & sample_mask)
1520 {
1521 assert(dp[stride] == 0);
1522 dp[stride] = (cwd << 31) | val;
1523 cwd >>= 1; ++cnt;
1524 }
1525
1526 sample_mask += sample_mask;
1527 if (new_sig & sample_mask)
1528 {
1529 assert(dp[2 * stride] == 0);
1530 dp[2 * stride] = (cwd << 31) | val;
1531 cwd >>= 1; ++cnt;
1532 }
1533
1534 sample_mask += sample_mask;
1535 if (new_sig & sample_mask)
1536 {
1537 assert(dp[3 * stride] == 0);
1538 dp[3 * stride] = (cwd << 31) | val;
1539 cwd >>= 1; ++cnt;
1540 }
1541 }
1542 }
1543 frwd_advance(&sigprop, cnt);
1544 }
1545
1546 new_sig |= cs;
1547 *prev_sig = (ui16)(new_sig);
1548
1549 // vertical integration for the new sig. info.
1550 t = new_sig;
1551 new_sig |= (t & 0x7777) << 1; //above neighbors
1552 new_sig |= (t & 0xEEEE) >> 1; //below neighbors
1553 // add sig. info. from the row on top and below
1554 prev = new_sig | u;
1555 // we need only the bits in 0xF000
1556 prev &= 0xF000;
1557 }
1558 }
1559 }
1560
1561 // We perform Magnitude Refinement Pass here
1562 if (num_passes > 2)
1563 {
1564 rev_struct magref;
1565 rev_init_mrp(&magref, coded_data, (int)lengths1, (int)lengths2);
1566
1567 for (ui32 y = 0; y < height; y += 4)
1568 {
1569 ui32 *cur_sig = (ui32*)(sigma + (y >> 2) * mstr);
1570 ui32 *dpp = decoded_data + y * stride;
1571 ui32 half = 1 << (p - 2);
1572 for (ui32 i = 0; i < width; i += 8)
1573 {
1574 //Process one entry from sigma array at a time
1575 // Each nibble (4 bits) in the sigma array represents 4 rows,
1576 // and the 32 bits contain 8 columns
1577 ui32 cwd = rev_fetch_mrp(&magref); // get 32 bit data
1578 ui32 sig = *cur_sig++; // 32 bit that will be processed now
1579 ui32 col_mask = 0xFu; // a mask for a column in sig
1580 if (sig) // if any of the 32 bits are set
1581 {
1582 for (int j = 0; j < 8; ++j) //one column at a time
1583 {
1584 if (sig & col_mask) // lowest nibble
1585 {
1586 ui32 *dp = dpp + i + j; // next column in decoded samples
1587 ui32 sample_mask = 0x11111111u & col_mask; //LSB
1588
1589 for (int k = 0; k < 4; ++k) {
1590 if (sig & sample_mask) //if LSB is set
1591 {
1592 assert(dp[0] != 0); // decoded value cannot be zero
1593 assert((dp[0] & half) == 0); // no half
1594 ui32 sym = cwd & 1; // get it value
1595 sym = (1 - sym) << (p - 1); // previous center of bin
1596 sym |= half; // put half the center of bin
1597 dp[0] ^= sym; // remove old bin center and put new
1598 cwd >>= 1; // consume word
1599 }
1600 sample_mask += sample_mask; //next row
1601 dp += stride; // next samples row
1602 }
1603 }
1604 col_mask <<= 4; //next column
1605 }
1606 }
1607 // consume data according to the number of bits set
1608 rev_advance_mrp(&magref, population_count(sig));
1609 }
1610 }
1611 }
1612 }
1613 return true;
1614 }
1615 }
1616}
ui16 uvlc_tbl0[256+64]
uvlc_tbl0 contains decoding information for initial row of quads
ui16 uvlc_tbl1[256]
uvlc_tbl1 contains decoding information for non-initial row of quads
ui16 vlc_tbl0[1024]
vlc_tbl0 contains decoding information for initial row of quads
ui16 vlc_tbl1[1024]
vlc_tbl1 contains decoding information for non-initial row of quads
static ui32 rev_fetch(rev_struct *vlcp)
Retrieves 32 bits from the head of a rev_struct structure.
static void rev_init_mrp(rev_struct *mrp, ui8 *data, int lcup, int len2)
Initializes rev_struct structure for MRP segment, and reads a number of bytes such that the next 32 b...
static void mel_read(dec_mel_st *melp)
Reads and unstuffs the MEL bitstream.
static void frwd_advance(frwd_struct *msp, ui32 num_bits)
Consume num_bits bits from the bitstream of frwd_struct.
static void rev_read_mrp(rev_struct *mrp)
Reads and unstuffs from rev_struct.
static ui32 rev_fetch_mrp(rev_struct *mrp)
Retrieves 32 bits from the head of a rev_struct structure.
static ui32 rev_advance_mrp(rev_struct *mrp, ui32 num_bits)
Consumes num_bits from a rev_struct structure.
static void rev_read(rev_struct *vlcp)
Read and unstuff data from a backwardly-growing segment.
static int mel_get_run(dec_mel_st *melp)
Retrieves one run from dec_mel_st; if there are no runs stored, the MEL segment is decoded.
static void rev_init(rev_struct *vlcp, ui8 *data, int lcup, int scup)
Initiates the rev_struct structure and reads a few bytes to move the read address to a multiple of 4.
static void mel_init(dec_mel_st *melp, ui8 *bbuf, int lcup, int scup)
Initiates a dec_mel_st structure for MEL decoding and reads some bytes in order to get the read addre...
static ui32 rev_advance(rev_struct *vlcp, ui32 num_bits)
Consumes num_bits from a rev_struct structure.
static void frwd_read(frwd_struct *msp)
Reads and unstuffs 32 bits from a forward-growing bitstream.
static ui32 frwd_fetch(frwd_struct *msp)
Fetches 32 bits from the frwd_struct bitstream.
static void frwd_init(frwd_struct *msp, const ui8 *data, int size)
Initializes the frwd_struct structure and reads some bytes.
bool ojph_decode_codeblock(ui8 *coded_data, ui32 *decoded_data, ui32 missing_msbs, ui32 num_passes, ui32 lengths1, ui32 lengths2, ui32 width, ui32 height, ui32 stride, bool stripe_causal)
Decodes one codeblock, processing the cleanup, significance propagation, and magnitude refinement pa...
static void mel_decode(dec_mel_st *melp)
Decodes unstuffed MEL segment bits stored in tmp to runs.
uint64_t ui64
Definition: ojph_defs.h:56
uint16_t ui16
Definition: ojph_defs.h:52
static ui32 population_count(ui32 val)
Definition: ojph_arch.h:110
static ui32 count_leading_zeros(ui32 val)
Definition: ojph_arch.h:130
int32_t si32
Definition: ojph_defs.h:55
uint32_t ui32
Definition: ojph_defs.h:54
uint8_t ui8
Definition: ojph_defs.h:50
#define ojph_max(a, b)
Definition: ojph_defs.h:73
#define OJPH_WARN(t,...)
Definition: ojph_message.h:128
MEL state structure for reading and decoding the MEL bitstream.
bool unstuff
true if the next bit needs to be unstuffed
int num_runs
number of decoded runs left in runs (maximum 8)
int size
number of bytes in MEL code
ui8 * data
the address of data (or bitstream)
int k
state of MEL decoder
int bits
number of bits stored in tmp
ui64 tmp
temporary buffer for read data
ui64 runs
runs of decoded MEL codewords (7 bits/run)
State structure for reading and unstuffing of forward-growing bitstreams; these are: MagSgn and SPP b...
const ui8 * data
pointer to bitstream
ui32 bits
number of bits stored in tmp
ui64 tmp
temporary buffer of read data
ui32 unstuff
1 if a bit needs to be unstuffed from next byte
A structure for reading and unstuffing a segment that grows backward, such as VLC and MRP.
ui32 bits
number of bits stored in tmp
int size
number of bytes left
ui8 * data
pointer to where to read data
ui64 tmp
temporary buffer of read data