100 ui32 val = 0xFFFFFFFF;
101 if (melp->size > 4) {
102 val = *(ui32*)melp->data;
106 else if (melp->size > 0)
109 while (melp->size > 1) {
110 ui32 v = *melp->data++;
111 ui32 m = ~(0xFFu << i);
112 val = (val & m) | (v << i);
117 ui32 v = *melp->data++;
119 ui32 m = ~(0xFFu << i);
120 val = (val & m) | (v << i);
125 int bits = 32 - melp->unstuff;
132 bool unstuff = ((val & 0xFF) == 0xFF);
134 t = t << (8 - unstuff);
137 t |= (val>>8) & 0xFF;
138 unstuff = (((val >> 8) & 0xFF) == 0xFF);
140 t = t << (8 - unstuff);
142 t |= (val>>16) & 0xFF;
143 unstuff = (((val >> 16) & 0xFF) == 0xFF);
145 t = t << (8 - unstuff);
147 t |= (val>>24) & 0xFF;
148 melp->unstuff = (((val >> 24) & 0xFF) == 0xFF);
152 melp->tmp |= ((ui64)t) << (64 - bits - melp->bits);
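// Editor's note (not part of the source): the byte loop above performs MEL
// bit unstuffing. When a byte equals 0xFF the encoder has inserted a zero bit
// at the top of the following byte, so that byte contributes only 7 useful
// bits; hence the shift by (8 - unstuff) and the matching decrement of bits
// before the assembled word is appended to melp->tmp.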
174 static const int mel_exp[13] = {
175 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5
184 while (melp->bits >= 6 && melp->num_runs < 8)
186 int eval = mel_exp[melp->k];
188 if (melp->tmp & (1ull<<63))
192 melp->k = melp->k + 1 < 12 ? melp->k + 1 : 12;
199 run = (int)(melp->tmp >> (63 - eval)) & ((1 << eval) - 1);
200 melp->k = melp->k - 1 > 0 ? melp->k - 1 : 0;
201 melp->tmp <<= eval + 1;
202 melp->bits -= eval + 1;
203 run = (run << 1) + 1;
205 eval = melp->num_runs * 7;
206 melp->runs &= ~((ui64)0x3F << eval);
207 melp->runs |= ((ui64)run) << eval;
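// Editor's note (not part of the source): this is the MEL run decoder. A 1
// bit yields a maximal run for the current state k (mel_exp[k] gives the
// exponent) and moves k up, capped at 12; a 0 bit is followed by mel_exp[k]
// literal bits giving a shorter run and moves k down, floored at 0. The
// trailing +1 appears to mark a run terminated by a one-event. Each decoded
// run is packed into a 7-bit slot of melp->runs, num_runs slots deep.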
225 melp->data = bbuf + lcup - scup;
228 melp->unstuff = false;
229 melp->size = scup - 1;
237 int num = 4 - (int)(intptr_t(melp->data) & 0x3);
238 for (int i = 0; i < num; ++i) {
239 assert(melp->unstuff == false || melp->data[0] <= 0x8F);
240 ui64 d = (melp->size > 0) ? *melp->data : 0xFF;
242 if (melp->size == 1) d |= 0xF;
244 melp->data += melp->size-- > 0;
245 int d_bits = 8 - melp->unstuff;
246 melp->tmp = (melp->tmp << d_bits) | d;
247 melp->bits += d_bits;
248 melp->unstuff = ((d & 0xFF) == 0xFF);
251 melp->tmp <<= (64 - melp->bits);
264 if (melp->num_runs == 0)
267 int t = melp->runs & 0x7F;
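// Editor's note (not part of the source): runs are consumed 7 bits at a time
// from the low end of melp->runs; an illustrative caller (hypothetical):
//   int run = mel_get_run(&mel);  // refills via mel_decode when num_runs == 0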
320 val = *(ui32*)(vlcp->data - 3);
324 else if (vlcp->size > 0)
327 while (vlcp->size > 0) {
328 ui32 v = *vlcp->data--;
336 ui32 tmp = val >> 24;
340 bits = 8 - ((vlcp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1 : 0);
341 bool unstuff = (val >> 24) > 0x8F;
343 tmp |= ((val >> 16) & 0xFF) << bits;
344 bits += 8 - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1 : 0);
345 unstuff = ((val >> 16) & 0xFF) > 0x8F;
347 tmp |= ((val >> 8) & 0xFF) << bits;
348 bits += 8 - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1 : 0);
349 unstuff = ((val >> 8) & 0xFF) > 0x8F;
351 tmp |= (val & 0xFF) << bits;
352 bits += 8 - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1 : 0);
353 unstuff = (val & 0xFF) > 0x8F;
356 vlcp->tmp |= (ui64)tmp << vlcp->bits;
358 vlcp->unstuff = unstuff;
379 vlcp->data = data + lcup - 2;
382 vlcp->size = scup - 2;
384 ui32 d = *vlcp->data--;
386 vlcp->bits = 4 - ((vlcp->tmp & 7) == 7);
387 vlcp->unstuff = (d | 0xF) > 0x8F;
394 int num = 1 + (int)(intptr_t(vlcp->data) & 0x3);
395 int tnum = num < vlcp->size ? num : vlcp->size;
396 for (int i = 0; i < tnum; ++i) {
400 ui32 d_bits = 8 - ((vlcp->unstuff && ((d & 0x7F) == 0x7F)) ? 1 : 0);
401 vlcp->tmp |= d << vlcp->bits;
402 vlcp->bits += d_bits;
403 vlcp->unstuff = d > 0x8F;
425 return (ui32)vlcp->tmp;
437 assert(num_bits <= vlcp->bits);
438 vlcp->tmp >>= num_bits;
439 vlcp->bits -= num_bits;
440 return (ui32)vlcp->tmp;
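// Editor's note (not part of the source): rev_fetch and rev_advance form a
// peek-then-consume pair. Illustrative use (names hypothetical):
//   ui32 vlc_val = rev_fetch(&vlc);    // peek at up to 32 buffered bits
//   vlc_val = rev_advance(&vlc, len);  // consume len bits, len <= vlc.bits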
463 val = *(ui32*)(mrp->data - 3);
467 else if (mrp->size > 0)
470 while (mrp->size > 0) {
471 ui32 v = *mrp->data--;
479 ui32 bits, tmp = val >> 24;
482 bits = 8 - ((mrp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1 : 0);
483 bool unstuff = (val >> 24) > 0x8F;
486 tmp |= ((val >> 16) & 0xFF) << bits;
487 bits += 8 - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1 : 0);
488 unstuff = ((val >> 16) & 0xFF) > 0x8F;
490 tmp |= ((val >> 8) & 0xFF) << bits;
491 bits += 8 - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1 : 0);
492 unstuff = ((val >> 8) & 0xFF) > 0x8F;
494 tmp |= (val & 0xFF) << bits;
495 bits += 8 - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1 : 0);
496 unstuff = (val & 0xFF) > 0x8F;
498 mrp->tmp |= (ui64)tmp << mrp->bits;
500 mrp->unstuff = unstuff;
521 mrp->data = data + lcup + len2 - 1;
531 int num = 1 + (int)(intptr_t(mrp->data) & 0x3);
532 for (int i = 0; i < num; ++i) {
535 d = (mrp->size-- > 0) ? *mrp->data-- : 0;
537 ui32 d_bits = 8 - ((mrp->unstuff && ((d & 0x7F) == 0x7F)) ? 1 : 0);
538 mrp->tmp |= d << mrp->bits;
540 mrp->unstuff = d > 0x8F;
561 return (ui32)mrp->tmp;
572 assert(num_bits <= mrp->bits);
573 mrp->tmp >>= num_bits;
574 mrp->bits -= num_bits;
575 return (ui32)mrp->tmp;
612 assert(msp->bits <= 128);
614 __m128i offset, val, validity, all_xff;
615 val = _mm_loadu_si128((__m128i*)msp->data);
616 int bytes = msp->size >= 16 ? 16 : msp->size;
617 validity = _mm_set1_epi8((char)bytes);
621 offset = _mm_set_epi64x(0x0F0E0D0C0B0A0908,0x0706050403020100);
622 validity = _mm_cmpgt_epi8(validity, offset);
623 all_xff = _mm_set1_epi8(-1);
626 __m128i t = _mm_xor_si128(validity, all_xff);
627 val = _mm_or_si128(t, val);
630 val = _mm_and_si128(validity, val);
635 ff_bytes = _mm_cmpeq_epi8(val, all_xff);
636 ff_bytes = _mm_and_si128(ff_bytes, validity);
637 ui32 flags = (ui32)_mm_movemask_epi8(ff_bytes);
639 ui32 next_unstuff = flags >> 16;
640 flags |= msp->unstuff;
652 t = _mm_set1_epi8((char)loc);
653 m = _mm_cmpgt_epi8(offset, t);
655 t = _mm_and_si128(m, val);
656 c = _mm_srli_epi64(t, 1);
657 t = _mm_srli_si128(t, 8);
658 t = _mm_slli_epi64(t, 63);
659 t = _mm_or_si128(t, c);
661 val = _mm_or_si128(t, _mm_andnot_si128(m, val));
665 assert(msp->bits >= 0 && msp->bits <= 128);
666 int cur_bytes = msp->bits >> 3;
667 int cur_bits = msp->bits & 7;
669 b1 = _mm_sll_epi64(val, _mm_set1_epi64x(cur_bits));
670 b2 = _mm_slli_si128(val, 8);
671 b2 = _mm_srl_epi64(b2, _mm_set1_epi64x(64-cur_bits));
672 b1 = _mm_or_si128(b1, b2);
673 b2 = _mm_loadu_si128((__m128i*)(msp->tmp + cur_bytes));
674 b2 = _mm_or_si128(b1, b2);
675 _mm_storeu_si128((__m128i*)(msp->tmp + cur_bytes), b2);
677 int consumed_bits = bits < 128 - cur_bits ? bits : 128 - cur_bits;
678 cur_bytes = (msp->bits + (ui32)consumed_bits + 7) >> 3;
679 int upper = _mm_extract_epi16(val, 7);
680 upper >>= consumed_bits - 128 + 16;
681 msp->tmp[cur_bytes] = (ui8)upper;
683 msp->bits += (ui32)bits;
684 msp->unstuff = next_unstuff;
685 assert(msp->unstuff == 0 || msp->unstuff == 1);
702 _mm_storeu_si128((__m128i *)msp->tmp, _mm_setzero_si128());
703 _mm_storeu_si128((__m128i *)msp->tmp + 1, _mm_setzero_si128());
704 _mm_storeu_si128((__m128i *)msp->tmp + 2, _mm_setzero_si128());
722 assert(num_bits > 0 && num_bits <= msp->bits && num_bits < 128);
723 msp->bits -= num_bits;
725 __m128i *p = (__m128i*)(msp->tmp + ((num_bits >> 3) & 0x18));
728 __m128i v0, v1, c0, c1, t;
729 v0 = _mm_loadu_si128(p);
730 v1 = _mm_loadu_si128(p + 1);
733 c0 = _mm_srl_epi64(v0, _mm_set1_epi64x(num_bits));
734 t = _mm_srli_si128(v0, 8);
735 t = _mm_sll_epi64(t, _mm_set1_epi64x(64 - num_bits));
736 c0 = _mm_or_si128(c0, t);
737 t = _mm_slli_si128(v1, 8);
738 t = _mm_sll_epi64(t, _mm_set1_epi64x(64 - num_bits));
739 c0 = _mm_or_si128(c0, t);
741 _mm_storeu_si128((__m128i*)msp->tmp, c0);
743 c1 = _mm_srl_epi64(v1, _mm_set1_epi64x(num_bits));
744 t = _mm_srli_si128(v1, 8);
745 t = _mm_sll_epi64(t, _mm_set1_epi64x(64 - num_bits));
746 c1 = _mm_or_si128(c1, t);
748 _mm_storeu_si128((__m128i*)msp->tmp + 1, c1);
762 if (msp->bits <= 128)
765 if (msp->bits <= 128)
768 __m128i t = _mm_loadu_si128((__m128i*)msp->tmp);
794 row = _mm_setzero_si128();
795 w0 = _mm_shuffle_epi32(inf_u_q, _MM_SHUFFLE(N, N, N, N));
797 flags = _mm_and_si128(w0, _mm_set_epi32(0x8880, 0x4440, 0x2220, 0x1110));
798 insig = _mm_cmpeq_epi32(flags, _mm_setzero_si128());
799 if (_mm_movemask_epi8(insig) != 0xFFFF)
801 U_q = _mm_shuffle_epi32(U_q, _MM_SHUFFLE(N, N, N, N));
802 flags = _mm_mullo_epi16(flags, _mm_set_epi16(1,1,2,2,4,4,8,8));
803 __m128i ms_vec = frwd_fetch<0xFF>(magsgn);
811 w0 = _mm_srli_epi32(flags, 15);
812 m_n = _mm_sub_epi32(U_q, w0);
813 m_n = _mm_andnot_si128(insig, m_n);
817 __m128i inc_sum = m_n;
818 inc_sum = _mm_add_epi32(inc_sum, _mm_bslli_si128(inc_sum, 4));
819 inc_sum = _mm_add_epi32(inc_sum, _mm_bslli_si128(inc_sum, 8));
820 int total_mn = _mm_extract_epi16(inc_sum, 6);
821 __m128i ex_sum = _mm_bslli_si128(inc_sum, 4);
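// Editor's note (not part of the source): the shifted adds above compute an
// inclusive prefix sum of the per-sample bit counts m_n across the four
// 32-bit lanes (a log-time scan). ex_sum, the sum shifted left by one lane,
// is the exclusive prefix sum used below as each sample's starting bit offset
// in the MagSgn bitstream, and total_mn is the total number of bits consumed.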
824 __m128i byte_idx = _mm_srli_epi32(ex_sum, 3);
825 __m128i bit_idx = _mm_and_si128(ex_sum, _mm_set1_epi32(7));
826 byte_idx = _mm_shuffle_epi8(byte_idx,
827 _mm_set_epi32(0x0C0C0C0C, 0x08080808, 0x04040404, 0x00000000));
828 byte_idx = _mm_add_epi32(byte_idx, _mm_set1_epi32(0x03020100));
829 __m128i d0 = _mm_shuffle_epi8(ms_vec, byte_idx);
830 byte_idx = _mm_add_epi32(byte_idx, _mm_set1_epi32(0x01010101));
831 __m128i d1 = _mm_shuffle_epi8(ms_vec, byte_idx);
834 bit_idx = _mm_or_si128(bit_idx, _mm_slli_epi32(bit_idx, 16));
835 __m128i bit_shift = _mm_shuffle_epi8(
836 _mm_set_epi8(1, 3, 7, 15, 31, 63, 127, -1,
837 1, 3, 7, 15, 31, 63, 127, -1), bit_idx);
838 bit_shift = _mm_add_epi16(bit_shift, _mm_set1_epi16(0x0101));
839 d0 = _mm_mullo_epi16(d0, bit_shift);
840 d0 = _mm_srli_epi16(d0, 8);
841 d1 = _mm_mullo_epi16(d1, bit_shift);
842 d1 = _mm_and_si128(d1, _mm_set1_epi32((si32)0xFF00FF00));
843 d0 = _mm_or_si128(d0, d1);
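// Editor's note (not part of the source): SSSE3 has no per-lane variable
// shift, so the multiply stands in for one. bit_shift holds 2^(8 - bit_idx)
// per 16-bit lane, and (x * 2^(8 - bit_idx)) >> 8 extracts an 8-bit window of
// x starting at bit_idx; d0 and d1 together leave each lane's MagSgn codeword
// aligned to bit 0.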
847 __m128i ones = _mm_set1_epi32(1);
848 __m128i twos = _mm_set1_epi32(2);
849 __m128i U_q_m1 = _mm_sub_epi32(U_q, ones);
850 U_q_m1 = _mm_and_si128(U_q_m1, _mm_set_epi32(0,0,0,0x1F));
851 w0 = _mm_sub_epi32(twos, w0);
852 shift = _mm_sll_epi32(w0, U_q_m1);
853 ms_vec = _mm_and_si128(d0, _mm_sub_epi32(shift, ones));
856 w0 = _mm_and_si128(flags, _mm_set1_epi32(0x800));
857 w0 = _mm_cmpeq_epi32(w0, _mm_setzero_si128());
858 w0 = _mm_andnot_si128(w0, shift);
859 ms_vec = _mm_or_si128(ms_vec, w0);
860 w0 = _mm_slli_epi32(ms_vec, 31);
861 ms_vec = _mm_or_si128(ms_vec, ones);
862 __m128i tvn = ms_vec;
863 ms_vec = _mm_add_epi32(ms_vec, twos);
864 ms_vec = _mm_slli_epi32(ms_vec, (si32)p - 1);
865 ms_vec = _mm_or_si128(ms_vec, w0);
866 row = _mm_andnot_si128(insig, ms_vec);
868 ms_vec = _mm_andnot_si128(insig, tvn);
870 tvn = _mm_shuffle_epi8(ms_vec,
871 _mm_set_epi32(-1, -1, 0x0F0E0D0C, 0x07060504));
873 tvn = _mm_shuffle_epi8(ms_vec,
874 _mm_set_epi32(-1, 0x0F0E0D0C, 0x07060504, -1));
877 vn = _mm_or_si128(vn, tvn);
904 row = _mm_setzero_si128();
905 w0 = _mm_shuffle_epi8(inf_u_q,
906 _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504,
907 0x0100, 0x0100, 0x0100, 0x0100));
909 flags = _mm_and_si128(w0,
910 _mm_set_epi16((si16)0x8880, 0x4440, 0x2220, 0x1110,
911 (si16)0x8880, 0x4440, 0x2220, 0x1110));
912 insig = _mm_cmpeq_epi16(flags, _mm_setzero_si128());
913 if (_mm_movemask_epi8(insig) != 0xFFFF)
915 U_q = _mm_shuffle_epi8(U_q,
916 _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504,
917 0x0100, 0x0100, 0x0100, 0x0100));
918 flags = _mm_mullo_epi16(flags, _mm_set_epi16(1,2,4,8,1,2,4,8));
919 __m128i ms_vec = frwd_fetch<0xFF>(magsgn);
927 w0 = _mm_srli_epi16(flags, 15);
928 m_n = _mm_sub_epi16(U_q, w0);
929 m_n = _mm_andnot_si128(insig, m_n);
933 __m128i inc_sum = m_n;
934 inc_sum = _mm_add_epi16(inc_sum, _mm_bslli_si128(inc_sum, 2));
935 inc_sum = _mm_add_epi16(inc_sum, _mm_bslli_si128(inc_sum, 4));
936 inc_sum = _mm_add_epi16(inc_sum, _mm_bslli_si128(inc_sum, 8));
937 int total_mn = _mm_extract_epi16(inc_sum, 7);
938 __m128i ex_sum = _mm_bslli_si128(inc_sum, 2);
941 __m128i byte_idx = _mm_srli_epi16(ex_sum, 3);
942 __m128i bit_idx = _mm_and_si128(ex_sum, _mm_set1_epi16(7));
943 byte_idx = _mm_shuffle_epi8(byte_idx,
944 _mm_set_epi16(0x0E0E, 0x0C0C, 0x0A0A, 0x0808,
945 0x0606, 0x0404, 0x0202, 0x0000));
946 byte_idx = _mm_add_epi16(byte_idx, _mm_set1_epi16(0x0100));
947 __m128i d0 = _mm_shuffle_epi8(ms_vec, byte_idx);
948 byte_idx = _mm_add_epi16(byte_idx, _mm_set1_epi16(0x0101));
949 __m128i d1 = _mm_shuffle_epi8(ms_vec, byte_idx);
952 __m128i bit_shift = _mm_shuffle_epi8(
953 _mm_set_epi8(1, 3, 7, 15, 31, 63, 127, -1,
954 1, 3, 7, 15, 31, 63, 127, -1), bit_idx);
955 bit_shift = _mm_add_epi16(bit_shift, _mm_set1_epi16(0x0101));
956 d0 = _mm_mullo_epi16(d0, bit_shift);
957 d0 = _mm_srli_epi16(d0, 8);
958 d1 = _mm_mullo_epi16(d1, bit_shift);
959 d1 = _mm_and_si128(d1, _mm_set1_epi16((si16)0xFF00));
960 d0 = _mm_or_si128(d0, d1);
963 __m128i shift, t0, t1, Uq0, Uq1;
964 __m128i ones = _mm_set1_epi16(1);
965 __m128i twos = _mm_set1_epi16(2);
966 __m128i U_q_m1 = _mm_sub_epi32(U_q, ones);
967 Uq0 = _mm_and_si128(U_q_m1, _mm_set_epi32(0,0,0,0x1F));
968 Uq1 = _mm_bsrli_si128(U_q_m1, 14);
969 w0 = _mm_sub_epi16(twos, w0);
970 t0 = _mm_and_si128(w0, _mm_set_epi64x(0, -1));
971 t1 = _mm_and_si128(w0, _mm_set_epi64x(-1, 0));
972 t0 = _mm_sll_epi16(t0, Uq0);
973 t1 = _mm_sll_epi16(t1, Uq1);
974 shift = _mm_or_si128(t0, t1);
975 ms_vec = _mm_and_si128(d0, _mm_sub_epi16(shift, ones));
978 w0 = _mm_and_si128(flags, _mm_set1_epi16(0x800));
979 w0 = _mm_cmpeq_epi16(w0, _mm_setzero_si128());
980 w0 = _mm_andnot_si128(w0, shift);
981 ms_vec = _mm_or_si128(ms_vec, w0);
982 w0 = _mm_slli_epi16(ms_vec, 15);
983 ms_vec = _mm_or_si128(ms_vec, ones);
984 __m128i tvn = ms_vec;
985 ms_vec = _mm_add_epi16(ms_vec, twos);
986 ms_vec = _mm_slli_epi16(ms_vec, (si32)p - 1);
987 ms_vec = _mm_or_si128(ms_vec, w0);
988 row = _mm_andnot_si128(insig, ms_vec);
990 ms_vec = _mm_andnot_si128(insig, tvn);
991 w0 = _mm_shuffle_epi8(ms_vec,
992 _mm_set_epi16(-1, -1, -1, -1, -1, -1, 0x0706, 0x0302));
993 vn = _mm_or_si128(vn, w0);
994 w0 = _mm_shuffle_epi8(ms_vec,
995 _mm_set_epi16(-1, -1, -1, -1, -1, 0x0F0E, 0x0B0A, -1));
996 vn = _mm_or_si128(vn, w0);
1023 ui32 missing_msbs, ui32 num_passes,
1028 static bool insufficient_precision = false;
1029 static bool modify_code = false;
1030 static bool truncate_spp_mrp = false;
1032 if (num_passes > 1 && lengths2 == 0)
1034 OJPH_WARN(0x00010001, "A malformed codeblock that has more than "
1035 "one coding pass, but zero length for "
1036 "2nd and potential 3rd pass.\n");
1042 OJPH_WARN(0x00010002, "We do not support more than 3 coding passes; "
1043 "This codeblock has %d passes.\n",
1048 if (missing_msbs > 30)
1050 if (insufficient_precision == false)
1052 insufficient_precision = true;
1053 OJPH_WARN(0x00010003, "32 bits are not enough to decode this "
1054 "codeblock. This message will not be "
1055 "displayed again.\n");
1059 else if (missing_msbs == 30)
1061 if (modify_code == false) {
1063 OJPH_WARN(0x00010004, "Not enough precision to decode the cleanup "
1064 "pass. The code can be modified to support "
1065 "this case. This message will not be "
1066 "displayed again.\n");
1070 else if (missing_msbs == 29)
1072 if (num_passes > 1) {
1074 if (truncate_spp_mrp == false) {
1075 truncate_spp_mrp = true;
1076 OJPH_WARN(0x00010005, "Not enough precision to decode the SgnProp "
1077 "nor MagRef passes; both will be skipped. "
1078 "This message will not be displayed "
1083 ui32 p = 30 - missing_msbs;
1089 OJPH_WARN(0x00010006, "Wrong codeblock length.\n");
1095 lcup = (int)lengths1;
1097 scup = (((int)coded_data[lcup-1]) << 4) + (coded_data[lcup-2] & 0xF);
1098 if (scup < 2 || scup > lcup || scup > 4079)
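// Editor's note (not part of the source): scup comes from the last two bytes
// of the cleanup segment. Hypothetical example: coded_data[lcup-1] == 0x34
// and coded_data[lcup-2] == 0x5A give scup = (0x34 << 4) + (0x5A & 0xF)
// = 0x340 + 0xA = 842, which passes the range check above provided lcup is
// at least that large.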
1116 ui16 scratch[8 * 513] = {0};
1124 ui32 sstr = ((width + 2u) + 7u) & ~7u;
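// Editor's note (not part of the source): sstr rounds (width + 2) up to a
// multiple of 8, e.g. width == 61 gives sstr = ((61 + 2) + 7) & ~7 = 64.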
1126 assert((stride & 0x3) == 0);
1128 ui32 mmsbp2 = missing_msbs + 2;
1140 mel_init(&mel, coded_data, lcup, scup);
1142 rev_init(&vlc, coded_data, lcup, scup);
1152 for (ui32 x = 0; x < width; sp += 4)
1171 t0 = (run == -1) ? t0 : 0;
1185 c_q = ((t0 & 0x10U) << 3) | ((t0 & 0xE0U) << 2);
1194 t1 = vlc_tbl0[c_q + (vlc_val & 0x7F)];
1197 if (c_q == 0 && x < width)
1202 t1 = (run == -1) ? t1 : 0;
1207 t1 = x < width ? t1 : 0;
1216 c_q = ((t1 & 0x10U) << 3) | ((t1 & 0xE0U) << 2);
1224 ui32 uvlc_mode = ((t0 & 0x8U) << 3) | ((t1 & 0x8U) << 4);
1225 if (uvlc_mode == 0xc0)
1229 uvlc_mode += (run == -1) ? 0x40 : 0;
1246 ui32 len = uvlc_entry & 0xF;
1247 ui32 tmp = vlc_val & ((1 << len) - 1);
1251 len = uvlc_entry & 0x7;
1253 ui16 u_q = (ui16)(1 + (uvlc_entry&7) + (tmp&~(0xFFU<<len)));
1255 u_q = (ui16)(1 + (uvlc_entry >> 3) + (tmp >> len));
1261 for (ui32 y = 2; y < height; y += 2)
1264 ui16 *sp = scratch + (y >> 1) * sstr;
1266 for (ui32 x = 0; x < width; sp += 4)
1272 c_q |= ((sp[0 - (si32)sstr] & 0xA0U) << 2);
1273 c_q |= ((sp[2 - (si32)sstr] & 0x20U) << 4);
1289 t0 = (run == -1) ? t0 : 0;
1304 c_q = ((t0 & 0x40U) << 2) | ((t0 & 0x80U) << 1);
1306 c_q |= sp[0 - (si32)sstr] & 0x80;
1308 c_q |= ((sp[2 - (si32)sstr] & 0xA0U) << 2);
1309 c_q |= ((sp[4 - (si32)sstr] & 0x20U) << 4);
1318 t1 = vlc_tbl1[c_q + (vlc_val & 0x7F)];
1321 if (c_q == 0 && x < width)
1326 t1 = (run == -1) ? t1 : 0;
1331 t1 = x < width ? t1 : 0;
1341 c_q = ((t1 & 0x40U) << 2) | ((t1 & 0x80U) << 1);
1343 c_q |= sp[2 - (si32)sstr] & 0x80;
1351 ui32 uvlc_mode = ((t0 & 0x8U) << 3) | ((t1 & 0x8U) << 4);
1357 ui32 len = uvlc_entry & 0xF;
1358 ui32 tmp = vlc_val & ((1 << len) - 1);
1362 len = uvlc_entry & 0x7;
1364 ui16 u_q = (ui16)((uvlc_entry & 7) + (tmp & ~(0xFU << len)));
1366 u_q = (ui16)((uvlc_entry >> 3) + (tmp >> len));
1389 const int v_n_size = 512 + 8;
1390 ui32 v_n_scratch[2 * v_n_size] = {0};
1393 frwd_init<0xFF>(&magsgn, coded_data, lcup - scup);
1397 ui32 *vp = v_n_scratch;
1398 ui32 *dp = decoded_data;
1401 for (ui32 x = 0; x < width; x += 4, sp += 4, vp += 2, dp += 4)
1405 __m128i inf_u_q, U_q;
1408 inf_u_q = _mm_loadu_si128((__m128i*)sp);
1409 U_q = _mm_srli_epi32(inf_u_q, 16);
1411 w0 = _mm_cmpgt_epi32(U_q, _mm_set1_epi32((int)mmsbp2));
1412 int i = _mm_movemask_epi8(w0);
1417 __m128i vn = _mm_set1_epi32(2);
1418 __m128i row0 = decode_one_quad32<0>(inf_u_q, U_q, &magsgn, p, vn);
1419 __m128i row1 = decode_one_quad32<1>(inf_u_q, U_q, &magsgn, p, vn);
1420 w0 = _mm_loadu_si128((__m128i*)vp);
1421 w0 = _mm_and_si128(w0, _mm_set_epi32(0,0,0,-1));
1422 w0 = _mm_or_si128(w0, vn);
1423 _mm_storeu_si128((__m128i*)vp, w0);
1426 w0 = _mm_unpacklo_epi32(row0, row1);
1427 w1 = _mm_unpackhi_epi32(row0, row1);
1428 row0 = _mm_unpacklo_epi32(w0, w1);
1429 row1 = _mm_unpackhi_epi32(w0, w1);
1430 _mm_store_si128((__m128i*)dp, row0);
1431 _mm_store_si128((__m128i*)(dp + stride), row1);
1435 for (ui32 y = 2; y < height; y += 2)
1439 ui32 *vp = v_n_scratch;
1440 const __m128i lut_lo = _mm_set_epi8(
1441 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 31
1443 const __m128i lut_hi = _mm_set_epi8(
1444 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 31
1446 const __m128i nibble_mask = _mm_set1_epi8(0x0F);
1447 const __m128i byte_offset8 = _mm_set1_epi16(8);
1448 const __m128i byte_offset16 = _mm_set1_epi16(16);
1449 const __m128i cc = _mm_set1_epi32(31);
1450 for (ui32 x = 0; x <= width; x += 8, vp += 4)
1453 v = _mm_loadu_si128((__m128i*)vp);
1455 t = _mm_and_si128(nibble_mask, v);
1456 v = _mm_and_si128(_mm_srli_epi16(v, 4), nibble_mask);
1457 t = _mm_shuffle_epi8(lut_lo, t);
1458 v = _mm_shuffle_epi8(lut_hi, v);
1459 v = _mm_min_epu8(v, t);
1461 t = _mm_srli_epi16(v, 8);
1462 v = _mm_or_si128(v, byte_offset8);
1463 v = _mm_min_epu8(v, t);
1465 t = _mm_srli_epi32(v, 16);
1466 v = _mm_or_si128(v, byte_offset16);
1467 v = _mm_min_epu8(v, t);
1469 v = _mm_sub_epi16(cc, v);
1470 _mm_storeu_si128((__m128i*)(vp + v_n_size), v);
1474 ui32 *vp = v_n_scratch;
1475 ui16 *sp = scratch + (y >> 1) * sstr;
1476 ui32 *dp = decoded_data + y * stride;
1479 for (ui32 x = 0; x < width; x += 4, sp += 4, vp += 2, dp += 4)
1483 __m128i inf_u_q, U_q;
1486 __m128i gamma, emax, kappa, u_q;
1488 inf_u_q = _mm_loadu_si128((__m128i*)sp);
1489 gamma = _mm_and_si128(inf_u_q, _mm_set1_epi32(0xF0));
1490 w0 = _mm_sub_epi32(gamma, _mm_set1_epi32(1));
1491 gamma = _mm_and_si128(gamma, w0);
1492 gamma = _mm_cmpeq_epi32(gamma, _mm_setzero_si128());
1494 emax = _mm_loadu_si128((__m128i*)(vp + v_n_size));
1495 w0 = _mm_bsrli_si128(emax, 4);
1496 emax = _mm_max_epi16(w0, emax);
1497 emax = _mm_andnot_si128(gamma, emax);
1499 kappa = _mm_set1_epi32(1);
1500 kappa = _mm_max_epi16(emax, kappa);
1502 u_q = _mm_srli_epi32(inf_u_q, 16);
1503 U_q = _mm_add_epi32(u_q, kappa);
1505 w0 = _mm_cmpgt_epi32(U_q, _mm_set1_epi32((int)mmsbp2));
1506 int i = _mm_movemask_epi8(w0);
1511 __m128i vn = _mm_set1_epi32(2);
1512 __m128i row0 = decode_one_quad32<0>(inf_u_q, U_q, &magsgn, p, vn);
1513 __m128i row1 = decode_one_quad32<1>(inf_u_q, U_q, &magsgn, p, vn);
1514 w0 = _mm_loadu_si128((__m128i*)vp);
1515 w0 = _mm_and_si128(w0, _mm_set_epi32(0,0,0,-1));
1516 w0 = _mm_or_si128(w0, vn);
1517 _mm_storeu_si128((__m128i*)vp, w0);
1520 w0 = _mm_unpacklo_epi32(row0, row1);
1521 w1 = _mm_unpackhi_epi32(row0, row1);
1522 row0 = _mm_unpacklo_epi32(w0, w1);
1523 row1 = _mm_unpackhi_epi32(w0, w1);
1524 _mm_store_si128((__m128i*)dp, row0);
1525 _mm_store_si128((__m128i*)(dp + stride), row1);
1540 const int v_n_size = 512 + 8;
1541 ui16 v_n_scratch[2 * v_n_size] = {0};
1544 frwd_init<0xFF>(&magsgn, coded_data, lcup - scup);
1548 ui16 *vp = v_n_scratch;
1549 ui32 *dp = decoded_data;
1552 for (ui32 x = 0; x < width; x += 4, sp += 4, vp += 2, dp += 4)
1556 __m128i inf_u_q, U_q;
1559 inf_u_q = _mm_loadu_si128((__m128i*)sp);
1560 U_q = _mm_srli_epi32(inf_u_q, 16);
1562 w0 = _mm_cmpgt_epi32(U_q, _mm_set1_epi32((int)mmsbp2));
1563 int i = _mm_movemask_epi8(w0);
1568 __m128i vn = _mm_set1_epi16(2);
1570 w0 = _mm_loadu_si128((__m128i*)vp);
1571 w0 = _mm_and_si128(w0, _mm_set_epi16(0,0,0,0,0,0,0,-1));
1572 w0 = _mm_or_si128(w0, vn);
1573 _mm_storeu_si128((__m128i*)vp, w0);
1576 w0 = _mm_shuffle_epi8(row,
1577 _mm_set_epi16(0x0D0C, -1, 0x0908, -1,
1578 0x0504, -1, 0x0100, -1));
1579 _mm_store_si128((__m128i*)dp, w0);
1580 w1 = _mm_shuffle_epi8(row,
1581 _mm_set_epi16(0x0F0E, -1, 0x0B0A, -1,
1582 0x0706, -1, 0x0302, -1));
1583 _mm_store_si128((__m128i*)(dp + stride), w1);
1587 for (ui32 y = 2; y < height; y += 2)
1591 ui16 *vp = v_n_scratch;
1592 const __m128i lut_lo = _mm_set_epi8(
1593 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 15
1595 const __m128i lut_hi = _mm_set_epi8(
1596 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 15
1598 const __m128i nibble_mask = _mm_set1_epi8(0x0F);
1599 const __m128i byte_offset8 = _mm_set1_epi16(8);
1600 const __m128i cc = _mm_set1_epi16(15);
1601 for (ui32 x = 0; x <= width; x += 16, vp += 8)
1604 v = _mm_loadu_si128((__m128i*)vp);
1606 t = _mm_and_si128(nibble_mask, v);
1607 v = _mm_and_si128(_mm_srli_epi16(v, 4), nibble_mask);
1608 t = _mm_shuffle_epi8(lut_lo, t);
1609 v = _mm_shuffle_epi8(lut_hi, v);
1610 v = _mm_min_epu8(v, t);
1612 t = _mm_srli_epi16(v, 8);
1613 v = _mm_or_si128(v, byte_offset8);
1614 v = _mm_min_epu8(v, t);
1616 v = _mm_sub_epi16(cc, v);
1617 _mm_storeu_si128((__m128i*)(vp + v_n_size), v);
1621 ui16 *vp = v_n_scratch;
1622 ui16 *sp = scratch + (y >> 1) * sstr;
1623 ui32 *dp = decoded_data + y * stride;
1626 for (ui32 x = 0; x < width; x += 4, sp += 4, vp += 2, dp += 4)
1630 __m128i inf_u_q, U_q;
1633 __m128i gamma, emax, kappa, u_q;
1635 inf_u_q = _mm_loadu_si128((__m128i*)sp);
1636 gamma = _mm_and_si128(inf_u_q, _mm_set1_epi32(0xF0));
1637 w0 = _mm_sub_epi32(gamma, _mm_set1_epi32(1));
1638 gamma = _mm_and_si128(gamma, w0);
1639 gamma = _mm_cmpeq_epi32(gamma, _mm_setzero_si128());
1641 emax = _mm_loadu_si128((__m128i*)(vp + v_n_size));
1642 w0 = _mm_bsrli_si128(emax, 2);
1643 emax = _mm_max_epi16(w0, emax);
1644 emax = _mm_shuffle_epi8(emax,
1645 _mm_set_epi16(-1, 0x0706, -1, 0x0504,
1646 -1, 0x0302, -1, 0x0100));
1647 emax = _mm_andnot_si128(gamma, emax);
1649 kappa = _mm_set1_epi32(1);
1650 kappa = _mm_max_epi16(emax, kappa);
1652 u_q = _mm_srli_epi32(inf_u_q, 16);
1653 U_q = _mm_add_epi32(u_q, kappa);
1655 w0 = _mm_cmpgt_epi32(U_q, _mm_set1_epi32((int)mmsbp2));
1656 int i = _mm_movemask_epi8(w0);
1661 __m128i vn = _mm_set1_epi16(2);
1663 w0 = _mm_loadu_si128((__m128i*)vp);
1664 w0 = _mm_and_si128(w0, _mm_set_epi16(0,0,0,0,0,0,0,-1));
1665 w0 = _mm_or_si128(w0, vn);
1666 _mm_storeu_si128((__m128i*)vp, w0);
1668 w0 = _mm_shuffle_epi8(row,
1669 _mm_set_epi16(0x0D0C, -1, 0x0908, -1,
1670 0x0504, -1, 0x0100, -1));
1671 _mm_store_si128((__m128i*)dp, w0);
1672 w1 = _mm_shuffle_epi8(row,
1673 _mm_set_epi16(0x0F0E, -1, 0x0B0A, -1,
1674 0x0706, -1, 0x0302, -1));
1675 _mm_store_si128((__m128i*)(dp + stride), w1);
1689 ui16* const sigma = scratch;
1691 ui32 mstr = (width + 3u) >> 2;
1693 mstr = ((mstr + 2u) + 7u) & ~7u;
1701 const __m128i mask_3 = _mm_set1_epi32(0x30);
1702 const __m128i mask_C = _mm_set1_epi32(0xC0);
1703 const __m128i shuffle_mask = _mm_set_epi32(-1, -1, -1, 0x0C080400);
1704 for (y = 0; y < height; y += 4)
1706 ui16* sp = scratch + (y >> 1) * sstr;
1707 ui16* dp = sigma + (y >> 2) * mstr;
1708 for (ui32 x = 0; x < width; x += 8, sp += 8, dp += 2)
1710 __m128i s0, s1, u3, uC, t0, t1;
1712 s0 = _mm_loadu_si128((__m128i*)(sp));
1713 u3 = _mm_and_si128(s0, mask_3);
1714 u3 = _mm_srli_epi32(u3, 4);
1715 uC = _mm_and_si128(s0, mask_C);
1716 uC = _mm_srli_epi32(uC, 2);
1717 t0 = _mm_or_si128(u3, uC);
1719 s1 = _mm_loadu_si128((__m128i*)(sp + sstr));
1720 u3 = _mm_and_si128(s1, mask_3);
1721 u3 = _mm_srli_epi32(u3, 2);
1722 uC = _mm_and_si128(s1, mask_C);
1723 t1 = _mm_or_si128(u3, uC);
1725 __m128i r = _mm_or_si128(t0, t1);
1726 r = _mm_shuffle_epi8(r, shuffle_mask);
1729 _mm_store_ss((float*)dp, _mm_castsi128_ps(r));
1735 ui16* dp = sigma + (y >> 2) * mstr;
1736 __m128i zero = _mm_setzero_si128();
1737 for (ui32 x = 0; x < width; x += 32, dp += 8)
1738 _mm_store_si128((__m128i*)dp, zero);
1754 ui16 prev_row_sig[256 + 8] = {0};
1757 frwd_init<0>(&sigprop, coded_data + lengths1, (int)lengths2);
1759 for (ui32 y = 0; y < height; y += 4)
1761 ui32 pattern = 0xFFFFu;
1762 if (height - y < 4) {
1764 if (height - y < 3) {
1774 ui16 *prev_sig = prev_row_sig;
1775 ui16 *cur_sig = sigma + (y >> 2) * mstr;
1776 ui32 *dpp = decoded_data + y * stride;
1777 for (ui32 x = 0; x < width; x += 4, dpp += 4, ++cur_sig, ++prev_sig)
1782 pattern = pattern >> (s * 4);
1797 ui32 ns = *(ui32*)(cur_sig + mstr);
1798 ui32 u = (ps & 0x88888888) >> 3;
1800 u |= (ns & 0x11111111) << 3;
1805 mbr |= (cs & 0x77777777) << 1;
1806 mbr |= (cs & 0xEEEEEEEE) >> 1;
1822 __m128i cwd_vec = frwd_fetch<0>(&sigprop);
1823 ui32 cwd = (ui32)_mm_extract_epi16(cwd_vec, 0);
1826 ui32 col_mask = 0xFu;
1827 ui32 inv_sig = ~cs & pattern;
1828 for (int i = 0; i < 16; i += 4, col_mask <<= 4)
1830 if ((col_mask & new_sig) == 0)
1834 ui32 sample_mask = 0x1111u & col_mask;
1835 if (new_sig & sample_mask)
1837 new_sig &= ~sample_mask;
1840 ui32 t = 0x33u << i;
1841 new_sig |= t & inv_sig;
1847 if (new_sig & sample_mask)
1849 new_sig &= ~sample_mask;
1852 ui32 t = 0x76u << i;
1853 new_sig |= t & inv_sig;
1859 if (new_sig & sample_mask)
1861 new_sig &= ~sample_mask;
1864 ui32 t = 0xECu << i;
1865 new_sig |= t & inv_sig;
1871 if (new_sig & sample_mask)
1873 new_sig &= ~sample_mask;
1876 ui32 t = 0xC8u << i;
1877 new_sig |= t & inv_sig;
1885 cwd |= (ui32)_mm_extract_epi16(cwd_vec, 1) << (16 - cnt);
1889 __m128i new_sig_vec = _mm_set1_epi16((si16)new_sig);
1890 new_sig_vec = _mm_shuffle_epi8(new_sig_vec,
1891 _mm_set_epi8(1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0));
1892 new_sig_vec = _mm_and_si128(new_sig_vec,
1893 _mm_set1_epi64x((si64)0x8040201008040201));
1894 new_sig_vec = _mm_cmpeq_epi8(new_sig_vec,
1895 _mm_set1_epi64x((si64)0x8040201008040201));
1899 __m128i inc_sum = new_sig_vec;
1900 inc_sum = _mm_abs_epi8(inc_sum);
1901 inc_sum = _mm_add_epi8(inc_sum, _mm_bslli_si128(inc_sum, 1));
1902 inc_sum = _mm_add_epi8(inc_sum, _mm_bslli_si128(inc_sum, 2));
1903 inc_sum = _mm_add_epi8(inc_sum, _mm_bslli_si128(inc_sum, 4));
1904 inc_sum = _mm_add_epi8(inc_sum, _mm_bslli_si128(inc_sum, 8));
1905 cnt += (ui32)_mm_extract_epi16(inc_sum, 7) >> 8;
1907 __m128i ex_sum = _mm_bslli_si128(inc_sum, 1);
1911 cwd_vec = _mm_set1_epi16((si16)cwd);
1912 cwd_vec = _mm_shuffle_epi8(cwd_vec,
1913 _mm_set_epi8(1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0));
1914 cwd_vec = _mm_and_si128(cwd_vec,
1915 _mm_set1_epi64x((si64)0x8040201008040201));
1916 cwd_vec = _mm_cmpeq_epi8(cwd_vec,
1917 _mm_set1_epi64x((si64)0x8040201008040201));
1918 cwd_vec = _mm_abs_epi8(cwd_vec);
1922 __m128i v = _mm_shuffle_epi8(cwd_vec, ex_sum);
1926 _mm_set_epi8(-1,-1,-1,12,-1,-1,-1,8,-1,-1,-1,4,-1,-1,-1,0);
1927 __m128i val = _mm_set1_epi32(3 << (p - 2));
1929 for (int c = 0; c < 4; ++c) {
1930 __m128i s0, s0_ns, s0_val;
1932 s0 = _mm_load_si128((__m128i*)dp);
1936 s0_ns = _mm_shuffle_epi8(new_sig_vec, m);
1937 s0_ns = _mm_cmpeq_epi32(s0_ns, _mm_set1_epi32(0xFF));
1940 s0_val = _mm_shuffle_epi8(v, m);
1941 s0_val = _mm_slli_epi32(s0_val, 31);
1942 s0_val = _mm_or_si128(s0_val, val);
1943 s0_val = _mm_and_si128(s0_val, s0_ns);
1946 s0 = _mm_or_si128(s0, s0_val);
1948 _mm_store_si128((__m128i*)dp, s0);
1951 m = _mm_add_epi32(m, _mm_set1_epi32(1));
1958 *prev_sig = (ui16)(new_sig);
1962 new_sig |= (t & 0x7777) << 1;
1963 new_sig |= (t & 0xEEEE) >> 1;
1976 rev_init_mrp(&magref, coded_data, (int)lengths1, (int)lengths2);
1978 for (ui32 y = 0; y < height; y += 4)
1980 ui16 *cur_sig = sigma + (y >> 2) * mstr;
1981 ui32 *dpp = decoded_data + y * stride;
1982 for (ui32 i = 0; i < width; i += 4, dpp += 4)
1987 ui16 sig = *cur_sig++;
1995 __m128i sig_vec = _mm_set1_epi16((si16)sig);
1996 sig_vec = _mm_shuffle_epi8(sig_vec,
1997 _mm_set_epi8(1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0));
1998 sig_vec = _mm_and_si128(sig_vec,
1999 _mm_set1_epi64x((si64)0x8040201008040201));
2000 sig_vec = _mm_cmpeq_epi8(sig_vec,
2001 _mm_set1_epi64x((si64)0x8040201008040201));
2002 sig_vec = _mm_abs_epi8(sig_vec);
2006 __m128i inc_sum = sig_vec;
2007 inc_sum = _mm_add_epi8(inc_sum, _mm_bslli_si128(inc_sum, 1));
2008 inc_sum = _mm_add_epi8(inc_sum, _mm_bslli_si128(inc_sum, 2));
2009 inc_sum = _mm_add_epi8(inc_sum, _mm_bslli_si128(inc_sum, 4));
2010 inc_sum = _mm_add_epi8(inc_sum, _mm_bslli_si128(inc_sum, 8));
2011 total_bits = _mm_extract_epi16(inc_sum, 7) >> 8;
2012 __m128i ex_sum = _mm_bslli_si128(inc_sum, 1);
2019 __m128i cwd_vec = _mm_set1_epi16((si16)cwd);
2020 cwd_vec = _mm_shuffle_epi8(cwd_vec,
2021 _mm_set_epi8(1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0));
2022 cwd_vec = _mm_and_si128(cwd_vec,
2023 _mm_set1_epi64x((si64)0x8040201008040201));
2024 cwd_vec = _mm_cmpeq_epi8(cwd_vec,
2025 _mm_set1_epi64x((si64)0x8040201008040201));
2026 cwd_vec = _mm_add_epi8(cwd_vec, _mm_set1_epi8(1));
2027 cwd_vec = _mm_add_epi8(cwd_vec, cwd_vec);
2028 cwd_vec = _mm_or_si128(cwd_vec, _mm_set1_epi8(1));
2032 _mm_set_epi8(-1,-1,-1,12,-1,-1,-1,8,-1,-1,-1,4,-1,-1,-1,0);
2034 for (int c = 0; c < 4; ++c) {
2035 __m128i s0, s0_sig, s0_idx, s0_val;
2037 s0 = _mm_load_si128((__m128i*)dp);
2039 s0_sig = _mm_shuffle_epi8(sig_vec, m);
2040 s0_sig = _mm_cmpeq_epi8(s0_sig, _mm_setzero_si128());
2042 s0_idx = _mm_shuffle_epi8(ex_sum, m);
2043 s0_val = _mm_shuffle_epi8(cwd_vec, s0_idx);
2045 s0_val = _mm_andnot_si128(s0_sig, s0_val);
2047 s0_val = _mm_slli_epi32(s0_val, (si32)p - 2);
2048 s0 = _mm_xor_si128(s0, s0_val);
2050 _mm_store_si128((__m128i*)dp, s0);
2053 m = _mm_add_epi32(m, _mm_set1_epi32(1));
ui16 uvlc_tbl0[256+64]
uvlc_tbl0 contains decoding information for the initial row of quads
ui16 uvlc_tbl1[256]
uvlc_tbl1 contains decoding information for non-initial rows of quads
ui16 vlc_tbl0[1024]
vlc_tbl0 contains decoding information for the initial row of quads
ui16 vlc_tbl1[1024]
vlc_tbl1 contains decoding information for non-initial rows of quads
static ui32 rev_fetch(rev_struct *vlcp)
Retrieves 32 bits from the head of a rev_struct structure.
static void rev_init_mrp(rev_struct *mrp, ui8 *data, int lcup, int len2)
Initializes the rev_struct structure for the MRP segment, and reads a number of bytes such that the next 32 b...
static void mel_read(dec_mel_st *melp)
Reads and unstuffs the MEL bitstream.
static void frwd_advance(frwd_struct *msp, ui32 num_bits)
Consumes num_bits bits from the bitstream of frwd_struct.
bool ojph_decode_codeblock_ssse3(ui8 *coded_data, ui32 *decoded_data, ui32 missing_msbs, ui32 num_passes, ui32 lengths1, ui32 lengths2, ui32 width, ui32 height, ui32 stride, bool stripe_causal)
Decodes one codeblock, processing the cleanup, significance propagation, and magnitude refinement pa...
static __m128i decode_two_quad16(const __m128i inf_u_q, __m128i U_q, frwd_struct *magsgn, ui32 p, __m128i &vn)
decodes two consecutive quads (one octet), using 16-bit data
static void rev_read_mrp(rev_struct *mrp)
Reads and unstuffs from rev_struct.
static ui32 rev_fetch_mrp(rev_struct *mrp)
Retrieves 32 bits from the head of a rev_struct structure.
static ui32 rev_advance_mrp(rev_struct *mrp, ui32 num_bits)
Consumes num_bits from a rev_struct structure.
static void rev_read(rev_struct *vlcp)
Reads and unstuffs data from a backwardly-growing segment.
static int mel_get_run(dec_mel_st *melp)
Retrieves one run from dec_mel_st; if there are no runs stored, the MEL segment is decoded.
static void rev_init(rev_struct *vlcp, ui8 *data, int lcup, int scup)
Initiates the rev_struct structure and reads a few bytes to move the read address to a multiple of 4.
static void mel_init(dec_mel_st *melp, ui8 *bbuf, int lcup, int scup)
Initiates a dec_mel_st structure for MEL decoding and reads some bytes in order to get the read addre...
static ui32 rev_advance(rev_struct *vlcp, ui32 num_bits)
Consumes num_bits from a rev_struct structure.
static void frwd_read(frwd_struct *msp)
Reads and unstuffs 32 bits from a forward-growing bitstream.
static ui32 frwd_fetch(frwd_struct *msp)
Fetches 32 bits from the frwd_struct bitstream.
static void frwd_init(frwd_struct *msp, const ui8 *data, int size)
Initializes the frwd_struct and reads some bytes.
static __m128i decode_one_quad32(const __m128i inf_u_q, __m128i U_q, frwd_struct *magsgn, ui32 p, __m128i &vn)
decodes one quad, using 32-bit data
static void mel_decode(dec_mel_st *melp)
Decodes unstuffed MEL segment bits stored in tmp to runs.
static ui32 count_leading_zeros(ui32 val)
MEL state structure for reading and decoding the MEL bitstream.
bool unstuff
true if the next bit needs to be unstuffed
int num_runs
number of decoded runs left in runs (maximum 8)
int size
number of bytes in MEL code
ui8 * data
the address of data (or bitstream)
int k
state of MEL decoder
int bits
number of bits stored in tmp
ui64 tmp
temporary buffer for read data
ui64 runs
runs of decoded MEL codewords (7 bits/run)
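A minimal sketch of dec_mel_st assembled from the members documented above; declaration order and any padding in the actual header may differ:

struct dec_mel_st {
  ui8 *data;     // the address of data (or bitstream)
  ui64 tmp;      // temporary buffer for read data
  int bits;      // number of bits stored in tmp
  int size;      // number of bytes in MEL code
  bool unstuff;  // true if the next bit needs to be unstuffed
  int k;         // state of MEL decoder
  int num_runs;  // number of decoded runs left in runs (maximum 8)
  ui64 runs;     // runs of decoded MEL codewords (7 bits/run)
};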
State structure for reading and unstuffing of forward-growing bitstreams; these are: MagSgn and SPP b...
const ui8 * data
pointer to bitstream
ui32 bits
number of bits stored in tmp
ui64 tmp
temporary buffer of read data
ui32 unstuff
1 if a bit needs to be unstuffed from the next byte
A structure for reading and unstuffing a segment that grows backward, such as VLC and MRP.
ui32 bits
number of bits stored in tmp
int size
number of bytes left
ui8 * data
pointer to where to read data
ui64 tmp
temporary buffer of read data
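A minimal sketch of rev_struct assembled from the members documented above; the listing also reads and writes vlcp->unstuff and mrp->unstuff, so a matching member is assumed here:

struct rev_struct {
  ui8 *data;     // pointer to where to read data
  ui64 tmp;      // temporary buffer of read data
  ui32 bits;     // number of bits stored in tmp
  int size;      // number of bytes left
  bool unstuff;  // assumed: whether the next byte needs a bit unstuffed
};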