53 #include <wasm_simd128.h>
61 #define OJPH_REPEAT2(a) a,a
62 #define OJPH_REPEAT4(a) a,a,a,a
63 #define OJPH_REPEAT8(a) a,a,a,a,a,a,a,a
64 #define OJPH_REPEAT16(a) a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a
108 ui32 val = 0xFFFFFFFF;
109 if (melp->size > 4) {
110 val = *(ui32*)melp->data;
114 else if (melp->size > 0)
117 while (melp->size > 1) {
118 ui32 v = *melp->data++;
119 ui32 m = ~(0xFFu << i);
120 val = (val & m) | (v << i);
125 ui32 v = *melp->data++;
127 ui32 m = ~(0xFFu << i);
128 val = (val & m) | (v << i);
133 int bits = 32 - melp->unstuff;
140 bool unstuff = ((val & 0xFF) == 0xFF);
142 t = t << (8 - unstuff);
145 t |= (val>>8) & 0xFF;
146 unstuff = (((val >> 8) & 0xFF) == 0xFF);
148 t = t << (8 - unstuff);
150 t |= (val>>16) & 0xFF;
151 unstuff = (((val >> 16) & 0xFF) == 0xFF);
153 t = t << (8 - unstuff);
155 t |= (val>>24) & 0xFF;
156 melp->unstuff = (((val >> 24) & 0xFF) == 0xFF);
160 melp->tmp |= ((ui64)t) << (64 - bits - melp->bits);
182 static const int mel_exp[13] = {
183 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5
192 while (melp->bits >= 6 && melp->num_runs < 8)
194 int eval = mel_exp[melp->k];
196 if (melp->tmp & (1ull<<63))
200 melp->k = melp->k + 1 < 12 ? melp->k + 1 : 12;
207 run = (int)(melp->tmp >> (63 - eval)) & ((1 << eval) - 1);
208 melp->k = melp->k - 1 > 0 ? melp->k - 1 : 0;
209 melp->tmp <<= eval + 1;
210 melp->bits -= eval + 1;
211 run = (run << 1) + 1;
213 eval = melp->num_runs * 7;
214 melp->runs &= ~((ui64)0x3F << eval);
215 melp->runs |= ((ui64)run) << eval;
233 melp->data = bbuf + lcup - scup;
236 melp->unstuff = false;
237 melp->size = scup - 1;
245 int num = 4 - (int)(intptr_t(melp->data) & 0x3);
246 for (int i = 0; i < num; ++i) {
247 assert(melp->unstuff == false || melp->data[0] <= 0x8F);
248 ui64 d = (melp->size > 0) ? *melp->data : 0xFF;
250 if (melp->size == 1) d |= 0xF;
252 melp->data += melp->size-- > 0;
253 int d_bits = 8 - melp->unstuff;
254 melp->tmp = (melp->tmp << d_bits) | d;
255 melp->bits += d_bits;
256 melp->unstuff = ((d & 0xFF) == 0xFF);
259 melp->tmp <<= (64 - melp->bits);
272 if (melp->num_runs == 0)
275 int t = melp->runs & 0x7F;
328 val = *(ui32*)(vlcp->data - 3);
332 else if (vlcp->size > 0)
335 while (vlcp->size > 0) {
336 ui32 v = *vlcp->data--;
344 ui32 tmp = val >> 24;
348 bits = 8 - ((vlcp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1 : 0);
349 bool unstuff = (val >> 24) > 0x8F;
351 tmp |= ((val >> 16) & 0xFF) << bits;
352 bits += 8 - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1 : 0);
353 unstuff = ((val >> 16) & 0xFF) > 0x8F;
355 tmp |= ((val >> 8) & 0xFF) << bits;
356 bits += 8 - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1 : 0);
357 unstuff = ((val >> 8) & 0xFF) > 0x8F;
359 tmp |= (val & 0xFF) << bits;
360 bits += 8 - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1 : 0);
361 unstuff = (val & 0xFF) > 0x8F;
364 vlcp->tmp |= (ui64)tmp << vlcp->bits;
366 vlcp->unstuff = unstuff;
387 vlcp->data = data + lcup - 2;
390 vlcp->size = scup - 2;
392 ui32 d = *vlcp->data--;
394 vlcp->bits = 4 - ((vlcp->tmp & 7) == 7);
395 vlcp->unstuff = (d | 0xF) > 0x8F;
402 int num = 1 + (int)(intptr_t(vlcp->data) & 0x3);
403 int tnum = num < vlcp->size ? num : vlcp->size;
404 for (int i = 0; i < tnum; ++i) {
408 ui32 d_bits = 8 - ((vlcp->unstuff && ((d & 0x7F) == 0x7F)) ? 1 : 0);
409 vlcp->tmp |= d << vlcp->bits;
410 vlcp->bits += d_bits;
411 vlcp->unstuff = d > 0x8F;
433 return (ui32)vlcp->tmp;
445 assert(num_bits <= vlcp->bits);
446 vlcp->tmp >>= num_bits;
447 vlcp->bits -= num_bits;
448 return (ui32)vlcp->tmp;
471 val = *(ui32*)(mrp->data - 3);
475 else if (mrp->size > 0)
478 while (mrp->size > 0) {
479 ui32 v = *mrp->data--;
487 ui32 bits, tmp = val >> 24;
490 bits = 8 - ((mrp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1 : 0);
491 bool unstuff = (val >> 24) > 0x8F;
494 tmp |= ((val >> 16) & 0xFF) << bits;
495 bits += 8 - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1 : 0);
496 unstuff = ((val >> 16) & 0xFF) > 0x8F;
498 tmp |= ((val >> 8) & 0xFF) << bits;
499 bits += 8 - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1 : 0);
500 unstuff = ((val >> 8) & 0xFF) > 0x8F;
502 tmp |= (val & 0xFF) << bits;
503 bits += 8 - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1 : 0);
504 unstuff = (val & 0xFF) > 0x8F;
506 mrp->tmp |= (ui64)tmp << mrp->bits;
508 mrp->unstuff = unstuff;
529 mrp->data = data + lcup + len2 - 1;
539 int num = 1 + (int)(intptr_t(mrp->data) & 0x3);
540 for (int i = 0; i < num; ++i) {
543 d = (mrp->size-- > 0) ? *mrp->data-- : 0;
545 ui32 d_bits = 8 - ((mrp->unstuff && ((d & 0x7F) == 0x7F)) ? 1 : 0);
546 mrp->tmp |= d << mrp->bits;
548 mrp->unstuff = d > 0x8F;
569 return (ui32)mrp->tmp;
580 assert(num_bits <= mrp->bits);
581 mrp->tmp >>= num_bits;
582 mrp->bits -= num_bits;
583 return (ui32)mrp->tmp;
620 assert(msp->bits <= 128);
622 v128_t offset, val, validity, all_xff;
623 val = wasm_v128_load(msp->data);
624 int bytes = msp->size >= 16 ? 16 : msp->size;
625 validity = wasm_i8x16_splat((char)bytes);
629 offset = wasm_i64x2_const(0x0706050403020100,0x0F0E0D0C0B0A0908);
630 validity = wasm_i8x16_gt(validity, offset);
634 v128_t t = wasm_v128_xor(validity, all_xff);
635 val = wasm_v128_or(t, val);
638 val = wasm_v128_and(validity, val);
643 ff_bytes = wasm_i8x16_eq(val, all_xff);
644 ff_bytes = wasm_v128_and(ff_bytes, validity);
645 ui32 flags = wasm_i8x16_bitmask(ff_bytes);
647 ui32 next_unstuff = flags >> 16;
648 flags |= msp->unstuff;
660 t = wasm_i8x16_splat((char)loc);
661 m = wasm_i8x16_gt(offset, t);
663 t = wasm_v128_and(m, val);
664 c = wasm_u64x2_shr(t, 1);
665 t = wasm_i64x2_shuffle(t, wasm_i64x2_const(0, 0), 1, 2);
666 t = wasm_i64x2_shl(t, 63);
667 t = wasm_v128_or(t, c);
669 val = wasm_v128_or(t, wasm_v128_andnot(val, m));
673 assert(msp->bits >= 0 && msp->bits <= 128);
674 int cur_bytes = msp->bits >> 3;
675 ui32 cur_bits = msp->bits & 7;
677 b1 = wasm_i64x2_shl(val, cur_bits);
679 b2 = wasm_i64x2_shuffle(wasm_i64x2_const(0, 0), val, 1, 2);
680 b2 = wasm_u64x2_shr(b2, 64u - cur_bits);
681 b2 = (cur_bits > 0) ? b2 : wasm_i64x2_const(0, 0);
682 b1 = wasm_v128_or(b1, b2);
683 b2 = wasm_v128_load(msp->tmp + cur_bytes);
684 b2 = wasm_v128_or(b1, b2);
685 wasm_v128_store(msp->tmp + cur_bytes, b2);
687 ui32 consumed_bits = bits < 128u - cur_bits ? bits : 128u - cur_bits;
688 cur_bytes = (msp->bits + consumed_bits + 7) >> 3;
689 int upper = wasm_u16x8_extract_lane(val, 7);
690 upper >>= consumed_bits + 16 - 128;
691 msp->tmp[cur_bytes] = (ui8)upper;
694 msp->unstuff = next_unstuff;
695 assert(msp->unstuff == 0 || msp->unstuff == 1);
712 wasm_v128_store(msp->tmp, wasm_i64x2_const(0, 0));
713 wasm_v128_store(msp->tmp + 16, wasm_i64x2_const(0, 0));
714 wasm_v128_store(msp->tmp + 32, wasm_i64x2_const(0, 0));
732 assert(num_bits > 0 && num_bits <= msp->bits && num_bits < 128);
733 msp->bits -= num_bits;
735 v128_t *p = (v128_t*)(msp->tmp + ((num_bits >> 3) & 0x18));
738 v128_t v0, v1, c0, c1, t;
739 v0 = wasm_v128_load(p);
740 v1 = wasm_v128_load(p + 1);
743 c0 = wasm_u64x2_shr(v0, num_bits);
744 t = wasm_i64x2_shuffle(v0, wasm_i64x2_const(0, 0), 1, 2);
745 t = wasm_i64x2_shl(t, 64 - num_bits);
746 t = (num_bits > 0) ? t : wasm_i64x2_const(0, 0);
747 c0 = wasm_v128_or(c0, t);
748 t = wasm_i64x2_shuffle(wasm_i64x2_const(0, 0), v1, 1, 2);
749 t = wasm_i64x2_shl(t, 64 - num_bits);
750 t = (num_bits > 0) ? t : wasm_i64x2_const(0, 0);
751 c0 = wasm_v128_or(c0, t);
753 wasm_v128_store(msp->tmp, c0);
755 c1 = wasm_u64x2_shr(v1, num_bits);
756 t = wasm_i64x2_shuffle(v1, wasm_i64x2_const(0, 0), 1, 2);
757 t = wasm_i64x2_shl(t, 64 - num_bits);
758 t = (num_bits > 0) ? t : wasm_i64x2_const(0, 0);
759 c1 = wasm_v128_or(c1, t);
761 wasm_v128_store(msp->tmp + 16, c1);
775 if (msp->bits <= 128)
778 if (msp->bits <= 128)
781 v128_t t = wasm_v128_load(msp->tmp);
807 row = wasm_i64x2_const(0, 0);
808 w0 = wasm_i32x4_shuffle(inf_u_q, inf_u_q, N, N, N, N);
810 flags = wasm_v128_and(w0, wasm_i32x4_const(0x1110,0x2220,0x4440,0x8880));
811 insig = wasm_i32x4_eq(flags, wasm_i64x2_const(0, 0));
812 if (wasm_i8x16_bitmask(insig) != 0xFFFF)
814 U_q = wasm_i32x4_shuffle(U_q, U_q, N, N, N, N);
815 flags = wasm_i16x8_mul(flags, wasm_i16x8_const(8,8,4,4,2,2,1,1));
816 v128_t ms_vec = frwd_fetch<0xFF>(magsgn);
824 w0 = wasm_u32x4_shr(flags, 15);
825 m_n = wasm_i32x4_sub(U_q, w0);
826 m_n = wasm_v128_andnot(m_n, insig);
830 v128_t ex_sum, shfl, inc_sum = m_n;
831 shfl = wasm_i32x4_shuffle(wasm_i64x2_const(0,0), inc_sum, 3, 4, 5, 6);
832 inc_sum = wasm_i32x4_add(inc_sum, shfl);
833 shfl = wasm_i64x2_shuffle(wasm_i64x2_const(0,0), inc_sum, 1, 2);
834 inc_sum = wasm_i32x4_add(inc_sum, shfl);
835 int total_mn = wasm_u16x8_extract_lane(inc_sum, 6);
836 ex_sum = wasm_i32x4_shuffle(wasm_i64x2_const(0,0), inc_sum, 3, 4, 5, 6);
839 v128_t byte_idx = wasm_u32x4_shr(ex_sum, 3);
841 wasm_v128_and(ex_sum, wasm_i32x4_const(OJPH_REPEAT4(7)));
842 byte_idx = wasm_i8x16_swizzle(byte_idx,
843 wasm_i32x4_const(0x00000000, 0x04040404, 0x08080808, 0x0C0C0C0C));
845 wasm_i32x4_add(byte_idx, wasm_i32x4_const(OJPH_REPEAT4(0x03020100)));
846 v128_t d0 = wasm_i8x16_swizzle(ms_vec, byte_idx);
848 wasm_i32x4_add(byte_idx, wasm_i32x4_const(OJPH_REPEAT4(0x01010101)));
849 v128_t d1 = wasm_i8x16_swizzle(ms_vec, byte_idx);
852 bit_idx = wasm_v128_or(bit_idx, wasm_i32x4_shl(bit_idx, 16));
853 v128_t bit_shift = wasm_i8x16_swizzle(
854 wasm_i8x16_const(-1, 127, 63, 31, 15, 7, 3, 1,
855 -1, 127, 63, 31, 15, 7, 3, 1), bit_idx);
857 wasm_i16x8_add(bit_shift, wasm_i16x8_const(OJPH_REPEAT8(0x0101)));
858 d0 = wasm_i16x8_mul(d0, bit_shift);
859 d0 = wasm_u16x8_shr(d0, 8);
860 d1 = wasm_i16x8_mul(d1, bit_shift);
862 wasm_v128_and(d1, wasm_u32x4_const(OJPH_REPEAT4(0xFF00FF00)));
863 d0 = wasm_v128_or(d0, d1);
869 ui32 U_q_m1 = wasm_u32x4_extract_lane(U_q, 0) - 1u;
870 w0 = wasm_i32x4_sub(twos, w0);
871 shift = wasm_i32x4_shl(w0, U_q_m1);
872 ms_vec = wasm_v128_and(d0, wasm_i32x4_sub(shift, ones));
875 w0 = wasm_v128_and(flags, wasm_i32x4_const(OJPH_REPEAT4(0x800)));
876 w0 = wasm_i32x4_eq(w0, wasm_i64x2_const(0, 0));
877 w0 = wasm_v128_andnot(shift, w0);
878 ms_vec = wasm_v128_or(ms_vec, w0);
879 w0 = wasm_i32x4_shl(ms_vec, 31);
880 ms_vec = wasm_v128_or(ms_vec, ones);
882 ms_vec = wasm_i32x4_add(ms_vec, twos);
883 ms_vec = wasm_i32x4_shl(ms_vec, p - 1);
884 ms_vec = wasm_v128_or(ms_vec, w0);
885 row = wasm_v128_andnot(ms_vec, insig);
887 ms_vec = wasm_v128_andnot(tvn, insig);
889 tvn = wasm_i8x16_swizzle(ms_vec,
890 wasm_i32x4_const(0x07060504, 0x0F0E0D0C, -1, -1));
892 tvn = wasm_i8x16_swizzle(ms_vec,
893 wasm_i32x4_const(-1, 0x07060504, 0x0F0E0D0C, -1));
896 vn = wasm_v128_or(vn, tvn);
923 row = wasm_i64x2_const(0, 0);
924 w0 = wasm_i8x16_swizzle(inf_u_q,
925 wasm_i16x8_const(0x0100, 0x0100, 0x0100, 0x0100,
926 0x0504, 0x0504, 0x0504, 0x0504));
928 flags = wasm_v128_and(w0,
929 wasm_u16x8_const(0x1110, 0x2220, 0x4440, 0x8880,
930 0x1110, 0x2220, 0x4440, 0x8880));
931 insig = wasm_i16x8_eq(flags, wasm_i64x2_const(0, 0));
932 if (wasm_i8x16_bitmask(insig) != 0xFFFF)
934 U_q = wasm_i8x16_swizzle(U_q,
935 wasm_i16x8_const(0x0100, 0x0100, 0x0100, 0x0100,
936 0x0504, 0x0504, 0x0504, 0x0504));
937 flags = wasm_i16x8_mul(flags, wasm_i16x8_const(8,4,2,1,8,4,2,1));
938 v128_t ms_vec = frwd_fetch<0xFF>(magsgn);
946 w0 = wasm_u16x8_shr(flags, 15);
947 m_n = wasm_i16x8_sub(U_q, w0);
948 m_n = wasm_v128_andnot(m_n, insig);
952 v128_t ex_sum, shfl, inc_sum = m_n;
953 shfl = wasm_i16x8_shuffle(wasm_i64x2_const(0,0),
954 inc_sum, 7, 8, 9, 10, 11, 12, 13, 14);
955 inc_sum = wasm_i16x8_add(inc_sum, shfl);
956 shfl = wasm_i32x4_shuffle(wasm_i64x2_const(0,0), inc_sum, 3, 4, 5, 6);
957 inc_sum = wasm_i16x8_add(inc_sum, shfl);
958 shfl = wasm_i64x2_shuffle(wasm_i64x2_const(0,0), inc_sum, 1, 2);
959 inc_sum = wasm_i16x8_add(inc_sum, shfl);
960 int total_mn = wasm_u16x8_extract_lane(inc_sum, 7);
961 ex_sum = wasm_i16x8_shuffle(wasm_i64x2_const(0,0),
962 inc_sum, 7, 8, 9, 10, 11, 12, 13, 14);
965 v128_t byte_idx = wasm_u16x8_shr(ex_sum, 3);
967 wasm_v128_and(ex_sum, wasm_i16x8_const(OJPH_REPEAT8(7)));
968 byte_idx = wasm_i8x16_swizzle(byte_idx,
969 wasm_i16x8_const(0x0000, 0x0202, 0x0404, 0x0606,
970 0x0808, 0x0A0A, 0x0C0C, 0x0E0E));
972 wasm_i16x8_add(byte_idx, wasm_i16x8_const(OJPH_REPEAT8(0x0100)));
973 v128_t d0 = wasm_i8x16_swizzle(ms_vec, byte_idx);
975 wasm_i16x8_add(byte_idx, wasm_i16x8_const(OJPH_REPEAT8(0x0101)));
976 v128_t d1 = wasm_i8x16_swizzle(ms_vec, byte_idx);
979 v128_t bit_shift = wasm_i8x16_swizzle(
980 wasm_i8x16_const(-1, 127, 63, 31, 15, 7, 3, 1,
981 -1, 127, 63, 31, 15, 7, 3, 1), bit_idx);
983 wasm_i16x8_add(bit_shift, wasm_i16x8_const(OJPH_REPEAT8(0x0101)));
984 d0 = wasm_i16x8_mul(d0, bit_shift);
985 d0 = wasm_u16x8_shr(d0, 8);
986 d1 = wasm_i16x8_mul(d1, bit_shift);
989 d0 = wasm_v128_or(d0, d1);
992 v128_t shift, t0, t1;
995 v128_t U_q_m1 = wasm_i32x4_sub(U_q, ones);
996 ui32 Uq0 = wasm_u16x8_extract_lane(U_q_m1, 0);
997 ui32 Uq1 = wasm_u16x8_extract_lane(U_q_m1, 4);
998 w0 = wasm_i16x8_sub(twos, w0);
999 t0 = wasm_v128_and(w0, wasm_i64x2_const(-1, 0));
1000 t1 = wasm_v128_and(w0, wasm_i64x2_const(0, -1));
1001 t0 = wasm_i32x4_shl(t0, Uq0);
1002 t1 = wasm_i32x4_shl(t1, Uq1);
1003 shift = wasm_v128_or(t0, t1);
1004 ms_vec = wasm_v128_and(d0, wasm_i16x8_sub(shift, ones));
1007 w0 = wasm_v128_and(flags, wasm_i16x8_const(OJPH_REPEAT8(0x800)));
1008 w0 = wasm_i16x8_eq(w0, wasm_i64x2_const(0, 0));
1009 w0 = wasm_v128_andnot(shift, w0);
1010 ms_vec = wasm_v128_or(ms_vec, w0);
1011 w0 = wasm_i16x8_shl(ms_vec, 15);
1012 ms_vec = wasm_v128_or(ms_vec, ones);
1013 v128_t tvn = ms_vec;
1014 ms_vec = wasm_i16x8_add(ms_vec, twos);
1015 ms_vec = wasm_i16x8_shl(ms_vec, p - 1);
1016 ms_vec = wasm_v128_or(ms_vec, w0);
1017 row = wasm_v128_andnot(ms_vec, insig);
1019 ms_vec = wasm_v128_andnot(tvn, insig);
1020 w0 = wasm_i8x16_swizzle(ms_vec,
1021 wasm_i16x8_const(0x0302, 0x0706, -1, -1, -1, -1, -1, -1));
1022 vn = wasm_v128_or(vn, w0);
1023 w0 = wasm_i8x16_swizzle(ms_vec,
1024 wasm_i16x8_const(-1, 0x0B0A, 0x0F0E, -1, -1, -1, -1, -1));
1025 vn = wasm_v128_or(vn, w0);
1052 ui32 missing_msbs, ui32 num_passes,
1057 static bool insufficient_precision = false;
1058 static bool modify_code = false;
1059 static bool truncate_spp_mrp = false;
1061 if (num_passes > 1 && lengths2 == 0)
1063 OJPH_WARN(0x00010001, "A malformed codeblock that has more than "
1064 "one coding pass, but zero length for "
1065 "2nd and potential 3rd pass.\n");
1071 OJPH_WARN(0x00010002, "We do not support more than 3 coding passes; "
1072 "This codeblock has %d passes.\n",
1077 if (missing_msbs > 30)
1079 if (insufficient_precision == false)
1081 insufficient_precision = true;
1082 OJPH_WARN(0x00010003, "32 bits are not enough to decode this "
1083 "codeblock. This message will not be "
1084 "displayed again.\n");
1088 else if (missing_msbs == 30)
1090 if (modify_code == false) {
1092 OJPH_WARN(0x00010004, "Not enough precision to decode the cleanup "
1093 "pass. The code can be modified to support "
1094 "this case. This message will not be "
1095 "displayed again.\n");
1099 else if (missing_msbs == 29)
1101 if (num_passes > 1) {
1103 if (truncate_spp_mrp == false) {
1104 truncate_spp_mrp = true;
1105 OJPH_WARN(0x00010005, "Not enough precision to decode the SgnProp "
1106 "nor MagRef passes; both will be skipped. "
1107 "This message will not be displayed "
1112 ui32 p = 30 - missing_msbs;
1118 OJPH_WARN(0x00010006, "Wrong codeblock length.\n");
1124 lcup = (int)lengths1;
1126 scup = (((int)coded_data[lcup-1]) << 4) + (coded_data[lcup-2] & 0xF);
1127 if (scup < 2 || scup > lcup || scup > 4079)
1145 ui16 scratch[8 * 513] = {0};
1153 ui32 sstr = ((width + 2u) + 7u) & ~7u;
1155 assert((stride & 0x3) == 0);
1157 ui32 mmsbp2 = missing_msbs + 2;
1169 mel_init(&mel, coded_data, lcup, scup);
1171 rev_init(&vlc, coded_data, lcup, scup);
1181 for (ui32 x = 0; x < width; sp += 4)
1200 t0 = (run == -1) ? t0 : 0;
1214 c_q = ((t0 & 0x10U) << 3) | ((t0 & 0xE0U) << 2);
1223 t1 = vlc_tbl0[c_q + (vlc_val & 0x7F)];
1226 if (c_q == 0 && x < width)
1231 t1 = (run == -1) ? t1 : 0;
1236 t1 = x < width ? t1 : 0;
1245 c_q = ((t1 & 0x10U) << 3) | ((t1 & 0xE0U) << 2);
1253 ui32 uvlc_mode = ((t0 & 0x8U) << 3) | ((t1 & 0x8U) << 4);
1254 if (uvlc_mode == 0xc0)
1258 uvlc_mode += (run == -1) ? 0x40 : 0;
1275 ui32 len = uvlc_entry & 0xF;
1276 ui32 tmp = vlc_val & ((1 << len) - 1);
1280 len = uvlc_entry & 0x7;
1282 ui16 u_q = (ui16)(1 + (uvlc_entry&7) + (tmp&~(0xFFU<<len)));
1284 u_q = (ui16)(1 + (uvlc_entry >> 3) + (tmp >> len));
1290 for (ui32 y = 2; y < height; y += 2)
1293 ui16 *sp = scratch + (y >> 1) * sstr;
1295 for (ui32 x = 0; x < width; sp += 4)
1301 c_q |= ((sp[0 - (si32)sstr] & 0xA0U) << 2);
1302 c_q |= ((sp[2 - (si32)sstr] & 0x20U) << 4);
1318 t0 = (run == -1) ? t0 : 0;
1333 c_q = ((t0 & 0x40U) << 2) | ((t0 & 0x80U) << 1);
1335 c_q |= sp[0 - (si32)sstr] & 0x80;
1337 c_q |= ((sp[2 - (si32)sstr] & 0xA0U) << 2);
1338 c_q |= ((sp[4 - (si32)sstr] & 0x20U) << 4);
1347 t1 = vlc_tbl1[c_q + (vlc_val & 0x7F)];
1350 if (c_q == 0 && x < width)
1355 t1 = (run == -1) ? t1 : 0;
1360 t1 = x < width ? t1 : 0;
1370 c_q = ((t1 & 0x40U) << 2) | ((t1 & 0x80U) << 1);
1372 c_q |= sp[2 - (si32)sstr] & 0x80;
1380 ui32 uvlc_mode = ((t0 & 0x8U) << 3) | ((t1 & 0x8U) << 4);
1386 ui32 len = uvlc_entry & 0xF;
1387 ui32 tmp = vlc_val & ((1 << len) - 1);
1391 len = uvlc_entry & 0x7;
1393 ui16 u_q = (ui16)((uvlc_entry & 7) + (tmp & ~(0xFU << len)));
1395 u_q = (ui16)((uvlc_entry >> 3) + (tmp >> len));
1418 const int v_n_size = 512 + 8;
1419 ui32 v_n_scratch[2 * v_n_size] = {0};
1422 frwd_init<0xFF>(&magsgn, coded_data, lcup - scup);
1426 ui32 *vp = v_n_scratch;
1427 ui32 *dp = decoded_data;
1430 for (ui32 x = 0; x < width; x += 4, sp += 4, vp += 2, dp += 4)
1434 v128_t inf_u_q, U_q;
1437 inf_u_q = wasm_v128_load(sp);
1438 U_q = wasm_u32x4_shr(inf_u_q, 16);
1440 w0 = wasm_i32x4_gt(U_q, wasm_u32x4_splat(mmsbp2));
1441 ui32 i = wasm_i8x16_bitmask(w0);
1447 v128_t row0 = decode_one_quad32<0>(inf_u_q, U_q, &magsgn, p, vn);
1448 v128_t row1 = decode_one_quad32<1>(inf_u_q, U_q, &magsgn, p, vn);
1449 w0 = wasm_v128_load(vp);
1450 w0 = wasm_v128_and(w0, wasm_i32x4_const(-1,0,0,0));
1451 w0 = wasm_v128_or(w0, vn);
1452 wasm_v128_store(vp, w0);
1456 w0 = wasm_i32x4_shuffle(row0, row1, 0, 4, 1, 5);
1457 w1 = wasm_i32x4_shuffle(row0, row1, 2, 6, 3, 7);
1458 row0 = wasm_i32x4_shuffle(w0, w1, 0, 4, 1, 5);
1459 row1 = wasm_i32x4_shuffle(w0, w1, 2, 6, 3, 7);
1460 wasm_v128_store(dp, row0);
1461 wasm_v128_store(dp + stride, row1);
1465 for (ui32 y = 2; y < height; y += 2)
1469 ui32 *vp = v_n_scratch;
1470 const v128_t lut_lo = wasm_i8x16_const(
1471 31, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
1473 const v128_t lut_hi = wasm_i8x16_const(
1474 31, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
1476 const v128_t nibble_mask = wasm_i8x16_const(OJPH_REPEAT16(0x0F));
1477 const v128_t byte_offset8 = wasm_i16x8_const(OJPH_REPEAT8(8));
1478 const v128_t byte_offset16 = wasm_i16x8_const(OJPH_REPEAT8(16));
1480 for (ui32 x = 0; x <= width; x += 8, vp += 4)
1483 v = wasm_v128_load(vp);
1485 t = wasm_v128_and(nibble_mask, v);
1486 v = wasm_v128_and(wasm_u16x8_shr(v, 4), nibble_mask);
1487 t = wasm_i8x16_swizzle(lut_lo, t);
1488 v = wasm_i8x16_swizzle(lut_hi, v);
1489 v = wasm_u8x16_min(v, t);
1491 t = wasm_u16x8_shr(v, 8);
1492 v = wasm_v128_or(v, byte_offset8);
1493 v = wasm_u8x16_min(v, t);
1495 t = wasm_u32x4_shr(v, 16);
1496 v = wasm_v128_or(v, byte_offset16);
1497 v = wasm_u8x16_min(v, t);
1499 v = wasm_i16x8_sub(cc, v);
1500 wasm_v128_store(vp + v_n_size, v);
1504 ui32 *vp = v_n_scratch;
1505 ui16 *sp = scratch + (y >> 1) * sstr;
1506 ui32 *dp = decoded_data + y * stride;
1509 for (ui32 x = 0; x < width; x += 4, sp += 4, vp += 2, dp += 4)
1513 v128_t inf_u_q, U_q;
1516 v128_t gamma, emax, kappa, u_q;
1518 inf_u_q = wasm_v128_load(sp);
1520 wasm_v128_and(inf_u_q, wasm_i32x4_const(OJPH_REPEAT4(0xF0)));
1521 w0 = wasm_i32x4_sub(gamma, wasm_i32x4_const(OJPH_REPEAT4(1)));
1522 gamma = wasm_v128_and(gamma, w0);
1523 gamma = wasm_i32x4_eq(gamma, wasm_i64x2_const(0, 0));
1525 emax = wasm_v128_load(vp + v_n_size);
1526 w0 = wasm_i32x4_shuffle(emax, wasm_i64x2_const(0,0), 1, 2, 3, 4);
1527 emax = wasm_i16x8_max(w0, emax);
1528 emax = wasm_v128_andnot(emax, gamma);
1531 kappa = wasm_i16x8_max(emax, kappa);
1533 u_q = wasm_u32x4_shr(inf_u_q, 16);
1534 U_q = wasm_i32x4_add(u_q, kappa);
1536 w0 = wasm_i32x4_gt(U_q, wasm_u32x4_splat(mmsbp2));
1537 ui32 i = wasm_i8x16_bitmask(w0);
1543 v128_t row0 = decode_one_quad32<0>(inf_u_q, U_q, &magsgn, p, vn);
1544 v128_t row1 = decode_one_quad32<1>(inf_u_q, U_q, &magsgn, p, vn);
1545 w0 = wasm_v128_load(vp);
1546 w0 = wasm_v128_and(w0, wasm_i32x4_const(-1,0,0,0));
1547 w0 = wasm_v128_or(w0, vn);
1548 wasm_v128_store(vp, w0);
1551 w0 = wasm_i32x4_shuffle(row0, row1, 0, 4, 1, 5);
1552 w1 = wasm_i32x4_shuffle(row0, row1, 2, 6, 3, 7);
1553 row0 = wasm_i32x4_shuffle(w0, w1, 0, 4, 1, 5);
1554 row1 = wasm_i32x4_shuffle(w0, w1, 2, 6, 3, 7);
1555 wasm_v128_store(dp, row0);
1556 wasm_v128_store(dp + stride, row1);
1571 const int v_n_size = 512 + 8;
1572 ui16 v_n_scratch[2 * v_n_size] = {0};
1575 frwd_init<0xFF>(&magsgn, coded_data, lcup - scup);
1579 ui16 *vp = v_n_scratch;
1580 ui32 *dp = decoded_data;
1583 for (ui32 x = 0; x < width; x += 4, sp += 4, vp += 2, dp += 4)
1587 v128_t inf_u_q, U_q;
1590 inf_u_q = wasm_v128_load(sp);
1591 U_q = wasm_u32x4_shr(inf_u_q, 16);
1593 w0 = wasm_i32x4_gt(U_q, wasm_u32x4_splat(mmsbp2));
1594 ui32 i = wasm_i8x16_bitmask(w0);
1601 w0 = wasm_v128_load(vp);
1602 w0 = wasm_v128_and(w0, wasm_i16x8_const(-1,0,0,0,0,0,0,0));
1603 w0 = wasm_v128_or(w0, vn);
1604 wasm_v128_store(vp, w0);
1607 w0 = wasm_i8x16_swizzle(row,
1608 wasm_i16x8_const(-1, 0x0100, -1, 0x0504, -1, 0x0908, -1, 0x0D0C));
1610 wasm_v128_store(dp, w0);
1611 w1 = wasm_i8x16_swizzle(row,
1612 wasm_i16x8_const(-1, 0x0302, -1, 0x0706, -1, 0x0B0A, -1, 0x0F0E));
1614 wasm_v128_store(dp + stride, w1);
1618 for (ui32 y = 2; y < height; y += 2)
1622 ui16 *vp = v_n_scratch;
1623 const v128_t lut_lo = wasm_i8x16_const(
1624 15, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
1626 const v128_t lut_hi = wasm_i8x16_const(
1627 15, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
1629 const v128_t nibble_mask = wasm_i8x16_const(OJPH_REPEAT16(0x0F));
1630 const v128_t byte_offset8 = wasm_i16x8_const(OJPH_REPEAT8(8));
1632 for (ui32 x = 0; x <= width; x += 16, vp += 8)
1635 v = wasm_v128_load(vp);
1637 t = wasm_v128_and(nibble_mask, v);
1638 v = wasm_v128_and(wasm_u16x8_shr(v, 4), nibble_mask);
1639 t = wasm_i8x16_swizzle(lut_lo, t);
1640 v = wasm_i8x16_swizzle(lut_hi, v);
1641 v = wasm_u8x16_min(v, t);
1643 t = wasm_u16x8_shr(v, 8);
1644 v = wasm_v128_or(v, byte_offset8);
1645 v = wasm_u8x16_min(v, t);
1647 v = wasm_i16x8_sub(cc, v);
1648 wasm_v128_store(vp + v_n_size, v);
1652 ui16 *vp = v_n_scratch;
1653 ui16 *sp = scratch + (y >> 1) * sstr;
1654 ui32 *dp = decoded_data + y * stride;
1657 for (ui32 x = 0; x < width; x += 4, sp += 4, vp += 2, dp += 4)
1661 v128_t inf_u_q, U_q;
1664 v128_t gamma, emax, kappa, u_q;
1666 inf_u_q = wasm_v128_load(sp);
1668 wasm_v128_and(inf_u_q, wasm_i32x4_const(OJPH_REPEAT4(0xF0)));
1669 w0 = wasm_i32x4_sub(gamma, wasm_i32x4_const(OJPH_REPEAT4(1)));
1670 gamma = wasm_v128_and(gamma, w0);
1671 gamma = wasm_i32x4_eq(gamma, wasm_i64x2_const(0, 0));
1673 emax = wasm_v128_load(vp + v_n_size);
1674 w0 = wasm_i16x8_shuffle(emax,
1675 wasm_i64x2_const(0, 0), 1, 2, 3, 4, 5, 6, 7, 8);
1676 emax = wasm_i16x8_max(w0, emax);
1677 emax = wasm_i8x16_swizzle(emax,
1678 wasm_i16x8_const(0x0100, -1, 0x0302, -1, 0x0504, -1, 0x0706, -1));
1680 emax = wasm_v128_andnot(emax, gamma);
1683 kappa = wasm_i16x8_max(emax, kappa);
1685 u_q = wasm_u32x4_shr(inf_u_q, 16);
1686 U_q = wasm_i32x4_add(u_q, kappa);
1688 w0 = wasm_i32x4_gt(U_q, wasm_u32x4_splat(mmsbp2));
1689 ui32 i = wasm_i8x16_bitmask(w0);
1696 w0 = wasm_v128_load(vp);
1697 w0 = wasm_v128_and(w0, wasm_i16x8_const(-1,0,0,0,0,0,0,0));
1698 w0 = wasm_v128_or(w0, vn);
1699 wasm_v128_store(vp, w0);
1701 w0 = wasm_i8x16_swizzle(row,
1702 wasm_i16x8_const(-1, 0x0100, -1, 0x0504, -1, 0x0908, -1, 0x0D0C));
1704 wasm_v128_store(dp, w0);
1705 w1 = wasm_i8x16_swizzle(row,
1706 wasm_i16x8_const(-1, 0x0302, -1, 0x0706, -1, 0x0B0A, -1, 0x0F0E));
1708 wasm_v128_store(dp + stride, w1);
1722 ui16* const sigma = scratch;
1724 ui32 mstr = (width + 3u) >> 2;
1726 mstr = ((mstr + 2u) + 7u) & ~7u;
1734 const v128_t mask_3 = wasm_i32x4_const(OJPH_REPEAT4(0x30));
1735 const v128_t mask_C = wasm_i32x4_const(OJPH_REPEAT4(0xC0));
1736 const v128_t shuffle_mask = wasm_i32x4_const(0x0C080400,-1,-1,-1);
1737 for (y = 0; y < height; y += 4)
1739 ui16* sp = scratch + (y >> 1) * sstr;
1740 ui16* dp = sigma + (y >> 2) * mstr;
1741 for (ui32 x = 0; x < width; x += 8, sp += 8, dp += 2)
1743 v128_t s0, s1, u3, uC, t0, t1;
1745 s0 = wasm_v128_load(sp);
1746 u3 = wasm_v128_and(s0, mask_3);
1747 u3 = wasm_u32x4_shr(u3, 4);
1748 uC = wasm_v128_and(s0, mask_C);
1749 uC = wasm_u32x4_shr(uC, 2);
1750 t0 = wasm_v128_or(u3, uC);
1752 s1 = wasm_v128_load(sp + sstr);
1753 u3 = wasm_v128_and(s1, mask_3);
1754 u3 = wasm_u32x4_shr(u3, 2);
1755 uC = wasm_v128_and(s1, mask_C);
1756 t1 = wasm_v128_or(u3, uC);
1758 v128_t r = wasm_v128_or(t0, t1);
1759 r = wasm_i8x16_swizzle(r, shuffle_mask);
1761 wasm_v128_store32_lane(dp, r, 0);
1767 ui16* dp = sigma + (y >> 2) * mstr;
1768 v128_t zero = wasm_i64x2_const(0, 0);
1769 for (ui32 x = 0; x < width; x += 32, dp += 8)
1770 wasm_v128_store(dp, zero);
1786 ui16 prev_row_sig[256 + 8] = {0};
1789 frwd_init<0>(&sigprop, coded_data + lengths1, (int)lengths2);
1791 for (ui32 y = 0; y < height; y += 4)
1793 ui32 pattern = 0xFFFFu;
1794 if (height - y < 4) {
1796 if (height - y < 3) {
1806 ui16 *prev_sig = prev_row_sig;
1807 ui16 *cur_sig = sigma + (y >> 2) * mstr;
1808 ui32 *dpp = decoded_data + y * stride;
1809 for (ui32 x = 0; x < width; x += 4, dpp += 4, ++cur_sig, ++prev_sig)
1814 pattern = pattern >> (s * 4);
1829 ui32 ns = *(ui32*)(cur_sig + mstr);
1830 ui32 u = (ps & 0x88888888) >> 3;
1832 u |= (ns & 0x11111111) << 3;
1837 mbr |= (cs & 0x77777777) << 1;
1838 mbr |= (cs & 0xEEEEEEEE) >> 1;
1854 v128_t cwd_vec = frwd_fetch<0>(&sigprop);
1855 ui32 cwd = wasm_u32x4_extract_lane(cwd_vec, 0);
1858 ui32 col_mask = 0xFu;
1859 ui32 inv_sig = ~cs & pattern;
1860 for (int i = 0; i < 16; i += 4, col_mask <<= 4)
1862 if ((col_mask & new_sig) == 0)
1866 ui32 sample_mask = 0x1111u & col_mask;
1867 if (new_sig & sample_mask)
1869 new_sig &= ~sample_mask;
1872 ui32 t = 0x33u << i;
1873 new_sig |= t & inv_sig;
1879 if (new_sig & sample_mask)
1881 new_sig &= ~sample_mask;
1884 ui32 t = 0x76u << i;
1885 new_sig |= t & inv_sig;
1891 if (new_sig & sample_mask)
1893 new_sig &= ~sample_mask;
1896 ui32 t = 0xECu << i;
1897 new_sig |= t & inv_sig;
1903 if (new_sig & sample_mask)
1905 new_sig &= ~sample_mask;
1908 ui32 t = 0xC8u << i;
1909 new_sig |= t & inv_sig;
1919 v128_t new_sig_vec = wasm_i16x8_splat((si16)new_sig);
1920 new_sig_vec = wasm_i8x16_swizzle(new_sig_vec,
1921 wasm_i8x16_const(0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1));
1922 new_sig_vec = wasm_v128_and(new_sig_vec,
1924 new_sig_vec = wasm_i8x16_eq(new_sig_vec,
1929 v128_t ex_sum, shfl, inc_sum = new_sig_vec;
1930 inc_sum = wasm_i8x16_abs(inc_sum);
1931 shfl = wasm_i8x16_shuffle(wasm_i64x2_const(0,0), inc_sum,
1932 15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30);
1933 inc_sum = wasm_i8x16_add(inc_sum, shfl);
1934 shfl = wasm_i16x8_shuffle(wasm_i64x2_const(0,0), inc_sum,
1935 7, 8, 9, 10, 11, 12, 13, 14);
1936 inc_sum = wasm_i8x16_add(inc_sum, shfl);
1937 shfl = wasm_i32x4_shuffle(wasm_i64x2_const(0,0), inc_sum,
1939 inc_sum = wasm_i8x16_add(inc_sum, shfl);
1940 shfl = wasm_i64x2_shuffle(wasm_i64x2_const(0,0), inc_sum,
1942 inc_sum = wasm_i8x16_add(inc_sum, shfl);
1943 cnt += wasm_u8x16_extract_lane(inc_sum, 15);
1945 ex_sum = wasm_i8x16_shuffle(wasm_i64x2_const(0,0), inc_sum,
1946 15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30);
1950 cwd_vec = wasm_i16x8_splat((si16)cwd);
1951 cwd_vec = wasm_i8x16_swizzle(cwd_vec,
1952 wasm_i8x16_const(0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1));
1953 cwd_vec = wasm_v128_and(cwd_vec,
1955 cwd_vec = wasm_i8x16_eq(cwd_vec,
1957 cwd_vec = wasm_i8x16_abs(cwd_vec);
1961 v128_t v = wasm_i8x16_swizzle(cwd_vec, ex_sum);
1964 v128_t m = wasm_i8x16_const(
1965 0,-1,-1,-1,4,-1,-1,-1,8,-1,-1,-1,12,-1,-1,-1);
1966 v128_t val = wasm_i32x4_splat(3 << (p - 2));
1968 for (int c = 0; c < 4; ++c) {
1969 v128_t s0, s0_ns, s0_val;
1971 s0 = wasm_v128_load(dp);
1975 s0_ns = wasm_i8x16_swizzle(new_sig_vec, m);
1976 s0_ns = wasm_i32x4_eq(s0_ns,
1980 s0_val = wasm_i8x16_swizzle(v, m);
1981 s0_val = wasm_i32x4_shl(s0_val, 31);
1982 s0_val = wasm_v128_or(s0_val, val);
1983 s0_val = wasm_v128_and(s0_val, s0_ns);
1986 s0 = wasm_v128_or(s0, s0_val);
1988 wasm_v128_store(dp, s0);
1991 m = wasm_i32x4_add(m, wasm_i32x4_const(OJPH_REPEAT4(1)));
1998 *prev_sig = (ui16)(new_sig);
2002 new_sig |= (t & 0x7777) << 1;
2003 new_sig |= (t & 0xEEEE) >> 1;
2016 rev_init_mrp(&magref, coded_data, (int)lengths1, (int)lengths2);
2018 for (ui32 y = 0; y < height; y += 4)
2020 ui16 *cur_sig = sigma + (y >> 2) * mstr;
2021 ui32 *dpp = decoded_data + y * stride;
2022 for (ui32 i = 0; i < width; i += 4, dpp += 4)
2027 ui16 sig = *cur_sig++;
2035 v128_t sig_vec = wasm_i16x8_splat((si16)sig);
2036 sig_vec = wasm_i8x16_swizzle(sig_vec,
2037 wasm_i8x16_const(0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1));
2038 sig_vec = wasm_v128_and(sig_vec,
2040 sig_vec = wasm_i8x16_eq(sig_vec,
2042 sig_vec = wasm_i8x16_abs(sig_vec);
2046 v128_t ex_sum, shfl, inc_sum = sig_vec;
2047 shfl = wasm_i8x16_shuffle(wasm_i64x2_const(0,0), inc_sum,
2048 15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30);
2049 inc_sum = wasm_i8x16_add(inc_sum, shfl);
2050 shfl = wasm_i16x8_shuffle(wasm_i64x2_const(0,0), inc_sum,
2051 7, 8, 9, 10, 11, 12, 13, 14);
2052 inc_sum = wasm_i8x16_add(inc_sum, shfl);
2053 shfl = wasm_i32x4_shuffle(wasm_i64x2_const(0,0), inc_sum,
2055 inc_sum = wasm_i8x16_add(inc_sum, shfl);
2056 shfl = wasm_i64x2_shuffle(wasm_i64x2_const(0,0), inc_sum,
2058 inc_sum = wasm_i8x16_add(inc_sum, shfl);
2059 total_bits = wasm_u8x16_extract_lane(inc_sum, 15);
2061 ex_sum = wasm_i8x16_shuffle(wasm_i64x2_const(0,0), inc_sum,
2062 15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30);
2069 v128_t cwd_vec = wasm_i16x8_splat((si16)cwd);
2070 cwd_vec = wasm_i8x16_swizzle(cwd_vec,
2071 wasm_i8x16_const(0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1));
2072 cwd_vec = wasm_v128_and(cwd_vec,
2074 cwd_vec = wasm_i8x16_eq(cwd_vec,
2077 wasm_i8x16_add(cwd_vec, wasm_i8x16_const(OJPH_REPEAT16(1)));
2078 cwd_vec = wasm_i8x16_add(cwd_vec, cwd_vec);
2083 v128_t m = wasm_i8x16_const(0,-1,-1,-1,4,-1,-1,-1,
2084 8,-1,-1,-1,12,-1,-1,-1);
2086 for (int c = 0; c < 4; ++c) {
2087 v128_t s0, s0_sig, s0_idx, s0_val;
2089 s0 = wasm_v128_load(dp);
2091 s0_sig = wasm_i8x16_swizzle(sig_vec, m);
2092 s0_sig = wasm_i8x16_eq(s0_sig, wasm_i64x2_const(0, 0));
2094 s0_idx = wasm_i8x16_swizzle(ex_sum, m);
2095 s0_val = wasm_i8x16_swizzle(cwd_vec, s0_idx);
2097 s0_val = wasm_v128_andnot(s0_val, s0_sig);
2099 s0_val = wasm_i32x4_shl(s0_val, p - 2);
2100 s0 = wasm_v128_xor(s0, s0_val);
2102 wasm_v128_store(dp, s0);
2105 m = wasm_i32x4_add(m, wasm_i32x4_const(OJPH_REPEAT4(1)));
ui16 uvlc_tbl0[256+64]
uvlc_tbl0 contains decoding information for initial row of quads
ui16 uvlc_tbl1[256]
uvlc_tbl1 contains decoding information for non-initial row of quads
ui16 vlc_tbl0[1024]
vlc_tbl0 contains decoding information for initial row of quads
ui16 vlc_tbl1[1024]
vlc_tbl1 contains decoding information for non-initial row of quads
bool ojph_decode_codeblock_wasm(ui8 *coded_data, ui32 *decoded_data, ui32 missing_msbs, ui32 num_passes, ui32 lengths1, ui32 lengths2, ui32 width, ui32 height, ui32 stride, bool stripe_causal)
Decodes one codeblock, processing the cleanup, significance propagation, and magnitude refinement passes.
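A hypothetical calling sketch (not taken from the OpenJPH headers): the per-argument comments are read off the signature and off the listing above, where lengths1 and lengths2 feed frwd_init/rev_init_mrp at lines 1789 and 2016; anything beyond that is an assumption.
  // Hypothetical wrapper; buffer allocation and the output sample format are the caller's concern.
  static bool decode_block(ui8* coded_data, ui32* decoded_data,
                           ui32 missing_msbs,  // skipped most-significant bit-planes
                           ui32 num_passes,    // 1 to 3 coding passes present
                           ui32 lengths1,      // bytes of the cleanup segment
                           ui32 lengths2,      // bytes of the SigProp/MagRef segments (0 if absent)
                           ui32 width, ui32 height, ui32 stride,
                           bool stripe_causal) // assumed: vertically stripe-causal context formation
  {
    return ojph_decode_codeblock_wasm(coded_data, decoded_data,
                                      missing_msbs, num_passes,
                                      lengths1, lengths2,
                                      width, height, stride, stripe_causal);
  }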
static ui32 rev_fetch(rev_struct *vlcp)
Retrieves 32 bits from the head of a rev_struct structure.
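rev_fetch() only peeks; bits are consumed by rev_advance(). A minimal sketch of that peek-then-consume pattern around the VLC segment, with names taken from the listing and a placeholder codeword length:
  rev_struct vlc;
  rev_init(&vlc, coded_data, lcup, scup);  // as at listing line 1171
  ui32 vlc_val = rev_fetch(&vlc);          // peek at up to 32 decoded bits
  ui32 len = 7;                            // placeholder: bits the caller decoded from vlc_val
  vlc_val = rev_advance(&vlc, len);        // now actually consume them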
static void rev_init_mrp(rev_struct *mrp, ui8 *data, int lcup, int len2)
Initializes a rev_struct structure for the MRP segment, and reads a number of bytes such that the next 32 bits can be read.
static void mel_read(dec_mel_st *melp)
Reads and unstuffs the MEL bitstream.
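The byte-level unstuffing rule is visible in the tail loop of mel_init() above (listing lines 246-256). A self-contained sketch of one byte step, with the melp-> members flattened into reference parameters; it mirrors those lines rather than replacing mel_read():
  // One byte step of MEL filling, mirroring listing lines 248-256.
  static void mel_fill_one_byte(ui8*& data, int& size, ui64& tmp, int& bits, bool& unstuff)
  {
    ui64 d = (size > 0) ? *data : 0xFF;  // pad with 0xFF once the segment ends
    if (size == 1) d |= 0xF;             // terminator guard, as in line 250
    data += (size-- > 0);                // never step past the buffer
    int d_bits = 8 - (unstuff ? 1 : 0);  // only 7 bits count after an 0xFF byte
    tmp = (tmp << d_bits) | d;           // overlapped bit is already 1 (previous byte was 0xFF)
    bits += d_bits;
    unstuff = ((d & 0xFF) == 0xFF);      // decide for the next byte
  }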
static void frwd_advance(frwd_struct *msp, ui32 num_bits)
Consume num_bits bits from the bitstream of frwd_struct.
static v128_t decode_two_quad16(const v128_t inf_u_q, v128_t U_q, frwd_struct *magsgn, ui32 p, v128_t &vn)
decodes two consecutive quads (one octet), using 16-bit data
static void rev_read_mrp(rev_struct *mrp)
Reads and unstuffs from rev_struct.
static ui32 rev_fetch_mrp(rev_struct *mrp)
Retrieves 32 bits from the head of a rev_struct structure.
static ui32 rev_advance_mrp(rev_struct *mrp, ui32 num_bits)
Consumes num_bits from a rev_struct structure.
static void rev_read(rev_struct *vlcp)
Read and unstuff data from a backwardly-growing segment.
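A sketch of one byte step of that backward filling, mirroring listing lines 408-411 (with the size guard of the line-543 MRP variant): a bit is dropped when the previous byte was greater than 0x8F and the current byte's low 7 bits are all ones.
  static void rev_fill_one_byte(ui8*& data, int& size, ui64& tmp, ui32& bits, bool& unstuff)
  {
    ui32 d = (size-- > 0) ? *data-- : 0;             // segment grows backward
    ui32 d_bits = 8 - ((unstuff && ((d & 0x7F) == 0x7F)) ? 1 : 0);
    tmp |= ((ui64)d) << bits;                        // append above the bits already held
    bits += d_bits;
    unstuff = d > 0x8F;                              // condition checked for the next byte
  }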
static int mel_get_run(dec_mel_st *melp)
Retrieves one run from dec_mel_st; if there are no stored runs, the MEL segment is decoded.
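The body is short; this reconstruction follows the two lines shown at 272 and 275, with the pop of the 7-bit run FIFO filled in from the runs/num_runs member descriptions further below (the exact statements are inferred, not copied):
  static int mel_get_run_sketch(dec_mel_st *melp)
  {
    if (melp->num_runs == 0)           // nothing buffered: decode more MEL bits
      mel_decode(melp);
    int t = (int)(melp->runs & 0x7F);  // the oldest run sits in the low 7 bits
    melp->runs >>= 7;                  // pop it from the FIFO
    melp->num_runs--;
    return t;
  }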
static void rev_init(rev_struct *vlcp, ui8 *data, int lcup, int scup)
Initiates the rev_struct structure and reads a few bytes to move the read address to a multiple of 4.
static void mel_init(dec_mel_st *melp, ui8 *bbuf, int lcup, int scup)
Initiates a dec_mel_st structure for MEL decoding and reads some bytes in order to get the read address to a multiple of 4.
static ui32 rev_advance(rev_struct *vlcp, ui32 num_bits)
Consumes num_bits from a rev_struct structure.
static void frwd_read(frwd_struct *msp)
Read and unstuffs 32 bits from forward-growing bitstream.
static ui32 frwd_fetch(frwd_struct *msp)
Fetches 32 bits from the frwd_struct bitstream.
static void frwd_init(frwd_struct *msp, const ui8 *data, int size)
Initialize frwd_struct struct and reads some bytes.
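How the MagSgn segment is driven in the cleanup pass above (listing lines 1422 and 816): initialize over the first lcup - scup bytes, peek, then consume what was actually used. Treating the template argument as the padding/stuffing selector (0xFF for MagSgn, 0 for SigProp) is an inference from the listing, not a verified statement.
  static v128_t read_magsgn_once(ui8* coded_data, int lcup, int scup, ui32 total_mn)
  {
    frwd_struct magsgn;
    frwd_init<0xFF>(&magsgn, coded_data, lcup - scup);  // as at listing line 1422
    v128_t ms_vec = frwd_fetch<0xFF>(&magsgn);          // peek up to 128 bits
    frwd_advance(&magsgn, total_mn);                    // consume the bits used
    return ms_vec;
  }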
static v128_t decode_one_quad32(const v128_t inf_u_q, v128_t U_q, frwd_struct *magsgn, ui32 p, v128_t &vn)
decodes one quad, using 32 bit data
static void mel_decode(dec_mel_st *melp)
Decodes unstuffed MEL segment bits stored in tmp to runs.
static ui32 count_leading_zeros(ui32 val)
#define OJPH_REPEAT2(a)
Macros that help with typing and space.
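The repeat macros only duplicate their argument so the SIMD constant constructors can be written once per value; for instance, the constant built at listing line 841 expands as in this sketch:
  // OJPH_REPEAT4(7) expands to 7,7,7,7, so
  v128_t seven = wasm_i32x4_const(OJPH_REPEAT4(7));
  // is the same as
  v128_t seven_explicit = wasm_i32x4_const(7, 7, 7, 7);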
MEL state structure for reading and decoding the MEL bitstream.
bool unstuff
true if the next bit needs to be unstuffed
int num_runs
number of decoded runs left in runs (maximum 8)
int size
number of bytes in MEL code
ui8 * data
the address of data (or bitstream)
int k
state of MEL decoder
int bits
number of bits stored in tmp
ui64 tmp
temporary buffer for read data
ui64 runs
runs of decoded MEL codewords (7 bits/run)
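Assembled from the members listed above, a sketch of the MEL state; field order is illustrative, not the verbatim declaration:
  struct dec_mel_st {
    ui8* data;      // the address of data (or bitstream)
    ui64 tmp;       // temporary buffer for read data
    int  bits;      // number of bits stored in tmp
    int  size;      // number of bytes in MEL code
    bool unstuff;   // true if the next bit needs to be unstuffed
    int  k;         // state of MEL decoder
    int  num_runs;  // number of decoded runs left in runs (maximum 8)
    ui64 runs;      // runs of decoded MEL codewords (7 bits/run)
  };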
State structure for reading and unstuffing of forward-growing bitstreams; these are the MagSgn and SPP bitstreams.
const ui8 * data
pointer to bitstream
ui32 bits
number of bits stored in tmp
ui64 tmp
temporary buffer of read data
ui32 unstuff
1 if a bit needs to be unstuffed from the next byte
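The members above match the scalar reader (a single 64-bit tmp), while the SIMD code in this file stores into msp->tmp, msp->tmp + 16 and msp->tmp + 32 and lets msp->bits reach 128; the layout below is therefore an inference from that usage, not the actual declaration:
  struct frwd_struct {
    const ui8* data;  // pointer to bitstream
    ui8  tmp[48];     // temporary buffer of read data (room for three 128-bit stores, inferred)
    ui32 bits;        // number of bits stored in tmp
    ui32 unstuff;     // 1 if a bit needs to be unstuffed from the next byte
    int  size;        // bytes left in the segment (inferred from msp->size above)
  };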
A structure for reading and unstuffing a segment that grows backward, such as VLC and MRP.
ui32 bits
number of bits stored in tmp
int size
number of bytes left
ui8 * data
pointer to where to read data
ui64 tmp
temporary buffer of read data
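A sketch built from the members above; the unstuff flag is inferred from its use in rev_read()/rev_init() in the listing, and the field order is illustrative:
  struct rev_struct {
    ui8* data;      // pointer to where to read data
    ui64 tmp;       // temporary buffer of read data
    ui32 bits;      // number of bits stored in tmp
    int  size;      // number of bytes left
    bool unstuff;   // inferred: true when the last byte read was > 0x8F
  };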