39#include <wasm_simd128.h>
// Expands to its argument written four times, comma-separated. The code below
// uses it to supply the same value to all four lanes of wasm_i32x4_const.
47 #define REPEAT(a) a,a,a,a
// Zero-fill fragment: stores an all-zero 128-bit vector through p repeatedly.
// NOTE(review): the enclosing function header and braces are not visible in
// this excerpt; addr and count are presumably the parameters of
// wasm_mem_clear(void *addr, size_t count) -- confirm against the full file.
52 float* p = (
float*)addr;
53 v128_t zero = wasm_i32x4_const(
REPEAT(0));
// Each pass writes 16 bytes (p advances by 4 floats) while i advances by 16,
// so count is consumed in 16-byte steps. A count that is not a multiple of 16
// still produces a full 16-byte store on the final pass.
54 for (
size_t i = 0; i < count; i += 16, p += 4)
55 wasm_v128_store(p, zero);
// Horizontal reduction fragment: combines the four 32-bit lanes loaded from
// address with bitwise OR (not an arithmetic maximum) and extracts the result.
// NOTE(review): presumably the body of wasm_find_max_val(ui32 *address); the
// header, braces and whatever consumes t are not visible in this excerpt.
61 v128_t x1, x0 = wasm_v128_load(address);
// Bring lanes 2 and 3 down to lanes 0 and 1, then OR: lane0 = a0|a2, lane1 = a1|a3.
62 x1 = wasm_i32x4_shuffle(x0, x0, 2, 3, 2, 3);
63 x0 = wasm_v128_or(x0, x1);
// Broadcast lane 1 and OR again: lane 0 now holds a0|a1|a2|a3.
64 x1 = wasm_i32x4_shuffle(x0, x0, 1, 1, 1, 1);
65 x0 = wasm_v128_or(x0, x1);
// Pull the combined value out of lane 0 as an unsigned 32-bit integer.
66 ui32 t = (
ui32)wasm_i32x4_extract_lane(x0, 0);
// Fragment: converts two's complement 32-bit integers read from sp into
// sign-magnitude form (sign in bit 31, magnitude shifted left by 31 - K_max),
// writes them to dp, and ORs every shifted magnitude into *max_val.
// NOTE(review): only the tail of the parameter list is visible; sp, dp and
// K_max are presumably the leading parameters of wasm_rev_tx_to_cb -- confirm.
// delta_inv is not referenced by any line visible in this fragment.
72 float delta_inv,
ui32 count,
ui32* max_val)
// Left-shift amount applied to each magnitude.
77 ui32 shift = 31 - K_max;
// m0 selects only bit 31 (the sign bit) of each lane.
78 v128_t m0 = wasm_i32x4_const(
REPEAT((
int)0x80000000));
79 v128_t zero = wasm_i32x4_const(
REPEAT(0));
80 v128_t one = wasm_i32x4_const(
REPEAT(1));
// Seed the running OR from the caller's buffer; this reads a full 128 bits
// (four ui32 values) starting at max_val.
81 v128_t tmax = wasm_v128_load(max_val);
82 v128_t *p = (v128_t*)sp;
// Four elements per pass; a count that is not a multiple of 4 still processes
// a complete vector on the last pass.
83 for (
ui32 i = 0; i < count; i += 4, p += 1, dp += 4)
85 v128_t v = wasm_v128_load(p);
// sign is all-ones in lanes where v is negative (signed compare), else zero.
86 v128_t sign = wasm_i32x4_lt(v, zero);
// Conditional negate: (v ^ all-ones) + 1 == -v for negative lanes; positive
// lanes are XORed with zero and have zero added, so val holds |v|.
87 v128_t val = wasm_v128_xor(v, sign);
88 v128_t ones = wasm_v128_and(sign, one);
89 val = wasm_i32x4_add(val, ones);
// Reduce the all-ones mask to just the sign bit.
90 sign = wasm_v128_and(sign, m0);
91 val = wasm_i32x4_shl(val, shift);
// Accumulate the shifted magnitude before the sign bit is merged in.
92 tmax = wasm_v128_or(tmax, val);
93 val = wasm_v128_or(val, sign);
94 wasm_v128_store(dp, val);
// Write the accumulated OR back (four lanes). NOTE(review): presumably runs
// once after the loop; the loop's braces are not visible in this excerpt.
96 wasm_v128_store(max_val, tmax);
// Fragment: scales floats read from sp by delta_inv, truncates them to 32-bit
// integers, converts to sign-magnitude form (sign in bit 31), writes them to
// dp, and ORs every magnitude into *max_val.
// NOTE(review): only the tail of the parameter list is visible; sp and dp are
// presumably leading parameters of wasm_irv_tx_to_cb -- confirm. No K_max
// shift is applied in the lines visible here.
101 float delta_inv,
ui32 count,
ui32* max_val)
// Broadcast the scale factor to all four lanes.
107 v128_t d = wasm_f32x4_splat(delta_inv);
108 v128_t zero = wasm_i32x4_const(
REPEAT(0));
109 v128_t one = wasm_i32x4_const(
REPEAT(1));
// Seed the running OR from the caller's buffer; this reads a full 128 bits
// (four ui32 values) starting at max_val.
110 v128_t tmax = wasm_v128_load(max_val);
111 float *p = (
float*)sp;
// Four elements per pass; a count that is not a multiple of 4 still processes
// a complete vector on the last pass.
112 for (
ui32 i = 0; i < count; i += 4, p += 4, dp += 4)
114 v128_t vf = wasm_v128_load(p);
115 vf = wasm_f32x4_mul(vf, d);
// Saturating float-to-int conversion that truncates toward zero.
116 v128_t val = wasm_i32x4_trunc_sat_f32x4(vf);
// sign is all-ones in lanes where the integer is negative, else zero.
117 v128_t sign = wasm_i32x4_lt(val, zero);
// Conditional negate: (val ^ all-ones) + 1 == -val for negative lanes, so val
// ends up holding the magnitude.
118 val = wasm_v128_xor(val, sign);
119 v128_t ones = wasm_v128_and(sign, one);
120 val = wasm_i32x4_add(val, ones);
// Accumulate the magnitude before the sign bit is merged in.
121 tmax = wasm_v128_or(tmax, val);
// Shifting the all-ones mask left by 31 leaves only bit 31 set.
122 sign = wasm_i32x4_shl(sign, 31);
123 val = wasm_v128_or(val, sign);
124 wasm_v128_store(dp, val);
// Write the accumulated OR back (four lanes). NOTE(review): presumably runs
// once after the loop; the loop's braces are not visible in this excerpt.
126 wasm_v128_store(max_val, tmax);
// Fragment: converts sign-magnitude 32-bit values read from sp (sign in bit
// 31) back to two's complement integers, shifting the magnitude right by
// 31 - K_max, and stores them through p.
// NOTE(review): only the tail of the parameter list is visible; sp and K_max
// are presumably leading parameters of wasm_rev_tx_from_cb -- confirm. The
// declaration of p is not visible in this excerpt, and delta is not
// referenced by any visible line.
131 float delta,
ui32 count)
// Right-shift amount applied to each magnitude.
134 ui32 shift = 31 - K_max;
// m1 keeps bits 0..30, i.e. everything except the sign bit.
135 v128_t m1 = wasm_i32x4_const(
REPEAT(0x7FFFFFFF));
136 v128_t zero = wasm_i32x4_const(
REPEAT(0));
137 v128_t one = wasm_i32x4_const(
REPEAT(1));
// Four elements per pass; a count that is not a multiple of 4 still processes
// a complete vector on the last pass.
139 for (
ui32 i = 0; i < count; i += 4, sp += 4, p += 4)
141 v128_t v = wasm_v128_load((v128_t*)sp);
// Strip the sign bit, then shift the magnitude down. Bit 31 is already clear,
// so the signed shift cannot smear a sign bit into the result.
142 v128_t val = wasm_v128_and(v, m1);
143 val = wasm_i32x4_shr(val, shift);
// sign is all-ones in lanes whose bit 31 was set in the input, else zero.
144 v128_t sign = wasm_i32x4_lt(v, zero);
// Conditional negate: (val ^ all-ones) + 1 == -val for lanes flagged negative.
145 val = wasm_v128_xor(val, sign);
146 v128_t ones = wasm_v128_and(sign, one);
147 val = wasm_i32x4_add(val, ones);
148 wasm_v128_store(p, val);
// Fragment: converts sign-magnitude 32-bit values read from sp (sign in bit
// 31) to floats scaled by delta, and stores them through dp viewed as float.
// NOTE(review): only the tail of the parameter list is visible; sp and dp are
// presumably leading parameters of wasm_irv_tx_from_cb -- confirm. No K_max
// shift is applied in the lines visible here.
154 float delta,
ui32 count)
// m1 keeps bits 0..30, i.e. everything except the sign bit.
157 v128_t m1 = wasm_i32x4_const(
REPEAT(0x7FFFFFFF));
// Broadcast the scale factor to all four lanes.
158 v128_t d = wasm_f32x4_splat(delta);
159 float *p = (
float*)dp;
// Four elements per pass; a count that is not a multiple of 4 still processes
// a complete vector on the last pass.
160 for (
ui32 i = 0; i < count; i += 4, sp += 4, p += 4)
162 v128_t v = wasm_v128_load((v128_t*)sp);
// Magnitude as a non-negative integer, converted to float and scaled.
163 v128_t vali = wasm_v128_and(v, m1);
164 v128_t valf = wasm_f32x4_convert_i32x4(vali);
165 valf = wasm_f32x4_mul(valf, d);
// v & ~m1 isolates bit 31 of the input; ORing it into the float's bit pattern
// sets the float's sign bit directly.
166 v128_t sign = wasm_v128_andnot(v, m1);
167 valf = wasm_v128_or(valf, sign);
168 wasm_v128_store(p, valf);
// Signature list only: no bodies or terminating semicolons are visible for
// these entries. NOTE(review): they presumably correspond to the body
// fragments earlier in this listing -- confirm against the full file.

// Takes a ui32 source, an untyped destination, K_max, a float delta and a count.
void wasm_irv_tx_from_cb(const ui32 *sp, void *dp, ui32 K_max, float delta, ui32 count)
// Takes an untyped source, a ui32 destination, K_max, a float delta_inv, a
// count, and a ui32 pointer max_val.
void wasm_irv_tx_to_cb(const void *sp, ui32 *dp, ui32 K_max, float delta_inv, ui32 count, ui32 *max_val)
// Same parameter list as wasm_irv_tx_to_cb.
void wasm_rev_tx_to_cb(const void *sp, ui32 *dp, ui32 K_max, float delta_inv, ui32 count, ui32 *max_val)
// Takes an untyped address and a size_t count.
void wasm_mem_clear(void *addr, size_t count)
// Same parameter list as wasm_irv_tx_from_cb.
void wasm_rev_tx_from_cb(const ui32 *sp, void *dp, ui32 K_max, float delta, ui32 count)
// Takes a ui32 pointer and returns a ui32.
ui32 wasm_find_max_val(ui32 *address)