OpenJPH
Open-source implementation of JPEG2000 Part-15
ojph_colour_wasm.cpp
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2021, Aous Naman
6// Copyright (c) 2021, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2021, The University of New South Wales, Australia
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// 1. Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// 2. Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//***************************************************************************/
32// This file is part of the OpenJPH software implementation.
33// File: ojph_colour_wasm.cpp
34// Author: Aous Naman
35// Date: 9 February 2021
36//***************************************************************************/
37
38#include <cmath>
39#include <wasm_simd128.h>
40
41#include "ojph_defs.h"
42#include "ojph_colour.h"
43#include "ojph_colour_local.h"
44
45namespace ojph {
46 namespace local {
47
// Converts 32-bit signed integer samples to floats, four lanes at a time.
// Each sample is converted to float, multiplied by mul, and then has 0.5
// subtracted from it.
//   sp    - source integer samples
//   dp    - destination float samples
//   mul   - scale factor applied after the integer-to-float conversion
//   width - number of samples; processed as (width + 3) / 4 vectors
// NOTE(review): whole 4-lane vectors are loaded and stored, so up to three
// samples past width are touched -- presumably the line buffers are padded
// to a multiple of four; confirm against the callers.
void wasm_cnvrt_si32_to_float_shftd(const si32 *sp, float *dp, float mul,
                                    ui32 width)
{
  const v128_t scale = wasm_f32x4_splat(mul);
  const v128_t half = wasm_f32x4_splat(0.5f);
  const ui32 num_vecs = (width + 3) >> 2;
  for (ui32 k = 0; k < num_vecs; ++k)
  {
    const v128_t ints = wasm_v128_load(sp);
    const v128_t scaled =
      wasm_f32x4_mul(wasm_f32x4_convert_i32x4(ints), scale);
    wasm_v128_store(dp, wasm_f32x4_sub(scaled, half));
    sp += 4;
    dp += 4;
  }
}
63
// Converts 32-bit signed integer samples to floats, four lanes at a time.
// Each sample is converted to float and multiplied by mul.
//   sp    - source integer samples
//   dp    - destination float samples
//   mul   - scale factor applied after the integer-to-float conversion
//   width - number of samples; processed as (width + 3) / 4 vectors
// NOTE(review): whole 4-lane vectors are loaded and stored, so up to three
// samples past width are touched -- presumably the line buffers are padded
// to a multiple of four; confirm against the callers.
void wasm_cnvrt_si32_to_float(const si32 *sp, float *dp, float mul,
                              ui32 width)
{
  const v128_t scale = wasm_f32x4_splat(mul);
  const ui32 num_vecs = (width + 3) >> 2;
  for (ui32 k = 0; k < num_vecs; ++k)
  {
    const v128_t as_float = wasm_f32x4_convert_i32x4(wasm_v128_load(sp));
    wasm_v128_store(dp, wasm_f32x4_mul(as_float, scale));
    sp += 4;
    dp += 4;
  }
}
77
// Converts float samples to 32-bit signed integers, four lanes at a time.
// Each sample has 0.5 added to it, is multiplied by mul, and is then rounded
// to the nearest integer with ties going away from zero.
//   sp    - source float samples
//   dp    - destination integer samples
//   mul   - scale factor applied after the 0.5 offset
//   width - number of samples; processed as (width + 3) / 4 vectors
//
// Rounding: wasm_i32x4_trunc_sat_f32x4 truncates toward zero, so adding 0.5
// before it only rounds correctly for non-negative values.  Negative values
// (an input below -0.5 with a positive mul) need 0.5 subtracted instead;
// otherwise, for example, -1.2 would become 0 rather than -1.  The sign of
// each lane therefore selects between the +0.5 and -0.5 variants.
// Non-negative lanes produce exactly the same result as before.
//
// NOTE(review): whole 4-lane vectors are loaded and stored, so up to three
// samples past width are touched -- presumably the line buffers are padded
// to a multiple of four; confirm against the callers.
void wasm_cnvrt_float_to_si32_shftd(const float *sp, si32 *dp, float mul,
                                    ui32 width)
{
  const v128_t zero = wasm_f32x4_splat(0.0f);
  const v128_t half = wasm_f32x4_splat(0.5f);
  const v128_t m = wasm_f32x4_splat(mul);
  for (ui32 i = (width + 3) >> 2; i > 0; --i, sp+=4, dp+=4)
  {
    v128_t t = wasm_v128_load(sp);
    v128_t s = wasm_f32x4_add(t, half);
    s = wasm_f32x4_mul(s, m);

    // all-ones in lanes where s >= 0, all-zeros elsewhere (including NaN)
    v128_t non_neg = wasm_f32x4_ge(s, zero);
    v128_t up = wasm_f32x4_add(s, half);    // used by non-negative lanes
    v128_t down = wasm_f32x4_sub(s, half);  // used by negative lanes
    s = wasm_v128_bitselect(up, down, non_neg);

    // truncation toward zero completes the round-half-away-from-zero
    wasm_v128_store(dp, wasm_i32x4_trunc_sat_f32x4(s));
  }
}
94
// Converts float samples to 32-bit signed integers, four lanes at a time.
// Each sample is multiplied by mul and then rounded to the nearest integer
// with ties going away from zero.
//   sp    - source float samples
//   dp    - destination integer samples
//   mul   - scale factor applied before rounding
//   width - number of samples; processed as (width + 3) / 4 vectors
//
// Rounding: wasm_i32x4_trunc_sat_f32x4 truncates toward zero, so adding 0.5
// before it only rounds correctly for non-negative values.  Negative values
// need 0.5 subtracted instead; otherwise, for example, -1.2 would become 0
// rather than -1.  The sign of each lane therefore selects between the +0.5
// and -0.5 variants.  Non-negative lanes produce exactly the same result as
// before.
//
// NOTE(review): whole 4-lane vectors are loaded and stored, so up to three
// samples past width are touched -- presumably the line buffers are padded
// to a multiple of four; confirm against the callers.
void wasm_cnvrt_float_to_si32(const float *sp, si32 *dp, float mul,
                              ui32 width)
{
  const v128_t zero = wasm_f32x4_splat(0.0f);
  const v128_t half = wasm_f32x4_splat(0.5f);
  const v128_t m = wasm_f32x4_splat(mul);
  for (ui32 i = (width + 3) >> 2; i > 0; --i, sp+=4, dp+=4)
  {
    v128_t t = wasm_v128_load(sp);
    v128_t s = wasm_f32x4_mul(t, m);

    // all-ones in lanes where s >= 0, all-zeros elsewhere (including NaN)
    v128_t non_neg = wasm_f32x4_ge(s, zero);
    v128_t up = wasm_f32x4_add(s, half);    // used by non-negative lanes
    v128_t down = wasm_f32x4_sub(s, half);  // used by negative lanes
    s = wasm_v128_bitselect(up, down, non_neg);

    // truncation toward zero completes the round-half-away-from-zero
    wasm_v128_store(dp, wasm_i32x4_trunc_sat_f32x4(s));
  }
}
110
111
// Adds a constant offset to 32-bit signed integer samples, four lanes at a
// time.
//   sp    - source integer samples
//   dp    - destination integer samples
//   shift - value added to every sample
//   width - number of samples; processed as (width + 3) / 4 vectors
// NOTE(review): whole 4-lane vectors are loaded and stored, so up to three
// samples past width are touched -- presumably the line buffers are padded
// to a multiple of four; confirm against the callers.
void wasm_cnvrt_si32_to_si32_shftd(const si32 *sp, si32 *dp, int shift,
                                   ui32 width)
{
  const v128_t offset = wasm_i32x4_splat(shift);
  const ui32 num_vecs = (width + 3) >> 2;
  for (ui32 k = 0; k < num_vecs; ++k)
  {
    wasm_v128_store(dp, wasm_i32x4_add(wasm_v128_load(sp), offset));
    sp += 4;
    dp += 4;
  }
}
124
// Forward integer colour transform on three planes, four lanes at a time:
//   y  = (r + b + 2 * g) >> 2
//   cb = b - g
//   cr = r - g
//   r, g, b   - source planes
//   y, cb, cr - destination planes
//   repeat    - number of samples; processed as (repeat + 3) / 4 vectors
// NOTE(review): whole 4-lane vectors are loaded and stored, so up to three
// samples past repeat are touched -- presumably the line buffers are padded
// to a multiple of four; confirm against the callers.
void wasm_rct_forward(const si32 *r, const si32 *g, const si32 *b,
                      si32 *y, si32 *cb, si32 *cr, ui32 repeat)
{
  const ui32 num_vecs = (repeat + 3) >> 2;
  for (ui32 k = 0; k < num_vecs; ++k)
  {
    const v128_t red = wasm_v128_load(r);
    const v128_t green = wasm_v128_load(g);
    const v128_t blue = wasm_v128_load(b);

    // luma: (r + b) + (g << 1), then shifted right by two
    const v128_t sum = wasm_i32x4_add(wasm_i32x4_add(red, blue),
                                      wasm_i32x4_shl(green, 1));
    wasm_v128_store(y, wasm_i32x4_shr(sum, 2));

    // chroma differences relative to green
    wasm_v128_store(cb, wasm_i32x4_sub(blue, green));
    wasm_v128_store(cr, wasm_i32x4_sub(red, green));

    r += 4; g += 4; b += 4;
    y += 4; cb += 4; cr += 4;
  }
}
146
// Backward integer colour transform on three planes, four lanes at a time:
//   g = y - ((cb + cr) >> 2)
//   b = cb + g
//   r = cr + g
//   y, cb, cr - source planes
//   r, g, b   - destination planes
//   repeat    - number of samples; processed as (repeat + 3) / 4 vectors
// NOTE(review): whole 4-lane vectors are loaded and stored, so up to three
// samples past repeat are touched -- presumably the line buffers are padded
// to a multiple of four; confirm against the callers.
void wasm_rct_backward(const si32 *y, const si32 *cb, const si32 *cr,
                       si32 *r, si32 *g, si32 *b, ui32 repeat)
{
  const ui32 num_vecs = (repeat + 3) >> 2;
  for (ui32 k = 0; k < num_vecs; ++k)
  {
    const v128_t luma = wasm_v128_load(y);
    const v128_t chroma_b = wasm_v128_load(cb);
    const v128_t chroma_r = wasm_v128_load(cr);

    // green is recovered first; blue and red are derived from it
    const v128_t chroma_sum = wasm_i32x4_add(chroma_b, chroma_r);
    const v128_t green = wasm_i32x4_sub(luma, wasm_i32x4_shr(chroma_sum, 2));
    wasm_v128_store(g, green);
    wasm_v128_store(b, wasm_i32x4_add(chroma_b, green));
    wasm_v128_store(r, wasm_i32x4_add(chroma_r, green));

    y += 4; cb += 4; cr += 4;
    r += 4; g += 4; b += 4;
  }
}
169
// Forward floating-point colour transform on three planes, four lanes at a
// time:
//   y  = ALPHA_RF * r + ALPHA_GF * g + ALPHA_BF * b
//   cb = BETA_CbF * (b - y)
//   cr = BETA_CrF * (r - y)
// with the coefficients taken from CT_CNST.
//   r, g, b   - source planes
//   y, cb, cr - destination planes
//   repeat    - number of samples; processed as (repeat + 3) / 4 vectors
// NOTE(review): whole 4-lane vectors are loaded and stored, so up to three
// samples past repeat are touched -- presumably the line buffers are padded
// to a multiple of four; confirm against the callers.
void wasm_ict_forward(const float *r, const float *g, const float *b,
                      float *y, float *cb, float *cr, ui32 repeat)
{
  const v128_t k_red = wasm_f32x4_splat(CT_CNST::ALPHA_RF);
  const v128_t k_green = wasm_f32x4_splat(CT_CNST::ALPHA_GF);
  const v128_t k_blue = wasm_f32x4_splat(CT_CNST::ALPHA_BF);
  const v128_t k_cb = wasm_f32x4_splat(CT_CNST::BETA_CbF);
  const v128_t k_cr = wasm_f32x4_splat(CT_CNST::BETA_CrF);

  const ui32 num_vecs = (repeat + 3) >> 2;
  for (ui32 k = 0; k < num_vecs; ++k)
  {
    const v128_t red = wasm_v128_load(r);
    const v128_t blue = wasm_v128_load(b);
    const v128_t green = wasm_v128_load(g);

    // the weighted terms are accumulated in red, green, blue order
    v128_t luma = wasm_f32x4_mul(k_red, red);
    luma = wasm_f32x4_add(luma, wasm_f32x4_mul(k_green, green));
    luma = wasm_f32x4_add(luma, wasm_f32x4_mul(k_blue, blue));
    wasm_v128_store(y, luma);

    const v128_t blue_diff = wasm_f32x4_sub(blue, luma);
    wasm_v128_store(cb, wasm_f32x4_mul(k_cb, blue_diff));
    const v128_t red_diff = wasm_f32x4_sub(red, luma);
    wasm_v128_store(cr, wasm_f32x4_mul(k_cr, red_diff));

    r += 4; g += 4; b += 4;
    y += 4; cb += 4; cr += 4;
  }
}
194
// Backward floating-point colour transform on three planes, four lanes at a
// time:
//   g = y - GAMMA_CR2G * cr - GAMMA_CB2G * cb
//   r = y + GAMMA_CR2R * cr
//   b = y + GAMMA_CB2B * cb
// with the coefficients taken from CT_CNST.
//   y, cb, cr - source planes
//   r, g, b   - destination planes
//   repeat    - number of samples; processed as (repeat + 3) / 4 vectors
// NOTE(review): whole 4-lane vectors are loaded and stored, so up to three
// samples past repeat are touched -- presumably the line buffers are padded
// to a multiple of four; confirm against the callers.
void wasm_ict_backward(const float *y, const float *cb, const float *cr,
                       float *r, float *g, float *b, ui32 repeat)
{
  const v128_t k_cr_to_g = wasm_f32x4_splat(CT_CNST::GAMMA_CR2G);
  const v128_t k_cb_to_g = wasm_f32x4_splat(CT_CNST::GAMMA_CB2G);
  const v128_t k_cr_to_r = wasm_f32x4_splat(CT_CNST::GAMMA_CR2R);
  const v128_t k_cb_to_b = wasm_f32x4_splat(CT_CNST::GAMMA_CB2B);

  const ui32 num_vecs = (repeat + 3) >> 2;
  for (ui32 k = 0; k < num_vecs; ++k)
  {
    const v128_t luma = wasm_v128_load(y);
    const v128_t chroma_r = wasm_v128_load(cr);
    const v128_t chroma_b = wasm_v128_load(cb);

    // green: the cr contribution is removed first, then the cb contribution
    const v128_t partial_g =
      wasm_f32x4_sub(luma, wasm_f32x4_mul(k_cr_to_g, chroma_r));
    const v128_t green =
      wasm_f32x4_sub(partial_g, wasm_f32x4_mul(k_cb_to_g, chroma_b));
    wasm_v128_store(g, green);

    const v128_t red =
      wasm_f32x4_add(luma, wasm_f32x4_mul(k_cr_to_r, chroma_r));
    wasm_v128_store(r, red);

    const v128_t blue =
      wasm_f32x4_add(luma, wasm_f32x4_mul(k_cb_to_b, chroma_b));
    wasm_v128_store(b, blue);

    y += 4; cb += 4; cr += 4;
    r += 4; g += 4; b += 4;
  }
}
217
218 }
219}
void wasm_cnvrt_float_to_si32_shftd(const float *sp, si32 *dp, float mul, ui32 width)
void wasm_ict_backward(const float *y, const float *cb, const float *cr, float *r, float *g, float *b, ui32 repeat)
void wasm_cnvrt_si32_to_float_shftd(const si32 *sp, float *dp, float mul, ui32 width)
void wasm_cnvrt_float_to_si32(const float *sp, si32 *dp, float mul, ui32 width)
void wasm_ict_forward(const float *r, const float *g, const float *b, float *y, float *cb, float *cr, ui32 repeat)
void wasm_rct_forward(const si32 *r, const si32 *g, const si32 *b, si32 *y, si32 *cb, si32 *cr, ui32 repeat)
void wasm_cnvrt_si32_to_si32_shftd(const si32 *sp, si32 *dp, int shift, ui32 width)
void wasm_cnvrt_si32_to_float(const si32 *sp, float *dp, float mul, ui32 width)
void wasm_rct_backward(const si32 *y, const si32 *cb, const si32 *cr, si32 *r, si32 *g, si32 *b, ui32 repeat)
int32_t si32
Definition: ojph_defs.h:55
uint32_t ui32
Definition: ojph_defs.h:54
static const float GAMMA_CR2R
static const float BETA_CbF
static const float GAMMA_CB2B
static const float ALPHA_RF
static const float GAMMA_CB2G
static const float GAMMA_CR2G
static const float ALPHA_BF
static const float BETA_CrF
static const float ALPHA_GF