OpenJPH
Open-source implementation of JPEG2000 Part-15
ojph_transform.cpp
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2019, Aous Naman
6// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2019, The University of New South Wales, Australia
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// 1. Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// 2. Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//***************************************************************************/
32// This file is part of the OpenJPH software implementation.
33// File: ojph_transform.cpp
34// Author: Aous Naman
35// Date: 28 August 2019
36//***************************************************************************/
37
38#include <cstdio>
39
40#include "ojph_arch.h"
41#include "ojph_mem.h"
42#include "ojph_transform.h"
44
45namespace ojph {
46 struct line_buf;
47
48 namespace local {
49
51 // Reversible functions
53
56 (const line_buf* src1, const line_buf* src2, line_buf *dst,
57 ui32 repeat) = NULL;
58
61 (const line_buf* src1, const line_buf* src2, line_buf *dst,
62 ui32 repeat) = NULL;
63
66 (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
67 = NULL;
68
71 (const line_buf* src1, const line_buf* src2, line_buf *dst,
72 ui32 repeat) = NULL;
73
76 (const line_buf* src1, const line_buf* src2, line_buf *dst,
77 ui32 repeat) = NULL;
78
81 (line_buf* dst, line_buf *lsrc, line_buf *hsrc, ui32 width, bool even)
82 = NULL;
83
85 // Irreversible functions
87
90 (const line_buf* src1, const line_buf* src2, line_buf *dst,
91 int step_num, ui32 repeat) = NULL;
92
95 (const line_buf *src, line_buf *dst, bool L_analysis_or_H_synthesis,
96 ui32 repeat) = NULL;
97
100 (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
101 = NULL;
102
105 (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
106 = NULL;
107
110
113 {
115 return;
116
117#if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN)
118
129
130#ifndef OJPH_DISABLE_INTEL_SIMD
131 int level = get_cpu_ext_level();
132
133 if (level >= X86_CPU_EXT_LEVEL_SSE)
134 {
139 }
140
141 if (level >= X86_CPU_EXT_LEVEL_SSE2)
142 {
149 }
150
151 if (level >= X86_CPU_EXT_LEVEL_AVX)
152 {
157 }
158
159 if (level >= X86_CPU_EXT_LEVEL_AVX2)
160 {
167 }
168#endif // !OJPH_DISABLE_INTEL_SIMD
169
170#else // OJPH_ENABLE_WASM_SIMD
181#endif // !OJPH_ENABLE_WASM_SIMD
182
184 }
185
187 const float LIFTING_FACTORS::steps[8] =
188 {
189 -1.586134342059924f, -0.052980118572961f, +0.882911075530934f,
190 +0.443506852043971f,
191 +1.586134342059924f, +0.052980118572961f, -0.882911075530934f,
192 -0.443506852043971f
193 };
194 const float LIFTING_FACTORS::K = 1.230174104914001f;
195 const float LIFTING_FACTORS::K_inv = (float)(1.0 / 1.230174104914001);
196
198
199#if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN)
200
203 const line_buf* line_src2,
204 line_buf *line_dst, ui32 repeat)
205 {
206 si32 *dst = line_dst->i32;
207 const si32 *src1 = line_src1->i32, *src2 = line_src2->i32;
208 for (ui32 i = repeat; i > 0; --i)
209 *dst++ -= (*src1++ + *src2++) >> 1;
210 }
211
214 const line_buf* line_src2,
215 line_buf *line_dst, ui32 repeat)
216 {
217 si32 *dst = line_dst->i32;
218 const si32 *src1 = line_src1->i32, *src2 = line_src2->i32;
219 for (ui32 i = repeat; i > 0; --i)
220 *dst++ += (*src1++ + *src2++ + 2) >> 2;
221 }
222
224 void gen_rev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst,
225 line_buf *line_hdst, ui32 width, bool even)
226 {
227 if (width > 1)
228 {
229 si32 *src = line_src->i32;
230 si32 *ldst = line_ldst->i32, *hdst = line_hdst->i32;
231
232 const ui32 L_width = (width + (even ? 1 : 0)) >> 1;
233 const ui32 H_width = (width + (even ? 0 : 1)) >> 1;
234
235 // extension
236 src[-1] = src[1];
237 src[width] = src[width-2];
238 // predict
239 const si32* sp = src + (even ? 1 : 0);
240 si32 *dph = hdst;
241 for (ui32 i = H_width; i > 0; --i, sp+=2)
242 *dph++ = sp[0] - ((sp[-1] + sp[1]) >> 1);
243
244 // extension
245 hdst[-1] = hdst[0];
246 hdst[H_width] = hdst[H_width-1];
247 // update
248 sp = src + (even ? 0 : 1);
249 const si32* sph = hdst + (even ? 0 : 1);
250 si32 *dpl = ldst;
251 for (ui32 i = L_width; i > 0; --i, sp+=2, sph++)
252 *dpl++ = *sp + ((2 + sph[-1] + sph[0]) >> 2);
253 }
254 else
255 {
256 if (even)
257 line_ldst->i32[0] = line_src->i32[0];
258 else
259 line_hdst->i32[0] = line_src->i32[0] << 1;
260 }
261 }
262
265 const line_buf* line_src2,
266 line_buf *line_dst, ui32 repeat)
267 {
268 si32 *dst = line_dst->i32;
269 const si32 *src1 = line_src1->i32, *src2 = line_src2->i32;
270 for (ui32 i = repeat; i > 0; --i)
271 *dst++ += (*src1++ + *src2++) >> 1;
272 }
273
276 const line_buf* line_src2,
277 line_buf *line_dst, ui32 repeat)
278 {
279 si32 *dst = line_dst->i32;
280 const si32 *src1 = line_src1->i32, *src2 = line_src2->i32;
281 for (ui32 i = repeat; i > 0; --i)
282 *dst++ -= (2 + *src1++ + *src2++) >> 2;
283 }
284
286 void gen_rev_horz_wvlt_bwd_tx(line_buf* line_dst, line_buf *line_lsrc,
287 line_buf *line_hsrc, ui32 width, bool even)
288 {
289 if (width > 1)
290 {
291 si32 *lsrc = line_lsrc->i32, *hsrc = line_hsrc->i32;
292 si32 *dst = line_dst->i32;
293
294 const ui32 L_width = (width + (even ? 1 : 0)) >> 1;
295 const ui32 H_width = (width + (even ? 0 : 1)) >> 1;
296
297 // extension
298 hsrc[-1] = hsrc[0];
299 hsrc[H_width] = hsrc[H_width-1];
300 //inverse update
301 const si32 *sph = hsrc + (even ? 0 : 1);
302 si32 *spl = lsrc;
303 for (ui32 i = L_width; i > 0; --i, sph++, spl++)
304 *spl -= ((2 + sph[-1] + sph[0]) >> 2);
305
306 // extension
307 lsrc[-1] = lsrc[0];
308 lsrc[L_width] = lsrc[L_width - 1];
309 // inverse predict and combine
310 si32 *dp = dst + (even ? 0 : -1);
311 spl = lsrc + (even ? 0 : -1);
312 sph = hsrc;
313 for (ui32 i = L_width + (even ? 0 : 1); i > 0; --i, spl++, sph++)
314 {
315 *dp++ = *spl;
316 *dp++ = *sph + ((spl[0] + spl[1]) >> 1);
317 }
318 }
319 else
320 {
321 if (even)
322 line_dst->i32[0] = line_lsrc->i32[0];
323 else
324 line_dst->i32[0] = line_hsrc->i32[0] >> 1;
325 }
326 }
327
328
330 void gen_irrev_vert_wvlt_step(const line_buf* line_src1,
331 const line_buf* line_src2,
332 line_buf *line_dst,
333 int step_num, ui32 repeat)
334 {
335 float *dst = line_dst->f32;
336 const float *src1 = line_src1->f32, *src2 = line_src2->f32;
337 float factor = LIFTING_FACTORS::steps[step_num];
338 for (ui32 i = repeat; i > 0; --i)
339 *dst++ += factor * (*src1++ + *src2++);
340 }
341
343 void gen_irrev_vert_wvlt_K(const line_buf* line_src,
344 line_buf* line_dst,
345 bool L_analysis_or_H_synthesis, ui32 repeat)
346 {
347 float *dst = line_dst->f32;
348 const float *src = line_src->f32;
349 float factor = LIFTING_FACTORS::K_inv;
350 factor = L_analysis_or_H_synthesis ? factor : LIFTING_FACTORS::K;
351 for (ui32 i = repeat; i > 0; --i)
352 *dst++ = *src++ * factor;
353 }
354
355
358 line_buf *line_ldst,
359 line_buf *line_hdst,
360 ui32 width, bool even)
361 {
362 if (width > 1)
363 {
364 float *src = line_src->f32;
365 float *ldst = line_ldst->f32, *hdst = line_hdst->f32;
366
367 const ui32 L_width = (width + (even ? 1 : 0)) >> 1;
368 const ui32 H_width = (width + (even ? 0 : 1)) >> 1;
369
370 //extension
371 src[-1] = src[1];
372 src[width] = src[width-2];
373 // predict
374 float factor = LIFTING_FACTORS::steps[0];
375 const float* sp = src + (even ? 1 : 0);
376 float *dph = hdst;
377 for (ui32 i = H_width; i > 0; --i, sp+=2)
378 *dph++ = sp[0] + factor * (sp[-1] + sp[1]);
379
380 // extension
381 hdst[-1] = hdst[0];
382 hdst[H_width] = hdst[H_width-1];
383 // update
384 factor = LIFTING_FACTORS::steps[1];
385 sp = src + (even ? 0 : 1);
386 const float* sph = hdst + (even ? 0 : 1);
387 float *dpl = ldst;
388 for (ui32 i = L_width; i > 0; --i, sp+=2, sph++)
389 *dpl++ = sp[0] + factor * (sph[-1] + sph[0]);
390
391 //extension
392 ldst[-1] = ldst[0];
393 ldst[L_width] = ldst[L_width-1];
394 //predict
395 factor = LIFTING_FACTORS::steps[2];
396 const float* spl = ldst + (even ? 1 : 0);
397 dph = hdst;
398 for (ui32 i = H_width; i > 0; --i, spl++)
399 *dph++ += factor * (spl[-1] + spl[0]);
400
401 // extension
402 hdst[-1] = hdst[0];
403 hdst[H_width] = hdst[H_width-1];
404 // update
405 factor = LIFTING_FACTORS::steps[3];
406 sph = hdst + (even ? 0 : 1);
407 dpl = ldst;
408 for (ui32 i = L_width; i > 0; --i, sph++)
409 *dpl++ += factor * (sph[-1] + sph[0]);
410
411 //multipliers
412 float *dp = ldst;
413 for (ui32 i = L_width; i > 0; --i, dp++)
415 dp = hdst;
416 for (ui32 i = H_width; i > 0; --i, dp++)
417 *dp *= LIFTING_FACTORS::K;
418 }
419 else
420 {
421 if (even)
422 line_ldst->f32[0] = line_src->f32[0];
423 else
424 line_hdst->f32[0] = line_src->f32[0] + line_src->f32[0];
425 }
426 }
427
429 void gen_irrev_horz_wvlt_bwd_tx(line_buf* line_dst, line_buf *line_lsrc,
430 line_buf *line_hsrc, ui32 width,
431 bool even)
432 {
433 if (width > 1)
434 {
435 float *lsrc = line_lsrc->f32, *hsrc = line_hsrc->f32;
436 float *dst = line_dst->f32;
437
438 const ui32 L_width = (width + (even ? 1 : 0)) >> 1;
439 const ui32 H_width = (width + (even ? 0 : 1)) >> 1;
440
441 //multipliers
442 float *dp = lsrc;
443 for (ui32 i = L_width; i > 0; --i, dp++)
444 *dp *= LIFTING_FACTORS::K;
445 dp = hsrc;
446 for (ui32 i = H_width; i > 0; --i, dp++)
448
449 //extension
450 hsrc[-1] = hsrc[0];
451 hsrc[H_width] = hsrc[H_width-1];
452 //inverse update
453 float factor = LIFTING_FACTORS::steps[7];
454 const float *sph = hsrc + (even ? 0 : 1);
455 float *dpl = lsrc;
456 for (ui32 i = L_width; i > 0; --i, dpl++, sph++)
457 *dpl += factor * (sph[-1] + sph[0]);
458
459 //extension
460 lsrc[-1] = lsrc[0];
461 lsrc[L_width] = lsrc[L_width-1];
462 //inverse perdict
463 factor = LIFTING_FACTORS::steps[6];
464 const float *spl = lsrc + (even ? 0 : -1);
465 float *dph = hsrc;
466 for (ui32 i = H_width; i > 0; --i, dph++, spl++)
467 *dph += factor * (spl[0] + spl[1]);
468
469 //extension
470 hsrc[-1] = hsrc[0];
471 hsrc[H_width] = hsrc[H_width-1];
472 //inverse update
473 factor = LIFTING_FACTORS::steps[5];
474 sph = hsrc + (even ? 0 : 1);
475 dpl = lsrc;
476 for (ui32 i = L_width; i > 0; --i, dpl++, sph++)
477 *dpl += factor * (sph[-1] + sph[0]);
478
479 //extension
480 lsrc[-1] = lsrc[0];
481 lsrc[L_width] = lsrc[L_width-1];
482 //inverse perdict and combine
483 factor = LIFTING_FACTORS::steps[4];
484 dp = dst + (even ? 0 : -1);
485 spl = lsrc + (even ? 0 : -1);
486 sph = hsrc;
487 for (ui32 i = L_width+(even?0:1); i > 0; --i, spl++, sph++)
488 {
489 *dp++ = *spl;
490 *dp++ = *sph + factor * (spl[0] + spl[1]);
491 }
492 }
493 else
494 {
495 if (even)
496 line_dst->f32[0] = line_lsrc->f32[0];
497 else
498 line_dst->f32[0] = line_hsrc->f32[0] * 0.5f;
499 }
500 }
501
502#endif // !OJPH_ENABLE_WASM_SIMD
503
504 }
505}
void wasm_rev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, line_buf *line_hsrc, ui32 width, bool even)
void avx2_rev_vert_wvlt_fwd_update(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void sse_irrev_horz_wvlt_bwd_tx(line_buf *src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
void gen_rev_vert_wvlt_bwd_update(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void gen_rev_vert_wvlt_fwd_predict(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void sse2_rev_horz_wvlt_fwd_tx(line_buf *src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
void avx_irrev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, line_buf *line_hsrc, ui32 width, bool even)
void avx2_rev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, line_buf *line_hdst, ui32 width, bool even)
void wasm_rev_vert_wvlt_fwd_update(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void wasm_rev_vert_wvlt_bwd_predict(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void gen_rev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, line_buf *line_hsrc, ui32 width, bool even)
void(* irrev_horz_wvlt_fwd_tx)(line_buf *src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
void avx2_rev_vert_wvlt_fwd_predict(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void avx_irrev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, line_buf *line_hdst, ui32 width, bool even)
void wasm_irrev_horz_wvlt_bwd_tx(line_buf *line_src, line_buf *line_ldst, line_buf *line_hdst, ui32 width, bool even)
void sse2_rev_vert_wvlt_fwd_update(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void(* rev_horz_wvlt_bwd_tx)(line_buf *dst, line_buf *lsrc, line_buf *hsrc, ui32 width, bool even)
void sse2_rev_horz_wvlt_bwd_tx(line_buf *dst, line_buf *lsrc, line_buf *hsrc, ui32 width, bool even)
void wasm_irrev_vert_wvlt_K(const line_buf *line_src, line_buf *line_dst, bool L_analysis_or_H_synthesis, ui32 repeat)
void gen_rev_vert_wvlt_fwd_update(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
static bool wavelet_transform_functions_initialized
void avx_irrev_vert_wvlt_K(const line_buf *line_src, line_buf *line_dst, bool L_analysis_or_H_synthesis, ui32 repeat)
void avx2_rev_vert_wvlt_bwd_predict(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void init_wavelet_transform_functions()
void(* rev_vert_wvlt_fwd_update)(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void avx_irrev_vert_wvlt_step(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, int step_num, ui32 repeat)
void sse_irrev_horz_wvlt_fwd_tx(line_buf *src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
void sse2_rev_vert_wvlt_bwd_predict(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void gen_rev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, line_buf *line_hdst, ui32 width, bool even)
void(* rev_horz_wvlt_fwd_tx)(line_buf *src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
void(* irrev_vert_wvlt_step)(const line_buf *src1, const line_buf *src2, line_buf *dst, int step_num, ui32 repeat)
void wasm_irrev_vert_wvlt_step(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, int step_num, ui32 repeat)
void gen_irrev_vert_wvlt_K(const line_buf *line_src, line_buf *line_dst, bool L_analysis_or_H_synthesis, ui32 repeat)
void gen_irrev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, line_buf *line_hdst, ui32 width, bool even)
void sse2_rev_vert_wvlt_fwd_predict(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void(* rev_vert_wvlt_bwd_update)(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void avx2_rev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, line_buf *line_hsrc, ui32 width, bool even)
void gen_irrev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, line_buf *line_hsrc, ui32 width, bool even)
void(* rev_vert_wvlt_bwd_predict)(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void(* rev_vert_wvlt_fwd_predict)(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void(* irrev_horz_wvlt_bwd_tx)(line_buf *src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
void sse_irrev_vert_wvlt_K(const line_buf *src, line_buf *dst, bool L_analysis_or_H_synthesis, ui32 repeat)
void(* irrev_vert_wvlt_K)(const line_buf *src, line_buf *dst, bool L_analysis_or_H_synthesis, ui32 repeat)
void sse_irrev_vert_wvlt_step(const line_buf *src1, const line_buf *src2, line_buf *dst, int step_num, ui32 repeat)
void wasm_rev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, line_buf *line_hdst, ui32 width, bool even)
void wasm_irrev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, line_buf *line_hdst, ui32 width, bool even)
void wasm_rev_vert_wvlt_fwd_predict(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void gen_rev_vert_wvlt_bwd_predict(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void avx2_rev_vert_wvlt_bwd_update(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void gen_irrev_vert_wvlt_step(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, int step_num, ui32 repeat)
void sse2_rev_vert_wvlt_bwd_update(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void wasm_rev_vert_wvlt_bwd_update(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
@ X86_CPU_EXT_LEVEL_AVX2
Definition: ojph_arch.h:104
@ X86_CPU_EXT_LEVEL_AVX
Definition: ojph_arch.h:103
@ X86_CPU_EXT_LEVEL_SSE2
Definition: ojph_arch.h:98
@ X86_CPU_EXT_LEVEL_SSE
Definition: ojph_arch.h:97
OJPH_EXPORT int get_cpu_ext_level()
Definition: ojph_arch.cpp:184
int32_t si32
Definition: ojph_defs.h:55
uint32_t ui32
Definition: ojph_defs.h:54
float * f32
Definition: ojph_mem.h:156
si32 * i32
Definition: ojph_mem.h:155