OpenJPH
Open-source implementation of JPEG2000 Part-15
ojph_colour_sse2.cpp
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2019, Aous Naman
6// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2019, The University of New South Wales, Australia
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// 1. Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// 2. Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//***************************************************************************/
32// This file is part of the OpenJPH software implementation.
33// File: ojph_colour_sse2.cpp
34// Author: Aous Naman
35// Date: 11 October 2019
36//***************************************************************************/
37
38#include <cmath>
39
40#include "ojph_defs.h"
41#include "ojph_arch.h"
42#include "ojph_colour.h"
43
44#include <immintrin.h>
45
46namespace ojph {
47 namespace local {
48
// Converts a line of floats to 32-bit signed integers, computing
//   dp[i] = round_to_nearest((sp[i] + 0.5f) * mul)
//
// sp    : source samples (unaligned loads are used)
// dp    : destination samples (unaligned stores are used)
// mul   : scale factor applied after the 0.5 offset has been added
// width : number of samples requested
//
// Samples are processed four at a time and the group count is rounded up,
// so when width is not a multiple of 4 up to three extra elements past
// width are read from sp and written to dp; both buffers must be sized
// accordingly.
void sse2_cnvrt_float_to_si32_shftd(const float *sp, si32 *dp, float mul,
                                    ui32 width)
{
  // _mm_cvtps_epi32 rounds according to the current MXCSR rounding mode;
  // select round-to-nearest for this call and restore the caller's mode
  // before returning.
  const unsigned int saved_rounding = _MM_GET_ROUNDING_MODE();
  _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);

  const __m128 half = _mm_set1_ps(0.5f);
  const __m128 scale = _mm_set1_ps(mul);

  // Unsigned counter: the group count is derived from an unsigned width.
  for (ui32 groups = (width + 3) >> 2; groups > 0; --groups, sp += 4, dp += 4)
  {
    const __m128 shifted = _mm_add_ps(_mm_loadu_ps(sp), half);
    const __m128 scaled = _mm_mul_ps(shifted, scale);
    _mm_storeu_si128(reinterpret_cast<__m128i*>(dp), _mm_cvtps_epi32(scaled));
  }

  _MM_SET_ROUNDING_MODE(saved_rounding);
}
66
// Converts a line of floats to 32-bit signed integers, computing
//   dp[i] = round_to_nearest(sp[i] * mul)
//
// sp    : source samples (unaligned loads are used)
// dp    : destination samples (unaligned stores are used)
// mul   : scale factor applied to every sample
// width : number of samples requested
//
// Samples are processed four at a time and the group count is rounded up,
// so when width is not a multiple of 4 up to three extra elements past
// width are read from sp and written to dp; both buffers must be sized
// accordingly.
void sse2_cnvrt_float_to_si32(const float *sp, si32 *dp, float mul,
                              ui32 width)
{
  // _mm_cvtps_epi32 rounds according to the current MXCSR rounding mode;
  // select round-to-nearest for this call and restore the caller's mode
  // before returning.
  const unsigned int saved_rounding = _MM_GET_ROUNDING_MODE();
  _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);

  const __m128 scale = _mm_set1_ps(mul);

  // Unsigned counter: the group count is derived from an unsigned width.
  for (ui32 groups = (width + 3) >> 2; groups > 0; --groups, sp += 4, dp += 4)
  {
    const __m128 scaled = _mm_mul_ps(_mm_loadu_ps(sp), scale);
    _mm_storeu_si128(reinterpret_cast<__m128i*>(dp), _mm_cvtps_epi32(scaled));
  }

  _MM_SET_ROUNDING_MODE(saved_rounding);
}
82
83
// Adds a constant offset to every sample of a line:
//   dp[i] = sp[i] + shift
//
// sp    : source samples (unaligned loads are used)
// dp    : destination samples (unaligned stores are used)
// shift : value added to each sample (may be negative)
// width : number of samples requested
//
// Samples are processed four at a time and the group count is rounded up,
// so when width is not a multiple of 4 up to three extra elements past
// width are read from sp and written to dp; both buffers must be sized
// accordingly.
void sse2_cnvrt_si32_to_si32_shftd(const si32 *sp, si32 *dp, int shift,
                                   ui32 width)
{
  const __m128i offset = _mm_set1_epi32(shift);

  // Unsigned counter: the group count is derived from an unsigned width.
  for (ui32 groups = (width + 3) >> 2; groups > 0; --groups, sp += 4, dp += 4)
  {
    // Keep the source const-qualified when reinterpreting it as a vector.
    const __m128i in = _mm_loadu_si128(reinterpret_cast<const __m128i*>(sp));
    _mm_storeu_si128(reinterpret_cast<__m128i*>(dp),
                     _mm_add_epi32(in, offset));
  }
}
96
// Forward integer colour transform (RCT, per the function name), applied
// element-wise to three input lines:
//   y[i]  = (r[i] + 2 * g[i] + b[i]) >> 2   (arithmetic shift)
//   cb[i] = b[i] - g[i]
//   cr[i] = r[i] - g[i]
//
// r, g, b   : input component lines
// y, cb, cr : output component lines
// repeat    : number of samples requested
//
// Aligned loads and stores (_mm_load_si128 / _mm_store_si128) are used, so
// all six pointers must be 16-byte aligned. Samples are processed four at
// a time and the group count is rounded up, so up to three elements past
// repeat are read and written when repeat is not a multiple of 4.
void sse2_rct_forward(const si32 *r, const si32 *g, const si32 *b,
                      si32 *y, si32 *cb, si32 *cr, ui32 repeat)
{
  // Unsigned counter: the group count is derived from an unsigned repeat.
  for (ui32 groups = (repeat + 3) >> 2; groups > 0; --groups)
  {
    // Keep the sources const-qualified when reinterpreting them as vectors.
    const __m128i red = _mm_load_si128(reinterpret_cast<const __m128i*>(r));
    const __m128i green = _mm_load_si128(reinterpret_cast<const __m128i*>(g));
    const __m128i blue = _mm_load_si128(reinterpret_cast<const __m128i*>(b));

    // r + b + 2g, then divide by 4 with an arithmetic right shift.
    const __m128i sum = _mm_add_epi32(_mm_add_epi32(red, blue),
                                      _mm_slli_epi32(green, 1));
    _mm_store_si128(reinterpret_cast<__m128i*>(y), _mm_srai_epi32(sum, 2));
    _mm_store_si128(reinterpret_cast<__m128i*>(cb),
                    _mm_sub_epi32(blue, green));
    _mm_store_si128(reinterpret_cast<__m128i*>(cr),
                    _mm_sub_epi32(red, green));

    r += 4; g += 4; b += 4;
    y += 4; cb += 4; cr += 4;
  }
}
118
// Inverse integer colour transform (RCT, per the function name), applied
// element-wise to three input lines:
//   g[i] = y[i] - ((cb[i] + cr[i]) >> 2)   (arithmetic shift)
//   b[i] = cb[i] + g[i]
//   r[i] = cr[i] + g[i]
//
// y, cb, cr : input component lines
// r, g, b   : output component lines
// repeat    : number of samples requested
//
// Aligned loads and stores (_mm_load_si128 / _mm_store_si128) are used, so
// all six pointers must be 16-byte aligned. Samples are processed four at
// a time and the group count is rounded up, so up to three elements past
// repeat are read and written when repeat is not a multiple of 4.
void sse2_rct_backward(const si32 *y, const si32 *cb, const si32 *cr,
                       si32 *r, si32 *g, si32 *b, ui32 repeat)
{
  // Unsigned counter: the group count is derived from an unsigned repeat.
  for (ui32 groups = (repeat + 3) >> 2; groups > 0; --groups)
  {
    // Keep the sources const-qualified when reinterpreting them as vectors.
    const __m128i luma = _mm_load_si128(reinterpret_cast<const __m128i*>(y));
    const __m128i chroma_b =
      _mm_load_si128(reinterpret_cast<const __m128i*>(cb));
    const __m128i chroma_r =
      _mm_load_si128(reinterpret_cast<const __m128i*>(cr));

    // Green is recovered first; blue and red are then offsets from it.
    const __m128i chroma_sum = _mm_add_epi32(chroma_b, chroma_r);
    const __m128i green = _mm_sub_epi32(luma, _mm_srai_epi32(chroma_sum, 2));
    _mm_store_si128(reinterpret_cast<__m128i*>(g), green);
    _mm_store_si128(reinterpret_cast<__m128i*>(b),
                    _mm_add_epi32(chroma_b, green));
    _mm_store_si128(reinterpret_cast<__m128i*>(r),
                    _mm_add_epi32(chroma_r, green));

    y += 4; cb += 4; cr += 4;
    r += 4; g += 4; b += 4;
  }
}
141
142 }
143}
void sse2_rct_forward(const si32 *r, const si32 *g, const si32 *b, si32 *y, si32 *cb, si32 *cr, ui32 repeat)
void sse2_cnvrt_float_to_si32_shftd(const float *sp, si32 *dp, float mul, ui32 width)
void sse2_cnvrt_si32_to_si32_shftd(const si32 *sp, si32 *dp, int shift, ui32 width)
void sse2_rct_backward(const si32 *y, const si32 *cb, const si32 *cr, si32 *r, si32 *g, si32 *b, ui32 repeat)
void sse2_cnvrt_float_to_si32(const float *sp, si32 *dp, float mul, ui32 width)
int32_t si32
Definition: ojph_defs.h:55
uint32_t ui32
Definition: ojph_defs.h:54