OpenJPH
Open-source implementation of JPEG2000 Part-15
ojph_arch.h
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2019, Aous Naman
6// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2019, The University of New South Wales, Australia
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// 1. Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// 2. Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//***************************************************************************/
32// This file is part of the OpenJPH software implementation.
33// File: ojph_arch.h
34// Author: Aous Naman
35// Date: 28 August 2019
36//***************************************************************************/
37
38
39#ifndef OJPH_ARCH_H
40#define OJPH_ARCH_H
41
42#include <cstdio>
43#include <cstdint>
44#include <cmath>
45
46#include "ojph_defs.h"
47
48
// Compiler identification.
// OJPH_COMPILER_MSVC is defined when _MSC_VER is present; otherwise
// OJPH_COMPILER_GNUC is defined when __GNUC__ is present.  Neither macro
// is defined for any other compiler.
#if defined(_MSC_VER)
#define OJPH_COMPILER_MSVC
#elif defined(__GNUC__)
#define OJPH_COMPILER_GNUC
#endif

// Marks builds that target Emscripten.
#if defined(__EMSCRIPTEN__)
#define OJPH_EMSCRIPTEN
#endif

// Pulled in for the MSVC intrinsics used further down in this header.
#if defined(OJPH_COMPILER_MSVC)
#include <intrin.h>
#endif
65
66namespace ojph {
67
// Operating-system identification.
// At most one of OJPH_OS_WINDOWS, OJPH_OS_APPLE or OJPH_OS_LINUX is
// defined; the checks are tried in that order.
#if defined(WIN32) || defined(_WIN32) || defined(_WIN64)
#define OJPH_OS_WINDOWS
#elif defined(__APPLE__)
#define OJPH_OS_APPLE
#elif defined(__linux)
#define OJPH_OS_LINUX
#endif
78
// Symbol-export decoration.
// OJPH_EXPORT expands to __declspec(dllexport) only for a Windows build
// with OJPH_BUILD_SHARED_LIBRARY defined; in every other configuration it
// expands to nothing.
#if !defined(OJPH_OS_WINDOWS) || !defined(OJPH_BUILD_SHARED_LIBRARY)
#define OJPH_EXPORT
#else
#define OJPH_EXPORT __declspec(dllexport)
#endif
87
89 // cpu features
93
94 enum : int {
107 };
108
110 static inline ui32 population_count(ui32 val)
111 {
112 #ifdef OJPH_COMPILER_MSVC
113 return (ui32)__popcnt(val);
114 #elif (defined OJPH_COMPILER_GNUC)
115 return (ui32)__builtin_popcount(val);
116 #else
117 val -= ((val >> 1) & 0x55555555);
118 val = (((val >> 2) & 0x33333333) + (val & 0x33333333));
119 val = (((val >> 4) + val) & 0x0f0f0f0f);
120 val += (val >> 8);
121 val += (val >> 16);
122 return (int)(val & 0x0000003f);
123 #endif
124 }
125
127#ifdef OJPH_COMPILER_MSVC
128 #pragma intrinsic(_BitScanReverse)
129#endif
130 static inline ui32 count_leading_zeros(ui32 val)
131 {
132 #ifdef OJPH_COMPILER_MSVC
133 unsigned long result = 0;
134 _BitScanReverse(&result, val);
135 return 31 ^ (ui32)result;
136 #elif (defined OJPH_COMPILER_GNUC)
137 return (ui32)__builtin_clz(val);
138 #else
139 val |= (val >> 1);
140 val |= (val >> 2);
141 val |= (val >> 4);
142 val |= (val >> 8);
143 val |= (val >> 16);
144 return 32 - population_count(val);
145 #endif
146 }
147
149#ifdef OJPH_COMPILER_MSVC
150 #pragma intrinsic(_BitScanForward)
151#endif
152 static inline ui32 count_trailing_zeros(ui32 val)
153 {
154 #ifdef OJPH_COMPILER_MSVC
155 unsigned long result = 0;
156 _BitScanForward(&result, val);
157 return (ui32)result;
158 #elif (defined OJPH_COMPILER_GNUC)
159 return (ui32)__builtin_ctz(val);
160 #else
161 val |= (val << 1);
162 val |= (val << 2);
163 val |= (val << 4);
164 val |= (val << 8);
165 val |= (val << 16);
166 return 32 - population_count(val);
167 #endif
168 }
169
171 static inline si32 ojph_round(float val)
172 {
173 #ifdef OJPH_COMPILER_MSVC
174 return (si32)(val + (val >= 0.0f ? 0.5f : -0.5f));
175 #elif (defined OJPH_COMPILER_GNUC)
176 return (si32)(val + (val >= 0.0f ? 0.5f : -0.5f));
177 #else
178 return (si32)round(val);
179 #endif
180 }
181
183 static inline si32 ojph_trunc(float val)
184 {
185 #ifdef OJPH_COMPILER_MSVC
186 return (si32)(val);
187 #elif (defined OJPH_COMPILER_GNUC)
188 return (si32)(val);
189 #else
190 return (si32)trunc(val);
191 #endif
192 }
193
195 // constants
197#ifdef OJPH_ENABLE_INTEL_AVX512
198 const ui32 byte_alignment = 64; //64 bytes == 512 bits
199#else
200 const ui32 byte_alignment = 32; //32 bytes == 256 bits
201#endif
204
206 // templates for alignment
208
210 // finds the size such that it is a multiple of byte_alignment
211 template <typename T, int N>
212 size_t calc_aligned_size(size_t size) {
213 size = size * sizeof(T) + N - 1;
214 size &= ~((1ULL << (31 - count_leading_zeros(N))) - 1);
215 size >>= (31 - count_leading_zeros(sizeof(T)));
216 return size;
217 }
218
220 // moves the pointer to first address that is a multiple of byte_alignment
221 template <typename T, int N>
222 inline T *align_ptr(T *ptr) {
223 intptr_t p = reinterpret_cast<intptr_t>(ptr);
224 p += N - 1;
225 p &= ~((1ULL << (31 - count_leading_zeros(N))) - 1);
226 return reinterpret_cast<T *>(p);
227 }
228
229}
230
231#endif // !OJPH_ARCH_H
const ui32 object_alignment
Definition: ojph_arch.h:203
const ui32 byte_alignment
Definition: ojph_arch.h:200
static si32 ojph_round(float val)
Definition: ojph_arch.h:171
size_t calc_aligned_size(size_t size)
Definition: ojph_arch.h:212
@ X86_CPU_EXT_LEVEL_AVX2
Definition: ojph_arch.h:104
@ X86_CPU_EXT_LEVEL_AVX
Definition: ojph_arch.h:103
@ X86_CPU_EXT_LEVEL_AVX512
Definition: ojph_arch.h:106
@ X86_CPU_EXT_LEVEL_GENERIC
Definition: ojph_arch.h:95
@ X86_CPU_EXT_LEVEL_SSE2
Definition: ojph_arch.h:98
@ X86_CPU_EXT_LEVEL_SSE41
Definition: ojph_arch.h:101
@ X86_CPU_EXT_LEVEL_SSE
Definition: ojph_arch.h:97
@ X86_CPU_EXT_LEVEL_MMX
Definition: ojph_arch.h:96
@ X86_CPU_EXT_LEVEL_SSE42
Definition: ojph_arch.h:102
@ X86_CPU_EXT_LEVEL_SSSE3
Definition: ojph_arch.h:100
@ X86_CPU_EXT_LEVEL_SSE3
Definition: ojph_arch.h:99
@ X86_CPU_EXT_LEVEL_AVX2FMA
Definition: ojph_arch.h:105
T * align_ptr(T *ptr)
Definition: ojph_arch.h:222
static ui32 population_count(ui32 val)
Definition: ojph_arch.h:110
OJPH_EXPORT int get_cpu_ext_level()
Definition: ojph_arch.cpp:184
static si32 ojph_trunc(float val)
Definition: ojph_arch.h:183
static ui32 count_trailing_zeros(ui32 val)
Definition: ojph_arch.h:152
static ui32 count_leading_zeros(ui32 val)
Definition: ojph_arch.h:130
int32_t si32
Definition: ojph_defs.h:55
const ui32 log_byte_alignment
Definition: ojph_arch.h:202
uint32_t ui32
Definition: ojph_defs.h:54
#define OJPH_EXPORT
Definition: ojph_arch.h:85