OpenJPH
Open-source implementation of JPEG2000 Part-15
Loading...
Searching...
No Matches
ojph_transform.cpp
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2019, Aous Naman
6// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2019, The University of New South Wales, Australia
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// 1. Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// 2. Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//***************************************************************************/
32// This file is part of the OpenJPH software implementation.
33// File: ojph_transform.cpp
34// Author: Aous Naman
35// Date: 28 August 2019
36//***************************************************************************/
37
38#include <cstdio>
39#include <mutex>
40
41#include "ojph_arch.h"
42#include "ojph_mem.h"
43#include "ojph_transform.h"
45#include "ojph_params.h"
47
48namespace ojph {
49
50 // defined elsewhere
51 class line_buf;
52
53 namespace local {
54
56 // Reversible functions
58
61 (const lifting_step* s, const line_buf* sig, const line_buf* other,
62 const line_buf* aug, ui32 repeat, bool synthesis) = NULL;
63
66 (const param_atk* atk, const line_buf* ldst, const line_buf* hdst,
67 const line_buf* src, ui32 width, bool even) = NULL;
68
71 (const param_atk* atk, const line_buf* dst, const line_buf* lsrc,
72 const line_buf* hsrc, ui32 width, bool even) = NULL;
73
75 // Irreversible functions
77
80 (const lifting_step* s, const line_buf* sig, const line_buf* other,
81 const line_buf* aug, ui32 repeat, bool synthesis) = NULL;
82
85 (float K, const line_buf* aug, ui32 repeat) = NULL;
86
89 (const param_atk* atk, const line_buf* ldst, const line_buf* hdst,
90 const line_buf* src, ui32 width, bool even) = NULL;
91
94 (const param_atk* atk, const line_buf* dst, const line_buf* lsrc,
95 const line_buf* hsrc, ui32 width, bool even) = NULL;
96
99 {
100 static std::once_flag wavelet_transform_functions_init_flag;
101 std::call_once(wavelet_transform_functions_init_flag, [](){
102#if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN)
103
107
112
113 #ifndef OJPH_DISABLE_SIMD
114
115 #if (defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))
116
117 #ifndef OJPH_DISABLE_SSE
119 {
124 }
125 #endif // !OJPH_DISABLE_SSE
126
127 #ifndef OJPH_DISABLE_SSE2
129 {
133 }
134 #endif // !OJPH_DISABLE_SSE2
135
136 #ifndef OJPH_DISABLE_AVX
138 {
143 }
144 #endif // !OJPH_DISABLE_AVX
145
146 #ifndef OJPH_DISABLE_AVX2
148 {
152 }
153 #endif // !OJPH_DISABLE_AVX2
154
155 #if (defined(OJPH_ARCH_X86_64) && !defined(OJPH_DISABLE_AVX512))
157 {
158 // rev_vert_step = avx512_rev_vert_step;
159 // rev_horz_ana = avx512_rev_horz_ana;
160 // rev_horz_syn = avx512_rev_horz_syn;
161
166 }
167 #endif // !OJPH_DISABLE_AVX512
168
169 #elif defined(OJPH_ARCH_ARM)
170
171 #endif // !(defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))
172
173 #endif // !OJPH_DISABLE_SIMD
174
175#else // OJPH_ENABLE_WASM_SIMD
179
184#endif // !OJPH_ENABLE_WASM_SIMD
185 });
186 }
187
189
190#if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN)
191
193 static
194 void gen_rev_vert_step32(const lifting_step* s, const line_buf* sig,
195 const line_buf* other, const line_buf* aug,
196 ui32 repeat, bool synthesis)
197 {
198 const si32 a = s->rev.Aatk;
199 const si32 b = s->rev.Batk;
200 const ui8 e = s->rev.Eatk;
201
202 si32* dst = aug->i32;
203 const si32* src1 = sig->i32, * src2 = other->i32;
204 // The general definition of the wavelet in Part 2 is slightly
205 // different to part 2, although they are mathematically equivalent
206 // here, we identify the simpler form from Part 1 and employ them
207 if (a == 1)
208 { // 5/3 update and any case with a == 1
209 if (synthesis)
210 for (ui32 i = repeat; i > 0; --i)
211 *dst++ -= (b + *src1++ + *src2++) >> e;
212 else
213 for (ui32 i = repeat; i > 0; --i)
214 *dst++ += (b + *src1++ + *src2++) >> e;
215 }
216 else if (a == -1 && b == 1 && e == 1)
217 { // 5/3 predict
218 if (synthesis)
219 for (ui32 i = repeat; i > 0; --i)
220 *dst++ += (*src1++ + *src2++) >> e;
221 else
222 for (ui32 i = repeat; i > 0; --i)
223 *dst++ -= (*src1++ + *src2++) >> e;
224 }
225 else if (a == -1)
226 { // any case with a == -1, which is not 5/3 predict
227 if (synthesis)
228 for (ui32 i = repeat; i > 0; --i)
229 *dst++ -= (b - (*src1++ + *src2++)) >> e;
230 else
231 for (ui32 i = repeat; i > 0; --i)
232 *dst++ += (b - (*src1++ + *src2++)) >> e;
233 }
234 else { // general case
235 if (synthesis)
236 for (ui32 i = repeat; i > 0; --i)
237 *dst++ -= (b + a * (*src1++ + *src2++)) >> e;
238 else
239 for (ui32 i = repeat; i > 0; --i)
240 *dst++ += (b + a * (*src1++ + *src2++)) >> e;
241 }
242 }
243
245 static
246 void gen_rev_vert_step64(const lifting_step* s, const line_buf* sig,
247 const line_buf* other, const line_buf* aug,
248 ui32 repeat, bool synthesis)
249 {
250 const si64 a = s->rev.Aatk;
251 const si64 b = s->rev.Batk;
252 const ui8 e = s->rev.Eatk;
253
254 si64* dst = aug->i64;
255 const si64* src1 = sig->i64, * src2 = other->i64;
256 // The general definition of the wavelet in Part 2 is slightly
257 // different to part 2, although they are mathematically equivalent
258 // here, we identify the simpler form from Part 1 and employ them
259 if (a == 1)
260 { // 5/3 update and any case with a == 1
261 if (synthesis)
262 for (ui32 i = repeat; i > 0; --i)
263 *dst++ -= (b + *src1++ + *src2++) >> e;
264 else
265 for (ui32 i = repeat; i > 0; --i)
266 *dst++ += (b + *src1++ + *src2++) >> e;
267 }
268 else if (a == -1 && b == 1 && e == 1)
269 { // 5/3 predict
270 if (synthesis)
271 for (ui32 i = repeat; i > 0; --i)
272 *dst++ += (*src1++ + *src2++) >> e;
273 else
274 for (ui32 i = repeat; i > 0; --i)
275 *dst++ -= (*src1++ + *src2++) >> e;
276 }
277 else if (a == -1)
278 { // any case with a == -1, which is not 5/3 predict
279 if (synthesis)
280 for (ui32 i = repeat; i > 0; --i)
281 *dst++ -= (b - (*src1++ + *src2++)) >> e;
282 else
283 for (ui32 i = repeat; i > 0; --i)
284 *dst++ += (b - (*src1++ + *src2++)) >> e;
285 }
286 else { // general case
287 if (synthesis)
288 for (ui32 i = repeat; i > 0; --i)
289 *dst++ -= (b + a * (*src1++ + *src2++)) >> e;
290 else
291 for (ui32 i = repeat; i > 0; --i)
292 *dst++ += (b + a * (*src1++ + *src2++)) >> e;
293 }
294 }
295
297 void gen_rev_vert_step(const lifting_step* s, const line_buf* sig,
298 const line_buf* other, const line_buf* aug,
299 ui32 repeat, bool synthesis)
300 {
301 if (((sig != NULL) && (sig->flags & line_buf::LFT_32BIT)) ||
302 ((aug != NULL) && (aug->flags & line_buf::LFT_32BIT)) ||
303 ((other != NULL) && (other->flags & line_buf::LFT_32BIT)))
304 {
305 assert((sig == NULL || sig->flags & line_buf::LFT_32BIT) &&
306 (other == NULL || other->flags & line_buf::LFT_32BIT) &&
307 (aug == NULL || aug->flags & line_buf::LFT_32BIT));
308 gen_rev_vert_step32(s, sig, other, aug, repeat, synthesis);
309 }
310 else
311 {
312 assert((sig == NULL || sig->flags & line_buf::LFT_64BIT) &&
313 (other == NULL || other->flags & line_buf::LFT_64BIT) &&
314 (aug == NULL || aug->flags & line_buf::LFT_64BIT));
315 gen_rev_vert_step64(s, sig, other, aug, repeat, synthesis);
316 }
317 }
318
320 static
321 void gen_rev_horz_ana32(const param_atk* atk, const line_buf* ldst,
322 const line_buf* hdst, const line_buf* src,
323 ui32 width, bool even)
324 {
325 if (width > 1)
326 {
327 // combine both lsrc and hsrc into dst
328 si32* dph = hdst->i32;
329 si32* dpl = ldst->i32;
330 si32* sp = src->i32;
331 ui32 w = width;
332 if (!even)
333 {
334 *dph++ = *sp++; --w;
335 }
336 for (; w > 1; w -= 2)
337 {
338 *dpl++ = *sp++; *dph++ = *sp++;
339 }
340 if (w)
341 {
342 *dpl++ = *sp++; --w;
343 }
344
345 si32* hp = hdst->i32, * lp = ldst->i32;
346 ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass
347 ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass
348 ui32 num_steps = atk->get_num_steps();
349 for (ui32 j = num_steps; j > 0; --j)
350 {
351 // first lifting step
352 const lifting_step* s = atk->get_step(j - 1);
353 const si32 a = s->rev.Aatk;
354 const si32 b = s->rev.Batk;
355 const ui8 e = s->rev.Eatk;
356
357 // extension
358 lp[-1] = lp[0];
359 lp[l_width] = lp[l_width - 1];
360 // lifting step
361 const si32* sp = lp + (even ? 1 : 0);
362 si32* dp = hp;
363 if (a == 1)
364 { // 5/3 update and any case with a == 1
365 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
366 *dp += (b + (sp[-1] + sp[0])) >> e;
367 }
368 else if (a == -1 && b == 1 && e == 1)
369 { // 5/3 predict
370 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
371 *dp -= (sp[-1] + sp[0]) >> e;
372 }
373 else if (a == -1)
374 { // any case with a == -1, which is not 5/3 predict
375 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
376 *dp += (b - (sp[-1] + sp[0])) >> e;
377 }
378 else {
379 // general case
380 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
381 *dp += (b + a * (sp[-1] + sp[0])) >> e;
382 }
383
384 // swap buffers
385 si32* t = lp; lp = hp; hp = t;
386 even = !even;
387 ui32 w = l_width; l_width = h_width; h_width = w;
388 }
389 }
390 else {
391 if (even)
392 ldst->i32[0] = src->i32[0];
393 else
394 hdst->i32[0] = src->i32[0] << 1;
395 }
396 }
397
399 static
400 void gen_rev_horz_ana64(const param_atk* atk, const line_buf* ldst,
401 const line_buf* hdst, const line_buf* src,
402 ui32 width, bool even)
403 {
404 if (width > 1)
405 {
406 // combine both lsrc and hsrc into dst
407 si64* dph = hdst->i64;
408 si64* dpl = ldst->i64;
409 si64* sp = src->i64;
410 ui32 w = width;
411 if (!even)
412 {
413 *dph++ = *sp++; --w;
414 }
415 for (; w > 1; w -= 2)
416 {
417 *dpl++ = *sp++; *dph++ = *sp++;
418 }
419 if (w)
420 {
421 *dpl++ = *sp++; --w;
422 }
423
424 si64* hp = hdst->i64, * lp = ldst->i64;
425 ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass
426 ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass
427 ui32 num_steps = atk->get_num_steps();
428 for (ui32 j = num_steps; j > 0; --j)
429 {
430 // first lifting step
431 const lifting_step* s = atk->get_step(j - 1);
432 const si64 a = s->rev.Aatk;
433 const si64 b = s->rev.Batk;
434 const ui8 e = s->rev.Eatk;
435
436 // extension
437 lp[-1] = lp[0];
438 lp[l_width] = lp[l_width - 1];
439 // lifting step
440 const si64* sp = lp + (even ? 1 : 0);
441 si64* dp = hp;
442 if (a == 1)
443 { // 5/3 update and any case with a == 1
444 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
445 *dp += (b + (sp[-1] + sp[0])) >> e;
446 }
447 else if (a == -1 && b == 1 && e == 1)
448 { // 5/3 predict
449 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
450 *dp -= (sp[-1] + sp[0]) >> e;
451 }
452 else if (a == -1)
453 { // any case with a == -1, which is not 5/3 predict
454 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
455 *dp += (b - (sp[-1] + sp[0])) >> e;
456 }
457 else {
458 // general case
459 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
460 *dp += (b + a * (sp[-1] + sp[0])) >> e;
461 }
462
463 // swap buffers
464 si64* t = lp; lp = hp; hp = t;
465 even = !even;
466 ui32 w = l_width; l_width = h_width; h_width = w;
467 }
468 }
469 else {
470 if (even)
471 ldst->i64[0] = src->i64[0];
472 else
473 hdst->i64[0] = src->i64[0] << 1;
474 }
475 }
476
478 void gen_rev_horz_ana(const param_atk* atk, const line_buf* ldst,
479 const line_buf* hdst, const line_buf* src,
480 ui32 width, bool even)
481 {
482 if (src->flags & line_buf::LFT_32BIT)
483 {
484 assert((ldst == NULL || ldst->flags & line_buf::LFT_32BIT) &&
485 (hdst == NULL || hdst->flags & line_buf::LFT_32BIT));
486 gen_rev_horz_ana32(atk, ldst, hdst, src, width, even);
487 }
488 else
489 {
490 assert((ldst == NULL || ldst->flags & line_buf::LFT_64BIT) &&
491 (hdst == NULL || hdst->flags & line_buf::LFT_64BIT) &&
492 (src == NULL || src->flags & line_buf::LFT_64BIT));
493 gen_rev_horz_ana64(atk, ldst, hdst, src, width, even);
494 }
495 }
496
498 static
499 void gen_rev_horz_syn32(const param_atk* atk, const line_buf* dst,
500 const line_buf* lsrc, const line_buf* hsrc,
501 ui32 width, bool even)
502 {
503 if (width > 1)
504 {
505 bool ev = even;
506 si32* oth = hsrc->i32, * aug = lsrc->i32;
507 ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass
508 ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass
509 ui32 num_steps = atk->get_num_steps();
510 for (ui32 j = 0; j < num_steps; ++j)
511 {
512 const lifting_step* s = atk->get_step(j);
513 const si32 a = s->rev.Aatk;
514 const si32 b = s->rev.Batk;
515 const ui8 e = s->rev.Eatk;
516
517 // extension
518 oth[-1] = oth[0];
519 oth[oth_width] = oth[oth_width - 1];
520 // lifting step
521 const si32* sp = oth + (ev ? 0 : 1);
522 si32* dp = aug;
523 if (a == 1)
524 { // 5/3 update and any case with a == 1
525 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
526 *dp -= (b + (sp[-1] + sp[0])) >> e;
527 }
528 else if (a == -1 && b == 1 && e == 1)
529 { // 5/3 predict
530 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
531 *dp += (sp[-1] + sp[0]) >> e;
532 }
533 else if (a == -1)
534 { // any case with a == -1, which is not 5/3 predict
535 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
536 *dp -= (b - (sp[-1] + sp[0])) >> e;
537 }
538 else {
539 // general case
540 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
541 *dp -= (b + a * (sp[-1] + sp[0])) >> e;
542 }
543
544 // swap buffers
545 si32* t = aug; aug = oth; oth = t;
546 ev = !ev;
547 ui32 w = aug_width; aug_width = oth_width; oth_width = w;
548 }
549
550 // combine both lsrc and hsrc into dst
551 si32* sph = hsrc->i32;
552 si32* spl = lsrc->i32;
553 si32* dp = dst->i32;
554 ui32 w = width;
555 if (!even)
556 {
557 *dp++ = *sph++; --w;
558 }
559 for (; w > 1; w -= 2)
560 {
561 *dp++ = *spl++; *dp++ = *sph++;
562 }
563 if (w)
564 {
565 *dp++ = *spl++; --w;
566 }
567 }
568 else {
569 if (even)
570 dst->i32[0] = lsrc->i32[0];
571 else
572 dst->i32[0] = hsrc->i32[0] >> 1;
573 }
574 }
575
577 static
578 void gen_rev_horz_syn64(const param_atk* atk, const line_buf* dst,
579 const line_buf* lsrc, const line_buf* hsrc,
580 ui32 width, bool even)
581 {
582 if (width > 1)
583 {
584 bool ev = even;
585 si64* oth = hsrc->i64, * aug = lsrc->i64;
586 ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass
587 ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass
588 ui32 num_steps = atk->get_num_steps();
589 for (ui32 j = 0; j < num_steps; ++j)
590 {
591 const lifting_step* s = atk->get_step(j);
592 const si64 a = s->rev.Aatk;
593 const si64 b = s->rev.Batk;
594 const ui8 e = s->rev.Eatk;
595
596 // extension
597 oth[-1] = oth[0];
598 oth[oth_width] = oth[oth_width - 1];
599 // lifting step
600 const si64* sp = oth + (ev ? 0 : 1);
601 si64* dp = aug;
602 if (a == 1)
603 { // 5/3 update and any case with a == 1
604 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
605 *dp -= (b + (sp[-1] + sp[0])) >> e;
606 }
607 else if (a == -1 && b == 1 && e == 1)
608 { // 5/3 predict
609 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
610 *dp += (sp[-1] + sp[0]) >> e;
611 }
612 else if (a == -1)
613 { // any case with a == -1, which is not 5/3 predict
614 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
615 *dp -= (b - (sp[-1] + sp[0])) >> e;
616 }
617 else {
618 // general case
619 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
620 *dp -= (b + a * (sp[-1] + sp[0])) >> e;
621 }
622
623 // swap buffers
624 si64* t = aug; aug = oth; oth = t;
625 ev = !ev;
626 ui32 w = aug_width; aug_width = oth_width; oth_width = w;
627 }
628
629 // combine both lsrc and hsrc into dst
630 si64* sph = hsrc->i64;
631 si64* spl = lsrc->i64;
632 si64* dp = dst->i64;
633 ui32 w = width;
634 if (!even)
635 {
636 *dp++ = *sph++; --w;
637 }
638 for (; w > 1; w -= 2)
639 {
640 *dp++ = *spl++; *dp++ = *sph++;
641 }
642 if (w)
643 {
644 *dp++ = *spl++; --w;
645 }
646 }
647 else {
648 if (even)
649 dst->i64[0] = lsrc->i64[0];
650 else
651 dst->i64[0] = hsrc->i64[0] >> 1;
652 }
653 }
654
656 void gen_rev_horz_syn(const param_atk* atk, const line_buf* dst,
657 const line_buf* lsrc, const line_buf* hsrc,
658 ui32 width, bool even)
659 {
660 if (dst->flags & line_buf::LFT_32BIT)
661 {
662 assert((lsrc == NULL || lsrc->flags & line_buf::LFT_32BIT) &&
663 (hsrc == NULL || hsrc->flags & line_buf::LFT_32BIT));
664 gen_rev_horz_syn32(atk, dst, lsrc, hsrc, width, even);
665 }
666 else
667 {
668 assert((dst == NULL || dst->flags & line_buf::LFT_64BIT) &&
669 (lsrc == NULL || lsrc->flags & line_buf::LFT_64BIT) &&
670 (hsrc == NULL || hsrc->flags & line_buf::LFT_64BIT));
671 gen_rev_horz_syn64(atk, dst, lsrc, hsrc, width, even);
672 }
673 }
674
676 void gen_irv_vert_step(const lifting_step* s, const line_buf* sig,
677 const line_buf* other, const line_buf* aug,
678 ui32 repeat, bool synthesis)
679 {
680 float a = s->irv.Aatk;
681
682 if (synthesis)
683 a = -a;
684
685 float* dst = aug->f32;
686 const float* src1 = sig->f32, * src2 = other->f32;
687 for (ui32 i = repeat; i > 0; --i)
688 *dst++ += a * (*src1++ + *src2++);
689 }
690
692 void gen_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat)
693 {
694 float* dst = aug->f32;
695 for (ui32 i = repeat; i > 0; --i)
696 *dst++ *= K;
697 }
698
700 void gen_irv_horz_ana(const param_atk* atk, const line_buf* ldst,
701 const line_buf* hdst, const line_buf* src,
702 ui32 width, bool even)
703 {
704 if (width > 1)
705 {
706 // split src into ldst and hdst
707 float* dph = hdst->f32;
708 float* dpl = ldst->f32;
709 float* sp = src->f32;
710 ui32 w = width;
711 if (!even)
712 {
713 *dph++ = *sp++; --w;
714 }
715 for (; w > 1; w -= 2)
716 {
717 *dpl++ = *sp++; *dph++ = *sp++;
718 }
719 if (w)
720 {
721 *dpl++ = *sp++; --w;
722 }
723
724 float* hp = hdst->f32, * lp = ldst->f32;
725 ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass
726 ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass
727 ui32 num_steps = atk->get_num_steps();
728 for (ui32 j = num_steps; j > 0; --j)
729 {
730 const lifting_step* s = atk->get_step(j - 1);
731 const float a = s->irv.Aatk;
732
733 // extension
734 lp[-1] = lp[0];
735 lp[l_width] = lp[l_width - 1];
736 // lifting step
737 const float* sp = lp + (even ? 1 : 0);
738 float* dp = hp;
739 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
740 *dp += a * (sp[-1] + sp[0]);
741
742 // swap buffers
743 float* t = lp; lp = hp; hp = t;
744 even = !even;
745 ui32 w = l_width; l_width = h_width; h_width = w;
746 }
747
748 {
749 float K = atk->get_K();
750 float K_inv = 1.0f / K;
751 float* dp;
752
753 dp = lp;
754 for (ui32 i = l_width; i > 0; --i)
755 *dp++ *= K_inv;
756
757 dp = hp;
758 for (ui32 i = h_width; i > 0; --i)
759 *dp++ *= K;
760 }
761 }
762 else {
763 if (even)
764 ldst->f32[0] = src->f32[0];
765 else
766 hdst->f32[0] = src->f32[0] * 2.0f;
767 }
768 }
769
771 void gen_irv_horz_syn(const param_atk* atk, const line_buf* dst,
772 const line_buf* lsrc, const line_buf* hsrc,
773 ui32 width, bool even)
774 {
775 if (width > 1)
776 {
777 bool ev = even;
778 float* oth = hsrc->f32, * aug = lsrc->f32;
779 ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass
780 ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass
781
782 {
783 float K = atk->get_K();
784 float K_inv = 1.0f / K;
785 float* dp;
786
787 dp = aug;
788 for (ui32 i = aug_width; i > 0; --i)
789 *dp++ *= K;
790
791 dp = oth;
792 for (ui32 i = oth_width; i > 0; --i)
793 *dp++ *= K_inv;
794 }
795
796 ui32 num_steps = atk->get_num_steps();
797 for (ui32 j = 0; j < num_steps; ++j)
798 {
799 const lifting_step* s = atk->get_step(j);
800 const float a = s->irv.Aatk;
801
802 // extension
803 oth[-1] = oth[0];
804 oth[oth_width] = oth[oth_width - 1];
805 // lifting step
806 const float* sp = oth + (ev ? 0 : 1);
807 float* dp = aug;
808 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
809 *dp -= a * (sp[-1] + sp[0]);
810
811 // swap buffers
812 float* t = aug; aug = oth; oth = t;
813 ev = !ev;
814 ui32 w = aug_width; aug_width = oth_width; oth_width = w;
815 }
816
817 // combine both lsrc and hsrc into dst
818 float* sph = hsrc->f32;
819 float* spl = lsrc->f32;
820 float* dp = dst->f32;
821 ui32 w = width;
822 if (!even)
823 { *dp++ = *sph++; --w; }
824 for (; w > 1; w -= 2)
825 { *dp++ = *spl++; *dp++ = *sph++; }
826 if (w)
827 { *dp++ = *spl++; --w; }
828 }
829 else {
830 if (even)
831 dst->f32[0] = lsrc->f32[0];
832 else
833 dst->f32[0] = hsrc->f32[0] * 0.5f;
834 }
835 }
836
837#endif // !OJPH_ENABLE_WASM_SIMD
838
839 }
840}
float * f32
Definition ojph_mem.h:187
void(* rev_horz_ana)(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void gen_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
void gen_rev_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void gen_rev_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
static void gen_rev_horz_syn32(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void sse2_rev_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
static void gen_rev_vert_step64(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void avx512_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void wasm_rev_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void gen_rev_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void gen_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void sse2_rev_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
static void gen_rev_vert_step32(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
static void gen_rev_horz_ana64(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void(* irv_vert_times_K)(float K, const line_buf *aug, ui32 repeat)
void gen_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void(* irv_vert_step)(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void avx_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void sse_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
void avx2_rev_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void init_wavelet_transform_functions()
void wasm_rev_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void wasm_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
static void gen_rev_horz_syn64(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void avx512_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void avx2_rev_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void sse_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void wasm_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void sse_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void wasm_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void avx_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void sse_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void avx512_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
void avx512_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void avx_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void avx2_rev_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void(* rev_horz_syn)(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void sse2_rev_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void wasm_rev_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void(* irv_horz_ana)(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void(* rev_vert_step)(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void gen_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void avx_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
void(* irv_horz_syn)(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void wasm_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
static void gen_rev_horz_ana32(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
int64_t si64
Definition ojph_defs.h:57
OJPH_EXPORT int get_cpu_ext_level()
int32_t si32
Definition ojph_defs.h:55
uint32_t ui32
Definition ojph_defs.h:54
uint8_t ui8
Definition ojph_defs.h:50
@ X86_CPU_EXT_LEVEL_AVX2
Definition ojph_arch.h:142
@ X86_CPU_EXT_LEVEL_AVX
Definition ojph_arch.h:141
@ X86_CPU_EXT_LEVEL_AVX512
Definition ojph_arch.h:144
@ X86_CPU_EXT_LEVEL_SSE2
Definition ojph_arch.h:136
@ X86_CPU_EXT_LEVEL_SSE
Definition ojph_arch.h:135
const lifting_step * get_step(ui32 s) const