Inverse 5-3 wavelet transform in 1-D
*/
static void opj_dwt_decode_1(dwt_t *v);
+static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas);
/**
Forward 9-7 wavelet transform in 1-D
*/
static opj_bool opj_dwt_encode_procedure( opj_tcd_tilecomp_v2_t * tilec,
void (*p_function)(OPJ_INT32 *, OPJ_INT32,OPJ_INT32,OPJ_INT32) );
-static OPJ_UINT32 dwt_max_resolution_v2(opj_tcd_resolution_v2_t* restrict r, OPJ_UINT32 i);
+static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_v2_t* restrict r, OPJ_UINT32 i);
+
+/* <summary> */
+/* Inverse 9-7 wavelet transform in 1-D. */
+/* </summary> */
+static void opj_v4dwt_decode(v4dwt_t* restrict dwt);
+
+static void opj_v4dwt_interleave_h(v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size);
+
+static void opj_v4dwt_interleave_v(v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read);
+
+#ifdef __SSE__
+/* SSE variants of the helpers called by opj_v4dwt_decode(); only declared when __SSE__ is defined. */
+static void opj_v4dwt_decode_step1_sse(v4* w, OPJ_INT32 count, const __m128 c);
+
+/* Prototype only: it must end with ';' (a '{' here would open a function body around the #endif). */
+static void opj_v4dwt_decode_step2_sse(v4* l, v4* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c);
+#endif
+
+static void opj_v4dwt_decode_step1(v4* w, OPJ_INT32 count, const OPJ_FLOAT32 c);
+static void opj_v4dwt_decode_step2(v4* l, v4* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c);
/*@}*/
/* <summary> */
/* This table contains the norms of the 5-3 wavelets for different bands. */
/* </summary> */
-static const double opj_dwt_norms[4][10] = {
+static const OPJ_FLOAT64 opj_dwt_norms[4][10] = {
{1.000, 1.500, 2.750, 5.375, 10.68, 21.34, 42.67, 85.33, 170.7, 341.3},
{1.038, 1.592, 2.919, 5.703, 11.33, 22.64, 45.25, 90.48, 180.9},
{1.038, 1.592, 2.919, 5.703, 11.33, 22.64, 45.25, 90.48, 180.9},
/* <summary> */
/* This table contains the norms of the 9-7 wavelets for different bands. */
/* </summary> */
-static const double dwt_norms_real[4][10] = {
+static const OPJ_FLOAT64 opj_dwt_norms_real[4][10] = {
{1.000, 1.965, 4.177, 8.403, 16.90, 33.84, 67.69, 135.3, 270.6, 540.9},
{2.022, 3.989, 8.355, 17.04, 34.27, 68.63, 137.3, 274.6, 549.0},
{2.022, 3.989, 8.355, 17.04, 34.27, 68.63, 137.3, 274.6, 549.0},
/* <summary> */
/* Inverse 5-3 wavelet transform in 1-D. */
/* </summary> */
-static void dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) {
+static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) {
OPJ_INT32 i;
if (!cas) {
/* Inverse 5-3 wavelet transform in 1-D. */
/* </summary> */
static void opj_dwt_decode_1(dwt_t *v) {
- dwt_decode_1_(v->mem, v->dn, v->sn, v->cas);
+ opj_dwt_decode_1_(v->mem, v->dn, v->sn, v->cas); /* unpack the dwt_t fields and hand them to the array-based worker */
}
/* <summary> */
rw = l_cur_res->x1 - l_cur_res->x0;
rh = l_cur_res->y1 - l_cur_res->y0;
- l_data_size = dwt_max_resolution_v2( tilec->resolutions,tilec->numresolutions) * sizeof(OPJ_INT32);
+ l_data_size = opj_dwt_max_resolution( tilec->resolutions,tilec->numresolutions) * sizeof(OPJ_INT32);
bj = (OPJ_INT32*)opj_malloc(l_data_size);
if (! bj) {
return OPJ_FALSE;
/* Get norm of 5-3 wavelet. */
/* </summary> */
double dwt_getnorm(int level, int orient) {
- return dwt_norms[orient][level];
+ /* Look up the 5-3 norm for (orient, level) in the [4][10] opj_dwt_norms table. */
+ /* An oversized level is clamped to the last listed column so the read stays */
+ /* inside the row: row 0 lists ten values, the other visible rows list nine. */
+ /* NOTE(review): negative arguments and orient > 3 are still not checked here. */
+ if (orient == 0 && level >= 10) {
+  level = 9;
+ } else if (orient > 0 && level >= 9) {
+  level = 8;
+ }
+ return opj_dwt_norms[orient][level];
}
/* <summary> */
/* Get norm of 5-3 wavelet. */
/* </summary> */
OPJ_FLOAT64 opj_dwt_getnorm(OPJ_UINT32 level, OPJ_UINT32 orient) {
- return dwt_norms[orient][level];
+ /* Look up the 5-3 norm for (orient, level) in the [4][10] opj_dwt_norms table. */
+ /* An oversized level is clamped to the last listed column so the read stays */
+ /* inside the row: row 0 lists ten values, the other visible rows list nine. */
+ /* NOTE(review): orient > 3 is still not checked here. */
+ if (orient == 0 && level >= 10) {
+  level = 9;
+ } else if (orient > 0 && level >= 9) {
+  level = 8;
+ }
+ return opj_dwt_norms[orient][level];
}
/* <summary> */
/* Get norm of 9-7 wavelet. */
/* </summary> */
double dwt_getnorm_real(int level, int orient) {
- return dwt_norms_real[orient][level];
+ /* Look up the 9-7 norm for (orient, level) in the [4][10] opj_dwt_norms_real table. */
+ /* An oversized level is clamped to the last listed column so the read stays */
+ /* inside the row: row 0 lists ten values, the other visible rows list nine. */
+ /* NOTE(review): negative arguments and orient > 3 are still not checked here. */
+ if (orient == 0 && level >= 10) {
+  level = 9;
+ } else if (orient > 0 && level >= 9) {
+  level = 8;
+ }
+ return opj_dwt_norms_real[orient][level];
}
/* <summary> */
/* Get norm of 9-7 wavelet. */
/* </summary> */
OPJ_FLOAT64 opj_dwt_getnorm_real(OPJ_UINT32 level, OPJ_UINT32 orient) {
- return dwt_norms_real[orient][level];
+ /* Look up the 9-7 norm for (orient, level) in the [4][10] opj_dwt_norms_real table. */
+ /* An oversized level is clamped to the last listed column so the read stays */
+ /* inside the row: row 0 lists ten values, the other visible rows list nine. */
+ /* NOTE(review): orient > 3 is still not checked here. */
+ if (orient == 0 && level >= 10) {
+  level = 9;
+ } else if (orient > 0 && level >= 9) {
+  level = 8;
+ }
+ return opj_dwt_norms_real[orient][level];
}
void opj_dwt_calc_explicit_stepsizes(opj_tccp_t * tccp, OPJ_UINT32 prec) {
if (tccp->qntsty == J2K_CCP_QNTSTY_NOQNT) {
stepsize = 1.0;
} else {
- OPJ_FLOAT64 norm = dwt_norms_real[orient][level];
+ OPJ_FLOAT64 norm = opj_dwt_norms_real[orient][level];
stepsize = (1 << (gain)) / norm;
}
opj_dwt_encode_stepsize((OPJ_INT32) floor(stepsize * 8192.0), prec + gain, &tccp->stepsizes[bandno]);
/* <summary> */
/* Determine maximum computed resolution level for inverse wavelet transform */
/* </summary> */
-static OPJ_UINT32 dwt_max_resolution_v2(opj_tcd_resolution_v2_t* restrict r, OPJ_UINT32 i) {
+static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_v2_t* restrict r, OPJ_UINT32 i) {
OPJ_UINT32 mr = 0;
OPJ_UINT32 w;
while( --i ) {
OPJ_UINT32 w = tilec->x1 - tilec->x0;
h.mem = (OPJ_INT32*)
- opj_aligned_malloc(dwt_max_resolution_v2(tr, numres) * sizeof(OPJ_INT32));
+ opj_aligned_malloc(opj_dwt_max_resolution(tr, numres) * sizeof(OPJ_INT32));
if
(! h.mem)
{
return OPJ_TRUE;
}
-static void v4dwt_interleave_h(v4dwt_t* restrict w, float* restrict a, int x, int size){
- float* restrict bi = (float*) (w->wavelet + w->cas);
- int count = w->sn;
- int i, k;
+void opj_v4dwt_interleave_h(v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size){
+ OPJ_FLOAT32* restrict bi = (OPJ_FLOAT32*) (w->wavelet + w->cas);
+ OPJ_INT32 count = w->sn;
+ OPJ_INT32 i, k;
for(k = 0; k < 2; ++k){
if ( count + 3 * x < size && ((size_t) a & 0x0f) == 0 && ((size_t) bi & 0x0f) == 0 && (x & 0x0f) == 0 ) {
/* Fast code path */
for(i = 0; i < count; ++i){
- int j = i;
+ OPJ_INT32 j = i;
bi[i*8 ] = a[j];
j += x;
bi[i*8 + 1] = a[j];
else {
/* Slow code path */
for(i = 0; i < count; ++i){
- int j = i;
+ OPJ_INT32 j = i;
bi[i*8 ] = a[j];
j += x;
if(j >= size) continue;
}
}
-static void v4dwt_interleave_v(v4dwt_t* restrict v , float* restrict a , int x, int nb_elts_read){
+/* Copy nb_elts_read floats from each of v->sn + v->dn consecutive positions of a */
+/* (x elements apart) into alternating v4 slots of v->wavelet. */
+/* 'static' is kept so the definition matches its forward declaration. */
+static void opj_v4dwt_interleave_v(v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read){
v4* restrict bi = v->wavelet + v->cas;
- int i;
+ OPJ_INT32 i;
+ /* First v->sn sources fill slots cas, cas+2, cas+4, ... */
for(i = 0; i < v->sn; ++i){
- memcpy(&bi[i*2], &a[i*x], nb_elts_read * sizeof(float));
+ memcpy(&bi[i*2], &a[i*x], nb_elts_read * sizeof(OPJ_FLOAT32));
}
a += v->sn * x;
bi = v->wavelet + 1 - v->cas;
+ /* The next v->dn sources fill the other parity: slots 1-cas, 3-cas, ... */
for(i = 0; i < v->dn; ++i){
- memcpy(&bi[i*2], &a[i*x], nb_elts_read * sizeof(float));
+ memcpy(&bi[i*2], &a[i*x], nb_elts_read * sizeof(OPJ_FLOAT32));
}
}
#ifdef __SSE__
-static void v4dwt_decode_step1_sse(v4* w, int count, const __m128 c){
+void opj_v4dwt_decode_step1_sse(v4* w, int count, const __m128 c){
__m128* restrict vw = (__m128*) w;
- int i;
+ OPJ_INT32 i;
/* 4x unrolled loop */
for(i = 0; i < count >> 2; ++i){
*vw = _mm_mul_ps(*vw, c);
}
}
-static void v4dwt_decode_step2_sse(v4* l, v4* w, int k, int m, __m128 c){
+void opj_v4dwt_decode_step2_sse(v4* l, v4* w, int k, int m, __m128 c){
__m128* restrict vl = (__m128*) l;
__m128* restrict vw = (__m128*) w;
int i;
#else
-static void v4dwt_decode_step1(v4* w, int count, const float c){
- float* restrict fw = (float*) w;
- int i;
+void opj_v4dwt_decode_step1(v4* w, OPJ_INT32 count, const OPJ_FLOAT32 c){
+ OPJ_FLOAT32* restrict fw = (OPJ_FLOAT32*) w;
+ OPJ_INT32 i;
for(i = 0; i < count; ++i){
- float tmp1 = fw[i*8 ];
- float tmp2 = fw[i*8 + 1];
- float tmp3 = fw[i*8 + 2];
- float tmp4 = fw[i*8 + 3];
+ OPJ_FLOAT32 tmp1 = fw[i*8 ];
+ OPJ_FLOAT32 tmp2 = fw[i*8 + 1];
+ OPJ_FLOAT32 tmp3 = fw[i*8 + 2];
+ OPJ_FLOAT32 tmp4 = fw[i*8 + 3];
fw[i*8 ] = tmp1 * c;
fw[i*8 + 1] = tmp2 * c;
fw[i*8 + 2] = tmp3 * c;
}
}
-static void v4dwt_decode_step2(v4* l, v4* w, int k, int m, float c){
- float* restrict fl = (float*) l;
- float* restrict fw = (float*) w;
+void opj_v4dwt_decode_step2(v4* l, v4* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c){
+ OPJ_FLOAT32* restrict fl = (OPJ_FLOAT32*) l;
+ OPJ_FLOAT32* restrict fw = (OPJ_FLOAT32*) w;
int i;
for(i = 0; i < m; ++i){
- float tmp1_1 = fl[0];
- float tmp1_2 = fl[1];
- float tmp1_3 = fl[2];
- float tmp1_4 = fl[3];
- float tmp2_1 = fw[-4];
- float tmp2_2 = fw[-3];
- float tmp2_3 = fw[-2];
- float tmp2_4 = fw[-1];
- float tmp3_1 = fw[0];
- float tmp3_2 = fw[1];
- float tmp3_3 = fw[2];
- float tmp3_4 = fw[3];
+ OPJ_FLOAT32 tmp1_1 = fl[0];
+ OPJ_FLOAT32 tmp1_2 = fl[1];
+ OPJ_FLOAT32 tmp1_3 = fl[2];
+ OPJ_FLOAT32 tmp1_4 = fl[3];
+ OPJ_FLOAT32 tmp2_1 = fw[-4];
+ OPJ_FLOAT32 tmp2_2 = fw[-3];
+ OPJ_FLOAT32 tmp2_3 = fw[-2];
+ OPJ_FLOAT32 tmp2_4 = fw[-1];
+ OPJ_FLOAT32 tmp3_1 = fw[0];
+ OPJ_FLOAT32 tmp3_2 = fw[1];
+ OPJ_FLOAT32 tmp3_3 = fw[2];
+ OPJ_FLOAT32 tmp3_4 = fw[3];
fw[-4] = tmp2_1 + ((tmp1_1 + tmp3_1) * c);
fw[-3] = tmp2_2 + ((tmp1_2 + tmp3_2) * c);
fw[-2] = tmp2_3 + ((tmp1_3 + tmp3_3) * c);
fw += 8;
}
if(m < k){
- float c1;
- float c2;
- float c3;
- float c4;
+ OPJ_FLOAT32 c1;
+ OPJ_FLOAT32 c2;
+ OPJ_FLOAT32 c3;
+ OPJ_FLOAT32 c4;
c += c;
c1 = fl[0] * c;
c2 = fl[1] * c;
c3 = fl[2] * c;
c4 = fl[3] * c;
for(; m < k; ++m){
- float tmp1 = fw[-4];
- float tmp2 = fw[-3];
- float tmp3 = fw[-2];
- float tmp4 = fw[-1];
+ OPJ_FLOAT32 tmp1 = fw[-4];
+ OPJ_FLOAT32 tmp2 = fw[-3];
+ OPJ_FLOAT32 tmp3 = fw[-2];
+ OPJ_FLOAT32 tmp4 = fw[-1];
fw[-4] = tmp1 + c1;
fw[-3] = tmp2 + c2;
fw[-2] = tmp3 + c3;
/* <summary> */
/* Inverse 9-7 wavelet transform in 1-D. */
/* </summary> */
-static void v4dwt_decode(v4dwt_t* restrict dwt){
+void opj_v4dwt_decode(v4dwt_t* restrict dwt){
int a, b;
if(dwt->cas == 0) {
if(!((dwt->dn > 0) || (dwt->sn > 1))){
b = 0;
}
#ifdef __SSE__
- v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K));
- v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c13318));
- v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta));
- v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma));
- v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta));
- v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha));
+ opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K));
+ opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c13318));
+ opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta));
+ opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma));
+ opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta));
+ opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha));
#else
- v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K);
- v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318);
- v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), opj_dwt_delta);
- v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), opj_dwt_gamma);
- v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), opj_dwt_beta);
- v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), opj_dwt_alpha);
+ opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K);
+ opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318);
+ opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), opj_dwt_delta);
+ opj_v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), opj_dwt_gamma);
+ opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), opj_dwt_beta);
+ opj_v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), opj_dwt_alpha);
#endif
}
for(j = rh; j > 3; j -= 4){
int k;
- v4dwt_interleave_h(&h, aj, w, bufsize);
- v4dwt_decode(&h);
+ opj_v4dwt_interleave_h(&h, aj, w, bufsize);
+ opj_v4dwt_decode(&h);
for(k = rw; --k >= 0;){
aj[k ] = h.wavelet[k].f[0];
aj[k+w ] = h.wavelet[k].f[1];
if (rh & 0x03) {
int k;
j = rh & 0x03;
- v4dwt_interleave_h(&h, aj, w, bufsize);
- v4dwt_decode(&h);
+ opj_v4dwt_interleave_h(&h, aj, w, bufsize);
+ opj_v4dwt_decode(&h);
for(k = rw; --k >= 0;){
switch(j) {
case 3: aj[k+w*2] = h.wavelet[k].f[2];
aj = (float*) tilec->data;
for(j = rw; j > 3; j -= 4){
int k;
- v4dwt_interleave_v(&v, aj, w, 4);
- v4dwt_decode(&v);
+ opj_v4dwt_interleave_v(&v, aj, w, 4);
+ opj_v4dwt_decode(&v);
for(k = 0; k < rh; ++k){
memcpy(&aj[k*w], &v.wavelet[k], 4 * sizeof(float));
}
if (rw & 0x03){
int k;
j = rw & 0x03;
- v4dwt_interleave_v(&v, aj, w, j);
- v4dwt_decode(&v);
+ opj_v4dwt_interleave_v(&v, aj, w, j);
+ opj_v4dwt_decode(&v);
for(k = 0; k < rh; ++k){
memcpy(&aj[k*w], &v.wavelet[k], j * sizeof(float));
}
OPJ_UINT32 w = tilec->x1 - tilec->x0;
- h.wavelet = (v4*) opj_aligned_malloc((dwt_max_resolution_v2(res, numres)+5) * sizeof(v4));
+ h.wavelet = (v4*) opj_aligned_malloc((opj_dwt_max_resolution(res, numres)+5) * sizeof(v4));
v.wavelet = h.wavelet;
while( --numres) {
for(j = rh; j > 3; j -= 4) {
OPJ_INT32 k;
- v4dwt_interleave_h(&h, aj, w, bufsize);
- v4dwt_decode(&h);
+ opj_v4dwt_interleave_h(&h, aj, w, bufsize);
+ opj_v4dwt_decode(&h);
for(k = rw; --k >= 0;){
aj[k ] = h.wavelet[k].f[0];
if (rh & 0x03) {
OPJ_INT32 k;
j = rh & 0x03;
- v4dwt_interleave_h(&h, aj, w, bufsize);
- v4dwt_decode(&h);
+ opj_v4dwt_interleave_h(&h, aj, w, bufsize);
+ opj_v4dwt_decode(&h);
for(k = rw; --k >= 0;){
switch(j) {
case 3: aj[k+w*2] = h.wavelet[k].f[2];
for(j = rw; j > 3; j -= 4){
OPJ_UINT32 k;
- v4dwt_interleave_v(&v, aj, w, 4);
- v4dwt_decode(&v);
+ opj_v4dwt_interleave_v(&v, aj, w, 4);
+ opj_v4dwt_decode(&v);
for(k = 0; k < rh; ++k){
memcpy(&aj[k*w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32));
j = rw & 0x03;
- v4dwt_interleave_v(&v, aj, w, j);
- v4dwt_decode(&v);
+ opj_v4dwt_interleave_v(&v, aj, w, j);
+ opj_v4dwt_decode(&v);
for(k = 0; k < rh; ++k){
memcpy(&aj[k*w], &v.wavelet[k], j * sizeof(OPJ_FLOAT32));