int i, k;
for(k = 0; k < 2; ++k){
- if (count + 3 * x < size && ((size_t) a & 0x0f) == 0 &&
- ((size_t) bi & 0x0f) == 0 && (x & 0x0f) == 0) {
+ if ( count + 3 * x < size && ((size_t) a & 0x0f) == 0 && ((size_t) bi & 0x0f) == 0 && (x & 0x0f) == 0 ) {
/* Fast code path */
for(i = 0; i < count; ++i){
int j = i;
int j = i;
bi[i*8 ] = a[j];
j += x;
- if(j > size) continue;
+ if(j >= size) continue;
bi[i*8 + 1] = a[j];
j += x;
- if(j > size) continue;
+ if(j >= size) continue;
bi[i*8 + 2] = a[j];
j += x;
- if(j > size) continue;
- bi[i*8 + 3] = a[j];
+ if(j >= size) continue;
+ bi[i*8 + 3] = a[j]; /* This one*/
}
}
}
}
-static void v4dwt_interleave_v(v4dwt_t* restrict v , float* restrict a , int x){
+static void v4dwt_interleave_v(v4dwt_t* restrict v , float* restrict a , int x, int nb_elts_read){
v4* restrict bi = v->wavelet + v->cas;
int i;
+ /* First pass: for each of the v->sn entries, copy nb_elts_read floats from a[i*x] (stride x) into bi[i*2], where bi starts at v->wavelet + v->cas. */
for(i = 0; i < v->sn; ++i){
- memcpy(&bi[i*2], &a[i*x], 4 * sizeof(float));
+ memcpy(&bi[i*2], &a[i*x], nb_elts_read * sizeof(float)); /* previously a fixed 4 floats; the count is now supplied by the caller */
}
+ /* Advance a by v->sn * x elements and retarget bi to v->wavelet + 1 - v->cas for the second pass. */
a += v->sn * x;
bi = v->wavelet + 1 - v->cas;
+ /* Second pass: same strided copy for the v->dn entries. */
for(i = 0; i < v->dn; ++i){
- memcpy(&bi[i*2], &a[i*x], 4 * sizeof(float));
+ memcpy(&bi[i*2], &a[i*x], nb_elts_read * sizeof(float)); /* visible call sites pass 4 in the j > 3 loops and j = rw & 0x03 after them; NOTE(review): presumably so a partial final group does not read past the source row - confirm */
}
}
aj = (float*) tilec->data;
for(j = rw; j > 3; j -= 4){
int k;
- v4dwt_interleave_v(&v, aj, w);
+ v4dwt_interleave_v(&v, aj, w, 4);
v4dwt_decode(&v);
for(k = 0; k < rh; ++k){
memcpy(&aj[k*w], &v.wavelet[k], 4 * sizeof(float));
if (rw & 0x03){
int k;
j = rw & 0x03;
- v4dwt_interleave_v(&v, aj, w);
+ v4dwt_interleave_v(&v, aj, w, j);
v4dwt_decode(&v);
for(k = 0; k < rh; ++k){
memcpy(&aj[k*w], &v.wavelet[k], j * sizeof(float));
for(j = rw; j > 3; j -= 4){
OPJ_INT32 k;
- v4dwt_interleave_v(&v, aj, w);
+ v4dwt_interleave_v(&v, aj, w, 4);
v4dwt_decode(&v);
for(k = 0; k < rh; ++k){
j = rw & 0x03;
- v4dwt_interleave_v(&v, aj, w);
+ v4dwt_interleave_v(&v, aj, w, j);
v4dwt_decode(&v);
for(k = 0; k < rh; ++k){