#include "common.h"
#include "dwt97.h"

#include <stddef.h>
#include <assert.h>
#include <stdlib.h>
#include <math.h>

#define CONFIG_USE_FAST_CORE

/*
 * Set the first 'size' floats starting at 'line' to 0.
 */
static void zero(float *line, size_t size)
{
	while (size > 0) {
		*line++ = 0.f;
		--size;
	}
}

/*
 * Lifting constants for the CDF 9/7 wavelet
 *
 * alpha, beta, gamma and delta are the weights of the four lifting steps:
 * the encode cores in this file use them as written, the decode cores use
 * their negations.
 *
 * zeta is the scaling constant; rcp_zeta, sqr_zeta and rcp_sqr_zeta are the
 * precomputed values of 1/zeta, zeta^2 and 1/zeta^2. The quad routines in
 * this file scale the LL and HH coefficients with sqr_zeta and rcp_sqr_zeta.
 *
 * Each macro expands to a signed float literal without parentheses.
 */
#define alpha -1.58613434201888022056773162788538f
#define beta  -0.05298011857604780601431779000503f
#define gamma +0.88291107549260031282806293551600f
#define delta +0.44350685204939829327158029348930f
#define zeta  +1.14960439885900000000000000000000f
#define rcp_zeta \
              +0.86986445162572041243487498011333f
#define sqr_zeta \
              +1.32159027387596276050188100000000f
#define rcp_sqr_zeta \
              +0.75666416420211528747583823019750f

/*
 * Predicate: Is a signal defined at the position 'n'?
 *
 * Note that a signal is defined on [0; N).
 *
 * Evaluates to non-zero when 0 <= n < N. The argument 'n' is expanded twice,
 * so it must not have side effects.
 */
#define signal_defined(n, N) ( (n) >= 0 && (n) < (N) )

/*
 * mirror symmetric signal extension
 *
 * Maps a position 'n' outside [0; N) back into the signal: a negative n
 * becomes -n, and n >= N becomes 2*(N-1)-n; positions inside [0; N) are
 * returned unchanged. Only a single reflection is applied. Both arguments
 * are expanded more than once, so they must not have side effects.
 */
#define signal_mirror(n, N) ( (n) < 0 ? -(n) : ( (n) >= (N) ? (2*((N)-1)-(n)) : (n) ) )

/*
 * Transpose a 2x2 matrix stored row-major in core[0..3], in place.
 * Only the two off-diagonal elements change places.
 */
static void transpose(float core[4])
{
	const float upper_right = core[1];
	const float lower_left  = core[2];

	core[1] = lower_left;
	core[2] = upper_right;
}

/*
 * Select the boundary levers for the forward core at position 'n' of a
 * signal of length 'N'.
 *
 * All four levers default to 0. A lever is set to -1 at n == 1 (stage 2) and
 * n == 2 (stage 0), and to +1 at n == N (stage 3) and n == N+1 (stage 1).
 */
static void encode_adjust_levers(int lever[4], ptrdiff_t n, ptrdiff_t N)
{
	int stage;

	for (stage = 0; stage < 4; ++stage)
		lever[stage] = 0;

	if (n == 1)
		lever[2] = -1;
	if (n == 2)
		lever[0] = -1;
	if (n == N)
		lever[3] = +1;
	if (n == N+1)
		lever[1] = +1;
}

/*
 * Select the boundary levers for the inverse core at position 'n' of a
 * signal of length 'N'.
 *
 * All four levers default to 0. A lever is set to -1 at n == 0 (stage 3) and
 * n == 1 (stage 1), and to +1 at n == N (stage 2) and n == N+1 (stage 0).
 */
static void decode_adjust_levers(int lever[4], ptrdiff_t n, ptrdiff_t N)
{
	int stage;

	for (stage = 0; stage < 4; ++stage)
		lever[stage] = 0;

	if (n == 0)
		lever[3] = -1;
	if (n == 1)
		lever[1] = -1;
	if (n == N)
		lever[2] = +1;
	if (n == N+1)
		lever[0] = +1;
}

/*
 * One step of the forward 1-D lifting pipeline, with boundary levers.
 *
 * data[0..1] holds the two incoming samples on entry and the two outgoing
 * coefficients on return. buff[0..3] is the pipeline state carried from one
 * call to the next; it is read and then completely rewritten.
 *
 * Four lifting stages run in the order 3, 2, 1, 0 with the weights alpha,
 * beta, gamma and delta. Each stage computes
 *     centre + weight * (left + right)
 * where 'left' comes from the stored state and 'right' is the value produced
 * by the previous stage (data[1] for stage 3). A negative lever[stage]
 * substitutes 'right' for 'left'; a positive one substitutes 'left' for
 * 'right'.
 */
static void dwtfloat_encode_core(float data[2], float buff[4], const int lever[4])
{
	/* indexed by stage; stage 3 is evaluated first */
	const float weight[4] = { +delta, +gamma, +beta, +alpha };
	/* state to be stored once every old value has been consumed */
	float next[4];
	float right = data[1];
	int stage;

	for (stage = 3; stage >= 0; --stage) {
		const float left   = buff[stage];
		const float centre = stage < 3 ? buff[stage + 1] : data[0];
		const float first  = lever[stage] < 0 ? right : left;
		const float second = lever[stage] > 0 ? left : right;

		/* the input of this stage becomes its stored state */
		next[stage] = right;
		right = centre + weight[stage] * (first + second);
	}

	/* 'right' is now the result of stage 0, next[0] the result of stage 1 */
	data[0] = right;
	data[1] = next[0];

	for (stage = 0; stage < 4; ++stage)
		buff[stage] = next[stage];
}

static void dwtfloat_encode_core_f(float data[2], float buff[4])
{
	const float w0 = +delta;
	const float w1 = +gamma;
	const float w2 = +beta;
	const float w3 = +alpha;

	float l0, l1, l2, l3;
	float c0, c1, c2, c3;
	float r0, r1, r2, r3;
	float x0, x1;
	float y0, y1;

	l0 = buff[0];
	l1 = buff[1];
	l2 = buff[2];
	l3 = buff[3];

	x0 = data[0];
	x1 = data[1];

	c0 = l1;
	c1 = l2;
	c2 = l3;
	c3 = x0;

	r3 = x1;
	r2 = c3 + w3 * (l3 + r3);
	r1 = c2 + w2 * (l2 + r2);
	r0 = c1 + w1 * (l1 + r1);
	y0 = c0 + w0 * (l0 + r0);
	y1 = r0;

	l0 = r0;
	l1 = r1;
	l2 = r2;
	l3 = r3;

	data[0] = y0;
	data[1] = y1;

	buff[0] = l0;
	buff[1] = l1;
	buff[2] = l2;
	buff[3] = l3;
}

/*
 * One step of the inverse 1-D lifting pipeline, with boundary levers.
 *
 * data[0..1] holds the two incoming coefficients on entry and the two
 * outgoing samples on return. buff[0..3] is the pipeline state carried from
 * one call to the next; it is read and then completely rewritten.
 *
 * Four lifting stages run in the order 3, 2, 1, 0 with the weights -delta,
 * -gamma, -beta and -alpha. Each stage computes
 *     centre + weight * (left + right)
 * where 'left' comes from the stored state and 'right' is the value produced
 * by the previous stage (data[1] for stage 3). A negative lever[stage]
 * substitutes 'right' for 'left'; a positive one substitutes 'left' for
 * 'right'.
 */
static void dwtfloat_decode_core(float data[2], float buff[4], const int lever[4])
{
	/* indexed by stage; stage 3 is evaluated first */
	const float weight[4] = { -alpha, -beta, -gamma, -delta };
	/* state to be stored once every old value has been consumed */
	float next[4];
	float right = data[1];
	int stage;

	for (stage = 3; stage >= 0; --stage) {
		const float left   = buff[stage];
		const float centre = stage < 3 ? buff[stage + 1] : data[0];
		const float first  = lever[stage] < 0 ? right : left;
		const float second = lever[stage] > 0 ? left : right;

		/* the input of this stage becomes its stored state */
		next[stage] = right;
		right = centre + weight[stage] * (first + second);
	}

	/* 'right' is now the result of stage 0, next[0] the result of stage 1 */
	data[0] = right;
	data[1] = next[0];

	for (stage = 0; stage < 4; ++stage)
		buff[stage] = next[stage];
}

static void dwtfloat_decode_core_f(float data[2], float buff[4])
{
	const float w0 = -alpha;
	const float w1 = -beta;
	const float w2 = -gamma;
	const float w3 = -delta;

	float l0, l1, l2, l3;
	float c0, c1, c2, c3;
	float r0, r1, r2, r3;
	float x0, x1;
	float y0, y1;

	l0 = buff[0];
	l1 = buff[1];
	l2 = buff[2];
	l3 = buff[3];

	x0 = data[0];
	x1 = data[1];

	c0 = l1;
	c1 = l2;
	c2 = l3;
	c3 = x0;

	r3 = x1;
	r2 = c3 + w3 * (l3 + r3);
	r1 = c2 + w2 * (l2 + r2);
	r0 = c1 + w1 * (l1 + r1);
	y0 = c0 + w0 * (l0 + r0);
	y1 = r0;

	l0 = r0;
	l1 = r1;
	l2 = r2;
	l3 = r3;

	data[0] = y0;
	data[1] = y1;

	buff[0] = l0;
	buff[1] = l1;
	buff[2] = l2;
	buff[3] = l3;
}

/*
 * Forward 2-D core on a 2x2 block stored row-major in core[0..3].
 *
 * The 1-D core is applied to both rows (horizontal pass, state in
 * buff_y[0..7], levers from lever[1]); the block is transposed and the 1-D
 * core is applied again (vertical pass, state in buff_x[0..7], levers from
 * lever[0]); a final transpose restores the original orientation.
 */
/*static*/ void dwtfloat_encode_core2(float core[4], float *buff_y, float *buff_x, int lever[2][4])
{
	const int *const horizontal = lever[1];
	const int *const vertical   = lever[0];
	int half;

	/* horizontal filtering */
	for (half = 0; half < 2; ++half)
		dwtfloat_encode_core(core + 2*half, buff_y + 4*half, horizontal);
	transpose(core);

	/* vertical filtering */
	for (half = 0; half < 2; ++half)
		dwtfloat_encode_core(core + 2*half, buff_x + 4*half, vertical);
	transpose(core);
}

/*
 * Forward 2-D core on a 2x2 block stored row-major in core[0..3], using the
 * lever-free 1-D core.
 *
 * The 1-D core is applied to both rows (horizontal pass, state in
 * buff_y[0..7]); the block is transposed and the 1-D core is applied again
 * (vertical pass, state in buff_x[0..7]); a final transpose restores the
 * original orientation.
 */
/*static*/ void dwtfloat_encode_core2_f(float core[4], float *buff_y, float *buff_x)
{
	int half;

	/* horizontal filtering */
	for (half = 0; half < 2; ++half)
		dwtfloat_encode_core_f(core + 2*half, buff_y + 4*half);
	transpose(core);

	/* vertical filtering */
	for (half = 0; half < 2; ++half)
		dwtfloat_encode_core_f(core + 2*half, buff_x + 4*half);
	transpose(core);
}

/*
 * encode 2x2 coefficients
 *
 * Feeds the samples around quad (n_y, n_x) into the forward 2-D core and
 * stores the finished coefficients of quad (n_y-2, n_x-2). N_y and N_x are
 * the numbers of quads in each direction; quad (q_y, q_x) covers rows
 * 2*q_y, 2*q_y+1 and columns 2*q_x, 2*q_x+1, addressed through stride_y and
 * stride_x. Samples outside the signal are read as 0; the boundaries are
 * handled through the levers.
 */
void dwtfloat_encode_quad(float *data, ptrdiff_t N_y, ptrdiff_t N_x, ptrdiff_t stride_y, ptrdiff_t stride_x, float *buff_y, float *buff_x, ptrdiff_t n_y, ptrdiff_t n_x)
{
	/* vertical lever at [0], horizontal at [1] */
	int lever[2][4];
	/* order on input: 0=HH, 1=LH, 2=HL, 3=LL; on output: 0=LL, 1=HL, 2=LH, 3=HH */
	float core[4];
	int prev_y_ok, this_y_ok, prev_x_ok, this_x_ok;
	ptrdiff_t row_prev, row_this, col_prev, col_this;

	/* we cannot access buff_x[] and buff_y[] at negative indices */
	if (n_y < 0 || n_x < 0)
		return;

	encode_adjust_levers(lever[0], n_y, N_y);
	encode_adjust_levers(lever[1], n_x, N_x);

	prev_y_ok = signal_defined(n_y-1, N_y);
	this_y_ok = signal_defined(n_y-0, N_y);
	prev_x_ok = signal_defined(n_x-1, N_x);
	this_x_ok = signal_defined(n_x-0, N_x);

	/* odd row/column of the previous quad, even row/column of this quad */
	row_prev = stride_y * (2*(n_y-1) + 1);
	row_this = stride_y * (2*n_y);
	col_prev = stride_x * (2*(n_x-1) + 1);
	col_this = stride_x * (2*n_x);

	core[0] = (prev_y_ok && prev_x_ok) ? data[row_prev + col_prev] : 0; /* HH */
	core[1] = (prev_y_ok && this_x_ok) ? data[row_prev + col_this] : 0; /* LH */
	core[2] = (this_y_ok && prev_x_ok) ? data[row_this + col_prev] : 0; /* HL */
	core[3] = (this_y_ok && this_x_ok) ? data[row_this + col_this] : 0; /* LL */

	dwtfloat_encode_core2(core, buff_y + 8*n_y, buff_x + 8*n_x, lever);

	/* the core output belongs to the quad two positions back in each direction */
	if (signal_defined(n_y-2, N_y) && signal_defined(n_x-2, N_x)) {
		const ptrdiff_t out = stride_y * (2*(n_y-2)) + stride_x * (2*(n_x-2));

		data[out]                       = core[0] * sqr_zeta;     /* LL */
		data[out + stride_x]            = core[1] * -1;           /* HL */
		data[out + stride_y]            = core[2] * -1;           /* LH */
		data[out + stride_y + stride_x] = core[3] * rcp_sqr_zeta; /* HH */
	}
}

/*
 * Encode 2x2 coefficients with the lever-free core.
 *
 * Feeds the samples around quad (n_y, n_x) into the forward 2-D core and
 * stores the finished coefficients of quad (n_y-2, n_x-2). N_y and N_x are
 * the numbers of quads in each direction; quad (q_y, q_x) covers rows
 * 2*q_y, 2*q_y+1 and columns 2*q_x, 2*q_x+1, addressed through stride_y and
 * stride_x. Samples outside the signal are read as 0.
 */
void dwtfloat_encode_quad_f(float *data, ptrdiff_t N_y, ptrdiff_t N_x, ptrdiff_t stride_y, ptrdiff_t stride_x, float *buff_y, float *buff_x, ptrdiff_t n_y, ptrdiff_t n_x)
{
	/* order on input: 0=HH, 1=LH, 2=HL, 3=LL; on output: 0=LL, 1=HL, 2=LH, 3=HH */
	float core[4];
	int prev_y_ok, this_y_ok, prev_x_ok, this_x_ok;
	ptrdiff_t row_prev, row_this, col_prev, col_this;

	/* we cannot access buff_x[] and buff_y[] at negative indices */
	if (n_y < 0 || n_x < 0)
		return;

	prev_y_ok = signal_defined(n_y-1, N_y);
	this_y_ok = signal_defined(n_y-0, N_y);
	prev_x_ok = signal_defined(n_x-1, N_x);
	this_x_ok = signal_defined(n_x-0, N_x);

	/* odd row/column of the previous quad, even row/column of this quad */
	row_prev = stride_y * (2*(n_y-1) + 1);
	row_this = stride_y * (2*n_y);
	col_prev = stride_x * (2*(n_x-1) + 1);
	col_this = stride_x * (2*n_x);

	core[0] = (prev_y_ok && prev_x_ok) ? data[row_prev + col_prev] : 0; /* HH */
	core[1] = (prev_y_ok && this_x_ok) ? data[row_prev + col_this] : 0; /* LH */
	core[2] = (this_y_ok && prev_x_ok) ? data[row_this + col_prev] : 0; /* HL */
	core[3] = (this_y_ok && this_x_ok) ? data[row_this + col_this] : 0; /* LL */

	dwtfloat_encode_core2_f(core, buff_y + 8*n_y, buff_x + 8*n_x);

	/* the core output belongs to the quad two positions back in each direction */
	if (signal_defined(n_y-2, N_y) && signal_defined(n_x-2, N_x)) {
		const ptrdiff_t out = stride_y * (2*(n_y-2)) + stride_x * (2*(n_x-2));

		data[out]                       = core[0] * sqr_zeta;     /* LL */
		data[out + stride_x]            = core[1] * -1;           /* HL */
		data[out + stride_y]            = core[2] * -1;           /* LH */
		data[out + stride_y + stride_x] = core[3] * rcp_sqr_zeta; /* HH */
	}
}

/*
 * Inverse 2-D core on a 2x2 block stored row-major in core[0..3].
 *
 * The 1-D core is applied to both rows (horizontal pass, state in
 * buff_y[0..7], levers from lever[1]); the block is transposed and the 1-D
 * core is applied again (vertical pass, state in buff_x[0..7], levers from
 * lever[0]); a final transpose restores the original orientation.
 */
/*static*/ void dwtfloat_decode_core2(float core[4], float *buff_y, float *buff_x, int lever[2][4])
{
	const int *const horizontal = lever[1];
	const int *const vertical   = lever[0];
	int half;

	/* horizontal filtering */
	for (half = 0; half < 2; ++half)
		dwtfloat_decode_core(core + 2*half, buff_y + 4*half, horizontal);
	transpose(core);

	/* vertical filtering */
	for (half = 0; half < 2; ++half)
		dwtfloat_decode_core(core + 2*half, buff_x + 4*half, vertical);
	transpose(core);
}

/*
 * Inverse 2-D core on a 2x2 block stored row-major in core[0..3], using the
 * lever-free 1-D core.
 *
 * The 1-D core is applied to both rows (horizontal pass, state in
 * buff_y[0..7]); the block is transposed and the 1-D core is applied again
 * (vertical pass, state in buff_x[0..7]); a final transpose restores the
 * original orientation.
 */
/*static*/ void dwtfloat_decode_core2_f(float core[4], float *buff_y, float *buff_x)
{
	int half;

	/* horizontal filtering */
	for (half = 0; half < 2; ++half)
		dwtfloat_decode_core_f(core + 2*half, buff_y + 4*half);
	transpose(core);

	/* vertical filtering */
	for (half = 0; half < 2; ++half)
		dwtfloat_decode_core_f(core + 2*half, buff_x + 4*half);
	transpose(core);
}

/*
 * Decode 2x2 coefficients.
 *
 * Feeds the (rescaled) coefficients of quad (n_y, n_x) into the inverse 2-D
 * core and stores the four reconstructed samples, which land one or two quads
 * back in each direction. N_y and N_x are the numbers of quads in each
 * direction; quad (q_y, q_x) covers rows 2*q_y, 2*q_y+1 and columns 2*q_x,
 * 2*q_x+1, addressed through stride_y and stride_x. A quad outside the signal
 * is fed to the core as zeros; the boundaries are handled through the levers.
 */
void dwtfloat_decode_quad(float *data, ptrdiff_t N_y, ptrdiff_t N_x, ptrdiff_t stride_y, ptrdiff_t stride_x, float *buff_y, float *buff_x, ptrdiff_t n_y, ptrdiff_t n_x)
{
	/* vertical lever at [0], horizontal at [1] */
	int lever[2][4];
	/* order on input: 0=LL, 1=HL, 2=LH, 3=HH */
	float core[4];
	int even_row_ok, odd_row_ok, even_col_ok, odd_col_ok;
	ptrdiff_t even_row, odd_row, even_col, odd_col;

	/* we cannot access buff_x[] and buff_y[] at negative indices */
	if (n_y < 0 || n_x < 0)
		return;

	decode_adjust_levers(lever[0], n_y, N_y);
	decode_adjust_levers(lever[1], n_x, N_x);

	if (signal_defined(n_y-0, N_y) && signal_defined(n_x-0, N_x)) {
		const ptrdiff_t in = stride_y * (2*n_y) + stride_x * (2*n_x);

		/* undo the scaling applied by the encoder */
		core[0] = data[in]                       * rcp_sqr_zeta; /* LL */
		core[1] = data[in + stride_x]            * -1;           /* HL */
		core[2] = data[in + stride_y]            * -1;           /* LH */
		core[3] = data[in + stride_y + stride_x] * sqr_zeta;     /* HH */
	} else {
		core[0] = core[1] = core[2] = core[3] = 0;
	}

	dwtfloat_decode_core2(core, buff_y + 8*n_y, buff_x + 8*n_x, lever);

	/* even rows/columns are written one quad back, odd ones two quads back */
	even_row_ok = signal_defined(n_y-1, N_y);
	odd_row_ok  = signal_defined(n_y-2, N_y);
	even_col_ok = signal_defined(n_x-1, N_x);
	odd_col_ok  = signal_defined(n_x-2, N_x);

	even_row = stride_y * (2*(n_y-1));
	odd_row  = stride_y * (2*(n_y-2) + 1);
	even_col = stride_x * (2*(n_x-1));
	odd_col  = stride_x * (2*(n_x-2) + 1);

	if (even_row_ok && even_col_ok)
		data[even_row + even_col] = core[3]; /* LL */
	if (even_row_ok && odd_col_ok)
		data[even_row + odd_col] = core[2]; /* HL */
	if (odd_row_ok && even_col_ok)
		data[odd_row + even_col] = core[1]; /* LH */
	if (odd_row_ok && odd_col_ok)
		data[odd_row + odd_col] = core[0]; /* HH */
}

/*
 * Decode 2x2 coefficients with the lever-free core.
 *
 * Feeds the (rescaled) coefficients of quad (n_y, n_x) into the inverse 2-D
 * core and stores the four reconstructed samples, which land one or two quads
 * back in each direction. N_y and N_x are the numbers of quads in each
 * direction; quad (q_y, q_x) covers rows 2*q_y, 2*q_y+1 and columns 2*q_x,
 * 2*q_x+1, addressed through stride_y and stride_x. A quad outside the signal
 * is fed to the core as zeros.
 */
void dwtfloat_decode_quad_f(float *data, ptrdiff_t N_y, ptrdiff_t N_x, ptrdiff_t stride_y, ptrdiff_t stride_x, float *buff_y, float *buff_x, ptrdiff_t n_y, ptrdiff_t n_x)
{
	/* order on input: 0=LL, 1=HL, 2=LH, 3=HH */
	float core[4];
	int even_row_ok, odd_row_ok, even_col_ok, odd_col_ok;
	ptrdiff_t even_row, odd_row, even_col, odd_col;

	/* we cannot access buff_x[] and buff_y[] at negative indices */
	if (n_y < 0 || n_x < 0)
		return;

	if (signal_defined(n_y-0, N_y) && signal_defined(n_x-0, N_x)) {
		const ptrdiff_t in = stride_y * (2*n_y) + stride_x * (2*n_x);

		/* undo the scaling applied by the encoder */
		core[0] = data[in]                       * rcp_sqr_zeta; /* LL */
		core[1] = data[in + stride_x]            * -1;           /* HL */
		core[2] = data[in + stride_y]            * -1;           /* LH */
		core[3] = data[in + stride_y + stride_x] * sqr_zeta;     /* HH */
	} else {
		core[0] = core[1] = core[2] = core[3] = 0;
	}

	dwtfloat_decode_core2_f(core, buff_y + 8*n_y, buff_x + 8*n_x);

	/* even rows/columns are written one quad back, odd ones two quads back */
	even_row_ok = signal_defined(n_y-1, N_y);
	odd_row_ok  = signal_defined(n_y-2, N_y);
	even_col_ok = signal_defined(n_x-1, N_x);
	odd_col_ok  = signal_defined(n_x-2, N_x);

	even_row = stride_y * (2*(n_y-1));
	odd_row  = stride_y * (2*(n_y-2) + 1);
	even_col = stride_x * (2*(n_x-1));
	odd_col  = stride_x * (2*(n_x-2) + 1);

	if (even_row_ok && even_col_ok)
		data[even_row + even_col] = core[3]; /* LL */
	if (even_row_ok && odd_col_ok)
		data[even_row + odd_col] = core[2]; /* HL */
	if (odd_row_ok && even_col_ok)
		data[odd_row + even_col] = core[1]; /* LH */
	if (odd_row_ok && odd_col_ok)
		data[odd_row + odd_col] = core[0]; /* HH */
}

/*
 * Soft thresholding: values at or above +lambda are reduced by lambda,
 * values at or below -lambda are increased by lambda, and everything else
 * becomes 0.
 */
float soft_threshold(float x, float lambda)
{
	float shrunk = 0;

	if (x >= +lambda) {
		shrunk = x - lambda;
	} else if (x <= -lambda) {
		shrunk = x + lambda;
	}

	return shrunk;
}

float cos_gain(float ratio)
{
	return (1.f - cosf(3.14159265358979323846f * ratio)) / 2.f;
}

/*
 * Attenuate the three detail coefficients (HL, LH, HH) of quad (n_y, n_x).
 * The LL coefficient is left untouched.
 *
 * In the active variant, a coefficient whose magnitude is below lambda is
 * multiplied by cos_gain(|coefficient| / lambda); larger coefficients are
 * kept as they are. The disabled variant applies soft_threshold() instead.
 */
void dwtfloat_threshold_quad(float *data, ptrdiff_t stride_y, ptrdiff_t stride_x, ptrdiff_t n_y, ptrdiff_t n_x, float lambda)
{
	/* offset of the LL coefficient, i.e. the top-left element of the quad */
	const ptrdiff_t origin = stride_y * (2*n_y) + stride_x * (2*n_x);
	float *detail[3];
	int band;

	detail[0] = &data[origin + stride_x];            /* HL */
	detail[1] = &data[origin + stride_y];            /* LH */
	detail[2] = &data[origin + stride_y + stride_x]; /* HH */

	for (band = 0; band < 3; ++band) {
#if 1
		const float magnitude = fabsf(*detail[band]);

		if (magnitude < lambda) {
			*detail[band] *= cos_gain(magnitude / lambda);
		}
#else
		*detail[band] = soft_threshold(*detail[band], lambda);
#endif
	}
}

/*
 * Forward-transform one band of height x width samples in place.
 *
 * Two zero-initialised state buffers are allocated for the lifting
 * pipelines, every quad position is visited row by row (including two extra
 * positions per direction, which lets the delayed core output drain), and the
 * buffers are released again.
 *
 * Returns RET_SUCCESS, or RET_FAILURE_MEMORY_ALLOCATION when a state buffer
 * cannot be allocated; in that case the band is not touched.
 *
 * NOTE(review): height and width are assumed to be non-negative - confirm
 * against callers.
 */
int dwtfloat_encode_band(float *band, ptrdiff_t stride_y, ptrdiff_t stride_x, ptrdiff_t height, ptrdiff_t width)
{
	ptrdiff_t y, x;
	/* buffer lengths in floats */
	const size_t size_y = (size_t) (height+4) * 4;
	const size_t size_x = (size_t) (width +4) * 4;

	float *buff_y, *buff_x;

	buff_y = malloc( size_y * sizeof *buff_y );
	buff_x = malloc( size_x * sizeof *buff_x );

	if (NULL == buff_y || NULL == buff_x) {
		/* one of the two may have succeeded; free(NULL) is a no-op */
		free(buff_x);
		free(buff_y);
		return RET_FAILURE_MEMORY_ALLOCATION;
	}

	zero(buff_y, size_y);
	zero(buff_x, size_x);

	for (y = 0; y < height/2+2; ++y) {
		for (x = 0; x < width/2+2; ++x) {
#ifndef CONFIG_USE_FAST_CORE
			dwtfloat_encode_quad(band, height/2, width/2, stride_y, stride_x, buff_y, buff_x, y, x);
#else
			dwtfloat_encode_quad_f(band, height/2, width/2, stride_y, stride_x, buff_y, buff_x, y, x);
#endif
		}
	}

	free(buff_x);
	free(buff_y);

	return RET_SUCCESS;
}

/*
 * Inverse-transform one band of height x width coefficients in place.
 *
 * Two zero-initialised state buffers are allocated for the lifting
 * pipelines, every quad position is visited row by row (including two extra
 * positions per direction, which lets the delayed core output drain), and the
 * buffers are released again.
 *
 * Returns RET_SUCCESS, or RET_FAILURE_MEMORY_ALLOCATION when a state buffer
 * cannot be allocated; in that case the band is not touched.
 *
 * NOTE(review): height and width are assumed to be non-negative - confirm
 * against callers.
 */
int dwtfloat_decode_band(float *band, ptrdiff_t stride_y, ptrdiff_t stride_x, ptrdiff_t height, ptrdiff_t width)
{
	ptrdiff_t y, x;
	/* buffer lengths in floats */
	const size_t size_y = (size_t) (height+4) * 4;
	const size_t size_x = (size_t) (width +4) * 4;

	float *buff_y, *buff_x;

	buff_y = malloc( size_y * sizeof *buff_y );
	buff_x = malloc( size_x * sizeof *buff_x );

	if (NULL == buff_y || NULL == buff_x) {
		/* one of the two may have succeeded; free(NULL) is a no-op */
		free(buff_x);
		free(buff_y);
		return RET_FAILURE_MEMORY_ALLOCATION;
	}

	zero(buff_y, size_y);
	zero(buff_x, size_x);

	for (y = 0; y < height/2+2; ++y) {
		for (x = 0; x < width/2+2; ++x) {
#ifndef CONFIG_USE_FAST_CORE
			dwtfloat_decode_quad(band, height/2, width/2, stride_y, stride_x, buff_y, buff_x, y, x);
#else
			dwtfloat_decode_quad_f(band, height/2, width/2, stride_y, stride_x, buff_y, buff_x, y, x);
#endif
		}
	}

	free(buff_x);
	free(buff_y);

	return RET_SUCCESS;
}

/*
 * Apply dwtfloat_threshold_quad() to every complete 2x2 quad of a band of
 * height x width coefficients. Always returns RET_SUCCESS.
 */
int dwtfloat_threshold_band(float *band, ptrdiff_t stride_y, ptrdiff_t stride_x, ptrdiff_t height, ptrdiff_t width, float lambda)
{
	const ptrdiff_t quads_y = height/2;
	const ptrdiff_t quads_x = width/2;
	ptrdiff_t y = 0;

	while (y < quads_y) {
		ptrdiff_t x = 0;

		while (x < quads_x) {
			dwtfloat_threshold_quad(band, stride_y, stride_x, y, x, lambda);
			++x;
		}
		++y;
	}

	return RET_SUCCESS;
}

/*
 * Forward J-level two-dimensional CDF 9/7 transform of a plane, in place.
 *
 * Level j works on a band of (height >> j) x (width >> j) samples addressed
 * with the plane's strides multiplied by 2^j.
 *
 * Returns RET_SUCCESS, or the first non-success code reported by
 * dwtfloat_encode_band(); levels transformed before the failure stay
 * transformed.
 */
int dwt97_encode(struct plane *plane, int J)
{
	int j;
	ptrdiff_t height, width;
	ptrdiff_t stride_x, stride_y;
	float *data;

	assert(plane != NULL);

	height = (ptrdiff_t) plane->height;
	width  = (ptrdiff_t) plane->width;
	stride_x = (ptrdiff_t) plane->stride_x;
	stride_y = (ptrdiff_t) plane->stride_y;

	data = plane->data;

	assert(data != NULL);

	/* forward two-dimensional transform */

	/* for each level */
	for (j = 0; j < J; ++j) {
		/* number of elements for input */
		ptrdiff_t height_j = height >> j, width_j = width >> j;

		/* stride of input data (for level j) */
		ptrdiff_t stride_y_j = stride_y << j, stride_x_j = stride_x << j;

		int ret = dwtfloat_encode_band(data, stride_y_j, stride_x_j, height_j, width_j);

		/* do not report success when a level could not be processed */
		if (ret != RET_SUCCESS) {
			return ret;
		}
	}

	return RET_SUCCESS;
}

/*
 * Inverse J-level two-dimensional CDF 9/7 transform of a plane, in place.
 *
 * Levels are undone from the coarsest (J-1) down to 0. Level j works on a
 * band of (height >> j) x (width >> j) coefficients addressed with the
 * plane's strides multiplied by 2^j.
 *
 * Returns RET_SUCCESS, or the first non-success code reported by
 * dwtfloat_decode_band(); levels processed before the failure stay
 * processed.
 */
int dwt97_decode(struct plane *plane, int J)
{
	int j;
	ptrdiff_t height, width;
	ptrdiff_t stride_x, stride_y;
	float *data;

	assert(plane != NULL);

	height = (ptrdiff_t) plane->height;
	width  = (ptrdiff_t) plane->width;
	stride_x = (ptrdiff_t) plane->stride_x;
	stride_y = (ptrdiff_t) plane->stride_y;

	data = plane->data;

	assert(data != NULL);

	/* inverse two-dimensional transform */

	/* for each level, coarsest first */
	for (j = J - 1; j >= 0; --j) {
		/* number of elements at level j */
		ptrdiff_t height_j = height >> j, width_j = width >> j;

		/* stride of the data at level j */
		ptrdiff_t stride_y_j = stride_y << j, stride_x_j = stride_x << j;

		int ret = dwtfloat_decode_band(data, stride_y_j, stride_x_j, height_j, width_j);

		/* do not report success when a level could not be processed */
		if (ret != RET_SUCCESS) {
			return ret;
		}
	}

	return RET_SUCCESS;
}

/*
 * Threshold the detail coefficients of a J-level transformed plane, in place.
 *
 * Levels are visited from the coarsest (J-1) down to 0. Level j works on a
 * band of (height >> j) x (width >> j) coefficients addressed with the
 * plane's strides multiplied by 2^j; lambda is passed unchanged to every
 * level.
 *
 * Returns RET_SUCCESS, or the first non-success code reported by
 * dwtfloat_threshold_band().
 */
int dwt97_threshold(struct plane *plane, int J, float lambda)
{
	int j;
	ptrdiff_t height, width;
	ptrdiff_t stride_x, stride_y;
	float *data;

	assert(plane != NULL);

	height = (ptrdiff_t) plane->height;
	width  = (ptrdiff_t) plane->width;
	stride_x = (ptrdiff_t) plane->stride_x;
	stride_y = (ptrdiff_t) plane->stride_y;

	data = plane->data;

	assert(data != NULL);

	/* thresholding of the detail coefficients, level by level */

	for (j = J - 1; j >= 0; --j) {
		/* number of elements at level j */
		ptrdiff_t height_j = height >> j, width_j = width >> j;

		/* stride of the data at level j */
		ptrdiff_t stride_y_j = stride_y << j, stride_x_j = stride_x << j;

		int ret = dwtfloat_threshold_band(data, stride_y_j, stride_x_j, height_j, width_j, lambda);

		if (ret != RET_SUCCESS) {
			return ret;
		}
	}

	return RET_SUCCESS;
}
