Click here to Skip to main content
Click here to Skip to main content
 
Add your own
alternative version

Embedded Zerotree Image Codec with Bior97 SSE Optimized Wavelet-transform

, 20 Oct 2007 GPL3
This article demonstrates the use of Embedded zero tree still image codec with JPEG 2000 wavelet-filter.
ezw_demo.zip
bin
ezwconsole.exe
ezw_demo_update_1_.zip
ezwconsole.exe
ezw_src.zip
src
Lib
Release
ezw_src_update_1_.zip

#include "stdafx.h"

#include "vec1d.h"
#include "basefwt.h"
#include "mbior97.h"


//bior97 filter////////////////////////////////////////////////////////////////////////////////////////

const float mBior97::tH[9] =  { 0.026748757410810f,    //-4
                                -0.016864118442875f,    //-3
                                -0.078223266528990f,    //-2
                                0.266864118442875f,    //-1
                                0.602949018236360f,    // 0
                                0.266864118442875f,    // 1
                                -0.078223266528990f,    // 2
                                -0.016864118442875f,    // 3
                                0.026748757410810f
                              };  // 4

const float mBior97::tG[9] = { 0.0f,                   //-4
                               0.0f,                   //-3
                               0.045635881557125f,     //-2
                               -0.02877176311425f,     //-1
                               -0.295635881557125f,    // 0
                               0.5575435262285f,       // 1
                               -0.295635881557125f,    // 2
                               -0.02877176311425f,     // 3
                               0.045635881557125f
                             };   // 4


const float mBior97::H[9] = { -0.045635881557125f,    //-3
                              -0.02877176311425f,     //-2
                              0.295635881557125f,     //-1
                              0.5575435262285f,       // 0
                              0.295635881557125f,     // 1
                              -0.02877176311425f,     // 2
                              -0.045635881557125f,    // 3
                              0.0f,                   // 4
                              0.0f
                            };   // 5


const float mBior97::G[9] = { 0.026748757410810f,     //-3
                              0.016864118442875f,     //-2
                              -0.078223266528990f,    //-1
                              -0.266864118442875f,    // 0
                              0.602949018236360f,     // 1
                              -0.266864118442875f,    // 2
                              -0.078223266528990f,    // 3
                              0.016864118442875f,     // 4
                              0.026748757410810f
                            };  // 5

//bior97 filter////////////////////////////////////////////////////////////////////////////////////////



///////////////////////////////////constructors/destructors///////////////////////////////////////////////////////////////////
mBior97::mBior97() : BaseFWT2D(L"bior97", tH, 9, 4, tG, 9, 4, H, 9, 3, G, 9, 3)
{
}
///////////////////////////////////constructors/destructors///////////////////////////////////////////////////////////////////




///////////////////////////////////////////////transforms/////////////////////////////////////////////////////////////////////
void mBior97::transrows(char** dest, char** sour, unsigned int w, unsigned int h) const
{
        char srck0[8] = {0, 0, 0, 0, 0, 0, 0, 0};

        //int n;
        float s, d;
        unsigned int w2 = w / 2;

        const vec1D& tH = gettH();
        const vec1D& tG = gettG();

        for (unsigned int y = 0; y < h; y++) {

                //k=0
                unsigned int k = 0;
                srck0[0] = sour[y][4];
                srck0[1] = sour[y][3];
                srck0[2] = sour[y][2];
                srck0[3] = sour[y][1];

                s = conv(tH.data(), srck0);
                s += conv(tH.data() + 4, &sour[y][2*k]);
                s += tH[4] * (float)sour[y][2*k + 4];
                d = conv(tG.data(), srck0);
                d += conv(tG.data() + 4, &sour[y][2*k]);
                d += tG[4] * (float)sour[y][2*k + 4];

                dest[y][k] = mmxround(s);
                dest[y][k+w2] = mmxroundTH(d);


                //k=1
                k = 1;
                srck0[0] = sour[y][2];
                srck0[1] = sour[y][1];
                srck0[2] = sour[y][0];
                srck0[3] = sour[y][1];

                s = conv(tH.data(), srck0);
                s += conv(tH.data() + 4, &sour[y][2*k]);
                s += tH[4] * (float)sour[y][2*k + 4];
                d = conv(tG.data(), srck0);
                d += conv(tG.data() + 4, &sour[y][2*k]);
                d += tG[4] * (float)sour[y][2*k + 4];

                dest[y][k] = mmxround(s);
                dest[y][k+w2] = mmxroundTH(d);


                //k=2, k<w2-2
                for (k = 2; k < w2 - 2; k++) {
                        s = conv(tH.data(), &sour[y][2*k - 4]);
                        s += conv(tH.data() + 4, &sour[y][2*k]);
                        s += tH[4] * (float)sour[y][2*k + 4];
                        d = conv(tG.data(), &sour[y][2*k - 4]);
                        d += conv(tG.data() + 4, &sour[y][2*k]);
                        d += tG[4] * (float)sour[y][2*k + 4];

                        dest[y][k] = mmxround(s);
                        dest[y][k+w2] = mmxroundTH(d);
                }


                //k=w2-2
                k = w2 - 2;

                s = conv(tH.data(), &sour[y][2*k - 4]);
                s += conv(tH.data() + 4, &sour[y][2*k]);
                s += tH[4] * (float)sour[y][2*k + 4 - 2];
                d = conv(tG.data(), &sour[y][2*k - 4]);
                d += conv(tG.data() + 4, &sour[y][2*k]);
                d += tG[4] * (float)sour[y][2*k + 4 - 2];

                dest[y][k] = mmxround(s);
                dest[y][k+w2] = mmxroundTH(d);


                //k=w2-1
                k = w2 - 1;
                srck0[0] = sour[y][2*k];
                srck0[1] = sour[y][2*k+1];
                srck0[2] = sour[y][2*k];
                srck0[3] = sour[y][2*k-1];

                s = conv(tH.data(), &sour[y][2*k - 4]);
                s += conv(tH.data() + 4, srck0);
                s += tH[4] * (float)sour[y][2*k - 2];
                d = conv(tG.data(), &sour[y][2*k - 4]);
                d += conv(tG.data() + 4, srck0);
                d += tG[4] * (float)sour[y][2*k - 2];

                dest[y][k] = mmxround(s);
                dest[y][k+w2] = mmxroundTH(d);
        }
}

void mBior97::transcols(char** dest, char** sour, unsigned int w, unsigned int h) const
{
        float fz = 0.0f;

        int n;
        float s, d;
        __m128 ms, md;
        unsigned int h2 = h / 2;

        const vec1D& tH = gettH();
        const vec1D& tG = gettG();

        for (unsigned int x = 0; x < w / 4; x++) {   //x<w/4   x = 4*x
                for (unsigned int k = 0; k < h2; k++) {

                        ms = _mm_load_ss(&fz);
                        md = ms;

                        for (int m = -4; m <= 4; m++) {
                                n = 2 * k + m;
                                if (n < 0) n = 0 - n;
                                if (n >= (int)h) n -= 2 * (1 + n - h);
                                ms = _mm_add_ps(ms, _mm_mul_ps(_mm_load_ps1(tH.data(m)), _mm_cvtpi8_ps(*(__m64 *)(&sour[n][4*x]))));
                        }
                        for (int m = -2; m <= 4; m++) {
                                n = 2 * k + m;
                                if (n < 0) n = 0 - n;
                                if (n >= (int)h) n -= 2 * (1 + n - h);
                                md = _mm_add_ps(md, _mm_mul_ps(_mm_load_ps1(tG.data(m)), _mm_cvtpi8_ps(*(__m64 *)(&sour[n][4*x]))));
                        }

                        if (4*x < w / 2) {
                                if ((w / 2) - (4*x) >= 4)
                                        mmxround4(&dest[k][4*x], ms);
                                else
                                        mmxround4TH(&dest[k][4*x], ms, (w / 2) - (4*x));   //skip first from LL part 10/2-4=1 [lo] o o o o * | * * * o o [hi]
                        } else
                                mmxround4TH(&dest[k][4*x], ms);

                        mmxround4TH(&dest[k+h2][4*x], md);
                }
        }
        _mm_empty();


        //odd remainder
        for (unsigned int x = w - (w % 4); x < w; x++) {
                for (unsigned int k = 0; k < h2; k++) {
                        s = 0;
                        d = 0;
                        for (int m = -4; m <= 4; m++) {
                                n = 2 * k + m;
                                if (n < 0) n = 0 - n;
                                if (n >= (int)h) n -= 2 * (1 + n - h);
                                s += tH[m] * float(sour[n][x]);
                        }
                        for (int m = -2; m <= 4; m++) {
                                n = 2 * k + m;
                                if (n < 0) n = 0 - n;
                                if (n >= (int)h) n -= 2 * (1 + n - h);
                                d += tG[m] * float(sour[n][x]);
                        }

                        if (x < w / 2)
                                dest[k][x] = mmxround(s);
                        else
                                dest[k][x] = mmxroundTH(s); 	        //is this needed? hi band were TH'ed on transrows

                        dest[k+h2][x] = mmxroundTH(d);          //is this needed? hi band were TH'ed on transrows on x>w/2
                }
        }
}
///////////////////////////////////////////////transforms/////////////////////////////////////////////////////////////////////





//////////////////////////////////////////////synths//////////////////////////////////////////////////////////////////////////
void mBior97::synthrows(char** dest, char** sour, unsigned int w, unsigned int h) const      //w,h of the LO part
{
        char srclo[8] = {0, 0, 0, 0, 0, 0, 0, 0};
        char srchi[8] = {0, 0, 0, 0, 0, 0, 0, 0};

        //int n;
        float s2k, s2k1;

        const vec1D& H2m = getH2m();
        const vec1D& G2m = getG2m();
        const vec1D& H2m1 = getH2m1();
        const vec1D& G2m1 = getG2m1();

        for (unsigned int y = 0; y < 2*h; y++) {

                //k = [0; 2)
                for (unsigned int k = 0; k < 2; k++) {

                        if (k == 0) {
                                srclo[0] = sour[y][2];
                                srclo[1] = sour[y][1];
                                srclo[2] = sour[y][0];
                                srclo[3] = sour[y][1];
                                srclo[4] = sour[y][2];
                                srchi[0] = sour[y][2+w];
                                srchi[1] = sour[y][1+w];
                                srchi[2] = sour[y][0+w];
                                srchi[3] = sour[y][1+w];
                                srchi[4] = sour[y][2+w];
                        } else if (k == 1) {
                                srclo[0] = sour[y][1];
                                srclo[1] = sour[y][0];
                                srclo[2] = sour[y][1];
                                srclo[3] = sour[y][2];
                                srclo[4] = sour[y][3];
                                srchi[0] = sour[y][1+w];
                                srchi[1] = sour[y][0+w];
                                srchi[2] = sour[y][1+w];
                                srchi[3] = sour[y][2+w];
                                srchi[4] = sour[y][3+w];
                        }

                        s2k = convr(H2m.data(), srclo);                //s2k even H
                        s2k += convr(G2m.data(), srchi);

                        s2k1 = convr(H2m1.data(), srclo + 1);          //s2k1 odd H
                        s2k1 += H2m1[H2m1.last()] * float(srclo[0]);
                        s2k1 += convr(G2m1.data(), srchi + 1);         //s2k1 odd G
                        s2k1 += G2m1[G2m1.last()] * float(srchi[0]);

                        dest[y][2*k] = mmxround(2.0f * s2k);
                        dest[y][2*k+1] = mmxround(2.0f * s2k1);
                }


                //k = [2; w-2)
                for (unsigned int k = 2; k < w - 2; k++) {

                        s2k = convr(H2m.data(), &sour[y][k-2]);         //s2k1 odd H
                        s2k += convr(G2m.data(), &sour[y][k-2+w]);      //s2k even G

                        s2k1 = convr(H2m1.data(), &sour[y][k-1]);         //s2k1 odd H
                        s2k1 += H2m1[H2m1.last()] * float(sour[y][k-2]);
                        s2k1 += convr(G2m1.data(), &sour[y][k-1+w]);      //s2k1 odd G
                        s2k1 += G2m1[G2m1.last()] * float(sour[y][k-2+w]);

                        dest[y][2*k] = mmxround(2.0f * s2k);
                        dest[y][2*k+1] = mmxround(2.0f * s2k1);
                }


                //k = [w-2; w)
                for (unsigned int k = w - 2; k < w; k++) {

                        if (k == w - 2) {
                                srclo[0] = sour[y][k-2];
                                srclo[1] = sour[y][k-1];
                                srclo[2] = sour[y][k];
                                srclo[3] = sour[y][k+1];
                                srclo[4] = sour[y][k];
                                srchi[0] = sour[y][k-2+w];
                                srchi[1] = sour[y][k-1+w];
                                srchi[2] = sour[y][k+w];
                                srchi[3] = sour[y][k+1+w];
                                srchi[4] = sour[y][k+w];
                        } else if (k == w - 1) {
                                srclo[0] = sour[y][k-2];
                                srclo[1] = sour[y][k-1];
                                srclo[2] = sour[y][k];
                                srclo[3] = sour[y][k-1];
                                srclo[4] = sour[y][k-2];
                                srchi[0] = sour[y][k-2+w];
                                srchi[1] = sour[y][k-1+w];
                                srchi[2] = sour[y][k+w];
                                srchi[3] = sour[y][k-1+w];
                                srchi[4] = sour[y][k-2+w];
                        }

                        s2k = convr(H2m.data(), srclo);            //s2k even H
                        s2k += convr(G2m.data(), srchi);

                        s2k1 = convr(H2m1.data(), srclo + 1);          //s2k1 odd H
                        s2k1 += H2m1[H2m1.last()] * float(srclo[0]);
                        s2k1 += convr(G2m1.data(), srchi + 1);         //s2k1 odd G
                        s2k1 += G2m1[G2m1.last()] * float(srchi[0]);

                        dest[y][2*k] = mmxround(2.0f * s2k);
                        dest[y][2*k+1] = mmxround(2.0f * s2k1);
                }
        }
}

void mBior97::synthcols(char** dest, char** sour, unsigned int w, unsigned int h) const     //w,h of the LO part
{
        float fz = 0.0f;
        float mul2 = 2.0f;

        int n;
        float s2k, s2k1;
        __m128 ms2k, ms2k1;
        unsigned int w2 = 2 * w;

        const vec1D& H2m = getH2m();
        const vec1D& G2m = getG2m();
        const vec1D& H2m1 = getH2m1();
        const vec1D& G2m1 = getG2m1();

        for (unsigned int x = 0; x < w2 / 4; x++) {      //x<w2/2   x = 4*x
                for (unsigned int k = 0; k < h; k++) {

                        ms2k = _mm_load_ss(&fz);
                        ms2k1 = ms2k;

                        for (int m = -1; m <= 1; m++) {                             //s2k even H
                                n = k - m;
                                if (n < 0) n = 0 - n;
                                if (n >= (int)h) n -= 2 * (1 + n - h);
                                ms2k = _mm_add_ps(ms2k, _mm_mul_ps(_mm_load_ps1(H2m.data(m)), _mm_cvtpi8_ps(*(__m64 *)(&sour[n][4*x]))));
                        }
                        for (int m = -1; m <= 2; m++) {                             //s2k even G
                                n = k - m;
                                if (n < 0) n = 0 - n;
                                if (n >= (int)h) n -= 2 * (1 + n - h);
                                ms2k = _mm_add_ps(ms2k, _mm_mul_ps(_mm_load_ps1(G2m.data(m)), _mm_cvtpi8_ps(*(__m64 *)(&sour[n+h][4*x]))));
                        }

                        for (int m = -2; m <= 1; m++) {                            //s2k1 odd H
                                n = k - m;
                                if (n < 0) n = 0 - n;
                                if (n >= (int)h) n -= 2 * (1 + n - h);
                                ms2k1 = _mm_add_ps(ms2k1, _mm_mul_ps(_mm_load_ps1(H2m1.data(m)), _mm_cvtpi8_ps(*(__m64 *)(&sour[n][4*x]))));
                        }
                        for (int m = -2; m <= 2; m++) {                            //s2k1 odd G
                                n = k - m;
                                if (n < 0) n = 0 - n;
                                if (n >= (int)h) n -= 2 * (1 + n - h);
                                ms2k1 = _mm_add_ps(ms2k1, _mm_mul_ps(_mm_load_ps1(G2m1.data(m)), _mm_cvtpi8_ps(*(__m64 *)(&sour[n+h][4*x]))));
                        }

                        __m128 mmul2 = _mm_load_ps1(&mul2);

                        mmxround4(&dest[2*k][4*x], _mm_mul_ps(ms2k, mmul2));
                        mmxround4(&dest[2*k+1][4*x], _mm_mul_ps(ms2k1, mmul2));
                }
        }
        _mm_empty();


        //odd remainder
        for (unsigned int x = w2 - (w2 % 4); x < w2; x++) {
                for (unsigned int k = 0; k < h; k++) {

                        s2k = 0;
                        s2k1 = 0;

                        for (int m = -1; m <= 1; m++) {        //s2k even H
                                n = k - m;
                                if (n < 0) n = 0 - n;
                                if (n >= (int)h) n -= 2 * (1 + n - h);
                                s2k += H2m[m] * float(sour[n][x]);
                        }
                        for (int m = -1; m <= 2; m++) {        //s2k even G
                                n = k - m;
                                if (n < 0) n = 0 - n;
                                if (n >= (int)h) n -= 2 * (1 + n - h);
                                s2k += G2m[m] * float(sour[n+h][x]);
                        }

                        for (int m = -2; m <= 1; m++) {       //s2k1 odd H
                                n = k - m;
                                if (n < 0) n = 0 - n;
                                if (n >= (int)h) n -= 2 * (1 + n - h);
                                s2k1 += H2m1[m] * float(sour[n][x]);
                        }
                        for (int m = -2; m <= 2; m++) {      //s2k1 odd G
                                n = k - m;
                                if (n < 0) n = 0 - n;
                                if (n >= (int)h) n -= 2 * (1 + n - h);
                                s2k1 += G2m1[m] * float(sour[n+h][x]);
                        }

                        dest[2*k][x] = mmxround(2.0f * s2k);
                        dest[2*k+1][x] = mmxround(2.0f * s2k1);
                }
        }
}
//////////////////////////////////////////////synths//////////////////////////////////////////////////////////////////////////

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The GNU General Public License (GPLv3)

Share

About the Author

Chesnokov Yuriy
Engineer
Russian Federation Russian Federation
No Biography provided

| Advertise | Privacy | Terms of Use | Mobile
Web03 | 2.8.1411023.1 | Last Updated 20 Oct 2007
Article Copyright 2007 by Chesnokov Yuriy
Everything else Copyright © CodeProject, 1999-2014
Layout: fixed | fluid