2D Vector Class Wrapper SSE Optimized for Math Operations

Chesnokov Yuriy
Rate me:
4.93/5 (28 votes)
20 May 2008GPL36 min read
82.1K
1.2K
The article demonstrates a 2D vector wrapper, optimized with SSE intrinsics, for math operations with floating point precision.
vec2d.zip
- vec2d.cpp
- vec2d.h
/*

SSE optimized 2D vector  
Copyright (C) 2007 YURIY V. CHESNOKOV

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.


You may contact the author by e-mail chesnokov_yuriy@mail.ru

*/


#include "stdafx.h"
#include "vec2d.h"





/////////////////////////constructors/destructors////////////////////////////////////////////////////
vec2D::vec2D(unsigned int ysize, unsigned int xsize,
             int yoffset, int xoffset, const float* data) : m_width(xsize), m_height(ysize),
                m_xoffset(xoffset), m_yoffset(yoffset), m_data(0)
{
        init(ysize, xsize, yoffset, xoffset);

        const float* pdata = data;
        for (int y = yfirst(); y <= ylast(); y++) {
                for (int x = xfirst(); x <= xlast() ; x++) {
                        if (data != 0)
                                m_data[y][x] = *pdata++;
                        else
                                m_data[y][x] = 0.0f;
                }
        }
}
vec2D::vec2D(const vec2D& v)
{
        init(v.height(), v.width(), v.yfirst(), v.xfirst());
        init(v);
}
vec2D::vec2D(const wchar_t* file) : m_width(0), m_height(0), m_xlast(0), m_ylast(0),
                m_xoffset(0), m_yoffset(0), m_data(0)
{
        float val;

        FILE* fp = _wfopen(file, L"rt");
        if (fp) {
                if (fwscanf(fp, L"%d %d", &m_height, &m_width) != 2) {
                        fclose(fp);
                        init(1, 1);
                        return;
                }

                init(m_height, m_width);

                for (int y = yfirst(); y <= ylast(); y++) {
                        for (int x = xfirst(); x <= xlast() ; x++) {
                                if (fwscanf(fp, L"%f", &val) != 1) {
                                        m_data[y][x] = 0.0f;
                                } else
                                        m_data[y][x] = val;
                        }
                }

                fclose(fp);
        } else
                init(1, 1);
}
vec2D::~vec2D()
{
        if (m_data != 0)
                close();
}
////////////////////////////////////////////////////////////////////////////////





//////////////////////init,free memory//////////////////////////////////////////
void vec2D::init(unsigned int ysize, unsigned int xsize, int yoffset, int xoffset)
{
        m_width = xsize;
        m_height = ysize;
        m_xoffset = xoffset;
        m_yoffset = yoffset;

        m_xlast = (m_width + m_xoffset) - 1;
        m_ylast = (m_height + m_yoffset) - 1;

        m_data = (float**) malloc(m_height * sizeof(float*));         //setup rows
        for (unsigned int j = 0; j < m_height; j++) {
                m_data[j] = (float*) _aligned_malloc(m_width * sizeof(float), 16);       //setup cols
                m_data[j] -= m_xoffset;
        }
        m_data -= m_yoffset;
}
void vec2D::init(const vec2D& v)
{
        for (int y = v.yfirst(); y <= v.ylast(); y++)
                for (int x = v.xfirst(); x <= v.xlast(); x++)
                        (*this)(y, x) = v(y, x);
}
void vec2D::close(void)
{
        m_data += m_yoffset;
        for (unsigned int j = 0; j < m_height; j++)
                _aligned_free(m_data[j] + m_xoffset);      //delete colums
        free(m_data);       //delete rows

        m_data = 0;
        m_width = 0;
        m_height = 0;
}
void vec2D::print(const wchar_t* file) const
{
        const vec2D &v = *this;
        if (file) {
                FILE *fp = _wfopen(file, L"wt");
                if (fp) {
                        fwprintf(fp, L"\n vec: %p\n", this);
                        for (int y = v.yfirst(); y <= v.ylast(); y++) {
                                for (int x = v.xfirst(); x <= v.xlast(); x++)
                                        fwprintf(fp, L" %g", v(y, x));
                                fwprintf(fp, L"\n");
                        }
                        fclose(fp);
                }
        } else {
                wprintf(L"\n vec: %p\n", this);
                for (int y = v.yfirst(); y <= v.ylast(); y++) {
                        for (int x = v.xfirst(); x <= v.xlast(); x++)
                                wprintf(L" %g", v(y, x));
                        wprintf(L"\n");
                }
        }
}
////////////////////////////////////////////////////////////////////////////////






/////////////////////operators//////////////////////////////////////////////////
void vec2D::set(float scalar)
{
        vec2D &v = *this;
        for (int y = v.yfirst(); y <= v.ylast(); y++)
                for (int x = v.xfirst(); x <= v.xlast(); x++)
                        v(y, x) = scalar;
}
//set to Rect [ left, right )
//            [ top, bottom )
void vec2D::set(float scalar, RECT& r)                              //0-offset array function
{
        vec2D &v = *this;

        if (v.xfirst() != 0 || v.yfirst() != 0)
                return;

        if (r.top < 0) r.top = 0;
        if (r.left < 0) r.left = 0;
        if ((unsigned int)r.bottom > v.height()) r.bottom = v.height();
        if ((unsigned int)r.right > v.width()) r.right = v.width();

        __m128 mscalar = _mm_load_ps1(&scalar);
        unsigned int width = r.right - r.left;

        for (unsigned int y = (unsigned int)r.top; y < (unsigned int)r.bottom; y++) {
                for (unsigned int x = 0; x < width / 4; x++)
                        _mm_storeu_ps(&v(y, r.left + 4*x), mscalar);      //store unaligned r.left%4 might be nonzero

                if (width % 4) {
                        for (unsigned int x = width - width % 4; x < width; x++)
                                v(y, r.left + x) = scalar;
                }
        }
}
void vec2D::setrand()
{
        vec2D &v = *this;

        int r;
        srand((unsigned int)time(0));
        for (int y = v.yfirst(); y <= v.ylast(); y++) {
                for (int x = v.xfirst(); x <= v.xlast(); x++) {
                        r = 0xFFF & rand();
                        r -= 0x800;
                        v(y, x) = (float)r / 2048.0f;
                }
        }
}

bool vec2D::copy(const vec2D& v, int left, int top)                                        //copy [hxw] region from top,left offset A to this
{
        //0-offset array function
        //v size >= this size !
        vec2D& pv = *this;
        if ((v.xfirst() || v.yfirst()) || (pv.xfirst() || pv.yfirst()))
                return false;

        RECT r;                           //selected rectangle
        r.left = left;                    //left,top coords
        r.top = top;
        r.right = r.left + width();       //right,bottom coords
        r.bottom = r.top + height();

        if ((r.left >= 0 && r.top >= 0) && (r.right <= (int)v.width() && r.bottom <= (int)v.height())) {       //operator()
                for (unsigned int y = 0; y < height(); y++)
                        for (unsigned int x = 0; x < width(); x++)
                                pv(y, x) = v(y + r.top, x + r.left);
        } else { //get
                for (unsigned int y = 0; y < height(); y++)
                        for (unsigned int x = 0; x < width(); x++)
                                pv(y, x) = v.get(y + r.top, x + r.left);
        }

        return true;
}
/////////////////////////////////////////////////////////////////////////////////



///////zero offset operations////////////////////////////////////////////////////
void vec2D::add(const vec2D& a, const vec2D& b)      //this = a.+b
{
        vec2D& c = *this;

        for (unsigned int y = 0; y < c.height(); y++) {
                for (unsigned int x = 0; x < c.width() / 4; x++)
                        _mm_store_ps(&c(y, 4*x), _mm_add_ps(_mm_load_ps(a.data(y, 4*x)), _mm_load_ps(b.data(y, 4*x))));

                if ((c.width() % 4) != 0) {
                        for (unsigned int x = c.width() - c.width() % 4; x < c.width(); x++)
                                c(y, x) = a(y, x) + b(y, x);
                }
        }
}
void vec2D::sub(const vec2D& a, const vec2D& b)      //this = a.-b
{
        vec2D& c = *this;
        for (unsigned int y = 0; y < c.height(); y++) {
                for (unsigned int x = 0; x < c.width() / 4; x++)
                        _mm_store_ps(&c(y, 4*x), _mm_sub_ps(_mm_load_ps(a.data(y, 4*x)), _mm_load_ps(b.data(y, 4*x))));

                if ((c.width() % 4) != 0) {
                        for (unsigned int x = c.width() - c.width() % 4; x < c.width(); x++)
                                c(y, x) = a(y, x) - b(y, x);
                }
        }
}
void vec2D::mule(const vec2D& a, const vec2D& b)      //this = a.*b
{
        vec2D& c = *this;
        for (unsigned int y = 0; y < c.height(); y++) {
                for (unsigned int x = 0; x < c.width() / 4; x++)
                        _mm_store_ps(&c(y, 4*x), _mm_mul_ps(_mm_load_ps(a.data(y, 4*x)), _mm_load_ps(b.data(y, 4*x))));

                if ((c.width() % 4) != 0) {
                        for (unsigned int x = c.width() - c.width() % 4; x < c.width(); x++)
                                c(y, x) = a(y, x) * b(y, x);
                }
        }
}
void vec2D::mul(const vec2D& a, const vec2D& b)      //this = a*b
{
        vec2D& c = *this;
        for (unsigned int y = 0; y < c.height(); y++) {
                for (unsigned int x = 0; x < c.width(); x++) {
                        c(y, x) = 0.0f;
                        for (unsigned int i = 0; i < a.width(); i++)
                                c(y, x) += a(y, i) * b(i, x);
                }
        }
}
void vec2D::mult(const vec2D& a, const vec2D& b)      //this = a*b'
{
        vec2D& c = *this;
        for (unsigned int y = 0; y < c.height(); y++)
                for (unsigned int x = 0; x < c.width(); x++)
                        c(y, x) = mconv(a.data(y, 0), b.data(x, 0), a.width());
}
void vec2D::div(const vec2D& a, const vec2D& b)      //this = a./b
{
        vec2D& c = *this;
        for (unsigned int y = 0; y < c.height(); y++) {
                for (unsigned int x = 0; x < c.width() / 4; x++)
                        _mm_store_ps(&c(y, 4*x), _mm_div_ps(_mm_load_ps(a.data(y, 4*x)), _mm_load_ps(b.data(y, 4*x))));

                if ((c.width() % 4) != 0) {
                        for (unsigned int x = c.width() - c.width() % 4; x < c.width(); x++)
                                c(y, x) = a(y, x) / b(y, x);
                }
        }
}

void vec2D::add(float scalar)           //this = this.+scalar   sse optimized
{
        vec2D& c = *this;
        __m128 mscalar = _mm_load_ps1(&scalar);

        for (unsigned int y = 0; y < c.height(); y++) {
                for (unsigned int x = 0; x < c.width() / 4; x++)
                        _mm_store_ps(&c(y, 4*x), _mm_add_ps(_mm_load_ps(&c(y, 4*x)), mscalar));

                if ((c.width() % 4) != 0) {
                        for (unsigned int x = c.width() - c.width() % 4; x < c.width(); x++)
                                c(y, x) += scalar;
                }
        }
}
void vec2D::sub(float scalar)           //this = this.-scalar   sse optimized
{
        vec2D& c = *this;
        __m128 mscalar = _mm_load_ps1(&scalar);

        for (unsigned int y = 0; y < c.height(); y++) {
                for (unsigned int x = 0; x < c.width() / 4; x++)
                        _mm_store_ps(&c(y, 4*x), _mm_sub_ps(_mm_load_ps(&c(y, 4*x)), mscalar));

                if ((c.width() % 4) != 0) {
                        for (unsigned int x = c.width() - c.width() % 4; x < c.width(); x++)
                                c(y, x) -= scalar;
                }
        }
}
void vec2D::mul(float scalar)           //this = this.*scalar   sse optimized
{
        vec2D& c = *this;
        __m128 mscalar = _mm_load_ps1(&scalar);

        for (unsigned int y = 0; y < c.height(); y++) {
                for (unsigned int x = 0; x < c.width() / 4; x++)
                        _mm_store_ps(&c(y, 4*x), _mm_mul_ps(_mm_load_ps(&c(y, 4*x)), mscalar));

                if ((c.width() % 4) != 0) {
                        for (unsigned int x = c.width() - c.width() % 4; x < c.width(); x++)
                                c(y, x) *= scalar;
                }
        }
}
void vec2D::div(float scalar)           //this = this./scalar   sse optimized
{
        vec2D& c = *this;
        __m128 mscalar = _mm_load_ps1(&scalar);

        for (unsigned int y = 0; y < c.height(); y++) {
                for (unsigned int x = 0; x < c.width() / 4; x++)
                        _mm_store_ps(&c(y, 4*x), _mm_div_ps(_mm_load_ps(&c(y, 4*x)), mscalar));

                if ((c.width() % 4) != 0) {
                        for (unsigned int x = c.width() - c.width() % 4; x < c.width(); x++)
                                c(y, x) /= scalar;
                }
        }
}
float vec2D::prod() const
{
        const vec2D& c = *this;

        float fres = 1.0f;
        __declspec(align(16)) float tmp[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
        __m128 mres;

        for (unsigned int y = 0; y < height(); y++) {
                for (unsigned int x = 0; x < width() / 4; x++) {
                        if (y == 0)
                                mres = _mm_load_ps(c.data(0, 0));
                        else
                                mres = _mm_mul_ps(mres, _mm_load_ps(c.data(y, 4 * x)));
                }

                if ((width() % 4) != 0) {
                        for (unsigned int x = width() - width() % 4; x < width(); x++)
                                fres *= c(y, x);
                }
        }

        //mres: a,b,c,d
        __m128 m = _mm_movehl_ps(mres, mres);    //m:  c,d,c,d
        mres = _mm_mul_ps(m, mres);              //mres: a*c,b*d,*,*
        _mm_store_ps(tmp, mres);

        return tmp[0] * tmp[1] * fres;
}

void vec2D::inter2(const vec2D& src, vec2D& dst_grdx, vec2D& dst_grdy)                //2d interpolation
{
        vec2D& trg = *this;

        float xrto = float(width() - 1) / float(src.width() - 1);       //trg/src
        float yrto = float(height() - 1) / float(src.height() - 1);

        //arrange before srcx,srcy,frcx,frcy to speed calcs
        for (unsigned int y = 0; y < height(); y++) {
                float srcy = (float)y / yrto;
                float frcy = srcy - float((int)srcy);
                dst_grdy(0, y) = srcy;
                dst_grdy(1, y) = frcy;
        }

        for (unsigned int x = 0; x < width(); x++) {
                float srcx = (float)x / xrto;               //position to take from src vec2D
                float frcx = srcx - float((int)srcx);       //srcx=1.34  frcx=.34
                dst_grdx(0, x) = srcx;
                dst_grdx(1, x) = frcx;
        }
        //arrange before srcx,srcy,frcx,frcy to speed calcs

        for (unsigned int y = 0; y < height(); y++) {
                for (unsigned int x = 0; x < width(); x++) {

                        unsigned int sx = (unsigned int)dst_grdx(0, x);         //x index to source
                        unsigned int sy = (unsigned int)dst_grdy(0, y);         //y index to source
                        trg(y, x) = src(sy, sx) * (1.0f - dst_grdy(1, y)) * (1.0f - dst_grdx(1, x));        //1-frcy 1-frcx

                        if (dst_grdx(1, x) > 0.0f && sx + 1 < src.width())
                                trg(y, x) += src(sy, sx + 1) * (1.0f - dst_grdy(1, y)) * dst_grdx(1, x);        //1-frcy frcx
                        if (dst_grdy(1, y) > 0.0f && sy + 1 < src.height())
                                trg(y, x) += src(sy + 1, sx) * dst_grdy(1, y) * (1.0f - dst_grdx(1, x));        //frcy 1-frcx
                        if ((dst_grdx(1, x) > 0.0f && dst_grdy(1, y) > 0.0f) && (sx + 1 < src.width() && sy + 1 < src.height()))
                                trg(y, x) += src(sy + 1, sx + 1) * dst_grdy(1, y) * dst_grdx(1, x);            //frcy frcx
                }
        }
}
///////zero offset operations////////////////////////////////////////////////////





///////////////////mat operations///////////////////////////////////////////////
void vec2D::conv2D(const vec2D& a, const vec2D& filter)        //this = conv2D(A, filter)  (this = A in sizes)
{
        vec2D& c = *this;
        for (int y = c.yfirst(); y <= c.ylast(); y++) {
                for (int x = c.xfirst(); x <= c.xlast(); x++) {

                        c(y, x) = 0.0f;
                        for (int n = filter.yfirst(); n <= filter.ylast(); n++)
                                for (int m = filter.xfirst(); m <= filter.xlast(); m++)
                                        c(y, x) += filter(n, m) * a.get(y + n, x + m);
                }
        }
}
void vec2D::conv2D(const vec2D& a, const vec2D& re, const vec2D& im)        //this = conv2D(A, Im,Re)  (this = A in sizes)
{
        vec2D& c = *this;
        for (int y = c.yfirst(); y <= c.ylast(); y++) {
                for (int x = c.xfirst(); x <= c.xlast(); x++) {

                        float imag = 0.0f;
                        float real = 0.0f, tmp;
                        for (int n = re.yfirst(); n <= re.ylast(); n++) {
                                for (int m = re.xfirst(); m <= re.xlast(); m++) {
                                        tmp = a.get(y + n, x + m);
                                        real += re(n, m) * tmp;
                                        imag += im(n, m) * tmp;
                                }
                        }
                        c(y, x) = sqrt(real * real + imag * imag);  //abs value
                }
        }
}
void vec2D::minmax(float& min, float& max) const
{
        const vec2D& c = *this;

        min = c(yfirst(), xfirst());
        max = min;

        for (int y = c.yfirst(); y <= c.ylast(); y++) {
                for (int x = c.xfirst(); x <= c.xlast(); x++) {
                        if (c(y, x) < min) min = c(y, x);
                        if (c(y, x) > max) max = c(y, x);
                }
        }
}
float vec2D::minval() const
{
        const vec2D& c = *this;

        float min = c(yfirst(), xfirst());

        for (int y = c.yfirst(); y <= c.ylast(); y++) {
                for (int x = c.xfirst(); x <= c.xlast(); x++) {
                        if (c(y, x) < min) min = c(y, x);
                }
        }

        return min;
}
float vec2D::maxval() const
{
        const vec2D& c = *this;

        float max = c(yfirst(), xfirst());

        for (int y = c.yfirst(); y <= c.ylast(); y++) {
                for (int x = c.xfirst(); x <= c.xlast(); x++) {
                        if (c(y, x) > max) max = c(y, x);
                }
        }

        return max;
}

//get max value with [x,y] coord from offset dx,dy in rect [sizex,sizey]
void vec2D::maxval(float& max, int& x, int& y, int sizex, int sizey, int dx, int dy) const
{
        const vec2D& c = *this;

        max = -FLT_MAX;

        if (dx == 0 || dx < (sizex - 1) / 2)
                dx = (sizex - 1) / 2;
        if (dy == 0 || dy < (sizey - 1) / 2)
                dy = (sizey - 1) / 2;

        x = dx;
        y = dy;

        float tmp = 0.0f;
        for (unsigned int n = dy; n < height() - dy; n++) {
                for (unsigned int m = dx; m < width() - dx; m++) {

                        tmp = 0.0f;
                        for (int i = -(sizey - 1) / 2; i <= (sizey - 1) / 2; i++)
                                for (int j = -(sizex - 1) / 2; j <= (sizex - 1) / 2; j++)
                                        tmp += c(n + i, m + j);

                        if (tmp > max) {
                                max = tmp;
                                y = n;
                                x = m;
                        }
                }
        }
}

void vec2D::normalize(float a, float b)
{
        vec2D& c = *this;

        float min, max;
        minmax(min, max);

        c.sub(min);
        c.mul((b - a) / (max - min));
        c.add(a);
}
void vec2D::histeq(vec2D &hist)    //hist 1x256 array
{
        vec2D& c = *this;

        hist.set(0.0f);

        for (int y = c.yfirst(); y <= c.ylast(); y++)
                for (int x = c.xfirst(); x <= c.xlast(); x++)
                        hist(0, int(c(y, x)))++;

        for (unsigned int x = 1; x < hist.width(); x++)
                hist((unsigned)0, x) += hist((unsigned)0, x - 1);

        hist.div(float(width() * height()));

        for (int y = c.yfirst(); y <= c.ylast(); y++)
                for (int x = c.xfirst(); x <= c.xlast(); x++)
                        c(y, x) = (float) hist(0, int(c(y, x)));
}
void vec2D::fliplr()
{
        vec2D& c = *this;
        float tmp;

        for (unsigned int i = 0; i < width() / 2; i++) {
                for (int y = yfirst(); y <= ylast(); y++) {
                        tmp = c(y, xfirst() + i);
                        c(y, xfirst() + i) = c(y, xlast() - i);
                        c(y, xlast() - i) = tmp;
                }
        }
}
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.
License

This article, along with any associated source code and files, is licensed under The GNU General Public License (GPLv3)
Written By
Chesnokov Yuriy
Engineer
Russian Federation
Highly skilled Engineer with 14 years of experience in academia, R&D and commercial product development supporting full software life-cycle from idea to implementation and further support. During my academic career I was able to succeed in MIT Computers in Cardiology 2006 international challenge, as a R&D and SW engineer gain CodeProject MVP, find algorithmic solutions to quickly resolve tough customer problems to pass product requirements in tight deadlines. My key areas of expertise involve Object-Oriented
Analysis and Design OOAD, OOP, machine learning, natural language processing, face recognition, computer vision and image processing, wavelet analysis, digital signal processing in cardiology.
2D Vector Class Wrapper SSE Optimized for Math Operations

License

Comments and Discussions