About part of the original OpenCV threshold code

Question

1.00/5 (2 votes)

See more:

Hi all.
Here is part of the OpenCV (version 2.4.0 and above) threshold code. I am confused about
why the original code seems to repeat the work three times to produce the result.
If I don't care about the time spent, do the original code, method 1, method 2 and method 3 all give the same result?

Original code:

C++

// Applies a threshold to the bytes of _src and writes the result to _dst.
//
// The work is split into stages that each cover a DIFFERENT part of a row,
// so together every byte is written exactly once in the visible
// THRESH_BINARY path:
//   1. SSE2 loop, 32 bytes per iteration (two 16-byte vectors),
//   2. SSE2 loop, 8 bytes per iteration, for what the 32-byte loop left,
//   3. table-driven scalar loop for the remaining bytes, starting at j_scalar.
// When the SSE2 branch is not taken, j_scalar stays 0 and stage 3 alone
// processes the whole row through the lookup table `tab`.
//
// Parameters:
//   _src    - source matrix; rows are read as raw bytes.
//   _dst    - destination matrix; not allocated here, rows are written as
//             raw bytes at the same offsets as the source.
//   thresh  - threshold value.
//   maxval  - value written where the THRESH_BINARY condition holds.
//   type    - threshold type; only THRESH_BINARY is shown in full in this
//             excerpt, the other cases are elided.
//
// NOTE(review): the return type is missing in this excerpt — presumably
// `static void` in the real source; confirm against the OpenCV sources.
thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
{
    // j_scalar is the column at which the scalar (table) stage starts; it is
    // only raised above 0 inside the SSE2 branch below.
    int i, j, j_scalar = 0;
    // Lookup table: tab[v] is the output for input byte v.
    uchar tab[256];
    Size roi = _src.size();
    // Treat each row as a flat run of bytes (columns * channels).
    roi.width *= _src.channels();

    // If neither matrix has padding between rows, process the whole image as
    // one long row.
    if( _src.isContinuous() && _dst.isContinuous() )
    {
        roi.width *= roi.height;
        roi.height = 1;
    }

    // Optional platform-specific implementation; if it reports success the
    // generic code below is skipped.
#ifdef HAVE_TEGRA_OPTIMIZATION
    if (tegra::thresh_8u(_src, _dst, roi.width, roi.height, thresh, maxval, type))
        return;
#endif

    // Build the lookup table for the requested threshold type. It is filled
    // unconditionally because the scalar stage at the end always uses it.
    switch( type )
    {
    case THRESH_BINARY:
        // v <= thresh -> 0, v > thresh -> maxval.
        // `i` is an int, so the first loop terminates even for thresh == 255.
        for( i = 0; i <= thresh; i++ )
            tab[i] = 0;
        for( ; i < 256; i++ )
            tab[i] = maxval;
        break;
    case THRESH_BINARY_INV:
    ……
    default:
        CV_Error( CV_StsBadArg, "Unknown threshold type" );
    }

#if CV_SSE2
    if( checkHardwareSupport(CV_CPU_SSE2) )
    {
        // _mm_cmpgt_epi8 compares SIGNED bytes. XOR-ing both operands with
        // 0x80 flips the top bit, which maps unsigned order onto signed
        // order, so the signed compare yields the unsigned "v > thresh".
        __m128i _x80 = _mm_set1_epi8('\x80');
        // thresh_u is not used in the visible THRESH_BINARY path
        // (presumably used by one of the elided cases).
        __m128i thresh_u = _mm_set1_epi8(thresh);
        __m128i thresh_s = _mm_set1_epi8(thresh ^ 0x80);
        __m128i maxval_ = _mm_set1_epi8(maxval);
        // Width rounded down to a multiple of 8: the SSE loops below stop
        // exactly here, and the scalar stage resumes from this column.
        j_scalar = roi.width & -8;

        for( i = 0; i < roi.height; i++ )
        {
            const uchar* src = (const uchar*)(_src.data + _src.step*i);
            uchar* dst = (uchar*)(_dst.data + _dst.step*i);

            switch( type )
            {
            case THRESH_BINARY:
                // Stage 1: bulk loop, 32 bytes per iteration using two
                // full 16-byte unaligned loads/stores.
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
                    // Compare result is 0xFF where v > thresh, 0x00 elsewhere.
                    v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
                    v1 = _mm_cmpgt_epi8( _mm_xor_si128(v1, _x80), thresh_s );
                    // Masking with maxval turns 0xFF into maxval.
                    v0 = _mm_and_si128( v0, maxval_ );
                    v1 = _mm_and_si128( v1, maxval_ );
                    _mm_storeu_si128( (__m128i*)(dst + j), v0 );
                    _mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
                }

                // Stage 2: continues from where stage 1 stopped (j is NOT
                // reset) and handles the leftover in 8-byte steps, using
                // 64-bit loads/stores so no byte past j + 8 is touched.
                // It ends with j == j_scalar.
                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
                    v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
                    v0 = _mm_and_si128( v0, maxval_ );
                    _mm_storel_epi64( (__m128i*)(dst + j), v0 );
                }
                break;

            case THRESH_BINARY_INV:
             ……
                break;
            }
        }
    }
#endif

    // Stage 3: scalar, table-driven. Covers columns [j_scalar, roi.width):
    // the whole row when the SSE2 branch did not run (j_scalar == 0),
    // otherwise only the last roi.width % 8 bytes of each row.
    if( j_scalar < roi.width )
    {
        for( i = 0; i < roi.height; i++ )
        {
            const uchar* src = (const uchar*)(_src.data + _src.step*i);
            uchar* dst = (uchar*)(_dst.data + _dst.step*i);
            j = j_scalar;
#if CV_ENABLE_UNROLLED
            // Manually unrolled variant: four table lookups per iteration.
            for( ; j <= roi.width - 4; j += 4 )
            {
                uchar t0 = tab[src[j]];
                uchar t1 = tab[src[j+1]];

                dst[j] = t0;
                dst[j+1] = t1;

                t0 = tab[src[j+2]];
                t1 = tab[src[j+3]];

                dst[j+2] = t0;
                dst[j+3] = t1;
            }
#endif
            // Final byte-by-byte loop for whatever is still left.
            for( ; j < roi.width; j++ )
                dst[j] = tab[src[j]];
        }
    }
}

Method 1:

C++

void thresh_8u_1( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
{
int i, j, j_scalar = 0;
	_dst.create(_src.size(),CV_8UC1);
	uchar tab[256];
	cv::Size roi = _src.size();
	roi.width *= _src.channels();

	if( _src.isContinuous() && _dst.isContinuous() )
	{
	roi.width *= roi.height;
	roi.height = 1;
	}
	__m128i _x80 = _mm_set1_epi8('\x80');
	__m128i thresh_u = _mm_set1_epi8(thresh);
	__m128i thresh_s = _mm_set1_epi8(thresh ^ 0x80);
	__m128i maxval_ = _mm_set1_epi8(maxval);
	j_scalar = roi.width & -8;

	for( i = 0; i < roi.height; i++ )
	{
	const uchar* src = (const uchar*)(_src.data + _src.step*i);
	uchar* dst = (uchar*)(_dst.data + _dst.step*i);

	switch( type )
	{
	case THRESH_BINARY:
	for( j = 0; j <= roi.width - 32; j += 32 )
	{
	__m128i v0, v1;
	v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
	v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
	v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
	v1 = _mm_cmpgt_epi8( _mm_xor_si128(v1, _x80), thresh_s );
	v0 = _mm_and_si128( v0, maxval_ );
	v1 = _mm_and_si128( v1, maxval_ );
	_mm_storeu_si128( (__m128i*)(dst + j), v0 );
	_mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
	}

	case THRESH_BINARY_INV:
				……………
	break;
	}
  }
}

Method 2:

C++

void thresh_8u_2( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
{
int i, j, j_scalar = 0;
dst.create(_src.size(),CV_8UC1);
	uchar tab[256];
	cv::Size roi = _src.size();
	roi.width *= _src.channels();

	if( _src.isContinuous() && _dst.isContinuous() )
	{
	roi.width *= roi.height;
	roi.height = 1;
	}

	__m128i _x80 = _mm_set1_epi8('\x80');
	__m128i thresh_u = _mm_set1_epi8(thresh);
	__m128i thresh_s = _mm_set1_epi8(thresh ^ 0x80);
	__m128i maxval_ = _mm_set1_epi8(maxval);
	j_scalar = roi.width & -8;

	for( i = 0; i < roi.height; i++ )
	{
	const uchar* src = (const uchar*)(_src.data + _src.step*i);
	uchar* dst = (uchar*)(_dst.data + _dst.step*i);

	switch( type )
	{
	case THRESH_BINARY:
						
	for( ; j <= roi.width - 8; j += 8 )
	{
	__m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
	v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
	v0 = _mm_and_si128( v0, maxval_ );
	_mm_storel_epi64( (__m128i*)(dst + j), v0 );
	}
	break;

	case THRESH_BINARY_INV:
	……………					
	break;
	}
}
}

Method 3:

C++

void thresh_8u_3( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
{
	int i, j, j_scalar = 0;
	_dst.create(_src.size(),CV_8UC1);
	uchar tab[256];
	cv::Size roi = _src.size();
	roi.width *= _src.channels();

	if( _src.isContinuous() && _dst.isContinuous() )
	{
	roi.width *= roi.height;
	roi.height = 1;
	}

	switch( type )
	{
	case THRESH_BINARY:
	for( i = 0; i <= thresh; i++ )
	tab[i] = 0;
	for( ; i < 256; i++ )
		tab[i] = maxval;
	break;
	case THRESH_BINARY_INV:
	………..
	default:
	CV_Error( CV_StsBadArg, "Unknown threshold type" );
}

	if( j_scalar < roi.width )
	{
		for( i = 0; i < roi.height; i++ )
		{
		const uchar* src = (const uchar*)(_src.data + _src.step*i);
		uchar* dst = (uchar*)(_dst.data + _dst.step*i);
		j = j_scalar;
#if CV_ENABLE_UNROLLED
		for( ; j <= roi.width - 4; j += 4 )
		{
		uchar t0 = tab[src[j]];
		uchar t1 = tab[src[j+1]];

		dst[j] = t0;
		dst[j+1] = t1;

		t0 = tab[src[j+2]];
		t1 = tab[src[j+3]];

		dst[j+2] = t0;
		dst[j+3] = t1;
		}
#endif
		for( ; j < roi.width; j++ )
		dst[j] = tab[src[j]];
		}
	}
}

Thanks for your help!

Posted 10-Jul-12 4:20am

wuling

Add a Solution

Comments

wuling 11-Jul-12 0:08am

Hi,
Sorry, let me try to explain my question.

In the SSE branch of the original code, under "case THRESH_BINARY:", there are two for loops (please see part lists 1 and 2) that do the same job and store into "dst"; the only difference is how the input data is loaded: "_mm_loadu_si128" versus "_mm_loadl_epi64".
So here is my first question: why not use a single for loop?

//part list 1
for( j = 0; j <= roi.width - 32; j += 32 )
{
__m128i v0, v1;
v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
v1 = _mm_cmpgt_epi8( _mm_xor_si128(v1, _x80), thresh_s );
v0 = _mm_and_si128( v0, maxval_ );
v1 = _mm_and_si128( v1, maxval_ );
_mm_storeu_si128( (__m128i*)(dst + j), v0 );
_mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
}

//part list 2
for( ; j <= roi.width - 8; j += 8 )
{
__m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
v0 = _mm_and_si128( v0, maxval_ );
_mm_storel_epi64( (__m128i*)(dst + j), v0 );
}

The other question concerns whether "#if CV_SSE2" is true or not. Either way, the code in part list 3 is always compiled, and the variable "tab" is declared in thresh_8u itself, not inside "#if CV_SSE2 ......", so the result dst is written again through the table "tab", right? So does the original code do the same work using three different methods?

//part list 3
if( j_scalar < roi.width )
{
for( i = 0; i < roi.height; i++ )
{
const uchar* src = (const uchar*)(_src.data + _src.step*i);
uchar* dst = (uchar*)(_dst.data + _dst.step*i);
j = j_scalar;
#if CV_ENABLE_UNROLLED
for( ; j <= roi.width - 4; j += 4 )
{
uchar t0 = tab[src[j]];
uchar t1 = tab[src[j+1]];

dst[j] = t0;
dst[j+1] = t1;

t0 = tab[src[j+2]];
t1 = tab[src[j+3]];

dst[j+2] = t0;
dst[j+3] = t1;
}
#endif
for( ; j < roi.width; j++ )
dst[j] = tab[src[j]];
}
}

Add your solution here

Treat my content as plain text, not as HTML

Preview 0

…

Existing Members

Sign in to your account

...or Join us

Download, Vote, Comment, Publish.

Your Email
Password
Forgot your password?

Your Email
This email is in use. Do you need your password?
Optional Password

I have read and agree to the Terms of Service and Privacy Policy
Please subscribe me to the CodeProject newsletters

When answering a question please:

Read the question carefully.
Understand that English isn't everyone's first language so be lenient of bad spelling and grammar.
If a question is poorly phrased then either ask for clarification, ignore it, or edit the question and fix the problem. Insults are not welcome.
Don't tell someone to read the manual. Chances are they have and don't get it. Provide an answer or move on to the next question.

Let's work to help developers, not make them feel stupid.

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)