Click here to Skip to main content
15,905,875 members
Please Sign up or sign in to vote.
1.00/5 (2 votes)
See more:
Hi all.
Here is part of the OpenCV (version 2.4.0 and above) threshold code. I am confused about
why the original code appears to repeat the same work three times to produce the result.
If I don't care about execution time, do all of them (original code, method 1, method 2 and method 3) give the same result?


Original code:
C++
// Thresholds byte data from _src into _dst. Only the THRESH_BINARY branch is
// shown in full in this excerpt; the other cases are elided ("……").
//
// For THRESH_BINARY each output byte is maxval when the input byte is greater
// than thresh, and 0 otherwise. Each row is covered by up to three loops that
// handle consecutive, non-overlapping column ranges (no byte is computed twice):
//   1. SSE2, 32 bytes per iteration;
//   2. SSE2, 8 bytes per iteration, for what the first loop left over;
//   3. scalar table lookup from column j_scalar to the end of the row.
// j_scalar stays 0 when the SSE2 block is compiled out or the CPU check fails,
// in which case the scalar loop processes the whole row.
// NOTE(review): the return type is not visible in this excerpt.
thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
{
    int i, j, j_scalar = 0;
    // tab[v] holds the output value for input byte v; filled by the switch below
    // and read only by the scalar loop at the end.
    uchar tab[256];
    Size roi = _src.size();
    // Treat every channel sample as its own column.
    roi.width *= _src.channels();

    // When both matrices report isContinuous(), process the data as one long row.
    if( _src.isContinuous() && _dst.isContinuous() )
    {
        roi.width *= roi.height;
        roi.height = 1;
    }

#ifdef HAVE_TEGRA_OPTIMIZATION
    // Optional Tegra implementation; if it returns true nothing else is done here.
    if (tegra::thresh_8u(_src, _dst, roi.width, roi.height, thresh, maxval, type))
        return;
#endif

    // Build the lookup table for the requested threshold type.
    switch( type )
    {
    case THRESH_BINARY:
        // Inputs 0..thresh map to 0, inputs above thresh map to maxval.
        for( i = 0; i <= thresh; i++ )
            tab[i] = 0;
        for( ; i < 256; i++ )
            tab[i] = maxval;
        break;
    case THRESH_BINARY_INV:
    ……
    default:
        CV_Error( CV_StsBadArg, "Unknown threshold type" );
    }

#if CV_SSE2
    if( checkHardwareSupport(CV_CPU_SSE2) )
    {
        // _mm_cmpgt_epi8 compares signed bytes, so both operands are XORed with
        // 0x80 to make the signed comparison order unsigned values correctly.
        __m128i _x80 = _mm_set1_epi8('\x80');
        // thresh_u is not referenced in the visible code; presumably the elided
        // cases use it.
        __m128i thresh_u = _mm_set1_epi8(thresh);
        __m128i thresh_s = _mm_set1_epi8(thresh ^ 0x80);
        __m128i maxval_ = _mm_set1_epi8(maxval);
        // Largest multiple of 8 not exceeding roi.width: the SSE2 loops of the
        // THRESH_BINARY case stop there and the scalar loop resumes from it.
        j_scalar = roi.width & -8;

        for( i = 0; i < roi.height; i++ )
        {
            // Start of row i in source and destination.
            const uchar* src = (const uchar*)(_src.data + _src.step*i);
            uchar* dst = (uchar*)(_dst.data + _dst.step*i);

            switch( type )
            {
            case THRESH_BINARY:
                // Main loop: two unaligned 16-byte loads/stores per iteration
                // (32 bytes); runs while at least 32 bytes remain.
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
                    // All-ones in every byte lane where src > thresh.
                    v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
                    v1 = _mm_cmpgt_epi8( _mm_xor_si128(v1, _x80), thresh_s );
                    // Turn the mask into maxval / 0.
                    v0 = _mm_and_si128( v0, maxval_ );
                    v1 = _mm_and_si128( v1, maxval_ );
                    _mm_storeu_si128( (__m128i*)(dst + j), v0 );
                    _mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
                }

                // Continues from the j left by the loop above (no initializer):
                // same computation on 8 bytes at a time, using only the low
                // 64 bits of the register, while at least 8 bytes remain.
                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
                    v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
                    v0 = _mm_and_si128( v0, maxval_ );
                    _mm_storel_epi64( (__m128i*)(dst + j), v0 );
                }
                break;

            case THRESH_BINARY_INV:
             ……
                break;
            }
        }
    }
#endif

    // Scalar path: handles the columns from j_scalar onward, i.e. the fewer than
    // 8 bytes per row left by the SSE2 loops, or every column when j_scalar is 0.
    // Skipped entirely when j_scalar already equals roi.width.
    if( j_scalar < roi.width )
    {
        for( i = 0; i < roi.height; i++ )
        {
            const uchar* src = (const uchar*)(_src.data + _src.step*i);
            uchar* dst = (uchar*)(_dst.data + _dst.step*i);
            j = j_scalar;
#if CV_ENABLE_UNROLLED
            // Optional manual 4x unrolling of the table lookup.
            for( ; j <= roi.width - 4; j += 4 )
            {
                uchar t0 = tab[src[j]];
                uchar t1 = tab[src[j+1]];

                dst[j] = t0;
                dst[j+1] = t1;

                t0 = tab[src[j+2]];
                t1 = tab[src[j+3]];

                dst[j+2] = t0;
                dst[j+3] = t1;
            }
#endif
            // Remaining columns, one byte at a time.
            for( ; j < roi.width; j++ )
                dst[j] = tab[src[j]];
        }
    }
}


Method 1:
C++
void thresh_8u_1( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
{
int i, j, j_scalar = 0;
	_dst.create(_src.size(),CV_8UC1);
	uchar tab[256];
	cv::Size roi = _src.size();
	roi.width *= _src.channels();

	if( _src.isContinuous() && _dst.isContinuous() )
	{
	roi.width *= roi.height;
	roi.height = 1;
	}
	__m128i _x80 = _mm_set1_epi8('\x80');
	__m128i thresh_u = _mm_set1_epi8(thresh);
	__m128i thresh_s = _mm_set1_epi8(thresh ^ 0x80);
	__m128i maxval_ = _mm_set1_epi8(maxval);
	j_scalar = roi.width & -8;

	for( i = 0; i < roi.height; i++ )
	{
	const uchar* src = (const uchar*)(_src.data + _src.step*i);
	uchar* dst = (uchar*)(_dst.data + _dst.step*i);

	switch( type )
	{
	case THRESH_BINARY:
	for( j = 0; j <= roi.width - 32; j += 32 )
	{
	__m128i v0, v1;
	v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
	v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
	v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
	v1 = _mm_cmpgt_epi8( _mm_xor_si128(v1, _x80), thresh_s );
	v0 = _mm_and_si128( v0, maxval_ );
	v1 = _mm_and_si128( v1, maxval_ );
	_mm_storeu_si128( (__m128i*)(dst + j), v0 );
	_mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
	}

	case THRESH_BINARY_INV:
				……………
	break;
	}
  }
}


Method 2:
C++
void thresh_8u_2( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
{
int i, j, j_scalar = 0;
dst.create(_src.size(),CV_8UC1);
	uchar tab[256];
	cv::Size roi = _src.size();
	roi.width *= _src.channels();

	if( _src.isContinuous() && _dst.isContinuous() )
	{
	roi.width *= roi.height;
	roi.height = 1;
	}

	__m128i _x80 = _mm_set1_epi8('\x80');
	__m128i thresh_u = _mm_set1_epi8(thresh);
	__m128i thresh_s = _mm_set1_epi8(thresh ^ 0x80);
	__m128i maxval_ = _mm_set1_epi8(maxval);
	j_scalar = roi.width & -8;

	for( i = 0; i < roi.height; i++ )
	{
	const uchar* src = (const uchar*)(_src.data + _src.step*i);
	uchar* dst = (uchar*)(_dst.data + _dst.step*i);

	switch( type )
	{
	case THRESH_BINARY:
						
	for( ; j <= roi.width - 8; j += 8 )
	{
	__m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
	v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
	v0 = _mm_and_si128( v0, maxval_ );
	_mm_storel_epi64( (__m128i*)(dst + j), v0 );
	}
	break;

	case THRESH_BINARY_INV:
	……………					
	break;
	}
}
}


Method 3:
C++
void thresh_8u_3( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
{
	int i, j, j_scalar = 0;
	_dst.create(_src.size(),CV_8UC1);
	uchar tab[256];
	cv::Size roi = _src.size();
	roi.width *= _src.channels();

	if( _src.isContinuous() && _dst.isContinuous() )
	{
	roi.width *= roi.height;
	roi.height = 1;
	}

	switch( type )
	{
	case THRESH_BINARY:
	for( i = 0; i <= thresh; i++ )
	tab[i] = 0;
	for( ; i < 256; i++ )
		tab[i] = maxval;
	break;
	case THRESH_BINARY_INV:
	………..
	default:
	CV_Error( CV_StsBadArg, "Unknown threshold type" );
}

	if( j_scalar < roi.width )
	{
		for( i = 0; i < roi.height; i++ )
		{
		const uchar* src = (const uchar*)(_src.data + _src.step*i);
		uchar* dst = (uchar*)(_dst.data + _dst.step*i);
		j = j_scalar;
#if CV_ENABLE_UNROLLED
		for( ; j <= roi.width - 4; j += 4 )
		{
		uchar t0 = tab[src[j]];
		uchar t1 = tab[src[j+1]];

		dst[j] = t0;
		dst[j+1] = t1;

		t0 = tab[src[j+2]];
		t1 = tab[src[j+3]];

		dst[j+2] = t0;
		dst[j+3] = t1;
		}
#endif
		for( ; j < roi.width; j++ )
		dst[j] = tab[src[j]];
		}
	}
}



Thanks for your help!
Posted
Comments
wuling 11-Jul-12 0:08am    
Hi,
Sorry, let me try to explain my question.

In the SSE section of the original code, under "case THRESH_BINARY:", there are two for loops (please see part lists 1 and 2) that do the same job and store the result to "dst"; the only difference is how the input data is loaded: "_mm_loadu_si128" versus "_mm_loadl_epi64".
So my first question is: why not use just one for loop?


//part list 1
// SSE2 loop that handles 32 bytes per iteration (two unaligned 16-byte
// loads and stores). It runs only while at least 32 bytes remain, and leaves j
// at the first byte it did not process.
for( j = 0; j <= roi.width - 32; j += 32 )
{
__m128i v0, v1;
v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
// XOR with 0x80 lets the signed byte compare order unsigned values correctly;
// the result is all-ones in every byte lane where src > thresh.
v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
v1 = _mm_cmpgt_epi8( _mm_xor_si128(v1, _x80), thresh_s );
// Turn the mask into maxval / 0.
v0 = _mm_and_si128( v0, maxval_ );
v1 = _mm_and_si128( v1, maxval_ );
_mm_storeu_si128( (__m128i*)(dst + j), v0 );
_mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
}

//part list 2
// SSE2 loop that handles 8 bytes per iteration. It has no initializer, so it
// continues from the current value of j (where the 32-byte loop stopped) and
// therefore processes only bytes that loop did not reach.
for( ; j <= roi.width - 8; j += 8 )
{
// Load 8 bytes into the low half of the register.
__m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
v0 = _mm_and_si128( v0, maxval_ );
// Store the low 8 bytes.
_mm_storel_epi64( (__m128i*)(dst + j), v0 );
}

The other question is about whether "#if CV_SSE2" is true or not. The code in part list 3 is always compiled either way, and the variable "tab" is declared in thresh_8u, outside the "#if CV_SSE2 ......" block, so you will find that the result dst is stored again using the variable "tab", right? So does the original code do the same work using three different methods?



//part list 3
// Scalar lookup-table loop. It starts at column j_scalar, not at 0, and is
// skipped entirely when j_scalar already equals roi.width. In thresh_8u,
// j_scalar is 0 unless the SSE2 path ran, in which case it is roi.width
// rounded down to a multiple of 8, so only the leftover bytes are handled here.
if( j_scalar < roi.width )
{
for( i = 0; i < roi.height; i++ )
{
// Start of row i in source and destination.
const uchar* src = (const uchar*)(_src.data + _src.step*i);
uchar* dst = (uchar*)(_dst.data + _dst.step*i);
j = j_scalar;
#if CV_ENABLE_UNROLLED
// Optional manual 4x unrolling of the table lookup.
for( ; j <= roi.width - 4; j += 4 )
{
uchar t0 = tab[src[j]];
uchar t1 = tab[src[j+1]];

dst[j] = t0;
dst[j+1] = t1;

t0 = tab[src[j+2]];
t1 = tab[src[j+3]];

dst[j+2] = t0;
dst[j+3] = t1;
}
#endif
// Remaining columns, one byte at a time.
for( ; j < roi.width; j++ )
dst[j] = tab[src[j]];
}
}

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900