Hi all.
Here is opencv(above ver. 2.4.0 ) threshold partial code.But I am so confused that
why original code repeat three times to find the result.
If i don't care about spend time.Is all the same result( original code & method 1& method 2 & method 3)
Original code:
thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
{
int i, j, j_scalar = 0;
uchar tab[256];
Size roi = _src.size();
roi.width *= _src.channels();
if( _src.isContinuous() && _dst.isContinuous() )
{
roi.width *= roi.height;
roi.height = 1;
}
#ifdef HAVE_TEGRA_OPTIMIZATION
if (tegra::thresh_8u(_src, _dst, roi.width, roi.height, thresh, maxval, type))
return;
#endif
switch( type )
{
case THRESH_BINARY:
for( i = 0; i <= thresh; i++ )
tab[i] = 0;
for( ; i < 256; i++ )
tab[i] = maxval;
break;
case THRESH_BINARY_INV:
……
default:
CV_Error( CV_StsBadArg, "Unknown threshold type" );
}
#if CV_SSE2
if( checkHardwareSupport(CV_CPU_SSE2) )
{
__m128i _x80 = _mm_set1_epi8('\x80');
__m128i thresh_u = _mm_set1_epi8(thresh);
__m128i thresh_s = _mm_set1_epi8(thresh ^ 0x80);
__m128i maxval_ = _mm_set1_epi8(maxval);
j_scalar = roi.width & -8;
for( i = 0; i < roi.height; i++ )
{
const uchar* src = (const uchar*)(_src.data + _src.step*i);
uchar* dst = (uchar*)(_dst.data + _dst.step*i);
switch( type )
{
case THRESH_BINARY:
for( j = 0; j <= roi.width - 32; j += 32 )
{
__m128i v0, v1;
v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
v1 = _mm_cmpgt_epi8( _mm_xor_si128(v1, _x80), thresh_s );
v0 = _mm_and_si128( v0, maxval_ );
v1 = _mm_and_si128( v1, maxval_ );
_mm_storeu_si128( (__m128i*)(dst + j), v0 );
_mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
}
for( ; j <= roi.width - 8; j += 8 )
{
__m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
v0 = _mm_and_si128( v0, maxval_ );
_mm_storel_epi64( (__m128i*)(dst + j), v0 );
}
break;
case THRESH_BINARY_INV:
……
break;
}
}
}
#endif
if( j_scalar < roi.width )
{
for( i = 0; i < roi.height; i++ )
{
const uchar* src = (const uchar*)(_src.data + _src.step*i);
uchar* dst = (uchar*)(_dst.data + _dst.step*i);
j = j_scalar;
#if CV_ENABLE_UNROLLED
for( ; j <= roi.width - 4; j += 4 )
{
uchar t0 = tab[src[j]];
uchar t1 = tab[src[j+1]];
dst[j] = t0;
dst[j+1] = t1;
t0 = tab[src[j+2]];
t1 = tab[src[j+3]];
dst[j+2] = t0;
dst[j+3] = t1;
}
#endif
for( ; j < roi.width; j++ )
dst[j] = tab[src[j]];
}
}
}
Method 1:
void thresh_8u_1( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
{
int i, j, j_scalar = 0;
_dst.create(_src.size(),CV_8UC1);
uchar tab[256];
cv::Size roi = _src.size();
roi.width *= _src.channels();
if( _src.isContinuous() && _dst.isContinuous() )
{
roi.width *= roi.height;
roi.height = 1;
}
__m128i _x80 = _mm_set1_epi8('\x80');
__m128i thresh_u = _mm_set1_epi8(thresh);
__m128i thresh_s = _mm_set1_epi8(thresh ^ 0x80);
__m128i maxval_ = _mm_set1_epi8(maxval);
j_scalar = roi.width & -8;
for( i = 0; i < roi.height; i++ )
{
const uchar* src = (const uchar*)(_src.data + _src.step*i);
uchar* dst = (uchar*)(_dst.data + _dst.step*i);
switch( type )
{
case THRESH_BINARY:
for( j = 0; j <= roi.width - 32; j += 32 )
{
__m128i v0, v1;
v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
v1 = _mm_cmpgt_epi8( _mm_xor_si128(v1, _x80), thresh_s );
v0 = _mm_and_si128( v0, maxval_ );
v1 = _mm_and_si128( v1, maxval_ );
_mm_storeu_si128( (__m128i*)(dst + j), v0 );
_mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
}
case THRESH_BINARY_INV:
……………
break;
}
}
}
Methold 2:
void thresh_8u_2( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
{
int i, j, j_scalar = 0;
dst.create(_src.size(),CV_8UC1);
uchar tab[256];
cv::Size roi = _src.size();
roi.width *= _src.channels();
if( _src.isContinuous() && _dst.isContinuous() )
{
roi.width *= roi.height;
roi.height = 1;
}
__m128i _x80 = _mm_set1_epi8('\x80');
__m128i thresh_u = _mm_set1_epi8(thresh);
__m128i thresh_s = _mm_set1_epi8(thresh ^ 0x80);
__m128i maxval_ = _mm_set1_epi8(maxval);
j_scalar = roi.width & -8;
for( i = 0; i < roi.height; i++ )
{
const uchar* src = (const uchar*)(_src.data + _src.step*i);
uchar* dst = (uchar*)(_dst.data + _dst.step*i);
switch( type )
{
case THRESH_BINARY:
for( ; j <= roi.width - 8; j += 8 )
{
__m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
v0 = _mm_and_si128( v0, maxval_ );
_mm_storel_epi64( (__m128i*)(dst + j), v0 );
}
break;
case THRESH_BINARY_INV:
……………
break;
}
}
}
Methold 3:
void thresh_8u_3( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
{
int i, j, j_scalar = 0;
_dst.create(_src.size(),CV_8UC1);
uchar tab[256];
cv::Size roi = _src.size();
roi.width *= _src.channels();
if( _src.isContinuous() && _dst.isContinuous() )
{
roi.width *= roi.height;
roi.height = 1;
}
switch( type )
{
case THRESH_BINARY:
for( i = 0; i <= thresh; i++ )
tab[i] = 0;
for( ; i < 256; i++ )
tab[i] = maxval;
break;
case THRESH_BINARY_INV:
………..
default:
CV_Error( CV_StsBadArg, "Unknown threshold type" );
}
if( j_scalar < roi.width )
{
for( i = 0; i < roi.height; i++ )
{
const uchar* src = (const uchar*)(_src.data + _src.step*i);
uchar* dst = (uchar*)(_dst.data + _dst.step*i);
j = j_scalar;
#if CV_ENABLE_UNROLLED
for( ; j <= roi.width - 4; j += 4 )
{
uchar t0 = tab[src[j]];
uchar t1 = tab[src[j+1]];
dst[j] = t0;
dst[j+1] = t1;
t0 = tab[src[j+2]];
t1 = tab[src[j+3]];
dst[j+2] = t0;
dst[j+3] = t1;
}
#endif
for( ; j < roi.width; j++ )
dst[j] = tab[src[j]];
}
}
}
Thanks your help!