|
#include "Deinterleaver.h"
#include <cassert>
#include <cstdint>
#pragma optimize ( "t", on ) // begin optimize for speed.
////////////////////////////////////////////////////////////////////////////////
//
// Helpers and private implementation details.
// -------------------------------------------
//
////////////////////////////////////////////////////////////////////////////////
namespace // anonymous
{
    // Reports whether `pointer` lies on a sizeof( __m64 ) byte boundary.
    //
    // The address is converted through std::uintptr_t, the integer type meant
    // for holding pointer values. A cast to unsigned int loses the upper half
    // of the address (or is rejected by the compiler) when pointers are wider
    // than unsigned int, e.g. in the _M_X64 configuration this file supports.
    template <typename T>
    inline bool isMMXAligned( T const * const pointer )
    {
        std::uintptr_t const address( reinterpret_cast<std::uintptr_t>( pointer ) );
        return ( address % sizeof( __m64 ) ) == 0;
    }

    // Reports whether `size` is a whole multiple of sizeof( __m64 ).
    inline bool isM64Multiple( size_t const size )
    {
        return ( size % sizeof( __m64 ) ) == 0;
    }
} // namespace anonymous
//************************************
// Method: Deinterleave_16_32
// FullName: Deinterleaver::Deinterleave_16_32
// Access: public static
//
// Forwards to Deinterleave_16_32_MMX when sz is a multiple of
// sizeof( __m64 ), and to Deinterleave_16_32_x86 for every other sz.
// All four arguments are passed through unchanged.
//************************************
void __fastcall Deinterleaver::Deinterleave_16_32( short const * in, int * out1, int * out2, size_t const sz )
{
    // Only the size decides the path; an additional isMMXAligned( out1 )
    // test was considered at this point but is not performed.
    bool const sizeFitsMMX( isM64Multiple( sz ) );
    if ( !sizeFitsMMX )
    {
        Deinterleave_16_32_x86( in, out1, out2, sz );
        return;
    }
    Deinterleave_16_32_MMX( in, out1, out2, sz );
}
//************************************
// Method: Deinterleave_16_32_x86
// FullName: Deinterleaver::Deinterleave_16_32_x86
// Access: public static
//
// Splits a stream of alternating 16 bit values into two 32 bit streams:
// the 1st, 3rd, 5th... value goes to out1, the 2nd, 4th, 6th... to out2.
// Each value is placed in the upper 16 bits of its output int; the lower
// 16 bits are zero.
//
// in   - input values; sz / 2 of them are read.
// out1 - receives sz / 4 ints.
// out2 - receives sz / 4 ints; must not overlap the range written via out1.
// sz   - size of the input in bytes; must be a multiple of 4 (one pair).
//************************************
void __fastcall Deinterleaver::Deinterleave_16_32_x86( short const * in, int * out1, int * out2, size_t const sz )
{
    // The input must consist of complete pairs.
    assert( sz % ( 2 * sizeof( signed short ) ) == 0 );

    // The two output ranges (sz / 4 ints each) must not overlap. Either order
    // is accepted, as is out2 starting exactly where the out1 range ends.
    assert( ( out2 >= out1 + sz / ( 2 * sizeof( signed short ) ) )
         || ( out1 >= out2 + sz / ( 2 * sizeof( signed short ) ) ) );

    // Multiplying by 2^16 yields the same bit pattern as "<< 16", but stays
    // well defined for negative samples (left-shifting a negative value is
    // undefined behaviour before C++20). The product always fits in an int.
    int const scale( 65536 );

    signed short const * const end( in + sz / sizeof( signed short ) );
    while ( in < end )
    {
        *out1++ = static_cast<int>( *in++ ) * scale;
        *out2++ = static_cast<int>( *in++ ) * scale;
    }
}
//************************************
// Method: Deinterleave_16_32_MMX
// FullName: Deinterleaver::Deinterleave_16_32_MMX
// Access: public static
//
// MMX variant of the 16 -> 32 bit deinterleaver. Every iteration loads four
// consecutive 16 bit values [ a0, b0, a1, b1 ] and stores two ints derived
// from a0, a1 through out1 and two ints derived from b0, b1 through out2.
// Each value ends up in the upper 16 bits of its output int.
//
// in   - input values; sz / 2 of them are read.
// out1 - receives sz / 4 ints.
// out2 - receives sz / 4 ints.
// sz   - size of the input in bytes; must be a multiple of sizeof( __m64 ).
//
// NOTE(review): the lower 16 bits of every output are filled with 0xFFFF
// here, whereas Deinterleave_16_32_x86 leaves them zero - confirm which of
// the two is intended.
//************************************
void __fastcall Deinterleaver::Deinterleave_16_32_MMX( short const * in, int * const out1, int * const out2, size_t const sz )
{
    assert( isM64Multiple( sz ) );
    // Alignment of the three buffers is not enforced:
    //assert( isMMXAligned( in ) ); // ??
    //assert( isMMXAligned( out1 ) ); // ??
    //assert( isMMXAligned( out2 ) ); // ??

    _mm_prefetch( reinterpret_cast<char const *>( in ), _MM_HINT_NTA );

    // Supplies the low 16 bit word of every output int (see NOTE above).
    static __m64 const ones = { 0xFFFFFFFFFFFFFFFF };

    short const * const end( in + sz/2 );
    __m64 * mout1( reinterpret_cast<__m64 *>( out1 ) );
    __m64 * mout2( reinterpret_cast<__m64 *>( out2 ) );
    while( in < end )
    {
        // Reorder the words [ a0, b0, a1, b1 ] into [ a0, a1, b0, b1 ].
        // _MM_SHUFFLE lists the selectors from the highest result word down
        // to the lowest, so result words 3..0 take source words 3, 1, 2, 0.
        // (The former argument order 0, 2, 1, 3 produced [ b1, b0, a1, a0 ],
        // which swapped the two outputs and reversed each pair.)
        __m64 const unshuffled( _mm_shuffle_pi16( reinterpret_cast<__m64 const &>( *in ), _MM_SHUFFLE( 3, 1, 2, 0 ) ) );

        // Interleave with `ones`: each sample becomes the high word of an int.
        *mout1++ = _mm_unpacklo_pi16( ones, unshuffled ); // a0, a1
        *mout2++ = _mm_unpackhi_pi16( ones, unshuffled ); // b0, b1

        in += sizeof( __m64 ) / sizeof( short );
    }

    // Leave MMX state so that subsequent x87 floating point code works.
    _mm_empty();
}
//************************************
// Method: Deinterleave_24_32
// FullName: Deinterleaver::Deinterleave_24_32
// Access: public static
//
// Consumes sz bytes of input in groups of 6 bytes (two 24 bit values) and,
// for every group, writes one int through out1 and one int through out2.
// How the 24 bit values are expanded to 32 bits depends on the build
// target (see the _M_X64 branches below).
//
// in   - input bytes; sz of them are walked.
// out1 - receives sz / 6 ints (derived from the first value of each group).
// out2 - receives sz / 6 ints (derived from the second value of each group).
// sz   - size of the input in bytes; must be a multiple of 6.
//************************************
void __fastcall Deinterleaver::Deinterleave_24_32( char const * in, int * out1, int * out2, size_t const sz )
{
// The input must consist of complete groups of two 3 byte values.
assert( sz % ( 2 * (24/8) ) == 0 );
// View that is overlaid onto each 6 byte input group via reinterpret_cast.
// The mapping of the bit-fields onto the input bytes is decided by the
// compiler's bit-field layout rules - presumably MSVC's, given _M_X64;
// verify before building with another compiler.
struct TwoSamples
{
#ifdef _M_X64
// NOTE(review): firstSample() evaluates ( x + 1 ) * 0xFF - 1 while
// secondSample() evaluates x << 8 | 0xFF; these are different expressions,
// and both differ from the non-x64 branch - confirm the intended scaling.
int firstSample () const { return static_cast<int>( ( firstSample_ + 1 ) * 0xFF - 1 ); }
int secondSample() const { return static_cast<int>( secondSample_ << 8 | 0xFF ); }
#else // avoid importing MSVC long long asm routines on 32 bit machines.
// Here the first value is returned as the plain (unshifted) 24 bit field,
// and the second is assembled from an 8 bit field and a 16 bit member
// shifted into the upper half of the int.
int firstSample () const { return firstSample_; }
int secondSample() const { return secondSampleFirstByte_ | secondSampleLastTwoBytes_ << 16; }
#endif
private:
#ifdef _M_X64
// NOTE(review): with a long long underlying type sizeof( TwoSamples ) is
// probably 8, so reading the final 6 byte group may touch bytes beyond
// in + sz - confirm.
signed long long firstSample_ : 24;
signed long long secondSample_ : 24;
#else
signed long firstSample_ : 24;
signed long secondSampleFirstByte_ : 8 ;
signed short secondSampleLastTwoBytes_ ;
#endif
};
// Requires out2 to lie more than sz / 4 ints after out1.
// NOTE(review): the divisor 2 * sizeof( signed short ) matches the 16 bit
// routines; this function writes sz / 6 ints per output, so the check looks
// stricter than necessary - confirm.
assert((out2-out1) > (int)(sz/(2*sizeof(signed short))));
char const * const end( in + sz );
while( in < end )
{
// Reinterpret the next 6 input bytes as one TwoSamples group.
TwoSamples const & twoSamples( *reinterpret_cast<TwoSamples const *>( in ) );
*out1++ = twoSamples.firstSample ();
*out2++ = twoSamples.secondSample();
// 2 * 24/8 evaluates to 6: advance by one group.
in += 2 * 24/8;
}
}
#pragma optimize( "", on ) // end optimize for speed.
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.