|
I am wondering if anyone knows of a more efficient algorithm to perform a bit-interleave/deinterleave that takes 256-bits and interleaves every 64-th bit in the output. For example, I want:
output = (bit[ 0] << 255) | (bit[ 64] << 254) | (bit[128] << 253) | (bit[192] << 252)
| (bit[ 1] << 251) | (bit[ 65] << 250) | (bit[129] << 249) | (bit[193] << 248)
| ...
| (bit[63] << 3) | (bit[127] << 2) | (bit[191] << 1) | (bit[255]) So far, I have the following algorithms (presented in untested C++):
__uint16 *data = new __uint16[32 / sizeof(__uint16)]; __uint64 *result = new __uint64[32 / sizeof(__uint64)]();
for (int resIdx = 0; resIdx < 4; ++resIdx) {
for (int dataIdx = resIdx; dataIdx < 16; dataIdx += 4) {
__uint64 temp = data[dataIdx];
temp |= temp << 24;
temp |= temp << 12;
temp &= 0x000F000F000F000FULL;
temp |= temp << 6;
temp |= temp << 3;
temp &= 0x1111111111111111ULL;
result[resIdx] |= temp << (dataIdx & 3);
}
}
__uint64 *data = new __uint64[32 / sizeof(__uint64)]; __uint16 *result = new __uint16[32 / sizeof(__uint16)];
for (int dataIdx = 0; dataIdx < 4; ++dataIdx) {
for (int resIdx = dataIdx; resIdx < 16; resIdx += 4) {
__uint64 temp = data[dataIdx] >> (resIdx >> 2);
temp &= 0x1111111111111111ULL;
temp |= temp >> 3;
temp |= temp >> 6;
temp &= 0x000F000F000F000FULL;
temp |= temp >> 12;
temp |= temp >> 24;
result[resIdx] = (__uint16)temp;
}
}
Sounds like somebody's got a case of the Mondays
-Jeff
|
|
|
|