'ConsoleApplication1.exe' (Win32): Loaded 'C:\DevSoft\TBB-Tutorial\ConsoleApplication1\x64\Debug\ConsoleApplication1.exe'. Symbols loaded.
'ConsoleApplication1.exe' (Win32): Loaded 'C:\Windows\System32\ntdll.dll'.
'ConsoleApplication1.exe' (Win32): Loaded 'C:\Windows\System32\kernel32.dll'.
'ConsoleApplication1.exe' (Win32): Loaded 'C:\Windows\System32\KernelBase.dll'.
'ConsoleApplication1.exe' (Win32): Loaded 'C:\DevSoft\TBB-Tutorial\ConsoleApplication1\x64\Debug\tbb_debug.dll'.
'ConsoleApplication1.exe' (Win32): Loaded 'C:\Windows\System32\vcruntime140d.dll'.
'ConsoleApplication1.exe' (Win32): Loaded 'C:\Windows\System32\vcruntime140_1d.dll'.
'ConsoleApplication1.exe' (Win32): Loaded 'C:\Windows\System32\ucrtbased.dll'.
'ConsoleApplication1.exe' (Win32): Loaded 'C:\Windows\System32\msvcp140d.dll'.
The thread 0x3380 has exited with code 0 (0x0).
'ConsoleApplication1.exe' (Win32): Loaded 'C:\Windows\System32\kernel.appcore.dll'.
'ConsoleApplication1.exe' (Win32): Loaded 'C:\Windows\System32\msvcrt.dll'.
'ConsoleApplication1.exe' (Win32): Loaded 'C:\Windows\System32\rpcrt4.dll'.
The thread 0x5b4 has exited with code 0 (0x0).
The thread 0x342c has exited with code 0 (0x0).
The thread 0xed0 has exited with code 0 (0x0).
The thread 0x29fc has exited with code 0 (0x0).
The thread 0x21e8 has exited with code 0 (0x0).
The thread 0x2e2c has exited with code 0 (0x0).
The thread 0x353c has exited with code 0 (0x0).
The thread 0x2198 has exited with code 0 (0x0).
The thread 0x2044 has exited with code 0 (0x0).
The program '[6016] ConsoleApplication1.exe' has exited with code 0 (0x0).
What I have tried:
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
using namespace tbb;
// Number of rows and columns of each square matrix.
constexpr int size = 1000;

// Matrices with static storage duration: a and b are the operands of the
// product, c receives the accumulated result.
float a[size][size] = {};
float b[size][size] = {};
float c[size][size] = {};
class Multiply
{
public:
void operator()(blocked_range<int> r) const {
for (int i = r.begin(); i != r.end(); ++i) {
for (int j = 0; j < size; ++j) {
for (int k = 0; k < size; ++k) {
c[i][j] += a[i][k] * b[k][j];
}
}
}
}
};
/// Fills the input matrices, then computes c += a x b with parallel_for
/// over the row index range [0, size).
///
/// NOTE(review): nothing is printed and c is never read after the
/// multiplication, so the program produces no console output of its own.
///
/// @return 0 on completion.
int main()
{
    // Initialize buffers: a[i][j] = i + j, b[i][j] = i - j, c[i][j] = 0.
    for (int i = 0; i < size; ++i) {
        for (int j = 0; j < size; ++j) {
            a[i][j] = static_cast<float>(i) + j;
            b[i][j] = static_cast<float>(i) - j;
            c[i][j] = 0.0f;
        }
    }

    // Compute matrix multiplication.
    // C <- C + A x B
    parallel_for(blocked_range<int>(0, size), Multiply());

    return 0;
}