hi!
my program can not return true final value from matrix c!!!
#include <stdio.h>
#include <conio.h>
__global__ void kernelFunc(float* ad, float* bd, float* cd, int n) {
int x = (blockIdx.x * blockDim.x) + threadIdx.x;
int y = (blockIdx.y * blockDim.y) + threadIdx.y;
if(x < n && y < n) {
float v = 0.0f;
for(int i=0; i<n;> v += ad[y * n + i] * bd[i * n + x];
cd[y * n + x] = v;
}
}
void main() {
int ii,jj,k;
float* ad;
float* bd;
float* cd;
int n=64;
float a[64][64];
float b[64][64];
float c[64][64];
for(ii=0;ii<n;ii++)>
{
for(jj=0;jj<n;jj++)>
{
a[ii][jj]=2; b[ii][jj]=3; }
}
cudaMalloc((void**)&ad, n * n * sizeof(float));
cudaMalloc((void**)&bd, n * n * sizeof(float));
cudaMalloc((void**)&cd, n * n * sizeof(float));
cudaMemcpy(ad, a, n * n * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(bd, b, n * n * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(cd, c, n * n * sizeof(float), cudaMemcpyHostToDevice);
dim3 block(1, 1);
dim3 grid(1, 1);
kernelFunc<<<grid,>>>(ad, bd, cd, n);
cudaMemcpy(c, cd, n * n * sizeof(float), cudaMemcpyDeviceToHost);
for(ii=0;ii<n;ii++)>
{
printf("\n ");
for(jj=0;jj<n;jj++)>
{
printf("%f ",c[ii][jj]);
}
}
cudaFree(ad);
cudaFree(bd);
cudaFree(cd);
}