Click here to Skip to main content
15,886,026 members
Please Sign up or sign in to vote.
0.00/5 (No votes)
See more:
hi!
my program can not return true final value from matrix c!!!
C++
#include <stdio.h>
#include <conio.h>
 __global__ void kernelFunc(float* ad, float* bd, float* cd, int n) {
    int x = (blockIdx.x * blockDim.x) + threadIdx.x;
    int y = (blockIdx.y * blockDim.y) + threadIdx.y;
    
    if(x < n && y < n) {   
        float v = 0.0f;
        for(int i=0; i<n;>            v += ad[y * n + i] * bd[i * n + x];
        cd[y * n + x] = v;
    }
}

void main() {

int ii,jj,k;
    float* ad;
    float* bd;
    float* cd;
    int n=64;
	float a[64][64];
	float b[64][64];
	float c[64][64];

	for(ii=0;ii<n;ii++)>
	{
	for(jj=0;jj<n;jj++)>
	{
	a[ii][jj]=2;//ii+jj;
	b[ii][jj]=3;//ii+jj;
	//c[ii][jj]=4;
	}

	}
	
    cudaMalloc((void**)&ad, n * n * sizeof(float));
    cudaMalloc((void**)&bd, n * n * sizeof(float));
    cudaMalloc((void**)&cd, n * n * sizeof(float));
    
    cudaMemcpy(ad, a, n * n * sizeof(float), cudaMemcpyHostToDevice);
    cudaMemcpy(bd, b, n * n * sizeof(float), cudaMemcpyHostToDevice);
	cudaMemcpy(cd, c, n * n * sizeof(float), cudaMemcpyHostToDevice);
    dim3 block(1, 1);           
    dim3 grid(1, 1);
    
    kernelFunc<<<grid,>>>(ad, bd, cd, n);

    cudaMemcpy(c, cd, n * n * sizeof(float), cudaMemcpyDeviceToHost);

	for(ii=0;ii<n;ii++)>
	{
	printf("\n ");
	for(jj=0;jj<n;jj++)>
	{
	printf("%f ",c[ii][jj]);

	

	}

	}
    
    cudaFree(ad);
    cudaFree(bd);
    cudaFree(cd);

}
Posted
Updated 6-Aug-12 0:26am
v2
Comments
nv3 6-Aug-12 8:18am    
So, what does your program return instead?

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900