Click here to Skip to main content
15,888,802 members
Home / Discussions / ATL / WTL / STL
   

ATL / WTL / STL

 
QuestionSTL List Pin
dotman115-May-12 5:05
dotman115-May-12 5:05 
AnswerRe: STL List Pin
«_Superman_»20-May-12 16:37
professional«_Superman_»20-May-12 16:37 
QuestionRe: STL List Pin
Aescleal21-May-12 4:55
Aescleal21-May-12 4:55 
AnswerRe: STL List Pin
Vitaly Tomilov10-Jun-12 9:53
Vitaly Tomilov10-Jun-12 9:53 
Questiongenerate the .H file from COM Pin
MrKBA14-May-12 1:38
MrKBA14-May-12 1:38 
AnswerRe: generate the .H file from COM Pin
Pablo Aliskevicius14-May-12 1:55
Pablo Aliskevicius14-May-12 1:55 
GeneralRe: generate the .H file from COM Pin
MrKBA14-May-12 1:58
MrKBA14-May-12 1:58 
GeneralRe: generate the .H file from COM Pin
Pablo Aliskevicius14-May-12 1:59
Pablo Aliskevicius14-May-12 1:59 
GeneralRe: generate the .H file from COM Pin
MrKBA14-May-12 2:02
MrKBA14-May-12 2:02 
AnswerRe: generate the .H file from COM Pin
Pete O'Hanlon14-May-12 2:08
mvePete O'Hanlon14-May-12 2:08 
GeneralRe: generate the .H file from COM Pin
MrKBA14-May-12 2:51
MrKBA14-May-12 2:51 
GeneralRe: generate the .H file from COM Pin
Pete O'Hanlon14-May-12 2:55
mvePete O'Hanlon14-May-12 2:55 
Questioncreating a dialog in non mfc application Pin
Rajeev.Goutham9-May-12 20:42
Rajeev.Goutham9-May-12 20:42 
AnswerRe: creating a dialog in non mfc application Pin
Richard MacCutchan9-May-12 22:12
mveRichard MacCutchan9-May-12 22:12 
AnswerRe: creating a dialog in non mfc application Pin
yu-jian15-May-12 6:17
yu-jian15-May-12 6:17 
QuestionHow to realize multi progress bar show at the same time ? Pin
redleafzzh2-May-12 4:02
redleafzzh2-May-12 4:02 
AnswerRe: How to realize multi progress bar show at the same time ? Pin
Richard MacCutchan2-May-12 6:28
mveRichard MacCutchan2-May-12 6:28 
AnswerRe: How to realize multi progress bar show at the same time ? Pin
Albert Holguin4-May-12 10:02
professionalAlbert Holguin4-May-12 10:02 
QuestionAsyncIO C++ Pin
TalSt29-Apr-12 0:55
TalSt29-Apr-12 0:55 
AnswerRe: AsyncIO C++ Pin
Richard MacCutchan2-May-12 6:26
mveRichard MacCutchan2-May-12 6:26 
AnswerRe: AsyncIO C++ Pin
Ashish Tyagi 4022-May-12 7:29
Ashish Tyagi 4022-May-12 7:29 
QuestionCUDA and MPI combination Pin
Ron120223-Apr-12 18:48
Ron120223-Apr-12 18:48 
Hi,

I have following MPI code
#include "mpi.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define  ARRAYSIZE	2000

#define  MASTER		0



int  data[ARRAYSIZE];




int main(int argc, char* argv[])
{

int   numtasks, taskid, rc, dest, offset, i, j, tag1, tag2, source, chunksize, namelen; 

int mysum;

long sum;

int update(int myoffset, int chunk, int myid);
char myname[MPI_MAX_PROCESSOR_NAME];

MPI_Status status;
double start = 0.0, stop = 0.0, time = 0.0;
double totaltime;
FILE *fp;
char line[128];
char element;
int n;
int k=0;




/***** Initializations *****/

MPI_Init(&argc, &argv);

MPI_Comm_size(MPI_COMM_WORLD, &numtasks);

MPI_Comm_rank(MPI_COMM_WORLD,&taskid); 
MPI_Get_processor_name(myname, &namelen);

printf ("MPI task %d has started on host %s...\n", taskid, myname);

chunksize = (ARRAYSIZE / numtasks);

tag2 = 1;

tag1 = 2;

//Read the data from file and store into array


// printf("Initialized array sum = %d\n",sum);


//data = malloc(ARRAYSIZE * sizeof(int));

/***** Master task only ******/

if (taskid == MASTER){

  fp=fopen("integers.txt", "r");
  if(fp != NULL){
   sum = 0;
   while(fgets(line, sizeof line, fp)!= NULL){
    fscanf(fp,"%d",&data[k]);
    sum = sum + data[k]; // calculate sum to verify later on
    k++;
   }
  }


printf("Initialized array sum %d", sum);

  /* Send each task its portion of the array - master keeps 1st part */

  offset = chunksize;

  for (dest=1; dest<numtasks; dest++) {

    MPI_Send(&offset, 1, MPI_INT, dest, tag1, MPI_COMM_WORLD);

    MPI_Send(&data[offset], chunksize, MPI_INT, dest, tag2, MPI_COMM_WORLD);

    printf("Sent %d elements to task %d offset= %d\n",chunksize,dest,offset);

    offset = offset + chunksize;

   }



  /* Master does its part of the work */

  offset = 0;
  mysum = run_kernel(&data[offset], chunksize);
  printf("Kernel returns sum %d", mysum);

  //mysum = update(offset, chunksize, taskid);



  /* Wait to receive results from each task */

  for (i=1; i<numtasks; i++) {

    source = i;

    MPI_Recv(&offset, 1, MPI_INT, source, tag1, MPI_COMM_WORLD, &status);

    MPI_Recv(&data[offset], chunksize, MPI_INT, source, tag2,MPI_COMM_WORLD, &status);

   }



  /* Get final sum and print sample results */  

  MPI_Reduce(&mysum, &sum, 1, MPI_INT, MPI_SUM, MASTER, MPI_COMM_WORLD);



  printf("\n*** Final sum= %d ***\n",sum);



 }  /* end of master section */





/***** Non-master tasks only *****/



if (taskid > MASTER) {



  /* Receive my portion of array from the master task */
  start= MPI_Wtime();

  source = MASTER;

  MPI_Recv(&offset, 1, MPI_INT, source, tag1, MPI_COMM_WORLD, &status);

  MPI_Recv(&data[offset], chunksize, MPI_INT, source, tag2,MPI_COMM_WORLD, &status);

  mysum = run_kernel(&data[offset], chunksize);
  printf("\nKernel returns sum %d ", mysum);

// mysum = update(offset, chunksize, taskid);
  stop = MPI_Wtime();
  time = stop -start;
  printf("time taken by process %d to recieve elements and caluclate own sum is = %lf seconds \n", taskid, time);
  
 // totaltime = totaltime + time;



  /* Send my results back to the master task */

  dest = MASTER;

  MPI_Send(&offset, 1, MPI_INT, dest, tag1, MPI_COMM_WORLD);

  MPI_Send(&data[offset], chunksize, MPI_INT, MASTER, tag2, MPI_COMM_WORLD);



  MPI_Reduce(&mysum, &sum, 1, MPI_INT, MPI_SUM, MASTER, MPI_COMM_WORLD);



  } /* end of non-master */



 MPI_Finalize();



}   



int update(int myoffset, int chunk, int myid) {

  int i,j; 

  int mysum = 0;

  for(i=myoffset; i < myoffset + chunk; i++) {

    mysum = mysum + data[i];

    }

  printf("Task %d has sum = %d\n",myid,mysum);

  return(mysum);

}

and I have following cuda code
#include <stdio.h>



__global__ void add(int *devarray, int *devsum)
{
        int index = blockIdx.x * blockDim.x + threadIdx.x;
        devsum = devsum + devarray[index];
}

extern "C"
int * run_kernel(int array[],int nelements)
{
        int  *devarray, *sum, *devsum;
       
        printf("\nrun_kernel called..............");       
        
        cudaMalloc((void**) &devarray, sizeof(int)*nelements);
        cudaMalloc((void**) &devsum, sizeof(int));
        cudaMemcpy(devarray, array, sizeof(int)*nelements, cudaMemcpyHostToDevice);
        add<<<2, 3>>>(devarray, devsum);
       	
	cudaMemcpy(sum, devsum, sizeof(int), cudaMemcpyDeviceToHost);

        printf(" \nthe sum is %d", sum);
        cudaFree(devarray);
	return sum;
        
}


I am getting following output

Here is my output when I run above code -
MPI task 0 has started on host
MPI task 1 has started on host
MPI task 2 has started on host
MPI task 3 has started on host
Initialized array sum 9061Sent 500 elements to task 1 offset= 500
Sent 500 elements to task 2 offset= 1000
Sent 500 elements to task 3 offset= 1500




[node4] *** Process received signal ***
run_kernel called..............
[node4:04786] Signal: Segmentation fault (11)
[node4:04786] Signal code: Invalid permissions (2)
[node4:04786] Failing at address: 0x8049828
[node4:04786] [ 0] [0xaf440c]
[node4:04786] [ 1] /usr/lib/libcuda.so(+0x13a0f6) [0xfa10f6]
[node4:04786] [ 2] /usr/lib/libcuda.so(+0x146912) [0xfad912]
[node4:04786] [ 3] /usr/lib/libcuda.so(+0x148094) [0xfaf094]
[node4:04786] [ 4] /usr/lib/libcuda.so(+0x13ca50) [0xfa3a50]
[node4:04786] [ 5] /usr/lib/libcuda.so(+0x11863c) [0xf7f63c]
[node4:04786] [ 6] /usr/lib/libcuda.so(+0x11d167) [0xf84167]
[node4:04786] [ 7] /usr/lib/libcuda.so(cuMemcpyDtoH_v2+0x64) [0xf74014]
[node4:04786] [ 8] /usr/local/cuda/lib/libcudart.so.4(+0x2037b) [0xcbe37b]
[node4:04786] [ 9] /usr/local/cuda/lib/libcudart.so.4(cudaMemcpy+0x230) [0xcf1360]
[node4:04786] [10] mpi_array(run_kernel+0x135) [0x8049559]
[node4:04786] [11] mpi_array(main+0x2f2) [0x8049046]
[node4:04786] [12] /lib/libc.so.6(__libc_start_main+0xe6) [0x2fece6]
[node4:04786] [13] mpi_array() [0x8048cc1]
[node4:04786] *** End of error message ***

Kernel returns sum 134530992 time taken by process 1 to recieve elements and caluclate own sum is = 0.276339 seconds
run_kernel called..............
devsum is 3211264
the sum is 134532992
Kernel returns sum 134532992 time taken by process 2 to recieve elements and caluclate own sum is = 0.280452 seconds
run_kernel called..............
devsum is 3211264
the sum is 134534992
Kernel returns sum 134534992 time taken by process 3 to recieve elements and caluclate own sum is = 0.285010 seconds
------------------------------------------------------------- -------------
mpirun noticed that process rank 0 with PID 4786 on node ecm-c-l-207-004.uniwa.uwa.edu.au exited on signal 11 (Segmentation fault).

Perhaps the sum does not look correct. Not sure what is causing segmentation fault. Can anyone help?

Thanks
Questionerror while loading shared libraries: libcudart.so.4: cannot open shared object file: No such file or directory Pin
Ron120220-Apr-12 18:48
Ron120220-Apr-12 18:48 
QuestionRe: error while loading shared libraries: libcudart.so.4: cannot open shared object file: No such file or directory Pin
Richard MacCutchan20-Apr-12 22:08
mveRichard MacCutchan20-Apr-12 22:08 
GeneralRe: error while loading shared libraries: libcudart.so.4: cannot open shared object file: No such file or directory Pin
Ron120220-Apr-12 22:16
Ron120220-Apr-12 22:16 

General General    News News    Suggestion Suggestion    Question Question    Bug Bug    Answer Answer    Joke Joke    Praise Praise    Rant Rant    Admin Admin   

Use Ctrl+Left/Right to switch messages, Ctrl+Up/Down to switch threads, Ctrl+Shift+Left/Right to switch pages.