Click here to Skip to main content
15,993,109 members
Please Sign up or sign in to vote.
0.00/5 (No votes)
See more:
C++
__device__ void levenshteinDistance(char *str,int strStart,int strLength,char *patternRemoved,int patternRemovedStart,int patternRemovedLength,int *dXIndividual,int *dXFinal)
{
       int indexA = blockIdx.x * blockDim.x + threadIdx.x;
       int offsetStr = strStart;
       int offsetPattern = patternRemovedStart;
       if (indexA < patternRemovedLength) {
	   for (int i = offsetStr; i <= strLength; i++) {
		if (i == 0)
		    dXIndividual[indexA * (strLength+1) + i] = 0;
                                                         
		else{
		    if (str[i-1] == patternRemoved[indexA+offsetPattern])
			dXIndividual[indexA * (strLength+1) + i] = i;
		    else if (str[i-1] != patternRemoved[indexA+offsetPattern])
			dXIndividual[indexA * (strLength+1) + i] = dXIndividual[indexA * (strLength+1) + i - 1];
		}
	   }
           __syncthreads();
        }
        dXFinal[0] = dXIndividual[(strLength+1) * (patternRemovedLength)];
}

extern "C"
__global__ void ComputationdXOnGPU(int numStr, char *str, int *strStartIndices, int *strIndividualLengths,int numPatternRemoved, char *patternRemoved, int *patternRemovedStartIndices,int *patternRemovedIndividualLengths, int *dXFinal)
{
    int ix = blockIdx.x * blockDim.x + threadIdx.x;
    if (ix<numStr)
    {
        for (int i=0; i<numPatternRemoved; i++) 
        {
            int strStart = strStartIndices[ix];
            int strLength = strIndividualLengths[ix];
            int patternStart = patternRemovedStartIndices[i];
            int patternRemovedLength = patternRemovedIndividualLengths[i];
            int size = (strLength+1) * patternRemovedLength;
            int dXIndividual [size];
            int *result = &dXFinal[ix * numStrings1 + i];
            levenshteinDistance(str,strStart, strLength, patternRemoved, patternRemovedStart, patternRemovedLength, dXIndividual, dXFinal);
        }
    }
}


What I have tried:

In the device function, it runs every time from core0 to patternRemovedLength (if (indexA < patternRemovedLength) )

These will limit all token pairs to be run on a small range of cores. I need to say for(indexA = 3; indexA<patternRemovedLength;indexA++) for example in terms of cuda in order to specify for each token a certain range of cores to fully utilize the GPU.
Posted
Updated 29-Nov-22 8:18am
v5

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900