GPU Computing Using CUDA, Eclipse, and Java with JCuda

Mark H Bishop

Rate me:

4.71/5 (8 votes)

21 Sep 2013 · CPOL · 18 min read

103.1K

804

Tutorial: GPU computing with JCuda and Nsight (Eclipse)

CodeProjectMBishop_12_22_2012.zip
- CodeProjectMBishop_12_22_2012
  - JCudaFftDemo
    - bin
      - CaxpyGpu.class
      - ComplexCalcFloat.class
      - ComplexFloat.class
      - FftCpuFloat.class
      - FftGpuFloat.class
      - Main.class
      - Stopwatch.class
    - src
  - Notes

/**
 * Author Mark Bishop; 2012
 * License GNU v3; 
 * This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

import jcuda.Pointer;
import jcuda.runtime.JCuda;

/**
 * Demo driver: synthesizes a sampled sine wave, then runs a forward and an
 * inverse complex-to-complex FFT on the GPU (through {@code FftGpuFloat}) and
 * on the CPU (through {@code FftCpuFloat}), printing the elapsed time, the L2
 * norm of each result and the position of the power-spectrum peak.
 */
public class Main {

	// The test signal is sin(2*pi*f*t), sampled at t = 0, dt, 2*dt, ...
	// To experiment, try changing these constants.

	/**
	 * Frequency of sin test signal
	 */
	private static final float FREQ = 11.0f;

	/**
	 * Length of complex vector (number of complex number pairs). Choose N such
	 * that: N = 2^n, n = 1, 2, 3, ...
	 */
	private static final int N = 16777216 / 16;

	/**
	 * Delta t for sampling function
	 */
	private static final float DT = 0.00005f;

	/**
	 * Program entry point: initializes the GPU, then runs the sine-wave FFT
	 * demonstration.
	 *
	 * @param args
	 *            command-line arguments (not used)
	 */
	public static void main(String[] args) {

		seed();
		sinTest();
	}

	/**
	 * Synthesize a signal and demonstrate GPU vs CPU FFT/IFFT performance.
	 */
	private static void sinTest() {

		System.out.println("Creating sin wave input data: Frequency = " + FREQ
				+ ", N = " + N + ", dt = " + DT + " ...\n");

		// Note: gpuIn[] is an interleaved data array and is 2X the length of
		// the desired complex input vector.
		float[] gpuIn = sin2pif(FREQ, N, DT);

		// The ComplexFloat class simplifies the code for computations
		// performed on the CPU.
		// Note: cpuIn is the same length as the complex input vector (half the
		// length of the interleaved data array).
		ComplexFloat[] cpuIn = ComplexCalcFloat.InterleavedToComplex(gpuIn);

		System.out.println("L2 Norm of original signal: "
				+ FftCpuFloat.VectorTwoNorm(cpuIn) + "\n");

		// --- Forward transforms -------------------------------------------

		System.out.println("Performing a 1D C2C FFT on GPU with JCufft...");
		// NOTE(review): Stopwatch presumably starts timing on construction;
		// confirm against the Stopwatch class.
		Stopwatch stopWatch = new Stopwatch();
		float[] gpuFft = FftGpuFloat.C2C_1D(gpuIn);
		System.out.println("GPU FFT time: " + stopWatch.elapsedTime()
				+ " seconds \n");

		System.out.println("Performing a 1D C2C FFT on CPU...");
		stopWatch = new Stopwatch();
		ComplexFloat[] cpuFft = FftCpuFloat.FftRadix2_Cpu(cpuIn);
		System.out.println("CPU time: " + stopWatch.elapsedTime()
				+ " seconds \n");

		// Convert the GPU result to ComplexFloat so the same CPU-side
		// helpers can be used for both results.
		ComplexFloat[] cGpuFft = ComplexCalcFloat.InterleavedToComplex(gpuFft);

		float gpuFftNorm = FftCpuFloat.VectorTwoNorm(cGpuFft);
		System.out.println("GPU FFT L2 Norm: " + gpuFftNorm);

		float cpuFftNorm = FftCpuFloat.VectorTwoNorm(cpuFft);
		System.out.println("CPU FFT L2 Norm: " + cpuFftNorm + "\n");

		reportSpectrumPeak("GPU", cGpuFft);
		reportSpectrumPeak("CPU", cpuFft);
		System.out.println("\n");

		// --- Inverse transforms of the above results ------------------------
		// (Normalized for signal reconstruction)

		System.out.println("Performing 1D C2C IFFT(FFT) on GPU with JCufft...");
		stopWatch = new Stopwatch();
		float[] gpuIFft = FftGpuFloat.InverseC2C_1D(gpuFft, true);
		System.out.println("GPU time: " + stopWatch.elapsedTime()
				+ " seconds \n");

		System.out.println("Performing 1D C2C IFFT(FFT) on CPU...");
		stopWatch = new Stopwatch();
		ComplexFloat[] cpuIFft = FftCpuFloat.IFftRadix2_Cpu(cpuFft, true);
		System.out.println("CPU time: " + stopWatch.elapsedTime()
				+ " seconds \n");

		ComplexFloat[] cGpuIFft = ComplexCalcFloat
				.InterleavedToComplex(gpuIFft);

		float gpuL2 = FftCpuFloat.VectorTwoNorm(cGpuIFft);
		System.out.println("GPU IFFT L2 Norm: " + gpuL2);

		float cpuIFftNorm = FftCpuFloat.VectorTwoNorm(cpuIFft);
		System.out.println("CPU IFFT L2 Norm: " + cpuIFftNorm);
	}

	/**
	 * Prints the index of the largest power-spectrum entry of a transform
	 * result together with the frequency that index corresponds to.
	 * 
	 * @param device
	 *            label inserted into the message ("GPU" or "CPU")
	 * @param spectrum
	 *            FFT output to analyze
	 */
	private static void reportSpectrumPeak(String device,
			ComplexFloat[] spectrum) {

		float[] power = FftCpuFloat.PowerSpectrum(spectrum);
		int indexMax = FftCpuFloat.IndexOfMaximum(power);
		System.out.println("Index at maximum in " + device
				+ " power spectrum = " + indexMax + ", " + "frequency = "
				+ binFrequency(indexMax, N, DT));
	}

	/**
	 * Converts an FFT bin index into a frequency.
	 * 
	 * A real-valued input produces two peaks of equal power, at bin k and at
	 * its mirror bin n - k, and rounding decides which of the two a maximum
	 * search returns. Indices in the upper half are therefore folded back
	 * (n - index) so either peak reports the same frequency.
	 * 
	 * @param index
	 *            bin index, 0 <= index < n
	 * @param n
	 *            transform length
	 * @param dt
	 *            sampling increment
	 * @return frequency magnitude of the bin: foldedIndex / (n * dt)
	 */
	private static float binFrequency(int index, int n, float dt) {

		int folded = (index > n / 2) ? n - index : index;
		return (float) folded / dt / (float) n;
	}

	/**
	 * Test signal synthesis
	 * 
	 * @param f
	 *            Frequency
	 * @param N
	 *            vector length for requested signal (You will get an
	 *            interleaved complex data array of length 2*N.)
	 * @param dt
	 *            sampling function increment (delta t).
	 * @return an interleaved array of length 2*N representing a sampled
	 *         function: sin(2*pi*f*t) at t = 0, dt, 2*dt, ... The value is
	 *         stored at the even indices; the odd indices are left at 0.
	 */
	private static float[] sin2pif(float f, int N, float dt) {

		float[] result = new float[N * 2];

		final double omega = 2 * Math.PI * f;
		final double delta = dt;
		for (int k = 0; k < N; k++) {
			// Compute the sample time as k * dt in double precision instead
			// of accumulating "step += dt" in float: the running float sum
			// picks up a rounding error on every addition, which distorts
			// the sample spacing over a long vector.
			double t = k * delta;
			result[2 * k] = (float) Math.sin(omega * t);
		}
		return result;
	}

	/**
	 * GPU initialization: allocates and immediately frees a 4-byte device
	 * buffer. Running this first appears to speed up the first GPU
	 * computation run in application.
	 */
	private static void seed() {
		Pointer pointer = new Pointer();
		JCuda.cudaMalloc(pointer, 4);
		JCuda.cudaFree(pointer);
	}

}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Written By

Mark H Bishop

Founder PEI Watershed Alliance, Inc.

United States

I am an analytical chemist and an educator. I program primarily to perform matrix computations for regression analysis, process signals, acquire data from sensors, and to control devices.

I participate in many open source development communities and Linux user forums. I do contract work for an environmental analytical laboratory, where I am primarily focused on LIMS programming and network administration.

I am a member of several community-interest groups such as the Prince Edward Island Watershed Alliance, the Lot 11 and Area Watershed Management Group, and the Petersham Historic Commission.

GPU Computing Using CUDA, Eclipse, and Java with JCuda

License

Comments and Discussions