Click here to Skip to main content
12,447,242 members (57,189 online)
Click here to Skip to main content

Stats

49.9K views
561 downloads
23 bookmarked
Posted

GPU Computing Using CUDA, Eclipse, and Java with JCuda

Mark H Bishop, 21 Sep 2013 CPOL
Tutorial: GPU computing with JCuda and Nsight (Eclipse)
CodeProjectMBishop_12_22_2012
JCudaFftDemo
bin
CaxpyGpu.class
ComplexCalcFloat.class
ComplexFloat.class
FftCpuFloat.class
FftGpuFloat.class
Main.class
Stopwatch.class
src
Notes
/**
 * Author Mark Bishop; 2012
 * License GNU v3; 
 * This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

import jcuda.Pointer;
import jcuda.runtime.JCuda;

/**
 * Demo entry point: synthesizes a sine-wave test signal and runs a forward
 * and an inverse complex-to-complex FFT on both the GPU (via JCufft) and the
 * CPU, printing timings, L2 norms and the location of the power-spectrum
 * peak for each.
 */
public class Main {

	// We synthesize a signal = sin(2*pi*f*t), sampled at t = k*dT.
	// To experiment, try changing these constants.
	/**
	 * Frequency of the sine test signal.
	 */
	private static final float FREQ = 11.0f;

	/**
	 * Length of the complex vector (number of complex samples). Choose N such
	 * that N = 2^n, n = 1, 2, 3, ...
	 */
	private static final int N = 16777216 / 16;

	/**
	 * Delta t (sampling interval) for the sampling function.
	 */
	private static final float dT = 0.00005f;

	/**
	 * Warms up the GPU, then runs the sine-wave FFT/IFFT comparison.
	 *
	 * @param args
	 *            ignored
	 */
	public static void main(String[] args) {

		seed();
		SinTest();
	}

	/**
	 * Synthesize a signal and demonstrate GPU vs CPU FFT/IFFT performance.
	 */
	private static void SinTest() {

		System.out.println("Creating sin wave input data: Frequency = " + FREQ
				+ ", N = " + N + ", dt = " + dT + " ...\n");

		// Note: gpuIn[] is an interleaved data array and is 2X the length of
		// the desired complex input vector.
		float[] gpuIn = sin2pif(FREQ, N, dT);

		// The ComplexFloat class simplifies the code for computations
		// performed on the CPU.
		// Note: cpuIn is the same length as the complex input vector (half the
		// length of the interleaved data array).
		ComplexFloat[] cpuIn = ComplexCalcFloat.InterleavedToComplex(gpuIn);

		System.out.println("L2 Norm of original signal: "
				+ FftCpuFloat.VectorTwoNorm(cpuIn) + "\n");

		// Forward transforms, timed separately for each device.
		System.out.println("Performing a 1D C2C FFT on GPU with JCufft...");
		Stopwatch stopWatch = new Stopwatch();
		float[] gpuFft = FftGpuFloat.C2C_1D(gpuIn);
		System.out.println("GPU FFT time: " + stopWatch.elapsedTime()
				+ " seconds \n");

		System.out.println("Performing a 1D C2C FFT on CPU...");
		stopWatch = new Stopwatch();
		ComplexFloat[] cpuFft = FftCpuFloat.FftRadix2_Cpu(cpuIn);
		System.out.println("CPU time: " + stopWatch.elapsedTime()
				+ " seconds \n");

		// Convert the interleaved GPU result to ComplexFloat so the same CPU
		// helpers can be applied to both results.
		ComplexFloat[] cGpuFft = ComplexCalcFloat.InterleavedToComplex(gpuFft);

		float gpuFftNorm = FftCpuFloat.VectorTwoNorm(cGpuFft);
		System.out.println("GPU FFT L2 Norm: " + gpuFftNorm);

		float cpuFftNorm = FftCpuFloat.VectorTwoNorm(cpuFft);
		System.out.println("CPU FFT L2 Norm: " + cpuFftNorm + "\n");

		reportSpectrumPeak("GPU", cGpuFft);
		reportSpectrumPeak("CPU", cpuFft);
		System.out.println("\n");

		// Inverse FFT of above results (Normalized for signal reconstruction;
		// the boolean argument presumably selects that normalization --
		// confirm against FftGpuFloat / FftCpuFloat).

		System.out.println("Performing 1D C2C IFFT(FFT) on GPU with JCufft...");
		stopWatch = new Stopwatch();
		float[] gpuIFft = FftGpuFloat.InverseC2C_1D(gpuFft, true);
		System.out.println("GPU time: " + stopWatch.elapsedTime()
				+ " seconds \n");

		System.out.println("Performing 1D C2C IFFT(FFT) on CPU...");
		stopWatch = new Stopwatch();
		ComplexFloat[] cpuIFft = FftCpuFloat.IFftRadix2_Cpu(cpuFft, true);
		System.out.println("CPU time: " + stopWatch.elapsedTime()
				+ " seconds \n");

		ComplexFloat[] cGpuIFft = ComplexCalcFloat
				.InterleavedToComplex(gpuIFft);

		float gpuL2 = FftCpuFloat.VectorTwoNorm(cGpuIFft);
		System.out.println("GPU IFFT L2 Norm: " + gpuL2);

		float cpuIFftNorm = FftCpuFloat.VectorTwoNorm(cpuIFft);
		System.out.println("CPU IFFT L2 Norm: " + cpuIFftNorm);
	}

	/**
	 * Prints the index of the largest power-spectrum entry of an FFT result
	 * together with the frequency that index corresponds to for the
	 * configured sampling interval and vector length (index / dT / N).
	 *
	 * @param label
	 *            device name inserted into the message ("GPU" or "CPU")
	 * @param fft
	 *            FFT result to analyse
	 */
	private static void reportSpectrumPeak(String label, ComplexFloat[] fft) {
		float[] power = FftCpuFloat.PowerSpectrum(fft);
		int indexMax = FftCpuFloat.IndexOfMaximum(power);
		float frequency = (float) indexMax / dT / (float) N;
		System.out.println("Index at maximum in " + label
				+ " power spectrum = " + indexMax + ", " + "frequency = "
				+ frequency);
	}

	/**
	 * Test signal synthesis.
	 * 
	 * @param f
	 *            Frequency
	 * @param N
	 *            vector length for requested signal (You will get an
	 *            interleaved complex data array of length 2*N.)
	 * @param dt
	 *            sampling function increment (delta t).
	 * @return an interleaved array of length 2*N representing the sampled
	 *         function sin(2*pi*f*t) at t = 0, dt, 2*dt, ...; even indices
	 *         hold the real parts, odd indices (imaginary parts) stay 0.
	 */
	private static float[] sin2pif(float f, int N, float dt) {

		float[] result = new float[N * 2];

		// Angular frequency, kept in double precision.
		final double omega = 2.0 * Math.PI * f;

		for (int k = 0; k < N; k++) {
			// Derive the sample time from the index instead of accumulating
			// "step += dt" in a float: over ~10^6 additions the rounding
			// error of a running float sum compounds and shifts the
			// effective sampling interval (and so the recovered frequency).
			double t = (double) k * dt;
			// Only the final sample is narrowed to float; the phase itself
			// is evaluated in double.
			result[2 * k] = (float) Math.sin(omega * t);
		}
		return result;
	}

	/**
	 * GPU initialization. Running this first appears to speed up the first GPU
	 * computation run in application. It allocates and immediately frees a
	 * 4-byte device buffer; the status codes returned by the two calls are
	 * deliberately not inspected.
	 */
	private static void seed() {
		Pointer pointer = new Pointer();
		JCuda.cudaMalloc(pointer, 4);
		JCuda.cudaFree(pointer);
	}

}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Share

About the Author

Mark H Bishop
Founder PEI Watershed Alliance, Inc.
United States
I am an analytical chemist and an educator. I program primarily to perform matrix computations for regression analysis, process signals, acquire data from sensors, and to control devices.

I participate in many open source development communities and Linux user forums. I occasionally perform IT contract work, primarily focused on network design/deployment and penetration testing for small organizations.

I am a member of several community-interest groups such as the Prince Edward Island Watershed Alliance, the Lot 11 and Area Watershed Management Group, and the Petersham Historic Commission.

You may also be interested in...

Pro
Pro
| Advertise | Privacy | Terms of Use | Mobile
Web02 | 2.8.160811.3 | Last Updated 21 Sep 2013
Article Copyright 2012 by Mark H Bishop
Everything else Copyright © CodeProject, 1999-2016
Layout: fixed | fluid