Base64 Encoding on a GPU

Nick Kopp

Rate me:

4.89/5 (26 votes)

16 Sep 2013 · LGPL3 · 10 min read

63.6K

1.8K

Performing base64 encoding on a graphics processing unit using CUDAfy.NET (CUDA in .NET).

Base64EncodingOnGPU_src-noexe.zip
- Base64EncodingOnGPU
  - Base64EncodingOnGPU.sln
  - Base64EncodingOnGPU
    - app.config
    - Base64EncodingOnGPU.csproj
    - GPUConvert.cs
    - Program.cs
    - Properties
      - AssemblyInfo.cs
  - CudafyV1.26
    - bin
      - Cudafy.NET.xml
    - CUDA.NET.Readme.txt
    - CUDAfy API Documentation.url
    - CUDAfy_User_Manual_1_22.pdf
    - CudafyByExample
      - app.config
      - chapter03
        
        enum_gpu.cs
        
        hello_world.cs
        
        simple_kernel.cs
        
        simple_kernel_params.cs
      - chapter04
        
        add_loop_cpu.cs
        
        add_loop_gpu.cs
        
        add_loop_gpu_alt.cs
        
        add_loop_long.cs
        
        cuComplex.cs
        
        julia_cpu.cs
        
        julia_cpu.Designer.cs
        
        julia_cpu.resx
        
        julia_gpu.cs
      - chapter05
        
        add_loop_blocks.cs
        
        add_loop_long_blocks.cs
        
        dot.cs
        
        ripple.cs
        
        ripple.Designer.cs
        
        ripple.resx
        
        ripple_gpu.cs
      - chapter06
        
        ray.cs
        
        ray_gui.cs
        
        ray_gui.Designer.cs
        
        ray_gui.resx
        
        ray_noconst.cs
        
        ray_opencl.cs
        
        ray_opencl_const.cs
      - chapter09
        
        hist_gpu_shmem_atomics.cs
      - chapter10
        
        basic_double_stream_correct.cs
        
        copy_timed.cs
      - CudafyByExample.csproj
      - CudafyByExample.sln
      - Program.cs
      - Properties
        
        AssemblyInfo.cs
    - CUDAfy-License.txt
    - ILSpy-license.txt
    - README.txt
    - Visit www.hybriddsp.com for full CUDAfy.NET SDK.txt
Base64EncodingOnGPU_src.zip
- Base64EncodingOnGPU.sln
- .svn
  - app.config
  - Base64EncodingOnGPU.csproj
  - GPUConvert.cs
  - Program.cs
  - AssemblyInfo.cs
  - Cudafy.NET.dll
  - Cudafy.NET.xml
  - CUDA.NET.Readme.txt
  - CUDAfy API Documentation.url
  - CUDAfy_User_Manual_1_22.pdf
  - app.config
  - enum_gpu.cs
  - hello_world.cs
  - simple_kernel.cs
  - simple_kernel_params.cs
  - add_loop_cpu.cs
  - add_loop_gpu.cs
  - add_loop_gpu_alt.cs
  - add_loop_long.cs
  - cuComplex.cs
  - julia_cpu.cs
  - julia_cpu.Designer.cs
  - julia_cpu.resx
  - julia_gpu.cs
  - add_loop_blocks.cs
  - add_loop_long_blocks.cs
  - dot.cs
  - ripple.cs
  - ripple.Designer.cs
  - ripple.resx
  - ripple_gpu.cs
  - ray.cs
  - ray_gui.cs
  - ray_gui.Designer.cs
  - ray_gui.resx
  - ray_noconst.cs
  - ray_opencl.cs
  - ray_opencl_const.cs
  - hist_gpu_shmem_atomics.cs
  - basic_double_stream_correct.cs
  - copy_timed.cs
  - CudafyByExample.csproj
  - CudafyByExample.sln
  - Program.cs
  - AssemblyInfo.cs
  - CUDAfy-License.txt
  - ILSpy-license.txt
  - README.txt
  - Visit www.hybriddsp.com for full CUDAfy.NET SDK.txt

/* 
 * This software is based upon the book CUDA By Example by Sanders and Kandrot
 * and source code provided by NVIDIA Corporation.
 * It is a good idea to read the book while studying the examples!
*/
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Cudafy;
using Cudafy.Host;
using Cudafy.Translator;

namespace CudafyByExample
{    
    /// <summary>
    /// Dot-product sample: multiplies two <see cref="N"/>-element vectors on the GPU,
    /// reduces the products to one partial sum per block on the device, and adds the
    /// per-block partial sums together on the CPU.
    /// </summary>
    public class dot
    {

        /// <summary>
        /// Returns the smaller of <paramref name="a"/> and <paramref name="b"/>,
        /// truncated to an <see cref="int"/>.
        /// </summary>
        /// <param name="a">First value to compare.</param>
        /// <param name="b">Second value to compare.</param>
        /// <returns>The smaller argument, cast to <see cref="int"/>.</returns>
        public static int imin(float a, float b)
        {
            return (int)(a < b ? a : b);
        }

        /// <summary>
        /// Closed form of 0^2 + 1^2 + ... + x^2, i.e. x(x+1)(2x+1)/6.
        /// </summary>
        /// <param name="x">Upper bound of the sum.</param>
        /// <returns>
        /// The sum of squares, evaluated in single precision, so for large
        /// <paramref name="x"/> the result is only approximate.
        /// </returns>
        public static float sum_squares(float x)  
        {
            return (x*(x+1)*(2*x+1)/6);
        }

        /// <summary>Number of elements in each input vector.</summary>
        public const int N = 33 * 1024;

        /// <summary>
        /// Threads launched per block; also the length of the kernel's shared cache.
        /// Must be a power of 2 for the halving reduction in <see cref="Dot"/>.
        /// </summary>
        public const int threadsPerBlock = 256;

        /// <summary>
        /// Blocks launched per grid, and therefore the number of partial sums produced.
        /// Covering every element would take (N + threadsPerBlock - 1) / threadsPerBlock
        /// = 132 blocks; the sample caps that at 32 and lets each thread stride over
        /// several elements instead.
        /// </summary>
        public const int blocksPerGrid = 32;

        /// <summary>
        /// Kernel: accumulates a[i] * b[i] for the elements visited by this thread,
        /// reduces the per-thread sums of the block inside the shared cache, and
        /// stores the block's total in <paramref name="c"/>.
        /// </summary>
        /// <param name="thread">Thread/block/grid indices supplied by the launch.</param>
        /// <param name="a">First input vector, read up to index N - 1.</param>
        /// <param name="b">Second input vector, read up to index N - 1.</param>
        /// <param name="c">Output; element blockIdx.x receives that block's partial sum.</param>
        [Cudafy]
        public static void Dot(GThread thread, float[] a, float[] b, float[] c ) 
        {
            float[] cache = thread.AllocateShared<float>("cache", threadsPerBlock);

            int tid = thread.threadIdx.x + thread.blockIdx.x * thread.blockDim.x;
            int cacheIndex = thread.threadIdx.x;

            // each thread starts at its global index and advances by the total
            // number of launched threads until the whole vector has been covered
            float temp = 0;
            while (tid < N)
            {
                temp += a[tid] * b[tid];
                tid += thread.blockDim.x * thread.gridDim.x;
            }

            // set the cache values
            cache[cacheIndex] = temp;

            // synchronize threads in this block so that every cache slot is
            // written before any thread starts reading its neighbours' slots
            thread.SyncThreads();

            // for reductions, threadsPerBlock must be a power of 2
            // because of the following code: each pass folds the upper half
            // of the active range onto the lower half
            int i = thread.blockDim.x / 2;
            while (i != 0)
            {
                if (cacheIndex < i)
                    cache[cacheIndex] += cache[cacheIndex + i];
                // kept outside the 'if' so that every thread of the block reaches it
                thread.SyncThreads();
                i /= 2;
            }

            // a single thread per block publishes the block's total
            if (cacheIndex == 0)
                c[thread.blockIdx.x] = cache[0];
        }


        /// <summary>
        /// Runs the sample: translates and loads the kernel, fills the host vectors
        /// with a[i] = i and b[i] = 2i, launches <see cref="Dot"/>, sums the per-block
        /// results on the CPU and prints the total next to the closed-form value
        /// 2 * sum_squares(N - 1).
        /// </summary>
        public static void Execute() 
        {
            CudafyModule km = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            gpu.LoadModule(km);

            try
            {
                // allocate memory on the cpu side
                float[] a = new float[N];
                float[] b = new float[N];
                float[] partial_c = new float[blocksPerGrid];

                // allocate the memory on the GPU
                float[] dev_a = gpu.Allocate<float>(N);
                float[] dev_b = gpu.Allocate<float>(N);
                float[] dev_partial_c = gpu.Allocate<float>(blocksPerGrid);

                // fill in the host memory with data
                for (int i = 0; i < N; i++)
                {
                    a[i] = i;
                    b[i] = i * 2;
                }

                // copy the arrays 'a' and 'b' to the GPU
                gpu.CopyToDevice(a, dev_a);
                gpu.CopyToDevice(b, dev_b);

                gpu.Launch(blocksPerGrid, threadsPerBlock).Dot(dev_a, dev_b, dev_partial_c);

                // copy the array 'partial_c' back from the GPU to the CPU
                gpu.CopyFromDevice(dev_partial_c, partial_c);

                // finish up on the CPU side
                float c = 0;
                for (int i = 0; i < blocksPerGrid; i++)
                {
                    c += partial_c[i];
                }

                Console.WriteLine("Does GPU value {0} = {1}?\n", c, 2 * sum_squares((float)(N - 1)));
            }
            finally
            {
                // free memory on the gpu side, even when an allocation,
                // copy or launch above has thrown
                gpu.FreeAll();
            }

            // free memory on the cpu side
            // No worries...
        }
    }
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The GNU Lesser General Public License (LGPLv3)

Written By

Nick Kopp

Systems Engineer Hybrid DSP Systems

Netherlands

Nick is co-owner of Hybrid DSP, a company specializing in high-speed data acquisition, processing and storage.

CUDAfy.NET took considerable effort to develop, and we ask nothing in return from users of the LGPL library other than that you please consider donating to Harmony through Education. This small charity helps handicapped children in developing countries by providing suitable schooling.

Base64 Encoding on a GPU

License

Comments and Discussions