AccFFT
Functions
accfft_gpu.cpp File Reference
#include "accfft_gpu.h"
#include <mpi.h>
#include <omp.h>
#include <iostream>
#include <cmath>
#include <math.h>
#include "transpose_cuda.h"
#include <cuda_runtime_api.h>
#include <string.h>
#include <cuda.h>
#include <cufft.h>
#include "accfft_common.h"

Functions

void accfft_cleanup_gpu ()
 
int accfft_local_size_dft_r2c_gpu (int *n, int *isize, int *istart, int *osize, int *ostart, MPI_Comm c_comm, bool inplace)
 
accfft_plan_gpu * accfft_plan_dft_3d_r2c_gpu (int *n, double *data_d, double *data_out_d, MPI_Comm c_comm, unsigned flags)
 
void accfft_execute_r2c_gpu (accfft_plan_gpu *plan, double *data, Complex *data_out, double *timer, std::bitset< 3 > xyz)
 
void accfft_execute_c2r_gpu (accfft_plan_gpu *plan, Complex *data, double *data_out, double *timer, std::bitset< 3 > xyz)
 
int accfft_local_size_dft_c2c_gpu (int *n, int *isize, int *istart, int *osize, int *ostart, MPI_Comm c_comm)
 
accfft_plan_gpu * accfft_plan_dft_3d_c2c_gpu (int *n, Complex *data_d, Complex *data_out_d, MPI_Comm c_comm, unsigned flags)
 
void accfft_execute_c2c_gpu (accfft_plan_gpu *plan, int direction, Complex *data_d, Complex *data_out_d, double *timer, std::bitset< 3 > xyz)
 
void accfft_destroy_plan (accfft_plan_gpu *plan)
 
void accfft_destroy_plan_gpu (accfft_plan_gpu *plan)
 

Detailed Description

GPU functions of AccFFT

Function Documentation

void accfft_cleanup_gpu ( )

Clean up all GPU resources.

void accfft_destroy_plan ( accfft_plan_gpu *  plan)

Destroy AccFFT GPU plan. This overload simply calls accfft_destroy_plan_gpu.

Parameters
plan: Input plan to be destroyed.
void accfft_destroy_plan_gpu ( accfft_plan_gpu *  plan)

Destroy AccFFT GPU plan.

Parameters
plan: Input plan to be destroyed.
void accfft_execute_c2c_gpu ( accfft_plan_gpu *  plan,
int  direction,
Complex *  data_d,
Complex *  data_out_d,
double *  timer,
std::bitset< 3 >  xyz 
)

Execute C2C plan. This function is blocking and only returns after the transform is completed.

Note
For in-place transforms, data_out_d must point to the same memory address as data_d, and the plan must have been created as an in-place plan.
Parameters
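plan: FFT plan created by accfft_plan_dft_3d_c2c_gpu.
direction: Direction of the transform (forward or inverse).
data_d: Input data (in the spatial or frequency domain, depending on direction).
data_out_d: Output data (in the frequency or spatial domain, depending on direction).
timer: See Timing AccFFT for more details.
xyz: A bitset that determines along which directions the FFT should be executed.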
void accfft_execute_c2r_gpu ( accfft_plan_gpu *  plan,
Complex *  data,
double *  data_out,
double *  timer,
std::bitset< 3 >  xyz 
)

Execute C2R plan. This function is blocking and only returns after the transform is completed.

Note
For in-place transforms, data_out must point to the same memory address as data, and the plan must have been created as an in-place plan.
Parameters
plan: FFT plan created by accfft_plan_dft_3d_r2c_gpu.
data: Input data in frequency domain.
data_out: Output data in spatial domain.
timer: See Timing AccFFT for more details.
xyz: A bitset that determines along which directions the FFT should be executed.
void accfft_execute_r2c_gpu ( accfft_plan_gpu *  plan,
double *  data,
Complex *  data_out,
double *  timer,
std::bitset< 3 >  xyz 
)

Execute R2C plan. This function is blocking and only returns after the transform is completed.

Note
For in-place transforms, data_out must point to the same memory address as data, and the plan must have been created as an in-place plan.
Parameters
plan: FFT plan created by accfft_plan_dft_3d_r2c_gpu.
data: Input data in spatial domain.
data_out: Output data in frequency domain.
timer: See Timing AccFFT for more details.
xyz: A bitset that determines along which directions the FFT should be executed.
int accfft_local_size_dft_c2c_gpu ( int *  n,
int *  isize,
int *  istart,
int *  osize,
int *  ostart,
MPI_Comm  c_comm 
)

Get the local sizes of the distributed global data for a GPU C2C transform

Parameters
n: Integer array of size 3, corresponding to the global data size.
isize: The size of the data that is locally distributed to the calling process.
istart: The starting index of the data that locally resides on the calling process.
osize: The output size of the data that locally resides on the calling process, after the C2C transform is finished.
ostart: The output starting index of the data that locally resides on the calling process, after the C2C transform is finished.
c_comm: Cartesian communicator returned by accfft_create_comm.
Returns
int accfft_local_size_dft_r2c_gpu ( int *  n,
int *  isize,
int *  istart,
int *  osize,
int *  ostart,
MPI_Comm  c_comm,
bool  inplace 
)

Get the local sizes of the distributed global data for a GPU R2C transform

Parameters
n: Integer array of size 3, corresponding to the global data size.
isize: The size of the data that is locally distributed to the calling process.
istart: The starting index of the data that locally resides on the calling process.
osize: The output size of the data that locally resides on the calling process, after the R2C transform is finished.
ostart: The output starting index of the data that locally resides on the calling process, after the R2C transform is finished.
c_comm: Cartesian communicator returned by accfft_create_comm.
Returns
accfft_plan_gpu* accfft_plan_dft_3d_c2c_gpu ( int *  n,
Complex *  data_d,
Complex *  data_out_d,
MPI_Comm  c_comm,
unsigned  flags 
)

Creates a 3D C2C parallel FFT plan. If data_out_d points to the same location as the input data data_d, an in-place plan is created; otherwise the plan is out-of-place.

Parameters
n: Integer array of size 3, corresponding to the global data size.
data_d: Input data in spatial domain.
data_out_d: Output data in frequency domain.
c_comm: Cartesian communicator returned by accfft_create_comm.
flags: AccFFT flags. See AccFFT Flags for more details.
Returns
accfft_plan_gpu* accfft_plan_dft_3d_r2c_gpu ( int *  n,
double *  data_d,
double *  data_out_d,
MPI_Comm  c_comm,
unsigned  flags 
)

Creates a 3D R2C parallel FFT plan. If data_out_d points to the same location as the input data data_d, an in-place plan is created; otherwise the plan is out-of-place.

Parameters
n: Integer array of size 3, corresponding to the global data size.
data_d: Input data in spatial domain.
data_out_d: Output data in frequency domain.
c_comm: Cartesian communicator returned by accfft_create_comm.
flags: AccFFT flags. See AccFFT Flags for more details.
Returns