#include "accfft_gpu.h"
#include <mpi.h>
#include <omp.h>
#include <iostream>
#include <cmath>
#include <math.h>
#include "transpose_cuda.h"
#include <cuda_runtime_api.h>
#include <string.h>
#include <cuda.h>
#include <cufft.h>
#include "accfft_common.h"
|
void | accfft_cleanup_gpu () |
|
int | accfft_local_size_dft_r2c_gpu (int *n, int *isize, int *istart, int *osize, int *ostart, MPI_Comm c_comm, bool inplace) |
|
accfft_plan_gpu * | accfft_plan_dft_3d_r2c_gpu (int *n, double *data_d, double *data_out_d, MPI_Comm c_comm, unsigned flags) |
|
void | accfft_execute_r2c_gpu (accfft_plan_gpu *plan, double *data, Complex *data_out, double *timer, std::bitset< 3 > xyz) |
|
void | accfft_execute_c2r_gpu (accfft_plan_gpu *plan, Complex *data, double *data_out, double *timer, std::bitset< 3 > xyz) |
|
int | accfft_local_size_dft_c2c_gpu (int *n, int *isize, int *istart, int *osize, int *ostart, MPI_Comm c_comm) |
|
accfft_plan_gpu * | accfft_plan_dft_3d_c2c_gpu (int *n, Complex *data_d, Complex *data_out_d, MPI_Comm c_comm, unsigned flags) |
|
void | accfft_execute_c2c_gpu (accfft_plan_gpu *plan, int direction, Complex *data_d, Complex *data_out_d, double *timer, std::bitset< 3 > xyz) |
|
void | accfft_destroy_plan (accfft_plan_gpu *plan) |
|
void | accfft_destroy_plan_gpu (accfft_plan_gpu *plan) |
|
void accfft_cleanup_gpu |
( |
| ) |
|
Clean up all GPU resources.
void accfft_destroy_plan |
( |
accfft_plan_gpu * |
plan | ) |
|
void accfft_destroy_plan_gpu |
( |
accfft_plan_gpu * |
plan | ) |
|
Destroy AccFFT GPU plan.
- Parameters
-
plan | Input plan to be destroyed. |
void accfft_execute_c2c_gpu |
( |
accfft_plan_gpu * |
plan, |
|
|
int |
direction, |
|
|
Complex * |
data_d, |
|
|
Complex * |
data_out_d, |
|
|
double * |
timer, |
|
|
std::bitset< 3 > |
xyz |
|
) |
| |
Execute C2C plan. This function is blocking and only returns after the transform is completed.
- Note
- For inplace transforms, data_out_d should point to the same memory address as data_d, AND the plan must have been created as inplace.
- Parameters
-
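plan | FFT plan created by accfft_plan_dft_3d_c2c_gpu. |
direction | Direction in which the transform is executed. |
data_d | Input data (in the spatial or frequency domain, depending on direction). |
data_out_d | Output data (in the frequency or spatial domain, depending on direction). |
timer | See Timing AccFFT for more details. |
xyz | A bit set that determines along which directions the FFT should be executed. |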
void accfft_execute_c2r_gpu |
( |
accfft_plan_gpu * |
plan, |
|
|
Complex * |
data, |
|
|
double * |
data_out, |
|
|
double * |
timer, |
|
|
std::bitset< 3 > |
xyz |
|
) |
| |
Execute C2R plan. This function is blocking and only returns after the transform is completed.
- Note
- For inplace transforms, data_out should point to the same memory address as data, AND the plan must have been created as inplace.
- Parameters
-
plan | FFT plan created by accfft_plan_dft_3d_r2c_gpu. |
data | Input data in frequency domain. |
data_out | Output data in spatial domain. |
timer | See Timing AccFFT for more details. |
xyz | A bit set that determines along which directions the FFT should be executed. |
void accfft_execute_r2c_gpu |
( |
accfft_plan_gpu * |
plan, |
|
|
double * |
data, |
|
|
Complex * |
data_out, |
|
|
double * |
timer, |
|
|
std::bitset< 3 > |
xyz |
|
) |
| |
Execute R2C plan. This function is blocking and only returns after the transform is completed.
- Note
- For inplace transforms, data_out should point to the same memory address as data, AND the plan must have been created as inplace.
- Parameters
-
plan | FFT plan created by accfft_plan_dft_3d_r2c_gpu. |
data | Input data in spatial domain. |
data_out | Output data in frequency domain. |
timer | See Timing AccFFT for more details. |
xyz | A bit set that determines along which directions the FFT should be executed. |
int accfft_local_size_dft_c2c_gpu |
( |
int * |
n, |
|
|
int * |
isize, |
|
|
int * |
istart, |
|
|
int * |
osize, |
|
|
int * |
ostart, |
|
|
MPI_Comm |
c_comm |
|
) |
| |
Get the local sizes of the distributed global data for a GPU C2C transform
- Parameters
-
n | Integer array of size 3, corresponding to the global data size |
isize | The size of the data that is locally distributed to the calling process |
istart | The starting index of the data that locally resides on the calling process |
osize | The output size of the data that locally resides on the calling process, after the C2C transform is finished |
ostart | The output starting index of the data that locally resides on the calling process, after the C2C transform is finished |
c_comm | Cartesian communicator returned by accfft_create_comm |
- Returns
int accfft_local_size_dft_r2c_gpu |
( |
int * |
n, |
|
|
int * |
isize, |
|
|
int * |
istart, |
|
|
int * |
osize, |
|
|
int * |
ostart, |
|
|
MPI_Comm |
c_comm, |
|
|
bool |
inplace |
|
) |
| |
Get the local sizes of the distributed global data for a GPU R2C transform
- Parameters
-
n | Integer array of size 3, corresponding to the global data size |
isize | The size of the data that is locally distributed to the calling process |
istart | The starting index of the data that locally resides on the calling process |
osize | The output size of the data that locally resides on the calling process, after the R2C transform is finished |
ostart | The output starting index of the data that locally resides on the calling process, after the R2C transform is finished |
c_comm | Cartesian communicator returned by accfft_create_comm |
- Returns
accfft_plan_gpu* accfft_plan_dft_3d_c2c_gpu |
( |
int * |
n, |
|
|
Complex * |
data_d, |
|
|
Complex * |
data_out_d, |
|
|
MPI_Comm |
c_comm, |
|
|
unsigned |
flags |
|
) |
| |
Creates a 3D C2C parallel FFT plan. If data_out_d points to the same location as the input data, an in-place plan is created; otherwise the plan is out-of-place.
- Parameters
-
n | Integer array of size 3, corresponding to the global data size |
data_d | Input data in spatial domain |
data_out_d | Output data in frequency domain |
c_comm | Cartesian communicator returned by accfft_create_comm |
flags | AccFFT flags, See AccFFT Flags for more details. |
- Returns
accfft_plan_gpu* accfft_plan_dft_3d_r2c_gpu |
( |
int * |
n, |
|
|
double * |
data_d, |
|
|
double * |
data_out_d, |
|
|
MPI_Comm |
c_comm, |
|
|
unsigned |
flags |
|
) |
| |
Creates a 3D R2C parallel FFT plan. If data_out_d points to the same location as the input data, an in-place plan is created; otherwise the plan is out-of-place.
- Parameters
-
n | Integer array of size 3, corresponding to the global data size |
data_d | Input data in spatial domain |
data_out_d | Output data in frequency domain |
c_comm | Cartesian communicator returned by accfft_create_comm |
flags | AccFFT flags, See AccFFT Flags for more details. |
- Returns