30 #include "transpose.h"
33 #include "accfft_common.h"
35 #define PCOUT if(procid==0) std::cout
36 typedef double Complex[2];
45 if (threads_ok) threads_ok = fftw_init_threads();
46 if (threads_ok) fftw_plan_with_nthreads(nthreads);
54 fftw_cleanup_threads();
58 int dfft_get_local_size(
int N0,
int N1,
int N2,
int * isize,
int * istart,MPI_Comm c_comm ){
60 MPI_Comm_rank(c_comm, &procid);
62 int coords[2],np[2],periods[2];
63 MPI_Cart_get(c_comm,2,np,periods,coords);
65 isize[0]=ceil(N0/(
double)np[0]);
66 isize[1]=ceil(N1/(
double)np[1]);
68 istart[0]=isize[0]*(coords[0]);
69 istart[1]=isize[1]*(coords[1]);
72 if((N0-isize[0]*coords[0])<isize[0]) {isize[0]=N0-isize[0]*coords[0]; isize[0]*=(int) isize[0]>0; istart[0]=N0-isize[0];}
73 if((N1-isize[1]*coords[1])<isize[1]) {isize[1]=N1-isize[1]*coords[1]; isize[1]*=(int) isize[1]>0; istart[1]=N1-isize[1];}
78 for(
int r=0;r<np[0];r++)
79 for(
int c=0;c<np[1];c++){
81 if((coords[0]==r) && (coords[1]==c))
82 std::cout<<coords[0]<<
","<<coords[1]<<
" isize[0]= "<<isize[0]<<
" isize[1]= "<<isize[1]<<
" isize[2]= "<<isize[2]<<
" istart[0]= "<<istart[0]<<
" istart[1]= "<<istart[1]<<
" istart[2]= "<<istart[2]<<std::endl;
87 int alloc_local=isize[0]*isize[1]*isize[2]*
sizeof(double);
109 int osize_0[3]={0}, ostart_0[3]={0};
110 int osize_1[3]={0}, ostart_1[3]={0};
111 int osize_2[3]={0}, ostart_2[3]={0};
114 int alloc_max=0,n_tuples;
116 n_tuples=(n[2]/2+1)*2;
117 alloc_local=dfft_get_local_size(n[0],n[1],n_tuples,osize_0,ostart_0,c_comm);
118 alloc_max=std::max(alloc_max, alloc_local);
119 alloc_local=dfft_get_local_size(n[0],n_tuples/2,n[1],osize_1,ostart_1,c_comm);
120 alloc_max=std::max(alloc_max, alloc_local*2);
121 alloc_local=dfft_get_local_size(n[1],n_tuples/2,n[0],osize_2,ostart_2,c_comm);
122 alloc_max=std::max(alloc_max, alloc_local*2);
124 std::swap(osize_1[1],osize_1[2]);
125 std::swap(ostart_1[1],ostart_1[2]);
127 std::swap(ostart_2[1],ostart_2[2]);
128 std::swap(ostart_2[0],ostart_2[1]);
129 std::swap(osize_2[1],osize_2[2]);
130 std::swap(osize_2[0],osize_2[1]);
135 dfft_get_local_size(n[0],n[1],n[2],isize,istart,c_comm);
141 ostart[0]=ostart_2[0];
142 ostart[1]=ostart_2[1];
143 ostart[2]=ostart_2[2];
161 accfft_plan *plan=
new accfft_plan;
163 MPI_Comm_rank(c_comm, &procid);
165 MPI_Cart_get(c_comm,2,plan->np,plan->periods,plan->coord);
167 int *coord=plan->coord;
168 MPI_Comm_split(c_comm,coord[0],coord[1],&plan->row_comm);
169 MPI_Comm_split(c_comm,coord[1],coord[0],&plan->col_comm);
171 plan->N[0]=n[0];plan->N[1]=n[1];plan->N[2]=n[2];
174 plan->data_out=data_out;
177 else{plan->inplace=
false;}
181 unsigned fftw_flags=FFTW_MEASURE;
182 int N0=n[0], N1=n[1], N2=n[2];
184 int n_tuples_o,n_tuples_i;
185 plan->inplace==
true ? n_tuples_i=(N2/2+1)*2: n_tuples_i=N2;
186 n_tuples_o=(N2/2+1)*2;
188 int isize[3],osize[3],istart[3],ostart[3];
190 plan->alloc_max=alloc_max;
192 plan->Mem_mgr=
new Mem_Mgr(N0,N1,n_tuples_o,c_comm);
193 plan->T_plan_1=
new T_Plan(N0,N1,n_tuples_o, plan->Mem_mgr, c_comm);
194 plan->T_plan_1i=
new T_Plan(N1,N0,n_tuples_o,plan->Mem_mgr, c_comm);
196 plan->T_plan_1->alloc_local=alloc_max;
197 plan->T_plan_1i->alloc_local=alloc_max;
199 long int BATCH=plan->T_plan_1->local_n0;
200 plan->fplan_0= fftw_plan_many_dft_r2c(2, &n[1],plan->T_plan_1->local_n0,
203 (fftw_complex*)data_out, NULL,
206 if(plan->fplan_0==NULL) std::cout<<
"!!! Forward Plan not Created !!!"<<std::endl;
208 ptrdiff_t local_n0=plan->T_plan_1->local_n0;
209 ptrdiff_t local_n1=plan->T_plan_1->local_n1;
210 plan->fplan_1= fftw_plan_many_dft(1, &n[0],n_tuples_o/2*local_n1,
211 (fftw_complex*)data_out, NULL,
212 local_n1*n_tuples_o/2,1,
213 (fftw_complex*)data_out, NULL,
214 local_n1*n_tuples_o/2,1,
215 FFTW_FORWARD,fftw_flags);
216 if(plan->fplan_1==NULL) std::cout<<
"!!! Forward Plan not Created !!!"<<std::endl;
218 plan->iplan_0=fftw_plan_many_dft_c2r(2, &n[1],plan->T_plan_1->local_n0,
219 (fftw_complex*)data_out, NULL,
224 if(plan->iplan_0==NULL) std::cout<<
"!!! Inverse Plan not Created !!!"<<std::endl;
226 plan->iplan_1= fftw_plan_many_dft(1, &n[0],n_tuples_o/2*local_n1,
227 (fftw_complex*)data_out, NULL,
228 local_n1*n_tuples_o/2,1,
229 (fftw_complex*)data_out, NULL,
230 local_n1*n_tuples_o/2,1,
231 FFTW_BACKWARD,fftw_flags);
232 if(plan->iplan_1==NULL) std::cout<<
"!!! Inverse Plan2 not Created !!!"<<std::endl;
234 static int method_static=0;
235 static int kway_static_2=0;
236 if(method_static==0){
237 plan->T_plan_1->which_fast_method(plan->T_plan_1,data_out);
238 method_static=plan->T_plan_1->method;
239 kway_static_2=plan->T_plan_1->kway;
242 plan->T_plan_1->method=method_static;
243 plan->T_plan_1->kway=kway_static_2;
245 plan->T_plan_1->method=plan->T_plan_1->method;
246 plan->T_plan_1->kway=kway_static_2;
247 plan->T_plan_1i->method=plan->T_plan_1->method;
248 plan->T_plan_1i->kway=kway_static_2;
253 plan->T_plan_2i=NULL;
262 int *osize_0 =plan->osize_0, *ostart_0 =plan->ostart_0;
263 int *osize_1 =plan->osize_1, *ostart_1 =plan->ostart_1;
264 int *osize_2 =plan->osize_2, *ostart_2 =plan->ostart_2;
265 int *osize_1i=plan->osize_1i,*ostart_1i=plan->ostart_1i;
266 int *osize_2i=plan->osize_2i,*ostart_2i=plan->ostart_2i;
270 int n_tuples_o,n_tuples_i;
271 plan->inplace==
true ? n_tuples_i=(n[2]/2+1)*2: n_tuples_i=n[2];
272 n_tuples_o=(n[2]/2+1)*2;
274 int isize[3],osize[3],istart[3],ostart[3];
277 dfft_get_local_size(n[0],n[1],n_tuples_o,osize_0,ostart_0,c_comm);
278 dfft_get_local_size(n[0],n_tuples_o/2,n[1],osize_1,ostart_1,c_comm);
279 dfft_get_local_size(n[1],n_tuples_o/2,n[0],osize_2,ostart_2,c_comm);
281 std::swap(osize_1[1],osize_1[2]);
282 std::swap(ostart_1[1],ostart_1[2]);
284 std::swap(ostart_2[1],ostart_2[2]);
285 std::swap(ostart_2[0],ostart_2[1]);
286 std::swap(osize_2[1],osize_2[2]);
287 std::swap(osize_2[0],osize_2[1]);
289 for(
int i=0;i<3;i++){
290 osize_1i[i]=osize_1[i];
291 osize_2i[i]=osize_2[i];
292 ostart_1i[i]=ostart_1[i];
293 ostart_2i[i]=ostart_2[i];
297 plan->Mem_mgr=
new Mem_Mgr(n[1],n_tuples_o/2,2,plan->row_comm,osize_0[0],alloc_max);
298 plan->T_plan_1=
new T_Plan(n[1],n_tuples_o/2,2, plan->Mem_mgr, plan->row_comm,osize_0[0]);
299 plan->T_plan_2=
new T_Plan(n[0],n[1],osize_2[2]*2, plan->Mem_mgr, plan->col_comm);
300 plan->T_plan_2i=
new T_Plan(n[1],n[0],osize_2i[2]*2, plan->Mem_mgr, plan->col_comm);
301 plan->T_plan_1i=
new T_Plan(n_tuples_o/2,n[1],2, plan->Mem_mgr, plan->row_comm,osize_1i[0]);
303 plan->T_plan_1->alloc_local=plan->alloc_max;
304 plan->T_plan_2->alloc_local=plan->alloc_max;
305 plan->T_plan_2i->alloc_local=plan->alloc_max;
306 plan->T_plan_1i->alloc_local=plan->alloc_max;
309 unsigned fftw_flags=FFTW_MEASURE;
310 plan->fplan_0= fftw_plan_many_dft_r2c(1, &n[2],osize_0[0]*osize_0[1],
313 (fftw_complex*)data_out, NULL,
316 if(plan->fplan_0==NULL) std::cout<<
"!!! fplan_0 not Created !!!"<<std::endl;
318 plan->iplan_0= fftw_plan_many_dft_c2r(1, &n[2],osize_0[0]*osize_0[1],
319 (fftw_complex*)data_out, NULL,
324 if(plan->iplan_0==NULL) std::cout<<
"!!! iplan_0 not Created !!!"<<std::endl;
328 fftw_iodim dims, howmany_dims[2];
333 howmany_dims[0].n=osize_1[2];
334 howmany_dims[0].is=1;
335 howmany_dims[0].os=1;
337 howmany_dims[1].n=osize_1[0];
338 howmany_dims[1].is=osize_1[1]*osize_1[2];
339 howmany_dims[1].os=osize_1[1]*osize_1[2];
341 plan->fplan_1=fftw_plan_guru_dft(
344 (fftw_complex*)data_out, (fftw_complex*)data_out,
346 if(plan->fplan_1==NULL) std::cout<<
"!!! fplan1 not Created !!!"<<std::endl;
347 plan->iplan_1=fftw_plan_guru_dft(
350 (fftw_complex*)data_out, (fftw_complex*)data_out,
352 if(plan->iplan_1==NULL) std::cout<<
"!!! fplan1 not Created !!!"<<std::endl;
359 howmany_dims[0].n=osize_2[2];
360 howmany_dims[0].is=1;
361 howmany_dims[0].os=1;
363 howmany_dims[1].n=osize_2[0];
364 howmany_dims[1].is=osize_2[1]*osize_2[2];
365 howmany_dims[1].os=osize_2[1]*osize_2[2];
367 plan->fplan_2= fftw_plan_many_dft(1, &n[0],osize_2[2]*osize_2[1],
368 (fftw_complex*)data_out, NULL,
369 osize_2[2]*osize_2[1],1,
370 (fftw_complex*)data_out, NULL,
371 osize_2[2]*osize_2[1],1,
372 FFTW_FORWARD,fftw_flags);
373 if(plan->fplan_2==NULL) std::cout<<
"!!! fplan2 not Created !!!"<<std::endl;
375 plan->iplan_2= fftw_plan_many_dft(1, &n[0],osize_2[2]*osize_2[1],
376 (fftw_complex*)data_out, NULL,
377 osize_2[2]*osize_2[1],1,
378 (fftw_complex*)data_out, NULL,
379 osize_2[2]*osize_2[1],1,
380 FFTW_BACKWARD,fftw_flags);
381 if(plan->iplan_2==NULL) std::cout<<
"!!! fplan2 not Created !!!"<<std::endl;
384 static int method_static_2=0;
385 static int kway_static_2=0;
386 if(method_static_2==0){
388 plan->T_plan_1->which_fast_method(plan->T_plan_1,data_out);
389 method_static_2=plan->T_plan_1->method;
390 kway_static_2=plan->T_plan_1->kway;
392 MPI_Bcast(&method_static_2,1, MPI_INT,0, c_comm );
393 MPI_Bcast(&kway_static_2,1, MPI_INT,0, c_comm );
396 plan->T_plan_1->method=method_static_2;
397 plan->T_plan_2->method=method_static_2;
398 plan->T_plan_2i->method=method_static_2;
399 plan->T_plan_1i->method=method_static_2;
400 plan->T_plan_1->kway=kway_static_2;
401 plan->T_plan_2->kway=kway_static_2;
402 plan->T_plan_2i->kway=kway_static_2;
403 plan->T_plan_1i->kway=kway_static_2;
424 int osize_0[3]={0}, ostart_0[3]={0};
425 int osize_1[3]={0}, ostart_1[3]={0};
426 int osize_2[3]={0}, ostart_2[3]={0};
427 int osize_1i[3]={0}, ostart_1i[3]={0};
428 int osize_2i[3]={0}, ostart_2i[3]={0};
431 int alloc_max=0,n_tuples=n[2]*2;
432 alloc_local=dfft_get_local_size(n[0],n[1],n[2],osize_0,ostart_0,c_comm);
433 alloc_max=std::max(alloc_max, alloc_local);
434 alloc_local=dfft_get_local_size(n[0],n[2],n[1],osize_1,ostart_1,c_comm);
435 alloc_max=std::max(alloc_max, alloc_local);
436 alloc_local=dfft_get_local_size(n[1],n[2],n[0],osize_2,ostart_2,c_comm);
437 alloc_max=std::max(alloc_max, alloc_local);
440 std::swap(osize_1[1],osize_1[2]);
441 std::swap(ostart_1[1],ostart_1[2]);
443 std::swap(ostart_2[1],ostart_2[2]);
444 std::swap(ostart_2[0],ostart_2[1]);
445 std::swap(osize_2[1],osize_2[2]);
446 std::swap(osize_2[0],osize_2[1]);
448 for(
int i=0;i<3;i++){
449 osize_1i[i]=osize_1[i];
450 osize_2i[i]=osize_2[i];
451 ostart_1i[i]=ostart_1[i];
452 ostart_2i[i]=ostart_2[i];
458 dfft_get_local_size(n[0],n[1],n[2],isize,istart,c_comm);
464 ostart[0]=ostart_2[0];
465 ostart[1]=ostart_2[1];
466 ostart[2]=ostart_2[2];
483 accfft_plan *plan=
new accfft_plan;
485 MPI_Comm_rank(c_comm, &procid);
487 MPI_Cart_get(c_comm,2,plan->np,plan->periods,plan->coord);
489 int *coord=plan->coord;
490 MPI_Comm_split(c_comm,coord[0],coord[1],&plan->row_comm);
491 MPI_Comm_split(c_comm,coord[1],coord[0],&plan->col_comm);
492 plan->N[0]=n[0];plan->N[1]=n[1];plan->N[2]=n[2];
495 plan->data_out_c=data_out;
498 else{plan->inplace=
false;}
502 int NX=n[0],NY=n[1],NZ=n[2];
503 plan->Mem_mgr=
new Mem_Mgr(NX,NY,(NZ)*2,c_comm);
504 plan->T_plan_1=
new T_Plan(NX,NY,(NZ)*2, plan->Mem_mgr,c_comm);
505 plan->T_plan_1i=
new T_Plan(NY,NX,NZ*2, plan->Mem_mgr,c_comm);
507 int isize[3],osize[3],istart[3],ostart[3];
509 plan->alloc_max=alloc_max;
510 plan->T_plan_1->alloc_local=alloc_max;
511 plan->T_plan_1i->alloc_local=alloc_max;
514 ptrdiff_t local_n0=plan->T_plan_1->local_n0;
515 ptrdiff_t local_n1=plan->T_plan_1->local_n1;
516 int N0=NX, N1=NY, N2=NZ;
518 unsigned fftw_flags=FFTW_MEASURE;
519 plan->fplan_0= fftw_plan_many_dft(2, &n[1],plan->T_plan_1->local_n0,
524 FFTW_FORWARD,fftw_flags);
525 if(plan->fplan_0==NULL) std::cout<<
"!!! Forward Plan not Created !!!"<<std::endl;
526 plan->iplan_0= fftw_plan_many_dft(2, &n[1],plan->T_plan_1->local_n0,
531 FFTW_BACKWARD,fftw_flags);
532 if(plan->iplan_0==NULL) std::cout<<
"!!! Forward Plan not Created !!!"<<std::endl;
536 plan->fplan_1= fftw_plan_many_dft(1, &n[0],plan->T_plan_1->local_n1*NZ,
538 plan->T_plan_1->local_n1*NZ, 1,
540 plan->T_plan_1->local_n1*NZ, 1,
541 FFTW_FORWARD,fftw_flags);
542 if(plan->fplan_1==NULL) std::cout<<
"!!! Forward Plan not Created !!!"<<std::endl;
544 plan->iplan_1= fftw_plan_many_dft(1, &n[0],plan->T_plan_1->local_n1*NZ,
546 plan->T_plan_1->local_n1*NZ, 1,
548 plan->T_plan_1->local_n1*NZ, 1,
549 FFTW_BACKWARD,fftw_flags);
550 if(plan->iplan_1==NULL) std::cout<<
"!!! Forward Plan not Created !!!"<<std::endl;
553 plan->T_plan_1->which_fast_method(plan->T_plan_1,(
double*)data_out);
554 plan->T_plan_1i->method=plan->T_plan_1->method;
555 plan->T_plan_1i->kway=plan->T_plan_1->kway;
559 plan->T_plan_2i=NULL;
567 int *osize_0 =plan->osize_0, *ostart_0 =plan->ostart_0;
568 int *osize_1 =plan->osize_1, *ostart_1 =plan->ostart_1;
569 int *osize_2 =plan->osize_2, *ostart_2 =plan->ostart_2;
570 int *osize_1i=plan->osize_1i,*ostart_1i=plan->ostart_1i;
571 int *osize_2i=plan->osize_2i,*ostart_2i=plan->ostart_2i;
574 int alloc_max=0,n_tuples=(n[2]/2+1)*2;
576 int isize[3],osize[3],istart[3],ostart[3];
578 plan->alloc_max=alloc_max;
580 dfft_get_local_size(n[0],n[1],n[2],osize_0,ostart_0,c_comm);
581 dfft_get_local_size(n[0],n[2],n[1],osize_1,ostart_1,c_comm);
582 dfft_get_local_size(n[1],n[2],n[0],osize_2,ostart_2,c_comm);
584 std::swap(osize_1[1],osize_1[2]);
585 std::swap(ostart_1[1],ostart_1[2]);
587 std::swap(ostart_2[1],ostart_2[2]);
588 std::swap(ostart_2[0],ostart_2[1]);
589 std::swap(osize_2[1],osize_2[2]);
590 std::swap(osize_2[0],osize_2[1]);
592 for(
int i=0;i<3;i++){
593 osize_1i[i]=osize_1[i];
594 osize_2i[i]=osize_2[i];
595 ostart_1i[i]=ostart_1[i];
596 ostart_2i[i]=ostart_2[i];
604 plan->Mem_mgr=
new Mem_Mgr(n[1],n[2],2,plan->row_comm,osize_0[0],alloc_max);
605 plan->T_plan_1=
new T_Plan(n[1],n[2],2, plan->Mem_mgr, plan->row_comm,osize_0[0]);
606 plan->T_plan_2=
new T_Plan(n[0],n[1],2*osize_2[2], plan->Mem_mgr, plan->col_comm);
607 plan->T_plan_2i=
new T_Plan(n[1],n[0],2*osize_2i[2], plan->Mem_mgr, plan->col_comm);
608 plan->T_plan_1i=
new T_Plan(n[2],n[1],2, plan->Mem_mgr, plan->row_comm,osize_1i[0]);
610 plan->T_plan_1->alloc_local=plan->alloc_max;
611 plan->T_plan_2->alloc_local=plan->alloc_max;
612 plan->T_plan_2i->alloc_local=plan->alloc_max;
613 plan->T_plan_1i->alloc_local=plan->alloc_max;
617 unsigned fftw_flags=FFTW_MEASURE;
618 plan->fplan_0= fftw_plan_many_dft(1, &n[2],osize_0[0]*osize_0[1],
623 FFTW_FORWARD,fftw_flags);
624 if(plan->fplan_0==NULL) std::cout<<
"!!! Forward Plan not Created !!!"<<std::endl;
625 plan->iplan_0= fftw_plan_many_dft(1, &n[2],osize_0[0]*osize_0[1],
630 FFTW_BACKWARD,fftw_flags);
631 if(plan->iplan_0==NULL) std::cout<<
"!!! Forward Plan not Created !!!"<<std::endl;
635 fftw_iodim dims, howmany_dims[2];
640 howmany_dims[0].n=osize_1[2];
641 howmany_dims[0].is=1;
642 howmany_dims[0].os=1;
644 howmany_dims[1].n=osize_1[0];
645 howmany_dims[1].is=osize_1[1]*osize_1[2];
646 howmany_dims[1].os=osize_1[1]*osize_1[2];
648 plan->fplan_1=fftw_plan_guru_dft(
653 if(plan->fplan_1==NULL) std::cout<<
"!!! fplan1 not Created !!!"<<std::endl;
654 plan->iplan_1=fftw_plan_guru_dft(
657 (fftw_complex*)data_out, (fftw_complex*)data_out,
659 if(plan->iplan_1==NULL) std::cout<<
"!!! iplan1 not Created !!!"<<std::endl;
663 plan->fplan_2= fftw_plan_many_dft(1, &n[0],osize_2[1]*osize_2[2],
665 osize_2[1]*osize_2[2],1 ,
667 osize_2[1]*osize_2[2],1 ,
668 FFTW_FORWARD,fftw_flags);
669 if(plan->fplan_2==NULL) std::cout<<
"!!! Forward Plan not Created !!!"<<std::endl;
670 plan->iplan_2= fftw_plan_many_dft(1, &n[0],osize_2[1]*osize_2[2],
672 osize_2[1]*osize_2[2],1 ,
674 osize_2[1]*osize_2[2],1 ,
675 FFTW_BACKWARD,fftw_flags);
676 if(plan->iplan_2==NULL) std::cout<<
"!!! Forward Plan not Created !!!"<<std::endl;
680 static int method_static_2=0;
681 static int kway_static_2=0;
682 if(method_static_2==0){
684 plan->T_plan_1->which_fast_method(plan->T_plan_1,(
double*)data_out);
685 method_static_2=plan->T_plan_1->method;
686 kway_static_2=plan->T_plan_1->kway;
688 MPI_Bcast(&method_static_2,1, MPI_INT,0, c_comm );
689 MPI_Bcast(&kway_static_2,1, MPI_INT,0, c_comm );
692 plan->T_plan_1->method=method_static_2;
693 plan->T_plan_2->method=method_static_2;
694 plan->T_plan_2i->method=method_static_2;
695 plan->T_plan_1i->method=method_static_2;
696 plan->T_plan_1->kway=kway_static_2;
697 plan->T_plan_2->kway=kway_static_2;
698 plan->T_plan_2i->kway=kway_static_2;
699 plan->T_plan_1i->kway=kway_static_2;
715 accfft_execute(plan,-1,data,(
double*)data_out,timer);
731 accfft_execute(plan,1,(
double*)data,data_out,timer);
736 void accfft_execute(accfft_plan* plan,
int direction,
double * data,
double * data_out,
double * timer){
741 data_out=plan->data_out;
742 int * coords=plan->coord;
743 int procid=plan->procid;
747 timings=
new double[5];
748 memset(timings,0,
sizeof(
double)*5);
762 fft_time-=MPI_Wtime();
763 fftw_execute_dft_r2c(plan->fplan_0,(
double*)data,(fftw_complex*)data_out);
764 fft_time+=MPI_Wtime();
766 MPI_Barrier(plan->c_comm);
767 plan->T_plan_1->execute(plan->T_plan_1,data_out,timings,2);
771 fft_time-=MPI_Wtime();
772 fftw_execute_dft(plan->fplan_1,(fftw_complex*)data_out,(fftw_complex*)data_out);
773 fft_time+=MPI_Wtime();
778 fft_time-=MPI_Wtime();
779 fftw_execute_dft(plan->iplan_1,(fftw_complex*)data,(fftw_complex*)data);
780 fft_time+=MPI_Wtime();
782 plan->T_plan_1i->execute(plan->T_plan_1i,data,timings,1);
787 fft_time-=MPI_Wtime();
788 fftw_execute_dft_c2r(plan->iplan_0,(fftw_complex*)data,(
double*)data_out);
789 fft_time+=MPI_Wtime();
796 int *osize_0 =plan->osize_0, *ostart_0 =plan->ostart_0;
797 int *osize_1 =plan->osize_1, *ostart_1 =plan->ostart_1;
798 int *osize_2 =plan->osize_2, *ostart_2 =plan->ostart_2;
799 int *osize_1i=plan->osize_1i,*ostart_1i=plan->ostart_1i;
800 int *osize_2i=plan->osize_2i,*ostart_2i=plan->ostart_2i;
807 fft_time-=MPI_Wtime();
808 fftw_execute_dft_r2c(plan->fplan_0,(
double*)data,(fftw_complex*)data_out);
809 fft_time+=MPI_Wtime();
814 plan->T_plan_1->execute(plan->T_plan_1,data_out,timings,2,osize_0[0],coords[0]);
818 fft_time-=MPI_Wtime();
819 fftw_execute_dft(plan->fplan_1,(fftw_complex*)data_out,(fftw_complex*)data_out);
820 fft_time+=MPI_Wtime();
824 plan->T_plan_2->execute(plan->T_plan_2,plan->data_out,timings,2,1,coords[1]);
828 fft_time-=MPI_Wtime();
829 fftw_execute_dft(plan->fplan_2,(fftw_complex*)data_out,(fftw_complex*)data_out);
830 fft_time+=MPI_Wtime();
832 else if (direction==1){
833 fft_time-=MPI_Wtime();
834 fftw_execute_dft(plan->iplan_2,(fftw_complex*)data,(fftw_complex*)data);
835 fft_time+=MPI_Wtime();
838 plan->T_plan_2i->execute(plan->T_plan_2i,data,timings,1,1,coords[1]);
839 MPI_Barrier(plan->c_comm);
843 fft_time-=MPI_Wtime();
844 fftw_execute_dft(plan->iplan_1,(fftw_complex*)data,(fftw_complex*)data);
845 fft_time+=MPI_Wtime();
848 plan->T_plan_1i->execute(plan->T_plan_1i,data,timings,1,osize_1i[0],coords[0]);
849 MPI_Barrier(plan->c_comm);
855 fft_time-=MPI_Wtime();
856 fftw_execute_dft_c2r(plan->iplan_0,(fftw_complex*)data,(
double*)data_out);
857 fft_time+=MPI_Wtime();
858 MPI_Barrier(plan->c_comm);
866 MPI_Barrier(plan->c_comm);
880 void accfft_execute_c2c(accfft_plan* plan,
int direction,Complex * data, Complex * data_out,
double * timer){
885 data_out=plan->data_out_c;
886 int * coords=plan->coord;
887 int procid=plan->procid;
891 timings=
new double[5];
892 memset(timings,0,
sizeof(
double)*5);
907 fft_time-=MPI_Wtime();
908 fftw_execute_dft(plan->fplan_0,data,data_out);
909 fft_time+=MPI_Wtime();
911 MPI_Barrier(plan->c_comm);
912 plan->T_plan_1->execute(plan->T_plan_1,(
double*)data_out,timings,2);
916 fft_time-=MPI_Wtime();
917 fftw_execute_dft(plan->fplan_1,(fftw_complex*)data_out,(fftw_complex*)data_out);
918 fft_time+=MPI_Wtime();
923 fft_time-=MPI_Wtime();
924 fftw_execute_dft(plan->iplan_1,(fftw_complex*)data,(fftw_complex*)data);
925 fft_time+=MPI_Wtime();
927 plan->T_plan_1i->execute(plan->T_plan_1i,(
double*)data,timings,1);
932 fft_time-=MPI_Wtime();
933 fftw_execute_dft(plan->iplan_0,data,data_out);
934 fft_time+=MPI_Wtime();
941 int *osize_0 =plan->osize_0, *ostart_0 =plan->ostart_0;
942 int *osize_1 =plan->osize_1, *ostart_1 =plan->ostart_1;
943 int *osize_2 =plan->osize_2, *ostart_2 =plan->ostart_2;
944 int *osize_1i=plan->osize_1i,*ostart_1i=plan->ostart_1i;
945 int *osize_2i=plan->osize_2i,*ostart_2i=plan->ostart_2i;
952 fft_time-=MPI_Wtime();
953 fftw_execute_dft(plan->fplan_0,data,data_out);
954 fft_time+=MPI_Wtime();
958 plan->T_plan_1->execute(plan->T_plan_1,(
double*)data_out,timings,2,osize_0[0],coords[0]);
962 fft_time-=MPI_Wtime();
963 fftw_execute_dft(plan->fplan_1,(fftw_complex*)data_out,(fftw_complex*)data_out);
964 fft_time+=MPI_Wtime();
968 plan->T_plan_2->execute(plan->T_plan_2,(
double*)data_out,timings,2,1,coords[1]);
972 fft_time-=MPI_Wtime();
973 fftw_execute_dft(plan->fplan_2,(fftw_complex*)data_out,(fftw_complex*)data_out);
974 fft_time+=MPI_Wtime();
976 else if (direction==1){
977 fft_time-=MPI_Wtime();
978 fftw_execute_dft(plan->iplan_2,(fftw_complex*)data,(fftw_complex*)data);
979 fft_time+=MPI_Wtime();
982 plan->T_plan_2i->execute(plan->T_plan_2i,(
double*)data,timings,1,1,coords[1]);
986 fft_time-=MPI_Wtime();
987 fftw_execute_dft(plan->iplan_1,(fftw_complex*)data,(fftw_complex*)data);
988 fft_time+=MPI_Wtime();
991 plan->T_plan_1i->execute(plan->T_plan_1i,(
double*)data,timings,1,osize_1i[0],coords[0]);
992 MPI_Barrier(plan->c_comm);
998 fft_time-=MPI_Wtime();
999 fftw_execute_dft(plan->iplan_0,data,data_out);
1000 fft_time+=MPI_Wtime();
1004 timings[4]=fft_time;
1008 MPI_Barrier(plan->c_comm);
1019 if(plan->T_plan_1!=NULL)
delete(plan->T_plan_1);
1020 if(plan->T_plan_1i!=NULL)
delete(plan->T_plan_1i);
1021 if(plan->T_plan_2!=NULL)
delete(plan->T_plan_2);
1022 if(plan->T_plan_2i!=NULL)
delete(plan->T_plan_2i);
1023 if(plan->Mem_mgr!=NULL)
delete(plan->Mem_mgr);
1024 if(plan->fplan_0!=NULL)fftw_destroy_plan(plan->fplan_0);
1025 if(plan->fplan_1!=NULL)fftw_destroy_plan(plan->fplan_1);
1026 if(plan->fplan_2!=NULL)fftw_destroy_plan(plan->fplan_2);
1027 if(plan->iplan_0!=NULL)fftw_destroy_plan(plan->iplan_0);
1028 if(plan->iplan_1!=NULL)fftw_destroy_plan(plan->iplan_1);
1029 if(plan->iplan_2!=NULL)fftw_destroy_plan(plan->iplan_2);
1031 MPI_Comm_free(&plan->row_comm);
1032 MPI_Comm_free(&plan->col_comm);
void accfft_execute_c2c(accfft_plan *plan, int direction, Complex *data, Complex *data_out, double *timer)
void accfft_execute_c2r(accfft_plan *plan, Complex *data, double *data_out, double *timer)
void accfft_destroy_plan(accfft_plan *plan)
void accfft_execute_r2c(accfft_plan *plan, double *data, Complex *data_out, double *timer)
int accfft_init(int nthreads)
int accfft_local_size_dft_r2c(int *n, int *isize, int *istart, int *osize, int *ostart, MPI_Comm c_comm)
accfft_plan * accfft_plan_dft_3d_c2c(int *n, Complex *data, Complex *data_out, MPI_Comm c_comm, unsigned flags)
accfft_plan * accfft_plan_dft_3d_r2c(int *n, double *data, double *data_out, MPI_Comm c_comm, unsigned flags)
int accfft_local_size_dft_c2c(int *n, int *isize, int *istart, int *osize, int *ostart, MPI_Comm c_comm)