1 // Copyright Jernej Krempuš 20122 // Distributed under the Boost Software License, Version 1.0.3 // (See accompanying file LICENSE_1_0.txt or copy at4 // http://www.boost.org/LICENSE_1_0.txt)5 6 moduledplug.fft.pfft;
7 importcore.stdc.stdlib;
8 importcore.stdc.string: memcpy;
9 importcore.exception,
10 core.bitop,
11 std.array;
12 13 templateImport(TT)
14 {
15 staticif(is(TT == float))
16 importimpl = dplug.fft.impl_float;
17 elsestaticif(is(TT == double))
18 importimpl = dplug.fft.impl_double;
19 else20 staticassert(0, "Not implemented");
21 }
22 23 templatest(aliasa){ enumst = cast(size_t) a; }
24 25 /**
26 A class for calculating discrete fourier transform. The methods of this class
27 use split format for complex data. This means that a complex data set is
28 represented as two arrays - one for the real part and one for the imaginary
29 part. An instance of this class can only be used for transforms of one
30 particular size. The template parameter is the floating point type that the
31 methods of the class will operate on.
32 33 Example:
34 ---
35 import std.stdio, std.conv, std.exception;
36 import pfft.pfft;
37 38 void main(string[] args)
39 {
40 auto n = to!int(args[1]);
41 enforce((n & (n-1)) == 0, "N must be a power of two.");
42 43 alias Fft!float F;
44 45 F f;
46 f.initialize(n);
47 48 auto re = F.allocate(n);
49 auto im = F.allocate(n);
50 51 foreach(i, _; re)
52 readf("%s %s\n", &re[i], &im[i]);
53 54 f.fft(re, im);
55 56 foreach(i, _; re)
57 writefln("%s\t%s", re[i], im[i]);
58 }
59 ---
60 */61 structFft(T)
62 {
63 public:
64 nothrow:
65 @nogc:
66 mixinImport!T;
67 68 intlog2n;
69 impl.Tabletable;
70 71 /**
72 The Fft constructor. The parameter is the size of data sets that $(D fft) and
73 $(D ifft) will operate on. I will refer to this number as n in the rest of the
74 documentation for this class.Tables used in fft and ifft are calculated in the
75 constructor.
76 */77 voidinitialize(size_tn)
78 {
79 assert((n & (n - 1)) == 0);
80 log2n = bsf(n);
81 automem = alignedRealloc(table, impl.table_size_bytes(log2n), 64);
82 table = impl.fft_table(log2n, mem);
83 assert(mem == table);
84 }
85 86 ~this()
87 {
88 alignedFree(table, 64);
89 }
90 91 /**
92 Calculates discrete fourier transform. $(D_PARAM re) should contain the real
93 part of the data and $(D_PARAM im) the imaginary part of the data. The method
94 operates in place - the result is saved back to $(D_PARAM re) and $(D_PARAM im).
95 Both arrays must be properly aligned - to obtain a properly aligned array you can
96 use $(D allocate).
97 */98 voidfft(T[] re, T[] im)
99 {
100 assert(re.length == im.length);
101 assert(re.length == (st!1 << log2n));
102 assert(((impl.alignment(re.length) - 1) & cast(size_t) re.ptr) == 0);
103 assert(((impl.alignment(im.length) - 1) & cast(size_t) im.ptr) == 0);
104 105 impl.fft(re.ptr, im.ptr, log2n, table);
106 }
107 108 /**
109 Calculates inverse discrete fourier transform scaled by n. The arguments have
110 the same role as they do in $(D fft).
111 */112 voidifft(T[] re, T[] im)
113 {
114 fft(im, re);
115 }
116 117 /**
118 Returns requited alignment for use with $(D fft), $(D ifft) and
119 $(D scale) methods.
120 */121 staticsize_talignment(size_tn)
122 {
123 returnimpl.alignment(n);
124 }
125 126 /**
127 Allocates an array that is aligned properly for use with $(D fft), $(D ifft) and
128 $(D scale) methods.
129 */130 staticT[] allocate(size_tn)
131 {
132 size_tbytes = n * T.sizeof;
133 T* r = cast(T*) alignedMalloc(bytes, alignment(bytes));
134 assert(((impl.alignment(bytes) - 1) & cast(size_t) r) == 0);
135 returnr[0..n];
136 }
137 138 /**
139 Deallocates an array allocated with `allocate`.
140 */141 staticvoiddeallocate(T[] arr)
142 {
143 size_tn = arr.length;
144 alignedFree(arr.ptr, alignment(n));
145 }
146 147 148 /**
149 Scales an array data by factor k. The array must be properly aligned. To obtain
150 a properly aligned array, use $(D allocate).
151 */152 staticvoidscale(T[] data, Tk)
153 {
154 assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0);
155 impl.scale(data.ptr, data.length, k);
156 }
157 158 }
159 160 /**
161 A class for calculating real discrete fourier transform. The methods of this
162 class use split format for complex data. This means that complex data set is
163 represented as two arrays - one for the real part and one for the imaginary
164 part. An instance of this class can only be used for transforms of one
165 particular size. The template parameter is the floating point type that the
166 methods of the class will operate on.
167 168 Example:
169 ---
170 import std.stdio, std.conv, std.exception;
171 import pfft.pfft;
172 173 void main(string[] args)
174 {
175 auto n = to!int(args[1]);
176 enforce((n & (n-1)) == 0, "N must be a power of two.");
177 178 alias Rfft!float F;
179 180 F f;
181 f.initialize(n);
182 183 auto data = F.allocate(n);
184 185 foreach(ref e; data)
186 readf("%s\n", &e);
187 188 f.rfft(data);
189 190 foreach(i; 0 .. n / 2 + 1)
191 writefln("%s\t%s", data[i], (i == 0 || i == n / 2) ? 0 : data[i]);
192 }
193 ---
194 */195 structRfft(T)
196 {
197 public:
198 nothrow:
199 @nogc:
200 mixinImport!T;
201 202 intlog2n;
203 Fft!T_complex;
204 impl.RTablertable;
205 impl.ITableitable;
206 207 /**
208 The Rfft constructor. The parameter is the size of data sets that $(D rfft) will
209 operate on. I will refer to this number as n in the rest of the documentation
210 for this class. All tables used in rfft are calculated in the constructor.
211 */212 voidinitialize(size_tn)
213 {
214 // Doesn't work with lower size, but I'm unable to understand why215 assert(n >= 128);
216 217 assert((n & (n - 1)) == 0);
218 log2n = bsf(n);
219 220 _complex.initialize(n / 2);
221 222 automem = alignedRealloc(rtable, impl.rtable_size_bytes(log2n), 64);
223 rtable = impl.rfft_table(log2n, mem);
224 assert(mem == rtable);
225 226 mem = alignedRealloc(itable, impl.itable_size_bytes(log2n), 64);
227 itable = impl.interleave_table(log2n, mem);
228 assert(mem == itable);
229 }
230 231 ~this()
232 {
233 alignedFree(rtable, 64);
234 alignedFree(itable, 64);
235 }
236 237 /**
238 Calculates discrete fourier transform of the real valued sequence in data.
239 The method operates in place. When the method completes, data contains the
240 result. First $(I n / 2 + 1) elements contain the real part of the result and
241 the rest contains the imaginary part. Imaginary parts at position 0 and
242 $(I n / 2) are known to be equal to 0 and are not stored, so the content of
243 data looks like this:
244 245 $(D r(0), r(1), ... r(n / 2), i(1), i(2), ... i(n / 2 - 1))
246 247 248 The elements of the result at position greater than n / 2 can be trivially
249 calculated from the relation $(I DFT(f)[i] = DFT(f)[n - i]*) that holds
250 because the input sequence is real.
251 252 253 The length of the array must be equal to n and the array must be properly
254 aligned. To obtain a properly aligned array you can use $(D allocate).
255 */256 voidrfft(T[] data)
257 {
258 assert(data.length == (st!1 << log2n));
259 assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0);
260 261 impl.deinterleave(data.ptr, log2n, itable);
262 impl.rfft(data.ptr, data[$ / 2 .. $].ptr, log2n, _complex.table, rtable);
263 }
264 265 /**
266 Calculates the inverse of $(D rfft), scaled by n (You can use $(D scale)
267 to normalize the result). Before the method is called, data should contain a
268 complex sequence in the same format as the result of $(D rfft). It is
269 assumed that the input sequence is a discrete fourier transform of a real
270 valued sequence, so the elements of the input sequence not stored in data
271 can be calculated from $(I DFT(f)[i] = DFT(f)[n - i]*). When the method
272 completes, the array contains the real part of the inverse discrete fourier
273 transform. The imaginary part is known to be equal to 0.
274 275 The length of the array must be equal to n and the array must be properly
276 aligned. To obtain a properly aligned array you can use $(D allocate).
277 */278 voidirfft(T[] data)
279 {
280 assert(data.length == (st!1 << log2n));
281 assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0);
282 283 impl.irfft(data.ptr, data[$ / 2 .. $].ptr, log2n, _complex.table, rtable);
284 impl.interleave(data.ptr, log2n, itable);
285 }
286 287 /// An alias for Fft!T.allocate288 aliasFft!(T).allocateallocate;
289 290 /// An alias for Fft!T.deallocate291 aliasFft!(T).deallocatedeallocate;
292 293 /// An alias for Fft!T.scale294 aliasFft!(T).scalescale;
295 296 /// An alias for Fft!T.alignment297 aliasFft!(T).alignmentalignment;
298 299 @propertycomplex(){ return_complex; }
300 }
301 302 303 private:
304 305 /// Returns: `true` if the pointer is suitably aligned.306 boolisPointerAligned(void* p, size_talignment) purenothrow @nogc307 {
308 assert(alignment != 0);
309 return ( cast(size_t)p & (alignment - 1) ) == 0;
310 }
311 312 /// Allocates an aligned memory chunk.313 /// Functionally equivalent to Visual C++ _aligned_malloc.314 /// Do not mix allocations with different alignment.315 void* alignedMalloc(size_tsize, size_talignment) nothrow @nogc316 {
317 assert(alignment != 0);
318 319 // Short-cut and use the C allocator to avoid overhead if no alignment320 if (alignment == 1)
321 {
322 // C99: 323 // Implementation-defined behavior324 // Whether the calloc, malloc, and realloc functions return a null pointer 325 // or a pointer to an allocated object when the size requested is zero.326 void* res = malloc(size);
327 if (size == 0)
328 returnnull;
329 }
330 331 if (size == 0)
332 returnnull;
333 334 size_trequest = requestedSize(size, alignment);
335 void* raw = malloc(request);
336 337 if (request > 0 && raw == null) // malloc(0) can validly return anything338 onOutOfMemoryError();
339 340 returnstoreRawPointerPlusSizeAndReturnAligned(raw, size, alignment);
341 }
342 343 /// Frees aligned memory allocated by alignedMalloc or alignedRealloc.344 /// Functionally equivalent to Visual C++ _aligned_free.345 /// Do not mix allocations with different alignment.346 voidalignedFree(void* aligned, size_talignment) nothrow @nogc347 {
348 assert(alignment != 0);
349 350 // Short-cut and use the C allocator to avoid overhead if no alignment351 if (alignment == 1)
352 returnfree(aligned);
353 354 // support for free(NULL)355 if (alignedisnull)
356 return;
357 358 void** rawLocation = cast(void**)(cast(char*)aligned - size_t.sizeof);
359 free(*rawLocation);
360 }
361 362 /// Returns: next pointer aligned with alignment bytes.363 void* nextAlignedPointer(void* start, size_talignment) purenothrow @nogc364 {
365 returncast(void*)nextMultipleOf(cast(size_t)(start), alignment);
366 }
367 368 // Returns number of bytes to actually allocate when asking369 // for a particular alignement370 @nogcsize_trequestedSize(size_taskedSize, size_talignment) purenothrow371 {
372 enumsize_tpointerSize = size_t.sizeof;
373 returnaskedSize + alignment - 1 + pointerSize * 2;
374 }
375 376 // Store pointer given my malloc, and size in bytes initially requested (alignedRealloc needs it)377 @nogcvoid* storeRawPointerPlusSizeAndReturnAligned(void* raw, size_tsize, size_talignment) nothrow378 {
379 enumsize_tpointerSize = size_t.sizeof;
380 char* start = cast(char*)raw + pointerSize * 2;
381 void* aligned = nextAlignedPointer(start, alignment);
382 void** rawLocation = cast(void**)(cast(char*)aligned - pointerSize);
383 *rawLocation = raw;
384 size_t* sizeLocation = cast(size_t*)(cast(char*)aligned - 2 * pointerSize);
385 *sizeLocation = size;
386 returnaligned;
387 }
388 389 // Returns: x, multiple of powerOfTwo, so that x >= n.390 @nogcsize_tnextMultipleOf(size_tn, size_tpowerOfTwo) purenothrow391 {
392 // check power-of-two393 assert( (powerOfTwo != 0) && ((powerOfTwo & (powerOfTwo - 1)) == 0));
394 395 size_tmask = ~(powerOfTwo - 1);
396 return (n + powerOfTwo - 1) & mask;
397 }
398 399 /// Reallocates an aligned memory chunk allocated by alignedMalloc or alignedRealloc.400 /// Functionally equivalent to Visual C++ _aligned_realloc.401 /// Do not mix allocations with different alignment.402 @nogcvoid* alignedRealloc(void* aligned, size_tsize, size_talignment) nothrow403 {
404 assert(isPointerAligned(aligned, alignment));
405 406 // If you fail here, it can mean you've used an uninitialized AlignedBuffer.407 assert(alignment != 0);
408 409 // Short-cut and use the C allocator to avoid overhead if no alignment410 if (alignment == 1)
411 {
412 void* res = realloc(aligned, size);
413 414 // C99: 415 // Implementation-defined behavior416 // Whether the calloc, malloc, and realloc functions return a null pointer 417 // or a pointer to an allocated object when the size requested is zero.418 if (size == 0)
419 returnnull;
420 421 returnres;
422 }
423 424 if (alignedisnull)
425 returnalignedMalloc(size, alignment);
426 427 if (size == 0)
428 {
429 alignedFree(aligned, alignment);
430 returnnull;
431 }
432 433 size_tpreviousSize = *cast(size_t*)(cast(char*)aligned - size_t.sizeof * 2);
434 435 436 void* raw = *cast(void**)(cast(char*)aligned - size_t.sizeof);
437 size_trequest = requestedSize(size, alignment);
438 439 // Heuristic: if a requested size is within 50% to 100% of what is already allocated440 // then exit with the same pointer441 if ( (previousSize < request * 4) && (request <= previousSize) )
442 returnaligned;
443 444 void* newRaw = malloc(request);
445 staticif( __VERSION__ > 2067 ) // onOutOfMemoryError wasn't nothrow before July 2014446 {
447 if (request > 0 && newRaw == null) // realloc(0) can validly return anything448 onOutOfMemoryError();
449 }
450 451 void* newAligned = storeRawPointerPlusSizeAndReturnAligned(newRaw, request, alignment);
452 size_tminSize = size < previousSize ? size : previousSize;
453 memcpy(newAligned, aligned, minSize); // memcpy OK454 455 // Free previous data456 alignedFree(aligned, alignment);
457 isPointerAligned(newAligned, alignment);
458 returnnewAligned;
459 }
460 461 462 unittest463 {
464 {
465 intn = 16;
466 Fft!floatA;
467 A.initialize(n);
468 float[] re = A.allocate(n);
469 float[] im = A.allocate(n);
470 scope(exit) A.deallocate(re);
471 scope(exit) A.deallocate(im);
472 re[] = 1.0f;
473 im[] = 0.0f;
474 A.fft(re, im);
475 A.ifft(re, im);
476 }
477 478 {
479 intn = 128;
480 Rfft!floatB;
481 B.initialize(n);
482 float[] data = B.allocate(n);
483 scope(exit) B.deallocate(data);
484 data[] = 1.0f;
485 B.rfft(data);
486 B.irfft(data);
487 }
488 }