1 /**
2     Defines `Vec`, `reallocBuffer` and memory functions.
3 
4     Copyright: Guillaume Piolat 2015-2024.
5     License:   http://www.boost.org/LICENSE_1_0.txt
6 */
7 module dplug.core.vec;
8 
9 import std.traits: hasElaborateDestructor;
10 
11 import core.stdc.stdlib: malloc, free, realloc;
12 import core.stdc.string: memcpy, memmove;
13 
14 import core.exception;
15 import inteli.xmmintrin;
16 
17 
18 // This module deals with aligned memory.
19 // You'll also find here a non-copyable std::vector equivalent `Vec`.
20 
21 /// Allocates an aligned memory chunk.
22 /// Functionally equivalent to Visual C++ _aligned_malloc.
23 /// Do not mix allocations with different alignment.
24 ///
25 /// Important: `alignedMalloc(0)` does not necessarily
26 ///            return `null`, and its result
27 ///            _has_ to be freed with `alignedFree`.
28 ///
29 /// Important: Don't call that often in a real-time thread
30 ///            without amortization (keep a capacity).
31 ///            If you need to allocate from `nextBuffer`, do
32 ///            prefer the use of `Vec` which doesn't shrink,
33 ///            and will reuse the allocation.
34 void* alignedMalloc(size_t size, size_t alignment) nothrow @nogc
35 {
36     assert(alignment != 0);
37 
38     // Short-cut and use the C allocator to avoid overhead if no alignment
39     if (alignment == 1)
40     {
41         // C99:
42         // Implementation-defined behavior
43         // Whether the calloc, malloc, and realloc functions return a null pointer
44         // or a pointer to an allocated object when the size requested is zero.
45         // In any case, we'll have to free() it.
46         return malloc(size);
47     }
48 
49     size_t request = requestedSize(size, alignment);
50     void* raw = malloc(request);
51 
52     if (request > 0 && raw == null) // malloc(0) can validly return anything
53         onOutOfMemoryError();
54 
55     return storeRawPointerPlusSizeAndReturnAligned(raw, size, alignment);
56 }
57 
58 /// Frees aligned memory allocated by alignedMalloc or alignedRealloc.
59 /// Functionally equivalent to Visual C++ _aligned_free.
60 /// Do not mix allocations with different alignment.
61 void alignedFree(void* aligned, size_t alignment) nothrow @nogc
62 {
63     // Short-cut and use the C allocator to avoid overhead if no alignment
64     if (alignment == 1)
65         return free(aligned);
66 
67     // support for free(NULL)
68     if (aligned is null)
69         return;
70 
71     assert(alignment != 0);
72     assert(isPointerAligned(aligned, alignment));
73 
74     void** rawLocation = cast(void**)(cast(char*)aligned - size_t.sizeof);
75     free(*rawLocation);
76 }
77 
78 /// Reallocates an aligned memory chunk allocated by `alignedMalloc` or `alignedRealloc`.
79 /// Functionally equivalent to Visual C++ `_aligned_realloc`.
80 /// Do not mix allocations with different alignment.
81 ///
82 /// The "discard" version doesn't preserve data.
83 ///
84 /// Important: `alignedRealloc(p, 0)` does not necessarily return `null`, and its result
85 ///            _has_ to be freed with `alignedFree`.
86 ///
87 /// Important: Don't call that often in a real-time thread
88 ///            without amortization (keep a capacity).
89 ///            If you need to allocate from `nextBuffer`, do
90 ///            prefer the use of `Vec` which doesn't shrink,
91 ///            and will reuse the allocation.
92 void* alignedRealloc(void* aligned, size_t size, size_t alignment) nothrow @nogc
93 {
94     return alignedReallocImpl!true(aligned, size, alignment);
95 }
96 ///ditto
97 void* alignedReallocDiscard(void* aligned, size_t size, size_t alignment) nothrow @nogc
98 {
99     return alignedReallocImpl!false(aligned, size, alignment);
100 }
101 
102 
103 /// Returns: `true` if the pointer is suitably aligned.
104 bool isPointerAligned(void* p, size_t alignment) pure nothrow @nogc
105 {
106     assert(alignment != 0);
107     return ( cast(size_t)p & (alignment - 1) ) == 0;
108 }
109 unittest
110 {
111     ubyte b;
112     align(16) ubyte[5] c;
113     assert(isPointerAligned(&b, 1));
114     assert(!isPointerAligned(&c[1], 2));
115     assert(isPointerAligned(&c[4], 4));
116 }
117 
118 /// Does memory slices a[0..a_size] and b[0..b_size] have an
119 /// overlapping byte?
120 bool isMemoryOverlapping(const(void)* a, ptrdiff_t a_size,
121                          const(void)* b, ptrdiff_t b_size)
122     pure @trusted
123 {
124     assert(a_size >= 0 && b_size >= 0);
125 
126     if (a is null || b is null)
127         return false;
128 
129     if (a_size == 0 || b_size == 0)
130         return false;
131 
132     ubyte* lA = cast(ubyte*)a;
133     ubyte* hA = lA + a_size;
134     ubyte* lB = cast(ubyte*)b;
135     ubyte* hB = lB + b_size;
136 
137     // There is overlapping, if lA is inside lB..hB,
138     // or lB is inside lA..hA
139 
140     if (lA >= lB && lA < hB)
141         return true;
142 
143     if (lB >= lA && lB < hA)
144         return true;
145 
146     return false;
147 }
148 bool isMemoryOverlapping(const(void)[] a, const(void)[] b) pure @trusted
149 {
150     return isMemoryOverlapping(a.ptr, a.length, b.ptr, b.length);
151 }
152 unittest
153 {
154     ubyte[100] a;
155     assert(!isMemoryOverlapping(null, a));
156     assert(!isMemoryOverlapping(a, null));
157     assert(!isMemoryOverlapping(a[1..1], a[0..10]));
158     assert(!isMemoryOverlapping(a[1..10], a[10..100]));
159     assert(!isMemoryOverlapping(a[30..100], a[0..30]));
160     assert(isMemoryOverlapping(a[1..50], a[49..100]));
161     assert(isMemoryOverlapping(a[49..100], a[1..50]));
162     assert(isMemoryOverlapping(a[40..45], a[30..55]));
163     assert(isMemoryOverlapping(a[30..55], a[40..45]));
164 }
165 
166 private nothrow @nogc
167 {
168     void* alignedReallocImpl(bool PreserveDataIfResized)(void* aligned, size_t size, size_t alignment)
169     {
170         // Short-cut and use the C allocator to avoid overhead if no alignment
171         if (alignment == 1)
172         {
173             // C99:
174             // Implementation-defined behavior
175             // Whether the calloc, malloc, and realloc functions return a null pointer
176             // or a pointer to an allocated object when the size requested is zero.
177             // In any case, we'll have to `free()` it.
178             return realloc(aligned, size);
179         }
180 
181         if (aligned is null)
182             return alignedMalloc(size, alignment);
183 
184         assert(alignment != 0);
185         assert(isPointerAligned(aligned, alignment));
186 
187         size_t previousSize = *cast(size_t*)(cast(char*)aligned - size_t.sizeof * 2);
188 
189         void* raw = *cast(void**)(cast(char*)aligned - size_t.sizeof);
190         size_t request = requestedSize(size, alignment);
191         size_t previousRequest = requestedSize(previousSize, alignment);
192         assert(previousRequest - request == previousSize - size); // same alignment
193 
194         // Heuristic: if a requested size is within 50% to 100% of what is already allocated
195         //            then exit with the same pointer
196         if ( (previousRequest < request * 4) && (request <= previousRequest) )
197             return aligned;
198 
199         void* newRaw = malloc(request);
200         static if( __VERSION__ > 2067 ) // onOutOfMemoryError wasn't nothrow before July 2014
201         {
202             if (request > 0 && newRaw == null) // realloc(0) can validly return anything
203                 onOutOfMemoryError();
204         }
205 
206         void* newAligned = storeRawPointerPlusSizeAndReturnAligned(newRaw, size, alignment);
207 
208         static if (PreserveDataIfResized)
209         {
210             size_t minSize = size < previousSize ? size : previousSize;
211             memcpy(newAligned, aligned, minSize); // memcpy OK
212         }
213 
214         // Free previous data
215         alignedFree(aligned, alignment);
216         assert(isPointerAligned(newAligned, alignment));
217         return newAligned;
218     }
219 
220     /// Returns: next pointer aligned with alignment bytes.
221     void* nextAlignedPointer(void* start, size_t alignment) pure
222     {
223         import dplug.core.math : nextMultipleOf;
224         return cast(void*)nextMultipleOf(cast(size_t)(start), alignment);
225     }
226 
227     // Returns number of bytes to actually allocate when asking
228     // for a particular alignement
229     size_t requestedSize(size_t askedSize, size_t alignment) pure
230     {
231         enum size_t pointerSize = size_t.sizeof;
232         return askedSize + alignment - 1 + pointerSize * 2;
233     }
234 
235     // Store pointer given my malloc, and size in bytes initially requested (alignedRealloc needs it)
236     void* storeRawPointerPlusSizeAndReturnAligned(void* raw, size_t size, size_t alignment)
237     {
238         enum size_t pointerSize = size_t.sizeof;
239         char* start = cast(char*)raw + pointerSize * 2;
240         void* aligned = nextAlignedPointer(start, alignment);
241         void** rawLocation = cast(void**)(cast(char*)aligned - pointerSize);
242         *rawLocation = raw;
243         size_t* sizeLocation = cast(size_t*)(cast(char*)aligned - 2 * pointerSize);
244         *sizeLocation = size;
245         assert( isPointerAligned(aligned, alignment) );
246         return aligned;
247     }
248 }
249 
250 unittest
251 {
252     {
253         void* p = alignedMalloc(23, 16);
254         assert(p !is null);
255         assert(((cast(size_t)p) & 0xf) == 0);
256 
257         alignedFree(p, 16);
258     }
259 
260     void* nullAlloc = alignedMalloc(0, 16);
261     assert(nullAlloc != null);
262     nullAlloc = alignedRealloc(nullAlloc, 0, 16);
263     assert(nullAlloc != null);
264     alignedFree(nullAlloc, 16);
265 
266     {
267         int alignment = 16;
268         int* p = null;
269 
270         // check if growing keep values in place
271         foreach(int i; 0..100)
272         {
273             p = cast(int*) alignedRealloc(p, (i + 1) * int.sizeof, alignment);
274             p[i] = i;
275         }
276 
277         foreach(int i; 0..100)
278             assert(p[i] == i);
279 
280         p = cast(int*) alignedRealloc(p, 0, alignment);
281         assert(p !is null);
282 
283         alignedFree(p, alignment);
284     }
285 
286     // Verify that same size alloc preserve pointer.
287     {
288         void* p = null;
289         p = alignedRealloc(p, 254, 16);
290         void* p2 = alignedRealloc(p, 254, 16);
291         assert(p == p2);
292 
293         // Test shrink heuristic
294         void* p3 = alignedRealloc(p, 128, 16);
295         assert(p == p3);
296         alignedFree(p3, 16);
297     }
298 }
299 
300 
301 
302 /// Used throughout dplug:dsp to avoid reliance on GC.
303 /// Important: Size 0 is special-case to free the slice.
304 /// This works a bit like alignedRealloc except with slices as input.
305 /// You MUST use consistent alignement thoughout the lifetime of this buffer.
306 ///
307 /// Params:
308 ///    buffer = Existing allocated buffer. Can be null.
309 ///             Input slice length is not considered.
310 ///    length = Desired slice length.
311 ///    alignment = Alignement if the slice has allocation requirements, 1 else.
312 ///                Must match for deallocation.
313 ///
314 /// Example:
315 /// ---
316 /// import std.stdio;
317 ///
318 /// struct MyDSP
319 /// {
320 /// nothrow @nogc:
321 ///
322 ///     void initialize(int maxFrames)
323 ///     {
324 ///         // mybuf points to maxFrames frames
325 ///         mybuf.reallocBuffer(maxFrames);
326 ///     }
327 ///
328 ///     ~this()
329 ///     {
330 ///         // If you don't free the buffer, it will leak.
331 ///         mybuf.reallocBuffer(0);
332 ///     }
333 ///
334 /// private:
335 ///     float[] mybuf;
336 /// }
337 /// ---
338 /// Important: Don't call that often in a real-time thread
339 ///            without amortization (keep a capacity).
340 ///            If you need to allocate from `nextBuffer`, do
341 ///            prefer the use of `Vec` which doesn't shrink,
342 ///            and will reuse the allocation.
343 ///            This, instead, will shrink the allocation
344 ///            which makes it more expensive.
345 void reallocBuffer(T)(ref T[] buffer, size_t length, int alignment = 1) nothrow @nogc
346 {
347     static if (is(T == struct) && hasElaborateDestructor!T)
348     {
349         static assert(false); // struct with destructors not supported
350     }
351 
352     /// Size 0 is special-case to free the slice.
353     if (length == 0)
354     {
355         alignedFree(buffer.ptr, alignment);
356         buffer = null;
357         return;
358     }
359 
360     T* pointer = cast(T*) alignedRealloc(buffer.ptr, T.sizeof * length, alignment);
361     if (pointer is null)
362         buffer = null; // alignment 1 can still return null
363     else
364         buffer = pointer[0..length];
365 }
366 unittest
367 {
368     int[] arr;
369     arr.reallocBuffer(15);
370     assert(arr.length == 15);
371     arr.reallocBuffer(0);
372     assert(arr.length == 0);
373 }
374 
375 
376 /// Returns: A newly created `Vec`.
377 Vec!T makeVec(T)(size_t initialSize = 0) nothrow @nogc
378 {
379     return Vec!T(initialSize);
380 }
381 
382 deprecated("Vec!T must have alignment 1. This constructor will be removed in Dplug v16")
383 Vec!T makeVec(T)(size_t initialSize, int alignment) nothrow @nogc
384 {
385     return Vec!T(initialSize, alignment);
386 }
387 
388 /// Kind of a std::vector replacement.
389 /// Grow-only array, points to a (optionally aligned) memory location.
390 /// This can also work as an output range.
391 /// `Vec` is designed to work even when uninitialized, without `makeVec`.
392 /// Warning: it is pretty barebones, doesn't respect T.init or call destructors.
393 ///          When used in a GC program, GC roots won't be registered.
394 struct Vec(T)
395 {
396 nothrow:
397 @nogc:
398 
399     static if (is(T == struct) && hasElaborateDestructor!T)
400     {
401         static assert(false, "struct with destructors cannot go in `Vec!T`. You are welcome to use the `numem` package instead or do otherwise.");
402     }
403 
404     public
405     {
406         /// Creates an aligned buffer with given initial size.
407         this(size_t initialSize) @safe
408         {
409             _size = 0;
410             _allocated = 0;
411             _data = null;
412             _alignment = 1;
413             resizeExactly(initialSize);
414         }
415 
416         deprecated("Vec!T must have alignment 1. This constructor will be removed in Dplug v16")
417             this(size_t initialSize, int alignment) @safe
418         {
419             assert(alignment != 0);
420             _size = 0;
421             _allocated = 0;
422             _data = null;
423             _alignment = alignment;
424             resizeExactly(initialSize);
425         }
426 
427         ~this() @trusted
428         {
429             if (_data !is null)
430             {
431                 alignedFree(_data, _alignment);
432                 _data = null;
433                 _allocated = 0;
434             }
435         }
436 
437         @disable this(this);
438 
439         /// Returns: Length of buffer in elements.
440         size_t length() pure const @safe
441         {
442             return _size;
443         }
444         ///ditto
445         alias size = length;
446 
447         /// Returns: Length of buffer in elements, as signed.
448         ptrdiff_t slength() pure const @safe
449         {
450             return _size;
451         }
452         ///ditto
453         alias ssize = length;
454 
455         /// Returns: `true` if length is zero.
456         bool isEmpty() pure const @safe
457         {
458             return _size == 0;
459         }
460 
461         /// Returns: Allocated size of the underlying array.
462         size_t capacity() pure const @safe
463         {
464             return _allocated;
465         }
466 
467         /// Returns: Length of buffer in elements.
468         alias opDollar = length;
469 
470         /// Resizes a buffer to hold $(D askedSize) elements.
471         void resize(size_t askedSize) @trusted
472         {
473             resizeExactly(askedSize);
474         }
475 
476         /// Pop last element
477         T popBack() @trusted
478         {
479             assert(_size > 0);
480             _size = _size - 1;
481             return _data[_size];
482         }
483 
484         /// Append an element to this buffer.
485         void pushBack(T x) @trusted
486         {
487             size_t i = _size;
488             resizeGrow(_size + 1);
489             _data[i] = x;
490         }
491 
492         // DMD 2.088 deprecates the old D1-operators
493         static if (__VERSION__ >= 2088)
494         {
495             ///ditto
496             void opOpAssign(string op)(T x) @safe if (op == "~")
497             {
498                 pushBack(x);
499             }
500         }
501         else
502         {
503             ///ditto
504             void opCatAssign(T x) @safe
505             {
506                 pushBack(x);
507             }
508         }
509 
510         // Output range support
511         alias put = pushBack;
512 
513         /// Finds an item, returns -1 if not found
514         int indexOf(T x) @trusted
515         {
516             enum bool isStaticArray(T) = __traits(isStaticArray, T);
517 
518             static if (isStaticArray!T)
519             {
520                 // static array would be compared by identity as slice, which is not what we want.
521                 foreach(int i; 0..cast(int)_size)
522                     if (_data[i] == x)
523                         return i;
524             }
525             else
526             {
527                 // base types: identity is equality
528                 // reference types: looking for identity
529                 foreach(int i; 0..cast(int)_size)
530                     if (_data[i] is x)
531                         return i;
532             }
533             return -1;
534         }
535 
536         /// Removes an item and replaces it by the last item.
537         /// Warning: this reorders the array.
538         void removeAndReplaceByLastElement(size_t index) @trusted
539         {
540             assert(index < _size);
541             _data[index] = _data[--_size];
542         }
543 
544         /// Removes an item and shift the rest of the array to front by 1.
545         /// Warning: O(N) complexity.
546         void removeAndShiftRestOfArray(size_t index) @trusted
547         {
548             assert(index < _size);
549             for (; index + 1 < _size; ++index)
550                 _data[index] = _data[index+1];
551             --_size;
552         }
553 
554         /// Removes a range of items and shift the rest of the array to front.
555         /// Warning: O(N) complexity.
556         void removeAndShiftRestOfArray(size_t first, size_t last) @trusted
557         {
558             assert(first <= last && first <= _size && (last <= _size));
559             size_t remain = _size - last;
560             for (size_t n = 0; n < remain; ++n)
561                 _data[first + n] = _data[last + n];
562             _size -= (last - first);
563         }
564 
565         /// Appends another buffer to this buffer.
566         void pushBack(ref Vec other) @trusted
567         {
568             size_t oldSize = _size;
569             resizeGrow(_size + other._size);
570             memmove(_data + oldSize, other._data, T.sizeof * other._size);
571         }
572 
573         /// Appends a slice to this buffer.
574         /// `slice` should not belong to the same buffer _data.
575         void pushBack(T[] slice) @trusted
576         {
577             size_t oldSize = _size;
578             size_t newSize = _size + slice.length;
579             resizeGrow(newSize);
580             for (size_t n = 0; n < slice.length; ++n)
581                 _data[oldSize + n] = slice[n];
582         }
583 
584         /// Returns: Raw pointer to data.
585         @property inout(T)* ptr() inout @system
586         {
587             return _data;
588         }
589 
590         /// Returns: n-th element.
591         ref inout(T) opIndex(size_t i) pure inout @trusted
592         {
593             return _data[i];
594         }
595 
596         T opIndexAssign(T x, size_t i) pure @trusted
597         {
598             return _data[i] = x;
599         }
600 
601         /// Sets size to zero, but keeps allocated buffers.
602         void clearContents() pure @safe
603         {
604             _size = 0;
605         }
606 
607         /// Returns: Whole content of the array in one slice.
608         inout(T)[] opSlice() inout @safe
609         {
610             return opSlice(0, length());
611         }
612 
613         /// Returns: A slice of the array.
614         inout(T)[] opSlice(size_t i1, size_t i2) inout @trusted
615         {
616             return _data[i1 .. i2];
617         }
618 
619         /// Fills the buffer with the same value.
620         void fill(T x) @trusted
621         {
622             _data[0.._size] = x;
623         }
624 
625         /// Move. Give up owner ship of the data.
626         T[] releaseData() @system
627         {
628             T[] data = _data[0.._size];
629             assert(_alignment == 1); // else would need to be freed with alignedFree.
630             this._data = null;
631             this._size = 0;
632             this._allocated = 0;
633             this._alignment = 0;
634             return data;
635         }
636     }
637 
638     private
639     {
640         size_t _size = 0;
641         T* _data = null;
642         size_t _allocated = 0;
643         size_t _alignment = 1; // for an unaligned Vec, you probably are not interested in alignment
644 
645         /// Used internally to grow in response to a pushBack operation.
646         /// Different heuristic, since we know that the resize is likely to be repeated for an
647         /// increasing size later.
648         void resizeGrow(size_t askedSize) @trusted
649         {
650             if (_allocated < askedSize)
651             {
652 
653                 version(all)
654                 {
655                     size_t newCap = computeNewCapacity(askedSize, _size);
656                     setCapacity(newCap);
657                 }
658                 else
659                 {
660                     setCapacity(2 * askedSize);
661                 }
662             }
663             _size = askedSize;
664         }
665 
666         // Resizes the `Vector` to hold exactly `askedSize` elements.
667         // Still if the allocated capacity is larger, do nothing.
668         void resizeExactly(size_t askedSize) @trusted
669         {
670             if (_allocated < askedSize)
671             {
672                 setCapacity(askedSize);
673             }
674             _size = askedSize;
675         }
676 
677         // Internal use, realloc internal buffer, copy existing items.
678         // Doesn't initialize the new ones.
679         void setCapacity(size_t cap)
680         {
681             size_t numBytes = cap * T.sizeof;
682             _data = cast(T*)(alignedRealloc(_data, numBytes, _alignment));
683             _allocated = cap;
684         }
685 
686         // Compute new capacity, while growing.
687         size_t computeNewCapacity(size_t newLength, size_t oldLength)
688         {
689             // Optimal value (Windows malloc) not far from there.
690             enum size_t PAGESIZE = 4096;
691 
692             size_t newLengthBytes = newLength * T.sizeof;
693             if (newLengthBytes > PAGESIZE)
694             {
695                 // Larger arrays need a smaller growth factor to avoid wasting too much bytes.
696                 // This was found when tracing curve, not too far from golden ratio for some reason.
697                 return newLength + newLength / 2 + newLength / 8;
698             }
699             else
700             {
701                 // For smaller arrays being pushBack, can bring welcome speed by minimizing realloc.
702                 return newLength * 3;
703             }
704         }
705     }
706 }
707 
708 unittest
709 {
710     import std.range.primitives;
711     static assert(isOutputRange!(Vec!ubyte, ubyte));
712 
713 
714     import std.random;
715     int NBUF = 200;
716 
717     Xorshift32 rng;
718     rng.seed(0xBAADC0DE);
719 
720     struct box2i { int a, b, c, d; }
721     Vec!box2i[] boxes;
722     boxes.length = NBUF;
723 
724     foreach(i; 0..NBUF)
725     {
726         boxes[i] = makeVec!box2i();
727     }
728 
729     foreach(j; 0..200)
730     {
731         foreach(i; 0..NBUF)
732         {
733             int previousSize = cast(int)(boxes[i].length);
734             void* previousPtr = boxes[i].ptr;
735             foreach(int k; 0..cast(int)(boxes[i].length))
736                 boxes[i][k] = box2i(k, k, k, k);
737 
738             int newSize = uniform(0, 100, rng);
739             boxes[i].resize(newSize);
740 
741             int minSize = previousSize;
742             if (minSize > boxes[i].length)
743                 minSize = cast(int)(boxes[i].length);
744 
745             void* newPtr = boxes[i].ptr;
746             foreach(int k; 0..minSize)
747             {
748                 box2i item = boxes[i][k];
749                 box2i shouldBe = box2i(k, k, k, k);
750                 assert(item == shouldBe);
751             }
752 
753             int sum = 0;
754             foreach(k; 0..newSize)
755             {
756                 box2i bb = boxes[i][k];
757                 sum += bb.a + bb.b + bb.c + bb.d;
758             }
759         }
760     }
761 
762     foreach(i; 0..NBUF)
763         boxes[i].destroy();
764 
765     {
766         auto buf = makeVec!int;
767         enum N = 10;
768         buf.resize(N);
769         foreach(i ; 0..N)
770             buf[i] = i;
771 
772         foreach(i ; 0..N)
773             assert(buf[i] == i);
774 
775         auto buf2 = makeVec!int;
776         buf2.pushBack(11);
777         buf2.pushBack(14);
778 
779         // test pushBack(slice)
780         buf.clearContents();
781         buf.pushBack(buf2[]);
782         assert(buf[0] == 11);
783         assert(buf[1] == 14);
784 
785         // test pushBack(slice)
786         buf2[1] = 8;
787         buf.clearContents();
788         buf.pushBack(buf2);
789         assert(buf[0] == 11);
790         assert(buf[1] == 8);
791     }
792 }
793 
794 // Vec should work without any initialization
795 unittest
796 {
797     Vec!string vec;
798 
799     foreach(e; vec[])
800     {
801     }
802 
803     assert(vec.length == 0);
804     assert(vec.size == 0);
805     assert(vec.slength == 0);
806     assert(vec.ssize == 0);
807     vec.clearContents();
808     vec.resize(0);
809     assert(vec == vec.init);
810     vec.fill("filler");
811     assert(vec.ptr is null);
812 }
813 
814 // Issue #312: vec.opIndex not returning ref which break struct assignment
815 unittest
816 {
817     static struct A
818     {
819         int x;
820     }
821     Vec!A vec = makeVec!A();
822     A a;
823     vec.pushBack(a);
824     vec ~= a;
825     vec[0].x = 42; // vec[0] needs to return a ref
826     assert(vec[0].x == 42);
827 }
828 unittest // removeAndShiftRestOfArray was wrong
829 {
830     Vec!int v;
831     v.pushBack(14);
832     v.pushBack(27);
833     v.pushBack(38);
834     assert(v.length == 3);
835     v.removeAndShiftRestOfArray(1);
836     assert(v.length == 2);
837     assert(v[] == [14, 38]);
838 }
839 unittest // removeAndShiftRestOfArray with slice
840 {
841     Vec!int v;
842     v.pushBack([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
843     v.removeAndShiftRestOfArray(3, 5);
844     assert(v[] == [0, 1, 2, 5, 6, 7, 8, 9]);
845     v.removeAndShiftRestOfArray(0, 4);
846     assert(v[] == [6, 7, 8, 9]);
847     v.removeAndShiftRestOfArray(2, 4);
848     assert(v[] == [6, 7]);
849     v.removeAndShiftRestOfArray(2, 2);
850     v.removeAndShiftRestOfArray(0, 0);
851     v.removeAndShiftRestOfArray(1, 1);
852     assert(v[] == [6, 7]);
853 }
854 
855 
856 
857 /// Allows to merge the allocation of several arrays, which saves allocation count and can speed up things thanks to locality.
858 ///
859 /// Example: see below unittest.
860 struct MergedAllocation
861 {
862 nothrow:
863 @nogc:
864 
865     // In debug mode, add stomp detection to `MergedAllocation`.
866     // This takes additional complexity to coarse check for stomping nearby buffers.
867     debug
868     {
869         private enum mergedAllocStompWarning = true;
870     }
871     else
872     {
873         private enum mergedAllocStompWarning = false;
874     }
875 
876 
877     // This adds 32-byte of sentinels around each allocation,
878     // and check at the end of the program that they were unaffected.
879     static if (mergedAllocStompWarning)
880     {
881         // Number of bytes to write between areas to check for stomping.
882         enum SENTINEL_BYTES = 32;
883     }
884 
885     enum maxExpectedAlignment = 32;
886 
887     /// Start defining the area of allocations.
888     void start()
889     {
890         // Detect memory errors in former uses.
891         static if (mergedAllocStompWarning)
892         {
893             checkSentinelAreas();
894             _allocateWasCalled = false;
895         }
896 
897         _base = cast(ubyte*)(cast(size_t)0);
898     }
899 
900     /// Allocate (or count) space needed for `numElems` elements of type `T` with given alignment.
901     /// This gets called twice for each array, see example for usage.
902     ///
903     /// This bumps the internal bump allocator.
904     /// Giving null to this chain and converting the result to size_t give the total needed size
905     /// for the merged allocation.
906     ///
907     /// Warning:
908     ///          - If called after a `start()` call, the area returned are wrong and are only for
909     ///             counting needed bytes. Don't use those.
910     ///
911     ///          - If called after an `allocate()` call, the area returned are an actual merged
912     ///            allocation (if the same calls are done).
913     ///
914     /// Warning: Prefer `allocArray` over `alloc` variant, since the extra length field WILL help
915     ///          you catch memory errors before release. Else it is very common to miss buffer
916     ///          overflows in samplerate changes.
917     void allocArray(T)(out T[] array, size_t numElems, size_t alignment = 1)
918     {
919         assert(alignment <= maxExpectedAlignment);
920         assert( (alignment != 0) && ((alignment & (alignment - 1)) == 0)); // power of two
921 
922         size_t adr = cast(size_t) _base;
923 
924         // 1. Align base address
925         size_t mask = ~(alignment - 1);
926         adr = (adr + alignment - 1) & mask;
927 
928         // 2. Assign array and base.
929         array = (cast(T*)adr)[0..numElems];
930         adr += T.sizeof * numElems;
931         _base = cast(ubyte*) adr;
932 
933         static if (mergedAllocStompWarning)
934         {
935             if (_allocateWasCalled && _allocation !is null)
936             {
937                 // Each allocated area followed with SENTINEL_BYTES bytes of value 0xCC
938                 _base[0..SENTINEL_BYTES] = 0xCC;
939                 registerSentinel(_base);
940             }
941             _base += SENTINEL_BYTES;
942         }
943     }
944 
945     ///ditto
946     void alloc(T)(out T* array, size_t numElems, size_t alignment = 1)
947     {
948         T[] arr;
949         allocArray(arr, numElems, alignment);
950         array = arr.ptr;
951     }
952 
953     /// Allocate actual storage for the merged allocation. From there, you need to define exactly the same area with `alloc` and `allocArray`.
954     /// This time they will get a proper value.
955     void allocate()
956     {
957         static if (mergedAllocStompWarning) _allocateWasCalled = true;
958 
959         size_t sizeNeeded =  cast(size_t)_base; // since it was fed 0 at start.
960 
961         if (sizeNeeded == 0)
962         {
963             // If no bytes are requested, it means no buffer were requested, or only with zero size.
964             // We will return a null pointer in that case, since accessing them would be illegal anyway.
965             _allocation = null;
966         }
967         else
968         {
969             // the merged allocation needs to have the largest expected alignment, else the size could depend on the hazards
970             // of the allocation. With maximum alignment, padding is the same so long as areas have smaller or equal alignment requirements.
971             _allocation = cast(ubyte*) _mm_realloc(_allocation, sizeNeeded, maxExpectedAlignment);
972         }
973 
974         // So that the next layout call points to the right area.
975         _base = _allocation;
976     }
977 
978     ~this()
979     {
980         static if (mergedAllocStompWarning)
981         {
982             checkSentinelAreas();
983         }
984 
985         if (_allocation != null)
986         {
987             _mm_free(_allocation);
988             _allocation = null;
989         }
990     }
991 
992 private:
993 
994     // Location of the allocation.
995     ubyte* _allocation = null;
996 
997     ///
998     ubyte* _base = null;
999 
1000     static if (mergedAllocStompWarning)
1001     {
1002         bool _allocateWasCalled = false;
1003 
1004         Vec!(ubyte*) _sentinels; // start of sentinel area (SENTINAL_BYTES long)
1005 
1006         void registerSentinel(ubyte* start)
1007         {
1008             _sentinels.pushBack(start);
1009         }
1010 
1011         bool hasMemoryError() // true if sentinel bytes stomped
1012         {
1013             assert(_allocateWasCalled && _allocation !is null);
1014 
1015             foreach(ubyte* s; _sentinels[])
1016             {
1017                 for (int n = 0; n < 32; ++n)
1018                 {
1019                     if (s[n] != 0xCC)
1020                         return true;
1021                 }
1022             }
1023             return false;
1024         }
1025 
1026         // Check existing sentinels, and unregister them
1027         void checkSentinelAreas()
1028         {
1029             if (!_allocateWasCalled)
1030                 return; // still haven't done an allocation, nothing to check.
1031 
1032             if (_allocation is null)
1033                 return; // nothing to check
1034 
1035             // If you fail here, there is a memory error in your access patterns.
1036             // Sentinel bytes of value 0xCC were overwritten in a `MergedAllocation`.
1037             // You can use slices with `allocArray` instead of `alloc` to find the faulty
1038             // access. This check doesn't catch everything!
1039             assert(! hasMemoryError());
1040 
1041             _sentinels.clearContents();
1042         }
1043     }
1044 }
1045 
1046 
1047 // Here is how you should use MergedAllocation.
1048 unittest
1049 {
1050     static struct MyDSPStruct
1051     {
1052     public:
1053     nothrow:
1054     @nogc:
1055         void initialize(int maxFrames)
1056         {
1057             _mergedAlloc.start();
1058             layout(_mergedAlloc, maxFrames); // you need such a layout function to be called twice.
1059             _mergedAlloc.allocate();
1060             layout(_mergedAlloc, maxFrames); // the first time arrays area allocated in the `null` area, the second time in
1061                                              // actually allocated memory (since we now have the needed length).
1062         }
1063 
1064         void layout(ref MergedAllocation ma, int maxFrames)
1065         {
1066             // allocate `maxFrames` elems, and return a slice in `_intermediateBuf`.
1067             ma.allocArray(_intermediateBuf, maxFrames);
1068 
1069             // allocate `maxFrames` elems, aligned to 16-byte boundaries. Return a pointer to that in `_coeffs`.
1070             ma.alloc(_coeffs, maxFrames, 16);
1071         }
1072 
1073     private:
1074         float[] _intermediateBuf;
1075         double* _coeffs;
1076         MergedAllocation _mergedAlloc;
1077     }
1078 
1079     MyDSPStruct s;
1080     s.initialize(14);
1081     s._coeffs[0..14] = 1.0f;
1082     s._intermediateBuf[0..14] = 1.0f;
1083     s.initialize(17);
1084     s._coeffs[0..17] = 1.0f;
1085     s._intermediateBuf[0..17] = 1.0f;
1086 }
1087 
1088 // Should be valid to allocate nothing with a MergedAllocation.
1089 unittest
1090 {
1091     MergedAllocation ma;
1092     ma.start();
1093     ma.allocate();
1094     assert(ma._allocation == null);
1095 }
1096 
1097 // test stomping detection
1098 unittest
1099 {
1100     MergedAllocation ma;
1101     ma.start();
1102     ubyte[] arr, arr2;
1103     ma.allocArray(arr, 17);
1104     ma.allocArray(arr2, 24);
1105     assert(ma._allocation == null);
1106 
1107     ma.allocate();
1108     ma.allocArray(arr, 17);
1109     ma.allocArray(arr2, 24);
1110     assert(ma._allocation != null);
1111     assert(arr.length == 17);
1112     assert(arr2.length == 24);
1113 
1114     // Test memory error detection with a simple case.
1115     static if (MergedAllocation.mergedAllocStompWarning)
1116     {
1117         assert(!ma.hasMemoryError());
1118 
1119         // Create a memory error
1120         arr.ptr[18] = 2;
1121         assert(ma.hasMemoryError());
1122         arr.ptr[18] = 0xCC; // undo the error to avoid stopping the unittests
1123     }
1124 }