1 /**
String builder code, plus locale-independent number parsing functions.
3 
4 Copyright: Guillaume Piolat, 2022.
5 License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 */
7 
8 module dplug.core..string;
9 
10 import core.stdc.stdlib;
11 import core.stdc.string;
12 import core.stdc.stdarg;
13 import dplug.core.vec;
14 
15 
16 nothrow @nogc:
17 
/// Builds an owning `String` that holds a copy of the characters of `s`.
String makeString(const(char)[] s)
{
    // `String` is not copyable; the local is moved out on return.
    String built = String(s);
    return built;
}
23 
/// Dplug `String`: for now, just a string builder that owns its memory.
/// It is designed to ease the usage of the C string functions, to allow
/// appending, etc.
/// `String` always owns its memory, and can expose its content as a D slice.
/// Copying is disabled (`@disable this(this)`).
/// FUTURE: use realloc to be able to size down.
///         Capacity to be a slice into existing memory and not own.
///         Capacity to disown memory (implies: stop using Vec)
/// QUESTION: should String just be a managed slice!T instead? Like Go slices.
struct String
{
public:
nothrow @nogc:

    /// Builds a string made of the single character `ch`.
    this(char ch)
    {
        this ~= ch;
    }

    /// Builds a string holding a copy of the characters of `s`.
    this(const(char)[] s)
    {
        this ~= s;
    }

    ~this()
    {
    }

    @disable this(this);

    /// Sets as empty/null string.
    void makeEmpty()
    {
        _chars.clearContents();
    }

    /// Pointer to first character in the string, or `null`.
    inout(char)* ptr() inout return
    {
        return _chars.ptr;
    }

    /// Length in bytes of the string.
    size_t length() const
    {
        return _chars.length;
    }

    /// Converts to a D string, sliced into the `String` memory.
    /// Returns: `null` if the string is empty, else a slice of all characters.
    inout(char)[] asSlice() inout return
    {
        size_t len = length();
        if (len == 0)
            return null;
        return _chars[0..len];
    }

    /// Returns: Whole content of the string in one slice.
    inout(char)[] opSlice() inout return
    {
        return asSlice();
    }

    /// Returns: A slice of the characters in `i1 .. i2`, sliced into the
    ///          `String` memory.
    inout(char)[] opSlice(size_t i1, size_t i2) inout return
    {
        return _chars[i1 .. i2];
    }

    /// Replaces the content with the single character `x`.
    void opAssign(T : char)(T x)
    {
        makeEmpty();
        this ~= x;
    }

    /// Replaces the content with a copy of the characters of `x`.
    void opAssign(T : const(char)[])(T x)
    {
        makeEmpty();
        this ~= x;
    }

    /// Replaces the content with a copy of the characters of `x`.
    void opAssign(T : String)(T x)
    {
        makeEmpty();
        this ~= x;
    }

    // <Appending>

    /// Append a character to the string. This invalidates pointers to characters
    /// returned before.
    void opOpAssign(string op)(char x) if (op == "~")
    {
        _chars.pushBack(x);
    }

    /// Append characters to the string. This invalidates pointers to characters
    /// returned before.
    /// `str` is allowed to be a slice of this very string (eg. `s ~= s[]`).
    void opOpAssign(string op)(const(char)[] str) if (op == "~")
    {
        size_t len = str.length;
        if (len == 0)
            return;

        const(char)* own = _chars.ptr;
        if (own !is null && str.ptr >= own && str.ptr < own + _chars.length)
        {
            // `str` points into our own storage, and appending invalidates
            // pointers to our characters: address the source by index, and
            // copy each character to a local before pushing it.
            size_t offset = cast(size_t)(str.ptr - own);
            for (size_t n = 0; n < len; ++n)
            {
                char ch = _chars[offset + n];
                _chars.pushBack(ch);
            }
            return;
        }

        for (size_t n = 0; n < len; ++n)
            _chars.pushBack(str[n]);
    }

    /// Append the characters of another `String` (or of this one) to the string.
    void opOpAssign(string op)(ref const(String) str) if (op == "~")
    {
        this ~= str.asSlice();
    }

    /// Append a zero-terminated string to the string. The terminating zero
    /// itself is not appended. A `null` pointer appends nothing.
    /// Name is explicit, because it should be rare and overload conflict.
    void appendZeroTerminatedString(const(char)* str)
    {
        if (str is null)
            return;
        while(*str != '\0')
            _chars.pushBack(*str++);
    }

    // </Appending>

    /// Compares content with a D slice. An empty `String` equals both `""`
    /// and `null`.
    bool opEquals(const(char)[] s) const
    {
        return asSlice() == s;
    }

    /// Compares content with another `String`.
    bool opEquals(ref const(String) str) const
    {
        return asSlice() == str.asSlice();
    }

private:

    // FUTURE

    /*alias Flags = int;
    enum : Flags
    {
        owned          = 1, /// String data is currently owned (C's malloc/free), not borrowed.
        zeroTerminated = 2, /// String data is currently zero-terminated.
    }

    Flags _flags = 0;
    */

    /// Storage for the characters. No terminating zero is stored by this struct.
    Vec!char _chars;

    /// Same effect as `makeEmpty`.
    void clearContents()
    {
        _chars.clearContents();
    }
}
183 
// Null and .ptr: an empty `String` must compare like an empty/null D string.
unittest
{
    string unset;
    string emptyLiteral = "";
    string nullLiteral = null;

    // Reference behaviour of built-in D strings.
    assert(emptyLiteral == unset);
    assert(nullLiteral == unset);
    assert(emptyLiteral == nullLiteral);
    assert(emptyLiteral !is nullLiteral);
    assert(emptyLiteral.length == 0);
    assert(nullLiteral.length == 0);
    assert(emptyLiteral.ptr !is null);

    // Must preserve semantics from D strings.
    String fromUnset = unset;
    String fromEmpty = emptyLiteral;
    String fromNull = nullLiteral;
    assert(fromEmpty == fromUnset);
    assert(fromNull == fromUnset);
    assert(fromEmpty == fromNull);
}
207 
// Basic appending, then emptying.
unittest
{
    String greeting = "Hello,";
    greeting ~= " world!";
    assert(greeting == "Hello, world!");

    greeting.makeEmpty();
    assert(greeting == null);
    assert(greeting.length == 0);
}
218 
/// `strtod` replacement that does not depend on the C locale.
/// Params:
///     s = Must be a zero-terminated string.
///     p = If not `null`, receives the position where parsing stopped.
/// Returns: The parsed number, or 0.0 if the parser reported an error.
/// Note that this code is duplicated in wren-port, to avoid a dependency on dplug:core there.
public double strtod_nolocale(const(char)* s, const(char)** p)
{
    const(char)* stop;
    bool failed = false;
    double parsed = stb__clex_parse_number_literal(s, &stop, &failed, true);

    if (p !is null)
        *p = stop;

    return failed ? 0.0 : parsed;
}
// `strtod_nolocale` must stop parsing at the same place as the C `strtod`.
unittest
{
    string[18] inputs = 
    [
        "0x123lol", "+0x1.921fb54442d18p+0001()", "0,", "-0.0,,,,", 
        "0.65,stuff", "1.64587okokok", "-1.0e+9HELLO", "1.1454e-25f#STUFF",
        "+iNfu", "-infEXCESS", "infuh", "-infinity", 
        "+infinity", "+nan", "-nan", "nan",
        "INFINITY", "-NAN"
    ]; 

    foreach (input; inputs)
    {
        const(char)* endLibc, endOurs;
        double fromLibc = strtod(input.ptr, &endLibc); // in unittest, no program tampering the C locale
        double fromOurs = strtod_nolocale(input.ptr, &endOurs);
        // Only the end positions are compared, not the parsed values.
        assert(endLibc == endOurs);
    }
}
254 
/// Parses an integer out of a string, independently of the C locale.
/// Params:
///     s = Must be a zero-terminated string. `null` is reported as an error.
///     mustConsumeEntireInput = if true, it is an error for parsing to stop before the end of `s`.
///     err = optional; if not `null`, receives `true` on error and `false` on success.
/// Returns: The parsed integer, or 0 on error.
/// Note: unlike with `convertStringToDouble`, the string "4.7" will parse to just 4. Replaces %d in scanf-like functions.
/// Only parse correctly from -2147483648 to 2147483647.
/// Larger values are clamped to this -2147483648 to 2147483647 range.
public int convertStringToInteger(const(char)* s,
                                  bool mustConsumeEntireInput,
                                  bool* err) pure nothrow @nogc
{
    bool failed = (s is null);
    double parsed = 0;

    if (!failed)
    {
        const(char)* stop;
        bool parseError = false;
        parsed = stb__clex_parse_number_literal(s, &stop, &parseError, false);

        // Either the parser complained, or it stopped before the terminating zero
        // while the caller asked for the whole string to be consumed.
        failed = parseError
              || (mustConsumeEntireInput && stop != s + strlen(s));
    }

    if (err !is null)
        *err = failed;

    if (failed)
        return 0;

    // The parser should have returned an integer that fits in a double,
    // like the whole int.min to int.max range.
    double roundTrip = cast(int)parsed;
    assert(roundTrip == parsed);
    return cast(int)parsed;
}
// Integer parsing: fractional part ignored, out-of-range values clamped.
unittest
{
    bool failed;
    int four = convertStringToInteger(" 4.7\n", false, &failed);
    assert(four == 4);
    assert(!failed);

    assert(convertStringToInteger("-2147483649", false, &failed) == int.min);
    assert(convertStringToInteger("1e30", false, &failed) == 1);
    assert(convertStringToInteger("-0", false, &failed) == 0);
    assert(convertStringToInteger("10000000000", false, &failed) == int.max);
}
311 
312 
/// Parses a floating-point number out of a string, independently of the C locale.
/// Params:
///     s = Must be a zero-terminated string. `null` is reported as an error.
///     mustConsumeEntireInput = if true, it is an error for parsing to stop before the end of `s`.
///     err = optional; if not `null`, receives `true` on error and `false` on success.
/// Returns: The parsed number, or 0.0 on error.
public double convertStringToDouble(const(char)* s, 
                                    bool mustConsumeEntireInput,
                                    bool* err) pure nothrow @nogc
{
    bool failed = (s is null);
    double parsed = 0.0;

    if (!failed)
    {
        const(char)* stop;
        bool parseError = false;
        parsed = stb__clex_parse_number_literal(s, &stop, &parseError, true);

        // Either the parser complained, or it stopped before the terminating zero
        // while the caller asked for the whole string to be consumed.
        failed = parseError
              || (mustConsumeEntireInput && stop != s + strlen(s));
    }

    if (err !is null)
        *err = failed;

    return failed ? 0.0 : parsed;
}
351  
// Float parsing, on strings that are entirely a number, and on strings that
// start with a number followed by other characters.
unittest
{
    // Relative comparison: true if `a` and `b` are within `maxRelDiff` of each other.
    bool isCloseRel(double a, double b, double maxRelDiff = 1e-2f)
    {
        // Bring a negative `a` to the positive side, mirroring `b` the same way.
        if (a < 0)
        {
            a = -a;
            b = -b;
        }
        
        if (a == 0)
            return b == 0;

        return
           (a <= b *(1.0 + maxRelDiff))
           &&
           (b <= a *(1.0 + maxRelDiff));
    }

    string[9] s = ["14", "0x123", "+0x1.921fb54442d18p+0001", "0", "-0.0", "   \n\t\n\f\r 0.65", "1.64587", "-1.0e+9", "1.1454e-25"]; 
    double[9] correct = [14, 0x123, +0x1.921fb54442d18p+0001, 0.0, -0.0, 0.65L, 1.64587, -1e9, 1.1454e-25f];

    // Same numbers as in `s`, each followed by characters that are not part of the number.
    string[9] sPartial = ["14top", "0x123lol", "+0x1.921fb54442d18p+0001()", "0,", "-0.0,,,,", "   \n\t\n\f\r 0.65,stuff", "1.64587okokok", "-1.0e+9HELLO", "1.1454e-25f#STUFF"]; 

    for (size_t n = 0; n < s.length; ++n)
    {
        // Whole string is a number: parses with or without `mustConsumeEntireInput`.
        bool err;
        double a = convertStringToDouble(s[n].ptr, true, &err);
        assert(!err);
        assert( isCloseRel(a, correct[n], 0.0001) );

        bool err2;
        double b = convertStringToDouble(s[n].ptr, false, &err2);
        assert(!err2);
        assert(b == a); // same parse

        convertStringToDouble(s[n].ptr, true, null); // should run without error pointer

        // Number followed by other characters: parses when the whole input
        // does not have to be consumed...
        bool err3;
        double c = convertStringToDouble(sPartial[n].ptr, false, &err3);
        assert(!err3);
        assert( isCloseRel(c, correct[n], 0.0001) );

        // ...and is an error (returning 0.0) when it has to be.
        bool err4 = false;
        double d = convertStringToDouble(sPartial[n].ptr, true, &err4);
        assert(err4);
        assert(d == 0.0);
    }
}
407 
/// Returns: the value of `ch` as a digit in `base` (10 or 16), or -1 if `ch`
///          is not a digit in that base.
private int stb__clex_digit_value(char ch, int base) pure nothrow @nogc
{
    if (ch >= '0' && ch <= '9')
        return ch - '0';
    if (base == 16)
    {
        if (ch >= 'a' && ch <= 'f')
            return 10 + (ch - 'a');
        if (ch >= 'A' && ch <= 'F')
            return 10 + (ch - 'A');
    }
    return -1;
}

/// Parses a number at the start of a zero-terminated string, without looking
/// at the C locale (the decimal separator is always '.').
///
/// Accepted input: optional leading whitespace, optional '+' or '-', then
/// either decimal digits, or "0x"/"0X" followed by hexadecimal digits.
/// When `allowFloat` is true, are also accepted: "inf", "infinity" and "nan"
/// (any letter case), a fractional part after '.', and an exponent
/// ('e'/'E' for decimal as a power of 10, 'p'/'P' for hexadecimal as a power
/// of 2) which is only consumed if it has at least one digit.
///
/// Params:
///     p = Must be a zero-terminated string.
///     q = If not `null`, receives the position just after the parsed number.
///         If no number could be parsed, receives `p` itself, like `strtod`.
///     err = If not `null`, receives `true` if no number could be parsed,
///           else `false`.
///     allowFloat = See above. When false, the result is also clamped to the
///                  `int.min` to `int.max` range.
/// Returns: The parsed value, or 0 if no number could be parsed.
private double stb__clex_parse_number_literal(const(char)* p, 
                                              const(char)**q, 
                                              bool* err,
                                              bool allowFloat) pure nothrow @nogc
{
    const(char)* start = p; // reported back through `q` if nothing can be parsed
    double value = 0;
    int base = 10;
    bool negative = false;
    bool isSpecial = false; // true if "inf", "infinity" or "nan" was parsed
    bool sawDigit = false;  // true once a digit of the mantissa was consumed

    // Skip leading whitespace, like scanf and strtod do.
    // This is the set of characters recognized by isspace in the "C" locale.
    while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\v' || *p == '\f' || *p == '\r')
        ++p;

    if (*p == '-') 
    {
        negative = true;
        ++p;
    } 
    else if (*p == '+') 
    {
        ++p;
    }

    // Issue #865, "-inf" was parsed as 0
    // libc can produce "infinity" as well as "inf"
    // %f specifier can produce "infinity", "inf", "nan"
    // %F specifier can produce "INFINITY", "INF", "NAN"
    // In practice, C libraries parse combination of uppercase and lowercase
    // Note: && short-circuits, so nothing is read past the terminating zero.
    if (allowFloat)
    {
        if (  (p[0] == 'i' || p[0] == 'I')
           && (p[1] == 'n' || p[1] == 'N')
           && (p[2] == 'f' || p[2] == 'F') )
        {
            value = double.infinity;
            isSpecial = true;
            p += 3;

            // Optional "inity" to complete "infinity".
            if (  (p[0] == 'i' || p[0] == 'I')
               && (p[1] == 'n' || p[1] == 'N')
               && (p[2] == 'i' || p[2] == 'I')
               && (p[3] == 't' || p[3] == 'T')
               && (p[4] == 'y' || p[4] == 'Y') )
                p += 5;
        }
        else if (  (p[0] == 'n' || p[0] == 'N')
                && (p[1] == 'a' || p[1] == 'A')
                && (p[2] == 'n' || p[2] == 'N') )
        {
            value = double.nan;
            isSpecial = true;
            p += 3;
        }
    }

    if (!isSpecial)
    {
        // "0x" is only a hexadecimal prefix if a hexadecimal digit follows
        // (possibly after the '.', for floats). Else, just the "0" is parsed.
        if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
        {
            bool hexFollows = stb__clex_digit_value(p[2], 16) >= 0;
            if (!hexFollows && allowFloat && p[2] == '.')
                hexFollows = stb__clex_digit_value(p[3], 16) >= 0;
            if (hexFollows)
            {
                base = 16;
                p += 2;
            }
        }

        // Integer part.
        for (;;)
        {
            int digit = stb__clex_digit_value(*p, base);
            if (digit < 0)
                break;
            value = value * base + digit;
            sawDigit = true;
            ++p;
        }

        // Fractional part.
        if (allowFloat && *p == '.')
        {
            double addend = 0;
            double pow = 1;
            ++p;
            for (;;)
            {
                int digit = stb__clex_digit_value(*p, base);
                if (digit < 0)
                    break;
                sawDigit = true;
                ++p;

                // Digits that far are way below the precision of a double.
                // They are consumed but not accumulated, so that `addend`
                // and `pow` cannot both overflow to infinity (giving NaN).
                if (pow < 1e300)
                {
                    addend = addend * base + digit;
                    pow *= base;
                }
            }
            value += addend / pow;
        }

        // Not a single digit: this is not a number.
        if (!sawDigit)
        {
            if (q) *q = start;
            if (err) *err = true;
            return 0;
        }

        // Exponent.
        if (allowFloat)
        {
            bool hasMarker;
            if (base == 16)
                hasMarker = (*p == 'p' || *p == 'P');
            else
                hasMarker = (*p == 'e' || *p == 'E');

            if (hasMarker)
            {
                // Look ahead: the marker and sign are only part of the number
                // if at least one exponent digit follows.
                const(char)* e = p + 1;
                bool negativeExponent = (*e == '-');
                if (*e == '-' || *e == '+')
                    ++e;

                if (*e >= '0' && *e <= '9')
                {
                    uint exponent2 = 0;
                    while (*e >= '0' && *e <= '9')
                    {
                        // Saturate instead of overflowing; such exponents
                        // yield infinity or zero anyway.
                        if (exponent2 < 100_000)
                            exponent2 = exponent2 * 10 + (*e - '0');
                        ++e;
                    }
                    p = e;

                    // Zero stays zero whatever the exponent; scaling it by
                    // an overflowed power would give NaN.
                    if (value != 0)
                    {
                        double power = stb__clex_pow(base == 16 ? 2.0 : 10.0, exponent2);
                        if (negativeExponent)
                            value /= power;
                        else
                            value *= power;
                    }
                }
            }
        }
    }

    if (q) *q = p;
    if (err) *err = false; // seen no error

    if (negative)
        value = -value;

    if (!allowFloat)
    {
        // Clamp to the int range. No fractional part was parsed, so there is
        // nothing to round.
        if (value > int.max) value = int.max;
        if (value < int.min) value = int.min;
    }    
    return value;
}
560 
/// Computes `base` raised to the power `exponent`, by repeated squaring.
private double stb__clex_pow(double base, uint exponent) pure nothrow @nogc
{
    double result = 1;
    double factor = base;     // base^(2^k) at iteration k
    uint remaining = exponent;

    while (remaining != 0)
    {
        // Each set bit of the exponent contributes the current factor.
        if ((remaining & 1) != 0)
            result *= factor;
        factor *= factor;
        remaining >>= 1;
    }
    return result;
}