/**
String building code, plus no-locale float parsing functions.

Copyright: Guillaume Piolat, 2022.
License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/

module dplug.core.string;

import core.stdc.stdlib;
import core.stdc.string;
import core.stdc.stdarg;
import dplug.core.vec;


nothrow @nogc:

/// Create a `String` from a D `string`.
/// Params:
///     s = Characters to copy into the new `String`.
/// Returns: A `String` holding a copy of `s`.
String makeString(const(char)[] s)
{
    return String(s);
}

/// For now, just a string builder that owns its memory.
/// Dplug `String`, designed to ease the usage of all the C string functions,
/// allow appending, etc.
/// `String` always owns its memory, and can return it as a D slice.
/// FUTURE: use realloc to be able to size down.
///         Capacity to be a slice into existing memory and not own.
///         Capacity to disown memory (implies: stop using Vec)
/// QUESTION: should String just be a managed slice!T instead? Like Go slices.
struct String
{
public:
nothrow @nogc:

    /// Builds a one-character string.
    this(char ch)
    {
        this ~= ch;
    }

    /// Builds a string holding a copy of the characters of `s`.
    this(const(char)[] s)
    {
        this ~= s;
    }

    ~this()
    {
    }

    // Copies are disabled: a `String` can only be moved or passed by reference.
    @disable this(this);

    /// Sets as empty/null string.
    void makeEmpty()
    {
        _chars.clearContents();
    }

    /// Pointer to first character in the string, or `null`.
    inout(char)* ptr() inout return
    {
        return _chars.ptr;
    }

    /// Length in bytes of the string.
    size_t length() const
    {
        return _chars.length;
    }

    /// Converts to a D string, sliced into the `String` memory.
    /// Returns: `null` when the string is empty, else a slice over all characters.
    inout(char)[] asSlice() inout return
    {
        size_t len = length();
        if (len == 0)
            return null;
        return _chars[0..len];
    }

    /// Returns: Whole content of the string in one slice.
    inout(char)[] opSlice() inout return
    {
        return asSlice();
    }

    /// Returns: A slice of the string, from index `i1` (included) to `i2` (excluded).
    inout(char)[] opSlice(size_t i1, size_t i2) inout
    {
        return _chars[i1 .. i2];
    }

    /// Replaces the content with the single character `x`.
    void opAssign(T : char)(T x)
    {
        makeEmpty();
        this ~= x;
    }

    /// Replaces the content with a copy of the characters of `x`.
    void opAssign(T : const(char)[])(T x)
    {
        makeEmpty();
        this ~= x;
    }

    /// Replaces the content with a copy of the characters of `x`.
    void opAssign(T : String)(T x)
    {
        makeEmpty();
        this ~= x;
    }

    // <Appending>

    /// Append a character to the string. This invalidates pointers to characters
    /// returned before.
    void opOpAssign(string op)(char x) if (op == "~")
    {
        _chars.pushBack(x);
    }

    /// Append characters to the string.
    /// Note: characters are pushed one by one, and appending invalidates pointers
    /// returned before, so `str` must not be a slice into this very `String`.
    void opOpAssign(string op)(const(char)[] str) if (op == "~")
    {
        size_t len = str.length;
        for (size_t n = 0; n < len; ++n)
            _chars.pushBack(str[n]);
    }

    /// Append the characters of another `String` to the string.
    void opOpAssign(string op)(ref const(String) str) if (op == "~")
    {
        this ~= str.asSlice();
    }

    /// Append a zero-terminated string to the string.
    /// Name is explicit, because it should be rare and overload conflict.
    /// A `null` pointer is accepted and appends nothing.
    void appendZeroTerminatedString(const(char)* str)
    {
        if (str is null)
            return;
        while(*str != '\0')
            _chars.pushBack(*str++);
    }

    /// Compares content with a D slice. An empty `String` equals both `""` and `null`.
    /// Marked `const` so that `const(String)` values can be compared too.
    bool opEquals(const(char)[] s) const
    {
        return asSlice() == s;
    }

    /// Compares content with another `String`.
    bool opEquals(ref const(String) str) const
    {
        return this.asSlice() == str.asSlice();
    }

    // </Appending>

private:

    // FUTURE

    /*alias Flags = int;
    enum : Flags
    {
        owned          = 1, /// String data is currently owned (C's malloc/free), not borrowed.
        zeroTerminated = 2, /// String data is currently zero-terminated.
    }

    Flags _flags = 0;
    */

    // Character storage; every accessor above goes through it.
    Vec!char _chars;

    void clearContents()
    {
        _chars.clearContents();
    }
}

// Null and .ptr
unittest
{
    string z;
    string a = "";
    string b = null;

    assert(a == z);
    assert(b == z);
    assert(a == b);
    assert(a !is b);
    assert(a.length == 0);
    assert(b.length == 0);
    assert(a.ptr !is null);

    // Must preserve semantics from D strings.
    String Z = z;
    String A = a;
    String B = b;
    assert(A == Z);
    assert(B == Z);
    assert(A == B);
}

// Basic appending.
unittest
{
    String s = "Hello,";
    s ~= " world!";
    assert(s == "Hello, world!");
    s.makeEmpty();
    assert(s == null);
    assert(s.length == 0);
}

/// strtod replacement, but without locale
///     s Must be a zero-terminated string.
///     p Optional. If non-null, receives the end pointer reported by the parser.
/// Returns: The parsed value, or 0.0 if the parser reported an error.
/// Note that this code is duplicated in wren-port, to avoid a dependency on dplug:core there.
public double strtod_nolocale(const(char)* s, const(char)** p)
{
    bool strtod_err = false;
    const(char)* pend;
    double r = stb__clex_parse_number_literal(s, &pend, &strtod_err, true);
    if (p)
        *p = pend;
    if (strtod_err)
        r = 0.0;
    return r;
}
unittest
{
    string[18] sPartial =
    [
        "0x123lol", "+0x1.921fb54442d18p+0001()", "0,", "-0.0,,,,",
        "0.65,stuff", "1.64587okokok", "-1.0e+9HELLO", "1.1454e-25f#STUFF",
        "+iNfu", "-infEXCESS", "infuh", "-infinity",
        "+infinity", "+nan", "-nan", "nan",
        "INFINITY", "-NAN"
    ];

    for (int n = 0; n < sPartial.length; ++n)
    {
        const(char)* p1, p2;
        double r1 = strtod(sPartial[n].ptr, &p1); // in unittest, no program tampering the C locale
        double r2 = strtod_nolocale(sPartial[n].ptr, &p2);
        //import core.stdc.stdio;
        //debug printf("parsing \"%s\" %lg %lg %p %p\n", sPartial[n].ptr, r1, r2, p1, p2);
        assert(p1 == p2);
    }
}

/// C-locale independent string to integer parsing.
/// Params:
///     s = Zero-terminated string to parse. `null` is reported as an error.
///     mustConsumeEntireInput = when true, parsing fails unless the number extends to the end of `s`.
///     err = optional; when non-null, receives `true` on failure and `false` on success.
/// Returns: The parsed integer, or 0 on failure.
/// Note: unlike with `convertStringToDouble`, the string "4.7" will parse to just 4. Replaces %d in scanf-like functions.
///       Only parse correctly from -2147483648 to 2147483647.
///       Larger values are clamped to this -2147483648 to 2147483647 range.
public int convertStringToInteger(const(char)* s,
                                  bool mustConsumeEntireInput,
                                  bool* err) pure nothrow @nogc
{
    // A single failure flag is accumulated, then reported once below.
    bool failed = (s is null);
    double parsed = 0;

    if (!failed)
    {
        const(char)* stop;
        bool parseError = false;
        parsed = stb__clex_parse_number_literal(s, &stop, &parseError, false); // integers only
        failed = parseError;

        // Trailing characters after the number count as a failure on request.
        if (!failed && mustConsumeEntireInput)
            failed = (stop != s + strlen(s));
    }

    if (err)
        *err = failed;

    if (failed)
        return 0;

    // The parser clamps integer results to the int range, so the cast is exact.
    int result = cast(int) parsed;
    assert(result == parsed);
    return result;
}
unittest
{
    bool err;
    assert(4 == convertStringToInteger(" 4.7\n", false, &err));
    assert(!err);

    assert(-2147483648 == convertStringToInteger("-2147483649", false, &err));
    assert( 1 == convertStringToInteger("1e30", false, &err));
    assert( 0 == convertStringToInteger("-0", false, &err));
    assert( 2147483647 == convertStringToInteger("10000000000", false, &err));
}


/// C-locale independent string to float parsing.
/// Params:
///     s = Must be a zero-terminated string.
///     mustConsumeEntireInput = if true, check that s is entirely consumed by parsing the number.
///     err = optional bool, set to true on failure and false on success.
/// Returns: The parsed value, or 0.0 on failure (`s` is null, no number could be
///          parsed, or trailing characters remain while `mustConsumeEntireInput` is set).
public double convertStringToDouble(const(char)* s,
                                    bool mustConsumeEntireInput,
                                    bool* err) pure nothrow @nogc
{
    if (s is null)
    {
        if (err) *err = true;
        return 0.0;
    }

    const(char)* end;
    bool strtod_err = false;
    double r = stb__clex_parse_number_literal(s, &end, &strtod_err, true);

    if (strtod_err)
    {
        if (err) *err = true;
        return 0.0;
    }

    if (mustConsumeEntireInput)
    {
        size_t len = strlen(s);
        if (end != s + len)
        {
            if (err) *err = true; // did not consume whole string
            return 0.0;
        }
    }

    if (err) *err = false; // no error
    return r;
}

unittest
{
    bool isCloseRel(double a, double b, double maxRelDiff = 1e-2f)
    {
        if (a < 0)
        {
            a = -a;
            b = -b;
        }

        if (a == 0)
            return b == 0;

        return
            (a <= b *(1.0 + maxRelDiff))
            &&
            (b <= a *(1.0 + maxRelDiff));
    }

    string[9] s = ["14", "0x123", "+0x1.921fb54442d18p+0001", "0", "-0.0", "   \n\t\n\f\r 0.65", "1.64587", "-1.0e+9", "1.1454e-25"];
    double[9] correct = [14, 0x123, +0x1.921fb54442d18p+0001, 0.0, -0.0, 0.65L, 1.64587, -1e9, 1.1454e-25f];

    string[9] sPartial = ["14top", "0x123lol", "+0x1.921fb54442d18p+0001()", "0,", "-0.0,,,,", "  \n\t\n\f\r 0.65,stuff", "1.64587okokok", "-1.0e+9HELLO", "1.1454e-25f#STUFF"];
    for (int n = 0; n < s.length; ++n)
    {
        /*
        // Check vs scanf
        double sa;
        if (sscanf(s[n].ptr, "%lf", &sa) == 1)
        {
            debug printf("scanf finds %lg\n", sa);
        }
        else
            debug printf("scanf no parse\n");
        */

        bool err;
        double a = convertStringToDouble(s[n].ptr, true, &err);

        //import core.stdc.stdio;
        //printf("%f but correct is %f\n", a, correct[n]);

        assert(!err);
        assert( isCloseRel(a, correct[n], 0.0001) );

        bool err2;
        double b = convertStringToDouble(s[n].ptr, false, &err2);
        assert(!err2);
        assert(b == a); // same parse

        //debug printf("%lf\n", a);

        convertStringToDouble(s[n].ptr, true, null); // should run without error pointer

        // Same number followed by garbage: parses to the same value when partial
        // consumption is allowed, and is rejected when it is not.
        bool err3;
        double c = convertStringToDouble(sPartial[n].ptr, false, &err3);
        assert(!err3);
        assert(c == a);

        bool err4;
        convertStringToDouble(sPartial[n].ptr, true, &err4);
        assert(err4);
    }
}

// Inputs that hold no number at all must be reported as errors.
unittest
{
    bool err;

    convertStringToDouble("", false, &err);
    assert(err);
    convertStringToDouble("abc", false, &err);
    assert(err);
    convertStringToDouble("  -", false, &err);
    assert(err);
    convertStringToDouble(".", false, &err);
    assert(err);

    // Vertical tab is whitespace too, like for strtod.
    assert(2.5 == convertStringToDouble("\v2.5", true, &err));
    assert(!err);

    // A dangling exponent marker is not part of the number.
    assert(1.0 == convertStringToDouble("1e", false, &err));
    assert(!err);
    convertStringToDouble("1e", true, &err);
    assert(err);
    assert(100.0 == convertStringToDouble("1e2", true, &err));
    assert(!err);
}

/// Parses a number literal without depending on the C locale.
///
/// Accepted syntax: optional leading whitespace, optional sign, then either
/// decimal digits or a `0x`/`0X` prefixed hexadecimal number. When `allowFloat` is
/// true, a fractional part, an exponent (`e`/`E` in decimal, `p`/`P` in hexadecimal)
/// and the case-insensitive words "inf", "infinity" and "nan" are accepted as well.
///
/// Params:
///     p = Zero-terminated input string.
///     q = Optional. Receives a pointer to the first character after the number,
///         or `p` itself if no number could be parsed (like strtod).
///     err = Optional. Receives true if no number could be parsed, else false.
///     allowFloat = If false, only the integer part is parsed, and the result is
///                  clamped to the `int.min` to `int.max` range.
/// Returns: The parsed value, or 0 if no number could be parsed.
private double stb__clex_parse_number_literal(const(char)* p,
                                              const(char)**q,
                                              bool* err,
                                              bool allowFloat) pure nothrow @nogc
{
    // All function-scope locals are declared here, so that `goto found_value`
    // never jumps over a declaration.
    const(char)* s = p; // start of input, handed back through `q` on failure
    double value=0;
    int base=10;
    int exponent=0;
    int signMantissa = 1;
    bool sawDigits = false; // true once the mantissa has at least one digit

    // Skip leading whitespace, like scanf and strtod do
    while (true)
    {
        char ch = *p;
        if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\v' || ch == '\f' || ch == '\r')
        {
            p += 1;
        }
        else
            break;
    }


    if (*p == '-')
    {
        signMantissa = -1;
        p += 1;
    }
    else if (*p == '+')
    {
        p += 1;
    }

    // Issue #865, "-inf" was parsed as 0
    // libc can produce "infinity" as well as "inf"
    // %f specifier can produce "infinity", "inf", "nan"
    // %F specifier can produce "INFINITY", "INF", "NAN"
    // In practice, C libraries parse combination of uppercase and lowercase
    if (allowFloat)
    {
        if (  (p[0] == 'i' || p[0] == 'I')
           && (p[1] == 'n' || p[1] == 'N')
           && (p[2] == 'f' || p[2] == 'F') )
        {
            value = double.infinity;
            p += 3;

            if (  (p[0] == 'i' || p[0] == 'I')
               && (p[1] == 'n' || p[1] == 'N')
               && (p[2] == 'i' || p[2] == 'I')
               && (p[3] == 't' || p[3] == 'T')
               && (p[4] == 'y' || p[4] == 'Y') )
                p += 5;

            goto found_value;
        }

        if (  (p[0] == 'n' || p[0] == 'N')
           && (p[1] == 'a' || p[1] == 'A')
           && (p[2] == 'n' || p[2] == 'N') )
        {
            value = double.nan;
            p += 3;
            goto found_value;
        }
    }

    if (*p == '0')
    {
        if (p[1] == 'x' || p[1] == 'X')
        {
            base=16;
            p += 2;
            sawDigits = true; // the '0' of the "0x" prefix is itself a digit
        }
    }

    // Integer part
    for (;;)
    {
        if (*p >= '0' && *p <= '9')
            value = value*base + (*p++ - '0');
        else if (base == 16 && *p >= 'a' && *p <= 'f')
            value = value*base + 10 + (*p++ - 'a');
        else if (base == 16 && *p >= 'A' && *p <= 'F')
            value = value*base + 10 + (*p++ - 'A');
        else
            break;
        sawDigits = true;
    }

    // Fractional part
    if (allowFloat && *p == '.')
    {
        double pow, addend = 0;
        ++p;
        for (pow=1; ; pow*=base)
        {
            if (*p >= '0' && *p <= '9')
                addend = addend*base + (*p++ - '0');
            else if (base == 16 && *p >= 'a' && *p <= 'f')
                addend = addend*base + 10 + (*p++ - 'a');
            else if (base == 16 && *p >= 'A' && *p <= 'F')
                addend = addend*base + 10 + (*p++ - 'A');
            else
                break;
            sawDigits = true;
        }
        value += addend / pow;
    }

    // Nothing that looks like a number: report it instead of silently returning 0.
    // Like strtod, the end pointer is then the start of the input.
    if (!sawDigits)
    {
        if (q) *q = s;
        if (err) *err = true;
        return 0;
    }

    if (allowFloat)
    {
        if (base == 16) {
            // exponent required for hex float literal, else it's an integer literal like 0x123
            exponent = (*p == 'p' || *p == 'P');
        } else
            exponent = (*p == 'e' || *p == 'E');

        if (exponent)
        {
            // Look ahead without committing: the exponent marker only belongs to
            // the number if at least one digit follows the optional sign.
            const(char)* e = p + 1;
            bool negativeExponent = (*e == '-');
            if (*e == '-' || *e == '+')
                ++e;

            if (*e >= '0' && *e <= '9')
            {
                uint exponent2 = 0;
                while (*e >= '0' && *e <= '9')
                    exponent2 = exponent2*10 + (*e++ - '0');
                p = e;

                double power;
                if (base == 16)
                    power = stb__clex_pow(2, exponent2);
                else
                    power = stb__clex_pow(10, exponent2);
                if (negativeExponent)
                    value /= power;
                else
                    value *= power;
            }
        }
    }

    found_value:

    if (q) *q = p;
    if (err) *err = false; // seen no error

    if (signMantissa < 0)
        value = -value;

    if (!allowFloat)
    {
        // clamp to the int range (value is already a whole number here)
        if (value > int.max) value = int.max;
        if (value < int.min) value = int.min;
    }
    return value;
}

/// Computes `base` raised to the power `exponent`, by repeated squaring.
private double stb__clex_pow(double base, uint exponent) pure nothrow @nogc
{
    double value=1;
    for ( ; exponent; exponent >>= 1) {
        // Multiply in the current power of `base` for each set bit of `exponent`.
        if (exponent & 1)
            value *= base;
        base *= base;
    }
    return value;
}