JSONWriter.h (21905B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 /* A JSON pretty-printer class. */ 8 9 // A typical JSON-writing library requires you to first build up a data 10 // structure that represents a JSON object and then serialize it (to file, or 11 // somewhere else). This approach makes for a clean API, but building the data 12 // structure takes up memory. Sometimes that isn't desirable, such as when the 13 // JSON data is produced for memory reporting. 14 // 15 // The JSONWriter class instead allows JSON data to be written out 16 // incrementally without building up large data structures. 17 // 18 // The API is slightly uglier than you would see in a typical JSON-writing 19 // library, but still fairly easy to use. It's possible to generate invalid 20 // JSON with JSONWriter, but typically the most basic testing will identify any 21 // such problems. 22 // 23 // Similarly, there are no RAII facilities for automatically closing objects 24 // and arrays. These would be nice if you are generating all your code within 25 // nested functions, but in other cases you'd have to maintain an explicit 26 // stack of RAII objects and manually unwind it, which is no better than just 27 // calling "end" functions. Furthermore, the consequences of forgetting to 28 // close an object or array are obvious and, again, will be identified via 29 // basic testing, unlike other cases where RAII is typically used (e.g. smart 30 // pointers) and the consequences of defects are more subtle. 31 // 32 // Importantly, the class does solve the two hard problems of JSON 33 // pretty-printing, which are (a) correctly escaping strings, and (b) adding 34 // appropriate indentation and commas between items. 35 // 36 // By default, every property is placed on its own line. However, it is 37 // possible to request that objects and arrays be placed entirely on a single 38 // line, which can reduce output size significantly in some cases. 39 // 40 // Strings used (for property names and string property values) are |const 41 // char*| throughout, and can be ASCII or UTF-8. 42 // 43 // EXAMPLE 44 // ------- 45 // Assume that |MyWriteFunc| is a class that implements |JSONWriteFunc|. The 46 // following code: 47 // 48 // JSONWriter w(MakeUnique<MyWriteFunc>()); 49 // w.Start(); 50 // { 51 // w.NullProperty("null"); 52 // w.BoolProperty("bool", true); 53 // w.IntProperty("int", 1); 54 // w.StartArrayProperty("array"); 55 // { 56 // w.StringElement("string"); 57 // w.StartObjectElement(); 58 // { 59 // w.DoubleProperty("double", 3.4); 60 // w.StartArrayProperty("single-line array", w.SingleLineStyle); 61 // { 62 // w.IntElement(1); 63 // w.StartObjectElement(); // SingleLineStyle is inherited from 64 // w.EndObjectElement(); // above for this collection 65 // } 66 // w.EndArray(); 67 // } 68 // w.EndObjectElement(); 69 // } 70 // w.EndArrayProperty(); 71 // } 72 // w.End(); 73 // 74 // will produce pretty-printed output for the following JSON object: 75 // 76 // { 77 // "null": null, 78 // "bool": true, 79 // "int": 1, 80 // "array": [ 81 // "string", 82 // { 83 // "double": 3.4, 84 // "single-line array": [1, {}] 85 // } 86 // ] 87 // } 88 // 89 // The nesting in the example code is obviously optional, but can aid 90 // readability. 91 92 #ifndef mozilla_JSONWriter_h 93 #define mozilla_JSONWriter_h 94 95 #include "double-conversion/double-conversion.h" 96 #include "mozilla/Assertions.h" 97 #include "mozilla/IntegerPrintfMacros.h" 98 #include "mozilla/Span.h" 99 #include "mozilla/Sprintf.h" 100 #include "mozilla/UniquePtr.h" 101 #include "mozilla/Vector.h" 102 103 #include <utility> 104 105 namespace mozilla { 106 107 // A quasi-functor for JSONWriter. We don't use a true functor because that 108 // requires templatizing JSONWriter, and the templatization seeps to lots of 109 // places we don't want it to. 110 class JSONWriteFunc { 111 public: 112 virtual void Write(const Span<const char>& aStr) = 0; 113 virtual ~JSONWriteFunc() = default; 114 }; 115 116 class JSONWriter { 117 // From http://www.ietf.org/rfc/rfc4627.txt: 118 // 119 // "All Unicode characters may be placed within the quotation marks except 120 // for the characters that must be escaped: quotation mark, reverse 121 // solidus, and the control characters (U+0000 through U+001F)." 122 // 123 // This implementation uses two-char escape sequences where possible, namely: 124 // 125 // \", \\, \b, \f, \n, \r, \t 126 // 127 // All control characters not in the above list are represented with a 128 // six-char escape sequence, e.g. '\u000b' (a.k.a. '\v'). 129 // 130 class EscapedString { 131 // `mStringSpan` initially points at the user-provided string. If that 132 // string needs escaping, `mStringSpan` will point at `mOwnedStr` below. 133 Span<const char> mStringSpan; 134 // String storage in case escaping is actually needed, null otherwise. 135 UniquePtr<char[]> mOwnedStr; 136 137 void CheckInvariants() const { 138 // Either there was no escaping so `mOwnedStr` is null, or escaping was 139 // needed, in which case `mStringSpan` should point at `mOwnedStr`. 140 MOZ_ASSERT(!mOwnedStr || mStringSpan.data() == mOwnedStr.get()); 141 } 142 143 static char hexDigitToAsciiChar(uint8_t u) { 144 u = u & 0xf; 145 return u < 10 ? '0' + u : 'a' + (u - 10); 146 } 147 148 public: 149 explicit EscapedString(const Span<const char>& aStr) : mStringSpan(aStr) { 150 // clang-format off 151 // The chars with non-'___' entries in this table are those that can be 152 // represented with a two-char escape sequence. The value is the second char in 153 // the sequence, that which follows the initial backslash. 154 #define ___ 0 155 static constexpr char TwoCharEscapes[256] = { 156 /* 0 1 2 3 4 5 6 7 8 9 */ 157 /* 0+ */ ___, ___, ___, ___, ___, ___, ___, ___, 'b', 't', 158 /* 10+ */ 'n', ___, 'f', 'r', ___, ___, ___, ___, ___, ___, 159 /* 20+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 160 /* 30+ */ ___, ___, ___, ___, '"', ___, ___, ___, ___, ___, 161 /* 40+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 162 /* 50+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 163 /* 60+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 164 /* 70+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 165 /* 80+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 166 /* 90+ */ ___, ___, '\\', ___, ___, ___, ___, ___, ___, ___, 167 /* 100+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 168 /* 110+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 169 /* 120+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 170 /* 130+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 171 /* 140+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 172 /* 150+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 173 /* 160+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 174 /* 170+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 175 /* 180+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 176 /* 190+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 177 /* 200+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 178 /* 210+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 179 /* 220+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 180 /* 230+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 181 /* 240+ */ ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, 182 /* 250+ */ ___, ___, ___, ___, ___, ___}; 183 #undef ___ 184 // clang-format on 185 186 // First, see if we need to modify the string. 187 size_t nExtra = 0; 188 for (const char& c : aStr) { 189 // ensure it can't be interpreted as negative 190 uint8_t u = static_cast<uint8_t>(c); 191 if (u == 0) { 192 // Null terminator within the span, assume we may have been given a 193 // span to a buffer that contains a null-terminated string in it. 194 // We need to truncate the Span so that it doesn't include this null 195 // terminator and anything past it; Either we will return it as-is, or 196 // processing should stop there. 197 mStringSpan = mStringSpan.First(&c - mStringSpan.data()); 198 break; 199 } 200 if (TwoCharEscapes[u]) { 201 nExtra += 1; 202 } else if (u <= 0x1f) { 203 nExtra += 5; 204 } 205 } 206 207 // Note: Don't use `aStr` anymore, as it could contain a null terminator; 208 // use the correctly-sized `mStringSpan` instead. 209 210 if (nExtra == 0) { 211 // No escapes needed. mStringSpan already points at the original string. 212 CheckInvariants(); 213 return; 214 } 215 216 // Escapes are needed. We'll create a new string. 217 mOwnedStr = MakeUnique<char[]>(mStringSpan.Length() + nExtra); 218 219 size_t i = 0; 220 for (const char c : mStringSpan) { 221 // ensure it can't be interpreted as negative 222 uint8_t u = static_cast<uint8_t>(c); 223 MOZ_ASSERT(u != 0, "Null terminator should have been handled above"); 224 if (TwoCharEscapes[u]) { 225 mOwnedStr[i++] = '\\'; 226 mOwnedStr[i++] = TwoCharEscapes[u]; 227 } else if (u <= 0x1f) { 228 mOwnedStr[i++] = '\\'; 229 mOwnedStr[i++] = 'u'; 230 mOwnedStr[i++] = '0'; 231 mOwnedStr[i++] = '0'; 232 mOwnedStr[i++] = hexDigitToAsciiChar((u & 0x00f0) >> 4); 233 mOwnedStr[i++] = hexDigitToAsciiChar(u & 0x000f); 234 } else { 235 mOwnedStr[i++] = u; 236 } 237 } 238 MOZ_ASSERT(i == mStringSpan.Length() + nExtra); 239 mStringSpan = Span<const char>(mOwnedStr.get(), i); 240 CheckInvariants(); 241 } 242 243 explicit EscapedString(const char* aStr) = delete; 244 245 const Span<const char>& SpanRef() const { return mStringSpan; } 246 }; 247 248 public: 249 // Collections (objects and arrays) are printed in a multi-line style by 250 // default. This can be changed to a single-line style if SingleLineStyle is 251 // specified. If a collection is printed in single-line style, every nested 252 // collection within it is also printed in single-line style, even if 253 // multi-line style is requested. 254 // If SingleLineStyle is set in the constructer, all JSON whitespace is 255 // eliminated, including spaces after colons and commas, for the most compact 256 // encoding possible. 257 enum CollectionStyle { 258 MultiLineStyle, // the default 259 SingleLineStyle 260 }; 261 262 protected: 263 static constexpr Span<const char> scArrayBeginString = MakeStringSpan("["); 264 static constexpr Span<const char> scArrayEndString = MakeStringSpan("]"); 265 static constexpr Span<const char> scCommaString = MakeStringSpan(","); 266 static constexpr Span<const char> scEmptyString = MakeStringSpan(""); 267 static constexpr Span<const char> scFalseString = MakeStringSpan("false"); 268 static constexpr Span<const char> scNewLineString = MakeStringSpan("\n"); 269 static constexpr Span<const char> scNullString = MakeStringSpan("null"); 270 static constexpr Span<const char> scObjectBeginString = MakeStringSpan("{"); 271 static constexpr Span<const char> scObjectEndString = MakeStringSpan("}"); 272 static constexpr Span<const char> scPropertyBeginString = 273 MakeStringSpan("\""); 274 static constexpr Span<const char> scPropertyEndString = MakeStringSpan("\":"); 275 static constexpr Span<const char> scQuoteString = MakeStringSpan("\""); 276 static constexpr Span<const char> scSpaceString = MakeStringSpan(" "); 277 static constexpr Span<const char> scTopObjectBeginString = 278 MakeStringSpan("{"); 279 static constexpr Span<const char> scTopObjectEndString = MakeStringSpan("}"); 280 static constexpr Span<const char> scTrueString = MakeStringSpan("true"); 281 282 const UniquePtr<JSONWriteFunc> mMaybeOwnedWriter; 283 JSONWriteFunc& mWriter; 284 Vector<bool, 8> mNeedComma; // do we need a comma at depth N? 285 Vector<bool, 8> mNeedNewlines; // do we need newlines at depth N? 286 size_t mDepth; // the current nesting depth 287 288 void Indent() { 289 for (size_t i = 0; i < mDepth; i++) { 290 mWriter.Write(scSpaceString); 291 } 292 } 293 294 // Adds whatever is necessary (maybe a comma, and then a newline and 295 // whitespace) to separate an item (property or element) from what's come 296 // before. 297 void Separator() { 298 if (mNeedComma[mDepth]) { 299 mWriter.Write(scCommaString); 300 } 301 if (mDepth > 0 && mNeedNewlines[mDepth]) { 302 mWriter.Write(scNewLineString); 303 Indent(); 304 } else if (mNeedComma[mDepth] && mNeedNewlines[0]) { 305 mWriter.Write(scSpaceString); 306 } 307 } 308 309 void PropertyNameAndColon(const Span<const char>& aName) { 310 mWriter.Write(scPropertyBeginString); 311 mWriter.Write(EscapedString(aName).SpanRef()); 312 mWriter.Write(scPropertyEndString); 313 if (mNeedNewlines[0]) { 314 mWriter.Write(scSpaceString); 315 } 316 } 317 318 void Scalar(const Span<const char>& aMaybePropertyName, 319 const Span<const char>& aStringValue) { 320 Separator(); 321 if (!aMaybePropertyName.empty()) { 322 PropertyNameAndColon(aMaybePropertyName); 323 } 324 mWriter.Write(aStringValue); 325 mNeedComma[mDepth] = true; 326 } 327 328 void QuotedScalar(const Span<const char>& aMaybePropertyName, 329 const Span<const char>& aStringValue) { 330 Separator(); 331 if (!aMaybePropertyName.empty()) { 332 PropertyNameAndColon(aMaybePropertyName); 333 } 334 mWriter.Write(scQuoteString); 335 mWriter.Write(aStringValue); 336 mWriter.Write(scQuoteString); 337 mNeedComma[mDepth] = true; 338 } 339 340 void NewVectorEntries(bool aNeedNewLines) { 341 // If these tiny allocations OOM we might as well just crash because we 342 // must be in serious memory trouble. 343 MOZ_RELEASE_ASSERT(mNeedComma.resizeUninitialized(mDepth + 1)); 344 MOZ_RELEASE_ASSERT(mNeedNewlines.resizeUninitialized(mDepth + 1)); 345 mNeedComma[mDepth] = false; 346 mNeedNewlines[mDepth] = aNeedNewLines; 347 } 348 349 void StartCollection(const Span<const char>& aMaybePropertyName, 350 const Span<const char>& aStartChar, 351 CollectionStyle aStyle = MultiLineStyle) { 352 Separator(); 353 if (!aMaybePropertyName.empty()) { 354 PropertyNameAndColon(aMaybePropertyName); 355 } 356 mWriter.Write(aStartChar); 357 mNeedComma[mDepth] = true; 358 mDepth++; 359 NewVectorEntries(mNeedNewlines[mDepth - 1] && aStyle == MultiLineStyle); 360 } 361 362 // Adds the whitespace and closing char necessary to end a collection. 363 void EndCollection(const Span<const char>& aEndChar) { 364 MOZ_ASSERT(mDepth > 0); 365 if (mNeedNewlines[mDepth]) { 366 mWriter.Write(scNewLineString); 367 mDepth--; 368 Indent(); 369 } else { 370 mDepth--; 371 } 372 mWriter.Write(aEndChar); 373 } 374 375 public: 376 explicit JSONWriter(JSONWriteFunc& aWriter, 377 CollectionStyle aStyle = MultiLineStyle) 378 : mWriter(aWriter), mNeedComma(), mNeedNewlines(), mDepth(0) { 379 NewVectorEntries(aStyle == MultiLineStyle); 380 } 381 382 explicit JSONWriter(UniquePtr<JSONWriteFunc> aWriter, 383 CollectionStyle aStyle = MultiLineStyle) 384 : mMaybeOwnedWriter(std::move(aWriter)), 385 mWriter(*mMaybeOwnedWriter), 386 mNeedComma(), 387 mNeedNewlines(), 388 mDepth(0) { 389 MOZ_RELEASE_ASSERT( 390 mMaybeOwnedWriter, 391 "JSONWriter must be given a non-null UniquePtr<JSONWriteFunc>"); 392 NewVectorEntries(aStyle == MultiLineStyle); 393 } 394 395 // Returns the JSONWriteFunc passed in at creation, for temporary use. The 396 // JSONWriter object still owns the JSONWriteFunc. 397 JSONWriteFunc& WriteFunc() const MOZ_LIFETIME_BOUND { return mWriter; } 398 399 // For all the following functions, the "Prints:" comment indicates what the 400 // basic output looks like. However, it doesn't indicate the whitespace and 401 // trailing commas, which are automatically added as required. 402 // 403 // All property names and string properties are escaped as necessary. 404 405 // Prints: { 406 void Start(CollectionStyle aStyle = MultiLineStyle) { 407 StartCollection(scEmptyString, scTopObjectBeginString, aStyle); 408 } 409 410 // Prints: } and final newline. 411 void End() { 412 EndCollection(scTopObjectEndString); 413 if (mNeedNewlines[mDepth]) { 414 mWriter.Write(scNewLineString); 415 } 416 } 417 418 // Prints: "<aName>": null 419 void NullProperty(const Span<const char>& aName) { 420 Scalar(aName, scNullString); 421 } 422 423 template <size_t N> 424 void NullProperty(const char (&aName)[N]) { 425 // Keep null terminator from literal strings, will be removed by 426 // EscapedString. This way C buffer arrays can be used as well. 427 NullProperty(Span<const char>(aName, N)); 428 } 429 430 // Prints: null 431 void NullElement() { NullProperty(scEmptyString); } 432 433 // Prints: "<aName>": <aBool> 434 void BoolProperty(const Span<const char>& aName, bool aBool) { 435 Scalar(aName, aBool ? scTrueString : scFalseString); 436 } 437 438 template <size_t N> 439 void BoolProperty(const char (&aName)[N], bool aBool) { 440 // Keep null terminator from literal strings, will be removed by 441 // EscapedString. This way C buffer arrays can be used as well. 442 BoolProperty(Span<const char>(aName, N), aBool); 443 } 444 445 // Prints: <aBool> 446 void BoolElement(bool aBool) { BoolProperty(scEmptyString, aBool); } 447 448 // Prints: "<aName>": <aInt> 449 void IntProperty(const Span<const char>& aName, int64_t aInt) { 450 char buf[64]; 451 int len = SprintfLiteral(buf, "%" PRId64, aInt); 452 MOZ_RELEASE_ASSERT(len > 0); 453 Scalar(aName, Span<const char>(buf, size_t(len))); 454 } 455 456 template <size_t N> 457 void IntProperty(const char (&aName)[N], int64_t aInt) { 458 // Keep null terminator from literal strings, will be removed by 459 // EscapedString. This way C buffer arrays can be used as well. 460 IntProperty(Span<const char>(aName, N), aInt); 461 } 462 463 // Prints: <aInt> 464 void IntElement(int64_t aInt) { IntProperty(scEmptyString, aInt); } 465 466 // Prints: "<aName>": <aDouble> 467 void DoubleProperty(const Span<const char>& aName, double aDouble) { 468 static const size_t buflen = 64; 469 char buf[buflen]; 470 const double_conversion::DoubleToStringConverter& converter = 471 double_conversion::DoubleToStringConverter::EcmaScriptConverter(); 472 double_conversion::StringBuilder builder(buf, buflen); 473 converter.ToShortest(aDouble, &builder); 474 // TODO: The builder should know the length?! 475 Scalar(aName, MakeStringSpan(builder.Finalize())); 476 } 477 478 template <size_t N> 479 void DoubleProperty(const char (&aName)[N], double aDouble) { 480 // Keep null terminator from literal strings, will be removed by 481 // EscapedString. This way C buffer arrays can be used as well. 482 DoubleProperty(Span<const char>(aName, N), aDouble); 483 } 484 485 // Prints: <aDouble> 486 void DoubleElement(double aDouble) { DoubleProperty(scEmptyString, aDouble); } 487 488 // Prints: "<aName>": "<aStr>" 489 void StringProperty(const Span<const char>& aName, 490 const Span<const char>& aStr) { 491 QuotedScalar(aName, EscapedString(aStr).SpanRef()); 492 } 493 494 template <size_t NN> 495 void StringProperty(const char (&aName)[NN], const Span<const char>& aStr) { 496 // Keep null terminator from literal strings, will be removed by 497 // EscapedString. This way C buffer arrays can be used as well. 498 StringProperty(Span<const char>(aName, NN), aStr); 499 } 500 501 template <size_t SN> 502 void StringProperty(const Span<const char>& aName, const char (&aStr)[SN]) { 503 // Keep null terminator from literal strings, will be removed by 504 // EscapedString. This way C buffer arrays can be used as well. 505 StringProperty(aName, Span<const char>(aStr, SN)); 506 } 507 508 template <size_t NN, size_t SN> 509 void StringProperty(const char (&aName)[NN], const char (&aStr)[SN]) { 510 // Keep null terminators from literal strings, will be removed by 511 // EscapedString. This way C buffer arrays can be used as well. 512 StringProperty(Span<const char>(aName, NN), Span<const char>(aStr, SN)); 513 } 514 515 // Prints: "<aStr>" 516 void StringElement(const Span<const char>& aStr) { 517 StringProperty(scEmptyString, aStr); 518 } 519 520 template <size_t N> 521 void StringElement(const char (&aName)[N]) { 522 // Keep null terminator from literal strings, will be removed by 523 // EscapedString. This way C buffer arrays can be used as well. 524 StringElement(Span<const char>(aName, N)); 525 } 526 527 // Prints: "<aName>": [ 528 void StartArrayProperty(const Span<const char>& aName, 529 CollectionStyle aStyle = MultiLineStyle) { 530 StartCollection(aName, scArrayBeginString, aStyle); 531 } 532 533 template <size_t N> 534 void StartArrayProperty(const char (&aName)[N], 535 CollectionStyle aStyle = MultiLineStyle) { 536 // Keep null terminator from literal strings, will be removed by 537 // EscapedString. This way C buffer arrays can be used as well. 538 StartArrayProperty(Span<const char>(aName, N), aStyle); 539 } 540 541 // Prints: [ 542 void StartArrayElement(CollectionStyle aStyle = MultiLineStyle) { 543 StartArrayProperty(scEmptyString, aStyle); 544 } 545 546 // Prints: ] 547 void EndArray() { EndCollection(scArrayEndString); } 548 549 // Prints: "<aName>": { 550 void StartObjectProperty(const Span<const char>& aName, 551 CollectionStyle aStyle = MultiLineStyle) { 552 StartCollection(aName, scObjectBeginString, aStyle); 553 } 554 555 template <size_t N> 556 void StartObjectProperty(const char (&aName)[N], 557 CollectionStyle aStyle = MultiLineStyle) { 558 // Keep null terminator from literal strings, will be removed by 559 // EscapedString. This way C buffer arrays can be used as well. 560 StartObjectProperty(Span<const char>(aName, N), aStyle); 561 } 562 563 // Prints: { 564 void StartObjectElement(CollectionStyle aStyle = MultiLineStyle) { 565 StartObjectProperty(scEmptyString, aStyle); 566 } 567 568 // Prints: } 569 void EndObject() { EndCollection(scObjectEndString); } 570 }; 571 572 } // namespace mozilla 573 574 #endif /* mozilla_JSONWriter_h */