proto.h (13257B)
1 // Copyright 2020 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // ----------------------------------------------------------------------------- 16 // File: internal/proto.h 17 // ----------------------------------------------------------------------------- 18 // 19 // Declares functions for serializing and deserializing data to and from memory 20 // buffers in protocol buffer wire format. This library takes no steps to 21 // ensure that the encoded data matches with any message specification. 22 23 #ifndef ABSL_LOG_INTERNAL_PROTO_H_ 24 #define ABSL_LOG_INTERNAL_PROTO_H_ 25 26 #include <cstddef> 27 #include <cstdint> 28 #include <limits> 29 30 #include "absl/base/attributes.h" 31 #include "absl/base/casts.h" 32 #include "absl/base/config.h" 33 #include "absl/strings/string_view.h" 34 #include "absl/types/span.h" 35 36 namespace absl { 37 ABSL_NAMESPACE_BEGIN 38 namespace log_internal { 39 40 // absl::Span<char> represents a view into the available space in a mutable 41 // buffer during encoding. Encoding functions shrink the span as they go so 42 // that the same view can be passed to a series of Encode functions. If the 43 // data do not fit, nothing is encoded, the view is set to size zero (so that 44 // all subsequent encode calls fail), and false is returned. Otherwise true is 45 // returned. 
// In particular, attempting to encode a series of data into an insufficient
// buffer has consistent and efficient behavior without any caller-side error
// checking. Individual values will be encoded in their entirety or not at all
// (unless one of the `Truncate` functions is used). Once a value is omitted
// because it does not fit, no subsequent values will be encoded to preserve
// ordering; the decoded sequence will be a prefix of the original sequence.

// There are two ways to encode a message-typed field:
//
// * Construct its contents in a separate buffer and use `EncodeBytes` to copy
//   it into the primary buffer with type, tag, and length.
// * Use `EncodeMessageStart` to write type and tag fields and reserve space for
//   the length field, then encode the contents directly into the buffer, then
//   use `EncodeMessageLength` to write the actual length into the reserved
//   bytes. This works fine if the actual length takes fewer bytes to encode
//   than were reserved, although you don't get your extra bytes back.
//   This approach will always produce a valid encoding, but your protocol may
//   require that the whole message field be omitted if the buffer is too small
//   to contain all desired subfields. In this case, operate on a copy of the
//   buffer view and assign back only if everything fit, i.e. if the last
//   `Encode` call returned true.

// Encodes the specified integer as a varint field and returns true if it fits.
// Used for int32_t, int64_t, uint32_t, uint64_t, bool, and enum field types.
// Consumes up to kMaxVarintSize * 2 bytes (20).
72 bool EncodeVarint(uint64_t tag, uint64_t value, absl::Span<char> *buf); 73 inline bool EncodeVarint(uint64_t tag, int64_t value, absl::Span<char> *buf) { 74 return EncodeVarint(tag, static_cast<uint64_t>(value), buf); 75 } 76 inline bool EncodeVarint(uint64_t tag, uint32_t value, absl::Span<char> *buf) { 77 return EncodeVarint(tag, static_cast<uint64_t>(value), buf); 78 } 79 inline bool EncodeVarint(uint64_t tag, int32_t value, absl::Span<char> *buf) { 80 return EncodeVarint(tag, static_cast<uint64_t>(value), buf); 81 } 82 83 // Encodes the specified integer as a varint field using ZigZag encoding and 84 // returns true if it fits. 85 // Used for sint32 and sint64 field types. 86 // Consumes up to kMaxVarintSize * 2 bytes (20). 87 inline bool EncodeVarintZigZag(uint64_t tag, int64_t value, 88 absl::Span<char> *buf) { 89 if (value < 0) 90 return EncodeVarint(tag, 2 * static_cast<uint64_t>(-(value + 1)) + 1, buf); 91 return EncodeVarint(tag, 2 * static_cast<uint64_t>(value), buf); 92 } 93 94 // Encodes the specified integer as a 64-bit field and returns true if it fits. 95 // Used for fixed64 and sfixed64 field types. 96 // Consumes up to kMaxVarintSize + 8 bytes (18). 97 bool Encode64Bit(uint64_t tag, uint64_t value, absl::Span<char> *buf); 98 inline bool Encode64Bit(uint64_t tag, int64_t value, absl::Span<char> *buf) { 99 return Encode64Bit(tag, static_cast<uint64_t>(value), buf); 100 } 101 inline bool Encode64Bit(uint64_t tag, uint32_t value, absl::Span<char> *buf) { 102 return Encode64Bit(tag, static_cast<uint64_t>(value), buf); 103 } 104 inline bool Encode64Bit(uint64_t tag, int32_t value, absl::Span<char> *buf) { 105 return Encode64Bit(tag, static_cast<uint64_t>(value), buf); 106 } 107 108 // Encodes the specified double as a 64-bit field and returns true if it fits. 109 // Used for double field type. 110 // Consumes up to kMaxVarintSize + 8 bytes (18). 
111 inline bool EncodeDouble(uint64_t tag, double value, absl::Span<char> *buf) { 112 return Encode64Bit(tag, absl::bit_cast<uint64_t>(value), buf); 113 } 114 115 // Encodes the specified integer as a 32-bit field and returns true if it fits. 116 // Used for fixed32 and sfixed32 field types. 117 // Consumes up to kMaxVarintSize + 4 bytes (14). 118 bool Encode32Bit(uint64_t tag, uint32_t value, absl::Span<char> *buf); 119 inline bool Encode32Bit(uint64_t tag, int32_t value, absl::Span<char> *buf) { 120 return Encode32Bit(tag, static_cast<uint32_t>(value), buf); 121 } 122 123 // Encodes the specified float as a 32-bit field and returns true if it fits. 124 // Used for float field type. 125 // Consumes up to kMaxVarintSize + 4 bytes (14). 126 inline bool EncodeFloat(uint64_t tag, float value, absl::Span<char> *buf) { 127 return Encode32Bit(tag, absl::bit_cast<uint32_t>(value), buf); 128 } 129 130 // Encodes the specified bytes as a length-delimited field and returns true if 131 // they fit. 132 // Used for string, bytes, message, and packed-repeated field type. 133 // Consumes up to kMaxVarintSize * 2 + value.size() bytes (20 + value.size()). 134 bool EncodeBytes(uint64_t tag, absl::Span<const char> value, 135 absl::Span<char> *buf); 136 137 // Encodes as many of the specified bytes as will fit as a length-delimited 138 // field and returns true as long as the field header (`tag_type` and `length`) 139 // fits. 140 // Used for string, bytes, message, and packed-repeated field type. 141 // Consumes up to kMaxVarintSize * 2 + value.size() bytes (20 + value.size()). 142 bool EncodeBytesTruncate(uint64_t tag, absl::Span<const char> value, 143 absl::Span<char> *buf); 144 145 // Encodes the specified string as a length-delimited field and returns true if 146 // it fits. 147 // Used for string, bytes, message, and packed-repeated field type. 148 // Consumes up to kMaxVarintSize * 2 + value.size() bytes (20 + value.size()). 
149 inline bool EncodeString(uint64_t tag, absl::string_view value, 150 absl::Span<char> *buf) { 151 return EncodeBytes(tag, value, buf); 152 } 153 154 // Encodes as much of the specified string as will fit as a length-delimited 155 // field and returns true as long as the field header (`tag_type` and `length`) 156 // fits. 157 // Used for string, bytes, message, and packed-repeated field type. 158 // Consumes up to kMaxVarintSize * 2 + value.size() bytes (20 + value.size()). 159 inline bool EncodeStringTruncate(uint64_t tag, absl::string_view value, 160 absl::Span<char> *buf) { 161 return EncodeBytesTruncate(tag, value, buf); 162 } 163 164 // Encodes the header for a length-delimited field containing up to `max_size` 165 // bytes or the number remaining in the buffer, whichever is less. If the 166 // header fits, a non-nullptr `Span` is returned; this must be passed to 167 // `EncodeMessageLength` after all contents are encoded to finalize the length 168 // field. If the header does not fit, a nullptr `Span` is returned which is 169 // safe to pass to `EncodeMessageLength` but need not be. 170 // Used for string, bytes, message, and packed-repeated field type. 171 // Consumes up to kMaxVarintSize * 2 bytes (20). 172 [[nodiscard]] absl::Span<char> EncodeMessageStart(uint64_t tag, 173 uint64_t max_size, 174 absl::Span<char> *buf); 175 176 // Finalizes the length field in `msg` so that it encompasses all data encoded 177 // since the call to `EncodeMessageStart` which returned `msg`. Does nothing if 178 // `msg` is a `nullptr` `Span`. 179 void EncodeMessageLength(absl::Span<char> msg, const absl::Span<char> *buf); 180 181 enum class WireType : uint64_t { 182 kVarint = 0, 183 k64Bit = 1, 184 kLengthDelimited = 2, 185 k32Bit = 5, 186 }; 187 188 constexpr size_t VarintSize(uint64_t value) { 189 return value < 128 ? 
1 : 1 + VarintSize(value >> 7); 190 } 191 constexpr size_t MinVarintSize() { 192 return VarintSize((std::numeric_limits<uint64_t>::min)()); 193 } 194 constexpr size_t MaxVarintSize() { 195 return VarintSize((std::numeric_limits<uint64_t>::max)()); 196 } 197 198 constexpr uint64_t MaxVarintForSize(size_t size) { 199 return size >= 10 ? (std::numeric_limits<uint64_t>::max)() 200 : (static_cast<uint64_t>(1) << size * 7) - 1; 201 } 202 constexpr uint64_t MakeTagType(uint64_t tag, WireType type) { 203 return tag << 3 | static_cast<uint64_t>(type); 204 } 205 206 // `BufferSizeFor` returns a number of bytes guaranteed to be sufficient to 207 // store encoded fields as `(tag, WireType)`, regardless of data values. This 208 // only makes sense for `WireType::kLengthDelimited` if you add in the length of 209 // the contents yourself, e.g. for string and bytes fields by adding the lengths 210 // of any encoded strings to the return value or for submessage fields by 211 // enumerating the fields you may encode into their contents. 212 constexpr size_t BufferSizeFor(uint64_t tag, WireType type) { 213 size_t buffer_size = VarintSize(MakeTagType(tag, type)); 214 switch (type) { 215 case WireType::kVarint: 216 buffer_size += MaxVarintSize(); 217 break; 218 case WireType::k64Bit: 219 buffer_size += size_t{8}; 220 break; 221 case WireType::kLengthDelimited: 222 buffer_size += MaxVarintSize(); 223 break; 224 case WireType::k32Bit: 225 buffer_size += size_t{4}; 226 break; 227 } 228 return buffer_size; 229 } 230 231 // absl::Span<const char> represents a view into the un-processed space in a 232 // buffer during decoding. Decoding functions shrink the span as they go so 233 // that the same view can be decoded iteratively until all data are processed. 234 // In general, if the buffer is exhausted but additional bytes are expected by 235 // the decoder, it will return values as if the additional bytes were zeros. 
236 // Length-delimited fields are an exception - if the encoded length field 237 // indicates more data bytes than are available in the buffer, the `bytes_value` 238 // and `string_value` accessors will return truncated views. 239 240 class ProtoField final { 241 public: 242 // Consumes bytes from `data` and returns true if there were any bytes to 243 // decode. 244 bool DecodeFrom(absl::Span<const char> *data); 245 uint64_t tag() const { return tag_; } 246 WireType type() const { return type_; } 247 248 // These value accessors will return nonsense if the data were not encoded in 249 // the corresponding wiretype from the corresponding C++ (or other language) 250 // type. 251 252 double double_value() const { return absl::bit_cast<double>(value_); } 253 float float_value() const { 254 return absl::bit_cast<float>(static_cast<uint32_t>(value_)); 255 } 256 int32_t int32_value() const { return static_cast<int32_t>(value_); } 257 int64_t int64_value() const { return static_cast<int64_t>(value_); } 258 int32_t sint32_value() const { 259 if (value_ % 2) return static_cast<int32_t>(0 - ((value_ - 1) / 2) - 1); 260 return static_cast<int32_t>(value_ / 2); 261 } 262 int64_t sint64_value() const { 263 if (value_ % 2) return 0 - ((value_ - 1) / 2) - 1; 264 return value_ / 2; 265 } 266 uint32_t uint32_value() const { return static_cast<uint32_t>(value_); } 267 uint64_t uint64_value() const { return value_; } 268 bool bool_value() const { return value_ != 0; } 269 // To decode an enum, call int32_value() and cast to the appropriate type. 270 // Note that the official C++ proto compiler treats enum fields with values 271 // that do not correspond to a defined enumerator as unknown fields. 272 273 // To decode fields within a submessage field, call 274 // `DecodeNextField(field.BytesValue())`. 
275 absl::Span<const char> bytes_value() const { return data_; } 276 absl::string_view string_value() const { 277 const auto data = bytes_value(); 278 return absl::string_view(data.data(), data.size()); 279 } 280 // Returns the encoded length of a length-delimited field. This equals 281 // `bytes_value().size()` except when the latter has been truncated due to 282 // buffer underrun. 283 uint64_t encoded_length() const { return value_; } 284 285 private: 286 uint64_t tag_; 287 WireType type_; 288 // For `kTypeVarint`, `kType64Bit`, and `kType32Bit`, holds the decoded value. 289 // For `kTypeLengthDelimited`, holds the decoded length. 290 uint64_t value_; 291 absl::Span<const char> data_; 292 }; 293 294 } // namespace log_internal 295 ABSL_NAMESPACE_END 296 } // namespace absl 297 298 #endif // ABSL_LOG_INTERNAL_PROTO_H_