tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

parser.js (7539B)


      1 /*
      2 * Copyright (c) 2018 Deepak Kumar
      3 *
      4 * MIT License
      5 *
      6 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
      7 *
      8 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
      9 *
     10 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     11 *
     12 * https://github.com/stomp-js/stompjs
     13 * https://github.com/stomp-js/stompjs/blob/develop/src/parser.ts
     14 */
     15 
     16 "use strict";
     17 
     18 /**
     19 * @internal
     20 */
     21 const NULL = 0;
     22 /**
     23 * @internal
     24 */
     25 const LF = 10;
     26 /**
     27 * @internal
     28 */
     29 const CR = 13;
     30 /**
     31 * @internal
     32 */
     33 const COLON = 58;
     34 /**
     35 * This is an evented, rec descent parser.
     36 * A stream of Octets can be passed and whenever it recognizes
     37 * a complete Frame or an incoming ping it will invoke the registered callbacks.
     38 *
     39 * All incoming Octets are fed into _onByte function.
     40 * Depending on current state the _onByte function keeps changing.
     41 * Depending on the state it keeps accumulating into _token and _results.
     42 * State is indicated by current value of _onByte, all states are named as _collect.
     43 *
     44 * STOMP standards https://stomp.github.io/stomp-specification-1.2.html
     45 * imply that all lengths are considered in bytes (instead of string lengths).
     46 * So, before actual parsing, if the incoming data is String it is converted to Octets.
     47 * This allows faithful implementation of the protocol and allows NULL Octets to be present in the body.
     48 *
     49 * There is no peek function on the incoming data.
     50 * When a state change occurs based on an Octet without consuming the Octet,
     51 * the Octet, after state change, is fed again (_reinjectByte).
     52 * This became possible as the state change can be determined by inspecting just one Octet.
     53 *
     54 * There are two modes to collect the body, if content-length header is there then it by counting Octets
     55 * otherwise it is determined by NULL terminator.
     56 *
     57 * Following the standards, the command and headers are converted to Strings
     58 * and the body is returned as Octets.
     59 * Headers are returned as an array and not as Hash - to allow multiple occurrence of an header.
     60 *
     61 * This parser does not use Regular Expressions as that can only operate on Strings.
     62 *
     63 * It handles if multiple STOMP frames are given as one chunk, a frame is split into multiple chunks, or
     64 * any combination there of. The parser remembers its state (any partial frame) and continues when a new chunk
     65 * is pushed.
     66 *
     67 * Typically the higher level function will convert headers to Hash, handle unescaping of header values
     68 * (which is protocol version specific), and convert body to text.
     69 *
     70 * Check the parser.spec.js to understand cases that this parser is supposed to handle.
     71 *
     72 * Part of `@stomp/stompjs`.
     73 *
     74 * @internal
     75 */
     76 class Parser {
     77  constructor(onFrame, onIncomingPing) {
     78    this.onFrame = onFrame;
     79    this.onIncomingPing = onIncomingPing;
     80    this._encoder = new TextEncoder();
     81    this._decoder = new TextDecoder();
     82    this._token = [];
     83    this._initState();
     84  }
     85  parseChunk(segment, appendMissingNULLonIncoming = false) {
     86    let chunk;
     87    if (segment instanceof ArrayBuffer) {
     88      chunk = new Uint8Array(segment);
     89    } else {
     90      chunk = this._encoder.encode(segment);
     91    }
     92    // See https://github.com/stomp-js/stompjs/issues/89
     93    // Remove when underlying issue is fixed.
     94    //
     95    // Send a NULL byte, if the last byte of a Text frame was not NULL.F
     96    if (appendMissingNULLonIncoming && chunk[chunk.length - 1] !== 0) {
     97      const chunkWithNull = new Uint8Array(chunk.length + 1);
     98      chunkWithNull.set(chunk, 0);
     99      chunkWithNull[chunk.length] = 0;
    100      chunk = chunkWithNull;
    101    }
    102    // tslint:disable-next-line:prefer-for-of
    103    for (let i = 0; i < chunk.length; i++) {
    104      const byte = chunk[i];
    105      this._onByte(byte);
    106    }
    107  }
    108  // The following implements a simple Rec Descent Parser.
    109  // The grammar is simple and just one byte tells what should be the next state
    110  _collectFrame(byte) {
    111    if (byte === NULL) {
    112      // Ignore
    113      return;
    114    }
    115    if (byte === CR) {
    116      // Ignore CR
    117      return;
    118    }
    119    if (byte === LF) {
    120      // Incoming Ping
    121      this.onIncomingPing();
    122      return;
    123    }
    124    this._onByte = this._collectCommand;
    125    this._reinjectByte(byte);
    126  }
    127  _collectCommand(byte) {
    128    if (byte === CR) {
    129      // Ignore CR
    130      return;
    131    }
    132    if (byte === LF) {
    133      this._results.command = this._consumeTokenAsUTF8();
    134      this._onByte = this._collectHeaders;
    135      return;
    136    }
    137    this._consumeByte(byte);
    138  }
    139  _collectHeaders(byte) {
    140    if (byte === CR) {
    141      // Ignore CR
    142      return;
    143    }
    144    if (byte === LF) {
    145      this._setupCollectBody();
    146      return;
    147    }
    148    this._onByte = this._collectHeaderKey;
    149    this._reinjectByte(byte);
    150  }
    151  _reinjectByte(byte) {
    152    this._onByte(byte);
    153  }
    154  _collectHeaderKey(byte) {
    155    if (byte === COLON) {
    156      this._headerKey = this._consumeTokenAsUTF8();
    157      this._onByte = this._collectHeaderValue;
    158      return;
    159    }
    160    this._consumeByte(byte);
    161  }
    162  _collectHeaderValue(byte) {
    163    if (byte === CR) {
    164      // Ignore CR
    165      return;
    166    }
    167    if (byte === LF) {
    168      this._results.headers.push([this._headerKey, this._consumeTokenAsUTF8()]);
    169      this._headerKey = undefined;
    170      this._onByte = this._collectHeaders;
    171      return;
    172    }
    173    this._consumeByte(byte);
    174  }
    175  _setupCollectBody() {
    176    const contentLengthHeader = this._results.headers.filter(header => {
    177      return header[0] === "content-length";
    178    })[0];
    179    if (contentLengthHeader) {
    180      this._bodyBytesRemaining = parseInt(contentLengthHeader[1], 10);
    181      this._onByte = this._collectBodyFixedSize;
    182    } else {
    183      this._onByte = this._collectBodyNullTerminated;
    184    }
    185  }
    186  _collectBodyNullTerminated(byte) {
    187    if (byte === NULL) {
    188      this._retrievedBody();
    189      return;
    190    }
    191    this._consumeByte(byte);
    192  }
    193  _collectBodyFixedSize(byte) {
    194    // It is post decrement, so that we discard the trailing NULL octet
    195    if (this._bodyBytesRemaining-- === 0) {
    196      this._retrievedBody();
    197      return;
    198    }
    199    this._consumeByte(byte);
    200  }
    201  _retrievedBody() {
    202    this._results.binaryBody = this._consumeTokenAsRaw();
    203    this.onFrame(this._results);
    204    this._initState();
    205  }
    206  // Rec Descent Parser helpers
    207  _consumeByte(byte) {
    208    this._token.push(byte);
    209  }
    210  _consumeTokenAsUTF8() {
    211    return this._decoder.decode(this._consumeTokenAsRaw());
    212  }
    213  _consumeTokenAsRaw() {
    214    const rawResult = new Uint8Array(this._token);
    215    this._token = [];
    216    return rawResult;
    217  }
    218  _initState() {
    219    this._results = {
    220      command: undefined,
    221      headers: [],
    222      binaryBody: undefined,
    223    };
    224    this._token = [];
    225    this._headerKey = undefined;
    226    this._onByte = this._collectFrame;
    227  }
    228 }
    229 
    230 module.exports = { Parser };