tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

LzmaSpec.cpp (15999B)


      1 /* LzmaSpec.cpp -- LZMA Reference Decoder
      2 2015-06-14 : Igor Pavlov : Public domain */
      3 
      4 // This code implements LZMA file decoding according to LZMA specification.
      5 // This code is not optimized for speed.
      6 
      7 #include <stdio.h>
      8 
      9 #ifdef _MSC_VER
     10  #pragma warning(disable : 4710) // function not inlined
     11  #pragma warning(disable : 4996) // This function or variable may be unsafe
     12 #endif
     13 
     14 typedef unsigned char Byte;
     15 typedef unsigned short UInt16;
     16 
     17 #ifdef _LZMA_UINT32_IS_ULONG
     18  typedef unsigned long UInt32;
     19 #else
     20  typedef unsigned int UInt32;
     21 #endif
     22 
     23 #if defined(_MSC_VER) || defined(__BORLANDC__)
     24  typedef unsigned __int64 UInt64;
     25 #else
     26  typedef unsigned long long int UInt64;
     27 #endif
     28 
     29 
     30 struct CInputStream
     31 {
     32  FILE *File;
     33  UInt64 Processed;
     34  
     35  void Init() { Processed = 0; }
     36 
     37  Byte ReadByte()
     38  {
     39    int c = getc(File);
     40    if (c < 0)
     41      throw "Unexpected end of file";
     42    Processed++;
     43    return (Byte)c;
     44  }
     45 };
     46 
     47 
     48 struct COutStream
     49 {
     50  FILE *File;
     51  UInt64 Processed;
     52 
     53  void Init() { Processed = 0; }
     54 
     55  void WriteByte(Byte b)
     56  {
     57    if (putc(b, File) == EOF)
     58      throw "File writing error";
     59    Processed++;
     60  }
     61 };
     62 
     63 
     64 class COutWindow
     65 {
     66  Byte *Buf;
     67  UInt32 Pos;
     68  UInt32 Size;
     69  bool IsFull;
     70 
     71 public:
     72  unsigned TotalPos;
     73  COutStream OutStream;
     74 
     75  COutWindow(): Buf(NULL) {}
     76  ~COutWindow() { delete []Buf; }
     77 
     78  void Create(UInt32 dictSize)
     79  {
     80    Buf = new Byte[dictSize];
     81    Pos = 0;
     82    Size = dictSize;
     83    IsFull = false;
     84    TotalPos = 0;
     85  }
     86 
     87  void PutByte(Byte b)
     88  {
     89    TotalPos++;
     90    Buf[Pos++] = b;
     91    if (Pos == Size)
     92    {
     93      Pos = 0;
     94      IsFull = true;
     95    }
     96    OutStream.WriteByte(b);
     97  }
     98 
     99  Byte GetByte(UInt32 dist) const
    100  {
    101    return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos];
    102  }
    103 
    104  void CopyMatch(UInt32 dist, unsigned len)
    105  {
    106    for (; len > 0; len--)
    107      PutByte(GetByte(dist));
    108  }
    109 
    110  bool CheckDistance(UInt32 dist) const
    111  {
    112    return dist <= Pos || IsFull;
    113  }
    114 
    115  bool IsEmpty() const
    116  {
    117    return Pos == 0 && !IsFull;
    118  }
    119 };
    120 
    121 
    122 #define kNumBitModelTotalBits 11
    123 #define kNumMoveBits 5
    124 
    125 typedef UInt16 CProb;
    126 
    127 #define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2)
    128 
    129 #define INIT_PROBS(p) \
    130 { for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; }
    131 
    132 class CRangeDecoder
    133 {
    134  UInt32 Range;
    135  UInt32 Code;
    136 
    137  void Normalize();
    138 
    139 public:
    140 
    141  CInputStream *InStream;
    142  bool Corrupted;
    143 
    144  bool Init();
    145  bool IsFinishedOK() const { return Code == 0; }
    146 
    147  UInt32 DecodeDirectBits(unsigned numBits);
    148  unsigned DecodeBit(CProb *prob);
    149 };
    150 
    151 bool CRangeDecoder::Init()
    152 {
    153  Corrupted = false;
    154  Range = 0xFFFFFFFF;
    155  Code = 0;
    156 
    157  Byte b = InStream->ReadByte();
    158  
    159  for (int i = 0; i < 4; i++)
    160    Code = (Code << 8) | InStream->ReadByte();
    161  
    162  if (b != 0 || Code == Range)
    163    Corrupted = true;
    164  return b == 0;
    165 }
    166 
    167 #define kTopValue ((UInt32)1 << 24)
    168 
    169 void CRangeDecoder::Normalize()
    170 {
    171  if (Range < kTopValue)
    172  {
    173    Range <<= 8;
    174    Code = (Code << 8) | InStream->ReadByte();
    175  }
    176 }
    177 
    178 UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits)
    179 {
    180  UInt32 res = 0;
    181  do
    182  {
    183    Range >>= 1;
    184    Code -= Range;
    185    UInt32 t = 0 - ((UInt32)Code >> 31);
    186    Code += Range & t;
    187    
    188    if (Code == Range)
    189      Corrupted = true;
    190    
    191    Normalize();
    192    res <<= 1;
    193    res += t + 1;
    194  }
    195  while (--numBits);
    196  return res;
    197 }
    198 
    199 unsigned CRangeDecoder::DecodeBit(CProb *prob)
    200 {
    201  unsigned v = *prob;
    202  UInt32 bound = (Range >> kNumBitModelTotalBits) * v;
    203  unsigned symbol;
    204  if (Code < bound)
    205  {
    206    v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits;
    207    Range = bound;
    208    symbol = 0;
    209  }
    210  else
    211  {
    212    v -= v >> kNumMoveBits;
    213    Code -= bound;
    214    Range -= bound;
    215    symbol = 1;
    216  }
    217  *prob = (CProb)v;
    218  Normalize();
    219  return symbol;
    220 }
    221 
    222 
    223 unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc)
    224 {
    225  unsigned m = 1;
    226  unsigned symbol = 0;
    227  for (unsigned i = 0; i < numBits; i++)
    228  {
    229    unsigned bit = rc->DecodeBit(&probs[m]);
    230    m <<= 1;
    231    m += bit;
    232    symbol |= (bit << i);
    233  }
    234  return symbol;
    235 }
    236 
    237 template <unsigned NumBits>
    238 class CBitTreeDecoder
    239 {
    240  CProb Probs[(unsigned)1 << NumBits];
    241 
    242 public:
    243 
    244  void Init()
    245  {
    246    INIT_PROBS(Probs);
    247  }
    248 
    249  unsigned Decode(CRangeDecoder *rc)
    250  {
    251    unsigned m = 1;
    252    for (unsigned i = 0; i < NumBits; i++)
    253      m = (m << 1) + rc->DecodeBit(&Probs[m]);
    254    return m - ((unsigned)1 << NumBits);
    255  }
    256 
    257  unsigned ReverseDecode(CRangeDecoder *rc)
    258  {
    259    return BitTreeReverseDecode(Probs, NumBits, rc);
    260  }
    261 };
    262 
    263 #define kNumPosBitsMax 4
    264 
    265 #define kNumStates 12
    266 #define kNumLenToPosStates 4
    267 #define kNumAlignBits 4
    268 #define kStartPosModelIndex 4
    269 #define kEndPosModelIndex 14
    270 #define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
    271 #define kMatchMinLen 2
    272 
    273 class CLenDecoder
    274 {
    275  CProb Choice;
    276  CProb Choice2;
    277  CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax];
    278  CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax];
    279  CBitTreeDecoder<8> HighCoder;
    280 
    281 public:
    282 
    283  void Init()
    284  {
    285    Choice = PROB_INIT_VAL;
    286    Choice2 = PROB_INIT_VAL;
    287    HighCoder.Init();
    288    for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++)
    289    {
    290      LowCoder[i].Init();
    291      MidCoder[i].Init();
    292    }
    293  }
    294 
    295  unsigned Decode(CRangeDecoder *rc, unsigned posState)
    296  {
    297    if (rc->DecodeBit(&Choice) == 0)
    298      return LowCoder[posState].Decode(rc);
    299    if (rc->DecodeBit(&Choice2) == 0)
    300      return 8 + MidCoder[posState].Decode(rc);
    301    return 16 + HighCoder.Decode(rc);
    302  }
    303 };
    304 
    305 unsigned UpdateState_Literal(unsigned state)
    306 {
    307  if (state < 4) return 0;
    308  else if (state < 10) return state - 3;
    309  else return state - 6;
    310 }
    311 unsigned UpdateState_Match   (unsigned state) { return state < 7 ? 7 : 10; }
    312 unsigned UpdateState_Rep     (unsigned state) { return state < 7 ? 8 : 11; }
    313 unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 9 : 11; }
    314 
    315 #define LZMA_DIC_MIN (1 << 12)
    316 
    317 class CLzmaDecoder
    318 {
    319 public:
    320  CRangeDecoder RangeDec;
    321  COutWindow OutWindow;
    322 
    323  bool markerIsMandatory;
    324  unsigned lc, pb, lp;
    325  UInt32 dictSize;
    326  UInt32 dictSizeInProperties;
    327 
    328  void DecodeProperties(const Byte *properties)
    329  {
    330    unsigned d = properties[0];
    331    if (d >= (9 * 5 * 5))
    332      throw "Incorrect LZMA properties";
    333    lc = d % 9;
    334    d /= 9;
    335    pb = d / 5;
    336    lp = d % 5;
    337    dictSizeInProperties = 0;
    338    for (int i = 0; i < 4; i++)
    339      dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i);
    340    dictSize = dictSizeInProperties;
    341    if (dictSize < LZMA_DIC_MIN)
    342      dictSize = LZMA_DIC_MIN;
    343  }
    344 
    345  CLzmaDecoder(): LitProbs(NULL) {}
    346  ~CLzmaDecoder() { delete []LitProbs; }
    347 
    348  void Create()
    349  {
    350    OutWindow.Create(dictSize);
    351    CreateLiterals();
    352  }
    353 
    354  int Decode(bool unpackSizeDefined, UInt64 unpackSize);
    355  
    356 private:
    357 
    358  CProb *LitProbs;
    359 
    360  void CreateLiterals()
    361  {
    362    LitProbs = new CProb[(UInt32)0x300 << (lc + lp)];
    363  }
    364  
    365  void InitLiterals()
    366  {
    367    UInt32 num = (UInt32)0x300 << (lc + lp);
    368    for (UInt32 i = 0; i < num; i++)
    369      LitProbs[i] = PROB_INIT_VAL;
    370  }
    371  
    372  void DecodeLiteral(unsigned state, UInt32 rep0)
    373  {
    374    unsigned prevByte = 0;
    375    if (!OutWindow.IsEmpty())
    376      prevByte = OutWindow.GetByte(1);
    377    
    378    unsigned symbol = 1;
    379    unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc));
    380    CProb *probs = &LitProbs[(UInt32)0x300 * litState];
    381    
    382    if (state >= 7)
    383    {
    384      unsigned matchByte = OutWindow.GetByte(rep0 + 1);
    385      do
    386      {
    387        unsigned matchBit = (matchByte >> 7) & 1;
    388        matchByte <<= 1;
    389        unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]);
    390        symbol = (symbol << 1) | bit;
    391        if (matchBit != bit)
    392          break;
    393      }
    394      while (symbol < 0x100);
    395    }
    396    while (symbol < 0x100)
    397      symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]);
    398    OutWindow.PutByte((Byte)(symbol - 0x100));
    399  }
    400 
    401  CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates];
    402  CBitTreeDecoder<kNumAlignBits> AlignDecoder;
    403  CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex];
    404  
    405  void InitDist()
    406  {
    407    for (unsigned i = 0; i < kNumLenToPosStates; i++)
    408      PosSlotDecoder[i].Init();
    409    AlignDecoder.Init();
    410    INIT_PROBS(PosDecoders);
    411  }
    412  
    413  unsigned DecodeDistance(unsigned len)
    414  {
    415    unsigned lenState = len;
    416    if (lenState > kNumLenToPosStates - 1)
    417      lenState = kNumLenToPosStates - 1;
    418    
    419    unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec);
    420    if (posSlot < 4)
    421      return posSlot;
    422    
    423    unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1);
    424    UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits);
    425    if (posSlot < kEndPosModelIndex)
    426      dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec);
    427    else
    428    {
    429      dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits;
    430      dist += AlignDecoder.ReverseDecode(&RangeDec);
    431    }
    432    return dist;
    433  }
    434 
    435  CProb IsMatch[kNumStates << kNumPosBitsMax];
    436  CProb IsRep[kNumStates];
    437  CProb IsRepG0[kNumStates];
    438  CProb IsRepG1[kNumStates];
    439  CProb IsRepG2[kNumStates];
    440  CProb IsRep0Long[kNumStates << kNumPosBitsMax];
    441 
    442  CLenDecoder LenDecoder;
    443  CLenDecoder RepLenDecoder;
    444 
    445  void Init()
    446  {
    447    InitLiterals();
    448    InitDist();
    449 
    450    INIT_PROBS(IsMatch);
    451    INIT_PROBS(IsRep);
    452    INIT_PROBS(IsRepG0);
    453    INIT_PROBS(IsRepG1);
    454    INIT_PROBS(IsRepG2);
    455    INIT_PROBS(IsRep0Long);
    456 
    457    LenDecoder.Init();
    458    RepLenDecoder.Init();
    459  }
    460 };
    461    
    462 
    463 #define LZMA_RES_ERROR                   0
    464 #define LZMA_RES_FINISHED_WITH_MARKER    1
    465 #define LZMA_RES_FINISHED_WITHOUT_MARKER 2
    466 
    467 int CLzmaDecoder::Decode(bool unpackSizeDefined, UInt64 unpackSize)
    468 {
    469  if (!RangeDec.Init())
    470    return LZMA_RES_ERROR;
    471 
    472  Init();
    473 
    474  UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0;
    475  unsigned state = 0;
    476  
    477  for (;;)
    478  {
    479    if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory)
    480      if (RangeDec.IsFinishedOK())
    481        return LZMA_RES_FINISHED_WITHOUT_MARKER;
    482 
    483    unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1);
    484 
    485    if (RangeDec.DecodeBit(&IsMatch[(state << kNumPosBitsMax) + posState]) == 0)
    486    {
    487      if (unpackSizeDefined && unpackSize == 0)
    488        return LZMA_RES_ERROR;
    489      DecodeLiteral(state, rep0);
    490      state = UpdateState_Literal(state);
    491      unpackSize--;
    492      continue;
    493    }
    494    
    495    unsigned len;
    496    
    497    if (RangeDec.DecodeBit(&IsRep[state]) != 0)
    498    {
    499      if (unpackSizeDefined && unpackSize == 0)
    500        return LZMA_RES_ERROR;
    501      if (OutWindow.IsEmpty())
    502        return LZMA_RES_ERROR;
    503      if (RangeDec.DecodeBit(&IsRepG0[state]) == 0)
    504      {
    505        if (RangeDec.DecodeBit(&IsRep0Long[(state << kNumPosBitsMax) + posState]) == 0)
    506        {
    507          state = UpdateState_ShortRep(state);
    508          OutWindow.PutByte(OutWindow.GetByte(rep0 + 1));
    509          unpackSize--;
    510          continue;
    511        }
    512      }
    513      else
    514      {
    515        UInt32 dist;
    516        if (RangeDec.DecodeBit(&IsRepG1[state]) == 0)
    517          dist = rep1;
    518        else
    519        {
    520          if (RangeDec.DecodeBit(&IsRepG2[state]) == 0)
    521            dist = rep2;
    522          else
    523          {
    524            dist = rep3;
    525            rep3 = rep2;
    526          }
    527          rep2 = rep1;
    528        }
    529        rep1 = rep0;
    530        rep0 = dist;
    531      }
    532      len = RepLenDecoder.Decode(&RangeDec, posState);
    533      state = UpdateState_Rep(state);
    534    }
    535    else
    536    {
    537      rep3 = rep2;
    538      rep2 = rep1;
    539      rep1 = rep0;
    540      len = LenDecoder.Decode(&RangeDec, posState);
    541      state = UpdateState_Match(state);
    542      rep0 = DecodeDistance(len);
    543      if (rep0 == 0xFFFFFFFF)
    544        return RangeDec.IsFinishedOK() ?
    545            LZMA_RES_FINISHED_WITH_MARKER :
    546            LZMA_RES_ERROR;
    547 
    548      if (unpackSizeDefined && unpackSize == 0)
    549        return LZMA_RES_ERROR;
    550      if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0))
    551        return LZMA_RES_ERROR;
    552    }
    553    len += kMatchMinLen;
    554    bool isError = false;
    555    if (unpackSizeDefined && unpackSize < len)
    556    {
    557      len = (unsigned)unpackSize;
    558      isError = true;
    559    }
    560    OutWindow.CopyMatch(rep0 + 1, len);
    561    unpackSize -= len;
    562    if (isError)
    563      return LZMA_RES_ERROR;
    564  }
    565 }
    566 
    567 static void Print(const char *s)
    568 {
    569  fputs(s, stdout);
    570 }
    571 
    572 static void PrintError(const char *s)
    573 {
    574  fputs(s, stderr);
    575 }
    576 
    577 
    578 #define CONVERT_INT_TO_STR(charType, tempSize) \
    579 
    580 void ConvertUInt64ToString(UInt64 val, char *s)
    581 {
    582  char temp[32];
    583  unsigned i = 0;
    584  while (val >= 10)
    585  {
    586    temp[i++] = (char)('0' + (unsigned)(val % 10));
    587    val /= 10;
    588  }
    589  *s++ = (char)('0' + (unsigned)val);
    590  while (i != 0)
    591  {
    592    i--;
    593    *s++ = temp[i];
    594  }
    595  *s = 0;
    596 }
    597 
    598 void PrintUInt64(const char *title, UInt64 v)
    599 {
    600  Print(title);
    601  Print(" : ");
    602  char s[32];
    603  ConvertUInt64ToString(v, s);
    604  Print(s);
    605  Print(" bytes \n");
    606 }
    607 
    608 int main2(int numArgs, const char *args[])
    609 {
    610  Print("\nLZMA Reference Decoder 15.00 : Igor Pavlov : Public domain : 2015-04-16\n");
    611  if (numArgs == 1)
    612    Print("\nUse: lzmaSpec a.lzma outFile");
    613 
    614  if (numArgs != 3)
    615    throw "you must specify two parameters";
    616 
    617  CInputStream inStream;
    618  inStream.File = fopen(args[1], "rb");
    619  inStream.Init();
    620  if (inStream.File == 0)
    621    throw "Can't open input file";
    622 
    623  CLzmaDecoder lzmaDecoder;
    624  lzmaDecoder.OutWindow.OutStream.File = fopen(args[2], "wb+");
    625  lzmaDecoder.OutWindow.OutStream.Init();
    626  if (inStream.File == 0)
    627    throw "Can't open output file";
    628 
    629  Byte header[13];
    630  int i;
    631  for (i = 0; i < 13; i++)
    632    header[i] = inStream.ReadByte();
    633 
    634  lzmaDecoder.DecodeProperties(header);
    635 
    636  printf("\nlc=%d, lp=%d, pb=%d", lzmaDecoder.lc, lzmaDecoder.lp, lzmaDecoder.pb);
    637  printf("\nDictionary Size in properties = %u", lzmaDecoder.dictSizeInProperties);
    638  printf("\nDictionary Size for decoding  = %u", lzmaDecoder.dictSize);
    639 
    640  UInt64 unpackSize = 0;
    641  bool unpackSizeDefined = false;
    642  for (i = 0; i < 8; i++)
    643  {
    644    Byte b = header[5 + i];
    645    if (b != 0xFF)
    646      unpackSizeDefined = true;
    647    unpackSize |= (UInt64)b << (8 * i);
    648  }
    649 
    650  lzmaDecoder.markerIsMandatory = !unpackSizeDefined;
    651 
    652  Print("\n");
    653  if (unpackSizeDefined)
    654    PrintUInt64("Uncompressed Size", unpackSize);
    655  else
    656    Print("End marker is expected\n");
    657  lzmaDecoder.RangeDec.InStream = &inStream;
    658 
    659  Print("\n");
    660 
    661  lzmaDecoder.Create();
    662  
    663  int res = lzmaDecoder.Decode(unpackSizeDefined, unpackSize);
    664 
    665  PrintUInt64("Read    ", inStream.Processed);
    666  PrintUInt64("Written ", lzmaDecoder.OutWindow.OutStream.Processed);
    667 
    668  if (res == LZMA_RES_ERROR)
    669    throw "LZMA decoding error";
    670  else if (res == LZMA_RES_FINISHED_WITHOUT_MARKER)
    671    Print("Finished without end marker");
    672  else if (res == LZMA_RES_FINISHED_WITH_MARKER)
    673  {
    674    if (unpackSizeDefined)
    675    {
    676      if (lzmaDecoder.OutWindow.OutStream.Processed != unpackSize)
    677        throw "Finished with end marker before than specified size";
    678      Print("Warning: ");
    679    }
    680    Print("Finished with end marker");
    681  }
    682  else
    683    throw "Internal Error";
    684 
    685  Print("\n");
    686  
    687  if (lzmaDecoder.RangeDec.Corrupted)
    688  {
    689    Print("\nWarning: LZMA stream is corrupted\n");
    690  }
    691 
    692  return 0;
    693 }
    694 
    695 
    696 int
    697  #ifdef _MSC_VER
    698    __cdecl
    699  #endif
    700 main(int numArgs, const char *args[])
    701 {
    702  try { return main2(numArgs, args); }
    703  catch (const char *s)
    704  {
    705    PrintError("\nError:\n");
    706    PrintError(s);
    707    PrintError("\n");
    708    return 1;
    709  }
    710  catch(...)
    711  {
    712    PrintError("\nError\n");
    713    return 1;
    714  }
    715 }