tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

LzmaDecOpt.asm (31838B)


      1 ; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
      2 ; 2018-02-06: Igor Pavlov : Public domain
      3 ;
      4 ; The "3" suffix is the code-compatibility version of the LzmaDec_DecodeReal_*()
      5 ; function; it is part of the symbol name so a mismatch is caught at link time.
      6 ; This code is tightly coupled with LzmaDec_TryDummy()
      7 ; and with other functions in the LzmaDec.c file.
      8 ; CLzmaDec structure, (probs) array layout, input and output of
      9 ; LzmaDec_DecodeReal_*() must be equal in both versions (C / ASM).
     10 
     11 ifndef x64
        ; Both statements inside this conditional are commented out, so it currently assembles to nothing.
     12 ; x64=1
     13 ; .err <x64_IS_REQUIRED>
     14 endif
     15 
        ; NOTE(review): the x0..x14 / r0..r15 register names, their _L/_W variants, MY_ASM_START,
        ; MY_PROC, MY_PUSH_PRESERVED_REGS and REG_PARAM_* are not defined in this file;
        ; presumably they come from 7zAsm.asm - confirm.
     16 include 7zAsm.asm
     17 
     18 MY_ASM_START
     19 
        ; All code below is placed in a dedicated code segment declared with 64-byte alignment,
        ; which is what allows the MY_ALIGN_16/32/64 requests further down.
     20 _TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
     21 
     22 MY_ALIGN macro num:req
        ; Thin wrapper around the assembler's ALIGN directive; num is the required boundary argument.
     23        align  num
     24 endm
     25 
     26 MY_ALIGN_16 macro
        ; Shorthand for MY_ALIGN 16 (used before the short inner loops in this file).
     27        MY_ALIGN 16
     28 endm
     29 
     30 MY_ALIGN_32 macro
        ; Shorthand for MY_ALIGN 32.
     31        MY_ALIGN 32
     32 endm
     33 
     34 MY_ALIGN_64 macro
        ; Shorthand for MY_ALIGN 64 (the maximum the enclosing segment's ALIGN(64) permits).
     35        MY_ALIGN 64
     36 endm
     37 
     38 
     39 ; _LZMA_SIZE_OPT  equ 1
     40 
     41 ; _LZMA_PROB32 equ 1
     42 
     43 ifdef _LZMA_PROB32
        ; 32-bit probability entries: every entry is a dword, so PSHIFT (log2 of the entry size) is 2.
     44        PSHIFT  equ 2
        ; PLOAD dest, mem : dest = dword at [mem]. The macro supplies the brackets, callers pass a bare address expression.
     45        PLOAD macro dest, mem
     46                mov     dest, dword ptr [mem]
     47        endm
        ; PSTORE src, mem : dword at [mem] = src.
     48        PSTORE  macro src, mem
     49                mov     dword ptr [mem], src
     50        endm
     51 else
        ; Default: 16-bit probability entries, PSHIFT = 1.
     52        PSHIFT  equ 1
        ; PLOAD dest, mem : dest = zero-extended word at [mem].
     53        PLOAD macro dest, mem
     54                movzx   dest, word ptr [mem]
     55        endm
        ; PSTORE src, mem : stores the 16-bit form of src. The operand name is built by appending
        ; "_W" to the text of src, so src must be a name for which a <name>_W alias exists
        ; (this file defines probBranch_W, t0_W and prob2_W; the x*_W names are presumably from 7zAsm.asm).
     56        PSTORE macro src, mem
     57                mov     word ptr [mem], @CatStr(src, _W)
     58        endm
     59 endif
     60 
     61 PMULT           equ (1 SHL PSHIFT)            ; size in bytes of one probability entry (2 or 4, depending on PSHIFT)
     62 PMULT_HALF      equ (1 SHL (PSHIFT - 1))      ; PMULT / 2: byte scale for an index that has already been doubled
     63 PMULT_2         equ (1 SHL (PSHIFT + 1))      ; PMULT * 2: byte scale for an index that is about to be doubled
     64 
     65 
     66 ;       x0      range
     67 ;       x1      pbPos / (prob) TREE
     68 ;       x2      probBranch / prm (MATCHED) / pbPos / cnt
     69 ;       x3      sym
     70 ;====== r4 ===  RSP
     71 ;       x5      cod
     72 ;       x6      t1 NORM_CALC / probs_state / dist
     73 ;       x7      t0 NORM_CALC / prob2 IF_BIT_1
     74 ;       x8      state
     75 ;       x9      match (MATCHED) / sym2 / dist2 / lpMask_reg
     76 ;       x10     kBitModelTotal_reg
     77 ;       r11     probs
     78 ;       x12     offs (MATCHED) / dic / len_temp
     79 ;       x13     processedPos
     80 ;       x14     bit (MATCHED) / dicPos
     81 ;       r15     buf
        ;
        ; The equates below give symbolic names to the registers listed in the table above.
        ; Several names deliberately share one register (for example probBranch/cnt/prm on
        ; register 2, and match/sym2/dist2/lpMask_reg on register 9), so at most one of the
        ; aliases of a register can be live at any point.
        ; Suffix convention used by the names: no suffix or x* = 32-bit form, _R or r* = 64-bit
        ; form, _W = 16-bit form, _L = 8-bit form.
        ; NOTE(review): the table lists pbPos under x2 and dist under x6, but the equates bind
        ; pbPos to x1 and dist to sym (x3) - the table looks stale for those two entries; confirm.
     82 
     83 
     84 cod     equ x5
     85 cod_L   equ x5_L
     86 range   equ x0
     87 state   equ x8
     88 state_R equ r8
     89 buf     equ r15
     90 processedPos equ x13
     91 kBitModelTotal_reg equ x10
     92 
     93 probBranch   equ x2
     94 probBranch_R equ r2
     95 probBranch_W equ x2_W
     96 
     97 pbPos   equ x1
     98 pbPos_R equ r1
     99 
        ; cnt shares register 2 with probBranch and prm.
    100 cnt     equ x2
    101 cnt_R   equ r2
    102 
        ; lpMask_reg shares register 9 with match, sym2 and dist2.
    103 lpMask_reg equ x9
        ; dicPos is the 64-bit form of register 14; bit / bit_R below alias the same register.
    104 dicPos  equ r14
    105 
    106 sym     equ x3
    107 sym_R   equ r3
    108 sym_L   equ x3_L
    109 
    110 probs   equ r11
        ; dic is the 64-bit form of register 12; offs and len_temp below alias the same register.
    111 dic     equ r12
    112 
    113 t0      equ x7
    114 t0_W    equ x7_W
    115 t0_R    equ r7
    116 
        ; prob2 is the same register as t0; the branch macros call it prob2, the cmov macros call it t0.
    117 prob2   equ t0
    118 prob2_W equ t0_W
    119 
    120 t1      equ x6
    121 t1_R    equ r6
    122 
        ; probs_state is the same register as t1.
    123 probs_state     equ t1
    124 probs_state_R   equ t1_R
    125 
        ; Names used by the matched-literal macros (LITM_0 / LITM / LITM_2).
    126 prm     equ r2
    127 match   equ x9
    128 match_R equ r9
    129 offs    equ x12
    130 offs_R  equ r12
    131 bit     equ x14
    132 bit_R   equ r14
    133 
        ; Names used by the reverse-bit-tree macros (REV_*).
    134 sym2    equ x9
    135 sym2_R  equ r9
    136 
    137 len_temp equ x12
    138 
    139 dist    equ sym
    140 dist2   equ x9
    141 
    142 
    143 
    144 kNumBitModelTotalBits   equ 11                          ; range is shifted right by this many bits before being multiplied by a probability
    145 kBitModelTotal          equ (1 SHL kNumBitModelTotalBits)   ; 2048
    146 kNumMoveBits            equ 5                           ; shift used by every probability update in this file
    147 kBitModelOffset         equ ((1 SHL kNumMoveBits) - 1)  ; 31: update target used by the cmov macros when the decoded bit is 1
    148 kTopValue               equ (1 SHL 24)                  ; NORM pulls in another input byte whenever range is below this value
    149 
    150 NORM_2 macro
               ; Unconditional renormalization step: shifts cod and range left by 8 bits,
               ; places the next input byte from [buf] into the 8-bit register cod_L, and advances buf.
               ; The two commented-out instructions show the equivalent movzx / or form that needs t0.
               ; NOTE(review): relies on cod_L (x5_L) being the low byte of cod - defined outside this file.
    151        ; movzx   t0, BYTE PTR [buf]
    152        shl     cod, 8
    153        mov     cod_L, BYTE PTR [buf]
    154        shl     range, 8
    155        ; or      cod, t0
    156        inc     buf
    157 endm
    158 
    159 
    160 NORM macro
               ; Conditional renormalization: runs NORM_2 only when range is below kTopValue (unsigned compare).
               ; Uses an anonymous label, so it must not be placed between another @@ / @F pair.
               ; Flags are clobbered by the cmp (and by NORM_2 when it runs).
    161        cmp     range, kTopValue
    162        jae     SHORT @F
    163        NORM_2
    164 @@:
    165 endm
    166 
    167 
    168 ; ---------- Branch MACROS ----------
    169 
    170 UPDATE_0 macro probsArray:req, probOffset:req, probDisp:req
               ; Probability update for a decoded 0 bit:
               ;   probBranch += (kBitModelTotal_reg - probBranch) >> kNumMoveBits
               ; and the result is stored back to [probOffset + probsArray + probDisp * PMULT].
               ; Expects probBranch to still hold the value loaded by CMP_COD for the same address.
               ; range and cod are not touched here: CMP_COD has already left range equal to the bound.
               ; Clobbers prob2 (t0) and flags.
    171        mov     prob2, kBitModelTotal_reg
    172        sub     prob2, probBranch
    173        shr     prob2, kNumMoveBits
    174        add     probBranch, prob2
    175        PSTORE  probBranch, probOffset * 1 + probsArray + probDisp * PMULT
    176 endm
    177 
    178 
    179 UPDATE_1 macro probsArray:req, probOffset:req, probDisp:req
               ; Range-coder and probability update for a decoded 1 bit. Expects the state left by CMP_COD:
               ; prob2 = range before the split, range = bound, probBranch = probability.
               ;   range = prob2 - bound;  cod -= bound;
               ;   stored probability = probBranch - (probBranch >> kNumMoveBits)
               ; The new probability is stored to [probOffset + probsArray + probDisp * PMULT].
               ; Clobbers prob2 (t0), probBranch and flags.
    180        sub     prob2, range
    181        sub     cod, range
    182        mov     range, prob2
    183        mov     prob2, probBranch
    184        shr     probBranch, kNumMoveBits
    185        sub     prob2, probBranch
    186        PSTORE  prob2, probOffset * 1 + probsArray + probDisp * PMULT
    187 endm
    188 
    189 
    190 CMP_COD macro probsArray:req, probOffset:req, probDisp:req
               ; Common front end of the branch-style bit decoders.
               ; Loads the probability at [probOffset + probsArray + probDisp * PMULT] into probBranch,
               ; renormalizes, and compares cod with bound = (range >> kNumBitModelTotalBits) * probBranch.
               ; On exit: prob2 = range after renormalization, range = bound, and the flags come from
               ; "cmp cod, bound" (CF set means cod is below bound, i.e. the decoded bit is 0).
    191        PLOAD   probBranch, probOffset * 1 + probsArray + probDisp * PMULT
    192        NORM
    193        mov     prob2, range
    194        shr     range, kNumBitModelTotalBits
    195        imul    range, probBranch
    196        cmp     cod, range
    197 endm
    198 
    199 
    200 IF_BIT_1_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
               ; Decodes one bit with CMP_COD and jumps to toLabel when cod is not below the bound (bit = 1).
               ; "NOUP": no probability or range update is performed on either path; the code at toLabel
               ; and the fall-through code are expected to apply UPDATE_1 / UPDATE_0 themselves.
    201        CMP_COD probsArray, probOffset, probDisp
    202        jae     toLabel
    203 endm
    204 
    205 
    206 IF_BIT_1 macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
               ; Same test as IF_BIT_1_NOUP, but the fall-through (bit = 0) path also applies UPDATE_0
               ; to the probability that was just tested. The bit = 1 path at toLabel is still not updated.
    207        IF_BIT_1_NOUP probsArray, probOffset, probDisp, toLabel
    208        UPDATE_0 probsArray, probOffset, probDisp
    209 endm
    210 
    211 
    212 IF_BIT_0_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
               ; Decodes one bit with CMP_COD and jumps to toLabel when cod is below the bound (bit = 0).
               ; No probability or range update is performed on either path.
    213        CMP_COD probsArray, probOffset, probDisp
    214        jb      toLabel
    215 endm
    216 
    217 
    218 ; ---------- CMOV MACROS ----------
    219 
    220 NORM_CALC macro prob:req
               ; Common front end of the branch-free (cmov) bit decoders. prob is a register that
               ; already holds the probability to test.
               ; After renormalization it computes bound = (range >> kNumBitModelTotalBits) * prob and leaves:
               ;   range = bound                (correct new range if the bit is 0)
               ;   t0    = old range - bound    (new range if the bit is 1)
               ;   t1    = cod before the subtraction (value to restore if the bit is 0)
               ;   cod   = cod - bound          (correct new cod if the bit is 1)
               ;   CF    = borrow of that subtraction: set when the bit is 0, clear when it is 1.
               ; Callers select between the two outcomes with cmovae / cmovb and must not execute any
               ; flag-writing instruction before their last use of CF.
    221        NORM
    222        mov     t0, range
    223        shr     range, kNumBitModelTotalBits
    224        imul    range, prob
    225        sub     t0, range
    226        mov     t1, cod
    227        sub     cod, range
    228 endm
    229 
    230 
    231 PUP macro prob:req, probPtr:req
               ; Probability update for the cmov decoders:
               ;   new = prob + ((t0 - prob) arithmetic-shifted right by kNumMoveBits)
               ; and new is stored to [probPtr]. The caller preloads t0 with the update target:
               ; kBitModelTotal_reg for a 0 bit, kBitModelOffset for a 1 bit.
               ; t0 - prob can be negative, hence the arithmetic shift. Clobbers t0 and flags.
    232        sub     t0, prob
    233       ; only sar works for both 16/32 bit prob modes
    234        sar     t0, kNumMoveBits
    235        add     t0, prob
    236        PSTORE  t0, probPtr
    237 endm
    238 
    239 
    240 PUP_SUB macro prob:req, probPtr:req, symSub:req
               ; Folds the decoded bit into sym and then updates the probability with PUP.
               ; sbb computes sym = sym - symSub - CF, where CF is still the borrow left by NORM_CALC
               ; (CF = 1 for a 0 bit). With symSub = -1 this is sym + 1 for a 1 bit and sym for a 0 bit.
               ; The sbb must come first because PUP overwrites the flags.
    241        sbb     sym, symSub
    242        PUP prob, probPtr
    243 endm
    244 
    245 
    246 PUP_COD macro prob:req, probPtr:req, symSub:req
               ; Tail shared by the cmov decoders after NORM_CALC (range must already have been selected
               ; by the caller). Using only flag-preserving mov / cmov instructions it:
               ;   - restores cod from t1 when the bit is 0 (CF set),
               ;   - copies the current sym into t1, so probPtr expressions may use t1_R as the
               ;     pre-update symbol,
               ;   - sets t0 to kBitModelOffset (bit 1) or kBitModelTotal_reg (bit 0) for PUP,
               ; and then runs PUP_SUB. The order matters: t1 is consumed before it is overwritten.
    247        mov     t0, kBitModelOffset
    248        cmovb   cod, t1
    249        mov     t1, sym
    250        cmovb   t0, kBitModelTotal_reg
    251        PUP_SUB prob, probPtr, symSub
    252 endm
    253 
    254 
    255 BIT_0 macro prob:req, probNext:req
               ; First step of a branch-free bit-tree decode rooted at probs.
               ; Loads the root probability (entry 1) into prob, speculatively loads entry 2 into probNext,
               ; and replaces it with entry 3 when the decoded bit is 1, so probNext is ready for the next step.
               ; On exit sym = 2 + bit (the tree index with its leading 1), and entry 1 has been updated.
               ; prob and probNext are caller-chosen scratch registers; t0, t1 and flags are clobbered.
    256        PLOAD   prob, probs + 1 * PMULT
    257        PLOAD   probNext, probs + 1 * PMULT_2
    258 
    259        NORM_CALC prob
    260        
    261        cmovae  range, t0
    262        PLOAD   t0, probs + 1 * PMULT_2 + PMULT
    263        cmovae  probNext, t0
    264        mov     t0, kBitModelOffset
    265        cmovb   cod, t1
    266        cmovb   t0, kBitModelTotal_reg
               ; mov (not xor/sub) so that CF from NORM_CALC survives until the sbb inside PUP_SUB
    267        mov     sym, 2
    268        PUP_SUB prob, probs + 1 * PMULT, 0 - 1
    269 endm
    270 
    271 
    272 BIT_1 macro prob:req, probNext:req
               ; Middle step of a branch-free bit-tree decode. prob must already hold the probability
               ; for the current tree index sym (normally the probNext of the previous step).
               ; Speculatively loads the entry at index 2 * sym into probNext, doubles sym, and replaces
               ; probNext with the entry at index 2 * sym + 1 when the decoded bit is 1.
               ; On exit sym = 2 * sym + bit, and the tested entry has been updated: PUP_COD addresses it
               ; as t1_R * PMULT_HALF, where t1 is the already doubled index.
    273        PLOAD   probNext, probs + sym_R * PMULT_2
    274        add     sym, sym
    275        
    276        NORM_CALC prob
    277        
    278        cmovae  range, t0
    279        PLOAD   t0, probs + sym_R * PMULT + PMULT
    280        cmovae  probNext, t0
    281        PUP_COD prob, probs + t1_R * PMULT_HALF, 0 - 1
    282 endm
    283 
    284 
    285 BIT_2 macro prob:req, symSub:req
               ; Last step of a branch-free bit-tree decode: like BIT_1 but without prefetching a next
               ; probability. On exit sym = 2 * sym - symSub - CF (CF = 1 for a 0 bit), so the caller
               ; chooses symSub to remove the tree's leading 1 and/or add a bias in the same instruction
               ; (for example 256 - 1 for an 8-bit literal).
    286        add     sym, sym
    287 
    288        NORM_CALC prob
    289        
    290        cmovae  range, t0
    291        PUP_COD prob, probs + t1_R * PMULT_HALF, symSub
    292 endm
    293 
    294 
    295 ; ---------- MATCHED LITERAL ----------
    296 
    297 LITM_0 macro
               ; First bit of a matched-literal decode (branch-free).
               ; On entry match holds the match byte (zero-extended) and probs points at the literal sub-table.
               ; offs starts as 256 * PMULT (a byte offset); match is pre-shifted by PSHIFT + 1 so that the
               ; next match bit to examine lines up with the single bit set in offs; bit = offs AND match
               ; is then either 0 or 256 * PMULT.
               ; The tested entry is at probs + 256 * PMULT + bit + 1 * PMULT; its address is kept in prm
               ; for the store in PUP_SUB. x1 is used as the probability register.
               ; After the decode, offs keeps its value only when the decoded bit agrees with the match bit
               ; (otherwise it becomes 0), bit is reloaded with the shifted match, and sym = 2 + decoded bit.
    298        mov     offs, 256 * PMULT
    299        shl     match, (PSHIFT + 1)
    300        mov     bit, offs
    301        and     bit, match
    302        PLOAD   x1, probs + 256 * PMULT + bit_R * 1 + 1 * PMULT
    303        lea     prm, [probs + 256 * PMULT + bit_R * 1 + 1 * PMULT]
    304        ; lea     prm, [probs + 256 * PMULT + 1 * PMULT]
    305        ; add     prm, bit_R
               ; offs = offs XOR bit: the value offs should take if the decoded bit turns out to be 0
    306        xor     offs, bit
    307        add     match, match
    308 
    309        NORM_CALC x1
    310 
               ; decoded bit 1: offs = bit (the match-bit mask); decoded bit 0: keep offs XOR bit
    311        cmovae  offs, bit
    312        mov     bit, match
    313        cmovae  range, t0
    314        mov     t0, kBitModelOffset
    315        cmovb   cod, t1
    316        cmovb   t0, kBitModelTotal_reg
               ; mov (not xor) keeps CF for the sbb in PUP_SUB: sym = 0 - (-3) - CF = 2 + decoded bit
    317        mov     sym, 0
    318        PUP_SUB x1, prm, -2-1
    319 endm
    320 
    321 
    322 LITM macro
               ; Middle bit of a matched-literal decode. On entry bit holds the shifted match byte and
               ; offs the current mismatch mask (256 * PMULT while all decoded bits matched, 0 afterwards).
               ; The tested entry is at probs + offs + (bit AND offs) + sym * PMULT; prm holds the base part
               ; of that address so that PUP_COD can re-form it from the saved, doubled sym in t1.
               ; On exit sym = 2 * sym + decoded bit; offs, bit and match are advanced as in LITM_0.
    323        and     bit, offs
    324        lea     prm, [probs + offs_R * 1]
    325        add     prm, bit_R
    326        PLOAD   x1, prm + sym_R * PMULT
    327        xor     offs, bit
    328        add     sym, sym
    329        add     match, match
    330 
    331        NORM_CALC x1
    332 
    333        cmovae  offs, bit
    334        mov     bit, match
    335        cmovae  range, t0
    336        PUP_COD x1, prm + t1_R * PMULT_HALF, - 1
    337 endm
    338 
    339 
    340 LITM_2 macro
               ; Last bit of a matched-literal decode: same probability addressing as LITM, but offs,
               ; bit and match are no longer advanced because no further bit follows.
               ; symSub = 256 - 1 makes the final sbb produce 2 * sym + decoded bit - 256.
    341        and     bit, offs
    342        lea     prm, [probs + offs_R * 1]
    343        add     prm, bit_R
    344        PLOAD   x1, prm + sym_R * PMULT
    345        add     sym, sym
    346 
    347        NORM_CALC x1
    348 
    349        cmovae  range, t0
    350        PUP_COD x1, prm + t1_R * PMULT_HALF, 256 - 1
    351 endm
    352 
    353 
    354 ; ---------- REVERSE BITS ----------
    355 
    356 REV_0 macro prob:req, probNext:req
               ; First step of a branch-free reverse bit-tree decode rooted at probs.
               ; Preconditions (set up by the caller; the commented-out lines show the equivalent code):
               ; prob already holds entry 1 and sym2_R already points at entry 2.
               ; Speculatively loads the entry at sym2_R into probNext; when the decoded bit is 1, both
               ; probNext and sym2_R are switched to entry 3. Entry 1 is updated through PUP.
               ; The decoded bit is not added to sym here; it stays encoded in the position of sym2_R.
    357        ; PLOAD   prob, probs + 1 * PMULT
    358        ; lea     sym2_R, [probs + 2 * PMULT]
    359        ; PLOAD   probNext, probs + 2 * PMULT
    360        PLOAD   probNext, sym2_R
    361 
    362        NORM_CALC prob
    363 
    364        cmovae  range, t0
    365        PLOAD   t0, probs + 3 * PMULT
    366        cmovae  probNext, t0
               ; t1 (saved cod) must be consumed here, before the lea below reuses t1_R as a pointer
    367        cmovb   cod, t1
    368        mov     t0, kBitModelOffset
    369        cmovb   t0, kBitModelTotal_reg
    370        lea     t1_R, [probs + 3 * PMULT]
    371        cmovae  sym2_R, t1_R
    372        PUP prob, probs + 1 * PMULT
    373 endm
    374 
    375 
    376 REV_1 macro prob:req, probNext:req, step:req
               ; Middle step of the reverse bit-tree decode. prob holds the entry that sym2_R pointed at
               ; on entry; step is the weight of the bit decoded here, in entries.
               ; sym2_R is advanced by step entries for a 0 bit and by 2 * step entries for a 1 bit, and
               ; probNext receives the entry at the resulting position.
               ; The update is stored at t1_R - step * PMULT_2, which equals the value sym2_R had on entry.
    377        add     sym2_R, step * PMULT
    378        PLOAD   probNext, sym2_R
    379 
    380        NORM_CALC prob
    381 
    382        cmovae  range, t0
    383        PLOAD   t0, sym2_R + step * PMULT
    384        cmovae  probNext, t0
               ; restore cod from t1 before t1_R is reused as a pointer below
    385        cmovb   cod, t1
    386        mov     t0, kBitModelOffset
    387        cmovb   t0, kBitModelTotal_reg
    388        lea     t1_R, [sym2_R + step * PMULT]
    389        cmovae  sym2_R, t1_R
    390        PUP prob, t1_R - step * PMULT_2
    391 endm
    392 
    393 
    394 REV_2 macro prob:req, step:req
               ; Last step of the reverse bit-tree decode. Converts the pointer sym2_R back into an entry
               ; index relative to probs and ORs that index into sym, then decodes the final bit:
               ; when that bit is 0, step is subtracted from sym again.
               ; The tested entry (index sym2) is updated through PUP.
               ; NOTE(review): this assumes the low bits of sym are clear on entry and that the OR-ed index
               ; already includes a step-valued bit that the subtraction cancels - confirm against LzmaDec.c.
    395        sub     sym2_R, probs
    396        shr     sym2, PSHIFT
    397        or      sym, sym2
    398 
    399        NORM_CALC prob
    400 
    401        cmovae  range, t0
               ; lea does not modify flags, so CF from NORM_CALC is still valid for the cmovb pair below
    402        lea     t0, [sym - step]
    403        cmovb   sym, t0
    404        cmovb   cod, t1
    405        mov     t0, kBitModelOffset
    406        cmovb   t0, kBitModelTotal_reg
    407        PUP prob, probs + sym2_R * PMULT
    408 endm
    409 
    410 
    411 REV_1_VAR macro prob:req
               ; Loop-friendly reverse bit-tree step in which sym_R is used as a pointer to the current
               ; probability entry and sym2_R as the current step in bytes.
               ; The entry at sym_R is loaded into prob, and probs is overwritten with that address so the
               ; update can be stored there. sym_R then advances by sym2_R for a 0 bit or by 2 * sym2_R
               ; for a 1 bit, and sym2 is doubled for the next iteration.
               ; Clobbers probs: the caller has to reload it afterwards. Not invoked in the visible part
               ; of this file.
    412        PLOAD   prob, sym_R
    413        mov     probs, sym_R
    414        add     sym_R, sym2_R
    415 
    416        NORM_CALC prob
    417 
    418        cmovae  range, t0
    419        lea     t0_R, [sym_R + sym2_R]
    420        cmovae  sym_R, t0_R
    421        mov     t0, kBitModelOffset
    422        cmovb   cod, t1
    423        ; mov     t1, kBitModelTotal
    424        ; cmovb   t0, t1
    425        cmovb   t0, kBitModelTotal_reg
               ; flags are overwritten here; every CF-dependent cmov has already executed
    426        add     sym2, sym2
    427        PUP prob, probs
    428 endm
    429 
    430 
    431 
    432 
    433 LIT_PROBS macro lpMaskParam:req
               ; Prepares a literal decode: advances probs to the literal sub-table selected by the
               ; position / previous-byte context, applies the 0-bit update to the IsMatch probability
               ; that the caller has just tested without updating, and increments processedPos.
               ; On entry sym is expected to hold the previous output byte (or 0 at the very start),
               ; probBranch the IsMatch probability loaded by the preceding IF_BIT_*_NOUP, and
               ; probs_state_R / pbPos_R the values computed by IsMatchBranch_Pre.
               ; lpMaskParam may be a register or a memory operand holding lpMask.
               ; Clobbers t0, x1 (pbPos), sym and flags.
    434        ; prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
    435        mov     t0, processedPos
    436        shl     t0, 8
    437        add     sym, t0
    438        and     sym, lpMaskParam
               ; fold pbPos into probs_state_R now, because x1 (= pbPos) is reused for the shift count next
    439        add     probs_state_R, pbPos_R
               ; lc2 is lc + PSHIFT (stored by the function prologue), so the shift below yields a byte offset
    440        mov     x1, LOC lc2
               ; sym = sym * 3
    441        lea     sym, dword ptr[sym_R + 2 * sym_R]
    442        add     probs, Literal * PMULT
               ; variable shift by the 8-bit form of x1
    443        shl     sym, x1_L
    444        add     probs, sym_R
               ; same address as the caller's IsMatch test: pbPos is now part of probs_state_R, so offset 0
    445        UPDATE_0 probs_state_R, 0, IsMatch
    446        inc     processedPos
    447 endm
    448 
    449 
    450 
    451 kNumPosBitsMax          equ 4
    452 kNumPosStatesMax        equ (1 SHL kNumPosBitsMax)          ; 16
    453 
    454 kLenNumLowBits          equ 3
    455 kLenNumLowSymbols       equ (1 SHL kLenNumLowBits)          ; 8
    456 kLenNumHighBits         equ 8
    457 kLenNumHighSymbols      equ (1 SHL kLenNumHighBits)         ; 256
    458 kNumLenProbs            equ (2 * kLenNumLowSymbols * kNumPosStatesMax + kLenNumHighSymbols)   ; 2 * 8 * 16 + 256 = 512 entries per length coder
    459 
        ; Entry offsets inside one length coder
    460 LenLow                  equ 0
    461 LenChoice               equ LenLow                          ; 0: shares entry 0 with LenLow
    462 LenChoice2              equ (LenLow + kLenNumLowSymbols)    ; 8
    463 LenHigh                 equ (LenLow + 2 * kLenNumLowSymbols * kNumPosStatesMax)   ; 256
    464 
    465 kNumStates              equ 12
    466 kNumStates2             equ 16                              ; row count used where a table is indexed by (state, posState)
    467 kNumLitStates           equ 7
    468 
    469 kStartPosModelIndex     equ 4
    470 kEndPosModelIndex       equ 14
    471 kNumFullDistances       equ (1 SHL (kEndPosModelIndex SHR 1))   ; 1 SHL 7 = 128
    472 
    473 kNumPosSlotBits         equ 6
    474 kNumLenToPosStates      equ 4
    475 
    476 kNumAlignBits           equ 4
    477 kAlignTableSize         equ (1 SHL kNumAlignBits)           ; 16
    478 
    479 kMatchMinLen            equ 2
    480 kMatchSpecLenStart      equ (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)   ; 2 + 16 + 256 = 274
    481 
    482 kStartOffset    equ 1664
        ; Layout of the probability array, as entry indices relative to the biased base pointer
        ; held in probs. Tables placed before the base get negative indices; the layout is chosen
        ; so that kAlign lands exactly on index 0 (enforced by the first check below).
    483 SpecPos         equ (-kStartOffset)                                     ; -1664
    484 IsRep0Long      equ (SpecPos + kNumFullDistances)                       ; -1536
    485 RepLenCoder     equ (IsRep0Long + (kNumStates2 SHL kNumPosBitsMax))     ; -1280
    486 LenCoder        equ (RepLenCoder + kNumLenProbs)                        ; -768
    487 IsMatch         equ (LenCoder + kNumLenProbs)                           ; -256
    488 kAlign          equ (IsMatch + (kNumStates2 SHL kNumPosBitsMax))        ; 0
    489 IsRep           equ (kAlign + kAlignTableSize)                          ; 16
    490 IsRepG0         equ (IsRep + kNumStates)                                ; 28
    491 IsRepG1         equ (IsRepG0 + kNumStates)                              ; 40
    492 IsRepG2         equ (IsRepG1 + kNumStates)                              ; 52
    493 PosSlot         equ (IsRepG2 + kNumStates)                              ; 64
    494 Literal         equ (PosSlot + (kNumLenToPosStates SHL kNumPosSlotBits))    ; 320
    495 NUM_BASE_PROBS  equ (Literal + kStartOffset)                            ; 1984 entries ahead of the literal tables
    496 
        ; Assembly-time guards: fail the build if the constants above no longer produce the
        ; layout this code was written for.
    497 if kAlign ne 0
    498  .err <Stop_Compiling_Bad_LZMA_kAlign>
    499 endif
    500 
    501 if NUM_BASE_PROBS ne 1984
    502  .err <Stop_Compiling_Bad_LZMA_PROBS>
    503 endif
    504 
    505 
    506 PTR_FIELD equ dq ?
        ; PTR_FIELD (above) declares one 8-byte field; it is used for pointer-sized and other
        ; 64-bit members in both structures below.
    507 
        ; Assembly-side view of the decoder object passed as the first argument. According to the
        ; header of this file its layout must equal the C CLzmaDec structure; it is addressed
        ; through the GLOB / GLOB_2 text macros.
    508 CLzmaDec_Asm struct
    509        lc      db ?
    510        lp      db ?
    511        pb      db ?
    512        _pad_   db ?
    513        dicSize dd ?
    514 
    515        probs_Spec      PTR_FIELD
    516        probs_1664      PTR_FIELD       ; base pointer loaded into probs by the prologue (presumably probs_Spec + kStartOffset entries - confirm)
    517        dic_Spec        PTR_FIELD
    518        dicBufSize      PTR_FIELD
    519        dicPos_Spec     PTR_FIELD       ; offset relative to dic_Spec: the prologue adds dic to it
    520        buf_Spec        PTR_FIELD
    521 
    522        range_Spec      dd ?
    523        code_Spec       dd ?
    524        processedPos_Spec  dd ?
    525        checkDicSize    dd ?
    526        rep0    dd ?
    527        rep1    dd ?
    528        rep2    dd ?
    529        rep3    dd ?
    530        state_Spec      dd ?            ; plain state index: the prologue shifts it left by PSHIFT after loading
    531        remainLen dd ?
    532 CLzmaDec_Asm ends
    533 
    534 
    535 CLzmaDec_Asm_Loc struct
               ; Stack frame of LzmaDec_DecodeReal_3, addressed through LOC ([RSP]) and, during the
               ; prologue, through LOC_0 ([r0]). The fields add up to 128 bytes, and the prologue
               ; aligns the frame address down to a multiple of 128.
    536        OLD_RSP    PTR_FIELD            ; RSP value from before the frame was carved out
    537        lzmaPtr    PTR_FIELD            ; the CLzmaDec_Asm pointer argument
    538        _pad0_     PTR_FIELD
    539        _pad1_     PTR_FIELD
    540        _pad2_     PTR_FIELD
    541        dicBufSize PTR_FIELD
    542        probs_Spec PTR_FIELD            ; copy of probs_1664, used to reload probs after it has been advanced
    543        dic_Spec   PTR_FIELD
    544        
    545        limit      PTR_FIELD            ; absolute pointer: dic + limit argument
    546        bufLimit   PTR_FIELD
    547        lc2       dd ?                  ; lc + PSHIFT
    548        lpMask    dd ?                  ; (100h SHL lp) - (100h SHR lc)
    549        pbMask    dd ?                  ; (1 SHL pb) - 1
    550        checkDicSize   dd ?
    551 
    552        _pad_     dd ?
    553        remainLen dd ?
    554        dicPos_Spec     PTR_FIELD       ; absolute pointer (dic + dicPos); also used as a spill slot for dicPos
    555        rep0      dd ?
    556        rep1      dd ?
    557        rep2      dd ?
    558        rep3      dd ?
    559 CLzmaDec_Asm_Loc ends
    560 
    561 
    562 GLOB_2  equ [sym_R].CLzmaDec_Asm.
    563 GLOB    equ [r1].CLzmaDec_Asm.
    564 LOC_0   equ [r0].CLzmaDec_Asm_Loc.
    565 LOC     equ [RSP].CLzmaDec_Asm_Loc.
        ; Field-access prefixes: each is a text macro that is followed directly by a field name,
        ; for example "LOC rep0" expands to [RSP].CLzmaDec_Asm_Loc.rep0.
        ;   GLOB_2 - CLzmaDec_Asm object addressed through sym_R (used by the prologue)
        ;   GLOB   - CLzmaDec_Asm object addressed through r1 (used by RESTORE_VAR)
        ;   LOC_0  - local frame addressed through r0 (used before RSP has been switched)
        ;   LOC    - local frame addressed through RSP
    566 
    567 
    568 COPY_VAR macro name
               ; Copies field name from the decoder object (GLOB_2, via sym_R) into the same-named
               ; field of the local frame (LOC_0, via r0). Goes through t0, so t0 is clobbered.
               ; Used in this file for the 32-bit rep0..rep3 fields.
    569        mov     t0, GLOB_2 name
    570        mov     LOC_0 name, t0
    571 endm
    572 
    573 
    574 RESTORE_VAR macro name
               ; Inverse of COPY_VAR: copies field name from the local frame (LOC, via RSP) back into
               ; the decoder object (GLOB, via r1). r1 must hold the CLzmaDec_Asm pointer; t0 is clobbered.
    575        mov     t0, LOC name
    576        mov     GLOB name, t0
    577 endm
    578 
    579 
    580 
    581 IsMatchBranch_Pre macro reg
               ; Precomputes the two address components of the IsMatch probability for the next symbol:
               ;   pbPos         = (processedPos AND pbMask) shifted left by (kLenNumLowBits + 1 + PSHIFT)
               ;   probs_state_R = probs + state_R
               ; state is kept pre-scaled by PMULT in this file, so both values are byte offsets.
               ; The reg parameter is not referenced; every visible invocation passes no argument.
               ; NOTE(review): the C-style comment below shows state being shifted, whereas here the
               ; position-state term is the one shifted - confirm against the matching LzmaDec.c.
    582        ; prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
    583        mov     pbPos, LOC pbMask
    584        and     pbPos, processedPos
    585        shl     pbPos, (kLenNumLowBits + 1 + PSHIFT)
    586        lea     probs_state_R, [probs + state_R]
    587 endm
    588 
    589 
    590 IsMatchBranch macro reg
               ; IsMatchBranch_Pre followed by the IsMatch bit test: jumps to IsMatch_label when the bit
               ; is 1, otherwise falls through with the 0-bit probability update already applied.
               ; The reg parameter is not referenced. Not invoked in the visible part of this file.
    591        IsMatchBranch_Pre
    592        IF_BIT_1 probs_state_R, pbPos_R, IsMatch, IsMatch_label
    593 endm
    594        
    595 
    596 CheckLimits macro reg
               ; Leaves the main decode loop by jumping to fin_OK when either the input pointer has
               ; reached bufLimit or the output pointer has reached limit (both unsigned compares).
               ; fin_OK is a label defined elsewhere in the procedure. The reg parameter is not referenced.
    597        cmp     buf, LOC bufLimit
    598        jae     fin_OK
    599        cmp     dicPos, LOC limit
    600        jae     fin_OK
    601 endm
    602 
    603 
    604 
    605 ; RSP is (16x + 8) bytes aligned in WIN64-x64
    606 ; LocalSize equ ((((SIZEOF CLzmaDec_Asm_Loc) + 7) / 16 * 16) + 8)
    607 
    608 PARAM_lzma      equ REG_PARAM_0
    609 PARAM_limit     equ REG_PARAM_1
    610 PARAM_bufLimit  equ REG_PARAM_2
        ; Names for the three arguments of LzmaDec_DecodeReal_3: the decoder object, the output
        ; limit (added to dic by the prologue to form an absolute pointer) and the input limit.
        ; NOTE(review): REG_PARAM_0..2 are not defined in this file - presumably 7zAsm.asm maps
        ; them to the argument registers of the target calling convention; confirm.
    611 
    612 ; MY_ALIGN_64
    613 MY_PROC LzmaDec_DecodeReal_3, 3
    614 MY_PUSH_PRESERVED_REGS
    615 
    616        lea     r0, [RSP - (SIZEOF CLzmaDec_Asm_Loc)]
    617        and     r0, -128
    618        mov     r5, RSP
    619        mov     RSP, r0
    620        mov     LOC_0 Old_RSP, r5
    621        mov     LOC_0 lzmaPtr, PARAM_lzma
    622        
    623        mov     LOC_0 remainLen, 0  ; remainLen must be ZERO
    624 
    625        mov     LOC_0 bufLimit, PARAM_bufLimit
    626        mov     sym_R, PARAM_lzma  ;  CLzmaDec_Asm pointer for GLOB_2
    627        mov     dic, GLOB_2 dic_Spec
    628        add     PARAM_limit, dic
    629        mov     LOC_0 limit, PARAM_limit
    630 
    631        COPY_VAR(rep0)
    632        COPY_VAR(rep1)
    633        COPY_VAR(rep2)
    634        COPY_VAR(rep3)
    635        
    636        mov     dicPos, GLOB_2 dicPos_Spec
    637        add     dicPos, dic
    638        mov     LOC_0 dicPos_Spec, dicPos
    639        mov     LOC_0 dic_Spec, dic
    640        
    641        mov     x1_L, GLOB_2 pb
    642        mov     t0, 1
    643        shl     t0, x1_L
    644        dec     t0
    645        mov     LOC_0 pbMask, t0
    646 
    647        ; unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
    648        ; unsigned lc = p->prop.lc;
    649        ; unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);
    650 
    651        mov     x1_L, GLOB_2 lc
    652        mov     x2, 100h
    653        mov     t0, x2
    654        shr     x2, x1_L
    655        ; inc     x1
    656        add     x1_L, PSHIFT
    657        mov     LOC_0 lc2, x1
    658        mov     x1_L, GLOB_2 lp
    659        shl     t0, x1_L
    660        sub     t0, x2
    661        mov     LOC_0 lpMask, t0
    662        mov     lpMask_reg, t0
    663        
    664        ; mov     probs, GLOB_2 probs_Spec
    665        ; add     probs, kStartOffset SHL PSHIFT
    666        mov     probs, GLOB_2 probs_1664
    667        mov     LOC_0 probs_Spec, probs
    668 
    669        mov     t0_R, GLOB_2 dicBufSize
    670        mov     LOC_0 dicBufSize, t0_R
    671       
    672        mov     x1, GLOB_2 checkDicSize
    673        mov     LOC_0 checkDicSize, x1
    674 
    675        mov     processedPos, GLOB_2 processedPos_Spec
    676 
    677        mov     state, GLOB_2 state_Spec
    678        shl     state, PSHIFT
    679 
    680        mov     buf,   GLOB_2 buf_Spec
    681        mov     range, GLOB_2 range_Spec
    682        mov     cod,   GLOB_2 code_Spec
    683        mov     kBitModelTotal_reg, kBitModelTotal
    684        xor     sym, sym
    685 
    686        ; if (processedPos != 0 || checkDicSize != 0)
    687        or      x1, processedPos
    688        jz      @f
    689        
    690        add     t0_R, dic
    691        cmp     dicPos, dic
    692        cmovnz  t0_R, dicPos
    693        movzx   sym, byte ptr[t0_R - 1]
    694 
    695 @@:
    696        IsMatchBranch_Pre
    697        cmp     state, 4 * PMULT
    698        jb      lit_end
    699        cmp     state, kNumLitStates * PMULT
    700        jb      lit_matched_end
    701        jmp     lz_end
    702        
    703 
    704        
    705 
    706 ; ---------- LITERAL ----------
    707 MY_ALIGN_64
    708 lit_start:
    709        xor     state, state
    710 lit_start_2:
    711        LIT_PROBS lpMask_reg
    712 
    713    ifdef _LZMA_SIZE_OPT
    714 
    715        PLOAD   x1, probs + 1 * PMULT
    716        mov     sym, 1
    717 MY_ALIGN_16
    718 lit_loop:
    719        BIT_1   x1, x2
    720        mov     x1, x2
    721        cmp     sym, 127
    722        jbe     lit_loop
    723        
    724    else
    725        
    726        BIT_0   x1, x2
    727        BIT_1   x2, x1
    728        BIT_1   x1, x2
    729        BIT_1   x2, x1
    730        BIT_1   x1, x2
    731        BIT_1   x2, x1
    732        BIT_1   x1, x2
    733        
    734    endif
    735 
    736        BIT_2   x2, 256 - 1
    737        
    738        ; mov     dic, LOC dic_Spec
    739        mov     probs, LOC probs_Spec
    740        IsMatchBranch_Pre
    741        mov     byte ptr[dicPos], sym_L
    742        inc     dicPos
    743                
    744        CheckLimits
    745 lit_end:
    746        IF_BIT_0_NOUP probs_state_R, pbPos_R, IsMatch, lit_start
    747 
    748        ; jmp     IsMatch_label
    749        
    750 ; ---------- MATCHES ----------
    751 ; MY_ALIGN_32
    752 IsMatch_label:
    753        UPDATE_1 probs_state_R, pbPos_R, IsMatch
    754        IF_BIT_1 probs_state_R, 0, IsRep, IsRep_label
    755 
    756        add     probs, LenCoder * PMULT
    757        add     state, kNumStates * PMULT
    758 
    759 ; ---------- LEN DECODE ----------
    760 len_decode:
    761        mov     len_temp, 8 - 1 - kMatchMinLen
    762        IF_BIT_0_NOUP probs, 0, 0, len_mid_0
    763        UPDATE_1 probs, 0, 0
    764        add     probs, (1 SHL (kLenNumLowBits + PSHIFT))
    765        mov     len_temp, -1 - kMatchMinLen
    766        IF_BIT_0_NOUP probs, 0, 0, len_mid_0
    767        UPDATE_1 probs, 0, 0
    768        add     probs, LenHigh * PMULT - (1 SHL (kLenNumLowBits + PSHIFT))
    769        mov     sym, 1
    770        PLOAD   x1, probs + 1 * PMULT
    771 
    772 MY_ALIGN_32
    773 len8_loop:
    774        BIT_1   x1, x2
    775        mov     x1, x2
    776        cmp     sym, 64
    777        jb      len8_loop
    778        
    779        mov     len_temp, (kLenNumHighSymbols - kLenNumLowSymbols * 2) - 1 - kMatchMinLen
    780        jmp     len_mid_2
    781        
    782 MY_ALIGN_32
    783 len_mid_0:
    784        UPDATE_0 probs, 0, 0
    785        add     probs, pbPos_R
    786        BIT_0   x2, x1
    787 len_mid_2:
    788        BIT_1   x1, x2
    789        BIT_2   x2, len_temp
    790        mov     probs, LOC probs_Spec
    791        cmp     state, kNumStates * PMULT
    792        jb      copy_match
    793        
    794 
    795 ; ---------- DECODE DISTANCE ----------
    796        ; probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
    797 
    798        mov     t0, 3 + kMatchMinLen
    799        cmp     sym, 3 + kMatchMinLen
    800        cmovb   t0, sym
    801        add     probs, PosSlot * PMULT - (kMatchMinLen SHL (kNumPosSlotBits + PSHIFT))
    802        shl     t0, (kNumPosSlotBits + PSHIFT)
    803        add     probs, t0_R
    804        
    805        ; sym = Len
    806        ; mov     LOC remainLen, sym
    807        mov     len_temp, sym
    808 
    809    ifdef _LZMA_SIZE_OPT
    810 
    811        PLOAD   x1, probs + 1 * PMULT
    812        mov     sym, 1
    813 MY_ALIGN_16
    814 slot_loop:
    815        BIT_1   x1, x2
    816        mov     x1, x2
    817        cmp     sym, 32
    818        jb      slot_loop
    819        
    820    else
    821        
    822        BIT_0   x1, x2
    823        BIT_1   x2, x1
    824        BIT_1   x1, x2
    825        BIT_1   x2, x1
    826        BIT_1   x1, x2
    827        
    828    endif
    829        
    830        mov     x1, sym
    831        BIT_2   x2, 64-1
    832 
    833        and     sym, 3
    834        mov     probs, LOC probs_Spec
    835        cmp     x1, 32 + kEndPosModelIndex / 2
    836        jb      short_dist
    837 
    838        ;  unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
    839        sub     x1, (32 + 1 + kNumAlignBits)
    840        ;  distance = (2 | (distance & 1));
    841        or      sym, 2
    842        PLOAD   x2, probs + 1 * PMULT
    843        shl     sym, kNumAlignBits + 1
    844        lea     sym2_R, [probs + 2 * PMULT]
    845        
    846        jmp     direct_norm
    847        ; lea     t1, [sym_R + (1 SHL kNumAlignBits)]
    848        ; cmp     range, kTopValue
    849        ; jb      direct_norm
    850        
    851 ; ---------- DIRECT DISTANCE ----------
    852 MY_ALIGN_32
    853 direct_loop:
    854        shr     range, 1
    855        mov     t0, cod
    856        sub     cod, range
    857        cmovs   cod, t0
    858        cmovns  sym, t1
    859        
    860        comment ~
    861        sub     cod, range
    862        mov     x2, cod
    863        sar     x2, 31
    864        lea     sym, dword ptr [r2 + sym_R * 2 + 1]
    865        and     x2, range
    866        add     cod, x2
    867        ~
    868        dec     x1
    869        je      direct_end
    870 
    871        add     sym, sym
    872 direct_norm:
    873        lea     t1, [sym_R + (1 SHL kNumAlignBits)]
    874        cmp     range, kTopValue
    875        jae     near ptr direct_loop
    876        ; we align for 32 here with "near ptr" command above
    877        NORM_2
    878        jmp     direct_loop
    879 
    880 MY_ALIGN_32
    881 direct_end:
    882        ;  prob =  + kAlign;
    883        ;  distance <<= kNumAlignBits;
    884        REV_0   x2, x1
    885        REV_1   x1, x2, 2
    886        REV_1   x2, x1, 4
    887        REV_2   x1, 8
    888 
    889 decode_dist_end:
    890 
    891        ; if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
    892 
    893        mov     t0, LOC checkDicSize
    894        test    t0, t0
    895        cmove   t0, processedPos
    896        cmp     sym, t0
    897        jae     end_of_payload
    898        
    899        ; rep3 = rep2;
    900        ; rep2 = rep1;
    901        ; rep1 = rep0;
    902        ; rep0 = distance + 1;
    903 
    904        inc     sym
    905        mov     t0, LOC rep0
    906        mov     t1, LOC rep1
    907        mov     x1, LOC rep2
    908        mov     LOC rep0, sym
    909        ; mov     sym, LOC remainLen
    910        mov     sym, len_temp
    911        mov     LOC rep1, t0
    912        mov     LOC rep2, t1
    913        mov     LOC rep3, x1
    914        
    915        ; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
    916        cmp     state, (kNumStates + kNumLitStates) * PMULT
    917        mov     state, kNumLitStates * PMULT
    918        mov     t0, (kNumLitStates + 3) * PMULT
    919        cmovae  state, t0
    920 
    921        
    922 ; ---------- COPY MATCH ----------
    923 copy_match:
    924 
    925        ; len += kMatchMinLen;
    926        ; add     sym, kMatchMinLen
    927 
    928        ; if ((rem = limit - dicPos) == 0)
    929        ; {
    930        ;   p->dicPos = dicPos;
    931        ;   return SZ_ERROR_DATA;
    932        ; }
    933        mov     cnt_R, LOC limit
    934        sub     cnt_R, dicPos
    935        jz      fin_ERROR
    936 
    937        ; curLen = ((rem < len) ? (unsigned)rem : len);
    938        cmp     cnt_R, sym_R
    939        ; cmovae  cnt_R, sym_R ; 64-bit
    940        cmovae  cnt, sym ; 32-bit
    941 
    942        mov     dic, LOC dic_Spec
    943        mov     x1, LOC rep0
    944 
    945        mov     t0_R, dicPos
    946        add     dicPos, cnt_R
    947        ; processedPos += curLen;
    948        add     processedPos, cnt
    949        ; len -= curLen;
    950        sub     sym, cnt
    951        mov     LOC remainLen, sym
    952 
    953        sub     t0_R, dic
    954        
    955        ; pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
    956        sub     t0_R, r1
    957        jae     @f
    958 
    959        mov     r1, LOC dicBufSize
    960        add     t0_R, r1
    961        sub     r1, t0_R
    962        cmp     cnt_R, r1
    963        ja      copy_match_cross
    964 @@:
    965        ; if (curLen <= dicBufSize - pos)
    966 
    967 ; ---------- COPY MATCH FAST ----------
    968        ; Byte *dest = dic + dicPos;
    969        ; mov     r1, dic
    970        ; ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
    971        ; sub   t0_R, dicPos
    972        ; dicPos += curLen;
    973 
    974        ; const Byte *lim = dest + curLen;
    975        add     t0_R, dic
    976        movzx   sym, byte ptr[t0_R]
    977        add     t0_R, cnt_R
    978        neg     cnt_R
    979        ; lea     r1, [dicPos - 1]
    980 copy_common:
    981        dec     dicPos
    982        ; cmp   LOC rep0, 1
    983        ; je    rep0Label
    984 
    985        ; t0_R - src_lim
    986        ; r1 - dest_lim - 1
    987        ; cnt_R - (-cnt)
    988 
    989        IsMatchBranch_Pre
    990        inc     cnt_R
    991        jz      copy_end
    992 MY_ALIGN_16
    993 @@:
    994        mov     byte ptr[cnt_R * 1 + dicPos], sym_L
    995        movzx   sym, byte ptr[cnt_R * 1 + t0_R]
    996        inc     cnt_R
    997        jnz     @b
    998 
    999 copy_end:                              ; reached from copy_common, directly or after its loop
   1000 lz_end_match:                          ; also the target of the jmp that ends IsRep0Short_label
   1001        mov     byte ptr[dicPos], sym_L ; store the pending byte held in sym
   1002        inc     dicPos                  ; dicPos now points one past the byte just written
   1003        ; CheckLimits and IF_BIT_1_NOUP are macros defined outside this chunk (behaviour not visible here).
   1004        ; IsMatchBranch_Pre
   1005        CheckLimits
   1006 lz_end:                                ; NOTE(review): presumably branches to IsMatch_label when the IsMatch bit is 1 - confirm; otherwise execution falls into LITERAL MATCHED
   1007        IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label
   1008 
   1009 
   1010 
   1011 ; ---------- LITERAL MATCHED ----------
   1012        ; Decodes one literal using the byte at distance rep0 (matchByte) and writes it to dic[dicPos].
   1013        LIT_PROBS LOC lpMask
   1014        ; LIT_PROBS, LITM_0, LITM and LITM_2 are macros defined outside this chunk.
   1015        ; matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
   1016        mov     x1, LOC rep0            ; the value is consumed below through r1 (x1 is presumably the 32-bit view of r1 - confirm)
   1017        ; mov     dic, LOC dic_Spec
   1018        mov     LOC dicPos_Spec, dicPos ; save dicPos: the register is reused for the match position and reloaded after decoding
   1019        
   1020        ; state -= (state < 10) ? 3 : 6;
   1021        lea     t0, [state_R - 6 * PMULT]   ; t0 = state - 6 (state is kept multiplied by PMULT)
   1022        sub     state, 3 * PMULT            ; state - 3
   1023        cmp     state, 7 * PMULT            ; compared with 7 because 3 was already subtracted (original state >= 10)
   1024        cmovae  state, t0                   ; original state >= 10: use state - 6 instead
   1025        
   1026        sub     dicPos, dic             ; pointer -> offset inside dic
   1027        sub     dicPos, r1              ; offset - rep0
   1028        jae     @f                      ; no borrow: the position did not wrap
   1029        add     dicPos, LOC dicBufSize  ; wrapped below zero: add the buffer size
   1030 @@:
   1031        comment ~
   1032        xor     t0, t0
   1033        sub     dicPos, r1
   1034        cmovb   t0_R, LOC dicBufSize
   1035        ~
   1036        ; The comment block above is a disabled alternative for the wrap handling.
   1037        movzx   match, byte ptr[dic + dicPos * 1]   ; match = matchByte
   1038        ; Two build variants follow: a loop when _LZMA_SIZE_OPT is defined, an unrolled macro sequence otherwise.
   1039    ifdef _LZMA_SIZE_OPT
   1040 
   1041        mov     offs, 256 * PMULT
   1042        shl     match, (PSHIFT + 1)
   1043        mov     bit, match
   1044        mov     sym, 1                  ; sym starts at 1; the loop ends once it reaches 256
   1045 MY_ALIGN_16
   1046 litm_loop:
   1047        LITM
   1048        cmp     sym, 256
   1049        jb      litm_loop
   1050        sub     sym, 256                ; remove the 256 so sym fits in the byte stored through sym_L below
   1051        
   1052    else
   1053        ; Unrolled variant: eight macro invocations (presumably one per literal bit - confirm against the macros).
   1054        LITM_0
   1055        LITM
   1056        LITM
   1057        LITM
   1058        LITM
   1059        LITM
   1060        LITM
   1061        LITM_2
   1062        
   1063    endif
   1064        
   1065        mov     probs, LOC probs_Spec   ; reload probs from the probs_Spec local
   1066        IsMatchBranch_Pre
   1067        ; mov     dic, LOC dic_Spec
   1068        mov     dicPos, LOC dicPos_Spec ; restore the write position saved above
   1069        mov     byte ptr[dicPos], sym_L ; emit the decoded literal
   1070        inc     dicPos
   1071        
   1072        CheckLimits
   1073 lit_matched_end:
   1074        IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label
   1075        ; IsMatchBranch
   1076        mov     lpMask_reg, LOC lpMask  ; fall-through path: reload lpMask into its register
   1077        sub     state, 3 * PMULT        ; state adjustment made before entering lit_start_2
   1078        jmp     lit_start_2             ; lit_start_2 is defined outside this chunk
   1079        
   1080 
   1081 
   1082 ; ---------- REP 0 LITERAL ----------
   1083 MY_ALIGN_32
   1084 IsRep0Short_label:
   1085        UPDATE_0 probs_state_R, pbPos_R, IsRep0Long
   1086        ; Produces exactly one byte: the byte found rep0 positions back in the dictionary.
   1087        ; dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
   1088        mov     dic, LOC dic_Spec
   1089        mov     t0_R, dicPos
   1090        mov     probBranch, LOC rep0    ; probBranch is used here only as a scratch register for rep0
   1091        sub     t0_R, dic               ; t0_R = write offset inside dic
   1092        
   1093        sub     probs, RepLenCoder * PMULT  ; undoes the "add probs, RepLenCoder * PMULT" done at IsRep_label
   1094        inc     processedPos            ; one byte is produced
   1095        ; state = state < kNumLitStates ? 9 : 11;
   1096        or      state, 1 * PMULT        ; IsRep_label left state at 8 or 11 (scaled); setting bit 0 gives 9 or 11
   1097        IsMatchBranch_Pre
   1098       
   1099        sub     t0_R, probBranch_R      ; offset - rep0
   1100        jae     @f                      ; no borrow: no wrap
   1101        add     t0_R, LOC dicBufSize    ; wrapped below zero: add the buffer size
   1102 @@:
   1103        movzx   sym, byte ptr[dic + t0_R * 1]   ; load the byte to repeat
   1104        jmp     lz_end_match            ; lz_end_match stores sym at dicPos and advances dicPos
   1105  
   1106        
   1107 MY_ALIGN_32
   1108 IsRep_label:
   1109        UPDATE_1 probs_state_R, 0, IsRep
   1110        ; Repeated-match path: sets the new state, then selects between the IsRepG0 and IsRep0Long branches.
   1111        ; The (checkDicSize == 0 && processedPos == 0) case was checked before in LzmaDec.c with kBadRepCode.
   1112        ; So we don't check it here.
   1113        
   1114        ; mov     t0, processedPos
   1115        ; or      t0, LOC checkDicSize
   1116        ; jz      fin_ERROR_2
   1117 
   1118        ; state = state < kNumLitStates ? 8 : 11;
   1119        cmp     state, kNumLitStates * PMULT
   1120        mov     state, 8 * PMULT        ; mov leaves the flags from the cmp intact
   1121        mov     probBranch, 11 * PMULT
   1122        cmovae  state, probBranch       ; old state >= kNumLitStates: 11, otherwise 8 stays
   1123 
   1124        ; prob = probs + RepLenCoder;
   1125        add     probs, RepLenCoder * PMULT  ; IsRep0Short_label subtracts this again
   1126        ; IF_BIT_1, IF_BIT_0_NOUP and UPDATE_1 are macros defined outside this chunk.
   1127        IF_BIT_1 probs_state_R, 0, IsRepG0, IsRepG0_label
   1128        IF_BIT_0_NOUP probs_state_R, pbPos_R, IsRep0Long, IsRep0Short_label
   1129        UPDATE_1 probs_state_R, pbPos_R, IsRep0Long
   1130        jmp     len_decode              ; len_decode is defined outside this chunk
   1131 
   1132 MY_ALIGN_32
   1133 IsRepG0_label:                         ; the distance comes from rep1, rep2 or rep3; rep0 is pushed down to rep1
   1134        UPDATE_1 probs_state_R, 0, IsRepG0
   1135        mov     dist2, LOC rep0         ; dist2 = old rep0
   1136        mov     dist, LOC rep1          ; dist  = old rep1
   1137        mov     LOC rep1, dist2         ; rep1 = old rep0
   1138        ; If the IF_BIT_1 macro does not branch, old rep1 becomes the new rep0.
   1139        IF_BIT_1 probs_state_R, 0, IsRepG1, IsRepG1_label
   1140        mov     LOC rep0, dist          ; rep0 = old rep1
   1141        jmp     len_decode
   1142        
   1143 ; MY_ALIGN_32
   1144 IsRepG1_label:                         ; entered from IsRepG0_label with dist = old rep1
   1145        UPDATE_1 probs_state_R, 0, IsRepG1
   1146        mov     dist2, LOC rep2         ; dist2 = old rep2
   1147        mov     LOC rep2, dist          ; rep2 = old rep1
   1148        ; If the IF_BIT_1 macro does not branch, old rep2 becomes the new rep0.
   1149        IF_BIT_1 probs_state_R, 0, IsRepG2, IsRepG2_label
   1150        mov     LOC rep0, dist2         ; rep0 = old rep2
   1151        jmp     len_decode
   1152 
   1153 ; MY_ALIGN_32
   1154 IsRepG2_label:                         ; entered from IsRepG1_label with dist2 = old rep2
   1155        UPDATE_1 probs_state_R, 0, IsRepG2
   1156        mov     dist, LOC rep3          ; dist = old rep3
   1157        mov     LOC rep3, dist2         ; rep3 = old rep2
   1158        mov     LOC rep0, dist          ; rep0 = old rep3
   1159        jmp     len_decode
   1160 
   1161        
   1162 
   1163 ; ---------- SPEC SHORT DISTANCE ----------
   1164        ; The entry values of sym and x1 are set up outside this chunk; REV_1_VAR is a macro defined elsewhere.
   1165 MY_ALIGN_32
   1166 short_dist:
   1167        sub     x1, 32 + 1              ; x1 -= 33
   1168        jbe     decode_dist_end         ; borrow or zero (x1 <= 33 on entry): nothing more to decode here
   1169        or      sym, 2
   1170        shl     sym, x1_L               ; sym = (sym | 2) << x1
   1171        lea     sym_R, [probs + sym_R * PMULT + SpecPos * PMULT + 1 * PMULT]   ; sym_R becomes an address based on probs and the SpecPos offset
   1172        mov     sym2, PMULT ; step
   1173 MY_ALIGN_32
   1174 spec_loop:                             ; runs x1 times (x1 > 0 is guaranteed by the jbe above)
   1175        REV_1_VAR x2
   1176        dec     x1
   1177        jnz     spec_loop
   1178        ; Turn the address in sym_R back into a plain value: remove sym2, the SpecPos offset and the probs base, then drop the PMULT scaling.
   1179        mov     probs, LOC probs_Spec
   1180        sub     sym, sym2
   1181        sub     sym, SpecPos * PMULT
   1182        sub     sym_R, probs
   1183        shr     sym, PSHIFT
   1184        
   1185        jmp     decode_dist_end         ; decode_dist_end is defined outside this chunk
   1186 
   1187 
   1188 ; ---------- COPY MATCH CROSS ----------
   1189 copy_match_cross:                      ; the source run crosses the end of the dictionary buffer: copy up to the end here, then continue from dic[0] in copy_common
   1190        ; t0_R - src pos
   1191        ; r1 - len to dicBufSize
   1192        ; cnt_R - total copy len
   1193        ; dicPos already includes the copy length here, so [cnt_R + dicPos] with cnt_R negative addresses the destination.
   1194        mov     t1_R, t0_R         ; srcPos
   1195        mov     t0_R, dic                ; t0_R = base of the dictionary buffer
   1196        mov     r1, LOC dicBufSize   ; r1 = offset at which the source index must wrap
   1197        neg     cnt_R                    ; cnt_R = -(bytes left to copy)
   1198 @@:
   1199        movzx   sym, byte ptr[t1_R * 1 + t0_R]       ; sym = dic[srcPos]
   1200        inc     t1_R
   1201        mov     byte ptr[cnt_R * 1 + dicPos], sym_L  ; store to the destination
   1202        inc     cnt_R
   1203        cmp     t1_R, r1                 ; stop once srcPos reaches dicBufSize
   1204        jne     @b
   1205        ; Hand over to copy_common in the state it expects: sym preloaded, t0_R = src_lim, cnt_R = -(bytes left).
   1206        movzx   sym, byte ptr[t0_R]      ; preload dic[0], the first byte after the wrap
   1207        sub     t0_R, cnt_R              ; cnt_R is negative: t0_R = dic + bytes left = src_lim
   1208        jmp     copy_common
   1209 
   1210 
   1211 
   1212 
   1213 fin_ERROR:                             ; error exit: records the current length and returns a non-zero code
   1214        mov     LOC remainLen, len_temp ; keep the current length in the remainLen local
   1215 ; fin_ERROR_2:
   1216        mov     sym, 1                  ; result code 1; fin copies sym into x0 (presumably SZ_ERROR_DATA - confirm)
   1217        jmp     fin
   1218 
   1219 end_of_payload:
   1220        cmp     sym, 0FFFFFFFFh ; -1
   1221        jne     fin_ERROR               ; any value other than -1 is treated as an error
   1222        ; sym == -1: record kMatchSpecLenStart in remainLen, adjust state, then fall through to fin_OK.
   1223        mov     LOC remainLen, kMatchSpecLenStart
   1224        sub     state, kNumStates * PMULT
   1225 
   1226 fin_OK:
   1227        xor     sym, sym                ; result code 0
   1228        ; Common exit: writes the decoder state back and returns; sym holds the result code.
   1229 fin:
   1230        NORM
   1231        ; NORM, GLOB, LOC and RESTORE_VAR are defined outside this chunk.
   1232        mov     r1, LOC lzmaPtr         ; presumably the base used by the GLOB accesses below - confirm
   1233 
   1234        sub     dicPos, LOC dic_Spec    ; pointer -> offset from dic_Spec before it is stored
   1235        mov     GLOB dicPos_Spec, dicPos
   1236        mov     GLOB buf_Spec, buf
   1237        mov     GLOB range_Spec, range
   1238        mov     GLOB code_Spec, cod
   1239        shr     state, PSHIFT           ; remove the PMULT scaling before storing the state
   1240        mov     GLOB state_Spec, state
   1241        mov     GLOB processedPos_Spec, processedPos
   1242        ; remainLen and rep0..rep3 are handled by RESTORE_VAR (presumably copied from the locals back to the structure - confirm).
   1243        RESTORE_VAR(remainLen)
   1244        RESTORE_VAR(rep0)
   1245        RESTORE_VAR(rep1)
   1246        RESTORE_VAR(rep2)
   1247        RESTORE_VAR(rep3)
   1248 
   1249        mov     x0, sym                 ; result code into x0 (presumably the return register - confirm in 7zAsm.asm)
   1250        
   1251        mov     RSP, LOC Old_RSP        ; reload RSP from the Old_RSP local
   1252 
   1253 MY_POP_PRESERVED_REGS
   1254 MY_ENDP
   1255 
   1256 _TEXT$LZMADECOPT ENDS
   1257 
   1258 end