neovim

Neovim text editor
git clone https://git.dasho.dev/neovim.git
Log | Files | Refs | README

test_regexp_utf8.vim (24193B)


      1 " Tests for regexp in utf8 encoding
      2 
      3 source shared.vim
      4 
      5 func s:equivalence_test()
      6  let str = "AÀÁÂÃÄÅĀĂĄǍǞǠǺȂȦȺḀẠẢẤẦẨẪẬẮẰẲẴẶ BƁɃḂḄḆ CÇĆĈĊČƇȻḈꞒ DĎĐƊḊḌḎḐḒ EÈÉÊËĒĔĖĘĚȄȆȨɆḔḖḘḚḜẸẺẼẾỀỂỄỆ FƑḞꞘ GĜĞĠĢƓǤǦǴḠꞠ HĤĦȞḢḤḦḨḪⱧ IÌÍÎÏĨĪĬĮİƗǏȈȊḬḮỈỊ JĴɈ KĶƘǨḰḲḴⱩꝀ LĹĻĽĿŁȽḶḸḺḼⱠ MḾṀṂ NÑŃŅŇǸṄṆṈṊꞤ OÒÓÔÕÖØŌŎŐƟƠǑǪǬǾȌȎȪȬȮȰṌṎṐṒỌỎỐỒỔỖỘỚỜỞỠỢ PƤṔṖⱣ QɊ RŔŖŘȐȒɌṘṚṜṞⱤꞦ SŚŜŞŠȘṠṢṤṦṨⱾꞨ TŢŤŦƬƮȚȾṪṬṮṰ UÙÚÛÜŨŪŬŮŰƯǕǙǛǓǗȔȖɄṲṴṶṸṺỤỦỨỪỬỮỰ  VƲṼṾ WŴẀẂẄẆẈ XẊẌ YÝŶŸƳȲɎẎỲỴỶỸ ZŹŻŽƵẐẒẔⱫ aàáâãäåāăąǎǟǡǻȃȧᶏḁẚạảấầẩẫậắằẳẵặⱥ bƀɓᵬᶀḃḅḇ cçćĉċčƈȼḉꞓꞔ dďđɗᵭᶁᶑḋḍḏḑḓ eèéêëēĕėęěȅȇȩɇᶒḕḗḙḛḝẹẻẽếềểễệ fƒᵮᶂḟꞙ gĝğġģǥǧǵɠᶃḡꞡ hĥħȟḣḥḧḩḫẖⱨꞕ iìíîïĩīĭįǐȉȋɨᶖḭḯỉị jĵǰɉ kķƙǩᶄḱḳḵⱪꝁ lĺļľŀłƚḷḹḻḽⱡ mᵯḿṁṃ nñńņňʼnǹᵰᶇṅṇṉṋꞥ oòóôõöøōŏőơǒǫǭǿȍȏȫȭȯȱɵṍṏṑṓọỏốồổỗộớờởỡợ pƥᵱᵽᶈṕṗ qɋʠ rŕŗřȑȓɍɽᵲᵳᶉṛṝṟꞧ sśŝşšșȿᵴᶊṡṣṥṧṩꞩ tţťŧƫƭțʈᵵṫṭṯṱẗⱦ uùúûüũūŭůűųǚǖưǔǘǜȕȗʉᵾᶙṳṵṷṹṻụủứừửữự vʋᶌṽṿ wŵẁẃẅẇẉẘ xẋẍ yýÿŷƴȳɏẏẙỳỵỷỹ zźżžƶᵶᶎẑẓẕⱬ"
      7  let groups = split(str)
      8  for group1 in groups
      9      for c in split(group1, '\zs')
     10 " next statement confirms that equivalence class matches every
     11 " character in group
     12        call assert_match('^[[=' .. c .. '=]]*$', group1)
     13        for group2 in groups
     14          if group2 != group1
     15     " next statement converts that equivalence class doesn't match
     16     " character in any other group
     17            call assert_equal(-1, match(group2, '[[=' .. c .. '=]]'), c)
     18          endif
     19        endfor
     20      endfor
     21  endfor
     22 endfunc
     23 
     24 func Test_equivalence_re1()
     25  set re=1
     26  call s:equivalence_test()
     27  set re=0
     28 endfunc
     29 
     30 func Test_equivalence_re2()
     31  set re=2
     32  call s:equivalence_test()
     33  set re=0
     34 endfunc
     35 
     36 func s:classes_test()
     37  if has('win32')
     38    set iskeyword=@,48-57,_,192-255
     39  endif
     40  set isprint=@,161-255
     41  call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))
     42 
     43  let alnumchars = ''
     44  let alphachars = ''
     45  let backspacechar = ''
     46  let blankchars = ''
     47  let cntrlchars = ''
     48  let digitchars = ''
     49  let escapechar = ''
     50  let graphchars = ''
     51  let lowerchars = ''
     52  let printchars = ''
     53  let punctchars = ''
     54  let returnchar = ''
     55  let spacechars = ''
     56  let tabchar = ''
     57  let upperchars = ''
     58  let xdigitchars = ''
     59  let identchars = ''
     60  let identchars1 = ''
     61  let kwordchars = ''
     62  let kwordchars1 = ''
     63  let fnamechars = ''
     64  let fnamechars1 = ''
     65  let i = 1
     66  while i <= 255
     67    let c = nr2char(i)
     68    if c =~ '[[:alpha:]]'
     69      let alphachars .= c
     70    endif
     71    if c =~ '[[:alnum:]]'
     72      let alnumchars .= c
     73    endif
     74    if c =~ '[[:backspace:]]'
     75      let backspacechar .= c
     76    endif
     77    if c =~ '[[:blank:]]'
     78      let blankchars .= c
     79    endif
     80    if c =~ '[[:cntrl:]]'
     81      let cntrlchars .= c
     82    endif
     83    if c =~ '[[:digit:]]'
     84      let digitchars .= c
     85    endif
     86    if c =~ '[[:escape:]]'
     87      let escapechar .= c
     88    endif
     89    if c =~ '[[:graph:]]'
     90      let graphchars .= c
     91    endif
     92    if c =~ '[[:lower:]]'
     93      let lowerchars .= c
     94    endif
     95    if c =~ '[[:print:]]'
     96      let printchars .= c
     97    endif
     98    if c =~ '[[:punct:]]'
     99      let punctchars .= c
    100    endif
    101    if c =~ '[[:return:]]'
    102      let returnchar .= c
    103    endif
    104    if c =~ '[[:space:]]'
    105      let spacechars .= c
    106    endif
    107    if c =~ '[[:tab:]]'
    108      let tabchar .= c
    109    endif
    110    if c =~ '[[:upper:]]'
    111      let upperchars .= c
    112    endif
    113    if c =~ '[[:xdigit:]]'
    114      let xdigitchars .= c
    115    endif
    116    if c =~ '[[:ident:]]'
    117      let identchars .= c
    118    endif
    119    if c =~ '\i'
    120      let identchars1 .= c
    121    endif
    122    if c =~ '[[:keyword:]]'
    123      let kwordchars .= c
    124    endif
    125    if c =~ '\k'
    126      let kwordchars1 .= c
    127    endif
    128    if c =~ '[[:fname:]]'
    129      let fnamechars .= c
    130    endif
    131    if c =~ '\f'
    132      let fnamechars1 .= c
    133    endif
    134    let i += 1
    135  endwhile
    136 
    137  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alphachars)
    138  call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alnumchars)
    139  call assert_equal("\b", backspacechar)
    140  call assert_equal("\t ", blankchars)
    141  call assert_equal("\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f", cntrlchars)
    142  call assert_equal("0123456789", digitchars)
    143  call assert_equal("\<Esc>", escapechar)
    144  call assert_equal('!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', graphchars)
    145  call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', lowerchars)
    146  call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', printchars)
    147  call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', punctchars)
    148  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', upperchars)
    149  call assert_equal("\r", returnchar)
    150  call assert_equal("\t\n\x0b\f\r ", spacechars)
    151  call assert_equal("\t", tabchar)
    152  call assert_equal('0123456789ABCDEFabcdef', xdigitchars)
    153 
    154  if has('win32')
    155    let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
    156    let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    157  else
    158    let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    159    let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    160  endif
    161 
    162  if has('win32')
    163    let fnamechars_ok = '!#$%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_abcdefghijklmnopqrstuvwxyz{}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    164  elseif has('amiga')
    165    let fnamechars_ok = '$+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    166  elseif has('vms')
    167    let fnamechars_ok = '#$%+,-./0123456789:;<>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    168  else
    169    let fnamechars_ok = '#$%+,-./0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    170  endif
    171 
    172  call assert_equal(identchars_ok, identchars)
    173  call assert_equal(kwordchars_ok, kwordchars)
    174  call assert_equal(fnamechars_ok, fnamechars)
    175 
    176  call assert_equal(identchars1, identchars)
    177  call assert_equal(kwordchars1, kwordchars)
    178  call assert_equal(fnamechars1, fnamechars)
    179 endfunc
    180 
    181 func Test_classes_re1()
    182  set re=1
    183  call s:classes_test()
    184  set re=0
    185 endfunc
    186 
    187 func Test_classes_re2()
    188  set re=2
    189  call s:classes_test()
    190  set re=0
    191 endfunc
    192 
    193 func Test_reversed_range()
    194  for re in range(0, 2)
    195    exe 'set re=' . re
    196    call assert_fails('call match("abc def", "[c-a]")', 'E944:', re)
    197  endfor
    198  set re=0
    199 endfunc
    200 
    201 func Test_large_class()
    202  set re=1
    203  call assert_fails('call match("abc def", "[\u3000-\u4000]")', 'E945:')
    204  set re=2
    205  call assert_equal(0, 'abc def' =~# '[\u3000-\u4000]')
    206  call assert_equal(1, "\u3042" =~# '[\u3000-\u4000]')
    207  set re=0
    208 endfunc
    209 
    210 func Test_optmatch_toolong()
    211  set re=1
    212  " Can only handle about 8000 characters.
    213  let pat = '\\%[' .. repeat('x', 9000) .. ']'
    214  call assert_fails('call match("abc def", "' .. pat .. '")', 'E339:')
    215  set re=0
    216 endfunc
    217 
    218 " Test for regexp patterns with multi-byte support, using utf-8.
    219 func Test_multibyte_chars()
    220  " tl is a List of Lists with:
    221  "    2: test auto/old/new  0: test auto/old  1: test auto/new
    222  "    regexp pattern
    223  "    text to test the pattern on
    224  "    expected match (optional)
    225  "    expected submatch 1 (optional)
    226  "    expected submatch 2 (optional)
    227  "    etc.
    228  "  When there is no match use only the first two items.
    229  let tl = []
    230 
    231  " Multi-byte character tests.
    232  call add(tl, [2, '[[:alpha:][=a=]]\+', '879 aiaãâaiuvna ', 'aiaãâaiuvna'])
    233  call add(tl, [2, '[[=a=]]\+', 'ddaãâbcd', 'aãâ'])								" equivalence classes
    234  call add(tl, [2, '[^ม ]\+', 'มม oijasoifjos ifjoisj f osij j มมมมม abcd', 'oijasoifjos'])
    235  call add(tl, [2, ' [^ ]\+', 'start มabcdม ', ' มabcdม'])
    236  call add(tl, [2, '[ม[:alpha:][=a=]]\+', '879 aiaãมâมaiuvna ', 'aiaãมâมaiuvna'])
    237 
    238  " this is not a normal "i" but 0xec
    239  call add(tl, [2, '\p\+', 'ìa', 'ìa'])
    240  call add(tl, [2, '\p*', 'aあ', 'aあ'])
    241 
    242  " Test recognition of some character classes
    243  call add(tl, [2, '\i\+', '&*¨xx ', 'xx'])
    244  call add(tl, [2, '\f\+', '&*Ÿfname ', 'fname'])
    245 
    246  " Test composing character matching
    247  call add(tl, [2, '.ม', 'xม่x yมy', 'yม'])
    248  call add(tl, [2, '.ม่', 'xม่x yมy', 'xม่'])
    249  call add(tl, [2, "\u05b9", " x\u05b9 ", "x\u05b9"])
    250  call add(tl, [2, ".\u05b9", " x\u05b9 ", "x\u05b9"])
    251  call add(tl, [2, "\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
    252  call add(tl, [2, ".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
    253  call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
    254  call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
    255  call add(tl, [2, "\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
    256  call add(tl, [2, ".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
    257  call add(tl, [2, "\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
    258  call add(tl, [2, ".\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
    259  call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
    260  call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
    261  call add(tl, [2, "a", "ca\u0300t"])
    262  call add(tl, [2, "ca", "ca\u0300t"])
    263  call add(tl, [2, "a\u0300", "ca\u0300t", "a\u0300"])
    264  call add(tl, [2, 'a\%C', "ca\u0300t", "a\u0300"])
    265  call add(tl, [2, 'ca\%C', "ca\u0300t", "ca\u0300"])
    266  call add(tl, [2, 'ca\%Ct', "ca\u0300t", "ca\u0300t"])
    267 
    268  " Test \Z
    269  call add(tl, [2, 'ú\Z', 'x'])
    270  call add(tl, [2, 'יהוה\Z', 'יהוה', 'יהוה'])
    271  call add(tl, [2, 'יְהוָה\Z', 'יהוה', 'יהוה'])
    272  call add(tl, [2, 'יהוה\Z', 'יְהוָה', 'יְהוָה'])
    273  call add(tl, [2, 'יְהוָה\Z', 'יְהוָה', 'יְהוָה'])
    274  call add(tl, [2, 'יְ\Z', 'וְיַ', 'יַ'])
    275  call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
    276  call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200dxy", "ק\u200dx"])
    277  call add(tl, [2, "ק\u200dx\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
    278  call add(tl, [2, "ק\u200dx\\Z", "xק\u200dxy", "ק\u200dx"])
    279  call add(tl, [2, "\u05b9\\Z", "xyz"])
    280  call add(tl, [2, "\\Z\u05b9", "xyz"])
    281  call add(tl, [2, "\u05b9\\Z", "xy\u05b9z", "y\u05b9"])
    282  call add(tl, [2, "\\Z\u05b9", "xy\u05b9z", "y\u05b9"])
    283  call add(tl, [1, "\u05b9\\+\\Z", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])
    284  call add(tl, [1, "\\Z\u05b9\\+", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])
    285 
    286  " Combining different tests and features
    287  call add(tl, [2, '[^[=a=]]\+', 'ddaãâbcd', 'dd'])
    288 
    289  " Run the tests
    290  for t in tl
    291    let re = t[0]
    292    let pat = t[1]
    293    let text = t[2]
    294    let matchidx = 3
    295    for engine in [0, 1, 2]
    296      if engine == 2 && re == 0 || engine == 1 && re == 1
    297        continue
    298      endif
    299      let &regexpengine = engine
    300      try
    301        let l = matchlist(text, pat)
    302      catch
    303        call assert_report('Error ' . engine . ': pat: \"' . pat .
    304 	    \ '\", text: \"' . text .
    305 	    \ '\", caused an exception: \"' . v:exception . '\"')
    306      endtry
    307      " check the match itself
    308      if len(l) == 0 && len(t) > matchidx
    309        call assert_report('Error ' . engine . ': pat: \"' . pat .
    310 	    \ '\", text: \"' . text .
    311 	    \ '\", did not match, expected: \"' . t[matchidx] . '\"')
    312      elseif len(l) > 0 && len(t) == matchidx
    313        call assert_report('Error ' . engine . ': pat: \"' . pat .
    314 	    \ '\", text: \"' . text . '\", match: \"' . l[0] .
    315 	    \ '\", expected no match')
    316      elseif len(t) > matchidx && l[0] != t[matchidx]
    317        call assert_report('Error ' . engine . ': pat: \"' . pat .
    318 	    \ '\", text: \"' . text . '\", match: \"' . l[0] .
    319 	    \ '\", expected: \"' . t[matchidx] . '\"')
    320      else
    321        " Test passed
    322      endif
    323      if len(l) > 0
    324        " check all the nine submatches
    325        for i in range(1, 9)
    326          if len(t) <= matchidx + i
    327            let e = ''
    328          else
    329            let e = t[matchidx + i]
    330          endif
    331          if l[i] != e
    332            call assert_report('Error ' . engine . ': pat: \"' . pat .
    333                  \ '\", text: \"' . text . '\", submatch ' . i .
    334                  \ ': \"' . l[i] . '\", expected: \"' . e . '\"')
    335          endif
    336        endfor
    337        unlet i
    338      endif
    339    endfor
    340  endfor
    341  set regexpengine&
    342 endfunc
    343 
    344 " check that 'ambiwidth' does not change the meaning of \p
    345 func Test_regexp_ambiwidth()
    346  set regexpengine=1 ambiwidth=single
    347  call assert_equal(0, match("\u00EC", '\p'))
    348  set regexpengine=1 ambiwidth=double
    349  call assert_equal(0, match("\u00EC", '\p'))
    350  set regexpengine=2 ambiwidth=single
    351  call assert_equal(0, match("\u00EC", '\p'))
    352  set regexpengine=2 ambiwidth=double
    353  call assert_equal(0, match("\u00EC", '\p'))
    354  set regexpengine& ambiwidth&
    355 endfunc
    356 
    357 func Run_regexp_ignore_case()
    358  call assert_equal('iIİ', substitute('iIİ', '\([iIİ]\)', '\1', 'g'))
    359 
    360  call assert_equal('iIx', substitute('iIİ', '\c\([İ]\)', 'x', 'g'))
    361  call assert_equal('xxİ', substitute('iIİ', '\(i\c\)', 'x', 'g'))
    362  call assert_equal('iIx', substitute('iIİ', '\(İ\c\)', 'x', 'g'))
    363  call assert_equal('iIx', substitute('iIİ', '\c\(\%u0130\)', 'x', 'g'))
    364  call assert_equal('iIx', substitute('iIİ', '\c\([\u0130]\)', 'x', 'g'))
    365  call assert_equal('iIx', substitute('iIİ', '\c\([\u012f-\u0131]\)', 'x', 'g'))
    366 endfunc
    367 
    368 func Test_regexp_ignore_case()
    369  set regexpengine=1
    370  call Run_regexp_ignore_case()
    371  set regexpengine=2
    372  call Run_regexp_ignore_case()
    373  set regexpengine&
    374 endfunc
    375 
    376 " Tests for regexp with multi-byte encoding and various magic settings
    377 func Run_regexp_multibyte_magic()
    378  let text =<< trim END
    379    1 a aa abb abbccc
    380    2 d dd dee deefff
    381    3 g gg ghh ghhiii
    382    4 j jj jkk jkklll
    383    5 m mm mnn mnnooo
    384    6 x ^aa$ x
    385    7 (a)(b) abbaa
    386    8 axx [ab]xx
    387    9 หม่x อมx
    388    a อมx หม่x
    389    b ちカヨは
    390    c x ¬€x
    391    d 天使x
    392    e ������y
    393    f ������z
    394    g a啷bb
    395    j 0123x
    396    k combinations
    397    l äö üᾱ̆́
    398  END
    399 
    400  new
    401  call setline(1, text)
    402  exe 'normal /a*b\{2}c\+/e' .. "\<CR>x"
    403  call assert_equal('1 a aa abb abbcc', getline('.'))
    404  exe 'normal /\Md\*e\{2}f\+/e' .. "\<CR>x"
    405  call assert_equal('2 d dd dee deeff', getline('.'))
    406  set nomagic
    407  exe 'normal /g\*h\{2}i\+/e' .. "\<CR>x"
    408  call assert_equal('3 g gg ghh ghhii', getline('.'))
    409  exe 'normal /\mj*k\{2}l\+/e' .. "\<CR>x"
    410  call assert_equal('4 j jj jkk jkkll', getline('.'))
    411  exe 'normal /\vm*n{2}o+/e' .. "\<CR>x"
    412  call assert_equal('5 m mm mnn mnnoo', getline('.'))
    413  exe 'normal /\V^aa$/' .. "\<CR>x"
    414  call assert_equal('6 x aa$ x', getline('.'))
    415  set magic
    416  exe 'normal /\v(a)(b)\2\1\1/e' .. "\<CR>x"
    417  call assert_equal('7 (a)(b) abba', getline('.'))
    418  exe 'normal /\V[ab]\(\[xy]\)\1' .. "\<CR>x"
    419  call assert_equal('8 axx ab]xx', getline('.'))
    420 
    421  " search for multi-byte without composing char
    422  exe 'normal /ม' .. "\<CR>x"
    423  call assert_equal('9 หม่x อx', getline('.'))
    424 
    425  " search for multi-byte with composing char
    426  exe 'normal /ม่' .. "\<CR>x"
    427  call assert_equal('a อมx หx', getline('.'))
    428 
    429  " find word by change of word class
    430  exe 'normal /ち\<カヨ\>は' .. "\<CR>x"
    431  call assert_equal('b カヨは', getline('.'))
    432 
    433  " Test \%u, [\u] and friends
    434  " c
    435  exe 'normal /\%u20ac' .. "\<CR>x"
    436  call assert_equal('c x ¬x', getline('.'))
    437  " d
    438  exe 'normal /[\u4f7f\u5929]\+' .. "\<CR>x"
    439  call assert_equal('d 使x', getline('.'))
    440  " e
    441  exe 'normal /\%U12345678' .. "\<CR>x"
    442  call assert_equal('e y', getline('.'))
    443  " f
    444  exe 'normal /[\U1234abcd\u1234\uabcd]' .. "\<CR>x"
    445  call assert_equal('f z', getline('.'))
    446  " g
    447  exe 'normal /\%d21879b' .. "\<CR>x"
    448  call assert_equal('g abb', getline('.'))
    449 
    450  " j Test backwards search from a multi-byte char
    451  exe "normal /x\<CR>x?.\<CR>x"
    452  call assert_equal('j 012❤', getline('.'))
    453  " k
    454  let @w=':%s#comb[i]nations#œ̄ṣ́m̥̄ᾱ̆́#g'
    455  @w
    456  call assert_equal('k œ̄ṣ́m̥̄ᾱ̆́', getline(18))
    457 
    458  bw!
    459 endfunc
    460 
    461 func Test_regexp_multibyte_magic()
    462  set regexpengine=1
    463  call Run_regexp_multibyte_magic()
    464  set regexpengine=2
    465  call Run_regexp_multibyte_magic()
    466  set regexpengine&
    467 endfunc
    468 
    469 " Test for 7.3.192
    470 " command ":s/ \?/ /g" splits multi-byte characters into bytes
    471 func Test_split_multibyte_to_bytes()
    472  new
    473  call setline(1, 'l äö üᾱ̆́')
    474  s/ \?/ /g
    475  call assert_equal(' l ä ö ü ᾱ̆́', getline(1))
    476  bw!
    477 endfunc
    478 
    479 " Test for matchstr() with multibyte characters
    480 func Test_matchstr_multibyte()
    481  new
    482  call assert_equal('ב', matchstr("אבגד", ".", 0, 2))
    483  call assert_equal('בג', matchstr("אבגד", "..", 0, 2))
    484  call assert_equal('א', matchstr("אבגד", ".", 0, 0))
    485  call assert_equal('ג', matchstr("אבגד", ".", 4, -1))
    486  bw!
    487 endfunc
    488 
    489 " Test for 7.4.636
    490 " A search with end offset gets stuck at end of file.
    491 func Test_search_with_end_offset()
    492  new
    493  call setline(1, ['', 'dog(a', 'cat('])
    494  exe "normal /(/e+\<CR>"
    495  normal n"ayn
    496  call assert_equal("a\ncat(", @a)
    497  bw!
    498 endfunc
    499 
    500 " Check that "^" matches even when the line starts with a combining char
    501 func Test_match_start_of_line_combining()
    502  new
    503  call setline(1, ['', "\u05ae", ''])
    504  exe "normal gg/^\<CR>"
    505  call assert_equal(2, getcurpos()[1])
    506  bwipe!
    507 endfunc
    508 
    509 " Check that [[:upper:]] matches for automatic engine
    510 func Test_match_char_class_upper()
    511  new
    512 
    513  " Test 1: [[:upper:]]\{2,\}
    514  set regexpengine=0
    515  call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...'])
    516  call cursor(1,1)
    517  let search_cmd='norm /\<[[:upper:]]\{2,\}\>' .. "\<CR>"
    518  exe search_cmd
    519  call assert_equal(4, searchcount().total, 'TEST 1')
    520  set regexpengine=1
    521  exe search_cmd
    522  call assert_equal(2, searchcount().total, 'TEST 1')
    523  set regexpengine=2
    524  exe search_cmd
    525  call assert_equal(4, searchcount().total, 'TEST 1')
    526 
    527  " Test 2: [[:upper:]].\+
    528  let search_cmd='norm /\<[[:upper:]].\+\>' .. "\<CR>"
    529  set regexpengine=0
    530  exe search_cmd
    531  call assert_equal(2, searchcount().total, 'TEST 2')
    532  set regexpengine=1
    533  exe search_cmd
    534  call assert_equal(1, searchcount().total, 'TEST 2')
    535  set regexpengine=2
    536  exe search_cmd
    537  call assert_equal(2, searchcount().total, 'TEST 2')
    538 
    539  " Test 3: [[:lower:]]\+
    540  let search_cmd='norm /\<[[:lower:]]\+\>' .. "\<CR>"
    541  set regexpengine=0
    542  exe search_cmd
    543  call assert_equal(4, searchcount().total, 'TEST 3 lower')
    544  set regexpengine=1
    545  exe search_cmd
    546  call assert_equal(2, searchcount().total, 'TEST 3 lower')
    547  set regexpengine=2
    548  exe search_cmd
    549  call assert_equal(4, searchcount().total, 'TEST 3 lower')
    550 
    551  " clean up
    552  set regexpengine=0
    553  bwipe!
    554 endfunc
    555 
    556 func Test_match_invalid_byte()
    557  call writefile(0z630a.765d30aa0a.2e0a.790a.4030, 'Xinvalid')
    558  new
    559  source Xinvalid
    560  bwipe!
    561  call delete('Xinvalid')
    562 endfunc
    563 
    564 func Test_match_illegal_byte()
    565  " Text has illegal bytes which need to be set explicitly
    566  let lines = ["norm :set no\x01\<CR>", "silent n\xff", "silent norm :b\xff\<CR>"]
    567  call writefile(lines, 'Xregexp')
    568  call system(GetVimCommand() .. ' -X -Z -e -s -S Xregexp -c qa!')
    569 
    570  call delete('Xregexp')
    571 endfunc
    572 
    573 func Test_match_too_complicated()
    574  set regexpengine=1
    575  exe "noswapfile vsplit \xeb\xdb\x99"
    576  silent! buf \&\zs*\zs*0
    577  bwipe!
    578  set regexpengine=0
    579 endfunc
    580 
    581 func Test_combining_chars_in_collection()
    582  new
    583  for i in range(0,2)
    584    exe "set re=".i
    585    put =['ɔ̃', 'ɔ',  '̃  ã', 'abcd']
    586    :%s/[ɔ̃]//
    587    call assert_equal(['', '', 'ɔ', '̃  ã', 'abcd'], getline(1,'$'))
    588    %d
    589  endfor
    590  bw!
    591 endfunc
    592 
    593 func Test_search_multibyte_match_ascii()
    594  new
    595  " Match single 'ſ' and 's'
    596  call setline(1,  'das abc heraus abc ſich abc ſind')
    597  for i in range(0, 2)
    598    exe "set re="..i
    599    let ic_match = matchbufline('%', '\c\%u17f', 1, '$')->mapnew({idx, val -> val.text})
    600    let noic_match = matchbufline('%', '\C\%u17f', 1, '$')->mapnew({idx, val -> val.text})
    601    call assert_equal(['s', 's', 'ſ','ſ'], ic_match, "Ignorecase Regex-engine: " .. &re)
    602    call assert_equal(['ſ','ſ'], noic_match, "No-Ignorecase Regex-engine: " .. &re)
    603  endfor
    604  " Match several 'ſſ' and 'ss'
    605  call setline(1,  'das abc herauss abc ſſich abc ſind')
    606  for i in range(0, 2)
    607    exe "set re="..i
    608    let ic_match = matchbufline('%', '\c\%u17f\%u17f', 1, '$')->mapnew({idx, val -> val.text})
    609    let noic_match = matchbufline('%', '\C\%u17f\%u17f', 1, '$')->mapnew({idx, val -> val.text})
    610    let ic_match2 = matchbufline('%', '\c\%u17f\+', 1, '$')->mapnew({idx, val -> val.text})
    611    let noic_match2 = matchbufline('%', '\C\%u17f\+', 1, '$')->mapnew({idx, val -> val.text})
    612    let ic_match3 = matchbufline('%', '\c[\u17f]\+', 1, '$')->mapnew({idx, val -> val.text})
    613    let noic_match3 = matchbufline('%', '\C[\u17f]\+', 1, '$')->mapnew({idx, val -> val.text})
    614 
    615    call assert_equal(['ss', 'ſſ'], ic_match, "Ignorecase Regex-engine: " .. &re)
    616    call assert_equal(['ſſ'], noic_match, "No-Ignorecase Regex-engine: " .. &re)
    617    call assert_equal(['s', 'ss', 'ſſ', 'ſ'], ic_match2, "Ignorecase Regex-engine: " .. &re)
    618    call assert_equal(['ſſ','ſ'], noic_match2, "No-Ignorecase Regex-engine: " .. &re)
    619    call assert_equal(['s', 'ss', 'ſſ', 'ſ'], ic_match3, "Ignorecase Collection Regex-engine: " .. &re)
    620    call assert_equal(['ſſ','ſ'], noic_match3, "No-Ignorecase Collection Regex-engine: " .. &re)
    621  endfor
    622  set re&vim
    623  bw!
    624 endfunc
    625 
    626 func Test_replace_multibyte_match_in_multi_lines()
    627  new
    628  let text = ['ab 1c', 'ab 1c', 'def', '是否 a', '是否 a', 'ghi', '是否a', '是否a', '是否 1', '是否 1']
    629  let expected = ['', 'def', '', 'ghi', '', '']
    630  for i in range(0, 2)
    631    exe "set ignorecase re="..i
    632    :%d _
    633    call setline(1, text)
    634    :%s/\(.\+\)\n\1//g
    635    call assert_equal(expected, getline(1, '$'))
    636  endfor
    637  bw!
    638  set ignorecase&vim re&vim
    639 endfun
    640 
    641 " vim: shiftwidth=2 sts=2 expandtab