test_regexp_utf8.vim (24193B)
" Tests for regexp in utf8 encoding

source shared.vim

" Check equivalence classes ([[=x=]]) for the currently selected regexp
" engine.  "str" holds space-separated groups; every character of a group
" must produce an equivalence class that matches the whole group and that
" matches nothing in any other group.
func s:equivalence_test()
  let str = "AÀÁÂÃÄÅĀĂĄǍǞǠǺȂȦȺḀẠẢẤẦẨẪẬẮẰẲẴẶ BƁɃḂḄḆ CÇĆĈĊČƇȻḈꞒ DĎĐƊḊḌḎḐḒ EÈÉÊËĒĔĖĘĚȄȆȨɆḔḖḘḚḜẸẺẼẾỀỂỄỆ FƑḞꞘ GĜĞĠĢƓǤǦǴḠꞠ HĤĦȞḢḤḦḨḪⱧ IÌÍÎÏĨĪĬĮİƗǏȈȊḬḮỈỊ JĴɈ KĶƘǨḰḲḴⱩꝀ LĹĻĽĿŁȽḶḸḺḼⱠ MḾṀṂ NÑŃŅŇǸṄṆṈṊꞤ OÒÓÔÕÖØŌŎŐƟƠǑǪǬǾȌȎȪȬȮȰṌṎṐṒỌỎỐỒỔỖỘỚỜỞỠỢ PƤṔṖⱣ QɊ RŔŖŘȐȒɌṘṚṜṞⱤꞦ SŚŜŞŠȘṠṢṤṦṨⱾꞨ TŢŤŦƬƮȚȾṪṬṮṰ UÙÚÛÜŨŪŬŮŰƯǕǙǛǓǗȔȖɄṲṴṶṸṺỤỦỨỪỬỮỰ VƲṼṾ WŴẀẂẄẆẈ XẊẌ YÝŶŸƳȲɎẎỲỴỶỸ ZŹŻŽƵẐẒẔⱫ aàáâãäåāăąǎǟǡǻȃȧᶏḁẚạảấầẩẫậắằẳẵặⱥ bƀɓᵬᶀḃḅḇ cçćĉċčƈȼḉꞓꞔ dďđɗᵭᶁᶑḋḍḏḑḓ eèéêëēĕėęěȅȇȩɇᶒḕḗḙḛḝẹẻẽếềểễệ fƒᵮᶂḟꞙ gĝğġģǥǧǵɠᶃḡꞡ hĥħȟḣḥḧḩḫẖⱨꞕ iìíîïĩīĭįǐȉȋɨᶖḭḯỉị jĵǰɉ kķƙǩᶄḱḳḵⱪꝁ lĺļľŀłƚḷḹḻḽⱡ mᵯḿṁṃ nñńņňŉǹᵰᶇṅṇṉṋꞥ oòóôõöøōŏőơǒǫǭǿȍȏȫȭȯȱɵṍṏṑṓọỏốồổỗộớờởỡợ pƥᵱᵽᶈṕṗ qɋʠ rŕŗřȑȓɍɽᵲᵳᶉṛṝṟꞧ sśŝşšșȿᵴᶊṡṣṥṧṩꞩ tţťŧƫƭțʈᵵṫṭṯṱẗⱦ uùúûüũūŭůűųǚǖưǔǘǜȕȗʉᵾᶙṳṵṷṹṻụủứừửữự vʋᶌṽṿ wŵẁẃẅẇẉẘ xẋẍ yýÿŷƴȳɏẏẙỳỵỷỹ zźżžƶᵶᶎẑẓẕⱬ"
  let groups = split(str)
  for group1 in groups
    for c in split(group1, '\zs')
      " next statement confirms that equivalence class matches every
      " character in group
      call assert_match('^[[=' .. c .. '=]]*$', group1)
      for group2 in groups
        if group2 != group1
          " next statement confirms that equivalence class doesn't match
          " character in any other group
          call assert_equal(-1, match(group2, '[[=' .. c .. '=]]'), c)
        endif
      endfor
    endfor
  endfor
endfunc

" Equivalence classes with the old (backtracking) engine.
func Test_equivalence_re1()
  set re=1
  call s:equivalence_test()
  set re=0
endfunc

" Equivalence classes with the new (NFA) engine.
func Test_equivalence_re2()
  set re=2
  call s:equivalence_test()
  set re=0
endfunc

" Collect, for every code point 1..255, which [[:class:]] items and which of
" \i, \k and \f match it, then compare the collected strings with the
" expected ones.  Uses the currently selected regexp engine.
" Note: 'isprint' (and on win32 'iskeyword') are changed and not restored.
func s:classes_test()
  if has('win32')
    set iskeyword=@,48-57,_,192-255
  endif
  set isprint=@,161-255
  call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))

  let alnumchars = ''
  let alphachars = ''
  let backspacechar = ''
  let blankchars = ''
  let cntrlchars = ''
  let digitchars = ''
  let escapechar = ''
  let graphchars = ''
  let lowerchars = ''
  let printchars = ''
  let punctchars = ''
  let returnchar = ''
  let spacechars = ''
  let tabchar = ''
  let upperchars = ''
  let xdigitchars = ''
  let identchars = ''
  let identchars1 = ''
  let kwordchars = ''
  let kwordchars1 = ''
  let fnamechars = ''
  let fnamechars1 = ''
  let i = 1
  while i <= 255
    let c = nr2char(i)
    if c =~ '[[:alpha:]]'
      let alphachars .= c
    endif
    if c =~ '[[:alnum:]]'
      let alnumchars .= c
    endif
    if c =~ '[[:backspace:]]'
      let backspacechar .= c
    endif
    if c =~ '[[:blank:]]'
      let blankchars .= c
    endif
    if c =~ '[[:cntrl:]]'
      let cntrlchars .= c
    endif
    if c =~ '[[:digit:]]'
      let digitchars .= c
    endif
    if c =~ '[[:escape:]]'
      let escapechar .= c
    endif
    if c =~ '[[:graph:]]'
      let graphchars .= c
    endif
    if c =~ '[[:lower:]]'
      let lowerchars .= c
    endif
    if c =~ '[[:print:]]'
      let printchars .= c
    endif
    if c =~ '[[:punct:]]'
      let punctchars .= c
    endif
    if c =~ '[[:return:]]'
      let returnchar .= c
    endif
    if c =~ '[[:space:]]'
      let spacechars .= c
    endif
    if c =~ '[[:tab:]]'
      let tabchar .= c
    endif
    if c =~ '[[:upper:]]'
      let upperchars .= c
    endif
    if c =~ '[[:xdigit:]]'
      let xdigitchars .= c
    endif
    if c =~ '[[:ident:]]'
      let identchars .= c
    endif
    if c =~ '\i'
      let identchars1 .= c
    endif
    if c =~ '[[:keyword:]]'
      let kwordchars .= c
    endif
    if c =~ '\k'
      let kwordchars1 .= c
    endif
    if c =~ '[[:fname:]]'
      let fnamechars .= c
    endif
    if c =~ '\f'
      let fnamechars1 .= c
    endif
    let i += 1
  endwhile

  " The expected strings for [[:print:]] and [[:fname:]] share the same tail
  " after '~': every code point from U+00A0 up to U+00FF.  U+00A0 (no-break
  " space) and U+00AD (soft hyphen) are invisible in most editors and are
  " easily lost, so they are spelled out as escapes instead of being embedded
  " in the literals.
  " NOTE(review): the positions of these two characters were derived from the
  " ascending 1..255 loop and 'isprint'; confirm against the canonical file.
  let nbsp = "\u00a0"
  let shy = "\u00ad"
  let latin1_high = nbsp .. '¡¢£¤¥¦§¨©ª«¬' .. shy .. '®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'

  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alphachars)
  call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alnumchars)
  call assert_equal("\b", backspacechar)
  call assert_equal("\t ", blankchars)
  call assert_equal("\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f", cntrlchars)
  call assert_equal("0123456789", digitchars)
  call assert_equal("\<Esc>", escapechar)
  call assert_equal('!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', graphchars)
  call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', lowerchars)
  call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~' .. latin1_high, printchars)
  call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', punctchars)
  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', upperchars)
  call assert_equal("\r", returnchar)
  call assert_equal("\t\n\x0b\f\r ", spacechars)
  call assert_equal("\t", tabchar)
  call assert_equal('0123456789ABCDEFabcdef', xdigitchars)

  " The keyword expectation is the same on all systems.
  let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
  if has('win32')
    " NOTE(review): if the original literal also contains C1 control
    " characters (U+0080..U+009F) between 'z' and the no-break space they are
    " not visible here; verify against the canonical file.
    let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz' .. nbsp .. '¡¢£¤¥¦§µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
  else
    let identchars_ok = kwordchars_ok
  endif

  if has('win32')
    let fnamechars_ok = '!#$%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_abcdefghijklmnopqrstuvwxyz{}~' .. latin1_high
  elseif has('amiga')
    let fnamechars_ok = '$+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~' .. latin1_high
  elseif has('vms')
    let fnamechars_ok = '#$%+,-./0123456789:;<>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~' .. latin1_high
  else
    let fnamechars_ok = '#$%+,-./0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~' .. latin1_high
  endif

  call assert_equal(identchars_ok, identchars)
  call assert_equal(kwordchars_ok, kwordchars)
  call assert_equal(fnamechars_ok, fnamechars)

  " The [[:class:]] form and the \i, \k, \f atoms must agree.
  call assert_equal(identchars1, identchars)
  call assert_equal(kwordchars1, kwordchars)
  call assert_equal(fnamechars1, fnamechars)
endfunc

" Character classes with the old engine.
func Test_classes_re1()
  set re=1
  call s:classes_test()
  set re=0
endfunc

" Character classes with the new engine.
func Test_classes_re2()
  set re=2
  call s:classes_test()
  set re=0
endfunc

" A reversed range in a collection must fail with E944 for every engine.
func Test_reversed_range()
  for re in range(0, 2)
    exe 'set re=' . re
    call assert_fails('call match("abc def", "[c-a]")', 'E944:', re)
  endfor
  set re=0
endfunc

" A very large range fails with E945 in engine 1 and works in engine 2.
func Test_large_class()
  set re=1
  call assert_fails('call match("abc def", "[\u3000-\u4000]")', 'E945:')
  set re=2
  call assert_equal(0, 'abc def' =~# '[\u3000-\u4000]')
  call assert_equal(1, "\u3042" =~# '[\u3000-\u4000]')
  set re=0
endfunc

" An overlong \%[] item must fail with E339 in engine 1.
func Test_optmatch_toolong()
  set re=1
  " Can only handle about 8000 characters.
  let pat = '\\%[' .. repeat('x', 9000) .. ']'
  call assert_fails('call match("abc def", "' .. pat .. '")', 'E339:')
  set re=0
endfunc

" Test for regexp patterns with multi-byte support, using utf-8.
func Test_multibyte_chars()
  " tl is a List of Lists with:
  "    2: test auto/old/new
  "    0: test auto/old
  "    1: test auto/new
  "    regexp pattern
  "    text to test the pattern on
  "    expected match (optional)
  "    expected submatch 1 (optional)
  "    expected submatch 2 (optional)
  "    etc.
  "  When there is no match use only the first two items.
  let tl = []

  " Multi-byte character tests.
  call add(tl, [2, '[[:alpha:][=a=]]\+', '879 aiaãâaiuvna ', 'aiaãâaiuvna'])
  call add(tl, [2, '[[=a=]]\+', 'ddaãâbcd', 'aãâ'])  " equivalence classes
  call add(tl, [2, '[^ม ]\+', 'มม oijasoifjos ifjoisj f osij j มมมมม abcd', 'oijasoifjos'])
  call add(tl, [2, ' [^ ]\+', 'start มabcdม ', ' มabcdม'])
  call add(tl, [2, '[ม[:alpha:][=a=]]\+', '879 aiaãมâมaiuvna ', 'aiaãมâมaiuvna'])

  " this is not a normal "i" but 0xec
  call add(tl, [2, '\p\+', 'ìa', 'ìa'])
  call add(tl, [2, '\p*', 'aあ', 'aあ'])

  " Test recognition of some character classes
  call add(tl, [2, '\i\+', '&*¨xx ', 'xx'])
  call add(tl, [2, '\f\+', '&*fname ', 'fname'])

  " Test composing character matching
  call add(tl, [2, '.ม', 'xม่x yมy', 'yม'])
  call add(tl, [2, '.ม่', 'xม่x yมy', 'xม่'])
  call add(tl, [2, "\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "a", "ca\u0300t"])
  call add(tl, [2, "ca", "ca\u0300t"])
  call add(tl, [2, "a\u0300", "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'a\%C', "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'ca\%C', "ca\u0300t", "ca\u0300"])
  call add(tl, [2, 'ca\%Ct', "ca\u0300t", "ca\u0300t"])

  " Test \Z
  call add(tl, [2, 'ú\Z', 'x'])
  call add(tl, [2, 'יהוה\Z', 'יהוה', 'יהוה'])
  call add(tl, [2, 'יְהוָה\Z', 'יהוה', 'יהוה'])
  call add(tl, [2, 'יהוה\Z', 'יְהוָה', 'יְהוָה'])
  call add(tl, [2, 'יְהוָה\Z', 'יְהוָה', 'יְהוָה'])
  call add(tl, [2, 'יְ\Z', 'וְיַ', 'יַ'])
  call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
  call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200dxy", "ק\u200dx"])
  call add(tl, [2, "ק\u200dx\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
  call add(tl, [2, "ק\u200dx\\Z", "xק\u200dxy", "ק\u200dx"])
  call add(tl, [2, "\u05b9\\Z", "xyz"])
  call add(tl, [2, "\\Z\u05b9", "xyz"])
  call add(tl, [2, "\u05b9\\Z", "xy\u05b9z", "y\u05b9"])
  call add(tl, [2, "\\Z\u05b9", "xy\u05b9z", "y\u05b9"])
  call add(tl, [1, "\u05b9\\+\\Z", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])
  call add(tl, [1, "\\Z\u05b9\\+", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])

  " Combining different tests and features
  call add(tl, [2, '[^[=a=]]\+', 'ddaãâbcd', 'dd'])

  " Run the tests
  for t in tl
    let re = t[0]
    let pat = t[1]
    let text = t[2]
    let matchidx = 3
    for engine in [0, 1, 2]
      " re == 0 skips the new engine, re == 1 skips the old engine
      if (engine == 2 && re == 0) || (engine == 1 && re == 1)
        continue
      endif
      let &regexpengine = engine
      try
        let l = matchlist(text, pat)
      catch
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text .
              \ '\", caused an exception: \"' . v:exception . '\"')
        " "l" was not assigned for this engine, the checks below would use
        " an undefined or stale value
        continue
      endtry
      " check the match itself
      if len(l) == 0 && len(t) > matchidx
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text .
              \ '\", did not match, expected: \"' . t[matchidx] . '\"')
      elseif len(l) > 0 && len(t) == matchidx
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text . '\", match: \"' . l[0] .
              \ '\", expected no match')
      elseif len(t) > matchidx && l[0] != t[matchidx]
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text . '\", match: \"' . l[0] .
              \ '\", expected: \"' . t[matchidx] . '\"')
      else
        " Test passed
      endif
      if len(l) > 0
        " check all the nine submatches
        for i in range(1, 9)
          if len(t) <= matchidx + i
            let e = ''
          else
            let e = t[matchidx + i]
          endif
          if l[i] != e
            call assert_report('Error ' . engine . ': pat: \"' . pat .
                  \ '\", text: \"' . text . '\", submatch ' . i .
                  \ ': \"' . l[i] . '\", expected: \"' . e . '\"')
          endif
        endfor
        unlet i
      endif
    endfor
  endfor
  set regexpengine&
endfunc

" check that 'ambiwidth' does not change the meaning of \p
func Test_regexp_ambiwidth()
  set regexpengine=1 ambiwidth=single
  call assert_equal(0, match("\u00EC", '\p'))
  set regexpengine=1 ambiwidth=double
  call assert_equal(0, match("\u00EC", '\p'))
  set regexpengine=2 ambiwidth=single
  call assert_equal(0, match("\u00EC", '\p'))
  set regexpengine=2 ambiwidth=double
  call assert_equal(0, match("\u00EC", '\p'))
  set regexpengine& ambiwidth&
endfunc

" Case-ignoring checks around U+0130 (capital I with dot above), run with the
" currently selected engine.
func Run_regexp_ignore_case()
  call assert_equal('iIİ', substitute('iIİ', '\([iIİ]\)', '\1', 'g'))

  call assert_equal('iIx', substitute('iIİ', '\c\([İ]\)', 'x', 'g'))
  call assert_equal('xxİ', substitute('iIİ', '\(i\c\)', 'x', 'g'))
  call assert_equal('iIx', substitute('iIİ', '\(İ\c\)', 'x', 'g'))
  call assert_equal('iIx', substitute('iIİ', '\c\(\%u0130\)', 'x', 'g'))
  call assert_equal('iIx', substitute('iIİ', '\c\([\u0130]\)', 'x', 'g'))
  call assert_equal('iIx', substitute('iIİ', '\c\([\u012f-\u0131]\)', 'x', 'g'))
endfunc

func Test_regexp_ignore_case()
  set regexpengine=1
  call Run_regexp_ignore_case()
  set regexpengine=2
  call Run_regexp_ignore_case()
  set regexpengine&
endfunc

" Tests for regexp with multi-byte encoding and various magic settings
func Run_regexp_multibyte_magic()
  let text =<< trim END
    1 a aa abb abbccc
    2 d dd dee deefff
    3 g gg ghh ghhiii
    4 j jj jkk jkklll
    5 m mm mnn mnnooo
    6 x ^aa$ x
    7 (a)(b) abbaa
    8 axx [ab]xx
    9 หม่x อมx
    a อมx หม่x
    b ちカヨは
    c x ¬€x
    d 天使x
    e (replaced below)
    f (replaced below)
    g a啷bb
    j 0123❤x
    k combinations
    l äö üᾱ̆́
  END
  " Lines "e" and "f" each hold one character above U+10FFFF, which cannot be
  " stored as valid UTF-8 text in this file.  Build them with escapes so that
  " the \%U12345678 and [\U1234abcd...] searches below find them and a single
  " "x" leaves 'e y' and 'f z'.
  let text[13] = "e \U12345678y"
  let text[14] = "f \U1234abcdz"

  new
  call setline(1, text)
  exe 'normal /a*b\{2}c\+/e' .. "\<CR>x"
  call assert_equal('1 a aa abb abbcc', getline('.'))
  exe 'normal /\Md\*e\{2}f\+/e' .. "\<CR>x"
  call assert_equal('2 d dd dee deeff', getline('.'))
  set nomagic
  exe 'normal /g\*h\{2}i\+/e' .. "\<CR>x"
  call assert_equal('3 g gg ghh ghhii', getline('.'))
  exe 'normal /\mj*k\{2}l\+/e' .. "\<CR>x"
  call assert_equal('4 j jj jkk jkkll', getline('.'))
  exe 'normal /\vm*n{2}o+/e' .. "\<CR>x"
  call assert_equal('5 m mm mnn mnnoo', getline('.'))
  exe 'normal /\V^aa$/' .. "\<CR>x"
  call assert_equal('6 x aa$ x', getline('.'))
  set magic
  exe 'normal /\v(a)(b)\2\1\1/e' .. "\<CR>x"
  call assert_equal('7 (a)(b) abba', getline('.'))
  exe 'normal /\V[ab]\(\[xy]\)\1' .. "\<CR>x"
  call assert_equal('8 axx ab]xx', getline('.'))

  " search for multi-byte without composing char
  exe 'normal /ม' .. "\<CR>x"
  call assert_equal('9 หม่x อx', getline('.'))

  " search for multi-byte with composing char
  exe 'normal /ม่' .. "\<CR>x"
  call assert_equal('a อมx หx', getline('.'))

  " find word by change of word class
  exe 'normal /ち\<カヨ\>は' .. "\<CR>x"
  call assert_equal('b カヨは', getline('.'))

  " Test \%u, [\u] and friends
  " c
  exe 'normal /\%u20ac' .. "\<CR>x"
  call assert_equal('c x ¬x', getline('.'))
  " d
  exe 'normal /[\u4f7f\u5929]\+' .. "\<CR>x"
  call assert_equal('d 使x', getline('.'))
  " e
  exe 'normal /\%U12345678' .. "\<CR>x"
  call assert_equal('e y', getline('.'))
  " f
  exe 'normal /[\U1234abcd\u1234\uabcd]' .. "\<CR>x"
  call assert_equal('f z', getline('.'))
  " g
  exe 'normal /\%d21879b' .. "\<CR>x"
  call assert_equal('g abb', getline('.'))

  " j Test backwards search from a multi-byte char
  exe "normal /x\<CR>x?.\<CR>x"
  call assert_equal('j 012❤', getline('.'))
  " k
  let @w=':%s#comb[i]nations#œ̄ṣ́m̥̄ᾱ̆́#g'
  @w
  call assert_equal('k œ̄ṣ́m̥̄ᾱ̆́', getline(18))

  bw!
endfunc

func Test_regexp_multibyte_magic()
  set regexpengine=1
  call Run_regexp_multibyte_magic()
  set regexpengine=2
  call Run_regexp_multibyte_magic()
  set regexpengine&
endfunc

" Test for 7.3.192
" command ":s/ \?/ /g" splits multi-byte characters into bytes
func Test_split_multibyte_to_bytes()
  new
  call setline(1, 'l äö üᾱ̆́')
  s/ \?/ /g
  call assert_equal(' l ä ö ü ᾱ̆́', getline(1))
  bw!
endfunc

" Test for matchstr() with multibyte characters
func Test_matchstr_multibyte()
  new
  call assert_equal('ב', matchstr("אבגד", ".", 0, 2))
  call assert_equal('בג', matchstr("אבגד", "..", 0, 2))
  call assert_equal('א', matchstr("אבגד", ".", 0, 0))
  call assert_equal('ג', matchstr("אבגד", ".", 4, -1))
  bw!
endfunc

" Test for 7.4.636
" A search with end offset gets stuck at end of file.
func Test_search_with_end_offset()
  new
  call setline(1, ['', 'dog(a', 'cat('])
  exe "normal /(/e+\<CR>"
  normal n"ayn
  call assert_equal("a\ncat(", @a)
  bw!
endfunc

" Check that "^" matches even when the line starts with a combining char
func Test_match_start_of_line_combining()
  new
  call setline(1, ['', "\u05ae", ''])
  exe "normal gg/^\<CR>"
  call assert_equal(2, getcurpos()[1])
  bwipe!
endfunc

" Check that [[:upper:]] matches for automatic engine
func Test_match_char_class_upper()
  new

  " Test 1: [[:upper:]]\{2,\}
  set regexpengine=0
  call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...'])
  call cursor(1,1)
  let search_cmd='norm /\<[[:upper:]]\{2,\}\>' .. "\<CR>"
  exe search_cmd
  call assert_equal(4, searchcount().total, 'TEST 1')
  set regexpengine=1
  exe search_cmd
  call assert_equal(2, searchcount().total, 'TEST 1')
  set regexpengine=2
  exe search_cmd
  call assert_equal(4, searchcount().total, 'TEST 1')

  " Test 2: [[:upper:]].\+
  let search_cmd='norm /\<[[:upper:]].\+\>' .. "\<CR>"
  set regexpengine=0
  exe search_cmd
  call assert_equal(2, searchcount().total, 'TEST 2')
  set regexpengine=1
  exe search_cmd
  call assert_equal(1, searchcount().total, 'TEST 2')
  set regexpengine=2
  exe search_cmd
  call assert_equal(2, searchcount().total, 'TEST 2')

  " Test 3: [[:lower:]]\+
  let search_cmd='norm /\<[[:lower:]]\+\>' .. "\<CR>"
  set regexpengine=0
  exe search_cmd
  call assert_equal(4, searchcount().total, 'TEST 3 lower')
  set regexpengine=1
  exe search_cmd
  call assert_equal(2, searchcount().total, 'TEST 3 lower')
  set regexpengine=2
  exe search_cmd
  call assert_equal(4, searchcount().total, 'TEST 3 lower')

  " clean up
  set regexpengine=0
  bwipe!
endfunc

" Sourcing a script that contains an invalid byte sequence must not crash.
func Test_match_invalid_byte()
  call writefile(0z630a.765d30aa0a.2e0a.790a.4030, 'Xinvalid')
  new
  source Xinvalid
  bwipe!
  call delete('Xinvalid')
endfunc

func Test_match_illegal_byte()
  " Text has illegal bytes which need to be set explicitly
  let lines = ["norm :set no\x01\<CR>", "silent n\xff", "silent norm :b\xff\<CR>"]
  call writefile(lines, 'Xregexp')
  call system(GetVimCommand() .. ' -X -Z -e -s -S Xregexp -c qa!')

  call delete('Xregexp')
endfunc

" A pattern the old engine cannot handle is used as a buffer name pattern;
" errors are silenced, the test only checks that nothing breaks.
func Test_match_too_complicated()
  set regexpengine=1
  exe "noswapfile vsplit \xeb\xdb\x99"
  silent! buf \&\zs*\zs*0
  bwipe!
  set regexpengine=0
endfunc

" A base character followed by a composing character inside a collection.
func Test_combining_chars_in_collection()
  new
  for i in range(0,2)
    exe "set re=".i
    put =['ɔ̃', 'ɔ', '̃ ã', 'abcd']
    :%s/[ɔ̃]//
    call assert_equal(['', '', 'ɔ', '̃ ã', 'abcd'], getline(1,'$'))
    %d
  endfor
  bw!
endfunc

" With ignorecase U+017F (long s) also matches ASCII 's'; without it only the
" long s itself matches.
func Test_search_multibyte_match_ascii()
  new
  " Match single 'ſ' and 's'
  call setline(1,  'das abc heraus abc ſich abc ſind')
  for i in range(0, 2)
    exe "set re="..i
    let ic_match = matchbufline('%', '\c\%u17f', 1, '$')->mapnew({idx, val -> val.text})
    let noic_match = matchbufline('%', '\C\%u17f', 1, '$')->mapnew({idx, val -> val.text})
    call assert_equal(['s', 's', 'ſ','ſ'], ic_match, "Ignorecase Regex-engine: " .. &re)
    call assert_equal(['ſ','ſ'], noic_match, "No-Ignorecase Regex-engine: " .. &re)
  endfor
  " Match several 'ſſ' and 'ss'
  call setline(1,  'das abc herauss abc ſſich abc ſind')
  for i in range(0, 2)
    exe "set re="..i
    let ic_match = matchbufline('%', '\c\%u17f\%u17f', 1, '$')->mapnew({idx, val -> val.text})
    let noic_match = matchbufline('%', '\C\%u17f\%u17f', 1, '$')->mapnew({idx, val -> val.text})
    let ic_match2 = matchbufline('%', '\c\%u17f\+', 1, '$')->mapnew({idx, val -> val.text})
    let noic_match2 = matchbufline('%', '\C\%u17f\+', 1, '$')->mapnew({idx, val -> val.text})
    let ic_match3 = matchbufline('%', '\c[\u17f]\+', 1, '$')->mapnew({idx, val -> val.text})
    let noic_match3 = matchbufline('%', '\C[\u17f]\+', 1, '$')->mapnew({idx, val -> val.text})

    call assert_equal(['ss', 'ſſ'], ic_match, "Ignorecase Regex-engine: " .. &re)
    call assert_equal(['ſſ'], noic_match, "No-Ignorecase Regex-engine: " .. &re)
    call assert_equal(['s', 'ss', 'ſſ', 'ſ'], ic_match2, "Ignorecase Regex-engine: " .. &re)
    call assert_equal(['ſſ','ſ'], noic_match2, "No-Ignorecase Regex-engine: " .. &re)
    call assert_equal(['s', 'ss', 'ſſ', 'ſ'], ic_match3, "Ignorecase Collection Regex-engine: " .. &re)
    call assert_equal(['ſſ','ſ'], noic_match3, "No-Ignorecase Collection Regex-engine: " .. &re)
  endfor
  set re&vim
  bw!
endfunc

" Removing a line that is followed by an identical line, with 'ignorecase'
" set, for lines with and without multi-byte characters.
func Test_replace_multibyte_match_in_multi_lines()
  new
  let text = ['ab 1c', 'ab 1c', 'def', '是否 a', '是否 a', 'ghi', '是否a', '是否a', '是否 1', '是否 1']
  let expected = ['', 'def', '', 'ghi', '', '']
  for i in range(0, 2)
    exe "set ignorecase re="..i
    :%d _
    call setline(1, text)
    :%s/\(.\+\)\n\1//g
    call assert_equal(expected, getline(1, '$'))
  endfor
  bw!
  set ignorecase&vim re&vim
endfunc

" vim: shiftwidth=2 sts=2 expandtab