095_regexp_multibyte_spec.lua (8621B)
-- Test for regexp patterns with multi-byte support, using utf-8.
-- See test_regexp_latin.vim for the non-multi-byte tests.
-- A pattern that gives the expected result produces OK, so that we know it was
-- actually tried.

local n = require('test.functional.testnvim')()

local insert, source = n.insert, n.source
local clear, expect = n.clear, n.expect

describe('regex with multi-byte', function()
  setup(clear)

  it('is working', function()
    -- NOTE(review): the indented long strings below assume the insert/source/
    -- expect helpers strip the common leading indentation -- confirm.
    insert([[
      Results of test95:]])

    -- Each entry added to `tl` is a list:
    --   [0] engine selector: the entry is skipped for regexpengine=2 when it
    --       is 0 and skipped for regexpengine=1 when it is 1; with 2 it runs
    --       for regexpengine 0, 1 and 2.
    --   [1] pattern
    --   [2] text to match against
    --   [3] expected whole match; when absent the pattern must NOT match
    --   [4..] expected submatches (missing ones are compared against '')
    -- Every engine/pattern pair appends either an "OK ..." or an "ERROR ..."
    -- line to the buffer, which is compared against the expected text below.
    source([=[
      set nomore
      let tl = []

      call add(tl, [2, '[[:alpha:][=a=]]\+', '879 aiaãâaiuvna ', 'aiaãâaiuvna'])
      call add(tl, [2, '[[=a=]]\+', 'ddaãâbcd', 'aãâ'])								" equivalence classes
      call add(tl, [2, '[^ม ]\+', 'มม oijasoifjos ifjoisj f osij j มมมมม abcd', 'oijasoifjos'])
      call add(tl, [2, ' [^ ]\+', 'start มabcdม ', ' มabcdม'])
      call add(tl, [2, '[ม[:alpha:][=a=]]\+', '879 aiaãมâมaiuvna ', 'aiaãมâมaiuvna'])

      call add(tl, [2, '\p\+', 'ìa', 'ìa'])
      call add(tl, [2, '\p*', 'aあ', 'aあ'])

      call add(tl, [2, '\i\+', '&*¨xx ', 'xx'])
      call add(tl, [2, '\f\+', '&*fname ', 'fname'])

      call add(tl, [2, '.ม', 'xม่x yมy', 'yม'])
      call add(tl, [2, '.ม่', 'xม่x yมy', 'xม่'])
      call add(tl, [2, "\u05b9", " x\u05b9 ", "x\u05b9"])
      call add(tl, [2, ".\u05b9", " x\u05b9 ", "x\u05b9"])
      call add(tl, [2, "\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
      call add(tl, [2, ".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
      call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
      call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
      call add(tl, [2, "\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
      call add(tl, [2, ".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
      call add(tl, [2, "\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
      call add(tl, [2, ".\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
      call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
      call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
      call add(tl, [2, "a", "ca\u0300t"])
      call add(tl, [2, "ca", "ca\u0300t"])
      call add(tl, [2, "a\u0300", "ca\u0300t", "a\u0300"])
      call add(tl, [2, 'a\%C', "ca\u0300t", "a\u0300"])
      call add(tl, [2, 'ca\%C', "ca\u0300t", "ca\u0300"])
      call add(tl, [2, 'ca\%Ct', "ca\u0300t", "ca\u0300t"])

      call add(tl, [2, 'ú\Z', 'x'])
      call add(tl, [2, 'יהוה\Z', 'יהוה', 'יהוה'])
      call add(tl, [2, 'יְהוָה\Z', 'יהוה', 'יהוה'])
      call add(tl, [2, 'יהוה\Z', 'יְהוָה', 'יְהוָה'])
      call add(tl, [2, 'יְהוָה\Z', 'יְהוָה', 'יְהוָה'])
      call add(tl, [2, 'יְ\Z', 'וְיַ', 'יַ'])
      call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
      call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200dxy", "ק\u200dx"])
      call add(tl, [2, "ק\u200dx\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
      call add(tl, [2, "ק\u200dx\\Z", "xק\u200dxy", "ק\u200dx"])
      call add(tl, [2, "\u05b9\\Z", "xyz"])
      call add(tl, [2, "\\Z\u05b9", "xyz"])
      call add(tl, [2, "\u05b9\\Z", "xy\u05b9z", "y\u05b9"])
      call add(tl, [2, "\\Z\u05b9", "xy\u05b9z", "y\u05b9"])
      call add(tl, [1, "\u05b9\\+\\Z", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])
      call add(tl, [1, "\\Z\u05b9\\+", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])

      call add(tl, [2, '[^[=a=]]\+', 'ddaãâbcd', 'dd'])

      for t in tl
        let re = t[0]
        let pat = t[1]
        let text = t[2]
        let matchidx = 3
        for engine in [0, 1, 2]
          if engine == 2 && re == 0 || engine == 1 && re == 1
            continue
          endif
          let &regexpengine = engine
          try
            let l = matchlist(text, pat)
          catch
            $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"'
          endtry
          if len(l) == 0 && len(t) > matchidx
            $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
          elseif len(l) > 0 && len(t) == matchidx
            $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", match: \"' . l[0] . '\", expected no match'
          elseif len(t) > matchidx && l[0] != t[matchidx]
            $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", match: \"' . l[0] . '\", expected: \"' . t[matchidx] . '\"'
          else
            $put ='OK ' . engine . ' - ' . pat
          endif
          if len(l) > 0
            for i in range(1, 9)
              if len(t) <= matchidx + i
                let e = ''
              else
                let e = t[matchidx + i]
              endif
              if l[i] != e
                $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", submatch ' . i . ': \"' . l[i] . '\", expected: \"' . e . '\"'
              endif
            endfor
            unlet i
          endif
        endfor
      endfor
      unlet t tl e l

      set regexpengine=1 ambiwidth=single
      $put ='eng 1 ambi single: ' . match(\"\u00EC\", '\p')

      set regexpengine=1 ambiwidth=double
      $put ='eng 1 ambi double: ' . match(\"\u00EC\", '\p')

      set regexpengine=2 ambiwidth=single
      $put ='eng 2 ambi single: ' . match(\"\u00EC\", '\p')

      set regexpengine=2 ambiwidth=double
      $put ='eng 2 ambi double: ' . match(\"\u00EC\", '\p')
    ]=])

    -- Assert buffer contents.
    -- The text after "OK <engine> - " is the pattern exactly as the script saw
    -- it, so the pattern that starts with a space shows two spaces after the
    -- dash and the "\u...." patterns appear as the literal (combining)
    -- characters in the order they were written.
    expect([=[
      Results of test95:
      OK 0 - [[:alpha:][=a=]]\+
      OK 1 - [[:alpha:][=a=]]\+
      OK 2 - [[:alpha:][=a=]]\+
      OK 0 - [[=a=]]\+
      OK 1 - [[=a=]]\+
      OK 2 - [[=a=]]\+
      OK 0 - [^ม ]\+
      OK 1 - [^ม ]\+
      OK 2 - [^ม ]\+
      OK 0 -  [^ ]\+
      OK 1 -  [^ ]\+
      OK 2 -  [^ ]\+
      OK 0 - [ม[:alpha:][=a=]]\+
      OK 1 - [ม[:alpha:][=a=]]\+
      OK 2 - [ม[:alpha:][=a=]]\+
      OK 0 - \p\+
      OK 1 - \p\+
      OK 2 - \p\+
      OK 0 - \p*
      OK 1 - \p*
      OK 2 - \p*
      OK 0 - \i\+
      OK 1 - \i\+
      OK 2 - \i\+
      OK 0 - \f\+
      OK 1 - \f\+
      OK 2 - \f\+
      OK 0 - .ม
      OK 1 - .ม
      OK 2 - .ม
      OK 0 - .ม่
      OK 1 - .ม่
      OK 2 - .ม่
      OK 0 - ֹ
      OK 1 - ֹ
      OK 2 - ֹ
      OK 0 - .ֹ
      OK 1 - .ֹ
      OK 2 - .ֹ
      OK 0 - ֹֻ
      OK 1 - ֹֻ
      OK 2 - ֹֻ
      OK 0 - .ֹֻ
      OK 1 - .ֹֻ
      OK 2 - .ֹֻ
      OK 0 - ֹֻ
      OK 1 - ֹֻ
      OK 2 - ֹֻ
      OK 0 - .ֹֻ
      OK 1 - .ֹֻ
      OK 2 - .ֹֻ
      OK 0 - ֹ
      OK 1 - ֹ
      OK 2 - ֹ
      OK 0 - .ֹ
      OK 1 - .ֹ
      OK 2 - .ֹ
      OK 0 - ֹ
      OK 1 - ֹ
      OK 2 - ֹ
      OK 0 - .ֹ
      OK 1 - .ֹ
      OK 2 - .ֹ
      OK 0 - ֹֻ
      OK 2 - ֹֻ
      OK 0 - .ֹֻ
      OK 1 - .ֹֻ
      OK 2 - .ֹֻ
      OK 0 - a
      OK 1 - a
      OK 2 - a
      OK 0 - ca
      OK 1 - ca
      OK 2 - ca
      OK 0 - à
      OK 1 - à
      OK 2 - à
      OK 0 - a\%C
      OK 1 - a\%C
      OK 2 - a\%C
      OK 0 - ca\%C
      OK 1 - ca\%C
      OK 2 - ca\%C
      OK 0 - ca\%Ct
      OK 1 - ca\%Ct
      OK 2 - ca\%Ct
      OK 0 - ú\Z
      OK 1 - ú\Z
      OK 2 - ú\Z
      OK 0 - יהוה\Z
      OK 1 - יהוה\Z
      OK 2 - יהוה\Z
      OK 0 - יְהוָה\Z
      OK 1 - יְהוָה\Z
      OK 2 - יְהוָה\Z
      OK 0 - יהוה\Z
      OK 1 - יהוה\Z
      OK 2 - יהוה\Z
      OK 0 - יְהוָה\Z
      OK 1 - יְהוָה\Z
      OK 2 - יְהוָה\Z
      OK 0 - יְ\Z
      OK 1 - יְ\Z
      OK 2 - יְ\Z
      OK 0 - ק‍ֹx\Z
      OK 1 - ק‍ֹx\Z
      OK 2 - ק‍ֹx\Z
      OK 0 - ק‍ֹx\Z
      OK 1 - ק‍ֹx\Z
      OK 2 - ק‍ֹx\Z
      OK 0 - ק‍x\Z
      OK 1 - ק‍x\Z
      OK 2 - ק‍x\Z
      OK 0 - ק‍x\Z
      OK 1 - ק‍x\Z
      OK 2 - ק‍x\Z
      OK 0 - ֹ\Z
      OK 1 - ֹ\Z
      OK 2 - ֹ\Z
      OK 0 - \Zֹ
      OK 1 - \Zֹ
      OK 2 - \Zֹ
      OK 0 - ֹ\Z
      OK 1 - ֹ\Z
      OK 2 - ֹ\Z
      OK 0 - \Zֹ
      OK 1 - \Zֹ
      OK 2 - \Zֹ
      OK 0 - ֹ\+\Z
      OK 2 - ֹ\+\Z
      OK 0 - \Zֹ\+
      OK 2 - \Zֹ\+
      OK 0 - [^[=a=]]\+
      OK 1 - [^[=a=]]\+
      OK 2 - [^[=a=]]\+
      eng 1 ambi single: 0
      eng 1 ambi double: 0
      eng 2 ambi single: 0
      eng 2 ambi double: 0]=])
  end)
end)