commit 322a6d305d088420b23071c227af07b7c1beb41a
parent 172a90c245cf00cd5d55b3f9dd31f7aced957217
Author: Brynne Taylor <7542439+brynne8@users.noreply.github.com>
Date: Thu, 22 May 2025 15:24:49 +0800
feat(glob): new Glob implementation based on Peglob #33605
|vim.glob.to_lpeg()| uses a new LPeg-based implementation (Peglob) that
provides ~50% speedup for complex patterns. The implementation restores
support for nested braces and follows LSP 3.17 specification with
additional constraints for improved correctness and resistance to
backtracking edge cases.
Diffstat:
4 files changed, 454 insertions(+), 119 deletions(-)
diff --git a/runtime/doc/lua.txt b/runtime/doc/lua.txt
@@ -3236,30 +3236,51 @@ vim.fs.root({source}, {marker}) *vim.fs.root()*
==============================================================================
Lua module: vim.glob *vim.glob*
-vim.glob.to_lpeg({pattern}) *vim.glob.to_lpeg()*
- Parses a raw glob into an |lua-lpeg| pattern.
+Glob-to-LPeg Converter (Peglob). This module converts glob patterns to LPeg
+patterns according to the LSP 3.17 specification:
+https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
+
+Glob grammar overview:
+• `*` to match zero or more characters in a path segment
+• `?` to match on one character in a path segment
+• `**` to match any number of path segments, including none
+• `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and JavaScript
+ files)
+• `[]` to declare a range of characters to match in a path segment (e.g.,
+ `example.[0-9]` to match on `example.0`, `example.1`, …)
+• `[!...]` to negate a range of characters to match in a path segment (e.g.,
+ `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
+
+Additional constraints:
+• A Glob pattern must match an entire path, with partial matches considered
+ failures.
+• The pattern only determines success or failure, without specifying which
+ parts correspond to which characters.
+• A path segment is the portion of a path between two adjacent path separators
+ (`/`), or between the start/end of the path and the nearest separator.
+• The `**` (globstar) pattern matches zero or more path segments, including
+ intervening separators (`/`). Within pattern strings, `**` must be delimited
+ by path separators (`/`) or pattern boundaries and cannot be adjacent to any
+ characters other than `/`. If `**` is not the final element, it must be
+ followed by `/`.
+• `{}` (braced conditions) contains valid Glob patterns as branches, separated
+ by commas. Commas are exclusively used for separating branches and cannot
+ appear within a branch for any other purpose. Nested `{}` structures are
+ allowed, but `{}` must contain at least two branches—zero or one branch is
+ not permitted.
+• In `[]` or `[!...]`, a character range consists of character intervals
+ (e.g., `a-z`) or individual characters (e.g., `w`). A range including `/`
+ won’t match that character.
- This uses glob semantics from LSP 3.17.0:
- https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
- Glob patterns can have the following syntax:
- • `*` to match one or more characters in a path segment
- • `?` to match on one character in a path segment
- • `**` to match any number of path segments, including none
- • `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and
- JavaScript files)
- • `[]` to declare a range of characters to match in a path segment (e.g.,
- `example.[0-9]` to match on `example.0`, `example.1`, …)
- • `[!...]` to negate a range of characters to match in a path segment
- (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not
- `example.0`)
+vim.glob.to_lpeg({pattern}) *vim.glob.to_lpeg()*
+ Parses a raw glob into an |lua-lpeg| pattern.
Parameters: ~
• {pattern} (`string`) The raw glob pattern
Return: ~
- (`vim.lpeg.Pattern`) pattern An |lua-lpeg| representation of the
- pattern
+ (`vim.lpeg.Pattern`) An |lua-lpeg| representation of the pattern
==============================================================================
diff --git a/runtime/doc/news.txt b/runtime/doc/news.txt
@@ -175,7 +175,11 @@ OPTIONS
PERFORMANCE
-• todo
+• |vim.glob.to_lpeg()| uses a new LPeg-based implementation (Peglob) that
+ provides ~50% speedup for complex patterns. The implementation restores
+ support for nested braces and follows the LSP 3.17 specification with
+ additional constraints for improved correctness and resistance to
+ backtracking edge cases.
PLUGINS
diff --git a/runtime/lua/vim/glob.lua b/runtime/lua/vim/glob.lua
@@ -1,93 +1,375 @@
-local lpeg = vim.lpeg
-local P, S, V, R, B = lpeg.P, lpeg.S, lpeg.V, lpeg.R, lpeg.B
-local C, Cc, Ct, Cf, Cmt = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf, lpeg.Cmt
-
-local M = {}
-
-local pathsep = P('/')
-
---- Parses a raw glob into an |lua-lpeg| pattern.
+--- @brief Glob-to-LPeg Converter (Peglob).
+--- This module converts glob patterns to LPeg patterns according to the LSP 3.17 specification:
+--- https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
---
---- This uses glob semantics from LSP 3.17.0: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
----
---- Glob patterns can have the following syntax:
---- - `*` to match one or more characters in a path segment
+--- Glob grammar overview:
+--- - `*` to match zero or more characters in a path segment
--- - `?` to match on one character in a path segment
--- - `**` to match any number of path segments, including none
--- - `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and JavaScript files)
---- - `[]` to declare a range of characters to match in a path segment (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …)
---- - `[!...]` to negate a range of characters to match in a path segment (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
+--- - `[]` to declare a range of characters to match in a path segment
+--- (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …)
+--- - `[!...]` to negate a range of characters to match in a path segment
+--- (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
---
----@param pattern string The raw glob pattern
----@return vim.lpeg.Pattern pattern An |lua-lpeg| representation of the pattern
-function M.to_lpeg(pattern)
- local function class(inv, ranges)
- local patt = R(unpack(vim.tbl_map(table.concat, ranges)))
- if inv == '!' then
- patt = P(1) - patt
+--- Additional constraints:
+--- - A Glob pattern must match an entire path, with partial matches
+--- considered failures.
+--- - The pattern only determines success or failure, without specifying
+--- which parts correspond to which characters.
+--- - A *path segment* is the portion of a path between two adjacent path
+--- separators (`/`), or between the start/end of the path and the nearest
+--- separator.
+--- - The `**` (*globstar*) pattern matches zero or more path segments,
+--- including intervening separators (`/`). Within pattern strings, `**`
+--- must be delimited by path separators (`/`) or pattern boundaries and
+--- cannot be adjacent to any characters other than `/`. If `**` is not
+--- the final element, it must be followed by `/`.
+--- - `{}` (*braced conditions*) contains valid Glob patterns as branches,
+--- separated by commas. Commas are exclusively used for separating
+--- branches and cannot appear within a branch for any other purpose.
+--- Nested `{}` structures are allowed, but `{}` must contain at least two
+--- branches—zero or one branch is not permitted.
+--- - In `[]` or `[!...]`, a *character range* consists of character
+--- intervals (e.g., `a-z`) or individual characters (e.g., `w`). A range
+--- including `/` won’t match that character.
+
+--- @diagnostic disable: missing-fields
+
+local m = vim.lpeg
+local mt = getmetatable(m.P(0))
+local re = vim.re
+local bit = require('bit')
+
+local M = {}
+
+-- Building-block patterns shared by the glob grammar and the segment matchers
+local letter = m.P(1) - m.S(',*?[]{}/\\') -- Any single byte that is not one of , * ? [ ] { } / \
+local slash = m.P '/' * m.Cc(m.P '/') -- Matches '/' in the glob text and captures a pattern that matches '/'
+local notslash = m.P(1) - m.P '/' -- Any single byte except the path separator
+local notcomma = m.P(1) - m.S(',\\') -- Any single byte except comma and backslash
+
+--- Matches the end of the glob text and yields the pattern that must end the match: a `/` lookahead when the `inseg` flag is set, otherwise a real end-of-input check
+--- @type vim.lpeg.Pattern
+local eof = -1
+ * m.Cb('inseg') -- back-capture of the flag stored by the `Glob` rule
+ / function(flag)
+ if flag then
+ return #m.P '/' -- sub-glob expanded inside a segment: only require that a '/' follows
+ else
+ return m.P(-1) -- top-level glob: the path must end here
end
- return patt
end
- local function condlist(conds, after)
- return vim.iter(conds):fold(P(false), function(acc, cond)
- return acc + cond * after
- end)
+---@alias pat_table { F: string?, [1]: string, [2]: vim.lpeg.Pattern }
+---@alias seg_part { [string]: any, [integer]: pat_table }
+
+--- Opens a new segment record seeded with the segment's leading pattern.
+---
+--- @param p pat_table Parsed word; its compiled pattern (`p[2]`) starts the segment
+--- @return seg_part Fresh record: `s` = start pattern, `e` = end marker, `n` = number of queued words
+local function start_seg(p)
+  local seg = {}
+  seg.s = p[2]
+  seg.e = true
+  seg.n = 0
+  return seg
+end
+
+--- Queues a word that has to be searched for (it followed a `*`) on the segment record.
+---
+--- @param t seg_part Segment structure (modified in place)
+--- @param p pat_table Pattern to look for
+--- @return table The same segment structure, with `p` appended and `n` incremented
+local function lookfor(t, p)
+  local slot = t.n + 1
+  t[slot] = p
+  t.n = slot
+  return t
+end
+
+--- Marks the segment as ending with a trailing `*`: its tail consumes the rest of the segment.
+---
+--- @param t seg_part Segment structure (modified in place)
+--- @return table The same segment structure, with `e` set to "any run of non-`/` bytes"
+local function to_seg_end(t)
+  local rest_of_segment = notslash ^ 0
+  t.e = rest_of_segment
+  return t
+end
+
+--- Constructs a segment matching pattern from collected components
+---
+--- `t.s` is the leading pattern of the segment, `t[1] .. t[t.n]` are the words that were
+--- preceded by `*` (each becomes a rule that searches forward for the word without
+--- leaving the segment), and `t.e` is the optional tail pattern.
+---
+--- The start rule is stored at index 1 instead of under a string key. Rule names are
+--- taken verbatim from the glob text, so a reserved start-rule name such as `s` would
+--- collide with a word whose text is exactly `s` (as in the glob `*s`), turning the
+--- start rule into a reference to itself.
+---
+--- @param t seg_part Segment structure with patterns
+--- @return vim.lpeg.Pattern Complete segment match pattern
+local function end_seg(t)
+  local start = t.s
+  if t.n > 0 then
+    --- @type table<any,any>
+    local seg_grammar = {}
+    for i = 1, t.n do
+      local word = t[i]
+      local rname = word[1]
+      -- A word that occurs several times in the segment shares a single rule.
+      if not seg_grammar[rname] then
+        if word.F then
+          -- Deterministic first byte available: after a failed attempt, skip straight
+          -- to the next occurrence of that byte instead of retrying at every position.
+          seg_grammar[rname] = word[2] + notslash * (notslash - m.P(word.F)) ^ 0 * m.V(rname)
+        else
+          seg_grammar[rname] = word[2] + notslash * m.V(rname)
+        end
+      end
+      start = start * m.V(rname)
+    end
+    if t.e then
+      start = start * t.e
+    end
+    -- A non-string entry at index 1 is taken by LPeg as the initial rule itself.
+    seg_grammar[1] = start
+    return m.P(seg_grammar)
+  else
+    -- No `*` in this segment: no grammar needed, just prefix followed by tail.
+    if t.e then
+      start = start * t.e
+    end
+    return start
   end
+end
- local function mul(acc, m)
- return acc * m
+--- Builds the matcher for a globstar prefix: try `p` here, otherwise skip one whole
+--- path segment (including its `/`) and try again.
+---
+--- @param p vim.lpeg.Pattern Pattern directly after `**/`
+--- @return vim.lpeg.Pattern LPeg pattern for `**/p`
+local function dseg(p)
+  local skip_segment = notslash ^ 0 * m.P '/'
+  return m.P { p + skip_segment * m.V(1) }
+end
+
+--- @type (vim.lpeg.Pattern|table)
+local g = nil
+
+--- Concatenates two parts of a braced branch, where each part is either a plain
+--- string or a list of alternatives (Cartesian product of the alternatives).
+---
+--- When exactly one side is a list, that list is updated in place and returned.
+--- A side that is neither a string nor a table is ignored and the other side is
+--- returned unchanged.
+---
+--- @param a string|string[] First part
+--- @param b string|string[] Second part
+--- @return string|string[] Cartesian product of values
+local function mul_cond(a, b)
+  local kind_a, kind_b = type(a), type(b)
+  local a_is_str, a_is_tbl = kind_a == 'string', kind_a == 'table'
+  local b_is_str, b_is_tbl = kind_b == 'string', kind_b == 'table'
+
+  if not (a_is_str or a_is_tbl) then
+    return b
+  end
+  if not (b_is_str or b_is_tbl) then
+    return a
+  end
+
+  if a_is_str then
+    if b_is_str then
+      return a .. b
+    end
+    -- Prefix every alternative of `b` in place.
+    for i = 1, #b do
+      b[i] = a .. b[i]
+    end
+    return b
+  elseif b_is_str then
+    -- Suffix every alternative of `a` in place.
+    for i = 1, #a do
+      a[i] = a[i] .. b
+    end
+    return a
+  else
+    --- @type string[]
+    local product = {}
+    for i = 1, #a do
+      local head = a[i]
+      for j = 1, #b do
+        product[#product + 1] = head .. b[j]
+      end
+    end
+    return product
   end
+end
- local function star(stars, after)
- return (-after * (P(1) - pathsep)) ^ #stars * after
+--- Merges two branches of a braced pattern into one list of alternatives.
+---
+--- Two strings produce a new list; when either side is already a list, that list is
+--- extended in place (the left one if both are lists) and returned. Any other
+--- combination of argument types returns nothing.
+---
+--- @param a string|table First part
+--- @param b string|table Second part
+--- @return table #Combined alternatives
+local function add_cond(a, b)
+  local kind_a, kind_b = type(a), type(b)
+  if kind_a == 'string' and kind_b == 'string' then
+    return { a, b }
+  elseif kind_a == 'string' and kind_b == 'table' then
+    -- Keep source order: `a` comes before everything already in `b`.
+    table.insert(b, 1, a)
+    return b
+  elseif kind_a == 'table' and kind_b == 'string' then
+    a[#a + 1] = b
+    return a
+  elseif kind_a == 'table' and kind_b == 'table' then
+    for i = 1, #b do
+      a[#a + 1] = b[i]
+    end
+    return a
+  --- @diagnostic disable-next-line: missing-return
   end
+end
- local function dstar(after)
- return (-after * P(1)) ^ 0 * after
+--- Expands patterns handling segment boundaries
+--- `#` prefix is added for sub-grammar to detect in-segment flag
+---
+--- Each alternative in `a` is joined with the tail `b`, compiled as a glob of its own
+--- with the main grammar, and the compiled alternatives are combined as an ordered
+--- choice. Entries of `a` are replaced in place by their compiled patterns.
+---
+--- Raises `Invalid glob` when an alternative does not form a valid glob together
+--- with the tail; otherwise the failed alternative would either be dropped silently
+--- or surface later as an unrelated arithmetic error.
+---
+---@param a (any[]|vim.lpeg.Pattern[]) Array of patterns
+---@param b string Tail string
+---@param inseg boolean Whether inside a path segment
+---@return vim.lpeg.Pattern #Expanded pattern
+local function expand(a, b, inseg)
+  local prefix = inseg and '#' or ''
+  local res
+  for i = 1, #a do
+    local sub = g:match(prefix .. a[i] .. b)
+    if not sub then
+      -- Same message as the top-level check in `M.to_lpeg`.
+      error('Invalid glob', 0)
+    end
+    a[i] = sub
+    res = res and (res + sub) or sub
   end
+  return res
+end
+
+--- Decodes the first UTF-8 encoded character of a string into its Unicode codepoint
+---
+--- @param utf8_str string UTF-8 character (not validated here; bytes after the first character are ignored)
+--- @return number #Codepoint value (0 for an empty string)
+local function to_codepoint(utf8_str)
+ local codepoint = 0
+ local byte_count = 0 -- continuation bytes still expected for the current character
+
+ for i = 1, #utf8_str do
+ local byte = utf8_str:byte(i)
- -- luacheck: push ignore s
- local function cut(_s, idx, match)
- return idx, match
+ if byte_count ~= 0 then
+ codepoint = bit.bor(bit.lshift(codepoint, 6), bit.band(byte, 0x3F)) -- shift in the low 6 bits of a continuation byte
+ byte_count = byte_count - 1
+ else
+ if byte < 0x80 then
+ codepoint = byte
+ elseif byte < 0xE0 then -- treated as the lead byte of a 2-byte sequence
+ byte_count = 1
+ codepoint = bit.band(byte, 0x1F)
+ elseif byte < 0xF0 then -- treated as the lead byte of a 3-byte sequence
+ byte_count = 2
+ codepoint = bit.band(byte, 0x0F)
+ else -- treated as the lead byte of a 4-byte sequence
+ byte_count = 3
+ codepoint = bit.band(byte, 0x07)
+ end
+ end
+
+ if byte_count == 0 then -- the first character is complete; stop before any further bytes
+ break
+ end
end
- -- luacheck: pop
-
- --- @diagnostic disable-next-line: missing-fields
- local p = P({
- 'Pattern',
- Pattern = V('Elem') ^ -1 * V('End'),
- Elem = Cmt(
- Cf(
- (V('DStar') + V('Star') + V('Ques') + V('Class') + V('CondList') + V('Literal'))
- * (V('Elem') + V('End')),
- mul
- ),
- cut
- ),
- DStar = (B(pathsep) + -B(P(1)))
- * P('**')
- * (pathsep * (V('Elem') + V('End')) + V('End'))
- / dstar,
- Star = C(P('*') ^ 1) * (V('Elem') + V('End')) / star,
- Ques = P('?') * Cc(P(1) - pathsep),
- Class = P('[')
- * C(P('!') ^ -1)
- * Ct(Ct(C(P(1)) * P('-') * C(P(1) - P(']'))) ^ 1 * P(']'))
- / class,
- CondList = P('{') * Ct(V('Cond') * (P(',') * V('Cond')) ^ 0) * P('}') * V('Pattern') / condlist,
- -- TODO: '*' inside a {} condition is interpreted literally but should probably have the same
- -- wildcard semantics it usually has.
- -- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the
- -- pattern" which in all other cases is the entire succeeding part of the pattern, but at the end of a {}
- -- condition means "everything after the {}" where several other options separated by ',' may
- -- exist in between that should not be matched by '*'.
- Cond = Cmt(Cf((V('Ques') + V('Class') + V('Literal') - S(',}')) ^ 1, mul), cut) + Cc(P(0)),
- Literal = P(1) / P,
- End = P(-1) * Cc(P(-1)),
- })
-
- local lpeg_pattern = p:match(pattern) --[[@as vim.lpeg.Pattern?]]
+
+ return codepoint
+end
+
+--- Pattern matching exactly one UTF-8 encoded character (1 to 4 bytes, selected by the lead byte)
+local cont = m.R('\128\191') -- continuation byte (0x80-0xBF)
+local any_utf8 = m.R('\0\127') -- single byte (ASCII)
+ + m.R('\194\223') * cont -- 2-byte sequence
+ + m.R('\224\239') * cont * cont -- 3-byte sequence
+ + m.R('\240\244') * cont * cont * cont -- 4-byte sequence
+
+--- Creates a character class pattern for glob ranges
+---
+--- The resulting pattern consumes one character and never matches `/`.
+--- An empty class (`[]` or `[!]`) is treated as that literal text.
+---
+--- @param inv string Inversion flag ('!' or '')
+--- @param ranges (string|string[])[] Character ranges
+--- @return vim.lpeg.Pattern #Character class pattern
+local function class(inv, ranges)
+  local negated = inv == '!'
+  if #ranges == 0 then
+    return m.P(negated and '[!]' or '[]')
+  end
+
+  local patt = m.P(false)
+  for _, v in ipairs(ranges) do
+    if type(v) == 'table' then
+      -- Interval such as `a-z`: both ends are single UTF-8 characters.
+      patt = patt + m.utfR(to_codepoint(v[1]), to_codepoint(v[2]))
+    else
+      patt = patt + m.P(v)
+    end
+  end
+
+  if negated then
+    -- Consume a complete UTF-8 character (falling back to one raw byte for
+    -- invalid sequences). Consuming a single byte here would split a multi-byte
+    -- character and make the rest of the glob fail on its continuation bytes.
+    patt = (any_utf8 + m.P(1)) - patt --[[@as vim.lpeg.Pattern]]
+  end
+  return patt - m.P '/'
+end
+
+-- Helper grammars deciding whether a braced condition list can be expanded together with only the rest of its path segment
+local noopt_condlist = re.compile [[
+ s <- '/' / '**' / . [^/*]* s
+]] -- succeeds when the condition list text contains a '/' or a '**'
+
+local opt_tail = re.compile [[
+ s <- (!'**' [^{/])* &'/'
+]] -- succeeds when a '/' is reached before any '{' or '**'; the match ends at that '/'
+
+-- stylua: ignore start
+--- @nodoc
+--- @diagnostic disable
+--- Main grammar: parses a glob and, through its captures, builds the LPeg pattern that matches paths
+g = {
+ 'Glob',
+ Glob = (m.P'#' * m.Cg(m.Cc(true), 'inseg') + m.Cg(m.Cc(false), 'inseg')) * -- a leading '#' (added by `expand`) sets the in-segment flag read by `eof`
+ m.Cf(m.V'Element'^-1 * (slash * m.V'Element')^0 * (slash^-1 * eof), mt.__mul),
+ -- Elements handle segments, globstar patterns
+ Element = m.V'DSeg' + m.V'DSEnd' + m.Cf(m.V'Segment' * (slash * m.V'Segment')^0 * (slash * eof + eof^-1), mt.__mul),
+ -- Globstar patterns
+ DSeg = m.P'**/' * ((m.V'Element' + eof) / dseg), -- `**/` followed by another element or by the end of the glob
+ DSEnd = m.P'**' * -1 * m.Cc(m.P(1)^0), -- a final `**` matches whatever remains of the path
+ -- Segment handling with word and star patterns
+ Segment = (m.V'Word' / start_seg + m.Cc({ '', true }) / start_seg * (m.V'Star' * m.V'Word' % lookfor)) *
+ (m.V'Star' * m.V'Word' % lookfor)^0 * (m.V'Star' * m.V'CheckBnd' % to_seg_end)^-1 / end_seg
+ + m.V'Star' * m.V'CheckBnd' * m.Cc(notslash^0),
+ CheckBnd = #m.P'/' + -1, -- Boundary constraint: a '/' follows or the glob text ends
+
+ -- Word patterns for fixed-length matching
+ Word = -m.P'*' * m.Ct( m.V('FIRST')^-1 * m.C(m.V'WordAux') ),
+ WordAux = m.V'Branch' + m.Cf(m.V'Simple'^1 * m.V'Branch'^-1, mt.__mul),
+ Simple = m.Cg( m.V'Token' * (m.V'Token' % mt.__mul)^0 * (m.V'Boundary' % mt.__mul)^-1),
+ Boundary = #m.P'/' * m.Cc(#m.P'/') + eof,
+ Token = m.V'Ques' + m.V'Class' + m.V'Escape' + m.V'Literal',
+ Star = m.P'*',
+ Ques = m.P'?' * m.Cc(notslash),
+ Escape = m.P'\\' * m.C(1) / m.P, -- a backslash makes the following byte a literal
+ Literal = m.C(letter^1) / m.P,
+
+ -- Branch handling for braced conditions
+ Branch = m.Cmt(m.C(m.V'CondList'), function(s, i, p1, p2)
+ -- Optimize brace expansion when possible
+ -- p1: string form of condition list, p2: transformed lua table
+ if noopt_condlist:match(p1) then
+ -- Cannot optimize, match till the end
+ return #s + 1, p2, s:sub(i)
+ end
+ -- Find point to cut for optimization
+ local cut = opt_tail:match(s, i)
+ if cut then
+ -- Can optimize: match till cut point
+ -- true flag tells expand to transform EOF matches to &'/' predicates
+ return cut, p2, s:sub(i, cut - 1), true
+ else
+ -- Cannot optimize
+ return #s + 1, p2, s:sub(i)
+ end
+ end) / expand,
+ -- Brace expansion handling
+ CondList = m.Cf(m.P'{' * m.V'Cond' * (m.P',' * m.V'Cond')^1 * m.P'}', add_cond), -- at least two comma-separated branches
+ Cond = m.Cf((m.C((notcomma + m.P'\\' * 1 - m.S'{}')^1) + m.V'CondList')^1, mul_cond) + m.C(true), -- a branch may be empty
+
+ -- Character class handling
+ Class = m.P'[' * m.C(m.P'!'^-1) * m.Ct(
+ (m.Ct(m.C(any_utf8) * m.P'-' * m.C(any_utf8 - m.P']')) + m.C(any_utf8 - m.P']'))^0
+ ) * m.P']' / class,
+
+ -- Deterministic first character extraction for optimization
+ FIRST = m.Cg(m.P(function(s, i)
+ if letter:match(s, i) then return true, s:sub(i, i)
+ else return false end
+ end), 'F')
+}
+-- stylua: ignore end
+--- @diagnostic enable
+
+--- @nodoc
+g = m.P(g) -- compile the grammar table; `expand` re-enters it to compile each brace branch
+
+--- Parses a raw glob into an |lua-lpeg| pattern.
+--- Raises an error with the message "Invalid glob" when the pattern cannot be parsed.
+---@param pattern string The raw glob pattern
+---@return vim.lpeg.Pattern #An |lua-lpeg| representation of the pattern
+function M.to_lpeg(pattern)
+ local lpeg_pattern = g:match(pattern) --[[@as vim.lpeg.Pattern?]]
assert(lpeg_pattern, 'Invalid glob')
return lpeg_pattern
end
diff --git a/test/functional/lua/glob_spec.lua b/test/functional/lua/glob_spec.lua
@@ -18,6 +18,7 @@ describe('glob', function()
eq(true, match('', ''))
eq(false, match('', 'a'))
eq(true, match('a', 'a'))
+ eq(true, match('.', '.'))
eq(true, match('/', '/'))
eq(true, match('abc', 'abc'))
eq(false, match('abc', 'abcdef'))
@@ -35,7 +36,8 @@ describe('glob', function()
end)
it('should match * wildcards', function()
- eq(false, match('*', ''))
+ eq(true, match('*', ''))
+ eq(true, match('*', ' '))
eq(true, match('*', 'a'))
eq(false, match('*', '/'))
eq(false, match('*', '/a'))
@@ -43,6 +45,7 @@ describe('glob', function()
eq(true, match('*', 'aaa'))
eq(true, match('*a', 'aa'))
eq(true, match('*a', 'abca'))
+ eq(true, match('*.ts', '.ts'))
eq(true, match('*.txt', 'file.txt'))
eq(false, match('*.txt', 'file.txtxt'))
eq(false, match('*.txt', 'dir/file.txt'))
@@ -62,18 +65,13 @@ describe('glob', function()
eq(false, match('dir/*/file.txt', 'dir/file.txt'))
eq(true, match('dir/*/file.txt', 'dir/subdir/file.txt'))
eq(false, match('dir/*/file.txt', 'dir/subdir/subdir/file.txt'))
-
- -- The spec does not describe this, but VSCode only interprets ** when it's by
- -- itself in a path segment, and otherwise interprets ** as consecutive * directives.
- -- see: https://github.com/microsoft/vscode/blob/eef30e7165e19b33daa1e15e92fa34ff4a5df0d3/src/vs/base/common/glob.ts#L112
- eq(true, match('a**', 'abc')) -- '**' should parse as two '*'s when not by itself in a path segment
- eq(true, match('**c', 'abc'))
- eq(false, match('a**', 'ab')) -- each '*' should still represent at least one character
- eq(false, match('**c', 'bc'))
- eq(true, match('a**', 'abcd'))
- eq(true, match('**d', 'abcd'))
- eq(false, match('a**', 'abc/d'))
- eq(false, match('**d', 'abc/d'))
+ eq(true, match('a*b*c*d*e*', 'axbxcxdxe'))
+ eq(true, match('a*b*c*d*e*', 'axbxcxdxexxx'))
+ eq(true, match('a*b?c*x', 'abxbbxdbxebxczzx'))
+ eq(false, match('a*b?c*x', 'abxbbxdbxebxczzy'))
+ eq(true, match('a*b*[cy]*d*e*', 'axbxcxdxexxx'))
+ eq(true, match('a*b*[cy]*d*e*', 'axbxyxdxexxx'))
+ eq(true, match('a*b*[cy]*d*e*', 'axbxxxyxdxexxx'))
end)
it('should match ? wildcards', function()
@@ -84,6 +82,11 @@ describe('glob', function()
eq(true, match('??', 'ab'))
eq(true, match('a?c', 'abc'))
eq(false, match('a?c', 'a/c'))
+ eq(false, match('a/', 'a/.b'))
+ eq(true, match('?/?', 'a/b'))
+ eq(true, match('/??', '/ab'))
+ eq(true, match('/?b', '/ab'))
+ eq(false, match('foo?bar', 'foo/bar'))
end)
it('should match ** wildcards', function()
@@ -99,7 +102,7 @@ describe('glob', function()
eq(true, match('/**', '/'))
eq(true, match('/**', '/a/b/c'))
eq(true, match('**/', '')) -- **/ absorbs trailing /
- eq(true, match('**/', '/a/b/c'))
+ eq(false, match('**/', '/a/b/c'))
eq(true, match('**/**', ''))
eq(true, match('**/**', 'a'))
eq(false, match('a/**', ''))
@@ -134,20 +137,9 @@ describe('glob', function()
end)
it('should match {} groups', function()
- eq(true, match('{}', ''))
- eq(false, match('{}', 'a'))
- eq(true, match('a{}', 'a'))
- eq(true, match('{}a', 'a'))
eq(true, match('{,}', ''))
eq(true, match('{a,}', ''))
eq(true, match('{a,}', 'a'))
- eq(true, match('{a}', 'a'))
- eq(false, match('{a}', 'aa'))
- eq(false, match('{a}', 'ab'))
- eq(true, match('{a?c}', 'abc'))
- eq(false, match('{ab}', 'a'))
- eq(false, match('{ab}', 'b'))
- eq(true, match('{ab}', 'ab'))
eq(true, match('{a,b}', 'a'))
eq(true, match('{a,b}', 'b'))
eq(false, match('{a,b}', 'ab'))
@@ -155,7 +147,22 @@ describe('glob', function()
eq(false, match('{ab,cd}', 'a'))
eq(true, match('{ab,cd}', 'cd'))
eq(true, match('{a,b,c}', 'c'))
- eq(false, match('{a,{b,c}}', 'c')) -- {} cannot nest
+ eq(true, match('{a,{b,c}}', 'c'))
+ eq(true, match('a{,/}*.txt', 'a.txt'))
+ eq(true, match('a{,/}*.txt', 'ab.txt'))
+ eq(true, match('a{,/}*.txt', 'a/b.txt'))
+ eq(true, match('a{,/}*.txt', 'a/ab.txt'))
+ eq(true, match('a/{a{a,b},b}', 'a/aa'))
+ eq(true, match('a/{a{a,b},b}', 'a/ab'))
+ eq(false, match('a/{a{a,b},b}', 'a/ac'))
+ eq(true, match('a/{a{a,b},b}', 'a/b'))
+ eq(false, match('a/{a{a,b},b}', 'a/c'))
+ eq(true, match('foo{bar,b*z}', 'foobar'))
+ eq(true, match('foo{bar,b*z}', 'foobuzz'))
+ eq(true, match('foo{bar,b*z}', 'foobarz'))
+ eq(true, match('{a,b}/c/{d,e}/**/*est.ts', 'a/c/d/one/two/three.test.ts'))
+ eq(true, match('{a,{d,e}b}/c', 'a/c'))
+ eq(true, match('{**/a,**/b}', 'b'))
end)
it('should match [] groups', function()
@@ -181,6 +188,13 @@ describe('glob', function()
eq(true, match('[a-zA-Z0-9]', 'Z'))
eq(true, match('[a-zA-Z0-9]', '9'))
eq(false, match('[a-zA-Z0-9]', '&'))
+ eq(true, match('[?]', '?'))
+ eq(false, match('[?]', 'a'))
+ eq(true, match('[*]', '*'))
+ eq(false, match('[*]', 'a'))
+ eq(true, match('[\\!]', '!'))
+ eq(true, match('a\\*b', 'a*b'))
+ eq(false, match('a\\*b', 'axb'))
end)
it('should match [!...] groups', function()
@@ -202,8 +216,7 @@ describe('glob', function()
it('should handle long patterns', function()
-- lpeg has a recursion limit of 200 by default, make sure the grammar does not trigger it on
-- strings longer than that
- local fill_200 =
- 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
+ local fill_200 = ('a'):rep(200)
eq(200, fill_200:len())
local long_lit = fill_200 .. 'a'
eq(false, match(long_lit, 'b'))
@@ -212,6 +225,21 @@ describe('glob', function()
eq(true, match(long_pat, fill_200 .. 'a/b/c/d.c'))
end)
+ -- New test for unicode patterns from assets
+ it('should match unicode patterns', function()
+ eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.ts'))
+ eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.tsx'))
+ eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.js'))
+ eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.jsx'))
+ eq(false, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.jsxxxxxxxx'))
+ eq(true, match('*é*', 'café noir'))
+ eq(true, match('caf*noir', 'café noir'))
+ eq(true, match('caf*noir', 'cafeenoir'))
+ eq(true, match('F[ë£a]', 'Fë'))
+ eq(true, match('F[ë£a]', 'F£'))
+ eq(true, match('F[ë£a]', 'Fa'))
+ end)
+
it('should match complex patterns', function()
eq(false, match('**/*.{c,h}', ''))
eq(false, match('**/*.{c,h}', 'c'))