commit 92204b06e7365cf4c68e6ea8258dce801f0a5df9
parent db0ec84fb46b8235f8651d5aa25eb56a9b117eb5
Author: Steven Arcangeli <506791+stevearc@users.noreply.github.com>
Date: Fri, 22 Dec 2023 02:40:01 -0800
refactor(lsp): move glob parsing to util (#26519)
refactor(lsp): move glob parsing to vim.glob
Move the logic that uses vim.lpeg to build a match pattern from a
glob into `vim.glob`. Several places in the LSP spec use globs, so
it is very useful to have glob matching available as a
general-purpose utility.
Diffstat:
7 files changed, 316 insertions(+), 311 deletions(-)
diff --git a/runtime/lua/vim/_init_packages.lua b/runtime/lua/vim/_init_packages.lua
@@ -55,6 +55,7 @@ vim._submodules = {
inspect = true,
version = true,
fs = true,
+ glob = true,
iter = true,
re = true,
text = true,
diff --git a/runtime/lua/vim/_meta.lua b/runtime/lua/vim/_meta.lua
@@ -11,6 +11,7 @@ vim.diagnostic = require('vim.diagnostic')
vim.filetype = require('vim.filetype')
vim.fs = require('vim.fs')
vim.func = require('vim.func')
+vim.glob = require('vim.glob')
vim.health = require('vim.health')
vim.highlight = require('vim.highlight')
vim.iter = require('vim.iter')
diff --git a/runtime/lua/vim/glob.lua b/runtime/lua/vim/glob.lua
@@ -0,0 +1,81 @@
+local lpeg = vim.lpeg
+
+local M = {}
+
+--- Parses a raw glob into an |lpeg| pattern.
+---
+--- This uses glob semantics from LSP 3.17.0: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
+--- Glob patterns can have the following syntax:
+--- `*` to match one or more characters in a path segment
+--- `?` to match on one character in a path segment
+--- `**` to match any number of path segments, including none
+--- `{}` to group conditions (e.g. `**/*.{ts,js}` matches all TypeScript and JavaScript files)
+--- `[]` to declare a range of characters to match in a path segment (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …)
+--- `[!...]` to negate a range of characters to match in a path segment (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
+---@param pattern string The raw glob pattern
+---@return vim.lpeg.Pattern pattern An |lpeg| representation of the pattern; an 'Invalid glob' error is raised if parsing fails
+function M.to_lpeg(pattern)
+ local l = lpeg
+ -- Local aliases for the lpeg pattern builders and capture helpers used by the grammar below.
+ local P, S, V = lpeg.P, lpeg.S, lpeg.V
+ local C, Cc, Ct, Cf = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf
+ -- Path segments are delimited by '/'; the `*` and `?` wildcards below never consume it.
+ local pathsep = '/'
+ --- Builds the matcher for a `[...]` class: each captured {from, to} pair becomes an lpeg.R range; `!` inverts the set.
+ local function class(inv, ranges)
+ for i, r in ipairs(ranges) do
+ ranges[i] = r[1] .. r[2]
+ end
+ local patt = l.R(unpack(ranges))
+ if inv == '!' then
+ patt = P(1) - patt
+ end
+ return patt
+ end
+ --- Fold step for `{a,b}` lists: ordered choice between the alternatives.
+ local function add(acc, a)
+ return acc + a
+ end
+ --- Fold step for sequences: concatenates consecutive pattern pieces.
+ local function mul(acc, m)
+ return acc * m
+ end
+ --- Run of `*`: at least #stars non-separator chars, each consumed only where `after` does not yet match, then `after`.
+ local function star(stars, after)
+ return (-after * (l.P(1) - pathsep)) ^ #stars * after
+ end
+ --- `**`: consumes any characters (separators included) until `after` matches, then matches `after`.
+ local function dstar(after)
+ return (-after * l.P(1)) ^ 0 * after
+ end
+ -- Grammar over the glob text itself: each rule captures an lpeg pattern and `Elem` folds them together with `mul`.
+ local p = P({
+ 'Pattern',
+ Pattern = V('Elem') ^ -1 * V('End'),
+ Elem = Cf(
+ (V('DStar') + V('Star') + V('Ques') + V('Class') + V('CondList') + V('Literal'))
+ * (V('Elem') + V('End')),
+ mul
+ ),
+ DStar = P('**') * (P(pathsep) * (V('Elem') + V('End')) + V('End')) / dstar,
+ Star = C(P('*') ^ 1) * (V('Elem') + V('End')) / star,
+ Ques = P('?') * Cc(l.P(1) - pathsep),
+ Class = P('[') * C(P('!') ^ -1) * Ct(Ct(C(1) * '-' * C(P(1) - ']')) ^ 1 * ']') / class,
+ CondList = P('{') * Cf(V('Cond') * (P(',') * V('Cond')) ^ 0, add) * '}',
+ -- TODO: '*' inside a {} condition is interpreted literally but should probably have the same
+ -- wildcard semantics it usually has.
+ -- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the
+ -- pattern" which in all other cases is the entire succeeding part of the pattern, but at the end of a {}
+ -- condition means "everything after the {}" where several other options separated by ',' may
+ -- exist in between that should not be matched by '*'.
+ Cond = Cf((V('Ques') + V('Class') + V('CondList') + (V('Literal') - S(',}'))) ^ 1, mul)
+ + Cc(l.P(0)),
+ Literal = P(1) / l.P,
+ End = P(-1) * Cc(l.P(-1)),
+ })
+ -- A glob that does not parse makes `p:match` return nil; the assert turns that into an 'Invalid glob' error.
+ local lpeg_pattern = p:match(pattern) --[[@as vim.lpeg.Pattern?]]
+ return assert(lpeg_pattern, 'Invalid glob')
+end
+
+return M
diff --git a/runtime/lua/vim/lsp/_dynamic.lua b/runtime/lua/vim/lsp/_dynamic.lua
@@ -1,4 +1,4 @@
-local wf = require('vim.lsp._watchfiles')
+local glob = require('vim.glob')
--- @class lsp.DynamicCapabilities
--- @field capabilities table<string, lsp.Registration[]>
@@ -97,7 +97,7 @@ function M.match(bufnr, documentSelector)
if matches and filter.scheme and not vim.startswith(uri, filter.scheme .. ':') then
matches = false
end
- if matches and filter.pattern and not wf._match(filter.pattern, fname) then
+ if matches and filter.pattern and not glob.to_lpeg(filter.pattern):match(fname) then
matches = false
end
if matches then
diff --git a/runtime/lua/vim/lsp/_watchfiles.lua b/runtime/lua/vim/lsp/_watchfiles.lua
@@ -1,4 +1,5 @@
local bit = require('bit')
+local glob = require('vim.glob')
local watch = require('vim._watch')
local protocol = require('vim.lsp.protocol')
local ms = protocol.Methods
@@ -6,88 +7,6 @@ local lpeg = vim.lpeg
local M = {}
---- Parses the raw pattern into an |lpeg| pattern. LPeg patterns natively support the "this" or "that"
---- alternative constructions described in the LSP spec that cannot be expressed in a standard Lua pattern.
----
----@param pattern string The raw glob pattern
----@return vim.lpeg.Pattern? pattern An |lpeg| representation of the pattern, or nil if the pattern is invalid.
-local function parse(pattern)
- local l = lpeg
-
- local P, S, V = lpeg.P, lpeg.S, lpeg.V
- local C, Cc, Ct, Cf = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf
-
- local pathsep = '/'
-
- local function class(inv, ranges)
- for i, r in ipairs(ranges) do
- ranges[i] = r[1] .. r[2]
- end
- local patt = l.R(unpack(ranges))
- if inv == '!' then
- patt = P(1) - patt
- end
- return patt
- end
-
- local function add(acc, a)
- return acc + a
- end
-
- local function mul(acc, m)
- return acc * m
- end
-
- local function star(stars, after)
- return (-after * (l.P(1) - pathsep)) ^ #stars * after
- end
-
- local function dstar(after)
- return (-after * l.P(1)) ^ 0 * after
- end
-
- local p = P({
- 'Pattern',
- Pattern = V('Elem') ^ -1 * V('End'),
- Elem = Cf(
- (V('DStar') + V('Star') + V('Ques') + V('Class') + V('CondList') + V('Literal'))
- * (V('Elem') + V('End')),
- mul
- ),
- DStar = P('**') * (P(pathsep) * (V('Elem') + V('End')) + V('End')) / dstar,
- Star = C(P('*') ^ 1) * (V('Elem') + V('End')) / star,
- Ques = P('?') * Cc(l.P(1) - pathsep),
- Class = P('[') * C(P('!') ^ -1) * Ct(Ct(C(1) * '-' * C(P(1) - ']')) ^ 1 * ']') / class,
- CondList = P('{') * Cf(V('Cond') * (P(',') * V('Cond')) ^ 0, add) * '}',
- -- TODO: '*' inside a {} condition is interpreted literally but should probably have the same
- -- wildcard semantics it usually has.
- -- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the
- -- pattern" which in all other cases is the entire succeeding part of the pattern, but at the end of a {}
- -- condition means "everything after the {}" where several other options separated by ',' may
- -- exist in between that should not be matched by '*'.
- Cond = Cf((V('Ques') + V('Class') + V('CondList') + (V('Literal') - S(',}'))) ^ 1, mul)
- + Cc(l.P(0)),
- Literal = P(1) / l.P,
- End = P(-1) * Cc(l.P(-1)),
- })
-
- return p:match(pattern) --[[@as vim.lpeg.Pattern?]]
-end
-
----@private
---- Implementation of LSP 3.17.0's pattern matching: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
----
----@param pattern string|vim.lpeg.Pattern The glob pattern (raw or parsed) to match.
----@param s string The string to match against pattern.
----@return boolean Whether or not pattern matches s.
-function M._match(pattern, s)
- if type(pattern) == 'string' then
- local p = assert(parse(pattern))
- return p:match(s) ~= nil
- end
- return pattern:match(s) ~= nil
-end
-
M._watchfunc = (vim.fn.has('win32') == 1 or vim.fn.has('mac') == 1) and watch.watch or watch.poll
---@type table<integer, table<string, function[]>> client id -> registration id -> cancel function
@@ -112,9 +31,9 @@ local to_lsp_change_type = {
--- Default excludes the same as VSCode's `files.watcherExclude` setting.
--- https://github.com/microsoft/vscode/blob/eef30e7165e19b33daa1e15e92fa34ff4a5df0d3/src/vs/workbench/contrib/files/browser/files.contribution.ts#L261
---@type vim.lpeg.Pattern parsed Lpeg pattern
-M._poll_exclude_pattern = parse('**/.git/{objects,subtree-cache}/**')
- + parse('**/node_modules/*/**')
- + parse('**/.hg/store/**')
+M._poll_exclude_pattern = glob.to_lpeg('**/.git/{objects,subtree-cache}/**')
+ + glob.to_lpeg('**/node_modules/*/**')
+ + glob.to_lpeg('**/.hg/store/**')
--- Registers the workspace/didChangeWatchedFiles capability dynamically.
---
@@ -143,7 +62,7 @@ function M.register(reg, ctx)
local glob_pattern = w.globPattern
if type(glob_pattern) == 'string' then
- local pattern = parse(glob_pattern)
+ local pattern = glob.to_lpeg(glob_pattern)
if not pattern then
error('Cannot parse pattern: ' .. glob_pattern)
end
@@ -155,7 +74,7 @@ function M.register(reg, ctx)
local base_uri = glob_pattern.baseUri
local uri = type(base_uri) == 'string' and base_uri or base_uri.uri
local base_dir = vim.uri_to_fname(uri)
- local pattern = parse(glob_pattern.pattern)
+ local pattern = glob.to_lpeg(glob_pattern.pattern)
if not pattern then
error('Cannot parse pattern: ' .. glob_pattern.pattern)
end
diff --git a/test/functional/lua/glob_spec.lua b/test/functional/lua/glob_spec.lua
@@ -0,0 +1,225 @@
+local helpers = require('test.functional.helpers')(after_each)
+local eq = helpers.eq
+local exec_lua = helpers.exec_lua
+
+describe('glob', function()
+ before_each(helpers.clear)
+ after_each(helpers.clear)
+ -- Test helper: evaluates `vim.glob.to_lpeg(pattern):match(str) ~= nil` via exec_lua and returns the boolean.
+ local match = function(...)
+ return exec_lua([[
+ local pattern = select(1, ...)
+ local str = select(2, ...)
+ return require("vim.glob").to_lpeg(pattern):match(str) ~= nil
+ ]], ...)
+ end
+
+ describe('glob matching', function()
+ it('should match literal strings', function()
+ eq(true, match('', ''))
+ eq(false, match('', 'a'))
+ eq(true, match('a', 'a'))
+ eq(true, match('/', '/'))
+ eq(true, match('abc', 'abc'))
+ eq(false, match('abc', 'abcdef'))
+ eq(false, match('abc', 'a'))
+ eq(false, match('abc', 'bc'))
+ eq(false, match('a', 'b'))
+ eq(false, match('.', 'a'))
+ eq(true, match('$', '$'))
+ eq(true, match('/dir', '/dir'))
+ eq(true, match('dir/', 'dir/'))
+ eq(true, match('dir/subdir', 'dir/subdir'))
+ eq(false, match('dir/subdir', 'subdir'))
+ eq(false, match('dir/subdir', 'dir/subdir/file'))
+ eq(true, match('🤠', '🤠'))
+ end)
+
+ it('should match * wildcards', function()
+ eq(false, match('*', ''))
+ eq(true, match('*', 'a'))
+ eq(false, match('*', '/'))
+ eq(false, match('*', '/a'))
+ eq(false, match('*', 'a/'))
+ eq(true, match('*', 'aaa'))
+ eq(true, match('*a', 'aa'))
+ eq(true, match('*a', 'abca'))
+ eq(true, match('*.txt', 'file.txt'))
+ eq(false, match('*.txt', 'file.txtxt'))
+ eq(false, match('*.txt', 'dir/file.txt'))
+ eq(false, match('*.txt', '/dir/file.txt'))
+ eq(false, match('*.txt', 'C:/dir/file.txt'))
+ eq(false, match('*.dir', 'test.dir/file'))
+ eq(true, match('file.*', 'file.txt'))
+ eq(false, match('file.*', 'not-file.txt'))
+ eq(true, match('*/file.txt', 'dir/file.txt'))
+ eq(false, match('*/file.txt', 'dir/subdir/file.txt'))
+ eq(false, match('*/file.txt', '/dir/file.txt'))
+ eq(true, match('dir/*', 'dir/file.txt'))
+ eq(false, match('dir/*', 'dir'))
+ eq(false, match('dir/*.txt', 'file.txt'))
+ eq(true, match('dir/*.txt', 'dir/file.txt'))
+ eq(false, match('dir/*.txt', 'dir/subdir/file.txt'))
+ eq(false, match('dir/*/file.txt', 'dir/file.txt'))
+ eq(true, match('dir/*/file.txt', 'dir/subdir/file.txt'))
+ eq(false, match('dir/*/file.txt', 'dir/subdir/subdir/file.txt'))
+
+ -- TODO: The spec does not describe this, but VSCode only interprets ** when it's by
+ -- itself in a path segment, and otherwise interprets ** as consecutive * directives.
+ -- The following tests show how this behavior should work, but it is not yet fully implemented.
+ -- Currently, "a**" parses incorrectly as "a" "**" and "**a" parses correctly as "*" "*" "a".
+ -- see: https://github.com/microsoft/vscode/blob/eef30e7165e19b33daa1e15e92fa34ff4a5df0d3/src/vs/base/common/glob.ts#L112
+ eq(true, match('a**', 'abc')) -- '**' should parse as two '*'s when not by itself in a path segment
+ eq(true, match('**c', 'abc'))
+ -- eq(false, match('a**', 'ab')) -- each '*' should still represent at least one character
+ eq(false, match('**c', 'bc'))
+ eq(true, match('a**', 'abcd'))
+ eq(true, match('**d', 'abcd'))
+ -- eq(false, match('a**', 'abc/d'))
+ eq(false, match('**d', 'abc/d'))
+ end)
+
+ it('should match ? wildcards', function()
+ eq(false, match('?', ''))
+ eq(true, match('?', 'a'))
+ eq(false, match('??', 'a'))
+ eq(false, match('?', 'ab'))
+ eq(true, match('??', 'ab'))
+ eq(true, match('a?c', 'abc'))
+ eq(false, match('a?c', 'a/c'))
+ end)
+
+ it('should match ** wildcards', function()
+ eq(true, match('**', ''))
+ eq(true, match('**', 'a'))
+ eq(true, match('**', '/'))
+ eq(true, match('**', 'a/'))
+ eq(true, match('**', '/a'))
+ eq(true, match('**', 'C:/a'))
+ eq(true, match('**', 'a/a'))
+ eq(true, match('**', 'a/a/a'))
+ eq(false, match('/**', '')) -- /** matches leading / literally
+ eq(true, match('/**', '/'))
+ eq(true, match('/**', '/a/b/c'))
+ eq(true, match('**/', '')) -- **/ absorbs trailing /
+ eq(true, match('**/', '/a/b/c'))
+ eq(true, match('**/**', ''))
+ eq(true, match('**/**', 'a'))
+ eq(false, match('a/**', ''))
+ eq(false, match('a/**', 'a'))
+ eq(true, match('a/**', 'a/b'))
+ eq(true, match('a/**', 'a/b/c'))
+ eq(false, match('a/**', 'b/a'))
+ eq(false, match('a/**', '/a'))
+ eq(false, match('**/a', ''))
+ eq(true, match('**/a', 'a'))
+ eq(false, match('**/a', 'a/b'))
+ eq(true, match('**/a', '/a'))
+ eq(true, match('**/a', '/b/a'))
+ eq(true, match('**/a', '/c/b/a'))
+ eq(true, match('**/a', '/a/a'))
+ eq(true, match('**/a', '/abc/a'))
+ eq(false, match('a/**/c', 'a'))
+ eq(false, match('a/**/c', 'c'))
+ eq(true, match('a/**/c', 'a/c'))
+ eq(true, match('a/**/c', 'a/b/c'))
+ eq(true, match('a/**/c', 'a/b/b/c'))
+ eq(false, match('**/a/**', 'a'))
+ eq(true, match('**/a/**', 'a/'))
+ eq(false, match('**/a/**', '/dir/a'))
+ eq(false, match('**/a/**', 'dir/a'))
+ eq(true, match('**/a/**', 'dir/a/'))
+ eq(true, match('**/a/**', 'a/dir'))
+ eq(true, match('**/a/**', 'dir/a/dir'))
+ eq(true, match('**/a/**', '/a/dir'))
+ eq(true, match('**/a/**', 'C:/a/dir'))
+ eq(false, match('**/a/**', 'a.txt'))
+ end)
+
+ it('should match {} groups', function()
+ eq(true, match('{}', ''))
+ eq(false, match('{}', 'a'))
+ eq(true, match('a{}', 'a'))
+ eq(true, match('{}a', 'a'))
+ eq(true, match('{,}', ''))
+ eq(true, match('{a,}', ''))
+ eq(true, match('{a,}', 'a'))
+ eq(true, match('{a}', 'a'))
+ eq(false, match('{a}', 'aa'))
+ eq(false, match('{a}', 'ab'))
+ eq(true, match('{a?c}', 'abc'))
+ eq(false, match('{ab}', 'a'))
+ eq(false, match('{ab}', 'b'))
+ eq(true, match('{ab}', 'ab'))
+ eq(true, match('{a,b}', 'a'))
+ eq(true, match('{a,b}', 'b'))
+ eq(false, match('{a,b}', 'ab'))
+ eq(true, match('{ab,cd}', 'ab'))
+ eq(false, match('{ab,cd}', 'a'))
+ eq(true, match('{ab,cd}', 'cd'))
+ eq(true, match('{a,b,c}', 'c'))
+ eq(true, match('{a,{b,c}}', 'c'))
+ end)
+
+ it('should match [] groups', function()
+ eq(true, match('[]', '[]')) -- empty [] is a literal
+ eq(false, match('[a-z]', ''))
+ eq(true, match('[a-z]', 'a'))
+ eq(false, match('[a-z]', 'ab'))
+ eq(true, match('[a-z]', 'z'))
+ eq(true, match('[a-z]', 'j'))
+ eq(false, match('[a-f]', 'j'))
+ eq(false, match('[a-z]', '`')) -- 'a' - 1
+ eq(false, match('[a-z]', '{')) -- 'z' + 1
+ eq(false, match('[a-z]', 'A'))
+ eq(false, match('[a-z]', '5'))
+ eq(true, match('[A-Z]', 'A'))
+ eq(true, match('[A-Z]', 'Z'))
+ eq(true, match('[A-Z]', 'J'))
+ eq(false, match('[A-Z]', '@')) -- 'A' - 1
+ eq(false, match('[A-Z]', '[')) -- 'Z' + 1
+ eq(false, match('[A-Z]', 'a'))
+ eq(false, match('[A-Z]', '5'))
+ eq(true, match('[a-zA-Z0-9]', 'z'))
+ eq(true, match('[a-zA-Z0-9]', 'Z'))
+ eq(true, match('[a-zA-Z0-9]', '9'))
+ eq(false, match('[a-zA-Z0-9]', '&'))
+ end)
+
+ it('should match [!...] groups', function()
+ eq(true, match('[!]', '[!]')) -- [!] is a literal
+ eq(false, match('[!a-z]', ''))
+ eq(false, match('[!a-z]', 'a'))
+ eq(false, match('[!a-z]', 'z'))
+ eq(false, match('[!a-z]', 'j'))
+ eq(true, match('[!a-f]', 'j'))
+ eq(false, match('[!a-f]', 'jj'))
+ eq(true, match('[!a-z]', '`')) -- 'a' - 1
+ eq(true, match('[!a-z]', '{')) -- 'z' + 1
+ eq(false, match('[!a-zA-Z0-9]', 'a'))
+ eq(false, match('[!a-zA-Z0-9]', 'A'))
+ eq(false, match('[!a-zA-Z0-9]', '0'))
+ eq(true, match('[!a-zA-Z0-9]', '!'))
+ end)
+
+ it('should match complex patterns', function()
+ eq(false, match('**/*.{c,h}', ''))
+ eq(false, match('**/*.{c,h}', 'c'))
+ eq(false, match('**/*.{c,h}', 'file.m'))
+ eq(true, match('**/*.{c,h}', 'file.c'))
+ eq(true, match('**/*.{c,h}', 'file.h'))
+ eq(true, match('**/*.{c,h}', '/file.c'))
+ eq(true, match('**/*.{c,h}', 'dir/subdir/file.c'))
+ eq(true, match('**/*.{c,h}', 'dir/subdir/file.h'))
+ eq(true, match('**/*.{c,h}', '/dir/subdir/file.c'))
+ eq(true, match('**/*.{c,h}', 'C:/dir/subdir/file.c'))
+ eq(true, match('/dir/**/*.{c,h}', '/dir/file.c'))
+ eq(false, match('/dir/**/*.{c,h}', 'dir/file.c'))
+ eq(true, match('/dir/**/*.{c,h}', '/dir/subdir/subdir/file.c'))
+
+ eq(true, match('{[0-9],[a-z]}', '0'))
+ eq(true, match('{[0-9],[a-z]}', 'a'))
+ eq(false, match('{[0-9],[a-z]}', 'A'))
+ end)
+ end)
+end)
diff --git a/test/functional/plugin/lsp/watchfiles_spec.lua b/test/functional/plugin/lsp/watchfiles_spec.lua
@@ -1,222 +0,0 @@
-local helpers = require('test.functional.helpers')(after_each)
-
-local eq = helpers.eq
-local exec_lua = helpers.exec_lua
-
-describe('vim.lsp._watchfiles', function()
- before_each(helpers.clear)
- after_each(helpers.clear)
-
- local match = function(...)
- return exec_lua('return require("vim.lsp._watchfiles")._match(...)', ...)
- end
-
- describe('glob matching', function()
- it('should match literal strings', function()
- eq(true, match('', ''))
- eq(false, match('', 'a'))
- eq(true, match('a', 'a'))
- eq(true, match('/', '/'))
- eq(true, match('abc', 'abc'))
- eq(false, match('abc', 'abcdef'))
- eq(false, match('abc', 'a'))
- eq(false, match('abc', 'bc'))
- eq(false, match('a', 'b'))
- eq(false, match('.', 'a'))
- eq(true, match('$', '$'))
- eq(true, match('/dir', '/dir'))
- eq(true, match('dir/', 'dir/'))
- eq(true, match('dir/subdir', 'dir/subdir'))
- eq(false, match('dir/subdir', 'subdir'))
- eq(false, match('dir/subdir', 'dir/subdir/file'))
- eq(true, match('🤠', '🤠'))
- end)
-
- it('should match * wildcards', function()
- eq(false, match('*', ''))
- eq(true, match('*', 'a'))
- eq(false, match('*', '/'))
- eq(false, match('*', '/a'))
- eq(false, match('*', 'a/'))
- eq(true, match('*', 'aaa'))
- eq(true, match('*a', 'aa'))
- eq(true, match('*a', 'abca'))
- eq(true, match('*.txt', 'file.txt'))
- eq(false, match('*.txt', 'file.txtxt'))
- eq(false, match('*.txt', 'dir/file.txt'))
- eq(false, match('*.txt', '/dir/file.txt'))
- eq(false, match('*.txt', 'C:/dir/file.txt'))
- eq(false, match('*.dir', 'test.dir/file'))
- eq(true, match('file.*', 'file.txt'))
- eq(false, match('file.*', 'not-file.txt'))
- eq(true, match('*/file.txt', 'dir/file.txt'))
- eq(false, match('*/file.txt', 'dir/subdir/file.txt'))
- eq(false, match('*/file.txt', '/dir/file.txt'))
- eq(true, match('dir/*', 'dir/file.txt'))
- eq(false, match('dir/*', 'dir'))
- eq(false, match('dir/*.txt', 'file.txt'))
- eq(true, match('dir/*.txt', 'dir/file.txt'))
- eq(false, match('dir/*.txt', 'dir/subdir/file.txt'))
- eq(false, match('dir/*/file.txt', 'dir/file.txt'))
- eq(true, match('dir/*/file.txt', 'dir/subdir/file.txt'))
- eq(false, match('dir/*/file.txt', 'dir/subdir/subdir/file.txt'))
-
- -- TODO: The spec does not describe this, but VSCode only interprets ** when it's by
- -- itself in a path segment, and otherwise interprets ** as consecutive * directives.
- -- The following tests show how this behavior should work, but is not yet fully implemented.
- -- Currently, "a**" parses incorrectly as "a" "**" and "**a" parses correctly as "*" "*" "a".
- -- see: https://github.com/microsoft/vscode/blob/eef30e7165e19b33daa1e15e92fa34ff4a5df0d3/src/vs/base/common/glob.ts#L112
- eq(true, match('a**', 'abc')) -- '**' should parse as two '*'s when not by itself in a path segment
- eq(true, match('**c', 'abc'))
- -- eq(false, match('a**', 'ab')) -- each '*' should still represent at least one character
- eq(false, match('**c', 'bc'))
- eq(true, match('a**', 'abcd'))
- eq(true, match('**d', 'abcd'))
- -- eq(false, match('a**', 'abc/d'))
- eq(false, match('**d', 'abc/d'))
- end)
-
- it('should match ? wildcards', function()
- eq(false, match('?', ''))
- eq(true, match('?', 'a'))
- eq(false, match('??', 'a'))
- eq(false, match('?', 'ab'))
- eq(true, match('??', 'ab'))
- eq(true, match('a?c', 'abc'))
- eq(false, match('a?c', 'a/c'))
- end)
-
- it('should match ** wildcards', function()
- eq(true, match('**', ''))
- eq(true, match('**', 'a'))
- eq(true, match('**', '/'))
- eq(true, match('**', 'a/'))
- eq(true, match('**', '/a'))
- eq(true, match('**', 'C:/a'))
- eq(true, match('**', 'a/a'))
- eq(true, match('**', 'a/a/a'))
- eq(false, match('/**', '')) -- /** matches leading / literally
- eq(true, match('/**', '/'))
- eq(true, match('/**', '/a/b/c'))
- eq(true, match('**/', '')) -- **/ absorbs trailing /
- eq(true, match('**/', '/a/b/c'))
- eq(true, match('**/**', ''))
- eq(true, match('**/**', 'a'))
- eq(false, match('a/**', ''))
- eq(false, match('a/**', 'a'))
- eq(true, match('a/**', 'a/b'))
- eq(true, match('a/**', 'a/b/c'))
- eq(false, match('a/**', 'b/a'))
- eq(false, match('a/**', '/a'))
- eq(false, match('**/a', ''))
- eq(true, match('**/a', 'a'))
- eq(false, match('**/a', 'a/b'))
- eq(true, match('**/a', '/a'))
- eq(true, match('**/a', '/b/a'))
- eq(true, match('**/a', '/c/b/a'))
- eq(true, match('**/a', '/a/a'))
- eq(true, match('**/a', '/abc/a'))
- eq(false, match('a/**/c', 'a'))
- eq(false, match('a/**/c', 'c'))
- eq(true, match('a/**/c', 'a/c'))
- eq(true, match('a/**/c', 'a/b/c'))
- eq(true, match('a/**/c', 'a/b/b/c'))
- eq(false, match('**/a/**', 'a'))
- eq(true, match('**/a/**', 'a/'))
- eq(false, match('**/a/**', '/dir/a'))
- eq(false, match('**/a/**', 'dir/a'))
- eq(true, match('**/a/**', 'dir/a/'))
- eq(true, match('**/a/**', 'a/dir'))
- eq(true, match('**/a/**', 'dir/a/dir'))
- eq(true, match('**/a/**', '/a/dir'))
- eq(true, match('**/a/**', 'C:/a/dir'))
- eq(false, match('**/a/**', 'a.txt'))
- end)
-
- it('should match {} groups', function()
- eq(true, match('{}', ''))
- eq(false, match('{}', 'a'))
- eq(true, match('a{}', 'a'))
- eq(true, match('{}a', 'a'))
- eq(true, match('{,}', ''))
- eq(true, match('{a,}', ''))
- eq(true, match('{a,}', 'a'))
- eq(true, match('{a}', 'a'))
- eq(false, match('{a}', 'aa'))
- eq(false, match('{a}', 'ab'))
- eq(true, match('{a?c}', 'abc'))
- eq(false, match('{ab}', 'a'))
- eq(false, match('{ab}', 'b'))
- eq(true, match('{ab}', 'ab'))
- eq(true, match('{a,b}', 'a'))
- eq(true, match('{a,b}', 'b'))
- eq(false, match('{a,b}', 'ab'))
- eq(true, match('{ab,cd}', 'ab'))
- eq(false, match('{ab,cd}', 'a'))
- eq(true, match('{ab,cd}', 'cd'))
- eq(true, match('{a,b,c}', 'c'))
- eq(true, match('{a,{b,c}}', 'c'))
- end)
-
- it('should match [] groups', function()
- eq(true, match('[]', '[]')) -- empty [] is a literal
- eq(false, match('[a-z]', ''))
- eq(true, match('[a-z]', 'a'))
- eq(false, match('[a-z]', 'ab'))
- eq(true, match('[a-z]', 'z'))
- eq(true, match('[a-z]', 'j'))
- eq(false, match('[a-f]', 'j'))
- eq(false, match('[a-z]', '`')) -- 'a' - 1
- eq(false, match('[a-z]', '{')) -- 'z' + 1
- eq(false, match('[a-z]', 'A'))
- eq(false, match('[a-z]', '5'))
- eq(true, match('[A-Z]', 'A'))
- eq(true, match('[A-Z]', 'Z'))
- eq(true, match('[A-Z]', 'J'))
- eq(false, match('[A-Z]', '@')) -- 'A' - 1
- eq(false, match('[A-Z]', '[')) -- 'Z' + 1
- eq(false, match('[A-Z]', 'a'))
- eq(false, match('[A-Z]', '5'))
- eq(true, match('[a-zA-Z0-9]', 'z'))
- eq(true, match('[a-zA-Z0-9]', 'Z'))
- eq(true, match('[a-zA-Z0-9]', '9'))
- eq(false, match('[a-zA-Z0-9]', '&'))
- end)
-
- it('should match [!...] groups', function()
- eq(true, match('[!]', '[!]')) -- [!] is a literal
- eq(false, match('[!a-z]', ''))
- eq(false, match('[!a-z]', 'a'))
- eq(false, match('[!a-z]', 'z'))
- eq(false, match('[!a-z]', 'j'))
- eq(true, match('[!a-f]', 'j'))
- eq(false, match('[!a-f]', 'jj'))
- eq(true, match('[!a-z]', '`')) -- 'a' - 1
- eq(true, match('[!a-z]', '{')) -- 'z' + 1
- eq(false, match('[!a-zA-Z0-9]', 'a'))
- eq(false, match('[!a-zA-Z0-9]', 'A'))
- eq(false, match('[!a-zA-Z0-9]', '0'))
- eq(true, match('[!a-zA-Z0-9]', '!'))
- end)
-
- it('should match complex patterns', function()
- eq(false, match('**/*.{c,h}', ''))
- eq(false, match('**/*.{c,h}', 'c'))
- eq(false, match('**/*.{c,h}', 'file.m'))
- eq(true, match('**/*.{c,h}', 'file.c'))
- eq(true, match('**/*.{c,h}', 'file.h'))
- eq(true, match('**/*.{c,h}', '/file.c'))
- eq(true, match('**/*.{c,h}', 'dir/subdir/file.c'))
- eq(true, match('**/*.{c,h}', 'dir/subdir/file.h'))
- eq(true, match('**/*.{c,h}', '/dir/subdir/file.c'))
- eq(true, match('**/*.{c,h}', 'C:/dir/subdir/file.c'))
- eq(true, match('/dir/**/*.{c,h}', '/dir/file.c'))
- eq(false, match('/dir/**/*.{c,h}', 'dir/file.c'))
- eq(true, match('/dir/**/*.{c,h}', '/dir/subdir/subdir/file.c'))
-
- eq(true, match('{[0-9],[a-z]}', '0'))
- eq(true, match('{[0-9],[a-z]}', 'a'))
- eq(false, match('{[0-9],[a-z]}', 'A'))
- end)
- end)
-end)