commit 610e9666af0ef16f3677b23a5c341c8817c89965
parent 69553f7bf55c060733553d96a068c1104c885bce
Author: bfredl <bjorn.linse@gmail.com>
Date: Sun, 29 Sep 2024 11:03:36 +0200
Merge pull request #30401 from bfredl/casefold2
refactor(multibyte): neo-casefolding without allocation
Diffstat:
2 files changed, 9 insertions(+), 14 deletions(-)
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
@@ -1379,22 +1379,11 @@ int utf_fold(int a)
return a;
}
- utf8proc_uint8_t input_str[16] = { 0 };
- if (utf8proc_encode_char(a, input_str) <= 0) {
- return a;
- }
-
- utf8proc_uint8_t *fold_str_utf;
- if (utf8proc_map((utf8proc_uint8_t *)input_str, 0, &fold_str_utf,
- UTF8PROC_NULLTERM | UTF8PROC_CASEFOLD) < 0) {
- return a;
- }
-
- int fold_codepoint_utf = utf_ptr2char((char *)fold_str_utf);
+ utf8proc_int32_t result[1];
- xfree(fold_str_utf);
+ utf8proc_ssize_t res = utf8proc_decompose_char(a, result, 1, UTF8PROC_CASEFOLD, NULL);
- return fold_codepoint_utf;
+ return (res == 1) ? result[0] : a;
}
// Vim's own character class functions. These exist because many library
diff --git a/test/unit/mbyte_spec.lua b/test/unit/mbyte_spec.lua
@@ -351,6 +351,12 @@ describe('mbyte', function()
describe('utf_fold', function()
itp('does not crash with surrogates #30527', function()
eq(0xDDFB, lib.utf_fold(0xDDFB))
+ eq(0xd800, lib.utf_fold(0xd800)) -- high surrogate, invalid as a character
+ end)
+
+ itp("doesn't crash on invalid codepoints", function()
+ eq(9000000, lib.utf_fold(9000000))
+ eq(0, lib.utf_fold(0))
end)
end)
end)