commit b4db016217a94ccc1060c1876aa6531948ed811a
parent 2187755546cc1548643844ed91ad9f64e8c4c4a1
Author: Aristide Tossou <atossou@mozilla.com>
Date: Wed, 8 Oct 2025 14:27:11 +0000
Bug 1990534 - Support TopP Sampling for llama.cpp. r=padenot
Differential Revision: https://phabricator.services.mozilla.com/D267615
Diffstat:
3 files changed, 8 insertions(+), 0 deletions(-)
diff --git a/config/external/mozinference/mozinference.symbols b/config/external/mozinference/mozinference.symbols
@@ -20,6 +20,7 @@ llama_sampler_init_greedy
llama_sampler_init_temp
llama_sampler_init_dist
llama_sampler_init_top_k
+llama_sampler_init_top_p
llama_sampler_free
llama_sampler_sample
llama_memory_clear
diff --git a/toolkit/components/ml/backends/llama/LlamaBackend.cpp b/toolkit/components/ml/backends/llama/LlamaBackend.cpp
@@ -380,6 +380,11 @@ LlamaBackend::SamplerResult LlamaBackend::InitializeSampler(
samplerElement = mLib->llama_sampler_init_top_k(samplerConfig.mTopK);
break;
+ case LlamaSamplerType::Top_p:
+ samplerElement = mLib->llama_sampler_init_top_p(samplerConfig.mTopP,
+ samplerConfig.mMinKeep);
+ break;
+
default:
auto msg = nsFmtCString(FMT_STRING("{}: Unimplemented sampler type"),
diff --git a/toolkit/components/ml/backends/llama/LlamaRuntimeLinker.h b/toolkit/components/ml/backends/llama/LlamaRuntimeLinker.h
@@ -48,6 +48,8 @@ namespace mozilla::llama {
X(struct llama_sampler*, llama_sampler_init_temp, (float t)) \
X(struct llama_sampler*, llama_sampler_init_dist, (uint32_t seed)) \
X(struct llama_sampler*, llama_sampler_init_top_k, (int32_t k)) \
+ X(struct llama_sampler*, llama_sampler_init_top_p, \
+ (float p, size_t min_keep)) \
X(void, llama_memory_clear, (llama_memory_t mem, bool data)) \
X(llama_memory_t, llama_get_memory, (const struct llama_context* ctx)) \
X(const struct llama_vocab*, llama_model_get_vocab, \