commit 0f948693b771e8a8ccddeb269121fe2df62b5285
parent 7540feba4b4de98267238d144d98d844d0e728e7
Author: David Goulet <dgoulet@torproject.org>
Date: Wed, 10 Sep 2025 13:51:29 -0400
Merge branch 'maint-0.4.8'
Diffstat:
6 files changed, 71 insertions(+), 6 deletions(-)
diff --git a/changes/ticket41006 b/changes/ticket41006
@@ -0,0 +1,8 @@
+ o Major bugfixes (onion service directory cache):
+ - Preserve the download counter of an onion service descriptor across
+ descriptor uploads, so that recently updated descriptors don't get
+ pruned if there is memory pressure soon after update. Additionally,
+ create a separate torrc option MaxHSDirCacheBytes that defaults to the
+ former 20% of MaxMemInQueues threshold, but can be controlled by
+ relay operators under DoS. Also enforce this threshold during HSDir
+ uploads. Fixes bug 41006; bugfix on 0.4.8.14.
diff --git a/doc/man/tor.1.txt b/doc/man/tor.1.txt
@@ -2450,6 +2450,15 @@ is non-zero):
level __notice__ message designed to help developers instrumenting Tor's
main event loop. (Default: 0)
+[[MaxHSDirCacheBytes]] **MaxHSDirCacheBytes** __N__ **bytes**|**KBytes**|**MBytes**|**GBytes**::
+ This option configures a threshold of Hidden Service Directory memory
+ consumption above which your Tor relay will begin to prune the least-frequently
+ accessed hidden service descriptors from the relay's HSDir cache. This pruning used
+ to be done as part of MaxMemInQueues, but it has been decoupled to allow more
+ fine-grained control of descriptor cache size under DDoS conditions. This
+ option defaults to 20% of the MaxMemInQueues size, which itself defaults to
+ an automatically determined value based on system memory. (Default: 20% MaxMemInQueues)
+
[[MaxMemInQueues]] **MaxMemInQueues** __N__ **bytes**|**KBytes**|**MBytes**|**GBytes**::
This option configures a threshold above which Tor will assume that it
needs to stop queueing or buffering data because it's about to run out of
diff --git a/src/app/config/config.c b/src/app/config/config.c
@@ -568,6 +568,7 @@ static const config_var_t option_vars_[] = {
V(MaxClientCircuitsPending, POSINT, "32"),
V(MaxConsensusAgeForDiffs, INTERVAL, "0 seconds"),
VAR("MaxMemInQueues", MEMUNIT, MaxMemInQueues_raw, "0"),
+ VAR("MaxHSDirCacheBytes", MEMUNIT, MaxHSDirCacheBytes, "0"),
OBSOLETE("MaxOnionsPending"),
V(MaxOnionQueueDelay, MSEC_INTERVAL, "0"),
V(MaxUnparseableDescSizeToLog, MEMUNIT, "10 MB"),
@@ -3556,6 +3557,12 @@ options_validate_cb(const void *old_options_, void *options_, char **msg)
server_mode(options));
options->MaxMemInQueues_low_threshold = (options->MaxMemInQueues / 4) * 3;
+ /* Process MaxHSDirCacheBytes. If not set (0), use MaxMemInQueues / 5 as default. */
+ if (options->MaxHSDirCacheBytes == 0) {
+ /* Default to MaxMemInQueues / 5 for HS directory cache (20%) */
+ options->MaxHSDirCacheBytes = options->MaxMemInQueues / 5;
+ }
+
if (!options->SafeLogging ||
!strcasecmp(options->SafeLogging, "0")) {
options->SafeLogging_ = SAFELOG_SCRUB_NONE;
diff --git a/src/app/config/or_options_st.h b/src/app/config/or_options_st.h
@@ -209,6 +209,10 @@ struct or_options_t {
/** Above this value, consider ourselves low on RAM. */
uint64_t MaxMemInQueues_low_threshold;
+ uint64_t MaxHSDirCacheBytes;/**< If we have more memory than this allocated
+ * for the hidden service directory cache,
+ * run the HS cache OOM handler */
+
/** @name port booleans
*
* Derived booleans: For server ports and ControlPort, true iff there is a
diff --git a/src/core/or/relay.c b/src/core/or/relay.c
@@ -2874,15 +2874,19 @@ cell_queues_check_size(void)
/* Note this overload down */
rep_hist_note_overload(OVERLOAD_GENERAL);
- /* If we're spending over 20% of the memory limit on hidden service
- * descriptors, free them until we're down to 10%. Do the same for geoip
- * client cache. */
- if (hs_cache_total > get_options()->MaxMemInQueues / 5) {
+ /* If we're spending over the configured limit on hidden service
+ * descriptors, free them until we're down to 50% of the limit. */
+ if (hs_cache_total > get_options()->MaxHSDirCacheBytes) {
const size_t bytes_to_remove =
- hs_cache_total - (size_t)(get_options()->MaxMemInQueues / 10);
+ hs_cache_total - (size_t)(get_options()->MaxHSDirCacheBytes / 2);
removed = hs_cache_handle_oom(bytes_to_remove);
oom_stats_n_bytes_removed_hsdir += removed;
alloc -= removed;
+ static ratelim_t hs_cache_oom_ratelim = RATELIM_INIT(600);
+ log_fn_ratelim(&hs_cache_oom_ratelim, LOG_NOTICE, LD_REND,
+ "HSDir cache exceeded limit (%zu > %"PRIu64" bytes). "
+ "Pruned %zu bytes during cell_queues_check_size.",
+ hs_cache_total, get_options()->MaxHSDirCacheBytes, removed);
}
if (geoip_client_cache_total > get_options()->MaxMemInQueues / 5) {
const size_t bytes_to_remove =
diff --git a/src/feature/hs/hs_cache.c b/src/feature/hs/hs_cache.c
@@ -26,6 +26,11 @@
#include "feature/nodelist/networkstatus_st.h"
+/**
+ * Spare room for 1000 descriptors when pruning cache to avoid thrashing
+ * and memory fragmentation. */
+#define HSCACHE_PRUNE_SPARE_ROOM (1000 * HS_DESC_MAX_LEN)
+
/* Total counter of the cache size. */
static size_t hs_cache_total_allocation = 0;
@@ -148,6 +153,28 @@ cache_store_v3_as_dir(hs_cache_dir_descriptor_t *desc)
tor_assert(desc);
+ /* Check if we've exceeded the MaxHSDirCacheBytes limit before storing
+ * this descriptor. If so, prune excess bytes leaving room for more. */
+ const size_t max_cache_bytes = get_options()->MaxHSDirCacheBytes;
+ const size_t current_cache_bytes = hs_cache_get_total_allocation();
+ if (max_cache_bytes > 0 && current_cache_bytes > max_cache_bytes) {
+ /* We prune only 1000 descriptors' worth of memory here because
+ * pruning is an expensive O(n^2) operation that has to keep finding
+ * the lowest download count descs. */
+ size_t bytes_to_remove = current_cache_bytes/2;
+ /* Ensure user didn't set a really low max hsdir cache value */
+ if (HSCACHE_PRUNE_SPARE_ROOM < max_cache_bytes) {
+ bytes_to_remove = current_cache_bytes -
+ (max_cache_bytes - HSCACHE_PRUNE_SPARE_ROOM);
+ }
+ size_t removed = hs_cache_handle_oom(bytes_to_remove);
+ static ratelim_t hs_cache_oom_ratelim = RATELIM_INIT(600);
+ log_fn_ratelim(&hs_cache_oom_ratelim, LOG_NOTICE, LD_REND,
+ "HSDir cache exceeded limit (%zu > %zu bytes). "
+ "Pruned %zu bytes during an HS descriptor upload.",
+ current_cache_bytes, max_cache_bytes, removed);
+ }
+
/* Verify if we have an entry in the cache for that key and if yes, check
* if we should replace it? */
cache_entry = lookup_v3_desc_as_dir(desc->key);
@@ -164,15 +191,21 @@ cache_store_v3_as_dir(hs_cache_dir_descriptor_t *desc)
goto err;
}
/* We now know that the descriptor we just received is a new one so
+ * preserve the downloaded counter from the old entry and then
* remove the entry we currently have from our cache so we can then
* store the new one. */
+ desc->n_downloaded = cache_entry->n_downloaded;
remove_v3_desc_as_dir(cache_entry);
hs_cache_decrement_allocation(cache_get_dir_entry_size(cache_entry));
cache_dir_desc_free(cache_entry);
}
+
/* Store the descriptor we just got. We are sure here that either we
* don't have the entry or we have a newer descriptor and the old one
- * has been removed from the cache. */
+ * has been removed from the cache. We do this *after* pruning
+ * other descriptors so that this descriptor is not immediately pruned,
+ * if new. This prevents probing to detect OOM thresholds via its
+ * absence. */
store_v3_desc_as_dir(desc);
/* Update our total cache size with this entry for the OOM. This uses the