[ tor-browser ].git.dasho

commit 8c418d018af8eca4942786ec49409fda072c0d1e
parent 85663eb0e61a6b90de3f6567fbaf8bc4c4c7f7da
Author: Henry Wilkes <henry@torproject.org>
Date:   Tue, 21 Jan 2025 10:46:15 +0000

BB 42305: Add script to combine translation files across versions.

Diffstat:
A tools/base_browser/l10n/combine-translation-versions.py  | 404 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tools/base_browser/l10n/combine/__init__.py  | 3 +++
A tools/base_browser/l10n/combine/combine.py  | 206 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tools/base_browser/l10n/combine/tests/__init__.py  | 0 
A tools/base_browser/l10n/combine/tests/python.toml  | 10 ++++++++++
A tools/base_browser/l10n/combine/tests/test_android.py  | 420 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tools/base_browser/l10n/combine/tests/test_dtd.py  | 418 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tools/base_browser/l10n/combine/tests/test_fluent.py  | 482 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tools/base_browser/l10n/combine/tests/test_properties.py  | 415 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M tools/moz.build  | 1 +

10 files changed, 2359 insertions(+), 0 deletions(-)
diff --git a/tools/base_browser/l10n/combine-translation-versions.py b/tools/base_browser/l10n/combine-translation-versions.py
@@ -0,0 +1,404 @@
+import argparse
+import json
+import logging
+import os
+import re
+import subprocess
+
+from combine import combine_files
+
+# True only when executing inside the gitlab CI (GITLAB_CI=true) rather than
+# on a developer machine. It switches on some fetch optimisations that suit
+# the temporary gitlab environment but would cause problems if run locally
+# for testing purposes.
+IN_GITLAB_CI_ENV = os.getenv("GITLAB_CI", "") == "true"
+
+# Command line interface: three required positional arguments.
+arg_parser = argparse.ArgumentParser(
+    description="Combine a translation file across two different versions"
+)
+arg_parser.add_argument(
+    "current_branch",
+    help="branch for the newest version",
+    metavar="<current-branch>",
+)
+arg_parser.add_argument(
+    "files",
+    help="JSON specifying the translation files",
+    metavar="<files>",
+)
+arg_parser.add_argument(
+    "outname",
+    help="name of the json output",
+    metavar="<json>",
+)
+args = arg_parser.parse_args()
+
+# Log at INFO level so that the git commands we run show up in the output.
+logging.basicConfig()
+logger = logging.getLogger("combine-translation-versions")
+logger.setLevel(logging.INFO)
+
+
+def in_pink(msg: str) -> str:
+    """Wrap a message in the terminal escape codes for bold pink text.
+
+    :param msg: The message to wrap in pink.
+    :returns: The message to print to terminal.
+    """
+    # Select bold + 256-colour pink, then reset all attributes afterwards.
+    pink_bold = "\x1b[1;38;5;212m"
+    reset = "\x1b[0m"
+    return f"{pink_bold}{msg}{reset}"
+
+
+def git_run(git_args: list[str]) -> None:
+    """Execute git with the given arguments, raising if the command fails.
+
+    :param git_args: The arguments that should follow "git".
+    """
+    command = ["git", *git_args]
+    # Log the command first, so that git's own stderr output has some context
+    # when it appears in the log.
+    logger.info("Running: " + in_pink("git " + " ".join(git_args)))
+    subprocess.run(command, check=True)
+
+
+def git_text(git_args: list[str]) -> str:
+    """Run a git command and capture what it writes to stdout.
+
+    :param git_args: The arguments that should follow "git".
+    :returns: The stdout of the command.
+    """
+    shown_command = "git " + " ".join(git_args)
+    logger.info("Running: " + in_pink(shown_command))
+    completed = subprocess.run(
+        ["git", *git_args], stdout=subprocess.PIPE, check=True, text=True
+    )
+    return completed.stdout
+
+
+def git_lines(git_args: list[str]) -> list[str]:
+    """Run a git command and split its stdout into lines.
+
+    :param git_args: The arguments that should follow "git".
+    :returns: The non-empty lines from stdout of the command.
+    """
+    output = git_text(git_args)
+    # filter(None, ...) drops the empty strings left by blank lines and by the
+    # trailing newline.
+    return list(filter(None, output.split("\n")))
+
+
+class TranslationFile:
+    """A translation file's path paired with its text content."""
+
+    def __init__(self, path: str, content: str) -> None:
+        """Store the given path and content.
+
+        :param path: The path of the file.
+        :param content: The text content of the file.
+        """
+        self.content = content
+        self.path = path
+
+
+class BrowserBranch:
+    """Represents a browser git branch.
+
+    The branch name is parsed into its prefix, firefox version, browser
+    version and branch number. These are used to order branches and to compare
+    browser releases. The branch's tree is listed lazily, the first time a
+    file is requested.
+    """
+
+    def __init__(self, branch_name: str, is_head: bool = False) -> None:
+        """Create a new instance.
+
+        :param branch_name: The branch's git name.
+        :param is_head: Whether the branch matches "HEAD".
+        :raises ValueError: If the version cannot be parsed from the name.
+        """
+        version_match = re.match(
+            r"(?P<prefix>[a-z]+\-browser)\-"
+            r"(?P<firefox>[0-9]+(?:\.[0-9]+){1,2})(?:esr|[ab][0-9]+)?\-"
+            r"(?P<browser>[0-9]+\.[05])\-"
+            r"(?P<number>[0-9]+)$",
+            branch_name,
+        )
+
+        if not version_match:
+            raise ValueError(f"Unable to parse the version from the ref {branch_name}")
+
+        self.name = branch_name
+        self.prefix = version_match.group("prefix")
+        self.browser_version = version_match.group("browser")
+        # Convert tor-browser to "Tor Browser", and similar.
+        browser_name = self.prefix.replace("-", " ").title()
+        self.browser_version_name = f"{browser_name} {self.browser_version}"
+
+        self._is_head = is_head
+        # HEAD is read from the local checkout, other branches from origin.
+        self._ref = "HEAD" if is_head else f"origin/{branch_name}"
+
+        firefox_nums = [int(n) for n in version_match.group("firefox").split(".")]
+        if len(firefox_nums) == 2:
+            # Treat "X.Y" the same as "X.Y.0" so the tuples below compare.
+            firefox_nums.append(0)
+        browser_nums = [int(n) for n in self.browser_version.split(".")]
+        branch_number = int(version_match.group("number"))
+        # Prioritise the firefox ESR version, then the browser version then the
+        # branch number.
+        self._ordered = (
+            firefox_nums[0],
+            firefox_nums[1],
+            firefox_nums[2],
+            browser_nums[0],
+            browser_nums[1],
+            branch_number,
+        )
+
+        # Minor version for browser is only ever "0" or "5", so we can convert
+        # the version to an integer.
+        self._browser_int_version = int(2 * float(self.browser_version))
+
+        # Paths of every file in the branch's tree. Filled in by the first
+        # call to get_file().
+        self._file_paths: list[str] | None = None
+
+    def release_below(self, other: "BrowserBranch", num: int) -> bool:
+        """Determine whether another branch is within range of a previous
+        browser release.
+
+        The browser versions are expected to increment by "0.5", and a previous
+        release branch's version is expected to be `num * 0.5` behind the
+        current one.
+
+        :param other: The branch to compare.
+        :param num: The number of "0.5" releases behind to test with.
+        :returns: Whether `other` is exactly `num` releases behind this branch.
+        """
+        return other._browser_int_version == self._browser_int_version - num
+
+    def __lt__(self, other: "BrowserBranch") -> bool:
+        return self._ordered < other._ordered
+
+    def __gt__(self, other: "BrowserBranch") -> bool:
+        return self._ordered > other._ordered
+
+    def _matching_dirs(self, path: str, dir_list: list[str]) -> bool:
+        """Test that a path is contained in the list of dirs.
+
+        Each directory is normalised before comparing, so a trailing
+        separator or a "." component in a directory does not prevent a match.
+
+        :param path: The path to check.
+        :param dir_list: The list of directories to check against.
+        :returns: Whether the path matches.
+        """
+        for dir_path in dir_list:
+            # os.path.commonpath() returns a normalised path (no trailing
+            # separator, no "." components). Comparing its result against the
+            # raw directory string would silently fail for e.g. "dir/", so
+            # pass the directory through commonpath() on its own first to get
+            # the same normalised form.
+            normalised_dir = os.path.commonpath([dir_path])
+            if os.path.commonpath([normalised_dir, path]) == normalised_dir:
+                return True
+        return False
+
+    def get_file(
+        self, filename: str, search_dirs: list[str] | None
+    ) -> TranslationFile | None:
+        """Fetch the file content for the named file in this branch.
+
+        :param filename: The name of the file to fetch the content for.
+        :param search_dirs: The directories to restrict the search to, or None
+          to search for the file anywhere.
+        :returns: The file, or `None` if no file could be found.
+        :raises Exception: If more than one matching file is found.
+        """
+        if self._file_paths is None:
+            if not self._is_head:
+                fetch_args = ()
+                if IN_GITLAB_CI_ENV:
+                    # Minimal fetch of non-HEAD branch to get the file paths.
+                    # Individual file blobs will be downloaded as needed.
+                    # Only do this when running in the gitlab CI since it will
+                    # alter the user's .git/config and will affect future
+                    # plain fetches.
+                    fetch_args = ("--depth=1", "--filter=blob:none")
+                git_run(["fetch", *fetch_args, "origin", self.name])
+            self._file_paths = git_lines(
+                ["ls-tree", "-r", "--format=%(path)", self._ref]
+            )
+
+        matching = [
+            path
+            for path in self._file_paths
+            if os.path.basename(path) == filename
+            and (search_dirs is None or self._matching_dirs(path, search_dirs))
+        ]
+        if not matching:
+            return None
+        if len(matching) > 1:
+            raise Exception(f"Multiple occurrences of {filename}")
+
+        path = matching[0]
+
+        return TranslationFile(
+            path=path, content=git_text(["cat-file", "blob", f"{self._ref}:{path}"])
+        )
+
+
+def get_stable_branch(
+    compare_version: BrowserBranch,
+) -> tuple[BrowserBranch, BrowserBranch | None]:
+    """Look up the newest stable (and legacy) branch in the origin repository.
+
+    :param compare_version: The development branch to compare against.
+    :returns: The stable and legacy branches. If no legacy branch is found,
+      `None` will be returned instead.
+    """
+    # We look at the build1 tags. Those are added *after* the browser commits
+    # have been rebased, so the corresponding branch should contain our
+    # strings. We also *assume* that the branch with the most recent ESR
+    # version carrying such a tag is the one that will be used in the *next*
+    # stable build in tor-browser-build.
+    prefix = compare_version.prefix
+    tag_glob = f"{prefix}-*-build1"
+
+    if IN_GITLAB_CI_ENV:
+        # To speed up, only fetch the tags without blobs.
+        # Restricted to the gitlab CI because it alters the user's .git/config
+        # and so changes how later plain fetches behave.
+        fetch_args = ("--depth=1", "--filter=object:type=tag")
+    else:
+        fetch_args = ()
+    git_run(["fetch", *fetch_args, "origin", "tag", tag_glob])
+
+    stable_word = re.compile(r"\bstable\b")
+    legacy_word = re.compile(r"\blegacy\b")
+    tag_pattern = re.compile(
+        rf"^{re.escape(prefix)}-[^-]+-[^-]+-[^-]+-build1$"
+    )
+
+    stable_candidates = []
+    legacy_candidates = []
+    for line in git_lines(["tag", "-n1", "--list", tag_glob]):
+        # Each line is the tag name followed by the start of its annotation.
+        build_tag, annotation = line.split(" ", 1)
+        if tag_pattern.match(build_tag) is None:
+            continue
+
+        is_stable = stable_word.search(annotation) is not None
+        is_legacy = legacy_word.search(annotation) is not None
+        if not (is_stable or is_legacy):
+            continue
+
+        try:
+            # Branch name is the same as the tag, minus "-build1".
+            branch = BrowserBranch(re.sub(r"-build1$", "", build_tag))
+        except ValueError:
+            logger.warning(f"Could not read the version for {build_tag}")
+            continue
+        if branch.prefix != prefix:
+            continue
+
+        if not is_stable:
+            # Only annotated as legacy.
+            # Legacy can be arbitrary release versions behind.
+            legacy_candidates.append(branch)
+            continue
+
+        # Stable can be one release version behind.
+        # NOTE: In principle, when switching between versions there may be a
+        # window of time where the development branch has not yet progressed
+        # to the next "0.5" release, so has the same browser version as the
+        # stable branch. So we also allow for matching browser versions.
+        # NOTE:
+        # 1. The "Will be unused in" message will not make sense, but we do
+        #    not expect string differences in this scenario.
+        # 2. We do not expect this scenario to last for long.
+        if any(
+            compare_version.release_below(branch, behind) for behind in (1, 0)
+        ):
+            stable_candidates.append(branch)
+
+    if not stable_candidates:
+        raise Exception("No stable build1 branch found")
+
+    # Pick the highest version of each kind.
+    newest_stable = max(stable_candidates)
+    newest_legacy = max(legacy_candidates) if legacy_candidates else None
+    return (newest_stable, newest_legacy)
+
+
+# The branch being processed is the checked-out HEAD. Its stable (and legacy)
+# counterparts are looked up from the build1 tags in origin.
+current_branch = BrowserBranch(args.current_branch, is_head=True)
+
+stable_branch, legacy_branch = get_stable_branch(current_branch)
+
+# Strings from the legacy branch are only included when explicitly requested.
+if os.environ.get("TRANSLATION_INCLUDE_LEGACY", "") != "true":
+    legacy_branch = None
+
+files_list = []
+
+for file_dict in json.loads(args.files):
+    name = file_dict["name"]
+    where_dirs = file_dict.get("where", None)
+    current_file = current_branch.get_file(name, where_dirs)
+    stable_file = stable_branch.get_file(name, where_dirs)
+
+    if current_file is None and stable_file is None:
+        # No file in either branch.
+        logger.warning(f"{name} does not exist in either the current or stable branch")
+    elif current_file is None:
+        logger.warning(f"{name} deleted in the current branch")
+    elif stable_file is None:
+        logger.warning(f"{name} does not exist in the stable branch")
+    elif current_file.path != stable_file.path:
+        logger.warning(
+            f"{name} has different paths in the current and stable branch. "
+            f"{current_file.path} : {stable_file.path}"
+        )
+
+    content = None if current_file is None else current_file.content
+
+    # If we have a branding file, we want to also include strings from the other
+    # branding directories that differ from the stable release.
+    # The strings that *differ* per release should be specified in
+    # file_dict["branding"]["ids"]. These strings will be copied from the other
+    # release's branding directory, with an additional suffix added to their ID,
+    # as specified in the version_dict["suffix"].
+    branding = file_dict.get("branding", None)
+    if branding:
+        include_ids = branding["ids"]
+        for version_dict in branding["versions"]:
+            branding_dirs = version_dict.get("where", None)
+            branding_file = current_branch.get_file(name, branding_dirs)
+            if branding_file is None:
+                raise Exception(f"{name} does not exist in {branding_dirs}")
+            content = combine_files(
+                name,
+                content,
+                branding_file.content,
+                f'{version_dict["name"]} Release.',
+                include_ids,
+                version_dict["suffix"],
+            )
+
+    # Append the strings that only exist in the stable branch.
+    content = combine_files(
+        name,
+        content,
+        None if stable_file is None else stable_file.content,
+        f"Will be unused in {current_branch.browser_version_name}!",
+    )
+
+    if legacy_branch and not file_dict.get("exclude-legacy", False):
+        legacy_file = legacy_branch.get_file(name, where_dirs)
+        if legacy_file is not None and current_file is None and stable_file is None:
+            logger.warning(f"{name} still exists in the legacy branch")
+        elif legacy_file is None:
+            logger.warning(f"{name} does not exist in the legacy branch")
+        elif stable_file is not None and legacy_file.path != stable_file.path:
+            logger.warning(
+                f"{name} has different paths in the stable and legacy branch. "
+                f"{stable_file.path} : {legacy_file.path}"
+            )
+        elif current_file is not None and legacy_file.path != current_file.path:
+            logger.warning(
+                f"{name} has different paths in the current and legacy branch. "
+                f"{current_file.path} : {legacy_file.path}"
+            )
+
+        content = combine_files(
+            name,
+            content,
+            # The file may be missing from the legacy branch (warned about
+            # above). Pass None in that case, as is done for the stable file,
+            # rather than reading an attribute of None.
+            None if legacy_file is None else legacy_file.content,
+            f"Unused in {stable_branch.browser_version_name}!",
+        )
+    elif legacy_branch:
+        logger.info(f"Excluding legacy branch for {name}")
+
+    files_list.append(
+        {
+            "name": name,
+            # If "directory" is unspecified, we place the file directly beneath
+            # en-US/ in the translation repository. i.e. "".
+            "directory": file_dict.get("directory", ""),
+            "branch": file_dict["branch"],
+            "content": content,
+        }
+    )
+
+
+# Record which CI commit produced this output. Both values are empty when the
+# corresponding environment variables are not set.
+ci_commit = os.environ.get("CI_COMMIT_SHA", "")
+ci_url_base = os.environ.get("CI_PROJECT_URL", "")
+
+# A commit link can only be built when both pieces are known.
+if ci_commit and ci_url_base:
+    commit_url = f"{ci_url_base}/-/commit/{ci_commit}"
+else:
+    commit_url = ""
+
+json_data = {
+    "commit": ci_commit,
+    "commit-url": commit_url,
+    "project-path": os.environ.get("CI_PROJECT_PATH", ""),
+    "current-branch": current_branch.name,
+    "stable-branch": stable_branch.name,
+    "files": files_list,
+}
+
+# The legacy branch is only reported when it is in use.
+if legacy_branch:
+    json_data["legacy-branch"] = legacy_branch.name
+
+with open(args.outname, "w") as file:
+    json.dump(json_data, file)
diff --git a/tools/base_browser/l10n/combine/__init__.py b/tools/base_browser/l10n/combine/__init__.py
@@ -0,0 +1,3 @@
+# flake8: noqa
+
+from .combine import combine_files
diff --git a/tools/base_browser/l10n/combine/combine.py b/tools/base_browser/l10n/combine/combine.py
@@ -0,0 +1,206 @@
+import re
+from typing import TYPE_CHECKING, Any
+
+from compare_locales.parser import getParser
+from compare_locales.parser.android import AndroidEntity, DocumentWrapper
+from compare_locales.parser.base import Comment, Entity, Junk, Whitespace
+from compare_locales.parser.dtd import DTDEntity
+from compare_locales.parser.fluent import FluentComment, FluentEntity
+from compare_locales.parser.properties import PropertiesEntity
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+
+
+def combine_files(
+    filename: str,
+    primary_content: str | None,
+    alternative_content: str | None,
+    comment_prefix: str,
+    include_ids: list[str] | None = None,
+    alternative_suffix: str = "",
+) -> str | None:
+    """Combine two translation files into one to include all strings from both.
+    The primary content is presented first, followed by the alternative content
+    at the end with an additional comment.
+
+    :param filename: The filename for the file, determines the format.
+    :param primary_content: The primary content for the file, or None if it does
+      not exist.
+    :param alternative_content: The alternative content for the file, or None if
+      it does not exist.
+    :param comment_prefix: A comment to include for any strings that are
+      appended to the content. This will be placed before any other comments for
+      the string.
+    :param include_ids: String IDs from `alternative_content` we want to
+      include. If this is `None` then we include all strings that do not already
+      have a matching ID in `primary_content`.
+    :param alternative_suffix: The suffix to apply to the alternative IDs.
+
+    :returns: The combined content, or None if both given contents are None.
+    :raises ValueError: If an android primary file has no final "</resources>",
+      if the alternative content contains Junk or an unexpected entry type, or
+      if the suffix could not be applied to exactly one ID in an entry.
+    """
+    if primary_content is None and alternative_content is None:
+        return None
+
+    # getParser from compare_locales returns the same instance for the same file
+    # extension.
+    # The one parser is used for both contents, so everything needed from the
+    # primary content is extracted before the alternative content is read.
+    parser = getParser(filename)
+
+    is_android = filename.endswith(".xml")
+    if primary_content is None:
+        if is_android:
+            # File was deleted, add some document parts.
+            content_start = (
+                '<?xml version="1.0" encoding="utf-8" standalone="yes"?>\n<resources>\n'
+            )
+            content_end = "</resources>\n"
+        else:
+            # Treat as an empty file.
+            content_start = ""
+            content_end = ""
+        existing_keys = []
+    else:
+        parser.readUnicode(primary_content)
+
+        # Start with the same content as the current file.
+        # For android strings, we want to keep the final "</resources>" until after.
+        if is_android:
+            closing_match = re.match(
+                r"^(.*)(</resources>\s*)$", parser.ctx.contents, re.DOTALL
+            )
+            if not closing_match:
+                raise ValueError("Missing a final </resources>")
+            content_start = closing_match.group(1)
+            content_end = closing_match.group(2)
+        else:
+            content_start = parser.ctx.contents
+            content_end = ""
+        existing_keys = [entry.key for entry in parser.walk(only_localizable=True)]
+
+    # For Fluent, we want to prefix the strings using GroupComments.
+    # On weblate this will cause all the strings that fall under the GroupComment's
+    # scope to have the prefix added to their "notes".
+    # We set up an initial GroupComment for the first string we find. This will also
+    # end the scope of the last GroupComment in the new translation file.
+    # This will be replaced with the next GroupComment when it is found.
+    fluent_group_comment_prefix = f"\n## {comment_prefix}\n"
+    fluent_group_comment: str | None = fluent_group_comment_prefix
+
+    # For other formats, we want to keep all the comment lines that come directly
+    # before the string.
+    # In compare_locales.parser, only the comment line directly before an Entity
+    # counts as the pre_comment for that Entity. I.e. only this line will be
+    # included in Entity.all
+    # However, in weblate every comment line that comes before the Entity is
+    # included as a comment. So we also want to keep these additional comments to
+    # preserve them for weblate.
+    # We gather these extra comments in stacked_comments, and clear them whenever we
+    # reach an Entity or a blank line (Whitespace is more than "\n").
+    stacked_comments: list[str] = []
+
+    # The text pieces to append after the primary content, in order.
+    additions: list[str] = []
+
+    entry_iter: Iterable[Any] = ()
+    # If the file does not exist in the old branch, don't make any additions.
+    if alternative_content is not None:
+        parser.readUnicode(alternative_content)
+        entry_iter = parser.walk(only_localizable=False)
+    for entry in entry_iter:
+        if isinstance(entry, Junk):
+            raise ValueError(f"Unexpected Junk: {entry.all}")
+        if isinstance(entry, Whitespace):
+            # Clear stacked comments if more than one empty line.
+            if entry.all != "\n":
+                stacked_comments.clear()
+            continue
+        if isinstance(entry, Comment):
+            if isinstance(entry, FluentComment):
+                # Don't stack Fluent comments.
+                # Only the comments included in Entity.pre_comment count towards
+                # that Entity's comment.
+                if entry.all.startswith("##"):
+                    # A Fluent GroupComment
+                    if entry.all == "##":
+                        # Empty GroupComment. Used to end the scope of a previous
+                        # GroupComment.
+                        # Replace this with our prefix comment.
+                        fluent_group_comment = fluent_group_comment_prefix
+                    else:
+                        # Prefix the group comment.
+                        fluent_group_comment = (
+                            f"{fluent_group_comment_prefix}{entry.all}\n"
+                        )
+            else:
+                stacked_comments.append(entry.all)
+            continue
+        if isinstance(entry, DocumentWrapper):
+            # Not needed.
+            continue
+
+        if not isinstance(entry, Entity):
+            raise ValueError(f"Unexpected type: {entry.__class__.__name__}")
+
+        if include_ids is None:
+            # We include the entry if it is not already included.
+            include_entry = entry.key not in existing_keys
+        else:
+            # We include the entry if it is in our list.
+            include_entry = entry.key in include_ids
+        if not include_entry:
+            # Drop the gathered comments for this Entity.
+            stacked_comments.clear()
+            continue
+
+        # For each format, id_regex captures the text up to and including the
+        # ID in group 1 and what directly follows it in group 2, so that a
+        # suffix can be inserted between the two further below.
+        if isinstance(entry, FluentEntity):
+            id_regex = rf"^({re.escape(entry.key)})( *=)"
+            if fluent_group_comment is not None:
+                # We have a found GroupComment which has not been included yet.
+                # All following Entity's will be under its scope, until the next
+                # GroupComment.
+                additions.append(fluent_group_comment)
+                # Added GroupComment, so don't need to add again.
+                fluent_group_comment = None
+        elif isinstance(entry, DTDEntity):
+            id_regex = rf"^(\s*<!ENTITY\s*{re.escape(entry.key)})(\s)"
+            # Include our additional comment before we print the rest for this
+            # Entity.
+            additions.append(f"<!-- LOCALIZATION NOTE: {comment_prefix} -->")
+        elif isinstance(entry, PropertiesEntity):
+            id_regex = rf"^({re.escape(entry.key)})( *=)"
+            additions.append(f"# {comment_prefix}")
+        elif isinstance(entry, AndroidEntity):
+            id_regex = rf'^(\s*<string\s[^>]*name="{re.escape(entry.key)})(")'
+            additions.append(f"<!-- {comment_prefix} -->")
+        else:
+            raise ValueError(f"Unexpected Entity type: {entry.__class__.__name__}")
+
+        # Add any other comment lines that came directly before this Entity.
+        additions.extend(stacked_comments)
+        stacked_comments.clear()
+        entry_content = entry.all
+        if alternative_suffix:
+            # NOTE: compare_locales does not allow us to set the entry.key
+            # value. Instead we use a regular expression to append the suffix to
+            # the expected key.
+            entry_content, count = re.subn(
+                id_regex, rf"\1{alternative_suffix}\2", entry_content, flags=re.M
+            )
+            # Anything other than exactly one substitution means the ID was
+            # not found, or was matched more than once.
+            if count != 1:
+                raise ValueError(f"Failed to substitute the ID for {entry.key}")
+        additions.append(entry_content)
+
+    content_middle = ""
+
+    if additions:
+        # New line before and after the additions
+        additions.insert(0, "")
+        additions.append("")
+        if is_android:
+            # Each addition is indented by four spaces in android files.
+            content_middle = "\n    ".join(additions)
+        else:
+            content_middle = "\n".join(additions)
+
+        # Remove " " in otherwise blank lines.
+        content_middle = re.sub("^ +$", "", content_middle, flags=re.MULTILINE)
+
+    return content_start + content_middle + content_end
diff --git a/tools/base_browser/l10n/combine/tests/__init__.py b/tools/base_browser/l10n/combine/tests/__init__.py
diff --git a/tools/base_browser/l10n/combine/tests/python.toml b/tools/base_browser/l10n/combine/tests/python.toml
@@ -0,0 +1,10 @@
+[DEFAULT]
+subsuite = "base-browser"
+
+["test_android.py"]
+
+["test_dtd.py"]
+
+["test_fluent.py"]
+
+["test_properties.py"]
diff --git a/tools/base_browser/l10n/combine/tests/test_android.py b/tools/base_browser/l10n/combine/tests/test_android.py
@@ -0,0 +1,420 @@
+import textwrap
+
+import mozunit
+from base_browser.l10n.combine import combine_files
+
+
+def wrap_in_xml(content):
+    """Embed a test snippet in a complete Android ``<resources>`` document.
+
+    The snippet is dedented first, so that test cases can indent their
+    literals to make the tests more readable, and is then indented by four
+    spaces to sit inside the ``<resources>`` element. ``None`` is passed
+    through unchanged.
+    """
+    if content is None:
+        return None
+    resources_body = textwrap.indent(textwrap.dedent(content), "    ")
+    return f"""\
+<?xml version="1.0" encoding="utf-8" standalone="yes"?>
+<resources>
+{resources_body}</resources>
+"""
+
+
+def assert_result(new_content, old_content, expect):
+    """Assert that combining the new and old snippets gives ``expect``.
+
+    Each argument is an indented snippet of ``<string>`` elements, or ``None``.
+    All three are wrapped into full XML documents before ``combine_files`` is
+    called for "test_strings.xml" with the "REMOVED STRING" comment text.
+    """
+    wrapped_new, wrapped_old, wrapped_expect = map(
+        wrap_in_xml, (new_content, old_content, expect)
+    )
+    combined = combine_files(
+        "test_strings.xml", wrapped_new, wrapped_old, "REMOVED STRING"
+    )
+    assert wrapped_expect == combined
+
+
+def assert_alternative(content, alternative_content, alternative_ids, expect):
+    """Assert that adding the listed alternative strings gives ``expect``.
+
+    ``content``, ``alternative_content`` and ``expect`` are indented snippets
+    of ``<string>`` elements (or ``None``) that get wrapped into full XML
+    documents. ``alternative_ids`` is handed to ``combine_files`` as is,
+    together with the "ALTERNATIVE STRING" comment text and the "_alt" suffix.
+    """
+    wrapped_content, wrapped_alternative, wrapped_expect = map(
+        wrap_in_xml, (content, alternative_content, expect)
+    )
+    combined = combine_files(
+        "test_strings.xml",
+        wrapped_content,
+        wrapped_alternative,
+        "ALTERNATIVE STRING",
+        alternative_ids,
+        "_alt",
+    )
+    assert wrapped_expect == combined
+
+
+def test_combine_empty():
+    """With neither new nor old content, the combined result is None."""
+    assert_result(new_content=None, old_content=None, expect=None)
+
+
+def test_combine_new_file():
+    """A new file with no old content is expected back unchanged."""
+    new_file = """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """
+    assert_result(new_file, None, new_file)
+
+
+def test_combine_removed_file():
+    """When the entire file was removed, each old string is kept and marked."""
+    removed_file = """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """
+    marked_strings = """\
+
+        <!-- REMOVED STRING -->
+        <string name="string_1">First</string>
+        <!-- REMOVED STRING -->
+        <string name="string_2">Second</string>
+        """
+    assert_result(None, removed_file, marked_strings)
+
+
+def test_no_change():
+    """Identical new and old content is expected back unchanged."""
+    unchanged = """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """
+    assert_result(new_content=unchanged, old_content=unchanged, expect=unchanged)
+
+
+def test_added_string():
+    """A string that only exists in the new content stays where it is."""
+    with_addition = """\
+        <string name="string_1">First</string>
+        <string name="string_new">NEW</string>
+        <string name="string_2">Second</string>
+        """
+    without_addition = """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """
+    # The expected result is exactly the new content.
+    assert_result(with_addition, without_addition, with_addition)
+
+
+def test_removed_string():
+    """A string missing from the new content is appended and marked."""
+    new_strings = """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """
+    old_strings = """\
+        <string name="string_1">First</string>
+        <string name="removed">REMOVED</string>
+        <string name="string_2">Second</string>
+        """
+    combined = """\
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+
+        <!-- REMOVED STRING -->
+        <string name="removed">REMOVED</string>
+        """
+    assert_result(new_strings, old_strings, combined)
+
+
+def test_removed_and_added():
+    """New strings keep their place; removed ones are appended in old order."""
+    new_strings = """\
+        <string name="new_1">New string</string>
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        <string name="new_2">New string 2</string>
+        """
+    old_strings = """\
+        <string name="string_1">First</string>
+        <string name="removed_1">First removed</string>
+        <string name="removed_2">Second removed</string>
+        <string name="string_2">Second</string>
+        <string name="removed_3">Third removed</string>
+        """
+    combined = """\
+        <string name="new_1">New string</string>
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        <string name="new_2">New string 2</string>
+
+        <!-- REMOVED STRING -->
+        <string name="removed_1">First removed</string>
+        <!-- REMOVED STRING -->
+        <string name="removed_2">Second removed</string>
+        <!-- REMOVED STRING -->
+        <string name="removed_3">Third removed</string>
+        """
+    assert_result(new_strings, old_strings, combined)
+
+
+def test_updated():
+    """When a string's content was updated, only the new value is expected."""
+    updated = """\
+        <string name="changed_string">NEW</string>
+        """
+    outdated = """\
+        <string name="changed_string">OLD</string>
+        """
+    assert_result(updated, outdated, updated)
+
+
+def test_updated_comment():
+    """Comment-only differences never bring anything over from the old file.
+
+    In every case below the expected result is exactly the new content.
+    """
+    new_comment = """\
+        <!-- NEW -->
+        <string name="changed_string">string</string>
+        """
+    old_comment = """\
+        <!-- OLD -->
+        <string name="changed_string">string</string>
+        """
+    no_comment = """\
+        <string name="changed_string">string</string>
+        """
+    new_file_comment = """\
+        <!-- NEW file comment -->
+
+        <!-- NEW -->
+        <string name="changed_string">string</string>
+        """
+    old_file_comment = """\
+        <!-- OLD file comment -->
+
+        <!-- OLD -->
+        <string name="changed_string">string</string>
+        """
+    # Pairs of (new content, old content).
+    cases = [
+        # String comment was updated.
+        (new_comment, old_comment),
+        # Comment added.
+        (new_comment, no_comment),
+        # Comment removed.
+        (no_comment, old_comment),
+        # With file comments
+        (new_file_comment, old_file_comment),
+    ]
+    for new_content, old_content in cases:
+        assert_result(new_content, old_content, new_content)
+
+
+def test_reordered():
+    """A string that was only re-ordered follows the new order."""
+    new_order = """\
+        <string name="string_1">value</string>
+        <string name="moved_string">move</string>
+        """
+    old_order = """\
+        <string name="moved_string">move</string>
+        <string name="string_1">value</string>
+        """
+    assert_result(new_order, old_order, new_order)
+
+
+def test_removed_string_with_comment():
+    """The comment directly above a removed string moves along with it."""
+    assert_result(
+        new_content="""\
+        <!-- Comment for first. -->
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+        """,
+        old_content="""\
+        <!-- Comment for first. -->
+        <string name="string_1">First</string>
+        <!-- Comment for removed. -->
+        <string name="removed">REMOVED</string>
+        <string name="string_2">Second</string>
+        """,
+        expect="""\
+        <!-- Comment for first. -->
+        <string name="string_1">First</string>
+        <string name="string_2">Second</string>
+
+        <!-- REMOVED STRING -->
+        <!-- Comment for removed. -->
+        <string name="removed">REMOVED</string>
+        """,
+    )
+
+    # With file comments and multi-line.
+    # All comments prior to a removed string are moved with it, until another
+    # entity or blank line is reached.
+    assert_result(
+        new_content="""\
+        <!-- First File comment -->
+
+        <!-- Comment for first. -->
+        <!-- Comment 2 for first. -->
+        <string name="string_1">First</string>
+
+        <!-- Second -->
+        <!-- File comment -->
+
+        <string name="string_2">Second</string>
+        """,
+        old_content="""\
+        <!-- First File comment -->
+
+        <!-- Comment for first. -->
+        <!-- Comment 2 for first. -->
+        <string name="string_1">First</string>
+        <string name="removed_1">First removed</string>
+        <!-- Comment for second removed. -->
+        <string name="removed_2">Second removed</string>
+
+        <!-- Removed file comment -->
+
+        <!-- Comment 1 for third removed -->
+        <!-- Comment 2 for third removed -->
+        <string name="removed_3">Third removed</string>
+
+        <!-- Second -->
+        <!-- File comment -->
+
+        <string name="removed_4">Fourth removed</string>
+        <string name="string_2">Second</string>
+        """,
+        expect="""\
+        <!-- First File comment -->
+
+        <!-- Comment for first. -->
+        <!-- Comment 2 for first. -->
+        <string name="string_1">First</string>
+
+        <!-- Second -->
+        <!-- File comment -->
+
+        <string name="string_2">Second</string>
+
+        <!-- REMOVED STRING -->
+        <string name="removed_1">First removed</string>
+        <!-- REMOVED STRING -->
+        <!-- Comment for second removed. -->
+        <string name="removed_2">Second removed</string>
+        <!-- REMOVED STRING -->
+        <!-- Comment 1 for third removed -->
+        <!-- Comment 2 for third removed -->
+        <string name="removed_3">Third removed</string>
+        <!-- REMOVED STRING -->
+        <string name="removed_4">Fourth removed</string>
+        """,
+    )
+
+
+def test_alternatives():
+    """Only the listed alternative strings are appended, with "_alt" names."""
+    # A single string with an alternative value.
+    assert_alternative(
+        content="""\
+        <string name="string_1">First string</string>
+        """,
+        alternative_content="""\
+        <string name="string_1">Alternative string</string>
+        """,
+        alternative_ids=["string_1"],
+        expect="""\
+        <string name="string_1">First string</string>
+
+        <!-- ALTERNATIVE STRING -->
+        <string name="string_1_alt">Alternative string</string>
+        """,
+    )
+    # The alternative's own comment is carried over, and strings that are not
+    # listed are not added, even when they differ or are missing.
+    assert_alternative(
+        content="""\
+        <!-- Comment 1 -->
+        <string name="string_1">First string</string>
+        <!-- Comment 2 -->
+        <string name="string_2">Second string</string>
+        <string name="string_3">Third string</string>
+        """,
+        alternative_content="""\
+        <string name="string_1">First string</string>
+        <!-- Alt comment -->
+        <string name="string_2">Alternative string</string>
+        <string name="string_3">Third string different</string>
+        <string name="string_4">Other string</string>
+        """,
+        alternative_ids=["string_2"],
+        expect="""\
+        <!-- Comment 1 -->
+        <string name="string_1">First string</string>
+        <!-- Comment 2 -->
+        <string name="string_2">Second string</string>
+        <string name="string_3">Third string</string>
+
+        <!-- ALTERNATIVE STRING -->
+        <!-- Alt comment -->
+        <string name="string_2_alt">Alternative string</string>
+        """,
+    )
+    # Several listed IDs, one of which has no counterpart in the main content.
+    assert_alternative(
+        content="""\
+        <string name="string_1">First string</string>
+        <string name="string_2">Second string</string>
+        <string name="string_3">Third string</string>
+        """,
+        alternative_content="""\
+        <string name="string_1">Alternative string</string>
+        <string name="string_3">Third string</string>
+        <!-- comment -->
+        <string name="string_4">Other string</string>
+        """,
+        alternative_ids=["string_1", "string_4"],
+        expect="""\
+        <string name="string_1">First string</string>
+        <string name="string_2">Second string</string>
+        <string name="string_3">Third string</string>
+
+        <!-- ALTERNATIVE STRING -->
+        <string name="string_1_alt">Alternative string</string>
+        <!-- ALTERNATIVE STRING -->
+        <!-- comment -->
+        <string name="string_4_alt">Other string</string>
+        """,
+    )
+
+
+# Hand off to mozunit when this file is executed as a script rather than
+# imported.
+if __name__ == "__main__":
+    mozunit.main()
diff --git a/tools/base_browser/l10n/combine/tests/test_dtd.py b/tools/base_browser/l10n/combine/tests/test_dtd.py
@@ -0,0 +1,418 @@
+import textwrap
+
+import mozunit
+from base_browser.l10n.combine import combine_files
+
+
+def assert_result(new_content, old_content, expect):
+    """Assert that combining the new and old DTD content gives ``expect``.
+
+    Each argument is either ``None`` or a string, which is dedented before
+    use. ``combine_files`` is called for "test.dtd" with the "REMOVED STRING"
+    comment text.
+    """
+    # Allow for indents to make the tests more readable.
+    new_content, old_content, expect = (
+        None if text is None else textwrap.dedent(text)
+        for text in (new_content, old_content, expect)
+    )
+    combined = combine_files("test.dtd", new_content, old_content, "REMOVED STRING")
+    assert expect == combined
+
+
+def assert_alternative(content, alternative_content, alternative_ids, expect):
+    """Assert that adding the listed alternative entities gives ``expect``.
+
+    ``content``, ``alternative_content`` and ``expect`` are either ``None`` or
+    a string, which is dedented before use. ``alternative_ids`` is handed to
+    ``combine_files`` as is, together with the "ALTERNATIVE STRING" comment
+    text and the ".alt" suffix.
+    """
+    content, alternative_content, expect = (
+        None if text is None else textwrap.dedent(text)
+        for text in (content, alternative_content, expect)
+    )
+    combined = combine_files(
+        "test.dtd",
+        content,
+        alternative_content,
+        "ALTERNATIVE STRING",
+        alternative_ids,
+        ".alt",
+    )
+    assert expect == combined
+
+
+def test_combine_empty():
+    """With neither new nor old content, the combined result is None."""
+    assert_result(new_content=None, old_content=None, expect=None)
+
+
+def test_combine_new_file():
+    """A new file with no old content is expected back unchanged."""
+    new_file = """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """
+    assert_result(new_file, None, new_file)
+
+
+def test_combine_removed_file():
+    """When the entire file was removed, each old entity is kept and marked."""
+    removed_file = """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """
+    marked_entities = """\
+
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY string.1 "First">
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY string.2 "Second">
+        """
+    assert_result(None, removed_file, marked_entities)
+
+
+def test_no_change():
+    """Identical new and old content is expected back unchanged."""
+    unchanged = """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """
+    assert_result(new_content=unchanged, old_content=unchanged, expect=unchanged)
+
+
+def test_added_string():
+    """An entity that only exists in the new content stays where it is."""
+    with_addition = """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.new "NEW">
+        <!ENTITY string.2 "Second">
+        """
+    without_addition = """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """
+    # The expected result is exactly the new content.
+    assert_result(with_addition, without_addition, with_addition)
+
+
+def test_removed_string():
+    """An entity missing from the new content is appended and marked."""
+    new_entities = """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """
+    old_entities = """\
+        <!ENTITY string.1 "First">
+        <!ENTITY removed "REMOVED">
+        <!ENTITY string.2 "Second">
+        """
+    combined = """\
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY removed "REMOVED">
+        """
+    assert_result(new_entities, old_entities, combined)
+
+
+def test_removed_and_added():
+    """New entities keep their place; removed ones are appended in old order."""
+    new_entities = """\
+        <!ENTITY new.1 "New string">
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        <!ENTITY new.2 "New string 2">
+        """
+    old_entities = """\
+        <!ENTITY string.1 "First">
+        <!ENTITY removed.1 "First removed">
+        <!ENTITY removed.2 "Second removed">
+        <!ENTITY string.2 "Second">
+        <!ENTITY removed.3 "Third removed">
+        """
+    combined = """\
+        <!ENTITY new.1 "New string">
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        <!ENTITY new.2 "New string 2">
+
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY removed.1 "First removed">
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY removed.2 "Second removed">
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY removed.3 "Third removed">
+        """
+    assert_result(new_entities, old_entities, combined)
+
+
+def test_updated():
+    """When an entity's content was updated, only the new value is expected."""
+    updated = """\
+        <!ENTITY changed.string "NEW">
+        """
+    outdated = """\
+        <!ENTITY changed.string "OLD">
+        """
+    assert_result(updated, outdated, updated)
+
+
+def test_updated_comment():
+    """Comment-only differences never bring anything over from the old file.
+
+    In every case below the expected result is exactly the new content.
+    """
+    new_note = """\
+        <!-- LOCALIZATION NOTE: NEW -->
+        <!ENTITY changed.string "string">
+        """
+    old_note = """\
+        <!-- LOCALIZATION NOTE: OLD -->
+        <!ENTITY changed.string "string">
+        """
+    no_note = """\
+        <!ENTITY changed.string "string">
+        """
+    new_multiple = """\
+        <!-- NEW FILE COMMENT -->
+
+        <!-- LOCALIZATION NOTE: NEW -->
+        <!ENTITY changed.string "string">
+        """
+    old_multiple = """\
+        <!-- OLD -->
+
+        <!-- LOCALIZATION NOTE: OLD -->
+        <!ENTITY changed.string "string">
+        """
+    # Pairs of (new content, old content).
+    cases = [
+        # String comment was updated.
+        (new_note, old_note),
+        # Comment added.
+        (new_note, no_note),
+        # Comment removed.
+        (no_note, old_note),
+        # With multiple comments
+        (new_multiple, old_multiple),
+    ]
+    for new_content, old_content in cases:
+        assert_result(new_content, old_content, new_content)
+
+
+def test_reordered():
+    """An entity that was only re-ordered follows the new order."""
+    new_order = """\
+        <!ENTITY string.1 "value">
+        <!ENTITY moved.string "move">
+        """
+    old_order = """\
+        <!ENTITY moved.string "move">
+        <!ENTITY string.1 "value">
+        """
+    assert_result(new_order, old_order, new_order)
+
+
+def test_removed_string_with_comment():
+    """The comments directly above a removed entity move along with it."""
+    assert_result(
+        new_content="""\
+        <!-- LOCALIZATION NOTE: Comment for first. -->
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+        """,
+        old_content="""\
+        <!-- LOCALIZATION NOTE: Comment for first. -->
+        <!ENTITY string.1 "First">
+        <!-- LOCALIZATION NOTE: Comment for removed. -->
+        <!ENTITY removed "REMOVED">
+        <!ENTITY string.2 "Second">
+        """,
+        expect="""\
+        <!-- LOCALIZATION NOTE: Comment for first. -->
+        <!ENTITY string.1 "First">
+        <!ENTITY string.2 "Second">
+
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!-- LOCALIZATION NOTE: Comment for removed. -->
+        <!ENTITY removed "REMOVED">
+        """,
+    )
+
+    # With multiple lines of comments.
+    # Comments separated from a removed entity by a blank line are expected to
+    # stay behind.
+    assert_result(
+        new_content="""\
+        <!-- First file comment -->
+
+        <!-- LOCALIZATION NOTE: Comment for first. -->
+        <!-- LOCALIZATION NOTE: Comment 2 for first. -->
+        <!ENTITY string.1 "First">
+
+        <!-- Second
+           - file
+           - comment -->
+
+        <!ENTITY string.2 "Second">
+        """,
+        old_content="""\
+        <!-- First file comment -->
+
+        <!-- LOCALIZATION NOTE: Comment for first. -->
+        <!ENTITY string.1 "First">
+        <!ENTITY removed.1 "First removed">
+        <!-- LOCALIZATION NOTE: Comment for second removed. -->
+        <!ENTITY removed.2 "Second removed">
+
+        <!-- Removed file comment -->
+
+        <!-- LOCALIZATION NOTE: Comment for third removed. -->
+        <!-- LOCALIZATION NOTE: Comment 2 for
+        third removed. -->
+        <!ENTITY removed.3 "Third removed">
+
+        <!-- Second
+           - file
+           - comment -->
+
+        <!ENTITY removed.4 "Fourth removed">
+        <!ENTITY string.2 "Second">
+        """,
+        expect="""\
+        <!-- First file comment -->
+
+        <!-- LOCALIZATION NOTE: Comment for first. -->
+        <!-- LOCALIZATION NOTE: Comment 2 for first. -->
+        <!ENTITY string.1 "First">
+
+        <!-- Second
+           - file
+           - comment -->
+
+        <!ENTITY string.2 "Second">
+
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY removed.1 "First removed">
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!-- LOCALIZATION NOTE: Comment for second removed. -->
+        <!ENTITY removed.2 "Second removed">
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!-- LOCALIZATION NOTE: Comment for third removed. -->
+        <!-- LOCALIZATION NOTE: Comment 2 for
+        third removed. -->
+        <!ENTITY removed.3 "Third removed">
+        <!-- LOCALIZATION NOTE: REMOVED STRING -->
+        <!ENTITY removed.4 "Fourth removed">
+        """,
+    )
+
+
+def test_alternatives():
+    """Only the listed alternative entities are appended, with ".alt" names."""
+    # A single entity with an alternative value.
+    assert_alternative(
+        content="""\
+        <!ENTITY string.1 "First string">
+        """,
+        alternative_content="""\
+        <!ENTITY string.1 "Alternative string">
+        """,
+        alternative_ids=["string.1"],
+        expect="""\
+        <!ENTITY string.1 "First string">
+
+        <!-- LOCALIZATION NOTE: ALTERNATIVE STRING -->
+        <!ENTITY string.1.alt "Alternative string">
+        """,
+    )
+    # The alternative's own comment is carried over, and entities that are not
+    # listed are not added, even when they differ or are missing.
+    assert_alternative(
+        content="""\
+        <!-- LOCALIZATION NOTE: Comment 1 -->
+        <!ENTITY string.1 "First string">
+        <!-- LOCALIZATION NOTE: Comment 2 -->
+        <!ENTITY string.2 "Second string">
+        <!ENTITY string.3 "Third string">
+        """,
+        alternative_content="""\
+        <!ENTITY string.1 "First string">
+        <!-- LOCALIZATION NOTE: Alt comment -->
+        <!ENTITY string.2 "Alternative string">
+        <!ENTITY string.3 "Third string different">
+        <!ENTITY string.4 "Other string">
+        """,
+        alternative_ids=["string.2"],
+        expect="""\
+        <!-- LOCALIZATION NOTE: Comment 1 -->
+        <!ENTITY string.1 "First string">
+        <!-- LOCALIZATION NOTE: Comment 2 -->
+        <!ENTITY string.2 "Second string">
+        <!ENTITY string.3 "Third string">
+
+        <!-- LOCALIZATION NOTE: ALTERNATIVE STRING -->
+        <!-- LOCALIZATION NOTE: Alt comment -->
+        <!ENTITY string.2.alt "Alternative string">
+        """,
+    )
+    # Several listed IDs, one of which has no counterpart in the main content.
+    assert_alternative(
+        content="""\
+        <!ENTITY string.1 "First string">
+        <!ENTITY string.2 "Second string">
+        <!ENTITY string.3 "Third string">
+        """,
+        alternative_content="""\
+        <!ENTITY string.1 "Alternative string">
+        <!ENTITY string.3 "Third string">
+        <!-- LOCALIZATION NOTE: comment -->
+        <!ENTITY string.4 "Other string">
+        """,
+        alternative_ids=["string.1", "string.4"],
+        expect="""\
+        <!ENTITY string.1 "First string">
+        <!ENTITY string.2 "Second string">
+        <!ENTITY string.3 "Third string">
+
+        <!-- LOCALIZATION NOTE: ALTERNATIVE STRING -->
+        <!ENTITY string.1.alt "Alternative string">
+        <!-- LOCALIZATION NOTE: ALTERNATIVE STRING -->
+        <!-- LOCALIZATION NOTE: comment -->
+        <!ENTITY string.4.alt "Other string">
+        """,
+    )
+
+
+# Hand off to mozunit when this file is executed as a script rather than
+# imported.
+if __name__ == "__main__":
+    mozunit.main()
diff --git a/tools/base_browser/l10n/combine/tests/test_fluent.py b/tools/base_browser/l10n/combine/tests/test_fluent.py
@@ -0,0 +1,482 @@
+import textwrap
+
+import mozunit
+from base_browser.l10n.combine import combine_files
+
+
+def assert_result(new_content, old_content, expect):
+    """Assert that combining the new and old Fluent content gives ``expect``.
+
+    Each argument is either ``None`` or a string, which is dedented before
+    use. ``combine_files`` is called for "test.ftl" with the "REMOVED STRING"
+    comment text.
+    """
+    # Allow for indents to make the tests more readable.
+    new_content, old_content, expect = (
+        None if text is None else textwrap.dedent(text)
+        for text in (new_content, old_content, expect)
+    )
+    combined = combine_files("test.ftl", new_content, old_content, "REMOVED STRING")
+    assert expect == combined
+
+
+def assert_alternative(content, alternative_content, alternative_ids, expect):
+    """Assert that adding the listed alternative entries gives ``expect``.
+
+    ``content``, ``alternative_content`` and ``expect`` are either ``None`` or
+    a string, which is dedented before use. ``alternative_ids`` is handed to
+    ``combine_files`` as is, together with the "ALTERNATIVE STRING" comment
+    text and the "-alt" suffix.
+    """
+    content, alternative_content, expect = (
+        None if text is None else textwrap.dedent(text)
+        for text in (content, alternative_content, expect)
+    )
+    combined = combine_files(
+        "test.ftl",
+        content,
+        alternative_content,
+        "ALTERNATIVE STRING",
+        alternative_ids,
+        "-alt",
+    )
+    assert expect == combined
+
+
+def test_combine_empty():
+    """With neither new nor old content, the combined result is None."""
+    assert_result(new_content=None, old_content=None, expect=None)
+
+
+def test_combine_new_file():
+    """A new file with no old content is expected back unchanged."""
+    new_file = """\
+        string-1 = First
+        string-2 = Second
+        """
+    assert_result(new_file, None, new_file)
+
+
+def test_combine_removed_file():
+    """When the entire file was removed, its messages go under one marker."""
+    removed_file = """\
+        string-1 = First
+        string-2 = Second
+        """
+    marked_messages = """\
+
+
+        ## REMOVED STRING
+
+        string-1 = First
+        string-2 = Second
+        """
+    assert_result(None, removed_file, marked_messages)
+
+
+def test_no_change():
+    """Identical new and old content is expected back unchanged."""
+    unchanged = """\
+        string-1 = First
+        string-2 = Second
+        """
+    assert_result(new_content=unchanged, old_content=unchanged, expect=unchanged)
+
+
+def test_added_string():
+    """A message that only exists in the new content stays where it is."""
+    with_addition = """\
+        string-1 = First
+        string-new = NEW
+        string-2 = Second
+        """
+    without_addition = """\
+        string-1 = First
+        string-2 = Second
+        """
+    # The expected result is exactly the new content.
+    assert_result(with_addition, without_addition, with_addition)
+
+
+def test_removed_string():
+    """A message missing from the new content is appended under a marker."""
+    new_messages = """\
+        string-1 = First
+        string-2 = Second
+        """
+    old_messages = """\
+        string-1 = First
+        removed = REMOVED
+        string-2 = Second
+        """
+    combined = """\
+        string-1 = First
+        string-2 = Second
+
+
+        ## REMOVED STRING
+
+        removed = REMOVED
+        """
+    assert_result(new_messages, old_messages, combined)
+
+
+def test_removed_and_added():
+    """New messages keep their place; removed ones share a single marker."""
+    new_messages = """\
+        new-1 = New string
+        string-1 =
+            .attr = First
+        string-2 = Second
+        new-2 =
+            .title = New string 2
+        """
+    old_messages = """\
+        string-1 =
+            .attr = First
+        removed-1 = First removed
+        removed-2 =
+            .attr = Second removed
+        string-2 = Second
+        removed-3 = Third removed
+        """
+    combined = """\
+        new-1 = New string
+        string-1 =
+            .attr = First
+        string-2 = Second
+        new-2 =
+            .title = New string 2
+
+
+        ## REMOVED STRING
+
+        removed-1 = First removed
+        removed-2 =
+            .attr = Second removed
+        removed-3 = Third removed
+        """
+    assert_result(new_messages, old_messages, combined)
+
+
+def test_updated():
+    """When a message's content was updated, only the new value is expected."""
+    updated = """\
+        changed-string = NEW
+        """
+    outdated = """\
+        changed-string = OLD
+        """
+    assert_result(updated, outdated, updated)
+
+
+def test_updated_comment():
+    """Comment-only differences never bring anything over from the old file.
+
+    In every case below the expected result is exactly the new content.
+    """
+    new_comment = """\
+        # NEW
+        changed-string = string
+        """
+    old_comment = """\
+        # OLD
+        changed-string = string
+        """
+    no_comment = """\
+        changed-string = string
+        """
+    new_group = """\
+        ## GROUP NEW
+
+        # NEW
+        changed-string = string
+        """
+    old_group = """\
+        ## GROUP OLD
+
+        # OLD
+        changed-string = string
+        """
+    # Pairs of (new content, old content).
+    cases = [
+        # String comment was updated.
+        (new_comment, old_comment),
+        # Comment added.
+        (new_comment, no_comment),
+        # Comment removed.
+        (no_comment, old_comment),
+        # With group comments.
+        (new_group, old_group),
+    ]
+    for new_content, old_content in cases:
+        assert_result(new_content, old_content, new_content)
+
+
+def test_reordered():
+    """A message that was only re-ordered follows the new order."""
+    new_order = """\
+        string-1 = value
+        moved-string = move
+        """
+    old_order = """\
+        moved-string = move
+        string-1 = value
+        """
+    assert_result(new_order, old_order, new_order)
+
+
+def test_removed_string_with_comment():
+    """Removed messages keep their own comment and their group comment."""
+    assert_result(
+        new_content="""\
+        # Comment for first.
+        string-1 = First
+        string-2 = Second
+        """,
+        old_content="""\
+        # Comment for first.
+        string-1 = First
+        # Comment for removed.
+        removed = REMOVED
+        string-2 = Second
+        """,
+        expect="""\
+        # Comment for first.
+        string-1 = First
+        string-2 = Second
+
+
+        ## REMOVED STRING
+
+        # Comment for removed.
+        removed = REMOVED
+        """,
+    )
+
+    # Group comments are combined with the "REMOVED STRING" comments.
+    # If strings have no group comment, then a single "REMOVED STRING" is
+    # included for them.
+    assert_result(
+        new_content="""\
+        ## First Group comment
+
+        # Comment for first.
+        string-1 = First
+
+        ##
+
+        no-group = No group comment
+
+        ## Second
+        ## Group comment
+
+        string-2 = Second
+        """,
+        old_content="""\
+        ## First Group comment
+
+        # Comment for first.
+        string-1 = First
+        removed-1 = First removed
+        # Comment for second removed.
+        removed-2 = Second removed
+
+        ##
+
+        no-group = No group comment
+        removed-3 = Third removed
+
+        ## Second
+        ## Group comment
+
+        removed-4 = Fourth removed
+        string-2 = Second
+        """,
+        expect="""\
+        ## First Group comment
+
+        # Comment for first.
+        string-1 = First
+
+        ##
+
+        no-group = No group comment
+
+        ## Second
+        ## Group comment
+
+        string-2 = Second
+
+
+        ## REMOVED STRING
+        ## First Group comment
+
+        removed-1 = First removed
+        # Comment for second removed.
+        removed-2 = Second removed
+
+        ## REMOVED STRING
+
+        removed-3 = Third removed
+
+        ## REMOVED STRING
+        ## Second
+        ## Group comment
+
+        removed-4 = Fourth removed
+        """,
+    )
+
+
+def test_alternatives():
+    """Only the listed alternative entries are appended, with "-alt" names."""
+    # A message whose value and attribute both differ.
+    assert_alternative(
+        content="""\
+        string-1 = First string
+            .title = hello
+        """,
+        alternative_content="""\
+        string-1 = Alternative string
+            .title = different
+        """,
+        alternative_ids=["string-1"],
+        expect="""\
+        string-1 = First string
+            .title = hello
+
+
+        ## ALTERNATIVE STRING
+
+        string-1-alt = Alternative string
+            .title = different
+        """,
+    )
+    # The alternative has no attribute.
+    assert_alternative(
+        content="""\
+        string-1 = First string
+            .title = hello
+        """,
+        alternative_content="""\
+        string-1 = Alternative string
+        """,
+        alternative_ids=["string-1"],
+        expect="""\
+        string-1 = First string
+            .title = hello
+
+
+        ## ALTERNATIVE STRING
+
+        string-1-alt = Alternative string
+        """,
+    )
+    # A term, whose ID starts with "-".
+    assert_alternative(
+        content="""\
+        -term-1 = First string
+        """,
+        alternative_content="""\
+        -term-1 = Alternative string
+        """,
+        alternative_ids=["-term-1"],
+        expect="""\
+        -term-1 = First string
+
+
+        ## ALTERNATIVE STRING
+
+        -term-1-alt = Alternative string
+        """,
+    )
+    # The alternative's own comment is carried over, and entries that are not
+    # listed are not added, even when they differ or are missing.
+    assert_alternative(
+        content="""\
+        # Comment 1
+        string-1 = First string
+        # Comment 2
+        string-2 = Second string
+        string-3 = Third string
+        """,
+        alternative_content="""\
+        string-1 = First string
+        # Alt comment
+        string-2 = Alternative string
+        string-3 = Third string different
+        string-4 = Other string
+        """,
+        alternative_ids=["string-2"],
+        expect="""\
+        # Comment 1
+        string-1 = First string
+        # Comment 2
+        string-2 = Second string
+        string-3 = Third string
+
+
+        ## ALTERNATIVE STRING
+
+        # Alt comment
+        string-2-alt = Alternative string
+        """,
+    )
+    # Several listed IDs, one of which has no counterpart in the main content.
+    assert_alternative(
+        content="""\
+        string-1 = First string
+        string-2 = Second string
+        string-3 = Third string
+        """,
+        alternative_content="""\
+        string-1 = Alternative string
+        string-3 = Third string
+        # comment
+        -string-4 = Other string
+        """,
+        alternative_ids=["string-1", "-string-4"],
+        expect="""\
+        string-1 = First string
+        string-2 = Second string
+        string-3 = Third string
+
+
+        ## ALTERNATIVE STRING
+
+        string-1-alt = Alternative string
+        # comment
+        -string-4-alt = Other string
+        """,
+    )
+
+
+# Hand off to mozunit when this file is executed as a script rather than
+# imported.
+if __name__ == "__main__":
+    mozunit.main()
diff --git a/tools/base_browser/l10n/combine/tests/test_properties.py b/tools/base_browser/l10n/combine/tests/test_properties.py
@@ -0,0 +1,415 @@
+import textwrap
+
+import mozunit
+from base_browser.l10n.combine import combine_files
+
+
+def assert_result(new_content, old_content, expect):
+    """Assert that combining the new and old content produces ``expect``.
+
+    Each argument is either None or a string. Strings are run through
+    ``textwrap.dedent`` before use, so test cases can be indented to line up
+    with the surrounding code. The contents are combined as "test.properties"
+    with the comment text "REMOVED STRING".
+    """
+
+    def _dedent(text):
+        # None is passed through untouched.
+        return text if text is None else textwrap.dedent(text)
+
+    new_text = _dedent(new_content)
+    old_text = _dedent(old_content)
+    expected = _dedent(expect)
+    combined = combine_files(
+        "test.properties", new_text, old_text, "REMOVED STRING"
+    )
+    assert expected == combined
+
+
+def assert_alternative(content, alternative_content, alternative_ids, expect):
+    """Assert that combining with alternative strings produces ``expect``.
+
+    ``content``, ``alternative_content`` and ``expect`` are either None or a
+    string; strings are run through ``textwrap.dedent`` before use so test
+    cases can be indented. ``alternative_ids`` is forwarded unchanged to
+    ``combine_files``, together with the comment text "ALTERNATIVE STRING"
+    and the suffix ".alt".
+    """
+
+    def _dedent(text):
+        # None is passed through untouched.
+        return text if text is None else textwrap.dedent(text)
+
+    main_text = _dedent(content)
+    alternative_text = _dedent(alternative_content)
+    expected = _dedent(expect)
+    combined = combine_files(
+        "test.properties",
+        main_text,
+        alternative_text,
+        "ALTERNATIVE STRING",
+        alternative_ids,
+        ".alt",
+    )
+    assert expected == combined
+
+
+def test_combine_empty():
+    """With no new and no old content, the expected result is None."""
+    assert_result(new_content=None, old_content=None, expect=None)
+
+
+def test_combine_new_file():
+    """A new file with no old content is expected to come out unchanged."""
+    new_content = """\
+        string.1 = First
+        string.2 = Second
+        """
+    expect = """\
+        string.1 = First
+        string.2 = Second
+        """
+    assert_result(new_content, None, expect)
+
+
+def test_combine_removed_file():
+    """When the whole file was removed, every old string is marked removed."""
+    old_content = """\
+        string.1 = First
+        string.2 = Second
+        """
+    # The expected output starts with a blank line.
+    expect = """\
+
+        # REMOVED STRING
+        string.1 = First
+        # REMOVED STRING
+        string.2 = Second
+        """
+    assert_result(None, old_content, expect)
+
+
+def test_no_change():
+    """Identical new and old content is expected to come out unchanged."""
+    unchanged = """\
+        string.1 = First
+        string.2 = Second
+        """
+    assert_result(new_content=unchanged, old_content=unchanged, expect=unchanged)
+
+
+def test_added_string():
+    """A string that only exists in the new content is kept in place."""
+    new_content = """\
+        string.1 = First
+        string.new = NEW
+        string.2 = Second
+        """
+    old_content = """\
+        string.1 = First
+        string.2 = Second
+        """
+    expect = """\
+        string.1 = First
+        string.new = NEW
+        string.2 = Second
+        """
+    assert_result(new_content, old_content, expect)
+
+
+def test_removed_string():
+    """A string only in the old content is appended under a removed marker."""
+    new_content = """\
+        string.1 = First
+        string.2 = Second
+        """
+    old_content = """\
+        string.1 = First
+        removed = REMOVED
+        string.2 = Second
+        """
+    expect = """\
+        string.1 = First
+        string.2 = Second
+
+        # REMOVED STRING
+        removed = REMOVED
+        """
+    assert_result(new_content, old_content, expect)
+
+
+def test_removed_and_added():
+    """New strings stay in place; removed strings are appended in old order."""
+    new_content = """\
+        new.1 = New string
+        string.1 = First
+        string.2 = Second
+        new.2 = New string 2
+        """
+    old_content = """\
+        string.1 = First
+        removed.1 = First removed
+        removed.2 = Second removed
+        string.2 = Second
+        removed.3 = Third removed
+        """
+    expect = """\
+        new.1 = New string
+        string.1 = First
+        string.2 = Second
+        new.2 = New string 2
+
+        # REMOVED STRING
+        removed.1 = First removed
+        # REMOVED STRING
+        removed.2 = Second removed
+        # REMOVED STRING
+        removed.3 = Third removed
+        """
+    assert_result(new_content, old_content, expect)
+
+
+def test_updated():
+    """When a string's value changed, only the new value is expected."""
+    new_content = """\
+        changed.string = NEW
+        """
+    old_content = """\
+        changed.string = OLD
+        """
+    expect = """\
+        changed.string = NEW
+        """
+    assert_result(new_content, old_content, expect)
+
+
+def test_updated_comment():
+    """The comments in the expected output always match the new content."""
+    # The comment on the string was changed.
+    assert_result(
+        new_content="""\
+        # NEW
+        changed.string = string
+        """,
+        old_content="""\
+        # OLD
+        changed.string = string
+        """,
+        expect="""\
+        # NEW
+        changed.string = string
+        """,
+    )
+    # A comment was added to the string.
+    assert_result(
+        new_content="""\
+        # NEW
+        changed.string = string
+        """,
+        old_content="""\
+        changed.string = string
+        """,
+        expect="""\
+        # NEW
+        changed.string = string
+        """,
+    )
+    # The comment was dropped from the string.
+    assert_result(
+        new_content="""\
+        changed.string = string
+        """,
+        old_content="""\
+        # OLD
+        changed.string = string
+        """,
+        expect="""\
+        changed.string = string
+        """,
+    )
+
+    # The same, but with a file comment that also changed.
+    assert_result(
+        new_content="""\
+        # NEW file comment
+
+        # NEW
+        changed.string = string
+        """,
+        old_content="""\
+        # OLD file comment
+
+        # OLD
+        changed.string = string
+        """,
+        expect="""\
+        # NEW file comment
+
+        # NEW
+        changed.string = string
+        """,
+    )
+
+
+def test_reordered():
+    """When a string was re-ordered, the expected output uses the new order."""
+    new_content = """\
+        string.1 = value
+        moved.string = move
+        """
+    old_content = """\
+        moved.string = move
+        string.1 = value
+        """
+    expect = """\
+        string.1 = value
+        moved.string = move
+        """
+    assert_result(new_content, old_content, expect)
+
+
+def test_removed_string_with_comment():
+    """A removed string is expected to keep the comment directly above it."""
+    # Single removed string with a one-line comment.
+    assert_result(
+        new_content="""\
+        # Comment for first.
+        string.1 = First
+        string.2 = Second
+        """,
+        old_content="""\
+        # Comment for first.
+        string.1 = First
+        # Comment for removed.
+        removed = REMOVED
+        string.2 = Second
+        """,
+        expect="""\
+        # Comment for first.
+        string.1 = First
+        string.2 = Second
+
+        # REMOVED STRING
+        # Comment for removed.
+        removed = REMOVED
+        """,
+    )
+
+    # File comments and multi-line comments.
+    # Every comment line directly above a removed string travels with it; the
+    # run of comments stops at the previous entity or at a blank line.
+    assert_result(
+        new_content="""\
+        # First File comment
+
+        # Comment for first.
+        # Comment 2 for first.
+        string.1 = First
+
+        # Second
+        # File comment
+
+        string.2 = Second
+        """,
+        old_content="""\
+        # First File comment
+
+        # Comment for first.
+        # Comment 2 for first.
+        string.1 = First
+        removed.1 = First removed
+        # Comment for second removed.
+        removed.2 = Second removed
+
+        # Removed file comment
+
+        # Comment 1 for third removed
+        # Comment 2 for third removed
+        removed.3 = Third removed
+
+        # Second
+        # File comment
+
+        removed.4 = Fourth removed
+        string.2 = Second
+        """,
+        expect="""\
+        # First File comment
+
+        # Comment for first.
+        # Comment 2 for first.
+        string.1 = First
+
+        # Second
+        # File comment
+
+        string.2 = Second
+
+        # REMOVED STRING
+        removed.1 = First removed
+        # REMOVED STRING
+        # Comment for second removed.
+        removed.2 = Second removed
+        # REMOVED STRING
+        # Comment 1 for third removed
+        # Comment 2 for third removed
+        removed.3 = Third removed
+        # REMOVED STRING
+        removed.4 = Fourth removed
+        """,
+    )
+
+
+def test_alternatives():
+    """Listed alternative ids are appended with the ".alt" suffix."""
+    # A single string with an alternative value.
+    assert_alternative(
+        content="""\
+        string.1 = First string
+        """,
+        alternative_content="""\
+        string.1 = Alternative string
+        """,
+        alternative_ids=["string.1"],
+        expect="""\
+        string.1 = First string
+
+        # ALTERNATIVE STRING
+        string.1.alt = Alternative string
+        """,
+    )
+    # Only the listed id is expected in the output, along with its comment
+    # from the alternative content.
+    assert_alternative(
+        content="""\
+        # Comment 1
+        string.1 = First string
+        # Comment 2
+        string.2 = Second string
+        string.3 = Third string
+        """,
+        alternative_content="""\
+        string.1 = First string
+        # Alt comment
+        string.2 = Alternative string
+        string.3 = Third string different
+        string.4 = Other string
+        """,
+        alternative_ids=["string.2"],
+        expect="""\
+        # Comment 1
+        string.1 = First string
+        # Comment 2
+        string.2 = Second string
+        string.3 = Third string
+
+        # ALTERNATIVE STRING
+        # Alt comment
+        string.2.alt = Alternative string
+        """,
+    )
+    # Several listed ids, one of which only exists in the alternative content.
+    assert_alternative(
+        content="""\
+        string.1 = First string
+        string.2 = Second string
+        string.3 = Third string
+        """,
+        alternative_content="""\
+        string.1 = Alternative string
+        string.3 = Third string
+        # comment
+        string.4 = Other string
+        """,
+        alternative_ids=["string.1", "string.4"],
+        expect="""\
+        string.1 = First string
+        string.2 = Second string
+        string.3 = Third string
+
+        # ALTERNATIVE STRING
+        string.1.alt = Alternative string
+        # ALTERNATIVE STRING
+        # comment
+        string.4.alt = Other string
+        """,
+    )
+
+
+# When this file is executed directly (rather than imported), hand control
+# over to mozunit's entry point.
+if __name__ == "__main__":
+    mozunit.main()
diff --git a/tools/moz.build b/tools/moz.build
@@ -71,6 +71,7 @@ with Files("tryselect/docs/**"):
     SCHEDULES.exclusive = ["docs"]
 
 PYTHON_UNITTEST_MANIFESTS += [
+    "base_browser/l10n/combine/tests/python.toml",
     "fuzzing/smoke/python.toml",
     "lint/test/python.toml",
     "tryselect/test/python.toml",

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE

A	tools/base_browser/l10n/combine-translation-versions.py	\|	404	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	tools/base_browser/l10n/combine/__init__.py	\|	3	+++
A	tools/base_browser/l10n/combine/combine.py	\|	206	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	tools/base_browser/l10n/combine/tests/__init__.py	\|	0
A	tools/base_browser/l10n/combine/tests/python.toml	\|	10	++++++++++
A	tools/base_browser/l10n/combine/tests/test_android.py	\|	420	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	tools/base_browser/l10n/combine/tests/test_dtd.py	\|	418	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	tools/base_browser/l10n/combine/tests/test_fluent.py	\|	482	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	tools/base_browser/l10n/combine/tests/test_properties.py	\|	415	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	tools/moz.build	\|	1	+