ci/whitelisting (#5500)
All checks were successful
continuous-integration/drone/push Build is passing

Whitelist some common attributes/elements that should be localized.

This was done by making the whitelist a list of xpaths.

Co-authored-by: Darragh Elliott <me@delliott.net>
Reviewed-on: #5500
Co-authored-by: delliott <delliott@fsfe.org>
Co-committed-by: delliott <delliott@fsfe.org>
This commit was merged in pull request #5500.
This commit is contained in:
2025-11-18 08:22:42 +00:00
committed by tobiasd
parent 1a3dcb6bf5
commit 4cd97fa78c
28 changed files with 185 additions and 58 deletions

View File

@@ -4,19 +4,24 @@
"""Lib functions used mainly in checks mainly for testing a file."""
import copy
import logging
import sys
from pathlib import Path
from typing import TYPE_CHECKING
from lxml import etree
if TYPE_CHECKING:
from collections.abc import Iterable
from pathlib import Path
logger = logging.getLogger(__name__)
def compare_files(
file1: Path,
file2: Path,
attr_whitelist: set[str] | None = None,
xpaths_to_ignore: Iterable[str] | None = None,
_path: str = "",
) -> list[str]:
"""Compare two xml files, passes as paths."""
@@ -27,21 +32,50 @@ def compare_files(
logger.critical("XML parse error: %s", e)
sys.exit(1)
return compare_elements(t1.getroot(), t2.getroot(), attr_whitelist)
return compare_elements(t1.getroot(), t2.getroot(), xpaths_to_ignore)
def _delete_by_xpaths(root: etree.Element, xpaths: Iterable[str]) -> None:
    """Remove every element/attribute that matches any of the xpaths.

    The tree below ``root`` is modified in place.

    An XPath whose final step selects attributes (``.../@name`` or
    ``.../@*``) removes those attributes from the elements matched by the
    preceding steps. Any other XPath is treated as an element path: each
    matched element is detached from its parent, and a match without a
    parent (such as ``root`` itself) is left untouched.

    Attribute names carrying a namespace prefix (``@ns:name``) are not
    recognised as attribute paths.
    """
    for xpath in xpaths:
        parent_xpath, separator, attr = xpath.rpartition("/@")
        # Only a plain name (or "*") after the last "/@" means the final step
        # selects attributes. Anything else (e.g. "c]" from "//a[b/@c]") means
        # the "/@" sits inside a predicate or union, so it is an element path.
        is_attribute_path = bool(separator) and (
            attr == "*"
            or (attr != "" and all(ch.isalnum() or ch in "_-." for ch in attr))
        )
        if is_attribute_path:
            if not parent_xpath:
                # "/@name": default to the attributes of root itself
                parent_xpath = "."
            elif parent_xpath.endswith("/"):
                # "X//@name" abbreviates "X/descendant-or-self::node()/@name";
                # only elements carry attributes, so select those as parents.
                parent_xpath = f"{parent_xpath[:-1]}/descendant-or-self::*"
            for parent in root.xpath(parent_xpath):
                if isinstance(parent, etree.Element):
                    if attr == "*":
                        parent.attrib.clear()
                    else:
                        parent.attrib.pop(attr, None)
        else:  # element path
            for el in root.xpath(xpath):
                # non-element results (strings, numbers, ...) are skipped
                if isinstance(el, etree.Element):
                    parent = el.getparent()
                    if parent is not None:
                        parent.remove(el)
def compare_elements(
elem1: etree.Element,
elem2: etree.Element,
attr_whitelist: set[str] | None = None,
elem_input1: etree.Element,
elem_input2: etree.Element,
xpaths_to_ignore: Iterable[str] | None = None,
_path: str = "",
) -> list[str]:
"""Recursively compare two XML elements.
Returns a list of short, informative error strings.
"""
if attr_whitelist is None:
attr_whitelist = set()
if xpaths_to_ignore is None:
xpaths_to_ignore = ()
# make a copy to prevent modifying parent scope
elem1 = copy.deepcopy(elem_input1)
elem2 = copy.deepcopy(elem_input2)
# Prune ignored parts
_delete_by_xpaths(elem1, xpaths_to_ignore)
_delete_by_xpaths(elem2, xpaths_to_ignore)
errors: list[str] = []
tag_path = f"{_path}/{elem1.tag}" if _path else elem1.tag
@@ -65,10 +99,7 @@ def compare_elements(
f" only 1: {only_in_elem1} only 2: {only_in_elem2}"
)
for key in common:
if (
attributes_of_elem1[key] != attributes_of_elem2[key]
and key not in attr_whitelist
):
if attributes_of_elem1[key] != attributes_of_elem2[key]:
error_msg = (
f"Attribute value diff at <{elem1.tag} {key}>:"
f" {attributes_of_elem1[key]!r}{attributes_of_elem2[key]!r}"
@@ -85,7 +116,9 @@ def compare_elements(
# and then recurse into children
for idx, (child1, child2) in enumerate(zip(kids1, kids2, strict=False), start=1):
errors.extend(
compare_elements(child1, child2, attr_whitelist, _path=f"{tag_path}[{idx}]")
compare_elements(
child1, child2, xpaths_to_ignore=(), _path=f"{tag_path}[{idx}]"
)
)
# this should be stable from the sorts above, so no need to sort it here

View File

@@ -6,11 +6,13 @@
import logging
import subprocess
from pathlib import Path
from typing import Any, cast
from typing import TYPE_CHECKING, Any, cast
from lxml import etree
if TYPE_CHECKING:
from pathlib import Path
logger = logging.getLogger(__name__)

View File

@@ -7,13 +7,15 @@
import logging
import re
from datetime import datetime
from pathlib import Path
from typing import Any
from typing import TYPE_CHECKING, Any
from lxml import etree
from fsfe_website_build.lib.misc import get_basename, get_version, lang_from_filename
if TYPE_CHECKING:
from pathlib import Path
logger = logging.getLogger(__name__)

View File

@@ -5,10 +5,13 @@
"""Implementation of the full build logic."""
import logging
from pathlib import Path
from typing import TYPE_CHECKING
from fsfe_website_build.lib.misc import run_command
if TYPE_CHECKING:
from pathlib import Path
logger = logging.getLogger(__name__)

View File

@@ -4,9 +4,12 @@
"""Global directory symlinking logic."""
import logging
import multiprocessing.pool
from itertools import product
from pathlib import Path
from typing import TYPE_CHECKING
if TYPE_CHECKING:
import multiprocessing.pool
from pathlib import Path
logger = logging.getLogger(__name__)

View File

@@ -9,7 +9,10 @@ found in the website to build source tree.
import logging
import sys
from pathlib import Path
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from pathlib import Path
logger = logging.getLogger(__name__)

View File

@@ -10,7 +10,10 @@ found in the website to build source tree.
import logging
import sys
from pathlib import Path
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from pathlib import Path
logger = logging.getLogger(__name__)

View File

@@ -10,8 +10,7 @@ directory tree and does not touch the target directory tree at all.
"""
import logging
import multiprocessing.pool
from pathlib import Path
from typing import TYPE_CHECKING
from .prepare_subdirectories import prepare_subdirectories
from .update_css import update_css
@@ -20,6 +19,10 @@ from .update_localmenus import update_localmenus
from .update_stylesheets import update_stylesheets
from .update_xmllists import update_xmllists
if TYPE_CHECKING:
import multiprocessing.pool
from pathlib import Path
logger = logging.getLogger(__name__)

View File

@@ -9,12 +9,15 @@ distributed to the web server.
"""
import logging
from pathlib import Path
from typing import TYPE_CHECKING
import minify # pyright: ignore [reportMissingTypeStubs]
from fsfe_website_build.lib.misc import run_command, update_if_changed
if TYPE_CHECKING:
from pathlib import Path
logger = logging.getLogger(__name__)

View File

@@ -11,8 +11,11 @@ file.
"""
import logging
import multiprocessing.pool
from pathlib import Path
from typing import TYPE_CHECKING
if TYPE_CHECKING:
import multiprocessing.pool
from pathlib import Path
logger = logging.getLogger(__name__)

View File

@@ -8,14 +8,17 @@ After this step, all .localmenu.??.xml files will be up to date.
"""
import logging
import multiprocessing.pool
from collections import defaultdict
from pathlib import Path
from typing import TYPE_CHECKING
from lxml import etree
from fsfe_website_build.lib.misc import get_basepath, sort_dict, update_if_changed
if TYPE_CHECKING:
import multiprocessing.pool
logger = logging.getLogger(__name__)

View File

@@ -14,14 +14,17 @@ per-year archives.
"""
import logging
import multiprocessing.pool
import re
from pathlib import Path
from typing import TYPE_CHECKING
from lxml import etree
from fsfe_website_build.lib.misc import touch_if_newer_dep
if TYPE_CHECKING:
import multiprocessing.pool
from pathlib import Path
logger = logging.getLogger(__name__)

View File

@@ -15,9 +15,9 @@ After this step, the following files will be up to date:
import datetime
import fnmatch
import logging
import multiprocessing.pool
import re
from pathlib import Path
from typing import TYPE_CHECKING
from lxml import etree
@@ -28,6 +28,9 @@ from fsfe_website_build.lib.misc import (
update_if_changed,
)
if TYPE_CHECKING:
import multiprocessing.pool
logger = logging.getLogger(__name__)

View File

@@ -7,9 +7,12 @@ Uses a multithreaded pathlib copy.
"""
import logging
import multiprocessing.pool
import shutil
from pathlib import Path
from typing import TYPE_CHECKING
if TYPE_CHECKING:
import multiprocessing.pool
from pathlib import Path
logger = logging.getLogger(__name__)

View File

@@ -10,11 +10,14 @@ generate a symlink from about/index.en.html to about.en.html
"""
import logging
import multiprocessing.pool
from pathlib import Path
from typing import TYPE_CHECKING
from fsfe_website_build.lib.misc import get_basename
if TYPE_CHECKING:
import multiprocessing.pool
from pathlib import Path
logger = logging.getLogger(__name__)

View File

@@ -12,8 +12,11 @@ takes the file.html.<lang> format.
"""
import logging
import multiprocessing.pool
from pathlib import Path
from typing import TYPE_CHECKING
if TYPE_CHECKING:
import multiprocessing.pool
from pathlib import Path
logger = logging.getLogger(__name__)

View File

@@ -9,16 +9,19 @@ which is useful to prevent reparsing the XSL multiple times.
"""
import logging
import multiprocessing.pool
from collections import defaultdict
from itertools import product
from pathlib import Path
from typing import TYPE_CHECKING
from lxml import etree
from fsfe_website_build.lib.misc import get_basepath
from fsfe_website_build.lib.process_file import process_file
if TYPE_CHECKING:
import multiprocessing.pool
logger = logging.getLogger(__name__)

View File

@@ -5,14 +5,17 @@
"""Script for FSFE website build, phase 2."""
import logging
import multiprocessing.pool
from pathlib import Path
from typing import TYPE_CHECKING
from .copy_files import copy_files
from .create_index_symlinks import create_index_symlinks
from .create_language_symlinks import create_language_symlinks
from .process_files import process_files
if TYPE_CHECKING:
import multiprocessing.pool
from pathlib import Path
logger = logging.getLogger(__name__)

View File

@@ -4,11 +4,14 @@
"""Use rsync to copy files to the targets."""
import logging
import multiprocessing.pool
from pathlib import Path
from typing import TYPE_CHECKING
from fsfe_website_build.lib.misc import run_command
if TYPE_CHECKING:
import multiprocessing.pool
from pathlib import Path
logger = logging.getLogger(__name__)

View File

@@ -3,9 +3,12 @@
# SPDX-License-Identifier: GPL-3.0-or-later
from argparse import Namespace
from pathlib import Path
from typing import TYPE_CHECKING
from fsfe_website_build.build import build
from pytest_mock import MockFixture
if TYPE_CHECKING:
from pytest_mock import MockFixture
def no_rebuild_twice_test(mocker: MockFixture) -> None:

View File

@@ -2,13 +2,16 @@
#
# SPDX-License-Identifier: GPL-3.0-or-later
import tempfile
from collections.abc import Generator
from pathlib import Path
from typing import TYPE_CHECKING
import pytest
from fsfe_website_build.lib.checks import compare_elements, compare_files
from lxml import etree
if TYPE_CHECKING:
from collections.abc import Generator
class TestCompareFiles:
"""Smoke tests for the high-level entry point."""
@@ -60,9 +63,11 @@ class TestCompareElements:
assert len(diff) == 1
def whitelisted_attribute_ignored_test(self) -> None:
e1 = etree.Element("root", x="1")
e2 = etree.Element("root", x="2")
assert compare_elements(e1, e2, attr_whitelist={"x"}) == []
e1 = etree.Element("root")
etree.SubElement(e1, "test", x="1")
e2 = etree.Element("root")
etree.SubElement(e2, "test", x="2")
assert compare_elements(e1, e2, ["//*[@x]"]) == []
def child_count_mismatch_test(self) -> None:
e1 = etree.Element("root")