fix/caching (#5494)
All checks were successful
continuous-integration/drone/push Build is passing

Fix several items not being cached, and add a test to catch caching regressions.

The test takes quite a while to run, so I am not sure whether it is a good idea.

Co-authored-by: Darragh Elliott <me@delliott.net>
Reviewed-on: #5494
This commit was merged in pull request #5494.
This commit is contained in:
2025-11-13 18:13:59 +00:00
parent 647b4fa16a
commit fdbc88a850
16 changed files with 163 additions and 88 deletions

View File

@@ -22,7 +22,7 @@ from .phase3.stage_to_target import stage_to_target
logger = logging.getLogger(__name__)
def parse_arguments() -> argparse.Namespace:
def _parse_arguments() -> argparse.Namespace:
"""Parse the arguments of the website build process."""
parser = argparse.ArgumentParser(
description="Python script to handle building of the fsfe webpage",
@@ -95,9 +95,8 @@ def parse_arguments() -> argparse.Namespace:
return args
def main() -> None:
"""Parse args and coordinate the website builder."""
args = parse_arguments()
def build(args: argparse.Namespace) -> None:
"""Coordinate the website builder."""
logging.basicConfig(
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
@@ -168,3 +167,9 @@ def main() -> None:
if args.serve:
serve_websites(working_target, args.sites, 2000, 100)
def main() -> None:
    """Entry point: parse the CLI arguments and kick off a build."""
    build(_parse_arguments())

View File

@@ -21,7 +21,8 @@ def compare_files(
) -> list[str]:
"""Compare two xml files, passes as paths."""
try:
t1, t2 = etree.parse(file1), etree.parse(file2)
parser = etree.XMLParser(remove_comments=True)
t1, t2 = etree.parse(file1, parser), etree.parse(file2, parser)
except etree.XMLSyntaxError as e:
logger.critical("XML parse error: %s", e)
sys.exit(1)
@@ -54,14 +55,14 @@ def compare_elements(
attributes_of_elem1 = dict(elem1.attrib.items())
attributes_of_elem2 = dict(elem2.attrib.items())
only_in_elem1 = set(attributes_of_elem1) - set(attributes_of_elem2)
only_in_elem2 = set(attributes_of_elem2) - set(attributes_of_elem1)
common = set(attributes_of_elem1) & set(attributes_of_elem2)
only_in_elem1 = sorted(set(attributes_of_elem1) - set(attributes_of_elem2))
only_in_elem2 = sorted(set(attributes_of_elem2) - set(attributes_of_elem1))
common = sorted(set(attributes_of_elem1) & set(attributes_of_elem2))
if only_in_elem1 or only_in_elem2:
errors.append(
f"Attribute delta at <{elem1.tag}>"
f" only 1: {list(only_in_elem1)} only 2: {list(only_in_elem2)}"
f" only 1: {only_in_elem1} only 2: {only_in_elem2}"
)
for key in common:
if (
@@ -87,4 +88,5 @@ def compare_elements(
compare_elements(child1, child2, attr_whitelist, _path=f"{tag_path}[{idx}]")
)
# this should be stable from the sorts above, so no need to sort it here
return errors

View File

@@ -109,7 +109,6 @@ def get_version(file: Path) -> int:
xml = etree.parse(file)
result_list = xml.xpath("/*/version")
result = result_list[0].text if result_list else str(0)
logger.debug("Got version: %s", result)
return int(result)

View File

@@ -23,7 +23,12 @@ def prepare_subdirectories(
) -> None:
"""Find any subdir scripts in subdirectories and run them."""
logger.info("Preparing Subdirectories")
for subdir_path in (path.parent for path in source_dir.glob("**/subdir.py")):
for subdir_path in sorted(
(path.parent for path in source_dir.glob("**/subdir.py")),
key=lambda directory: directory.joinpath("subdir-prio.txt").read_text().strip()
if directory.joinpath("subdir-prio.txt").exists()
else "0",
):
logger.info("Preparing subdirectory %s", subdir_path)
sys.path.append(str(subdir_path.resolve()))
# Ignore this very sensible warning, as we do evil things

View File

@@ -54,12 +54,14 @@ def _update_for_base( # noqa: PLR0913
.replace("$lastyear", lastyear)
.strip()
)
if len(pattern) <= 0:
logger.debug("Pattern too short, continue!")
if not pattern:
logger.debug("Pattern match empty, continue!")
continue
search_result = re.search(r":\[(.*)\]", line)
tag_search_result = re.search(r":\[(.*)\]", line)
tag = (
search_result.group(1).strip() if search_result is not None else ""
tag_search_result.group(1).strip()
if tag_search_result is not None
else ""
)
for xml_file in filter(
@@ -81,9 +83,7 @@ def _update_for_base( # noqa: PLR0913
)
if tag != ""
else True
)
# Not just matching an empty xml_file
and len(str(xml_file)) > 0,
),
all_xml,
):
matching_files.add(str(xml_file.relative_to(source)))
@@ -94,10 +94,9 @@ def _update_for_base( # noqa: PLR0913
matching_files.add(
f"{source}/global/data/modules/{module.get('id').strip()}"
)
matching_files = set(sorted(matching_files)) # noqa: C414
update_if_changed(
Path(f"{base.parent}/.{base.name}.xmllist"),
("\n".join(matching_files) + "\n") if matching_files else "",
("\n".join(sorted(matching_files)) + "\n"),
)
@@ -112,10 +111,14 @@ def _update_module_xmllists(
# Get all the bases and stuff before multithreading the update bit
all_xml = {
get_basepath(path)
for path in filter(
lambda path: lang_from_filename(path) in languages,
list(source_dir.glob("**/*.*.xml"))
+ list(source.joinpath("global/").glob("**/*.*.xml")),
for path in sorted(
filter(
lambda path: lang_from_filename(path) in languages,
(
*source_dir.glob("**/*.*.xml"),
*source.joinpath("global/").glob("**/*.*.xml"),
),
)
)
}
source_bases = {path.with_suffix("") for path in source_dir.glob("**/*.sources")}

View File

@@ -0,0 +1,51 @@
# SPDX-FileCopyrightText: Free Software Foundation Europe e.V. <https://fsfe.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later
from argparse import Namespace
from pathlib import Path
from fsfe_website_build.build import build
from pytest_mock import MockFixture
def no_rebuild_twice_test(mocker: MockFixture) -> None:
    # Arguments mirroring a typical full CLI invocation of the builder.
    build_args = Namespace(
        full=True,
        languages=["ar", "de", "en", "es", "fr", "it"],
        log_level="DEBUG",
        processes=8,
        source=Path(),
        serve=False,
        sites=[
            Path("drm.info"),
            Path("fsfe.org"),
            Path("pdfreaders.org"),
            Path("status.fsfe.org"),
        ],
        stage=False,
        target="output/final",
    )
    # Run a full build first so every cached artifact exists on disk.
    build(build_args)

    def fail_if_update(path: Path, content: str) -> None:
        # Raise if the second build would create or rewrite any file.
        if not path.exists() or path.read_text() != content:
            raise AssertionError(
                f"File {path} would have been updated on incremental build."
            )

    # Swap in the failing stub so any attempted write surfaces as a test failure.
    mocker.patch(
        "fsfe_website_build.lib.misc.update_if_changed", side_effect=fail_if_update
    )
    # An incremental build straight after a full one must be a no-op.
    build_args.full = False
    build(build_args)

View File

@@ -0,0 +1 @@
1

View File

@@ -41,7 +41,7 @@ def _find_teaser(document: etree.ElementTree) -> str:
def _process_file(file: Path, stopwords: set[str]) -> dict[str, str | None]:
"""Generate the search index entry for a given file and set of stopwords."""
xslt_root = etree.parse(file)
tags = (
tags = sorted(
str(tag.get("key"))
for tag in xslt_root.xpath("//tag")
if tag.get("key") != "front-page"
@@ -56,7 +56,7 @@ def _process_file(file: Path, stopwords: set[str]) -> dict[str, str | None]:
),
"teaser": " ".join(
w
for w in _find_teaser(xslt_root).strip().split(" ")
for w in sorted(_find_teaser(xslt_root).strip().split(" "))
if w.lower() not in stopwords
),
"type": "news" if "news/" in str(file) else "page",
@@ -115,7 +115,10 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
)
)
articles = pool.starmap(_process_file, files_with_stopwords)
articles = sorted(
pool.starmap(_process_file, files_with_stopwords),
key=lambda article: tuple(article.values()),
)
update_if_changed(
working_dir.joinpath("index.js"),

View File

@@ -86,7 +86,7 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
with multiprocessing.Pool(processes) as pool:
logger.debug("Updating tags for %s", working_dir)
# Create a complete and current map of which tag is used in which files
files_by_tag: dict[str, set[Path]] = defaultdict(set)
files_by_tag: dict[str, list[Path]] = defaultdict(list)
tags_by_lang: defaultdict[str, dict[str, str | None]] = defaultdict(dict)
# Fill out files_by_tag and tags_by_lang
for file in filter(
@@ -108,14 +108,14 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
label = tag.text.strip() if tag.text and tag.text.strip() else None
# Load into the dicts
files_by_tag[key].add(get_basepath(file))
files_by_tag[key].append(get_basepath(file))
lang = lang_from_filename(file)
if key not in tags_by_lang[lang] or not tags_by_lang[lang][key]:
tags_by_lang[lang][key] = label
# Sort dicts to ensure that they are stable between runs
files_by_tag = sort_dict(files_by_tag)
for tag in files_by_tag:
files_by_tag[tag] = set(sorted(files_by_tag[tag])) # noqa: C414
files_by_tag[tag] = sorted(files_by_tag[tag])
tags_by_lang = sort_dict(tags_by_lang)
for lang in tags_by_lang:
tags_by_lang[lang] = sort_dict(tags_by_lang[lang])
@@ -127,17 +127,14 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
)
logger.debug("Updating tag lists")
pool.starmap(
update_if_changed,
for path, content in (
(
(
Path(f"{working_dir}/.tagged-{tag}.xmllist"),
("\n".join(str(file) for file in files_by_tag[tag]) + "\n"),
)
for tag in files_by_tag
),
)
Path(f"{working_dir}/.tagged-{tag}.xmllist"),
("\n".join(str(file) for file in files_by_tag[tag]) + "\n"),
)
for tag in files_by_tag
):
update_if_changed(path, content)
logger.debug("Updating tag sets")
# Get count of files with each tag in each section
filecount: dict[str, dict[str, int]] = defaultdict(dict)

View File

@@ -15,6 +15,7 @@ libxml2 \
libxml2-utils \
libxslt1.1 \
mediainfo \
node-less \
npm \
perl-base \
php-zip \

View File

@@ -17,13 +17,14 @@ build = "fsfe_website_build:main"
[dependency-groups]
dev = [
"lefthook", # pre-commit hook
"pyright", # python typechecker
"pytest", # python test runner
"reuse", # for enforcing licensing
"ruff", # python formatter and linter
"taplo", # toml formatter
"types-lxml", # type stubs for lxml
"lefthook", # pre-commit hook
"pyright", # python typechecker
"pytest", # python test runner
"pytest-mock", # helper for mocking in pytest
"reuse", # for enforcing licensing
"ruff", # python formatter and linter
"taplo", # toml formatter
"types-lxml", # type stubs for lxml
]
[build-system]
@@ -75,11 +76,13 @@ ignore = [
]
[tool.ruff.lint.per-file-ignores]
"build/fsfe_website_build_tests/*" = [
"D",
] # We do not need to document the tests.
"D", # We do not need to document the tests.
"TRY003", # Dont worry about big exceptions in tests
]
[tool.pyright]
typeCheckingMode = "strict"
[tool.pytest.ini_options]
testpaths = ["build/fsfe_website_build_tests"]
addopts = ["--import-mode=importlib"]
python_files = ["*_test.py"]
python_functions = ["*_test"]
testpaths = ["build/fsfe_website_build_tests"]

View File

@@ -9,7 +9,6 @@ we ensure translation pages for all langs are built.
"""
import logging
import multiprocessing
from pathlib import Path
from fsfe_website_build.lib.misc import (
@@ -20,7 +19,7 @@ from lxml import etree
logger = logging.getLogger(__name__)
def run(source: Path, processes: int, working_dir: Path) -> None:
def run(source: Path, processes: int, working_dir: Path) -> None: # noqa: ARG001
"""Place filler indices to encourage the site.
This ensures that status pages for all langs are build.
@@ -39,17 +38,10 @@ def run(source: Path, processes: int, working_dir: Path) -> None:
index_content = etree.tostring(page, xml_declaration=True, encoding="utf-8").decode(
"utf-8",
)
with multiprocessing.Pool(processes) as pool:
pool.starmap(
update_if_changed,
(
(
working_dir.joinpath(
f"index.{path.name}.xhtml",
),
index_content,
)
for path in source.glob("global/languages/*")
),
for path in (
working_dir.joinpath(
f"index.{path.name}.xhtml",
)
for path in source.glob("global/languages/*")
):
update_if_changed(path, index_content)

View File

@@ -24,9 +24,9 @@ def _worker(path: Path) -> tuple[str, Path, Path, list[tuple[str, str]]] | None:
doc = etree.parse(path)
# all elements that carry a style attribute
results = [
(element.tag, element.get("style")) for element in doc.xpath("//*[@style]")
]
results = sorted(
[(element.tag, element.get("style")) for element in doc.xpath("//*[@style]")]
)
if not results:
return None
@@ -62,9 +62,9 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
# concurrent filtering
with multiprocessing.Pool(processes) as pool:
filtered = [
result for result in pool.map(_worker, candidates) if result is not None
]
filtered = sorted(
[result for result in pool.map(_worker, candidates) if result is not None]
)
# dict to sort values by type, basepath, finalpath
data: defaultdict[str, defaultdict[Path, dict[Path, list[tuple[str, str]]]]] = (

View File

@@ -204,7 +204,7 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
# Generate our file lists by priority
# Super hardcoded unfortunately
files_by_priority: dict[str, list[Path]] = defaultdict(list)
for file in all_files_with_translations:
for file in sorted(all_files_with_translations):
for priority, searches in priorities_and_searches.items():
# If any search matches,
# add it to that priority and skip all subsequent priorities

View File

@@ -43,14 +43,18 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
["git", "ls-files", "-z"],
)
all_files = {
# Split on null bytes, strip and then parse into path
path
for path in (Path(line.strip()) for line in all_git_tracked_files.split("\x00"))
if path.suffix in [".xhtml", ".xml"]
and len(path.suffixes) >= 2 # noqa: PLR2004
and lang_from_filename(path) in languages
}
all_files = sorted(
{
# Split on null bytes, strip and then parse into path
path
for path in (
Path(line.strip()) for line in all_git_tracked_files.split("\x00")
)
if path.suffix in [".xhtml", ".xml"]
and len(path.suffixes) >= 2 # noqa: PLR2004
and lang_from_filename(path) in languages
}
)
whitelist = {"alt"}
groups: defaultdict[Path, list[Path]] = defaultdict(list)
for file in all_files:
@@ -86,15 +90,10 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
version_el = etree.SubElement(root, "version")
version_el.text = "1"
for master, details in tree.items():
for master, details in sorted(tree.items()):
master_el = etree.SubElement(root, "master", name=str(master))
for other, msg in details:
etree.SubElement(
master_el,
"detail",
name=str(other),
error=msg,
)
for other, msg in sorted(details):
etree.SubElement(master_el, "detail", name=str(other), error=msg)
xml_bytes = etree.tostring(root, xml_declaration=True, encoding="utf-8")
update_if_changed(work_file, xml_bytes.decode("utf-8"))

14
uv.lock generated
View File

@@ -131,6 +131,7 @@ dev = [
{ name = "lefthook" },
{ name = "pyright" },
{ name = "pytest" },
{ name = "pytest-mock" },
{ name = "reuse" },
{ name = "ruff" },
{ name = "taplo" },
@@ -151,6 +152,7 @@ dev = [
{ name = "lefthook" },
{ name = "pyright" },
{ name = "pytest" },
{ name = "pytest-mock" },
{ name = "reuse" },
{ name = "ruff" },
{ name = "taplo" },
@@ -362,6 +364,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" },
]
[[package]]
name = "pytest-mock"
version = "3.15.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pytest" },
]
sdist = { url = "https://files.pythonhosted.org/packages/68/14/eb014d26be205d38ad5ad20d9a80f7d201472e08167f0bb4361e251084a9/pytest_mock-3.15.1.tar.gz", hash = "sha256:1849a238f6f396da19762269de72cb1814ab44416fa73a8686deac10b0d87a0f", size = 34036, upload-time = "2025-09-16T16:37:27.081Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5a/cc/06253936f4a7fa2e0f48dfe6d851d9c56df896a9ab09ac019d70b760619c/pytest_mock-3.15.1-py3-none-any.whl", hash = "sha256:0a25e2eb88fe5168d535041d09a4529a188176ae608a6d249ee65abc0949630d", size = 10095, upload-time = "2025-09-16T16:37:25.734Z" },
]
[[package]]
name = "python-debian"
version = "1.0.1"