fix/caching (#5494)
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
Fix some outputs not being cached, and add a test to catch caching regressions. The test takes quite a while to run, so it is not clear whether keeping it is a good idea. Co-authored-by: Darragh Elliott <me@delliott.net> Reviewed-on: #5494
This commit was merged in pull request #5494.
This commit is contained in:
@@ -22,7 +22,7 @@ from .phase3.stage_to_target import stage_to_target
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def parse_arguments() -> argparse.Namespace:
|
||||
def _parse_arguments() -> argparse.Namespace:
|
||||
"""Parse the arguments of the website build process."""
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Python script to handle building of the fsfe webpage",
|
||||
@@ -95,9 +95,8 @@ def parse_arguments() -> argparse.Namespace:
|
||||
return args
|
||||
|
||||
|
||||
def main() -> None:
|
||||
"""Parse args and coordinate the website builder."""
|
||||
args = parse_arguments()
|
||||
def build(args: argparse.Namespace) -> None:
|
||||
"""Coordinate the website builder."""
|
||||
logging.basicConfig(
|
||||
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
@@ -168,3 +167,9 @@ def main() -> None:
|
||||
|
||||
if args.serve:
|
||||
serve_websites(working_target, args.sites, 2000, 100)
|
||||
|
||||
|
||||
def main() -> None:
    """Parse args and run build."""
    # Argument parsing is kept separate from build() so that build() can be
    # called directly with a hand-made Namespace.
    build(_parse_arguments())
|
||||
|
||||
@@ -21,7 +21,8 @@ def compare_files(
|
||||
) -> list[str]:
|
||||
"""Compare two xml files, passes as paths."""
|
||||
try:
|
||||
t1, t2 = etree.parse(file1), etree.parse(file2)
|
||||
parser = etree.XMLParser(remove_comments=True)
|
||||
t1, t2 = etree.parse(file1, parser), etree.parse(file2, parser)
|
||||
except etree.XMLSyntaxError as e:
|
||||
logger.critical("XML parse error: %s", e)
|
||||
sys.exit(1)
|
||||
@@ -54,14 +55,14 @@ def compare_elements(
|
||||
attributes_of_elem1 = dict(elem1.attrib.items())
|
||||
attributes_of_elem2 = dict(elem2.attrib.items())
|
||||
|
||||
only_in_elem1 = set(attributes_of_elem1) - set(attributes_of_elem2)
|
||||
only_in_elem2 = set(attributes_of_elem2) - set(attributes_of_elem1)
|
||||
common = set(attributes_of_elem1) & set(attributes_of_elem2)
|
||||
only_in_elem1 = sorted(set(attributes_of_elem1) - set(attributes_of_elem2))
|
||||
only_in_elem2 = sorted(set(attributes_of_elem2) - set(attributes_of_elem1))
|
||||
common = sorted(set(attributes_of_elem1) & set(attributes_of_elem2))
|
||||
|
||||
if only_in_elem1 or only_in_elem2:
|
||||
errors.append(
|
||||
f"Attribute delta at <{elem1.tag}>"
|
||||
f" only 1: {list(only_in_elem1)} only 2: {list(only_in_elem2)}"
|
||||
f" only 1: {only_in_elem1} only 2: {only_in_elem2}"
|
||||
)
|
||||
for key in common:
|
||||
if (
|
||||
@@ -87,4 +88,5 @@ def compare_elements(
|
||||
compare_elements(child1, child2, attr_whitelist, _path=f"{tag_path}[{idx}]")
|
||||
)
|
||||
|
||||
# this should be stable from the sorts above, so no need to sort it here
|
||||
return errors
|
||||
|
||||
@@ -109,7 +109,6 @@ def get_version(file: Path) -> int:
|
||||
xml = etree.parse(file)
|
||||
result_list = xml.xpath("/*/version")
|
||||
result = result_list[0].text if result_list else str(0)
|
||||
logger.debug("Got version: %s", result)
|
||||
return int(result)
|
||||
|
||||
|
||||
|
||||
@@ -23,7 +23,12 @@ def prepare_subdirectories(
|
||||
) -> None:
|
||||
"""Find any subdir scripts in subdirectories and run them."""
|
||||
logger.info("Preparing Subdirectories")
|
||||
for subdir_path in (path.parent for path in source_dir.glob("**/subdir.py")):
|
||||
for subdir_path in sorted(
|
||||
(path.parent for path in source_dir.glob("**/subdir.py")),
|
||||
key=lambda directory: directory.joinpath("subdir-prio.txt").read_text().strip()
|
||||
if directory.joinpath("subdir-prio.txt").exists()
|
||||
else "0",
|
||||
):
|
||||
logger.info("Preparing subdirectory %s", subdir_path)
|
||||
sys.path.append(str(subdir_path.resolve()))
|
||||
# Ignore this very sensible warning, as we do evil things
|
||||
|
||||
@@ -54,12 +54,14 @@ def _update_for_base( # noqa: PLR0913
|
||||
.replace("$lastyear", lastyear)
|
||||
.strip()
|
||||
)
|
||||
if len(pattern) <= 0:
|
||||
logger.debug("Pattern too short, continue!")
|
||||
if not pattern:
|
||||
logger.debug("Pattern match empty, continue!")
|
||||
continue
|
||||
search_result = re.search(r":\[(.*)\]", line)
|
||||
tag_search_result = re.search(r":\[(.*)\]", line)
|
||||
tag = (
|
||||
search_result.group(1).strip() if search_result is not None else ""
|
||||
tag_search_result.group(1).strip()
|
||||
if tag_search_result is not None
|
||||
else ""
|
||||
)
|
||||
|
||||
for xml_file in filter(
|
||||
@@ -81,9 +83,7 @@ def _update_for_base( # noqa: PLR0913
|
||||
)
|
||||
if tag != ""
|
||||
else True
|
||||
)
|
||||
# Not just matching an empty xml_file
|
||||
and len(str(xml_file)) > 0,
|
||||
),
|
||||
all_xml,
|
||||
):
|
||||
matching_files.add(str(xml_file.relative_to(source)))
|
||||
@@ -94,10 +94,9 @@ def _update_for_base( # noqa: PLR0913
|
||||
matching_files.add(
|
||||
f"{source}/global/data/modules/{module.get('id').strip()}"
|
||||
)
|
||||
matching_files = set(sorted(matching_files)) # noqa: C414
|
||||
update_if_changed(
|
||||
Path(f"{base.parent}/.{base.name}.xmllist"),
|
||||
("\n".join(matching_files) + "\n") if matching_files else "",
|
||||
("\n".join(sorted(matching_files)) + "\n"),
|
||||
)
|
||||
|
||||
|
||||
@@ -112,10 +111,14 @@ def _update_module_xmllists(
|
||||
# Get all the bases and stuff before multithreading the update bit
|
||||
all_xml = {
|
||||
get_basepath(path)
|
||||
for path in filter(
|
||||
lambda path: lang_from_filename(path) in languages,
|
||||
list(source_dir.glob("**/*.*.xml"))
|
||||
+ list(source.joinpath("global/").glob("**/*.*.xml")),
|
||||
for path in sorted(
|
||||
filter(
|
||||
lambda path: lang_from_filename(path) in languages,
|
||||
(
|
||||
*source_dir.glob("**/*.*.xml"),
|
||||
*source.joinpath("global/").glob("**/*.*.xml"),
|
||||
),
|
||||
)
|
||||
)
|
||||
}
|
||||
source_bases = {path.with_suffix("") for path in source_dir.glob("**/*.sources")}
|
||||
|
||||
51
build/fsfe_website_build_tests/caching_test.py
Normal file
51
build/fsfe_website_build_tests/caching_test.py
Normal file
@@ -0,0 +1,51 @@
|
||||
# SPDX-FileCopyrightText: Free Software Foundation Europe e.V. <https://fsfe.org>
|
||||
#
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
from argparse import Namespace
|
||||
from pathlib import Path
|
||||
|
||||
from fsfe_website_build.build import build
|
||||
from pytest_mock import MockFixture
|
||||
|
||||
|
||||
def no_rebuild_twice_test(mocker: MockFixture) -> None:
    """Check that an incremental build after a full build rewrites no file.

    A full build is run first. Every reference to ``update_if_changed`` that
    is loaded at that point is then replaced by a mock which raises if the
    target file is missing or its content would differ, and a second,
    non-full build is run with the same arguments.
    """
    import importlib
    import sys

    # first, run a full build
    args = Namespace(
        full=True,
        languages=[
            "ar",
            "de",
            "en",
            "es",
            "fr",
            "it",
        ],
        log_level="DEBUG",
        processes=8,
        source=Path(),
        serve=False,
        sites=[
            Path("drm.info"),
            Path("fsfe.org"),
            Path("pdfreaders.org"),
            Path("status.fsfe.org"),
        ],
        stage=False,
        target="output/final",
    )
    build(args)

    # Replace update_if_changed with a mock that raises an exception
    # if the file would be changed.
    def fail_if_update(path: Path, content: str) -> None:
        if not path.exists() or path.read_text() != content:
            raise AssertionError(
                f"File {path} would have been updated on incremental build."
            )

    # Patching only the attribute on the defining module is not enough:
    # a module that ran `from ...misc import update_if_changed` before the
    # patch keeps its own reference to the real function. Patch every loaded
    # module that holds that function (this includes the defining module, so
    # modules imported later also pick up the mock).
    misc_module = importlib.import_module("fsfe_website_build.lib.misc")
    real_update_if_changed = misc_module.update_if_changed
    for module in list(sys.modules.values()):
        for name, value in list(getattr(module, "__dict__", {}).items()):
            if value is real_update_if_changed:
                mocker.patch.object(module, name, side_effect=fail_if_update)

    # now, run a normal build
    args.full = False
    build(args)
|
||||
1
fsfe.org/search/subdir-prio.txt
Normal file
1
fsfe.org/search/subdir-prio.txt
Normal file
@@ -0,0 +1 @@
|
||||
1
|
||||
@@ -41,7 +41,7 @@ def _find_teaser(document: etree.ElementTree) -> str:
|
||||
def _process_file(file: Path, stopwords: set[str]) -> dict[str, str | None]:
|
||||
"""Generate the search index entry for a given file and set of stopwords."""
|
||||
xslt_root = etree.parse(file)
|
||||
tags = (
|
||||
tags = sorted(
|
||||
str(tag.get("key"))
|
||||
for tag in xslt_root.xpath("//tag")
|
||||
if tag.get("key") != "front-page"
|
||||
@@ -56,7 +56,7 @@ def _process_file(file: Path, stopwords: set[str]) -> dict[str, str | None]:
|
||||
),
|
||||
"teaser": " ".join(
|
||||
w
|
||||
for w in _find_teaser(xslt_root).strip().split(" ")
|
||||
for w in sorted(_find_teaser(xslt_root).strip().split(" "))
|
||||
if w.lower() not in stopwords
|
||||
),
|
||||
"type": "news" if "news/" in str(file) else "page",
|
||||
@@ -115,7 +115,10 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
|
||||
)
|
||||
)
|
||||
|
||||
articles = pool.starmap(_process_file, files_with_stopwords)
|
||||
articles = sorted(
|
||||
pool.starmap(_process_file, files_with_stopwords),
|
||||
key=lambda article: tuple(article.values()),
|
||||
)
|
||||
|
||||
update_if_changed(
|
||||
working_dir.joinpath("index.js"),
|
||||
|
||||
@@ -86,7 +86,7 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
|
||||
with multiprocessing.Pool(processes) as pool:
|
||||
logger.debug("Updating tags for %s", working_dir)
|
||||
# Create a complete and current map of which tag is used in which files
|
||||
files_by_tag: dict[str, set[Path]] = defaultdict(set)
|
||||
files_by_tag: dict[str, list[Path]] = defaultdict(list)
|
||||
tags_by_lang: defaultdict[str, dict[str, str | None]] = defaultdict(dict)
|
||||
# Fill out files_by_tag and tags_by_lang
|
||||
for file in filter(
|
||||
@@ -108,14 +108,14 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
|
||||
label = tag.text.strip() if tag.text and tag.text.strip() else None
|
||||
|
||||
# Load into the dicts
|
||||
files_by_tag[key].add(get_basepath(file))
|
||||
files_by_tag[key].append(get_basepath(file))
|
||||
lang = lang_from_filename(file)
|
||||
if key not in tags_by_lang[lang] or not tags_by_lang[lang][key]:
|
||||
tags_by_lang[lang][key] = label
|
||||
# Sort dicts to ensure that they are stable between runs
|
||||
files_by_tag = sort_dict(files_by_tag)
|
||||
for tag in files_by_tag:
|
||||
files_by_tag[tag] = set(sorted(files_by_tag[tag])) # noqa: C414
|
||||
files_by_tag[tag] = sorted(files_by_tag[tag])
|
||||
tags_by_lang = sort_dict(tags_by_lang)
|
||||
for lang in tags_by_lang:
|
||||
tags_by_lang[lang] = sort_dict(tags_by_lang[lang])
|
||||
@@ -127,17 +127,14 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
|
||||
)
|
||||
|
||||
logger.debug("Updating tag lists")
|
||||
pool.starmap(
|
||||
update_if_changed,
|
||||
for path, content in (
|
||||
(
|
||||
(
|
||||
Path(f"{working_dir}/.tagged-{tag}.xmllist"),
|
||||
("\n".join(str(file) for file in files_by_tag[tag]) + "\n"),
|
||||
)
|
||||
for tag in files_by_tag
|
||||
),
|
||||
)
|
||||
|
||||
Path(f"{working_dir}/.tagged-{tag}.xmllist"),
|
||||
("\n".join(str(file) for file in files_by_tag[tag]) + "\n"),
|
||||
)
|
||||
for tag in files_by_tag
|
||||
):
|
||||
update_if_changed(path, content)
|
||||
logger.debug("Updating tag sets")
|
||||
# Get count of files with each tag in each section
|
||||
filecount: dict[str, dict[str, int]] = defaultdict(dict)
|
||||
|
||||
@@ -15,6 +15,7 @@ libxml2 \
|
||||
libxml2-utils \
|
||||
libxslt1.1 \
|
||||
mediainfo \
|
||||
node-less \
|
||||
npm \
|
||||
perl-base \
|
||||
php-zip \
|
||||
|
||||
@@ -17,13 +17,14 @@ build = "fsfe_website_build:main"
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
"lefthook", # pre-commit hook
|
||||
"pyright", # python typechecker
|
||||
"pytest", # python test runner
|
||||
"reuse", # for enforcing licensing
|
||||
"ruff", # python formatter and linter
|
||||
"taplo", # toml formatter
|
||||
"types-lxml", # type stubs for lxml
|
||||
"lefthook", # pre-commit hook
|
||||
"pyright", # python typechecker
|
||||
"pytest", # python test runner
|
||||
"pytest-mock", # helper for mocking in pytest
|
||||
"reuse", # for enforcing licensing
|
||||
"ruff", # python formatter and linter
|
||||
"taplo", # toml formatter
|
||||
"types-lxml", # type stubs for lxml
|
||||
]
|
||||
|
||||
[build-system]
|
||||
@@ -75,11 +76,13 @@ ignore = [
|
||||
]
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
"build/fsfe_website_build_tests/*" = [
|
||||
"D",
|
||||
] # We do not need to document the tests.
|
||||
"D", # We do not need to document the tests.
|
||||
"TRY003", # Dont worry about big exceptions in tests
|
||||
]
|
||||
[tool.pyright]
|
||||
typeCheckingMode = "strict"
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["build/fsfe_website_build_tests"]
|
||||
addopts = ["--import-mode=importlib"]
|
||||
python_files = ["*_test.py"]
|
||||
python_functions = ["*_test"]
|
||||
testpaths = ["build/fsfe_website_build_tests"]
|
||||
|
||||
@@ -9,7 +9,6 @@ we ensure translation pages for all langs are built.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import multiprocessing
|
||||
from pathlib import Path
|
||||
|
||||
from fsfe_website_build.lib.misc import (
|
||||
@@ -20,7 +19,7 @@ from lxml import etree
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run(source: Path, processes: int, working_dir: Path) -> None:
|
||||
def run(source: Path, processes: int, working_dir: Path) -> None: # noqa: ARG001
|
||||
"""Place filler indices to encourage the site.
|
||||
|
||||
This ensures that status pages for all langs are build.
|
||||
@@ -39,17 +38,10 @@ def run(source: Path, processes: int, working_dir: Path) -> None:
|
||||
index_content = etree.tostring(page, xml_declaration=True, encoding="utf-8").decode(
|
||||
"utf-8",
|
||||
)
|
||||
|
||||
with multiprocessing.Pool(processes) as pool:
|
||||
pool.starmap(
|
||||
update_if_changed,
|
||||
(
|
||||
(
|
||||
working_dir.joinpath(
|
||||
f"index.{path.name}.xhtml",
|
||||
),
|
||||
index_content,
|
||||
)
|
||||
for path in source.glob("global/languages/*")
|
||||
),
|
||||
for path in (
|
||||
working_dir.joinpath(
|
||||
f"index.{path.name}.xhtml",
|
||||
)
|
||||
for path in source.glob("global/languages/*")
|
||||
):
|
||||
update_if_changed(path, index_content)
|
||||
|
||||
@@ -24,9 +24,9 @@ def _worker(path: Path) -> tuple[str, Path, Path, list[tuple[str, str]]] | None:
|
||||
doc = etree.parse(path)
|
||||
|
||||
# all elements that carry a style attribute
|
||||
results = [
|
||||
(element.tag, element.get("style")) for element in doc.xpath("//*[@style]")
|
||||
]
|
||||
results = sorted(
|
||||
[(element.tag, element.get("style")) for element in doc.xpath("//*[@style]")]
|
||||
)
|
||||
if not results:
|
||||
return None
|
||||
|
||||
@@ -62,9 +62,9 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
|
||||
|
||||
# concurrent filtering
|
||||
with multiprocessing.Pool(processes) as pool:
|
||||
filtered = [
|
||||
result for result in pool.map(_worker, candidates) if result is not None
|
||||
]
|
||||
filtered = sorted(
|
||||
[result for result in pool.map(_worker, candidates) if result is not None]
|
||||
)
|
||||
|
||||
# dict to sort values by type, basepath, finalpath
|
||||
data: defaultdict[str, defaultdict[Path, dict[Path, list[tuple[str, str]]]]] = (
|
||||
|
||||
@@ -204,7 +204,7 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
|
||||
# Generate our file lists by priority
|
||||
# Super hardcoded unfortunately
|
||||
files_by_priority: dict[str, list[Path]] = defaultdict(list)
|
||||
for file in all_files_with_translations:
|
||||
for file in sorted(all_files_with_translations):
|
||||
for priority, searches in priorities_and_searches.items():
|
||||
# If any search matches,
|
||||
# add it to that priority and skip all subsequent priorities
|
||||
|
||||
@@ -43,14 +43,18 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
|
||||
["git", "ls-files", "-z"],
|
||||
)
|
||||
|
||||
all_files = {
|
||||
# Split on null bytes, strip and then parse into path
|
||||
path
|
||||
for path in (Path(line.strip()) for line in all_git_tracked_files.split("\x00"))
|
||||
if path.suffix in [".xhtml", ".xml"]
|
||||
and len(path.suffixes) >= 2 # noqa: PLR2004
|
||||
and lang_from_filename(path) in languages
|
||||
}
|
||||
all_files = sorted(
|
||||
{
|
||||
# Split on null bytes, strip and then parse into path
|
||||
path
|
||||
for path in (
|
||||
Path(line.strip()) for line in all_git_tracked_files.split("\x00")
|
||||
)
|
||||
if path.suffix in [".xhtml", ".xml"]
|
||||
and len(path.suffixes) >= 2 # noqa: PLR2004
|
||||
and lang_from_filename(path) in languages
|
||||
}
|
||||
)
|
||||
whitelist = {"alt"}
|
||||
groups: defaultdict[Path, list[Path]] = defaultdict(list)
|
||||
for file in all_files:
|
||||
@@ -86,15 +90,10 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
|
||||
version_el = etree.SubElement(root, "version")
|
||||
version_el.text = "1"
|
||||
|
||||
for master, details in tree.items():
|
||||
for master, details in sorted(tree.items()):
|
||||
master_el = etree.SubElement(root, "master", name=str(master))
|
||||
for other, msg in details:
|
||||
etree.SubElement(
|
||||
master_el,
|
||||
"detail",
|
||||
name=str(other),
|
||||
error=msg,
|
||||
)
|
||||
for other, msg in sorted(details):
|
||||
etree.SubElement(master_el, "detail", name=str(other), error=msg)
|
||||
|
||||
xml_bytes = etree.tostring(root, xml_declaration=True, encoding="utf-8")
|
||||
update_if_changed(work_file, xml_bytes.decode("utf-8"))
|
||||
|
||||
14
uv.lock
generated
14
uv.lock
generated
@@ -131,6 +131,7 @@ dev = [
|
||||
{ name = "lefthook" },
|
||||
{ name = "pyright" },
|
||||
{ name = "pytest" },
|
||||
{ name = "pytest-mock" },
|
||||
{ name = "reuse" },
|
||||
{ name = "ruff" },
|
||||
{ name = "taplo" },
|
||||
@@ -151,6 +152,7 @@ dev = [
|
||||
{ name = "lefthook" },
|
||||
{ name = "pyright" },
|
||||
{ name = "pytest" },
|
||||
{ name = "pytest-mock" },
|
||||
{ name = "reuse" },
|
||||
{ name = "ruff" },
|
||||
{ name = "taplo" },
|
||||
@@ -362,6 +364,18 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-mock"
|
||||
version = "3.15.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pytest" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/68/14/eb014d26be205d38ad5ad20d9a80f7d201472e08167f0bb4361e251084a9/pytest_mock-3.15.1.tar.gz", hash = "sha256:1849a238f6f396da19762269de72cb1814ab44416fa73a8686deac10b0d87a0f", size = 34036, upload-time = "2025-09-16T16:37:27.081Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/5a/cc/06253936f4a7fa2e0f48dfe6d851d9c56df896a9ab09ac019d70b760619c/pytest_mock-3.15.1-py3-none-any.whl", hash = "sha256:0a25e2eb88fe5168d535041d09a4529a188176ae608a6d249ee65abc0949630d", size = 10095, upload-time = "2025-09-16T16:37:25.734Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-debian"
|
||||
version = "1.0.1"
|
||||
|
||||
Reference in New Issue
Block a user