build: tests and more checks (#5268)
All checks were successful
continuous-integration/drone/push Build is passing

- add pytest
- add tests for all exposed lib functions
- Add a whole bunch more ruff checks and apply them

Benchmarking:

Before:
```
❯ hyperfine --runs 3 "uv run build --full"
Benchmark 1: uv run build --full
  Time (mean ± σ):     287.463 s ± 15.161 s    [User: 1573.732 s, System: 72.548 s]
  Range (min … max):   272.953 s … 303.201 s    3 runs
```

After:
```
❯ hyperfine --runs 3 "uv run build --full"
Benchmark 1: uv run build --full
  Time (mean ± σ):     280.184 s ±  9.605 s    [User: 1557.788 s, System: 72.693 s]
  Range (min … max):   269.878 s … 288.886 s    3 runs
```

So we gain a few seconds, but that is all.

The main benefit is that we have better testing and more linting.

Co-authored-by: Darragh Elliott <me@delliott.net>
Reviewed-on: #5268
This commit was merged in pull request #5268.
This commit is contained in:
2025-08-29 16:46:13 +00:00
parent cdf24dfcc6
commit f03b3c2c4c
31 changed files with 586 additions and 297 deletions

View File

@@ -45,4 +45,4 @@ rsync -rlpgoDz --delete --checksum --filter=':- .gitignore' ./ /website-cached/s
cd /website-cached/source
# run build script expanding all args passed to this script
uv run --reinstall-package build build "$@"
uv run --reinstall-package fsfe_website_build build "$@"

View File

@@ -29,7 +29,7 @@ def parse_arguments() -> argparse.Namespace:
"""
parser = argparse.ArgumentParser(
description="Python script to handle building of the fsfe webpage"
description="Python script to handle building of the fsfe webpage",
)
parser.add_argument(
"--full",
@@ -64,7 +64,7 @@ def parse_arguments() -> argparse.Namespace:
"--sites",
help="What site directories to build",
default=list(filter(lambda path: path.is_dir(), Path().glob("?*.??*"))),
type=lambda sites: list(map(lambda site: Path(site), sites.split(","))),
type=lambda sites: [Path(site) for site in sites.split(",")],
)
parser.add_argument(
"--stage",
@@ -82,11 +82,10 @@ def parse_arguments() -> argparse.Namespace:
type=str,
default="./output/final",
)
args = parser.parse_args()
return args
return parser.parse_args()
def main():
def main() -> None:
"""
Main process of the website builder
"""
@@ -120,26 +119,21 @@ def main():
args.languages
if args.languages
else sorted(
list(
map(
lambda path: path.name,
Path(".").glob("global/languages/??"),
)
),
(path.name for path in Path().glob("global/languages/??")),
)
),
pool,
)
stage_required = any(
[args.stage, "@" in args.target, ":" in args.target, "," in args.target]
[args.stage, "@" in args.target, ":" in args.target, "," in args.target],
)
working_target = Path("./output/stage" if stage_required else args.target)
# the two middle phases are unconditional, and run on a per site basis
for site in args.sites:
logger.info(f"Processing {site}")
logger.info("Processing %s", site)
if not site.exists():
logger.critical(f"Site {site} does not exist, exiting")
logger.critical("Site %s does not exist, exiting", site)
sys.exit(1)
# Early subdirs
# for subdir actions that need to be performed
@@ -154,14 +148,7 @@ def main():
args.languages
if args.languages
else sorted(
list(
set(
map(
lambda path: lang_from_filename(path),
site.glob("**/*.*.xhtml"),
)
)
)
{lang_from_filename(path) for path in site.glob("**/*.*.xhtml")},
)
)
# Processes needed only for subdir stuff

View File

@@ -4,10 +4,9 @@
import logging
import subprocess
import sys
from pathlib import Path
import lxml.etree as etree
from lxml import etree
logger = logging.getLogger(__name__)
@@ -17,9 +16,9 @@ def keys_exists(element: dict, *keys: str) -> bool:
Check if *keys (nested) exists in `element` (dict).
"""
if not isinstance(element, dict):
raise AttributeError("keys_exists() expects dict as first argument.")
if len(keys) == 0:
raise AttributeError("keys_exists() expects at least two arguments, one given.")
message = "keys_exists() expects dict as first argument."
logger.error(message)
raise TypeError(message)
_element = element
for key in keys:
@@ -34,7 +33,7 @@ def sort_dict(in_dict: dict) -> dict:
"""
Sort dict by keys
"""
return {key: val for key, val in sorted(in_dict.items(), key=lambda ele: ele[0])}
return dict(sorted(in_dict.items(), key=lambda ele: ele[0]))
def update_if_changed(path: Path, content: str) -> None:
@@ -45,7 +44,7 @@ def update_if_changed(path: Path, content: str) -> None:
write content to the file.
"""
if not path.exists() or path.read_text() != content:
logger.debug(f"Updating {path}")
logger.debug("Updating %s", path)
path.write_text(content)
@@ -58,7 +57,7 @@ def touch_if_newer_dep(file: Path, deps: list[Path]) -> None:
Essentially simple reimplementation of make deps for build targets.
"""
if any(dep.stat().st_mtime > file.stat().st_mtime for dep in deps):
logger.info(f"Touching {file}")
logger.info("Touching %s", file)
file.touch()
@@ -66,7 +65,7 @@ def delete_file(file: Path) -> None:
"""
Delete given file using pathlib
"""
logger.debug(f"Removing file {file}")
logger.debug("Removing file %s", file)
file.unlink()
@@ -78,13 +77,12 @@ def lang_from_filename(file: Path) -> str:
lang = file.with_suffix("").suffix.removeprefix(".")
# Lang codes should be the ISO 639-1 2 letter codes,
# but sometimes we use "nolang" to stop a file being built
if len(lang) != 2 and lang != "nolang":
logger.critical(
f"Language {lang} from file {file} not of correct length, exiting"
)
sys.exit(1)
else:
return lang
lang_length = 2
if len(lang) != lang_length and lang != "nolang":
message = f"Language {lang} from file {file} not of correct length"
logger.error(message)
raise RuntimeError(message)
return lang
def run_command(commands: list) -> str:
@@ -98,12 +96,14 @@ def run_command(commands: list) -> str:
)
return result.stdout.strip()
except subprocess.CalledProcessError as error:
logger.error(
f"Command: {error.cmd} returned non zero exit code {error.returncode}"
f"\nstdout: {error.stdout}"
f"\nstderr: {error.stderr}"
logger.exception(
"Command: %s returned non zero exit code %s\nstdout: %s\nstderr: %s",
error.cmd,
error.returncode,
error.stdout,
error.stderr,
)
sys.exit(1)
raise
def get_version(file: Path) -> int:
@@ -112,11 +112,11 @@ def get_version(file: Path) -> int:
"""
xslt_tree = etree.parse(Path("build/xslt/get_version.xsl"))
transform = etree.XSLT(xslt_tree)
result = transform(etree.parse(file))
result = str(result).strip()
result_tree = transform(etree.parse(file))
result = str(result_tree).strip()
if result == "":
result = str(0)
logger.debug(f"Got version: {result}")
logger.debug("Got version: %s", result)
return int(result)

View File

@@ -7,7 +7,7 @@ import re
from datetime import datetime
from pathlib import Path
import lxml.etree as etree
from lxml import etree
from fsfe_website_build.lib.misc import get_basename, get_version, lang_from_filename
@@ -70,12 +70,12 @@ def _get_set(action_file: Path, lang: str, parser: etree.XMLParser) -> etree.Ele
"""
doc_set = etree.Element("set")
list_file = action_file.with_stem(
f".{action_file.with_suffix('').stem}"
f".{action_file.with_suffix('').stem}",
).with_suffix(".xmllist")
if list_file.exists():
with list_file.open("r") as file:
for path in map(lambda line: Path(line.strip()), file):
for path in (Path(line.strip()) for line in file):
path_xml = (
path.with_suffix(f".{lang}.xml")
if path.with_suffix(f".{lang}.xml").exists()
@@ -87,10 +87,15 @@ def _get_set(action_file: Path, lang: str, parser: etree.XMLParser) -> etree.Ele
def _get_document(
action_lang: str, action_file: Path, lang: str, parser: etree.XMLParser
action_lang: str,
action_file: Path,
lang: str,
parser: etree.XMLParser,
) -> etree.Element:
document = etree.Element(
"document", language=action_lang, **_get_attributes(action_file)
"document",
language=action_lang,
**_get_attributes(action_file),
)
document.append(_get_set(action_file, lang, parser))
document.extend(_get_xmls(action_file, parser))
@@ -103,15 +108,15 @@ def _build_xmlstream(infile: Path, parser: etree.XMLParser) -> etree.Element:
the expected shortname and language flag indicate
a single xhtml page to be built
"""
logger.debug(f"infile: {infile}")
logger.debug("infile: %s", infile)
shortname = infile.with_suffix("")
lang = lang_from_filename(infile)
glob = infile.parent.joinpath(f"{get_basename(infile)}.??{infile.suffix}")
logger.debug(f"formed glob: {glob}")
logger.debug("formed glob: %s", glob)
lang_lst = list(
infile.parent.glob(f"{get_basename(infile)}.??{infile.suffix}"),
)
logger.debug(f"file lang list: {lang_lst}")
logger.debug("file lang list: %s", lang_lst)
original_lang = (
"en"
if infile.with_suffix("").with_suffix(f".en{infile.suffix}").exists()
@@ -132,7 +137,7 @@ def _build_xmlstream(infile: Path, parser: etree.XMLParser) -> etree.Element:
if infile.exists():
action_lang = lang
original_version = get_version(
shortname.with_suffix(f".{original_lang}{infile.suffix}")
shortname.with_suffix(f".{original_lang}{infile.suffix}"),
)
lang_version = get_version(shortname.with_suffix(f".{lang}{infile.suffix}"))
translation_state = (
@@ -149,7 +154,7 @@ def _build_xmlstream(infile: Path, parser: etree.XMLParser) -> etree.Element:
translation_state = "untranslated"
action_file = shortname.with_suffix(f".{action_lang}{infile.suffix}")
logger.debug(f"action_file: {action_file}")
logger.debug("action_file: %s", action_file)
# Create the root element
page = etree.Element(
"buildinfo",
@@ -180,11 +185,11 @@ def _build_xmlstream(infile: Path, parser: etree.XMLParser) -> etree.Element:
return page
def process_file(infile: Path, processor: Path) -> str:
def process_file(infile: Path, processor: Path) -> etree._XSLTResultTree:
"""
Process a given file using the correct xsl sheet
"""
logger.debug(f"Processing {infile}")
logger.debug("Processing %s", infile)
lang = lang_from_filename(infile)
parser = etree.XMLParser(remove_blank_text=True, remove_comments=True)
xmlstream = _build_xmlstream(infile, parser)
@@ -224,5 +229,5 @@ def process_file(infile: Path, processor: Path) -> str:
),
)
except AssertionError:
logger.debug(f"Output generated for file {infile} is not valid xml")
logger.debug("Output generated for file %s is not valid xml", infile)
return result

View File

@@ -13,13 +13,13 @@ def prepare_early_subdirectories(source_dir: Path, processes: int) -> None:
"""
Find any early subdir scripts in subdirectories and run them
"""
logger.info(f"Preparing Early Subdirectories for site {source_dir}")
for subdir_path in map(
lambda path: path.parent, source_dir.glob("**/early_subdir.py")
):
logger.info(f"Preparing early subdirectory {subdir_path}")
logger.info("Preparing Early Subdirectories for site %s", source_dir)
for subdir_path in (path.parent for path in source_dir.glob("**/early_subdir.py")):
logger.info("Preparing early subdirectory %s", subdir_path)
sys.path.append(str(subdir_path.resolve()))
import early_subdir
# Ignore this very sensible warning, as we do evil things
# here for our subdir scripts
import early_subdir # noqa: PLC0415
early_subdir.run(processes, subdir_path)
# Remove its path from where things can be imported

View File

@@ -10,8 +10,8 @@ import multiprocessing
from pathlib import Path
import iso639
import lxml.etree as etree
import nltk
from lxml import etree
from nltk.corpus import stopwords as nltk_stopwords
from fsfe_website_build.lib.misc import update_if_changed
@@ -29,8 +29,9 @@ def _find_teaser(document: etree.ElementTree) -> str:
:document: The parsed lxml ElementTree document
:returns: The text of the teaser or an empty string
"""
trivial_length = 10
for p in document.xpath("//body//p"):
if p.text and len(p.text.strip().split(" ")) > 10:
if p.text and len(p.text.strip().split(" ")) > trivial_length:
return p.text
return ""
@@ -39,11 +40,13 @@ def _process_file(file: Path, stopwords: set[str]) -> dict:
"""
Generate the search index entry for a given file and set of stopwords
"""
logger.debug(f"Processing file {file}")
logger.debug("Processing file %s", file)
xslt_root = etree.parse(file)
tags = map(
lambda tag: tag.get("key"),
filter(lambda tag: tag.get("key") != "front-page", xslt_root.xpath("//tag")),
tags = (
tag.get("key")
for tag in filter(
lambda tag: tag.get("key") != "front-page", xslt_root.xpath("//tag")
)
)
return {
"url": f"/{file.with_suffix('.html').relative_to(file.parents[-2])}",
@@ -69,7 +72,9 @@ def _process_file(file: Path, stopwords: set[str]) -> dict:
def index_websites(
source_dir: Path, languages: list[str], pool: multiprocessing.Pool
source_dir: Path,
languages: list[str],
pool: multiprocessing.Pool,
) -> None:
"""
Generate a search index for all sites that have a search/search.js file
@@ -81,7 +86,7 @@ def index_websites(
nltk.download("stopwords", download_dir=nltkdir, quiet=True)
# Iterate over sites
if source_dir.joinpath("search/search.js").exists():
logger.debug(f"Indexing {source_dir}")
logger.debug("Indexing %s", source_dir)
# Get all xhtml files in languages to be processed
# Create a list of tuples
@@ -90,28 +95,28 @@ def index_websites(
# Use iso639 to get the english name of the language
# from the two letter iso639-1 code we use to mark files.
# Then if that language has stopwords from nltk, use those stopwords.
files_with_stopwords = map(
lambda file: (
files_with_stopwords = (
(
file,
(
set(
nltk_stopwords.words(
iso639.Language.from_part1(
file.suffixes[0].removeprefix(".")
).name.lower()
)
file.suffixes[0].removeprefix("."),
).name.lower(),
),
)
if iso639.Language.from_part1(
file.suffixes[0].removeprefix(".")
file.suffixes[0].removeprefix("."),
).name.lower()
in nltk_stopwords.fileids()
else set()
),
),
filter(
)
for file in filter(
lambda file: file.suffixes[0].removeprefix(".") in languages,
source_dir.glob("**/*.??.xhtml"),
),
)
)
articles = pool.starmap(_process_file, files_with_stopwords)

View File

@@ -10,16 +10,20 @@ logger = logging.getLogger(__name__)
def prepare_subdirectories(
source_dir: Path, languages: list[str], processes: int
source_dir: Path,
languages: list[str],
processes: int,
) -> None:
"""
Find any subdir scripts in subdirectories and run them
"""
logger.info("Preparing Subdirectories")
for subdir_path in map(lambda path: path.parent, source_dir.glob("**/subdir.py")):
logger.info(f"Preparing subdirectory {subdir_path}")
for subdir_path in (path.parent for path in source_dir.glob("**/subdir.py")):
logger.info("Preparing subdirectory %s", subdir_path)
sys.path.append(str(subdir_path.resolve()))
import subdir
# Ignore this very sensible warning, as we do evil things
# here for our subdir scripts
import subdir # noqa: PLC0415
subdir.run(languages, processes, subdir_path)
# Remove its path from where things can be imported

View File

@@ -31,7 +31,7 @@ def phase1_run(
languages: list[str] or None,
processes: int,
pool: multiprocessing.Pool,
):
) -> None:
"""
Run all the necessary sub functions for phase1.
"""

View File

@@ -27,14 +27,14 @@ def update_css(
if directory.joinpath(name + ".less").exists() and (
not directory.joinpath(name + ".min.css").exists()
or any(
map(
lambda path: path.stat().st_mtime
> directory.joinpath(name + ".min.css").stat().st_mtime,
directory.glob("**/*.less"),
)
(
path.stat().st_mtime
> directory.joinpath(name + ".min.css").stat().st_mtime
for path in directory.glob("**/*.less")
),
)
):
logger.info(f"Compiling {name}.less")
logger.info("Compiling %s.less", name)
result = run_command(
[
"lessc",

View File

@@ -18,7 +18,7 @@ def _do_symlinking(directory: Path) -> None:
while not working_dir.joinpath("default.xsl").exists():
working_dir = working_dir.parent
directory.joinpath(".default.xsl").symlink_to(
working_dir.joinpath("default.xsl").resolve()
working_dir.joinpath("default.xsl").resolve(),
)
@@ -33,7 +33,7 @@ def update_defaultxsls(source_dir: Path, pool: multiprocessing.Pool) -> None:
logger.info("Updating default xsl's")
# Get a set of all directories containing .xhtml source files
directories = set(map(lambda path: path.parent, source_dir.glob("**/*.*.xhtml")))
directories = {path.parent for path in source_dir.glob("**/*.*.xhtml")}
# Do all directories asynchronously
pool.map(_do_symlinking, directories)

View File

@@ -6,7 +6,7 @@ import logging
import multiprocessing
from pathlib import Path
import lxml.etree as etree
from lxml import etree
from fsfe_website_build.lib.misc import get_basepath, update_if_changed
@@ -14,21 +14,18 @@ logger = logging.getLogger(__name__)
def _write_localmenus(
directory: str, files_by_dir: dict[str, list[Path]], languages: list[str]
directory: str,
files_by_dir: dict[str, list[Path]],
languages: list[str],
) -> None:
"""
Write localmenus for a given directory
"""
# Set of files with no langcode or xhtml extension
base_files = set(
map(
lambda filter_file: get_basepath(filter_file),
files_by_dir[directory],
)
)
base_files = {get_basepath(filter_file) for filter_file in files_by_dir[directory]}
for lang in languages:
file = Path(directory).joinpath(f".localmenu.{lang}.xml")
logger.debug(f"Creating {file}")
logger.debug("Creating %s", file)
page = etree.Element("feed")
# Add the subelements
@@ -37,15 +34,15 @@ def _write_localmenus(
for source_file in filter(
lambda path: path is not None,
map(
lambda base_file: base_file.with_suffix(f".{lang}.xhtml")
(
base_file.with_suffix(f".{lang}.xhtml")
if base_file.with_suffix(f".{lang}.xhtml").exists()
else (
base_file.with_suffix(".en.xhtml")
if base_file.with_suffix(".en.xhtml").exists()
else None
),
base_files,
)
for base_file in base_files
),
):
for localmenu in etree.parse(source_file).xpath("//localmenu"):
@@ -65,8 +62,8 @@ def _write_localmenus(
link=(
str(
source_file.with_suffix(".html").relative_to(
source_file.parents[0]
)
source_file.parents[0],
),
)
),
).text = localmenu.text
@@ -78,7 +75,9 @@ def _write_localmenus(
def update_localmenus(
source_dir: Path, languages: list[str], pool: multiprocessing.Pool
source_dir: Path,
languages: list[str],
pool: multiprocessing.Pool,
) -> None:
"""
Update all the .localmenu.*.xml files containing the local menus.
@@ -94,20 +93,20 @@ def update_localmenus(
if xslt_root.xpath("//localmenu"):
directory = xslt_root.xpath("//localmenu/@dir")
directory = (
directory[0] if directory else str(file.parent.relative_to(Path(".")))
directory[0] if directory else str(file.parent.relative_to(Path()))
)
if directory not in files_by_dir:
files_by_dir[directory] = set()
files_by_dir[directory].add(file)
for directory in files_by_dir:
files_by_dir[directory] = sorted(list(files_by_dir[directory]))
for directory, files in files_by_dir.items():
files_by_dir[directory] = sorted(files)
# If any of the source files has been updated, rebuild all .localmenu.*.xml
dirs = filter(
lambda directory: (
any(
map(
lambda file: (
(
(
(not Path(directory).joinpath(".localmenu.en.xml").exists())
or (
file.stat().st_mtime
@@ -116,14 +115,14 @@ def update_localmenus(
.stat()
.st_mtime
)
),
files_by_dir[directory],
)
)
for file in files_by_dir[directory]
),
)
),
files_by_dir,
)
pool.starmap(
_write_localmenus,
map(lambda directory: (directory, files_by_dir, languages), dirs),
((directory, files_by_dir, languages) for directory in dirs),
)

View File

@@ -19,13 +19,12 @@ def _update_sheet(file: Path) -> None:
Update a given xsl file if any of its dependant xsl files have been updated
"""
xslt_root = etree.parse(file)
imports = map(
lambda imp: file.parent.joinpath(imp.get("href"))
.resolve()
.relative_to(Path(".").resolve()),
xslt_root.xpath(
"//xsl:import", namespaces={"xsl": "http://www.w3.org/1999/XSL/Transform"}
),
imports = (
file.parent.joinpath(imp.get("href")).resolve().relative_to(Path.cwd())
for imp in xslt_root.xpath(
"//xsl:import",
namespaces={"xsl": "http://www.w3.org/1999/XSL/Transform"},
)
)
touch_if_newer_dep(file, imports)

View File

@@ -7,7 +7,7 @@ import multiprocessing
from pathlib import Path
from xml.sax.saxutils import escape
import lxml.etree as etree
from lxml import etree
from fsfe_website_build.lib.misc import (
get_basepath,
@@ -62,7 +62,11 @@ def _update_tag_sets(
)
if count > 0:
etree.SubElement(
page, "tag", section=section, key=tag, count=str(count)
page,
"tag",
section=section,
key=tag,
count=str(count),
).text = label
update_if_changed(
site.joinpath(f"tags/.tags.{lang}.xml"),
@@ -71,7 +75,9 @@ def _update_tag_sets(
def update_tags(
source_dir: Path, languages: list[str], pool: multiprocessing.Pool
source_dir: Path,
languages: list[str],
pool: multiprocessing.Pool,
) -> None:
"""
Update Tag pages, xmllists and xmls
@@ -92,7 +98,7 @@ def update_tags(
the tagged-* are correctly deleted.
"""
if source_dir.joinpath("tags").exists():
logger.info(f"Updating tags for {source_dir}")
logger.info("Updating tags for %s", source_dir)
# Create a complete and current map of which tag is used in which files
files_by_tag = {}
tags_by_lang = {}
@@ -140,18 +146,18 @@ def update_tags(
logger.debug("Updating tag pages")
pool.starmap(
_update_tag_pages,
map(lambda tag: (source_dir, tag, languages), files_by_tag.keys()),
((source_dir, tag, languages) for tag in files_by_tag),
)
logger.debug("Updating tag lists")
pool.starmap(
update_if_changed,
map(
lambda tag: (
(
(
Path(f"{source_dir}/tags/.tagged-{tag}.xmllist"),
("\n".join(map(lambda file: str(file), files_by_tag[tag])) + "\n"),
),
files_by_tag.keys(),
("\n".join(str(file) for file in files_by_tag[tag]) + "\n"),
)
for tag in files_by_tag
),
)
@@ -166,13 +172,13 @@ def update_tags(
filter(
lambda path: section in str(path.parent),
files_by_tag[tag],
)
)
),
),
)
pool.starmap(
_update_tag_sets,
map(
lambda lang: (source_dir, lang, filecount, files_by_tag, tags_by_lang),
filter(lambda lang: lang in languages, tags_by_lang.keys()),
(
(source_dir, lang, filecount, files_by_tag, tags_by_lang)
for lang in filter(lambda lang: lang in languages, tags_by_lang.keys())
),
)

View File

@@ -9,7 +9,7 @@ import multiprocessing
import re
from pathlib import Path
import lxml.etree as etree
from lxml import etree
from fsfe_website_build.lib.misc import (
get_basepath,
@@ -22,7 +22,11 @@ logger = logging.getLogger(__name__)
def _update_for_base(
base: Path, all_xml: set[Path], nextyear: str, thisyear: str, lastyear: str
base: Path,
all_xml: set[Path],
nextyear: str,
thisyear: str,
lastyear: str,
) -> None:
"""
Update the xmllist for a given base file
@@ -57,13 +61,15 @@ def _update_for_base(
# contains tag if tag in pattern
and (
any(
map(
lambda xml_file_with_ending: etree.parse(
xml_file_with_ending
(
etree.parse(
xml_file_with_ending,
).find(f".//tag[@key='{tag}']")
is not None,
xml_file.parent.glob(f"{xml_file.name}.*.xml"),
)
is not None
for xml_file_with_ending in xml_file.parent.glob(
f"{xml_file.name}.*.xml"
)
),
)
if tag != ""
else True
@@ -74,7 +80,7 @@ def _update_for_base(
):
matching_files.add(str(xml_file))
for file in Path("").glob(f"{base}.??.xhtml"):
for file in Path().glob(f"{base}.??.xhtml"):
xslt_root = etree.parse(file)
for module in xslt_root.xpath("//module"):
matching_files.add(f"global/data/modules/{module.get('id')}".strip())
@@ -86,46 +92,39 @@ def _update_for_base(
def _update_module_xmllists(
source_dir: Path, languages: list[str], pool: multiprocessing.Pool
source_dir: Path,
languages: list[str],
pool: multiprocessing.Pool,
) -> None:
"""
Update .xmllist files for .sources and .xhtml containing <module>s
"""
logger.info("Updating XML lists")
# Get all the bases and stuff before multithreading the update bit
all_xml = set(
map(
lambda path: get_basepath(path),
filter(
lambda path: lang_from_filename(path) in languages,
list(source_dir.glob("**/*.*.xml"))
+ list(Path("global/").glob("**/*.*.xml")),
),
all_xml = {
get_basepath(path)
for path in filter(
lambda path: lang_from_filename(path) in languages,
list(source_dir.glob("**/*.*.xml"))
+ list(Path("global/").glob("**/*.*.xml")),
)
)
source_bases = set(
map(
lambda path: path.with_suffix(""),
source_dir.glob("**/*.sources"),
}
source_bases = {path.with_suffix("") for path in source_dir.glob("**/*.sources")}
module_bases = {
get_basepath(path)
for path in filter(
lambda path: lang_from_filename(path) in languages
and etree.parse(path).xpath("//module"),
source_dir.glob("**/*.*.xhtml"),
)
)
module_bases = set(
map(
lambda path: get_basepath(path),
filter(
lambda path: lang_from_filename(path) in languages
and etree.parse(path).xpath("//module"),
source_dir.glob("**/*.*.xhtml"),
),
)
)
}
all_bases = source_bases | module_bases
nextyear = str(datetime.datetime.today().year + 1)
thisyear = str(datetime.datetime.today().year)
lastyear = str(datetime.datetime.today().year - 1)
pool.starmap(
_update_for_base,
map(lambda base: (base, all_xml, nextyear, thisyear, lastyear), all_bases),
((base, all_xml, nextyear, thisyear, lastyear) for base in all_bases),
)
@@ -136,13 +135,14 @@ def _check_xmllist_deps(file: Path) -> None:
xmls = set()
with file.open(mode="r") as fileobj:
for line in fileobj:
for newfile in Path("").glob(line.strip() + ".??.xml"):
for newfile in Path().glob(line.strip() + ".??.xml"):
xmls.add(newfile)
touch_if_newer_dep(file, list(xmls))
def _touch_xmllists_with_updated_deps(
source_dir: Path, languages: list[str], pool: multiprocessing.Pool
source_dir: Path,
pool: multiprocessing.Pool,
) -> None:
"""
Touch all .xmllist files where one of the contained files has changed
@@ -152,7 +152,9 @@ def _touch_xmllists_with_updated_deps(
def update_xmllists(
source_dir: Path, languages: list[str], pool: multiprocessing.Pool
source_dir: Path,
languages: list[str],
pool: multiprocessing.Pool,
) -> None:
"""
Update XML filelists (*.xmllist)
@@ -172,4 +174,4 @@ def update_xmllists(
the tagged-* are correctly deleted.
"""
_update_module_xmllists(source_dir, languages, pool)
_touch_xmllists_with_updated_deps(source_dir, languages, pool)
_touch_xmllists_with_updated_deps(source_dir, pool)

View File

@@ -16,7 +16,7 @@ def _copy_file(target: Path, source_dir: Path, source_file: Path) -> None:
not target_file.exists()
or source_file.stat().st_mtime > target_file.stat().st_mtime
):
logger.debug(f"Copying {source_file} to {target_file}")
logger.debug("Copying %s to %s", source_file, target_file)
target_file.parent.mkdir(parents=True, exist_ok=True)
target_file.write_bytes(source_file.read_bytes())
# preserve file modes
@@ -30,9 +30,9 @@ def copy_files(source_dir: Path, pool: multiprocessing.Pool, target: Path) -> No
logger.info("Copying over media and misc files")
pool.starmap(
_copy_file,
map(
lambda file: (target, source_dir, file),
list(
(
(target, source_dir, file)
for file in list(
filter(
lambda path: path.is_file()
and path.suffix
@@ -51,9 +51,9 @@ def copy_files(source_dir: Path, pool: multiprocessing.Pool, target: Path) -> No
]
and path.name not in ["Makefile"],
source_dir.glob("**/*"),
)
),
)
# Special case: hard coded pass over order items xml required by cgi script
+ list(source_dir.glob("order/data/items.en.xml")),
+ list(source_dir.glob("order/data/items.en.xml"))
),
)

View File

@@ -13,14 +13,15 @@ logger = logging.getLogger(__name__)
def _do_symlinking(target: Path) -> None:
source = target.parent.joinpath(
f"index{target.with_suffix('').suffix}{target.suffix}"
f"index{target.with_suffix('').suffix}{target.suffix}",
)
if not source.exists():
source.symlink_to(target.relative_to(source.parent))
def create_index_symlinks(
source_dir: Path, pool: multiprocessing.Pool, target: Path
pool: multiprocessing.Pool,
target: Path,
) -> None:
"""
Create index.* symlinks

View File

@@ -16,7 +16,8 @@ def _do_symlinking(target: Path) -> None:
def create_language_symlinks(
source_dir: Path, pool: multiprocessing.Pool, target: Path
pool: multiprocessing.Pool,
target: Path,
) -> None:
"""
Create symlinks from file.<lang>.html to file.html.<lang>

View File

@@ -13,18 +13,20 @@ logger = logging.getLogger(__name__)
def _run_process(
target_file: Path, processor: Path, source_file: Path, basename: Path, lang: str
):
target_file: Path,
processor: Path,
source_file: Path,
basename: Path,
lang: str,
) -> None:
# if the target file does not exist, we make it
if not target_file.exists() or any(
# If any source file is newer than the file to be generated
# we recreate the generated file
# if the source file does not exist, ignore it.
map(
lambda file: (
file.exists() and file.stat().st_mtime > target_file.stat().st_mtime
),
[
(
(file.exists() and file.stat().st_mtime > target_file.stat().st_mtime)
for file in [
(
source_file
if source_file.exists()
@@ -33,31 +35,32 @@ def _run_process(
processor,
(
source_file.parent.joinpath("." + basename.name).with_suffix(
".xmllist"
".xmllist",
)
),
Path(f"global/data/texts/.texts.{lang}.xml"),
Path(f"global/data/topbanner/.topbanner.{lang}.xml"),
Path("global/data/texts/texts.en.xml"),
],
)
]
),
):
logger.debug(f"Building {target_file}")
logger.debug("Building %s", target_file)
result = process_file(source_file, processor)
target_file.parent.mkdir(parents=True, exist_ok=True)
result.write_output(target_file)
def _process_dir(
source_dir: Path, languages: list[str], target: Path, directory: Path
source_dir: Path,
languages: list[str],
target: Path,
directory: Path,
) -> None:
for basename in set(
map(lambda path: path.with_suffix(""), directory.glob("*.??.xhtml"))
):
for basename in {path.with_suffix("") for path in directory.glob("*.??.xhtml")}:
for lang in languages:
source_file = basename.with_suffix(f".{lang}.xhtml")
target_file = target.joinpath(
source_file.relative_to(source_dir)
source_file.relative_to(source_dir),
).with_suffix(".html")
processor = (
basename.with_suffix(".xsl")
@@ -68,20 +71,26 @@ def _process_dir(
def _process_stylesheet(
source_dir: Path, languages: list[str], target: Path, processor: Path
source_dir: Path,
languages: list[str],
target: Path,
processor: Path,
) -> None:
basename = get_basepath(processor)
destination_base = target.joinpath(basename.relative_to(source_dir))
for lang in languages:
target_file = destination_base.with_suffix(
f".{lang}{processor.with_suffix('').suffix}"
f".{lang}{processor.with_suffix('').suffix}",
)
source_file = basename.with_suffix(f".{lang}.xhtml")
_run_process(target_file, processor, source_file, basename, lang)
def process_files(
source_dir: Path, languages: list[str], pool: multiprocessing.Pool, target: Path
source_dir: Path,
languages: list[str],
pool: multiprocessing.Pool,
target: Path,
) -> None:
"""
Build .html, .rss and .ics files from .xhtml sources
@@ -92,24 +101,24 @@ def process_files(
logger.info("Processing xhtml files")
pool.starmap(
_process_dir,
map(
lambda directory: (source_dir, languages, target, directory),
set(map(lambda path: path.parent, source_dir.glob("**/*.*.xhtml"))),
(
(source_dir, languages, target, directory)
for directory in {path.parent for path in source_dir.glob("**/*.*.xhtml")}
),
)
logger.info("Processing rss files")
pool.starmap(
_process_stylesheet,
map(
lambda processor: (source_dir, languages, target, processor),
source_dir.glob("**/*.rss.xsl"),
(
(source_dir, languages, target, processor)
for processor in source_dir.glob("**/*.rss.xsl")
),
)
logger.info("Processing ics files")
pool.starmap(
_process_stylesheet,
map(
lambda processor: (source_dir, languages, target, processor),
source_dir.glob("**/*.ics.xsl"),
(
(source_dir, languages, target, processor)
for processor in source_dir.glob("**/*.ics.xsl")
),
)

View File

@@ -18,13 +18,16 @@ logger = logging.getLogger(__name__)
def phase2_run(
source_dir: Path, languages: list[str], pool: multiprocessing.Pool, target: Path
):
source_dir: Path,
languages: list[str],
pool: multiprocessing.Pool,
target: Path,
) -> None:
"""
Run all the necessary sub functions for phase2.
"""
logger.info("Starting Phase 2 - Generating output")
process_files(source_dir, languages, pool, target)
create_index_symlinks(source_dir, pool, target)
create_language_symlinks(source_dir, pool, target)
create_index_symlinks(pool, target)
create_language_symlinks(pool, target)
copy_files(source_dir, pool, target)

View File

@@ -32,12 +32,12 @@ def serve_websites(serve_dir: str, base_port: int, increment_number: int) -> Non
Takes a target directory, a base port and a number to increment port by per dir
It then serves all directories over http on localhost
"""
dirs = sorted(list(filter(lambda path: path.is_dir(), Path(serve_dir).iterdir())))
dirs = sorted(filter(lambda path: path.is_dir(), Path(serve_dir).iterdir()))
serves = []
for index, directory in enumerate(dirs):
port = base_port + (increment_number * index)
url = f"http://127.0.0.1:{port}"
logging.info(f"{directory.name} served at {url}")
logger.info("%s served at %s", directory.name, url)
if shutil.which("xdg-open") is not None:
run_command(["xdg-open", url + "/index.en.html"])
serves.append((str(directory), port))

View File

@@ -39,12 +39,12 @@ def stage_to_target(stagedir: Path, targets: str, pool: multiprocessing.Pool) ->
logger.info("Rsyncing from stage dir to target dir(s)")
pool.starmap(
_rsync,
map(
lambda target: (
(
(
stagedir,
(target if "?" not in target else target.split("?")[0]),
(int(target.split("?")[1]) if "?" in target else 22),
),
targets.split(","),
)
for target in targets.split(",")
),
)

View File

@@ -0,0 +1,113 @@
import subprocess
from pathlib import Path
from time import sleep
import pytest
from fsfe_website_build.lib.misc import (
delete_file,
get_basename,
get_basepath,
get_version,
keys_exists,
lang_from_filename,
run_command,
sort_dict,
touch_if_newer_dep,
update_if_changed,
)
def keys_exists_test() -> None:
    """keys_exists reports whether a chain of keys resolves in a nested dict."""
    data = {"a": {"b": {"c": 42}}}
    present = keys_exists(data, "a", "b", "c")
    missing = keys_exists(data, "a", "missing")
    assert present is True
    assert missing is False
assert keys_exists(nested, "a", "missing") is False
def keys_exists_bad_input_test() -> None:
    """A non-dict input raises TypeError; an empty dict simply lacks the key."""
    with pytest.raises(TypeError):
        keys_exists([], "a")
    result = keys_exists({}, "a")
    assert result is False
def sort_dict_test() -> None:
    """sort_dict returns an equivalent dict whose keys are in sorted order."""
    unsorted = {"b": 2, "a": 1, "c": 3}
    expected = {"a": 1, "b": 2, "c": 3}
    assert sort_dict(unsorted) == expected
def update_if_changed_test(tmp_path: Path) -> None:
    """update_if_changed writes new content and leaves identical content alone."""
    target = tmp_path / "foo.txt"
    first = "hello"
    update_if_changed(target, first)
    assert target.read_text() == first
    # Writing the same content again must be a no-op.
    update_if_changed(target, first)
    assert target.read_text() == first
    # Different content must replace the old file body.
    second = "world"
    update_if_changed(target, second)
    assert target.read_text() == second
# ---------- touch_if_newer_dep ----------
def touch_if_newer_dep_test(tmp_path: Path) -> None:
    """touch_if_newer_dep bumps the target's mtime when a dependency is newer.

    The original test slept for a full second to make the dependency's mtime
    newer, which slows the suite and is flaky on filesystems with coarse
    mtime resolution. Instead, push the target's mtime explicitly into the
    past with os.utime — deterministic and instant.
    """
    import os

    target = tmp_path / "target"
    target.write_text("target")
    dep = tmp_path / "dep"
    dep.write_text("dep")
    # Force the target to look strictly older than the dependency.
    past = dep.stat().st_mtime - 10
    os.utime(target, (past, past))
    mtime_before = target.stat().st_mtime
    touch_if_newer_dep(target, [dep])
    mtime_after = target.stat().st_mtime
    assert mtime_after > mtime_before
def delete_file_test(tmp_path: Path) -> None:
    """delete_file removes the given file from disk."""
    victim = tmp_path / "gone.txt"
    victim.write_text("bye")
    delete_file(victim)
    assert victim.exists() is False
def lang_from_filename_test() -> None:
    """The language code is taken from the second-to-last file-name suffix."""
    cases = (
        ("index.en.html", "en"),
        ("index.nolang.html", "nolang"),
    )
    for name, expected in cases:
        assert lang_from_filename(Path(name)) == expected
def lang_from_filename_bad_test() -> None:
    """A three-letter pseudo language code is rejected with RuntimeError."""
    bad = Path("index.eng.html")
    with pytest.raises(RuntimeError):
        lang_from_filename(bad)
def run_command_test() -> None:
    """A command with a nonzero exit status surfaces as CalledProcessError."""
    failing_command = ["false"]
    with pytest.raises(subprocess.CalledProcessError):
        run_command(failing_command)
def run_command_ok_test() -> None:
    """On success run_command returns the command's output as 'success'."""
    result = run_command(["echo", "success"])
    assert result == "success"
def get_version_valid_test(tmp_path: Path) -> None:
    """get_version reads the integer <version> element from an XML file."""
    expected = 3
    page = tmp_path / "page.xml"
    page.write_text(f"<root><version>{expected}</version></root>")
    assert get_version(page) == expected
def get_version_no_version_test(tmp_path: Path) -> None:
    """A document without a <version> element yields version 0."""
    page = tmp_path / "page.xml"
    page.write_text("<root/>")
    result = get_version(page)
    assert result == 0
def get_basepath_test() -> None:
    """get_basepath strips all suffixes while keeping the directory part."""
    cases = {
        Path("a.b.c"): Path("a"),
        Path("a/b.c.d"): Path("a/b"),
    }
    for given, expected in cases.items():
        assert get_basepath(given) == expected
def get_basename_test() -> None:
    """get_basename returns the suffix-free file name without its directory."""
    cases = {
        Path("a.b.c"): "a",
        Path("a/b.c.d"): "b",
    }
    for given, expected in cases.items():
        assert get_basename(given) == expected

View File

@@ -0,0 +1,70 @@
import textwrap
from pathlib import Path
import pytest
from fsfe_website_build.lib.process_file import process_file
from lxml import etree
@pytest.fixture
def sample_xsl(tmp_path: Path) -> Path:
    """Minimal XSLT that just copies the input through."""
    # An identity transform: every node and attribute is copied verbatim, so
    # any link rewriting observed in the output comes from process_file
    # itself, not from the stylesheet.
    xsl_path = tmp_path / "sample.xsl"
    xsl_path.write_text(
        textwrap.dedent(
            """
            <?xml version="1.0" encoding="UTF-8"?>
            <xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
                <xsl:output method="xml" indent="no"/>
                <xsl:template match="/|node()|@*">
                    <xsl:copy>
                        <xsl:apply-templates select="node()|@*"/>
                    </xsl:copy>
                </xsl:template>
            </xsl:stylesheet>
            """,
        ).strip(),  # strip so the XML declaration sits on the very first byte
    )
    return xsl_path
@pytest.mark.parametrize(
    ("lang", "link_in", "link_out"),
    [
        # Page links gain the language infix before the extension,
        # for both relative and absolute paths.
        ("en", "foo/bar.html", "foo/bar.en.html"),
        ("en", "/foo/bar.html", "/foo/bar.en.html"),
        # Feed (.rss) and calendar (.ics) links are localised the same way.
        ("de", "news.rss", "news.de.rss"),
        ("fr", "events.ics", "events.fr.ics"),
        # Directory links resolve to the localised index page.
        ("en", "folder/", "folder/index.en.html"),
        ("es", "/folder/", "/folder/index.es.html"),
        # External URLs and non-HTTP schemes are left untouched.
        ("en", "https://example.com/page.html", "https://example.com/page.html"),
        ("en", "mailto:someone@example.com", "mailto:someone@example.com"),
    ],
)
def process_file_link_rewrites_test(
    tmp_path: Path,
    sample_xsl: Path,
    lang: str,
    link_in: str,
    link_out: str,
) -> None:
    """Check that all link transformations work as expected."""
    # Build a throwaway source document containing exactly one link,
    # flagged with test_url so it is easy to select back out of the result.
    xml_path = tmp_path / f"dummy.{lang}.xml"
    xml_path.write_text(
        textwrap.dedent(
            f"""
            <?xml version="1.0" encoding="UTF-8"?>
            <root>
                <a href="{link_in}" test_url="true">link</a>
            </root>
            """,
        ).strip(),
    )
    result_doc = process_file(xml_path, sample_xsl)
    assert isinstance(result_doc, etree._XSLTResultTree)
    # We get a list, but as we have only one link in the above sample
    # we only need to care about the first one
    link_node = result_doc.xpath("//a[@href and @test_url]")[0]
    assert link_node.get("href") == link_out

View File

@@ -12,10 +12,14 @@ from fsfe_website_build.lib.misc import update_if_changed
logger = logging.getLogger(__name__)
def _gen_archive_index(working_dir: Path, languages: list[str], directory: Path):
logger.debug(f"Operating on dir {directory}")
def _gen_archive_index(
working_dir: Path,
languages: list[str],
directory: Path,
) -> None:
logger.debug("Operating on dir %s", directory)
for lang in languages:
logger.debug(f"Operating on lang {lang}")
logger.debug("Operating on lang %s", lang)
template = working_dir.joinpath(f"archive-template.{lang}.xhtml")
if template.exists():
logger.debug("Template Exists!")
@@ -24,7 +28,7 @@ def _gen_archive_index(working_dir: Path, languages: list[str], directory: Path)
update_if_changed(directory.joinpath(f"index.{lang}.xhtml"), content)
def _gen_index_sources(directory: Path):
def _gen_index_sources(directory: Path) -> None:
update_if_changed(
directory.joinpath("index.sources"),
dedent(
@@ -32,7 +36,7 @@ def _gen_index_sources(directory: Path):
{directory}/event-*:[]
{directory}/.event-*:[]
{directory.parent}/.localmenu:[]
"""
""",
),
)
@@ -42,7 +46,7 @@ def run(languages: list[str], processes: int, working_dir: Path) -> None:
preparation for news subdirectory
"""
with multiprocessing.Pool(processes) as pool:
years = list(sorted(working_dir.glob("[0-9][0-9][0-9][0-9]")))
years = sorted(working_dir.glob("[0-9][0-9][0-9][0-9]"))
# Copy news archive template to each of the years
pool.starmap(
_gen_archive_index,

View File

@@ -8,25 +8,25 @@ import os
from pathlib import Path
from urllib.parse import urlparse
import lxml.etree as etree
import requests
from fsfe_website_build.lib.misc import update_if_changed
from lxml import etree
logger = logging.getLogger(__name__)
def run(languages: list[str], processes: int, working_dir: Path) -> None:
def run(languages: list[str], processes: int, working_dir: Path) -> None: # noqa: ARG001 # We allow unused args for subdirs
"""
Internal subdir preparation
"""
logger.info("Creating activities file")
raw_url = urlparse(
"https://git.fsfe.org/FSFE/activities/raw/branch/master/activities.csv"
"https://git.fsfe.org/FSFE/activities/raw/branch/master/activities.csv",
)
git_token = os.environ.get("FSFE_WEBSITE_GIT_TOKEN")
if git_token is None:
logger.warn(
"FSFE_WEBSITE_GIT_TOKEN is not set, skipping generation of activities file"
logger.warning(
"FSFE_WEBSITE_GIT_TOKEN is not set, skipping generation of activities file",
)
return
@@ -34,8 +34,9 @@ def run(languages: list[str], processes: int, working_dir: Path) -> None:
r = requests.get(url)
if not r.ok:
logger.error("Failed to retrieve activities file")
raise Exception("Failed to retrieve activities file")
message = "Failed to retrieve activities file"
logger.error(message)
raise RuntimeError(message)
activities_csv = csv.reader(r.text.split("\n")[1:], delimiter="\t")

View File

@@ -7,16 +7,20 @@ import multiprocessing
from pathlib import Path
from textwrap import dedent
import lxml.etree as etree
from fsfe_website_build.lib.misc import lang_from_filename, update_if_changed
from lxml import etree
logger = logging.getLogger(__name__)
def _gen_archive_index(working_dir: Path, languages: list[str], directory: Path):
logger.debug(f"Operating on dir {directory}")
def _gen_archive_index(
working_dir: Path,
languages: list[str],
directory: Path,
) -> None:
logger.debug("Operating on dir %s", directory)
for lang in languages:
logger.debug(f"Operating on lang {lang}")
logger.debug("Operating on lang %s", lang)
template = working_dir.joinpath(f"archive-template.{lang}.xhtml")
if template.exists():
logger.debug("Template Exists!")
@@ -25,7 +29,7 @@ def _gen_archive_index(working_dir: Path, languages: list[str], directory: Path)
update_if_changed(directory.joinpath(f"index.{lang}.xhtml"), content)
def _gen_index_sources(directory: Path):
def _gen_index_sources(directory: Path) -> None:
update_if_changed(
directory.joinpath("index.sources"),
dedent(
@@ -33,13 +37,13 @@ def _gen_index_sources(directory: Path):
{directory}/news-*:[]
{directory}/.news-*:[]
{directory.parent}/.localmenu:[]
"""
""",
),
)
def _gen_xml_files(working_dir: Path, file: Path):
logger.debug(f"Transforming {file}")
def _gen_xml_files(working_dir: Path, file: Path) -> None:
logger.debug("Transforming %s", file)
# Would be more efficient to pass this to the function,
# but this causes a pickling error,
# and the faq seems to indicate passing around these objects
@@ -54,7 +58,7 @@ def _gen_xml_files(working_dir: Path, file: Path):
)
update_if_changed(
file.parent.joinpath(
f".{file.with_suffix('').stem}{file.with_suffix('').suffix}.xml"
f".{file.with_suffix('').stem}{file.with_suffix('').suffix}.xml",
),
str(result),
)
@@ -65,7 +69,7 @@ def run(languages: list[str], processes: int, working_dir: Path) -> None:
preparation for news subdirectory
"""
with multiprocessing.Pool(processes) as pool:
years = list(sorted(working_dir.glob("[0-9][0-9][0-9][0-9]")))
years = sorted(working_dir.glob("[0-9][0-9][0-9][0-9]"))
# Copy news archive template to each of the years
pool.starmap(
_gen_archive_index,

View File

@@ -16,6 +16,11 @@ pre-commit:
glob: "*.py"
run: ruff format {staged_files}
stage_fixed: true
pytest:
glob:
- "*.py"
- "build/xslt/*.xsl"
run: pytest
shfmt:
glob: "*.sh"
run: shfmt --write {staged_files}

View File

@@ -22,7 +22,7 @@ dependencies = [
build = "fsfe_website_build:main"
[dependency-groups]
dev = ["ruff", "lefthook", "taplo"]
dev = ["ruff", "lefthook", "taplo", "pytest"]
[build-system]
requires = ["uv_build"]
@@ -33,16 +33,42 @@ module-name = "fsfe_website_build"
module-root = "build"
[tool.ruff.lint]
# See https://docs.astral.sh/ruff/rules/ for information
select = [
"A", # prevent using keywords that clobber python builtins
"B", # bugbear: security warnings
"E", # pycodestyle
"F", # pyflakes
"I", # ordered import
"ISC", # implicit string concatenation
"N", # pep 8 naming
"RUF", # the ruff developer's own rules
"UP", # alert you when better syntax is available in your python version
"W", # pycodestyle warnings
"Q", # flake 8 quotes
"ANN", # Flake 8 type annotations
"A", # prevent using keywords that clobber python builtins
"ARG", # Unused arguments
"ASYNC", # Flake 8 async
"B", # bugbear: security warnings
"C4", # Comprehensions
"E", # pycodestyle
"ERA", # Commented out code
"F", # pyflakes
"FURB", # More upgrades to newer styles
"G", # Logging Formatting
"I", # ordered import
"ISC", # implicit string concatenation
"LOG", # Logging stuff
"N", # pep 8 naming
"PERF", # Some performance stuff
"PIE", # Some extra checks
"PL", # Pylint
"PTH", # Use pathlib
"PT", # Pytest style
"Q", # flake 8 quotes
"RET", # Return types
"RSE", # Unneeded exception parentheses
"RUF", # the ruff developer's own rules
"SIM", # Simplify
"T20", # Warn about prints
"TC", # Type checking
"TRY", # Error handling stuff
"UP", # alert you when better syntax is available in your python version
"UP", # Update syntax to newer versions
"W", # pycodestyle warnings
]
[tool.pytest.ini_options]
testpaths = ["build/fsfe_website_build_tests"]
python_files = ["*_test.py"]
python_functions = ["*_test"]

View File

@@ -6,10 +6,10 @@ import logging
import multiprocessing
from pathlib import Path
import lxml.etree as etree
from fsfe_website_build.lib.misc import (
update_if_changed,
)
from lxml import etree
logger = logging.getLogger(__name__)
@@ -31,19 +31,19 @@ def run(processes: int, working_dir: Path) -> None:
head = etree.SubElement(page, "body")
index_content = etree.tostring(page, xml_declaration=True, encoding="utf-8").decode(
"utf-8"
"utf-8",
)
with multiprocessing.Pool(processes) as pool:
pool.starmap(
update_if_changed,
map(
lambda path: (
(
(
working_dir.joinpath(
f"index.{path.name}.xhtml",
),
index_content,
),
Path().glob("global/languages/*"),
)
for path in Path().glob("global/languages/*")
),
)

View File

@@ -7,18 +7,18 @@ import logging
import multiprocessing
from pathlib import Path
import lxml.etree as etree
from fsfe_website_build.lib.misc import (
get_basepath,
get_version,
run_command,
update_if_changed,
)
from lxml import etree
logger = logging.getLogger(__name__)
def _generate_translation_data(lang: str, priority: int, file: Path) -> dict:
def _generate_translation_data(lang: str, file: Path) -> dict:
page = get_basepath(file)
ext = file.suffix.removeprefix(".")
working_file = file.with_suffix("").with_suffix(f".{lang}.{ext}")
@@ -75,15 +75,15 @@ def _get_text_ids(file: Path) -> list[str]:
return list(
filter(
lambda text_id: text_id is not None,
map(lambda elem: elem.get("id"), root.iter()),
)
(elem.get("id") for elem in root.iter()),
),
)
def _create_overview(
target_dir: Path,
data: dict[str : dict[int : list[dict]]],
):
) -> None:
work_file = target_dir.joinpath("langs.en.xml")
if not target_dir.exists():
target_dir.mkdir(parents=True)
@@ -109,7 +109,7 @@ def _create_overview(
)
result_str = etree.tostring(page, xml_declaration=True, encoding="utf-8").decode(
"utf-8"
"utf-8",
)
update_if_changed(work_file, result_str)
@@ -124,9 +124,9 @@ def _create_translation_file(
page = etree.Element("translation-status")
version = etree.SubElement(page, "version")
version.text = "1"
for priority in data:
for priority, file_data_list in data.items():
prio = etree.SubElement(page, "priority", value=str(priority))
for file_data in data[priority]:
for file_data in file_data_list:
etree.SubElement(prio, "file", **file_data)
en_texts_file = Path("global/data/texts/texts.en.xml")
@@ -152,7 +152,7 @@ def _create_translation_file(
# Save to XML file
result_str = etree.tostring(page, xml_declaration=True, encoding="utf-8").decode(
"utf-8"
"utf-8",
)
update_if_changed(work_file, result_str)
@@ -164,7 +164,7 @@ def run(languages: list[str], processes: int, working_dir: Path) -> None:
Xmls are placed in target_dir, and only languages are processed.
"""
target_dir = working_dir.joinpath("data/")
logger.debug(f"Building index of status of translations into dir {target_dir}")
logger.debug("Building index of status of translations into dir %s", target_dir)
# TODO
# Run generating all this stuff only if some xhtml|xml files have been changed
@@ -178,8 +178,8 @@ def run(languages: list[str], processes: int, working_dir: Path) -> None:
filter(
lambda path: path.suffix in [".xhtml", ".xml"],
# Split on null bytes, strip and then parse into path
map(lambda line: Path(line.strip()), all_git_tracked_files.split("\x00")),
)
(Path(line.strip()) for line in all_git_tracked_files.split("\x00")),
),
)
priorities_and_searches = {
"1": [
@@ -200,24 +200,18 @@ def run(languages: list[str], processes: int, working_dir: Path) -> None:
"**/fsfe.org/contribute/*.en.xhtml",
],
"5": ["**/fsfe.org/order/**/*.en.xml", "**/fsfe.org/order/**/*.en.xhtml"],
# "6": ["**/fsfe.org/**/*.en.xml", "**/fsfe.org/**/*.en.xhtml"],
}
with multiprocessing.Pool(processes) as pool:
# Generate our file lists by priority
# Super hardcoded unfortunately
files_by_priority = dict()
files_by_priority = {}
for file in all_files_with_translations:
for priority in priorities_and_searches:
for priority, searches in priorities_and_searches.items():
if priority not in files_by_priority:
files_by_priority[priority] = list()
files_by_priority[priority] = []
# If any search matches,
# add it to that priority and skip all subsequent priorities
if any(
[
file.full_match(search)
for search in priorities_and_searches[priority]
]
):
if any(file.full_match(search) for search in searches):
files_by_priority[priority].append(file)
continue
@@ -230,12 +224,9 @@ def run(languages: list[str], processes: int, working_dir: Path) -> None:
lambda result: result is not None,
pool.starmap(
_generate_translation_data,
[
(lang, priority, file)
for file in files_by_priority[priority]
],
[(lang, file) for file in files_by_priority[priority]],
),
)
),
)
# sadly single treaded, as only one file being operated on

54
uv.lock generated
View File

@@ -89,6 +89,7 @@ dependencies = [
[package.dev-dependencies]
dev = [
{ name = "lefthook" },
{ name = "pytest" },
{ name = "ruff" },
{ name = "taplo" },
]
@@ -105,6 +106,7 @@ requires-dist = [
[package.metadata.requires-dev]
dev = [
{ name = "lefthook" },
{ name = "pytest" },
{ name = "ruff" },
{ name = "taplo" },
]
@@ -118,6 +120,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" },
]
[[package]]
name = "iniconfig"
version = "2.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" },
]
[[package]]
name = "joblib"
version = "1.5.2"
@@ -177,6 +188,24 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/4d/66/7d9e26593edda06e8cb531874633f7c2372279c3b0f46235539fe546df8b/nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1", size = 1505442, upload-time = "2024-08-18T19:48:21.909Z" },
]
[[package]]
name = "packaging"
version = "25.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
]
[[package]]
name = "pluggy"
version = "1.6.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
]
[[package]]
name = "pycparser"
version = "2.22"
@@ -186,6 +215,31 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552, upload-time = "2024-03-30T13:22:20.476Z" },
]
[[package]]
name = "pygments"
version = "2.19.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
]
[[package]]
name = "pytest"
version = "8.4.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
{ name = "iniconfig" },
{ name = "packaging" },
{ name = "pluggy" },
{ name = "pygments" },
]
sdist = { url = "https://files.pythonhosted.org/packages/08/ba/45911d754e8eba3d5a841a5ce61a65a685ff1798421ac054f85aa8747dfb/pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c", size = 1517714, upload-time = "2025-06-18T05:48:06.109Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" },
]
[[package]]
name = "python-iso639"
version = "2025.2.18"