From ad79262abfc3ffeed6fb1e7d471ad84600cd5462 Mon Sep 17 00:00:00 2001 From: delliott Date: Thu, 15 Jan 2026 15:49:40 +0000 Subject: [PATCH] feat/config (#5590) - use nargs instead of splitting strings. Nicer logic, more standard - Show default values of args in help message - each site can now have a `config.toml` with deps and required files for deployment. - cleaner argument handling and custom value generation - use dataclass to hold args, for cleaner typing - pass config types to run commands instead of individual args - clean up the whole build.py file Overall, this is a nice internal refactor that also offers the feature of per site config. It does have a breaking change of how multiple args are handled, but the arg help page should explain it just fine. Co-authored-by: Darragh Elliott Reviewed-on: https://git.fsfe.org/FSFE/fsfe-website/pulls/5590 Co-authored-by: delliott Co-committed-by: delliott --- .drone.yml | 12 +- build/fsfe_website_build/__init__.py | 4 +- build/fsfe_website_build/build.py | 143 +++++++++++------- build/fsfe_website_build/lib/build_config.py | 54 +++++++ build/fsfe_website_build/lib/site_config.py | 41 +++++ .../phase0/prepare_early_subdirectories.py | 19 ++- .../phase1/get_dependencies.py | 77 ++++------ .../phase1/prepare_subdirectories.py | 4 +- build/fsfe_website_build/phase1/run.py | 40 +++-- build/fsfe_website_build/phase1/update_css.py | 4 +- .../phase1/update_defaultxsls.py | 4 +- .../phase1/update_localmenus.py | 10 +- .../phase1/update_stylesheets.py | 4 +- .../phase1/update_xmllists.py | 8 +- build/fsfe_website_build/phase2/copy_files.py | 41 +++-- .../phase2/process_files.py | 10 +- build/fsfe_website_build/phase2/run.py | 30 +++- .../phase3/stage_to_target.py | 4 +- .../caching_test.py | 30 +--- docs/contributing.md | 2 +- fsfe.org/{dependencies.toml => config.toml} | 17 ++- fsfe.org/required_deploy_files.txt | 3 - fsfe.org/search/subdir.py | 6 +- pyproject.toml | 3 +- uv.lock | 11 ++ 25 files changed, 360 insertions(+), 
221 deletions(-) create mode 100644 build/fsfe_website_build/lib/build_config.py create mode 100644 build/fsfe_website_build/lib/site_config.py rename fsfe.org/{dependencies.toml => config.toml} (67%) delete mode 100644 fsfe.org/required_deploy_files.txt diff --git a/.drone.yml b/.drone.yml index 1ee50f55ee..9ae9b1ba3a 100644 --- a/.drone.yml +++ b/.drone.yml @@ -65,7 +65,7 @@ steps: XDG_RUNTIME_DIR: "/run/user/1001" DOCKER_HOST: "unix:///run/user/1001/docker.sock" # Target use ipv4 proxies for noddack and gahn, as ipv6 broken. - TARGET: "www@proxy.noris.fsfeurope.org:fsfe.org/global/?10322,www@proxy.plutex.fsfeurope.org:fsfe.org/global/?10322" + TARGETS: "www@proxy.noris.fsfeurope.org:fsfe.org/global/?10322 www@proxy.plutex.fsfeurope.org:fsfe.org/global/?10322" FSFE_WEBSITE_KEY_PRIVATE: from_secret: KEY_PRIVATE FSFE_WEBSITE_KEY_PASSWORD: @@ -83,7 +83,7 @@ steps: # If we are in a cron job, then do a full rebuild # Ideally the cron would set the flag itself, but drone does not support that. - if [ "$DRONE_BUILD_EVENT" = "cron" ]; then EXTRA_FLAGS="--full --clean-cache"; fi - - docker compose run --remove-orphans --build build --target "$TARGET" $EXTRA_FLAGS + - docker compose run --remove-orphans --build build --targets $TARGETS $EXTRA_FLAGS - docker compose down when: branch: @@ -99,7 +99,7 @@ steps: XDG_RUNTIME_DIR: "/run/user/1001" DOCKER_HOST: "unix:///run/user/1001/docker.sock" # Target use ipv4 proxies for noddack and gahn, as ipv6 broken. - TARGET: "www@proxy.noris.fsfeurope.org:test.fsfe.org/global/?10322,www@proxy.plutex.fsfeurope.org:test.fsfe.org/global/?10322" + TARGETS: "www@proxy.noris.fsfeurope.org:test.fsfe.org/global/?10322 www@proxy.plutex.fsfeurope.org:test.fsfe.org/global/?10322" FSFE_WEBSITE_KEY_PRIVATE: from_secret: KEY_PRIVATE FSFE_WEBSITE_KEY_PASSWORD: @@ -117,7 +117,7 @@ steps: # If we are in a cron job, then do a full rebuild # Ideally the cron would set the flag itself, but drone does not support that. 
- if [ "$DRONE_BUILD_EVENT" = "cron" ]; then EXTRA_FLAGS="--full --clean-cache"; fi - - docker compose run --remove-orphans --build build --target "$TARGET" $EXTRA_FLAGS + - docker compose run --remove-orphans --build build --targets $TARGETS $EXTRA_FLAGS - docker compose down when: branch: @@ -145,6 +145,4 @@ volumes: path: /run/user/1001/docker.sock --- kind: signature -hmac: 627a990b0ba4c6dc8aa43dda45f0d35cb3d3ada367c6db7e8b59c751e13ad5b2 - -... +hmac: 5f8becf08f74b7561bc7c06c023e234689d0b281806cbf37b96b12387266479d diff --git a/build/fsfe_website_build/__init__.py b/build/fsfe_website_build/__init__.py index 46eebbb3d2..dbfd3d9a4a 100644 --- a/build/fsfe_website_build/__init__.py +++ b/build/fsfe_website_build/__init__.py @@ -4,6 +4,6 @@ """The main module for the fsfe website build process.""" -from .build import main +from .build import build -__all__ = ["main"] +__all__ = ["build"] diff --git a/build/fsfe_website_build/build.py b/build/fsfe_website_build/build.py index 6b63bd7f37..65e5c112a1 100755 --- a/build/fsfe_website_build/build.py +++ b/build/fsfe_website_build/build.py @@ -7,10 +7,15 @@ import argparse import logging import multiprocessing import sys +import tomllib from pathlib import Path from textwrap import dedent +from dacite import Config, from_dict + +from .lib.build_config import GlobalBuildConfig, SiteBuildConfig from .lib.misc import lang_from_filename +from .lib.site_config import SiteConfig from .phase0.clean_cache import clean_cache from .phase0.full import full from .phase0.global_symlinks import global_symlinks @@ -23,10 +28,11 @@ from .phase3.stage_to_target import stage_to_target logger = logging.getLogger(__name__) -def _parse_arguments() -> argparse.Namespace: - """Parse the arguments of the website build process.""" +def _build_parser() -> argparse.ArgumentParser: + """Build the argument parser.""" parser = argparse.ArgumentParser( - description="Python script to handle building of the fsfe webpage", + description="Python script 
to handle building of the fsfe webpages", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument( "--full", @@ -41,15 +47,15 @@ def _parse_arguments() -> argparse.Namespace: parser.add_argument( "--languages", help="Languages to build website in.", - default=[], - type=lambda langs: sorted(langs.split(",")), + nargs="+", + type=str, ) parser.add_argument( "--log-level", type=str, default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], - help="Set the logging level (default: INFO)", + help="Set the logging level", ) parser.add_argument( "--processes", @@ -72,6 +78,7 @@ def _parse_arguments() -> argparse.Namespace: "--sites", help="What site directories to build", default=None, + nargs="+", type=str, ) parser.add_argument( @@ -80,64 +87,78 @@ def _parse_arguments() -> argparse.Namespace: action="store_true", ) parser.add_argument( - "--target", + "--targets", help=dedent("""\ Final dirs for websites to be build to. Can be a single path, or a comma separated list of valid rsync targets. Supports custom rsynx extension for specifying ports for ssh targets, name@host:path?port. 
"""), + nargs="+", type=str, - default=None, ) - args = parser.parse_args() + return parser + + +def _build_config_from_arguments(args: argparse.Namespace) -> GlobalBuildConfig: + """Convert the arguments to a build config.""" + # Now, update any args that need to default based on other arguments args.sites = ( [path for path in args.source.glob("?*.??*") if path.is_dir()] if args.sites is None - else [args.source.joinpath(site) for site in args.sites.split(",")] + else args.sites + ) + if not args.targets: + args.targets = [str(args.source.joinpath("output/final"))] + args.stage = ( + args.stage + # Multiple targets + or len(args.targets) > 1 + # Has special char marking it as an rsync ssh target + or any(char in target for char in "@:" for target in args.targets) + ) + # And our derived settings we do not have as an argument + # args.targets is certain to be exactly one long if args.stage is not true + working_target = Path( + args.source / "output/stage" if args.stage else args.targets[0] + ) + all_languages = sorted( + (path.name for path in args.source.glob("global/languages/??")), + ) + return GlobalBuildConfig( + **vars(args), working_target=working_target, all_languages=all_languages ) - if args.target is None: - args.target = str(args.source.joinpath("output/final")) - return args -def build(args: argparse.Namespace) -> None: +def _run_build(global_build_config: GlobalBuildConfig) -> None: """Coordinate the website builder.""" logging.basicConfig( format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S", - level=args.log_level, + level=global_build_config.log_level, ) - logger.debug(args) + logger.debug(global_build_config) - with multiprocessing.Pool(args.processes) as pool: + with multiprocessing.Pool(global_build_config.processes) as pool: logger.info("Starting phase 0 - Global Conditional Setup") - - if args.clean_cache: + # These are simple conditional steps that interact directly with args + if 
global_build_config.clean_cache: clean_cache() # TODO Should also be triggered whenever any build python file is changed - if args.full: - full(args.source) + if global_build_config.full: + full(global_build_config.source) global_symlinks( - args.source, + global_build_config.source, ( - args.languages - if args.languages - else sorted( - (path.name for path in args.source.glob("global/languages/??")), - ) + global_build_config.languages + if global_build_config.languages + else global_build_config.all_languages ), pool, ) - - stage_required = any( - [args.stage, "@" in args.target, ":" in args.target, "," in args.target], - ) - working_target = Path( - f"{args.source}/output/stage" if stage_required else args.target - ) + # Create our stable config across all sites # the two middle phases are unconditional, and run on a per site basis - for site in args.sites: + for site in global_build_config.sites: logger.info("Processing %s", site) if not site.exists(): logger.critical("Site %s does not exist, exiting", site) @@ -148,36 +169,52 @@ def build(args: argparse.Namespace) -> None: # Do not get access to languages to be built in, # and other benefits of being ran later. 
prepare_early_subdirectories( - args.source, + global_build_config, site, - args.processes, ) - languages = ( - args.languages - if args.languages + languages: list[str] = ( + global_build_config.languages + if global_build_config.languages else sorted( {lang_from_filename(path) for path in site.glob("**/*.*.xhtml")}, ) ) + # Now we know our languages, build our site build config + site_build_config = SiteBuildConfig(languages, site) + # And build our config that is saved inside the site + site_config = ( + from_dict( + SiteConfig, + tomllib.loads(config_file.read_text()), + Config(strict=True, cast=[Path]), + ) + if (config_file := site / "config.toml").exists() + else SiteConfig() + ) + # Processes needed only for subdir stuff - phase1_run(args.source, site, languages, args.processes, pool) + phase1_run(global_build_config, site_build_config, site_config, pool) + site_target = global_build_config.working_target / site.name + phase2_run( - args.source, - site, - languages, - pool, - working_target.joinpath(site.name), + global_build_config, site_build_config, site_config, site_target, pool ) logger.info("Starting Phase 3 - Global Conditional Finishing") - if stage_required: - stage_to_target(working_target, args.target, pool) + if global_build_config.stage: + stage_to_target( + global_build_config.working_target, global_build_config.targets, pool + ) - if args.serve: - serve_websites(working_target, args.sites, 2000, 100) + if global_build_config.serve: + serve_websites( + global_build_config.working_target, global_build_config.sites, 2000, 100 + ) -def main() -> None: +def build(passed_args: list[str] | None = None) -> None: """Parse args and run build.""" - args = _parse_arguments() - build(args) + parser = _build_parser() + args = parser.parse_args(passed_args) + global_build_config = _build_config_from_arguments(args) + _run_build(global_build_config) diff --git a/build/fsfe_website_build/lib/build_config.py b/build/fsfe_website_build/lib/build_config.py new 
file mode 100644 index 0000000000..b64ac26ecd --- /dev/null +++ b/build/fsfe_website_build/lib/build_config.py @@ -0,0 +1,54 @@ +# SPDX-FileCopyrightText: Free Software Foundation Europe e.V. +# +# SPDX-License-Identifier: GPL-3.0-or-later +"""Classes for holding build process config.""" + +from dataclasses import dataclass +from typing import TYPE_CHECKING, Literal + +if TYPE_CHECKING: + from pathlib import Path + + +@dataclass(frozen=True) +class GlobalBuildConfig: + """Immutable global configuration as part of a build.""" + + all_languages: list[str] + clean_cache: bool + full: bool + languages: list[str] + log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] + processes: int + serve: bool + sites: list[Path] + source: Path + stage: bool + targets: list[str] + working_target: Path + + def __post_init__(self) -> None: + """Validate build settings.""" + # Language validation + if self.languages: + # All languages are two letter codes + for lang in self.languages: + if len(lang) != 2 or not lang.isalpha(): # noqa: PLR2004 + message = ( + f"Language code '{lang}'" + " must be a two-letter alphabetic string." + ) + raise ValueError(message) + + # All languages should exist in the global config + if any(lang not in self.all_languages for lang in self.languages): + message = "All languages must be in the 'all_languages' list." + raise ValueError(message) + + +@dataclass(frozen=True) +class SiteBuildConfig: + """Immutable Build config specific to a site.""" + + languages: list[str] + site: Path diff --git a/build/fsfe_website_build/lib/site_config.py b/build/fsfe_website_build/lib/site_config.py new file mode 100644 index 0000000000..7c8f307b22 --- /dev/null +++ b/build/fsfe_website_build/lib/site_config.py @@ -0,0 +1,41 @@ +# SPDX-FileCopyrightText: Free Software Foundation Europe e.V. 
+# +# SPDX-License-Identifier: GPL-3.0-or-later +"""Classes for per site config.""" + +from dataclasses import dataclass, field + +# allow Path when not typechecking to allow dacite loading +from pathlib import Path # noqa: TC003 + + +@dataclass(frozen=True) +class FileSet: + """File set type.""" + + source: Path + target: Path + + +@dataclass(frozen=True) +class Dependency: + """Dependency type.""" + + repo: str + rev: str + file_sets: list[FileSet] + + +@dataclass(frozen=True) +class Deployment: + """Schema for settings for deployment.""" + + required_files: list[str] = field(default_factory=list[str]) + + +@dataclass(frozen=True) +class SiteConfig: + """Schema for per site config.""" + + dependencies: list[Dependency] = field(default_factory=list[Dependency]) + deployment: Deployment = field(default_factory=Deployment) diff --git a/build/fsfe_website_build/phase0/prepare_early_subdirectories.py b/build/fsfe_website_build/phase0/prepare_early_subdirectories.py index 256cfa7b96..fc1d47d1a6 100644 --- a/build/fsfe_website_build/phase0/prepare_early_subdirectories.py +++ b/build/fsfe_website_build/phase0/prepare_early_subdirectories.py @@ -14,24 +14,29 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: from pathlib import Path + from fsfe_website_build.lib.build_config import GlobalBuildConfig + logger = logging.getLogger(__name__) def prepare_early_subdirectories( - source: Path, source_dir: Path, processes: int + global_build_config: GlobalBuildConfig, source_site: Path ) -> None: """Find any early subdir scripts in subdirectories and run them.""" - logger.info("Preparing Early Subdirectories for site %s", source_dir) - for subdir_path in (path.parent for path in source_dir.glob("**/early_subdir.py")): + logger.info("Preparing Early Subdirectories for site %s", source_site) + for subdir_path in (path.parent for path in source_site.glob("**/early_subdir.py")): logger.info("Preparing early subdirectory %s", subdir_path) - 
sys.path.append(str(subdir_path.resolve())) + early_subdir_path_resolved = str(subdir_path.resolve()) + sys.path.append(early_subdir_path_resolved) # Ignore this very sensible warning, as we do evil things # here for out subdir scripts - import early_subdir # noqa: PLC0415 # pyright: ignore [reportMissingImports] + import early_subdir # noqa: PLC0415 # type: ignore # pyright: ignore [reportUnknownMemberType] - early_subdir.run(source, processes, subdir_path) # pyright: ignore [reportUnknownMemberType] + early_subdir.run( # pyright: ignore [reportUnknownMemberType] + global_build_config.source, global_build_config.processes, subdir_path + ) # Remove its path from where things can be imported - sys.path.remove(str(subdir_path.resolve())) + sys.path.remove(early_subdir_path_resolved) # Remove it from loaded modules sys.modules.pop("early_subdir") # prevent us from accessing it again diff --git a/build/fsfe_website_build/phase1/get_dependencies.py b/build/fsfe_website_build/phase1/get_dependencies.py index 7bea515714..538642434b 100644 --- a/build/fsfe_website_build/phase1/get_dependencies.py +++ b/build/fsfe_website_build/phase1/get_dependencies.py @@ -4,36 +4,36 @@ """Download the dependencies of a site, should it be necessary.""" import logging -import tomllib -from collections import defaultdict -from pathlib import Path +from typing import TYPE_CHECKING from fsfe_website_build.globals import CACHE_DIR from fsfe_website_build.lib.misc import run_command +if TYPE_CHECKING: + from pathlib import Path + + from fsfe_website_build.lib.site_config import Dependency + logger = logging.getLogger(__name__) -def fetch_sparse( - cache: Path, - repo: str, - rev: str, - file_mappings: list[dict[str, str]], -) -> None: +def fetch_sparse(cache: Path, source_site: Path, dependency: Dependency) -> None: """Clone source, and move necessary files into place with source/dest pairs.""" - clone_dir = cache / f"{Path(repo).name}_{rev}" + clone_dir = cache / 
f"{dependency.repo.split('/')[-1]}_{dependency.rev}" if not (clone_dir / ".git").exists(): clone_dir.mkdir(exist_ok=True) run_command( ["git", "-C", str(clone_dir), "init", "--quiet", "--no-initial-branch"] ) - run_command(["git", "-C", str(clone_dir), "remote", "add", "origin", repo]) + run_command( + ["git", "-C", str(clone_dir), "remote", "add", "origin", dependency.repo] + ) run_command( ["git", "-C", str(clone_dir), "config", "core.sparseCheckout", "true"] ) # Extract all paths for sparse checkout - paths = [mapping["source"] for mapping in file_mappings] + paths = [str(mapping.source) for mapping in dependency.file_sets] sparse_checkout_path = clone_dir / ".git" / "info" / "sparse-checkout" # if path is ".", checkout the whole repo if any(path == "." for path in paths): @@ -42,18 +42,28 @@ def fetch_sparse( sparse_checkout_path.write_text("\n".join(paths) + "\n") # Fetch the required revision - run_command(["git", "-C", str(clone_dir), "fetch", "--depth=1", "origin", rev]) + run_command( + [ + "git", + "-C", + str(clone_dir), + "fetch", + "--depth=1", + "origin", + dependency.rev, + ] + ) # Checkout the fetched revision run_command(["git", "-C", str(clone_dir), "checkout", "FETCH_HEAD"]) # Copy each file to its destination - for mapping in file_mappings: + for mapping in dependency.file_sets: # create our source path # the source syntax is that of .gitignore, and so may have a leading / # to say to interpret it only relative to the root # and so we remove that so joining gives us a proper path - src = clone_dir / mapping["source"].lstrip("/") - dest_path = Path(mapping["dest"]) - dest_path.parent.mkdir(parents=True, exist_ok=True) + src = clone_dir / str(mapping.source).lstrip("/") + target = source_site / mapping.target + target.parent.mkdir(parents=True, exist_ok=True) run_command( [ @@ -62,40 +72,15 @@ def fetch_sparse( "--del", "--exclude=.git", str(src) if not src.is_dir() else str(src) + "/", - str(dest_path), + str(target), ] ) -def get_dependencies( - 
source_dir: Path, -) -> None: +def get_dependencies(source_site: Path, dependencies: list[Dependency]) -> None: """Download and put in place all website dependencies.""" logger.info("Getting Dependencies") cache = CACHE_DIR / "repos" cache.mkdir(parents=True, exist_ok=True) - deps_file = source_dir / "dependencies.toml" - if deps_file.exists(): - with deps_file.open("rb") as file: - cfg = tomllib.load(file) - - # Group file mappings by repository and revision - repo_tasks: defaultdict[tuple[str, str], list[dict[str, str]]] = defaultdict( - list - ) - - for data in cfg.values(): - repo = str(data["repo"]) - rev = str(data["rev"]) - key = (repo, rev) - - for file_set in data["file_sets"]: - # Make path relative to source dir - dest = source_dir / file_set["target"] - repo_tasks[key].append( - {"source": str(file_set["source"]), "dest": str(dest)} - ) - - # Process each repository/revision only once - for (repo, rev), file_mappings in repo_tasks.items(): - fetch_sparse(cache, repo, rev, file_mappings) + for dependency in dependencies: + fetch_sparse(cache, source_site, dependency) diff --git a/build/fsfe_website_build/phase1/prepare_subdirectories.py b/build/fsfe_website_build/phase1/prepare_subdirectories.py index d60261b3d7..adec0b0a37 100644 --- a/build/fsfe_website_build/phase1/prepare_subdirectories.py +++ b/build/fsfe_website_build/phase1/prepare_subdirectories.py @@ -20,14 +20,14 @@ logger = logging.getLogger(__name__) def prepare_subdirectories( source: Path, - source_dir: Path, + source_site: Path, languages: list[str], processes: int, ) -> None: """Find any subdir scripts in subdirectories and run them.""" logger.info("Preparing Subdirectories") for subdir_path in sorted( - (path.parent for path in source_dir.glob("**/subdir.py")), + (path.parent for path in source_site.glob("**/subdir.py")), key=lambda directory: directory.joinpath("subdir-prio.txt").read_text().strip() if directory.joinpath("subdir-prio.txt").exists() else "0", diff --git 
a/build/fsfe_website_build/phase1/run.py b/build/fsfe_website_build/phase1/run.py index 52ffe62e67..e8889433fb 100644 --- a/build/fsfe_website_build/phase1/run.py +++ b/build/fsfe_website_build/phase1/run.py @@ -22,24 +22,40 @@ from .update_xmllists import update_xmllists if TYPE_CHECKING: import multiprocessing.pool - from pathlib import Path + + from fsfe_website_build.lib.build_config import GlobalBuildConfig, SiteBuildConfig + from fsfe_website_build.lib.site_config import SiteConfig logger = logging.getLogger(__name__) def phase1_run( - source: Path, - source_site: Path, - languages: list[str], - processes: int, + global_build_config: GlobalBuildConfig, + site_build_config: SiteBuildConfig, + site_config: SiteConfig, pool: multiprocessing.pool.Pool, ) -> None: """Run all the necessary sub functions for phase1.""" logger.info("Starting Phase 1 - Setup") - get_dependencies(source_site) - update_css(source_site) - update_stylesheets(source_site, pool) - prepare_subdirectories(source, source_site, languages, processes) - update_defaultxsls(source_site, pool) - update_localmenus(source, source_site, languages, pool) - update_xmllists(source, source_site, languages, pool) + get_dependencies(site_build_config.site, site_config.dependencies) + update_css(site_build_config.site) + update_stylesheets(site_build_config.site, pool) + prepare_subdirectories( + global_build_config.source, + site_build_config.site, + site_build_config.languages, + global_build_config.processes, + ) + update_defaultxsls(site_build_config.site, pool) + update_localmenus( + global_build_config.source, + site_build_config.site, + site_build_config.languages, + pool, + ) + update_xmllists( + global_build_config.source, + site_build_config.site, + site_build_config.languages, + pool, + ) diff --git a/build/fsfe_website_build/phase1/update_css.py b/build/fsfe_website_build/phase1/update_css.py index 029d5c2cab..575713e004 100644 --- a/build/fsfe_website_build/phase1/update_css.py +++ 
b/build/fsfe_website_build/phase1/update_css.py @@ -20,7 +20,7 @@ logger = logging.getLogger(__name__) def update_css( - source_dir: Path, + source_site: Path, ) -> None: """If any less files have been changed, update the css. @@ -28,7 +28,7 @@ def update_css( Then minify it, and place it in the expected location for the build process. """ logger.info("Updating css") - directory = source_dir.joinpath("look") + directory = source_site.joinpath("look") if directory.exists(): for file in directory.glob("main*.less"): compiled_path = file.with_suffix(".css") diff --git a/build/fsfe_website_build/phase1/update_defaultxsls.py b/build/fsfe_website_build/phase1/update_defaultxsls.py index 2361973717..67b944c5af 100755 --- a/build/fsfe_website_build/phase1/update_defaultxsls.py +++ b/build/fsfe_website_build/phase1/update_defaultxsls.py @@ -31,7 +31,7 @@ def _do_symlinking(directory: Path) -> None: ) -def update_defaultxsls(source_dir: Path, pool: multiprocessing.pool.Pool) -> None: +def update_defaultxsls(source_site: Path, pool: multiprocessing.pool.Pool) -> None: """Place a .default.xsl into each XHTML source directory. 
These .default.xsl are symlinks to the first @@ -42,7 +42,7 @@ def update_defaultxsls(source_dir: Path, pool: multiprocessing.pool.Pool) -> Non logger.info("Updating default xsl's") # Get a set of all directories containing .xhtml source files - directories = {path.parent for path in source_dir.glob("**/*.*.xhtml")} + directories = {path.parent for path in source_site.glob("**/*.*.xhtml")} # Do all directories asynchronously pool.map(_do_symlinking, directories) diff --git a/build/fsfe_website_build/phase1/update_localmenus.py b/build/fsfe_website_build/phase1/update_localmenus.py index b5df0bf3a3..81c16de568 100755 --- a/build/fsfe_website_build/phase1/update_localmenus.py +++ b/build/fsfe_website_build/phase1/update_localmenus.py @@ -27,7 +27,7 @@ logger = logging.getLogger(__name__) def _write_localmenus( - source_dir: Path, + source_site: Path, directory: Path, files: list[Path], languages: list[str], @@ -61,7 +61,7 @@ def _write_localmenus( link=( "/" + str( - link_file.relative_to(source_dir), + link_file.relative_to(source_site), ) ), ).text = localmenu.text @@ -71,7 +71,7 @@ def _write_localmenus( def update_localmenus( source: Path, - source_dir: Path, + source_site: Path, languages: list[str], pool: multiprocessing.pool.Pool, ) -> None: @@ -81,7 +81,7 @@ def update_localmenus( files_by_dir: dict[Path, list[Path]] = defaultdict(list) for file in ( file - for file in source_dir.glob("**/*.??.xhtml") + for file in source_site.glob("**/*.??.xhtml") if "-template" not in file.name ): xslt_root = etree.parse(file) @@ -111,5 +111,5 @@ def update_localmenus( ] pool.starmap( _write_localmenus, - ((source_dir, directory, files, languages) for directory, files in dirs), + ((source_site, directory, files, languages) for directory, files in dirs), ) diff --git a/build/fsfe_website_build/phase1/update_stylesheets.py b/build/fsfe_website_build/phase1/update_stylesheets.py index 7beb5a5ad1..f0720a0c4f 100755 --- a/build/fsfe_website_build/phase1/update_stylesheets.py +++ 
b/build/fsfe_website_build/phase1/update_stylesheets.py @@ -40,7 +40,7 @@ def _update_sheet(file: Path) -> None: touch_if_newer_dep(file, imports) -def update_stylesheets(source_dir: Path, pool: multiprocessing.pool.Pool) -> None: +def update_stylesheets(source_site: Path, pool: multiprocessing.pool.Pool) -> None: """Touch all XSL files dependant on an XSL that has changed since last build.""" logger.info("Updating XSL stylesheets") banned = re.compile(r"(\.venv/.*)|(.*\.default\.xsl$)") @@ -48,6 +48,6 @@ def update_stylesheets(source_dir: Path, pool: multiprocessing.pool.Pool) -> Non _update_sheet, filter( lambda file: banned.match(str(file)) is None, - source_dir.glob("**/*.xsl"), + source_site.glob("**/*.xsl"), ), ) diff --git a/build/fsfe_website_build/phase1/update_xmllists.py b/build/fsfe_website_build/phase1/update_xmllists.py index 130d8e3cf4..b3c77db229 100755 --- a/build/fsfe_website_build/phase1/update_xmllists.py +++ b/build/fsfe_website_build/phase1/update_xmllists.py @@ -103,7 +103,7 @@ def _update_for_base( # noqa: PLR0913 def _update_module_xmllists( source: Path, - source_dir: Path, + source_site: Path, languages: list[str], pool: multiprocessing.pool.Pool, ) -> None: @@ -113,15 +113,15 @@ def _update_module_xmllists( all_xml = { get_basepath(path) for path in ( - *source_dir.glob("**/*.*.xml"), + *source_site.glob("**/*.*.xml"), *source.joinpath("global/").glob("**/*.*.xml"), ) if lang_from_filename(path) in languages } - source_bases = {path.with_suffix("") for path in source_dir.glob("**/*.sources")} + source_bases = {path.with_suffix("") for path in source_site.glob("**/*.sources")} module_bases = { get_basepath(path) - for path in source_dir.glob("**/*.*.xhtml") + for path in source_site.glob("**/*.*.xhtml") if lang_from_filename(path) in languages and etree.parse(path).xpath("//module") } all_bases = source_bases | module_bases diff --git a/build/fsfe_website_build/phase2/copy_files.py b/build/fsfe_website_build/phase2/copy_files.py index 
db3e4c3dae..797feb267b 100644 --- a/build/fsfe_website_build/phase2/copy_files.py +++ b/build/fsfe_website_build/phase2/copy_files.py @@ -16,11 +16,13 @@ if TYPE_CHECKING: import multiprocessing.pool from pathlib import Path + from fsfe_website_build.lib.site_config import Deployment + logger = logging.getLogger(__name__) -def _copy_file(target: Path, source_dir: Path, source_file: Path) -> None: - target_file = target.joinpath(source_file.relative_to(source_dir)) +def _copy_file(target: Path, source_site: Path, source_file: Path) -> None: + target_file = target.joinpath(source_file.relative_to(source_site)) if ( not target_file.exists() or source_file.stat().st_mtime > target_file.stat().st_mtime @@ -33,9 +35,9 @@ def _copy_file(target: Path, source_dir: Path, source_file: Path) -> None: def _copy_minify_file( - target: Path, source_dir: Path, source_file: Path, mime: str + target: Path, source_site: Path, source_file: Path, mime: str ) -> None: - target_file = target.joinpath(source_file.relative_to(source_dir)) + target_file = target.joinpath(source_file.relative_to(source_site)) if ( not target_file.exists() or source_file.stat().st_mtime > target_file.stat().st_mtime @@ -52,7 +54,12 @@ def _copy_minify_file( shutil.copymode(source_file, target_file) -def copy_files(source_dir: Path, pool: multiprocessing.pool.Pool, target: Path) -> None: +def copy_files( + source_site: Path, + pool: multiprocessing.pool.Pool, + target: Path, + deploy_config: Deployment, +) -> None: """Copy images, documents etc.""" logger.info("Copying over media and misc files") # file extensions and mimes of minificable content @@ -62,30 +69,16 @@ def copy_files(source_dir: Path, pool: multiprocessing.pool.Pool, target: Path) ".js": "application/javascript", ".svg": "image/svg+xml", } - # get the special cases per site - special_case_file = source_dir / "required_deploy_files.txt" - stripped_lines = ( - [line.strip() for line in special_case_file.read_text().split("\n")] - if 
special_case_file.exists() - else [] - ) - special_includes = [ - file - for line in stripped_lines - if line and not line.startswith("#") - for file in source_dir.glob(line) - ] - # Here we copy everything we cannot minify pool.starmap( _copy_file, ( - (target, source_dir, file) + (target, source_site, file) for file in [ # globbing of all files, exclude blacklist *[ path - for path in source_dir.glob("**/*") + for path in source_site.glob("**/*") if path.is_file() # Things we dont want over at all and path.suffix @@ -112,7 +105,7 @@ def copy_files(source_dir: Path, pool: multiprocessing.pool.Pool, target: Path) ] ], # special whitelist to include - *special_includes, + *[source_site / file for file in deploy_config.required_files], ] ), ) @@ -121,6 +114,6 @@ def copy_files(source_dir: Path, pool: multiprocessing.pool.Pool, target: Path) # https://github.com/tdewolff/minify/issues/535 for file_suffix, mime in minifiable_content.items(): for file in [ - path for path in source_dir.glob(f"**/*{file_suffix}") if path.is_file() + path for path in source_site.glob(f"**/*{file_suffix}") if path.is_file() ]: - _copy_minify_file(target, source_dir, file, mime) + _copy_minify_file(target, source_site, file, mime) diff --git a/build/fsfe_website_build/phase2/process_files.py b/build/fsfe_website_build/phase2/process_files.py index 904468ad48..599e897925 100644 --- a/build/fsfe_website_build/phase2/process_files.py +++ b/build/fsfe_website_build/phase2/process_files.py @@ -27,7 +27,7 @@ logger = logging.getLogger(__name__) def _process_set( # noqa: PLR0913 source: Path, - source_dir: Path, + source_site: Path, languages: list[str], target: Path, processor: Path, @@ -55,7 +55,7 @@ def _process_set( # noqa: PLR0913 ".html" if (len(processor.suffixes) == 1) else processor.suffixes[0] ) target_file = target.joinpath( - source_file.relative_to(source_dir), + source_file.relative_to(source_site), ).with_suffix(target_suffix) # if the target file does not exist, we make it if not 
target_file.exists() or any( @@ -90,7 +90,7 @@ def _process_set( # noqa: PLR0913 def process_files( source: Path, - source_dir: Path, + source_site: Path, languages: list[str], pool: multiprocessing.pool.Pool, target: Path, @@ -101,7 +101,7 @@ def process_files( process_files_dict: dict[Path, set[Path]] = defaultdict(set) # This gathers all the simple xhtml files for generating xhtml output - for file in source_dir.glob("**/*.*.xhtml"): + for file in source_site.glob("**/*.*.xhtml"): # Processors with a file ending for the output encoded in the name, eg # events.rss.xsl type_specific_processors = set( @@ -134,7 +134,7 @@ def process_files( pool.starmap( _process_set, ( - (source, source_dir, languages, target, processor, files) + (source, source_site, languages, target, processor, files) for processor, files in process_files_dict.items() ), ) diff --git a/build/fsfe_website_build/phase2/run.py b/build/fsfe_website_build/phase2/run.py index 812aa97082..d769b922e5 100644 --- a/build/fsfe_website_build/phase2/run.py +++ b/build/fsfe_website_build/phase2/run.py @@ -16,19 +16,33 @@ if TYPE_CHECKING: import multiprocessing.pool from pathlib import Path + from fsfe_website_build.lib.build_config import GlobalBuildConfig, SiteBuildConfig + from fsfe_website_build.lib.site_config import SiteConfig + logger = logging.getLogger(__name__) def phase2_run( - source: Path, - source_dir: Path, - languages: list[str], + global_build_config: GlobalBuildConfig, + site_build_config: SiteBuildConfig, + site_config: SiteConfig, + site_target: Path, pool: multiprocessing.pool.Pool, - target: Path, ) -> None: """Run all the necessary sub functions for phase2.""" logger.info("Starting Phase 2 - Generating output") - process_files(source, source_dir, languages, pool, target) - create_index_symlinks(pool, target) - create_language_symlinks(pool, target) - copy_files(source_dir, pool, target) + process_files( + global_build_config.source, + site_build_config.site, + 
site_build_config.languages, + pool, + site_target, + ) + create_index_symlinks(pool, site_target) + create_language_symlinks(pool, site_target) + copy_files( + site_build_config.site, + pool, + site_target, + site_config.deployment, + ) diff --git a/build/fsfe_website_build/phase3/stage_to_target.py b/build/fsfe_website_build/phase3/stage_to_target.py index e0541f51a2..8ac94b4d92 100644 --- a/build/fsfe_website_build/phase3/stage_to_target.py +++ b/build/fsfe_website_build/phase3/stage_to_target.py @@ -37,7 +37,7 @@ def _rsync(stagedir: Path, target: str, port: int) -> None: def stage_to_target( - stagedir: Path, targets: str, pool: multiprocessing.pool.Pool + stagedir: Path, targets: list[str], pool: multiprocessing.pool.Pool ) -> None: """Use a multithreaded rsync to copy the stage dir to all targets.""" logger.info("Rsyncing from stage dir to target dir(s)") @@ -49,6 +49,6 @@ def stage_to_target( (target if "?" not in target else target.split("?")[0]), (int(target.split("?")[1]) if "?" in target else 22), ) - for target in targets.split(",") + for target in targets ), ) diff --git a/build/fsfe_website_build_tests_ci/caching_test.py b/build/fsfe_website_build_tests_ci/caching_test.py index 569b250cbd..66d75e9131 100644 --- a/build/fsfe_website_build_tests_ci/caching_test.py +++ b/build/fsfe_website_build_tests_ci/caching_test.py @@ -1,39 +1,20 @@ # SPDX-FileCopyrightText: Free Software Foundation Europe e.V. 
# # SPDX-License-Identifier: GPL-3.0-or-later -from argparse import Namespace -from pathlib import Path from typing import TYPE_CHECKING from fsfe_website_build.build import build if TYPE_CHECKING: + from pathlib import Path + from pytest_mock import MockFixture def no_rebuild_twice_test(mocker: MockFixture) -> None: + cli_args = ["--languages", "en", "--log-level", "CRITICAL"] # first, run a full build - args = Namespace( - full=True, - clean_cache=False, - languages=[ - "en", - ], - log_level="CRITICAL", # by only logging critical messages - # the build should be faster, as evaluating less things to strings - processes=8, - source=Path(), - serve=False, - sites=[ - Path("drm.info"), - Path("fsfe.org"), - Path("pdfreaders.org"), - Path("status.fsfe.org"), - ], - stage=False, - target="output/final", - ) - build(args) + build([*cli_args, "--full"]) # replace update_if_changed with # mocked one that exceptions if the file would be changed @@ -47,5 +28,4 @@ def no_rebuild_twice_test(mocker: MockFixture) -> None: "fsfe_website_build.lib.misc.update_if_changed", side_effect=fail_if_update ) # now, run a normal build - args.full = False - build(args) + build(cli_args) diff --git a/docs/contributing.md b/docs/contributing.md index 63bd31d6af..96b92776c4 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -21,7 +21,7 @@ To run it in the project using the project config, please use `uv run ruff`. We try to keep to some design patterns to keep things manageable. -Firstly, each phase as described in [the overview](./overview.md) should handle a meaningfully different kind of interaction. Each phase should be structured, to the greatest degree possible, as a sequence of steps. We consider that each phase should have a `run.py` file that exposes a `ipahse_*run` function that takes the arguments needed for its phase. +Firstly, each phase as described in [the overview](./overview.md) should handle a meaningfully different kind of interaction. 
Each phase should be structured, to the greatest degree possible, as a sequence of steps. We consider that each phase should have a `run.py` file that exposes a `phase_?run` function that takes the arguments needed for its phase. Each run function then calls a sequence of functions that are defined in the other files in the `phase*` folder. Each other file in the folder should expose one function, with the same name as the file, minus file extension. For example, `create_files.py` should expose the function `create_files`. It is a common pattern for the first expose function to generate a list of files or things to act on, and then multithread this using another function. diff --git a/fsfe.org/dependencies.toml b/fsfe.org/config.toml similarity index 67% rename from fsfe.org/dependencies.toml rename to fsfe.org/config.toml index b72976b85d..a4e3d3c486 100644 --- a/fsfe.org/dependencies.toml +++ b/fsfe.org/config.toml @@ -1,28 +1,35 @@ +# This is a config file for a site, that cuontains necessary settings +# # Sources for the website # All repos should be mirrored to https://git.fsfe.org/fsfe-system-hackers-mirrors # # SCRIPTS -[jquery] +[[dependencies]] repo = "https://git.fsfe.org/fsfe-system-hackers-mirrors/jquery" rev = "3.5.1" file_sets = [ { source = "/dist/jquery.min.js", target = "scripts/thirdparty/" }, { source = "/dist/jquery.min.map", target = "scripts/thirdparty/" }, ] - -[lunr] +[[dependencies]] repo = "https://git.fsfe.org/fsfe-system-hackers-mirrors/lunr.js" rev = "v2.3.9" file_sets = [{ source = "/lunr.js", target = "scripts/thirdparty/lunr.min.js" }] # STYLING -[bootstrap] +[[dependencies]] repo = "https://git.fsfe.org/fsfe-system-hackers-mirrors/bootstrap" rev = "v3.4.1" file_sets = [{ source = "/less", target = "look/thirdparty/bootstrap" }] # CGI -[phpmailer] +[[dependencies]] repo = "https://git.fsfe.org/fsfe-system-hackers-mirrors/PHPMailer" rev = "v6.10.0" file_sets = [{ source = "/src", target = "cgi-bin/thirdparty/PHPMailer" }] + +# 
Settings for deployment +[deployment] +# This files contains relative paths we want to deploy even if they would be blocked by the copy_files method filtering. +# Special case hard code pass over order items xml required by cgi script +required_files = ["order/data/items.en.xml"] diff --git a/fsfe.org/required_deploy_files.txt b/fsfe.org/required_deploy_files.txt deleted file mode 100644 index 020faba7de..0000000000 --- a/fsfe.org/required_deploy_files.txt +++ /dev/null @@ -1,3 +0,0 @@ -# This files contains relative paths we want to deploy even if they would be blocked by the copy_files method. -# Special case hard code pass over order items xml required by cgi script -order/data/items.en.xml diff --git a/fsfe.org/search/subdir.py b/fsfe.org/search/subdir.py index 380f5ccebc..7a3378cc84 100644 --- a/fsfe.org/search/subdir.py +++ b/fsfe.org/search/subdir.py @@ -84,11 +84,11 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) - """ # Download all stopwords nltkdir = CACHE_DIR / "nltk_data" - source_dir = working_dir.parent + source_site = working_dir.parent nltk.data.path = [nltkdir] nltk.download("stopwords", download_dir=nltkdir, quiet=True) # pyright: ignore [(reportUnknownMemberType)] with multiprocessing.Pool(processes) as pool: - logger.debug("Indexing %s", source_dir) + logger.debug("Indexing %s", source_site) # Get all xhtml files in languages to be processed # Create a list of tuples @@ -117,7 +117,7 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) - else set() ), ) - for file in source_dir.glob("**/*.??.xhtml") + for file in source_site.glob("**/*.??.xhtml") if file.suffixes[0].removeprefix(".") in languages ) diff --git a/pyproject.toml b/pyproject.toml index fdb86595dc..a82d60d8fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,10 +11,11 @@ dependencies = [ "python-iso639", # For getting english language names of languages from two letter codes. 
"requests", # For HTTP requests "tdewolff-minify", # For minification html css and js + "dacite", # dict to dataclass conversion ] [project.scripts] -build = "fsfe_website_build:main" +build = "fsfe_website_build:build" [dependency-groups] dev = [ diff --git a/uv.lock b/uv.lock index 6b12319e85..c4e69143d6 100644 --- a/uv.lock +++ b/uv.lock @@ -130,11 +130,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ee/58/257350f7db99b4ae12b614a36256d9cc870d71d9e451e79c2dc3b23d7c3c/cssselect-1.3.0-py3-none-any.whl", hash = "sha256:56d1bf3e198080cc1667e137bc51de9cadfca259f03c2d4e09037b3e01e30f0d", size = 18786, upload-time = "2025-03-10T09:30:28.048Z" }, ] +[[package]] +name = "dacite" +version = "1.9.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/55/a0/7ca79796e799a3e782045d29bf052b5cde7439a2bbb17f15ff44f7aacc63/dacite-1.9.2.tar.gz", hash = "sha256:6ccc3b299727c7aa17582f0021f6ae14d5de47c7227932c47fec4cdfefd26f09", size = 22420, upload-time = "2025-02-05T09:27:29.757Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/35/386550fd60316d1e37eccdda609b074113298f23cef5bddb2049823fe666/dacite-1.9.2-py3-none-any.whl", hash = "sha256:053f7c3f5128ca2e9aceb66892b1a3c8936d02c686e707bee96e19deef4bc4a0", size = 16600, upload-time = "2025-02-05T09:27:24.345Z" }, +] + [[package]] name = "fsfe-website-build" version = "0.0.0" source = { editable = "." } dependencies = [ + { name = "dacite" }, { name = "lxml" }, { name = "nltk" }, { name = "platformdirs" }, @@ -157,6 +167,7 @@ dev = [ [package.metadata] requires-dist = [ + { name = "dacite" }, { name = "lxml" }, { name = "nltk" }, { name = "platformdirs" },