feat/config (#5590)
All checks were successful
continuous-integration/drone/push Build is passing

- use nargs instead of splitting strings. Nicer logic, more standard
- Show default values of args in help message
- each site can now have a `config.toml` with deps and required files for deployment.
- cleaner argument handling and custom value generation
- use dataclass to hold args, or cleaner typing
- pass config types to run commands instead of individual args
- clean up the whole build.py file

Overall, this is a nice internal refactor that also offers the feature of per site config.

It does have a breaking change of how multiple args are handled, but the arg help page should explain it just fine.

Co-authored-by: Darragh Elliott <me@delliott.net>
Reviewed-on: #5590
Co-authored-by: delliott <delliott@fsfe.org>
Co-committed-by: delliott <delliott@fsfe.org>
This commit was merged in pull request #5590.
This commit is contained in:
2026-01-15 15:49:40 +00:00
committed by tobiasd
parent d5955e9533
commit ad79262abf
25 changed files with 360 additions and 221 deletions

View File

@@ -65,7 +65,7 @@ steps:
XDG_RUNTIME_DIR: "/run/user/1001"
DOCKER_HOST: "unix:///run/user/1001/docker.sock"
# Targets use ipv4 proxies for noddack and gahn, as ipv6 is broken.
TARGET: "www@proxy.noris.fsfeurope.org:fsfe.org/global/?10322,www@proxy.plutex.fsfeurope.org:fsfe.org/global/?10322"
TARGETS: "www@proxy.noris.fsfeurope.org:fsfe.org/global/?10322 www@proxy.plutex.fsfeurope.org:fsfe.org/global/?10322"
FSFE_WEBSITE_KEY_PRIVATE:
from_secret: KEY_PRIVATE
FSFE_WEBSITE_KEY_PASSWORD:
@@ -83,7 +83,7 @@ steps:
# If we are in a cron job, then do a full rebuild
# Ideally the cron would set the flag itself, but drone does not support that.
- if [ "$DRONE_BUILD_EVENT" = "cron" ]; then EXTRA_FLAGS="--full --clean-cache"; fi
- docker compose run --remove-orphans --build build --target "$TARGET" $EXTRA_FLAGS
- docker compose run --remove-orphans --build build --targets $TARGETS $EXTRA_FLAGS
- docker compose down
when:
branch:
@@ -99,7 +99,7 @@ steps:
XDG_RUNTIME_DIR: "/run/user/1001"
DOCKER_HOST: "unix:///run/user/1001/docker.sock"
# Targets use ipv4 proxies for noddack and gahn, as ipv6 is broken.
TARGET: "www@proxy.noris.fsfeurope.org:test.fsfe.org/global/?10322,www@proxy.plutex.fsfeurope.org:test.fsfe.org/global/?10322"
TARGETS: "www@proxy.noris.fsfeurope.org:test.fsfe.org/global/?10322 www@proxy.plutex.fsfeurope.org:test.fsfe.org/global/?10322"
FSFE_WEBSITE_KEY_PRIVATE:
from_secret: KEY_PRIVATE
FSFE_WEBSITE_KEY_PASSWORD:
@@ -117,7 +117,7 @@ steps:
# If we are in a cron job, then do a full rebuild
# Ideally the cron would set the flag itself, but drone does not support that.
- if [ "$DRONE_BUILD_EVENT" = "cron" ]; then EXTRA_FLAGS="--full --clean-cache"; fi
- docker compose run --remove-orphans --build build --target "$TARGET" $EXTRA_FLAGS
- docker compose run --remove-orphans --build build --targets $TARGETS $EXTRA_FLAGS
- docker compose down
when:
branch:
@@ -145,6 +145,4 @@ volumes:
path: /run/user/1001/docker.sock
---
kind: signature
hmac: 627a990b0ba4c6dc8aa43dda45f0d35cb3d3ada367c6db7e8b59c751e13ad5b2
...
hmac: 5f8becf08f74b7561bc7c06c023e234689d0b281806cbf37b96b12387266479d

View File

@@ -4,6 +4,6 @@
"""The main module for the fsfe website build process."""
from .build import main
from .build import build
__all__ = ["main"]
__all__ = ["build"]

View File

@@ -7,10 +7,15 @@ import argparse
import logging
import multiprocessing
import sys
import tomllib
from pathlib import Path
from textwrap import dedent
from dacite import Config, from_dict
from .lib.build_config import GlobalBuildConfig, SiteBuildConfig
from .lib.misc import lang_from_filename
from .lib.site_config import SiteConfig
from .phase0.clean_cache import clean_cache
from .phase0.full import full
from .phase0.global_symlinks import global_symlinks
@@ -23,10 +28,11 @@ from .phase3.stage_to_target import stage_to_target
logger = logging.getLogger(__name__)
def _parse_arguments() -> argparse.Namespace:
"""Parse the arguments of the website build process."""
def _build_parser() -> argparse.ArgumentParser:
"""Build the argument parser."""
parser = argparse.ArgumentParser(
description="Python script to handle building of the fsfe webpage",
description="Python script to handle building of the fsfe webpages",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
"--full",
@@ -41,15 +47,15 @@ def _parse_arguments() -> argparse.Namespace:
parser.add_argument(
"--languages",
help="Languages to build website in.",
default=[],
type=lambda langs: sorted(langs.split(",")),
nargs="+",
type=str,
)
parser.add_argument(
"--log-level",
type=str,
default="INFO",
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
help="Set the logging level (default: INFO)",
help="Set the logging level",
)
parser.add_argument(
"--processes",
@@ -72,6 +78,7 @@ def _parse_arguments() -> argparse.Namespace:
"--sites",
help="What site directories to build",
default=None,
nargs="+",
type=str,
)
parser.add_argument(
@@ -80,64 +87,78 @@ def _parse_arguments() -> argparse.Namespace:
action="store_true",
)
parser.add_argument(
"--target",
"--targets",
help=dedent("""\
Final dirs for websites to be build to.
Can be a single path, or a comma separated list of valid rsync targets.
Supports custom rsynx extension for specifying ports for ssh targets,
name@host:path?port.
"""),
nargs="+",
type=str,
default=None,
)
args = parser.parse_args()
return parser
def _build_config_from_arguments(args: argparse.Namespace) -> GlobalBuildConfig:
"""Convert the arguments to a build config."""
# Now, update any args that need to default based on other arguments
args.sites = (
[path for path in args.source.glob("?*.??*") if path.is_dir()]
if args.sites is None
else [args.source.joinpath(site) for site in args.sites.split(",")]
else args.sites
)
if not args.targets:
args.targets = [str(args.source.joinpath("output/final"))]
args.stage = (
args.stage
# Multiple targets
or len(args.targets) > 1
# Has special char marking it as an rsync ssh target
or any(char in target for char in "@:" for target in args.targets)
)
# And our derived settings we do not have as an argument
# args.targets is certain to be exactly one long if args.stage is not true
working_target = Path(
args.source / "output/stage" if args.stage else args.targets[0]
)
all_languages = sorted(
(path.name for path in args.source.glob("global/languages/??")),
)
return GlobalBuildConfig(
**vars(args), working_target=working_target, all_languages=all_languages
)
if args.target is None:
args.target = str(args.source.joinpath("output/final"))
return args
def build(args: argparse.Namespace) -> None:
def _run_build(global_build_config: GlobalBuildConfig) -> None:
"""Coordinate the website builder."""
logging.basicConfig(
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
level=args.log_level,
level=global_build_config.log_level,
)
logger.debug(args)
logger.debug(global_build_config)
with multiprocessing.Pool(args.processes) as pool:
with multiprocessing.Pool(global_build_config.processes) as pool:
logger.info("Starting phase 0 - Global Conditional Setup")
if args.clean_cache:
# These are simple conditional steps that interact directly with args
if global_build_config.clean_cache:
clean_cache()
# TODO Should also be triggered whenever any build python file is changed
if args.full:
full(args.source)
if global_build_config.full:
full(global_build_config.source)
global_symlinks(
args.source,
global_build_config.source,
(
args.languages
if args.languages
else sorted(
(path.name for path in args.source.glob("global/languages/??")),
)
global_build_config.languages
if global_build_config.languages
else global_build_config.all_languages
),
pool,
)
stage_required = any(
[args.stage, "@" in args.target, ":" in args.target, "," in args.target],
)
working_target = Path(
f"{args.source}/output/stage" if stage_required else args.target
)
# Create our stable config across all sites
# the two middle phases are unconditional, and run on a per site basis
for site in args.sites:
for site in global_build_config.sites:
logger.info("Processing %s", site)
if not site.exists():
logger.critical("Site %s does not exist, exiting", site)
@@ -148,36 +169,52 @@ def build(args: argparse.Namespace) -> None:
# Do not get access to languages to be built in,
# and other benefits of being ran later.
prepare_early_subdirectories(
args.source,
global_build_config,
site,
args.processes,
)
languages = (
args.languages
if args.languages
languages: list[str] = (
global_build_config.languages
if global_build_config.languages
else sorted(
{lang_from_filename(path) for path in site.glob("**/*.*.xhtml")},
)
)
# Now we know our languages, build our site build config
site_build_config = SiteBuildConfig(languages, site)
# And build our config that is saved inside the site
site_config = (
from_dict(
SiteConfig,
tomllib.loads(config_file.read_text()),
Config(strict=True, cast=[Path]),
)
if (config_file := site / "config.toml").exists()
else SiteConfig()
)
# Processes needed only for subdir stuff
phase1_run(args.source, site, languages, args.processes, pool)
phase1_run(global_build_config, site_build_config, site_config, pool)
site_target = global_build_config.working_target / site.name
phase2_run(
args.source,
site,
languages,
pool,
working_target.joinpath(site.name),
global_build_config, site_build_config, site_config, site_target, pool
)
logger.info("Starting Phase 3 - Global Conditional Finishing")
if stage_required:
stage_to_target(working_target, args.target, pool)
if global_build_config.stage:
stage_to_target(
global_build_config.working_target, global_build_config.targets, pool
)
if args.serve:
serve_websites(working_target, args.sites, 2000, 100)
if global_build_config.serve:
serve_websites(
global_build_config.working_target, global_build_config.sites, 2000, 100
)
def main() -> None:
def build(passed_args: list[str] | None = None) -> None:
"""Parse args and run build."""
args = _parse_arguments()
build(args)
parser = _build_parser()
args = parser.parse_args(passed_args)
global_build_config = _build_config_from_arguments(args)
_run_build(global_build_config)

View File

@@ -0,0 +1,54 @@
# SPDX-FileCopyrightText: Free Software Foundation Europe e.V. <https://fsfe.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Classes for holding build process config."""
from dataclasses import dataclass
from typing import TYPE_CHECKING, Literal
if TYPE_CHECKING:
from pathlib import Path
@dataclass(frozen=True)
class GlobalBuildConfig:
    """Immutable global configuration as part of a build.

    Built once from the parsed CLI arguments plus derived values
    (``working_target``, ``all_languages``) and shared, read-only,
    across all per-site build phases.
    """

    # Every language available under global/languages/
    all_languages: list[str]
    clean_cache: bool
    full: bool
    # Explicit subset of languages requested on the CLI; empty means "all"
    languages: list[str]
    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
    processes: int
    serve: bool
    sites: list[Path]
    source: Path
    stage: bool
    targets: list[str]
    working_target: Path

    def __post_init__(self) -> None:
        """Validate build settings.

        Raises:
            ValueError: If any requested language code is not a two-letter
                alphabetic string, or is not present in ``all_languages``.
        """
        # Language validation only applies when an explicit subset was given
        if not self.languages:
            return
        # All languages are two letter codes; collect every offender so the
        # error names them all instead of failing on the first one only.
        malformed = [
            lang
            for lang in self.languages
            if len(lang) != 2 or not lang.isalpha()  # noqa: PLR2004
        ]
        if malformed:
            message = (
                f"Language codes {malformed}"
                " must be two-letter alphabetic strings."
            )
            raise ValueError(message)
        # All languages should exist in the global config
        unknown = [lang for lang in self.languages if lang not in self.all_languages]
        if unknown:
            message = f"Languages {unknown} are not in the 'all_languages' list."
            raise ValueError(message)
@dataclass(frozen=True)
class SiteBuildConfig:
    """Immutable Build config specific to a site.

    Recomputed once per site in the main build loop, as opposed to the
    process-wide ``GlobalBuildConfig``.
    """

    # Languages this particular site is built in (from the CLI, or derived
    # from the *.<lang>.xhtml files found in the site)
    languages: list[str]
    # Source directory of the site, e.g. ``fsfe.org``
    site: Path

View File

@@ -0,0 +1,41 @@
# SPDX-FileCopyrightText: Free Software Foundation Europe e.V. <https://fsfe.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Classes for per site config."""
from dataclasses import dataclass, field
# allow Path when not typechecking to allow dacite loading
from pathlib import Path # noqa: TC003
@dataclass(frozen=True)
class FileSet:
    """File set type.

    One source/target mapping inside a dependency's ``file_sets`` list.
    Field names mirror the keys used in a site's ``config.toml``.
    """

    # Path inside the dependency repository; gitignore-style, so it may
    # carry a leading "/" meaning "relative to the repo root only"
    source: Path
    # Destination path, relative to the site's source directory
    target: Path
@dataclass(frozen=True)
class Dependency:
    """Dependency type.

    Describes one external git repository a site pulls files from
    before building.
    """

    # Clone URL of the git repository
    repo: str
    # Revision to fetch (e.g. a tag such as ``v2.3.9``)
    rev: str
    # Files to copy from the sparse checkout into the site
    file_sets: list[FileSet]
@dataclass(frozen=True)
class Deployment:
    """Schema for settings for deployment."""

    # Extra files (paths/globs relative to the site dir) that must be copied
    # to the output even though the generic copy step would otherwise skip them
    required_files: list[str] = field(default_factory=list[str])
@dataclass(frozen=True)
class SiteConfig:
    """Schema for per site config.

    Loaded with ``dacite`` from an optional ``config.toml`` in the site's
    root; all fields default, so a missing file yields an empty config.
    """

    # External repositories to fetch files from before building
    dependencies: list[Dependency] = field(default_factory=list[Dependency])
    # Deployment-specific settings for this site
    deployment: Deployment = field(default_factory=Deployment)

View File

@@ -14,24 +14,29 @@ from typing import TYPE_CHECKING
if TYPE_CHECKING:
from pathlib import Path
from fsfe_website_build.lib.build_config import GlobalBuildConfig
logger = logging.getLogger(__name__)
def prepare_early_subdirectories(
source: Path, source_dir: Path, processes: int
global_build_config: GlobalBuildConfig, source_site: Path
) -> None:
"""Find any early subdir scripts in subdirectories and run them."""
logger.info("Preparing Early Subdirectories for site %s", source_dir)
for subdir_path in (path.parent for path in source_dir.glob("**/early_subdir.py")):
logger.info("Preparing Early Subdirectories for site %s", source_site)
for subdir_path in (path.parent for path in source_site.glob("**/early_subdir.py")):
logger.info("Preparing early subdirectory %s", subdir_path)
sys.path.append(str(subdir_path.resolve()))
early_subdir_path_resolved = str(subdir_path.resolve())
sys.path.append(early_subdir_path_resolved)
# Ignore this very sensible warning, as we do evil things
# here for our subdir scripts
import early_subdir # noqa: PLC0415 # pyright: ignore [reportMissingImports]
import early_subdir # noqa: PLC0415 # type: ignore # pyright: ignore [reportUnknownMemberType]
early_subdir.run(source, processes, subdir_path) # pyright: ignore [reportUnknownMemberType]
early_subdir.run( # pyright: ignore [reportUnknownMemberType]
global_build_config.source, global_build_config.processes, subdir_path
)
# Remove its path from where things can be imported
sys.path.remove(str(subdir_path.resolve()))
sys.path.remove(early_subdir_path_resolved)
# Remove it from loaded modules
sys.modules.pop("early_subdir")
# prevent us from accessing it again

View File

@@ -4,36 +4,36 @@
"""Download the dependencies of a site, should it be necessary."""
import logging
import tomllib
from collections import defaultdict
from pathlib import Path
from typing import TYPE_CHECKING
from fsfe_website_build.globals import CACHE_DIR
from fsfe_website_build.lib.misc import run_command
if TYPE_CHECKING:
from pathlib import Path
from fsfe_website_build.lib.site_config import Dependency
logger = logging.getLogger(__name__)
def fetch_sparse(
cache: Path,
repo: str,
rev: str,
file_mappings: list[dict[str, str]],
) -> None:
def fetch_sparse(cache: Path, source_site: Path, dependency: Dependency) -> None:
"""Clone source, and move necessary files into place with source/dest pairs."""
clone_dir = cache / f"{Path(repo).name}_{rev}"
clone_dir = cache / f"{dependency.repo.split('/')[-1]}_{dependency.rev}"
if not (clone_dir / ".git").exists():
clone_dir.mkdir(exist_ok=True)
run_command(
["git", "-C", str(clone_dir), "init", "--quiet", "--no-initial-branch"]
)
run_command(["git", "-C", str(clone_dir), "remote", "add", "origin", repo])
run_command(
["git", "-C", str(clone_dir), "remote", "add", "origin", dependency.repo]
)
run_command(
["git", "-C", str(clone_dir), "config", "core.sparseCheckout", "true"]
)
# Extract all paths for sparse checkout
paths = [mapping["source"] for mapping in file_mappings]
paths = [str(mapping.source) for mapping in dependency.file_sets]
sparse_checkout_path = clone_dir / ".git" / "info" / "sparse-checkout"
# if path is ".", checkout the whole repo
if any(path == "." for path in paths):
@@ -42,18 +42,28 @@ def fetch_sparse(
sparse_checkout_path.write_text("\n".join(paths) + "\n")
# Fetch the required revision
run_command(["git", "-C", str(clone_dir), "fetch", "--depth=1", "origin", rev])
run_command(
[
"git",
"-C",
str(clone_dir),
"fetch",
"--depth=1",
"origin",
dependency.rev,
]
)
# Checkout the fetched revision
run_command(["git", "-C", str(clone_dir), "checkout", "FETCH_HEAD"])
# Copy each file to its destination
for mapping in file_mappings:
for mapping in dependency.file_sets:
# create our source path
# the source syntax is that of .gitignore, and so may have a leading /
# to say to interpret it only relative to the root
# and so we remove that so joining gives us a proper path
src = clone_dir / mapping["source"].lstrip("/")
dest_path = Path(mapping["dest"])
dest_path.parent.mkdir(parents=True, exist_ok=True)
src = clone_dir / str(mapping.source).lstrip("/")
target = source_site / mapping.target
target.parent.mkdir(parents=True, exist_ok=True)
run_command(
[
@@ -62,40 +72,15 @@ def fetch_sparse(
"--del",
"--exclude=.git",
str(src) if not src.is_dir() else str(src) + "/",
str(dest_path),
str(target),
]
)
def get_dependencies(
source_dir: Path,
) -> None:
def get_dependencies(source_site: Path, dependencies: list[Dependency]) -> None:
"""Download and put in place all website dependencies."""
logger.info("Getting Dependencies")
cache = CACHE_DIR / "repos"
cache.mkdir(parents=True, exist_ok=True)
deps_file = source_dir / "dependencies.toml"
if deps_file.exists():
with deps_file.open("rb") as file:
cfg = tomllib.load(file)
# Group file mappings by repository and revision
repo_tasks: defaultdict[tuple[str, str], list[dict[str, str]]] = defaultdict(
list
)
for data in cfg.values():
repo = str(data["repo"])
rev = str(data["rev"])
key = (repo, rev)
for file_set in data["file_sets"]:
# Make path relative to source dir
dest = source_dir / file_set["target"]
repo_tasks[key].append(
{"source": str(file_set["source"]), "dest": str(dest)}
)
# Process each repository/revision only once
for (repo, rev), file_mappings in repo_tasks.items():
fetch_sparse(cache, repo, rev, file_mappings)
for dependency in dependencies:
fetch_sparse(cache, source_site, dependency)

View File

@@ -20,14 +20,14 @@ logger = logging.getLogger(__name__)
def prepare_subdirectories(
source: Path,
source_dir: Path,
source_site: Path,
languages: list[str],
processes: int,
) -> None:
"""Find any subdir scripts in subdirectories and run them."""
logger.info("Preparing Subdirectories")
for subdir_path in sorted(
(path.parent for path in source_dir.glob("**/subdir.py")),
(path.parent for path in source_site.glob("**/subdir.py")),
key=lambda directory: directory.joinpath("subdir-prio.txt").read_text().strip()
if directory.joinpath("subdir-prio.txt").exists()
else "0",

View File

@@ -22,24 +22,40 @@ from .update_xmllists import update_xmllists
if TYPE_CHECKING:
import multiprocessing.pool
from pathlib import Path
from fsfe_website_build.lib.build_config import GlobalBuildConfig, SiteBuildConfig
from fsfe_website_build.lib.site_config import SiteConfig
logger = logging.getLogger(__name__)
def phase1_run(
source: Path,
source_site: Path,
languages: list[str],
processes: int,
global_build_config: GlobalBuildConfig,
site_build_config: SiteBuildConfig,
site_config: SiteConfig,
pool: multiprocessing.pool.Pool,
) -> None:
"""Run all the necessary sub functions for phase1."""
logger.info("Starting Phase 1 - Setup")
get_dependencies(source_site)
update_css(source_site)
update_stylesheets(source_site, pool)
prepare_subdirectories(source, source_site, languages, processes)
update_defaultxsls(source_site, pool)
update_localmenus(source, source_site, languages, pool)
update_xmllists(source, source_site, languages, pool)
get_dependencies(site_build_config.site, site_config.dependencies)
update_css(site_build_config.site)
update_stylesheets(site_build_config.site, pool)
prepare_subdirectories(
global_build_config.source,
site_build_config.site,
site_build_config.languages,
global_build_config.processes,
)
update_defaultxsls(site_build_config.site, pool)
update_localmenus(
global_build_config.source,
site_build_config.site,
site_build_config.languages,
pool,
)
update_xmllists(
global_build_config.source,
site_build_config.site,
site_build_config.languages,
pool,
)

View File

@@ -20,7 +20,7 @@ logger = logging.getLogger(__name__)
def update_css(
source_dir: Path,
source_site: Path,
) -> None:
"""If any less files have been changed, update the css.
@@ -28,7 +28,7 @@ def update_css(
Then minify it, and place it in the expected location for the build process.
"""
logger.info("Updating css")
directory = source_dir.joinpath("look")
directory = source_site.joinpath("look")
if directory.exists():
for file in directory.glob("main*.less"):
compiled_path = file.with_suffix(".css")

View File

@@ -31,7 +31,7 @@ def _do_symlinking(directory: Path) -> None:
)
def update_defaultxsls(source_dir: Path, pool: multiprocessing.pool.Pool) -> None:
def update_defaultxsls(source_site: Path, pool: multiprocessing.pool.Pool) -> None:
"""Place a .default.xsl into each XHTML source directory.
These .default.xsl are symlinks to the first
@@ -42,7 +42,7 @@ def update_defaultxsls(source_dir: Path, pool: multiprocessing.pool.Pool) -> Non
logger.info("Updating default xsl's")
# Get a set of all directories containing .xhtml source files
directories = {path.parent for path in source_dir.glob("**/*.*.xhtml")}
directories = {path.parent for path in source_site.glob("**/*.*.xhtml")}
# Do all directories asynchronously
pool.map(_do_symlinking, directories)

View File

@@ -27,7 +27,7 @@ logger = logging.getLogger(__name__)
def _write_localmenus(
source_dir: Path,
source_site: Path,
directory: Path,
files: list[Path],
languages: list[str],
@@ -61,7 +61,7 @@ def _write_localmenus(
link=(
"/"
+ str(
link_file.relative_to(source_dir),
link_file.relative_to(source_site),
)
),
).text = localmenu.text
@@ -71,7 +71,7 @@ def _write_localmenus(
def update_localmenus(
source: Path,
source_dir: Path,
source_site: Path,
languages: list[str],
pool: multiprocessing.pool.Pool,
) -> None:
@@ -81,7 +81,7 @@ def update_localmenus(
files_by_dir: dict[Path, list[Path]] = defaultdict(list)
for file in (
file
for file in source_dir.glob("**/*.??.xhtml")
for file in source_site.glob("**/*.??.xhtml")
if "-template" not in file.name
):
xslt_root = etree.parse(file)
@@ -111,5 +111,5 @@ def update_localmenus(
]
pool.starmap(
_write_localmenus,
((source_dir, directory, files, languages) for directory, files in dirs),
((source_site, directory, files, languages) for directory, files in dirs),
)

View File

@@ -40,7 +40,7 @@ def _update_sheet(file: Path) -> None:
touch_if_newer_dep(file, imports)
def update_stylesheets(source_dir: Path, pool: multiprocessing.pool.Pool) -> None:
def update_stylesheets(source_site: Path, pool: multiprocessing.pool.Pool) -> None:
"""Touch all XSL files dependant on an XSL that has changed since last build."""
logger.info("Updating XSL stylesheets")
banned = re.compile(r"(\.venv/.*)|(.*\.default\.xsl$)")
@@ -48,6 +48,6 @@ def update_stylesheets(source_dir: Path, pool: multiprocessing.pool.Pool) -> Non
_update_sheet,
filter(
lambda file: banned.match(str(file)) is None,
source_dir.glob("**/*.xsl"),
source_site.glob("**/*.xsl"),
),
)

View File

@@ -103,7 +103,7 @@ def _update_for_base( # noqa: PLR0913
def _update_module_xmllists(
source: Path,
source_dir: Path,
source_site: Path,
languages: list[str],
pool: multiprocessing.pool.Pool,
) -> None:
@@ -113,15 +113,15 @@ def _update_module_xmllists(
all_xml = {
get_basepath(path)
for path in (
*source_dir.glob("**/*.*.xml"),
*source_site.glob("**/*.*.xml"),
*source.joinpath("global/").glob("**/*.*.xml"),
)
if lang_from_filename(path) in languages
}
source_bases = {path.with_suffix("") for path in source_dir.glob("**/*.sources")}
source_bases = {path.with_suffix("") for path in source_site.glob("**/*.sources")}
module_bases = {
get_basepath(path)
for path in source_dir.glob("**/*.*.xhtml")
for path in source_site.glob("**/*.*.xhtml")
if lang_from_filename(path) in languages and etree.parse(path).xpath("//module")
}
all_bases = source_bases | module_bases

View File

@@ -16,11 +16,13 @@ if TYPE_CHECKING:
import multiprocessing.pool
from pathlib import Path
from fsfe_website_build.lib.site_config import Deployment
logger = logging.getLogger(__name__)
def _copy_file(target: Path, source_dir: Path, source_file: Path) -> None:
target_file = target.joinpath(source_file.relative_to(source_dir))
def _copy_file(target: Path, source_site: Path, source_file: Path) -> None:
target_file = target.joinpath(source_file.relative_to(source_site))
if (
not target_file.exists()
or source_file.stat().st_mtime > target_file.stat().st_mtime
@@ -33,9 +35,9 @@ def _copy_file(target: Path, source_dir: Path, source_file: Path) -> None:
def _copy_minify_file(
target: Path, source_dir: Path, source_file: Path, mime: str
target: Path, source_site: Path, source_file: Path, mime: str
) -> None:
target_file = target.joinpath(source_file.relative_to(source_dir))
target_file = target.joinpath(source_file.relative_to(source_site))
if (
not target_file.exists()
or source_file.stat().st_mtime > target_file.stat().st_mtime
@@ -52,7 +54,12 @@ def _copy_minify_file(
shutil.copymode(source_file, target_file)
def copy_files(source_dir: Path, pool: multiprocessing.pool.Pool, target: Path) -> None:
def copy_files(
source_site: Path,
pool: multiprocessing.pool.Pool,
target: Path,
deploy_config: Deployment,
) -> None:
"""Copy images, documents etc."""
logger.info("Copying over media and misc files")
# file extensions and mimes of minificable content
@@ -62,30 +69,16 @@ def copy_files(source_dir: Path, pool: multiprocessing.pool.Pool, target: Path)
".js": "application/javascript",
".svg": "image/svg+xml",
}
# get the special cases per site
special_case_file = source_dir / "required_deploy_files.txt"
stripped_lines = (
[line.strip() for line in special_case_file.read_text().split("\n")]
if special_case_file.exists()
else []
)
special_includes = [
file
for line in stripped_lines
if line and not line.startswith("#")
for file in source_dir.glob(line)
]
# Here we copy everything we cannot minify
pool.starmap(
_copy_file,
(
(target, source_dir, file)
(target, source_site, file)
for file in [
# globbing of all files, exclude blacklist
*[
path
for path in source_dir.glob("**/*")
for path in source_site.glob("**/*")
if path.is_file()
# Things we dont want over at all
and path.suffix
@@ -112,7 +105,7 @@ def copy_files(source_dir: Path, pool: multiprocessing.pool.Pool, target: Path)
]
],
# special whitelist to include
*special_includes,
*[source_site / file for file in deploy_config.required_files],
]
),
)
@@ -121,6 +114,6 @@ def copy_files(source_dir: Path, pool: multiprocessing.pool.Pool, target: Path)
# https://github.com/tdewolff/minify/issues/535
for file_suffix, mime in minifiable_content.items():
for file in [
path for path in source_dir.glob(f"**/*{file_suffix}") if path.is_file()
path for path in source_site.glob(f"**/*{file_suffix}") if path.is_file()
]:
_copy_minify_file(target, source_dir, file, mime)
_copy_minify_file(target, source_site, file, mime)

View File

@@ -27,7 +27,7 @@ logger = logging.getLogger(__name__)
def _process_set( # noqa: PLR0913
source: Path,
source_dir: Path,
source_site: Path,
languages: list[str],
target: Path,
processor: Path,
@@ -55,7 +55,7 @@ def _process_set( # noqa: PLR0913
".html" if (len(processor.suffixes) == 1) else processor.suffixes[0]
)
target_file = target.joinpath(
source_file.relative_to(source_dir),
source_file.relative_to(source_site),
).with_suffix(target_suffix)
# if the target file does not exist, we make it
if not target_file.exists() or any(
@@ -90,7 +90,7 @@ def _process_set( # noqa: PLR0913
def process_files(
source: Path,
source_dir: Path,
source_site: Path,
languages: list[str],
pool: multiprocessing.pool.Pool,
target: Path,
@@ -101,7 +101,7 @@ def process_files(
process_files_dict: dict[Path, set[Path]] = defaultdict(set)
# This gathers all the simple xhtml files for generating xhtml output
for file in source_dir.glob("**/*.*.xhtml"):
for file in source_site.glob("**/*.*.xhtml"):
# Processors with a file ending for the output encoded in the name, eg
# events.rss.xsl
type_specific_processors = set(
@@ -134,7 +134,7 @@ def process_files(
pool.starmap(
_process_set,
(
(source, source_dir, languages, target, processor, files)
(source, source_site, languages, target, processor, files)
for processor, files in process_files_dict.items()
),
)

View File

@@ -16,19 +16,33 @@ if TYPE_CHECKING:
import multiprocessing.pool
from pathlib import Path
from fsfe_website_build.lib.build_config import GlobalBuildConfig, SiteBuildConfig
from fsfe_website_build.lib.site_config import SiteConfig
logger = logging.getLogger(__name__)
def phase2_run(
source: Path,
source_dir: Path,
languages: list[str],
global_build_config: GlobalBuildConfig,
site_build_config: SiteBuildConfig,
site_config: SiteConfig,
site_target: Path,
pool: multiprocessing.pool.Pool,
target: Path,
) -> None:
"""Run all the necessary sub functions for phase2."""
logger.info("Starting Phase 2 - Generating output")
process_files(source, source_dir, languages, pool, target)
create_index_symlinks(pool, target)
create_language_symlinks(pool, target)
copy_files(source_dir, pool, target)
process_files(
global_build_config.source,
site_build_config.site,
site_build_config.languages,
pool,
site_target,
)
create_index_symlinks(pool, site_target)
create_language_symlinks(pool, site_target)
copy_files(
site_build_config.site,
pool,
site_target,
site_config.deployment,
)

View File

@@ -37,7 +37,7 @@ def _rsync(stagedir: Path, target: str, port: int) -> None:
def stage_to_target(
stagedir: Path, targets: str, pool: multiprocessing.pool.Pool
stagedir: Path, targets: list[str], pool: multiprocessing.pool.Pool
) -> None:
"""Use a multithreaded rsync to copy the stage dir to all targets."""
logger.info("Rsyncing from stage dir to target dir(s)")
@@ -49,6 +49,6 @@ def stage_to_target(
(target if "?" not in target else target.split("?")[0]),
(int(target.split("?")[1]) if "?" in target else 22),
)
for target in targets.split(",")
for target in targets
),
)

View File

@@ -1,39 +1,20 @@
# SPDX-FileCopyrightText: Free Software Foundation Europe e.V. <https://fsfe.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later
from argparse import Namespace
from pathlib import Path
from typing import TYPE_CHECKING
from fsfe_website_build.build import build
if TYPE_CHECKING:
from pathlib import Path
from pytest_mock import MockFixture
def no_rebuild_twice_test(mocker: MockFixture) -> None:
cli_args = ["--languages", "en", "--log-level", "CRITICAL"]
# first, run a full build
args = Namespace(
full=True,
clean_cache=False,
languages=[
"en",
],
log_level="CRITICAL", # by only logging critical messages
# the build should be faster, as evaluating less things to strings
processes=8,
source=Path(),
serve=False,
sites=[
Path("drm.info"),
Path("fsfe.org"),
Path("pdfreaders.org"),
Path("status.fsfe.org"),
],
stage=False,
target="output/final",
)
build(args)
build([*cli_args, "--full"])
# replace update_if_changed with
# mocked one that exceptions if the file would be changed
@@ -47,5 +28,4 @@ def no_rebuild_twice_test(mocker: MockFixture) -> None:
"fsfe_website_build.lib.misc.update_if_changed", side_effect=fail_if_update
)
# now, run a normal build
args.full = False
build(args)
build(cli_args)

View File

@@ -21,7 +21,7 @@ To run it in the project using the project config, please use `uv run ruff`.
We try to keep to some design patterns to keep things manageable.
Firstly, each phase as described in [the overview](./overview.md) should handle a meaningfully different kind of interaction. Each phase should be structured, to the greatest degree possible, as a sequence of steps. We consider that each phase should have a `run.py` file that exposes a `ipahse_*run` function that takes the arguments needed for its phase.
Firstly, each phase as described in [the overview](./overview.md) should handle a meaningfully different kind of interaction. Each phase should be structured, to the greatest degree possible, as a sequence of steps. We consider that each phase should have a `run.py` file that exposes a `phase_?run` function that takes the arguments needed for its phase.
Each run function then calls a sequence of functions that are defined in the other files in the `phase*` folder. Each other file in the folder should expose one function, with the same name as the file, minus file extension. For example, `create_files.py` should expose the function `create_files`. It is a common pattern for the first exposed function to generate a list of files or things to act on, and then multithread this using another function.

View File

@@ -1,28 +1,35 @@
# This is a config file for a site, that contains necessary settings
#
# Sources for the website
# All repos should be mirrored to https://git.fsfe.org/fsfe-system-hackers-mirrors
#
# SCRIPTS
[jquery]
[[dependencies]]
repo = "https://git.fsfe.org/fsfe-system-hackers-mirrors/jquery"
rev = "3.5.1"
file_sets = [
{ source = "/dist/jquery.min.js", target = "scripts/thirdparty/" },
{ source = "/dist/jquery.min.map", target = "scripts/thirdparty/" },
]
[lunr]
[[dependencies]]
repo = "https://git.fsfe.org/fsfe-system-hackers-mirrors/lunr.js"
rev = "v2.3.9"
file_sets = [{ source = "/lunr.js", target = "scripts/thirdparty/lunr.min.js" }]
# STYLING
[bootstrap]
[[dependencies]]
repo = "https://git.fsfe.org/fsfe-system-hackers-mirrors/bootstrap"
rev = "v3.4.1"
file_sets = [{ source = "/less", target = "look/thirdparty/bootstrap" }]
# CGI
[phpmailer]
[[dependencies]]
repo = "https://git.fsfe.org/fsfe-system-hackers-mirrors/PHPMailer"
rev = "v6.10.0"
file_sets = [{ source = "/src", target = "cgi-bin/thirdparty/PHPMailer" }]
# Settings for deployment
[deployment]
# This file contains relative paths we want to deploy even if they would be blocked by the copy_files method filtering.
# Special case hard code pass over order items xml required by cgi script
required_files = ["order/data/items.en.xml"]

View File

@@ -1,3 +0,0 @@
# This files contains relative paths we want to deploy even if they would be blocked by the copy_files method.
# Special case hard code pass over order items xml required by cgi script
order/data/items.en.xml

View File

@@ -84,11 +84,11 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
"""
# Download all stopwords
nltkdir = CACHE_DIR / "nltk_data"
source_dir = working_dir.parent
source_site = working_dir.parent
nltk.data.path = [nltkdir]
nltk.download("stopwords", download_dir=nltkdir, quiet=True) # pyright: ignore [(reportUnknownMemberType)]
with multiprocessing.Pool(processes) as pool:
logger.debug("Indexing %s", source_dir)
logger.debug("Indexing %s", source_site)
# Get all xhtml files in languages to be processed
# Create a list of tuples
@@ -117,7 +117,7 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
else set()
),
)
for file in source_dir.glob("**/*.??.xhtml")
for file in source_site.glob("**/*.??.xhtml")
if file.suffixes[0].removeprefix(".") in languages
)

View File

@@ -11,10 +11,11 @@ dependencies = [
"python-iso639", # For getting english language names of languages from two letter codes.
"requests", # For HTTP requests
"tdewolff-minify", # For minification html css and js
"dacite", # dict to dataclass conversion
]
[project.scripts]
build = "fsfe_website_build:main"
build = "fsfe_website_build:build"
[dependency-groups]
dev = [

11
uv.lock generated
View File

@@ -130,11 +130,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ee/58/257350f7db99b4ae12b614a36256d9cc870d71d9e451e79c2dc3b23d7c3c/cssselect-1.3.0-py3-none-any.whl", hash = "sha256:56d1bf3e198080cc1667e137bc51de9cadfca259f03c2d4e09037b3e01e30f0d", size = 18786, upload-time = "2025-03-10T09:30:28.048Z" },
]
[[package]]
name = "dacite"
version = "1.9.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/55/a0/7ca79796e799a3e782045d29bf052b5cde7439a2bbb17f15ff44f7aacc63/dacite-1.9.2.tar.gz", hash = "sha256:6ccc3b299727c7aa17582f0021f6ae14d5de47c7227932c47fec4cdfefd26f09", size = 22420, upload-time = "2025-02-05T09:27:29.757Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/94/35/386550fd60316d1e37eccdda609b074113298f23cef5bddb2049823fe666/dacite-1.9.2-py3-none-any.whl", hash = "sha256:053f7c3f5128ca2e9aceb66892b1a3c8936d02c686e707bee96e19deef4bc4a0", size = 16600, upload-time = "2025-02-05T09:27:24.345Z" },
]
[[package]]
name = "fsfe-website-build"
version = "0.0.0"
source = { editable = "." }
dependencies = [
{ name = "dacite" },
{ name = "lxml" },
{ name = "nltk" },
{ name = "platformdirs" },
@@ -157,6 +167,7 @@ dev = [
[package.metadata]
requires-dist = [
{ name = "dacite" },
{ name = "lxml" },
{ name = "nltk" },
{ name = "platformdirs" },