feat: Phase1 in python
All checks were successful
continuous-integration/drone/pr Build is passing

Install python deps using venv during the build process
Convert the entirety of phase1 to python
This commit is contained in:
Darragh Elliott 2024-11-16 13:20:35 +00:00
parent 372d718888
commit ccc40ab9a5
38 changed files with 1220 additions and 944 deletions

5
.gitignore vendored
View File

@ -17,6 +17,11 @@ fsfe.org/tags/tagged-*.en.xhtml
fsfe.org/tags/.tags.??.xml
# Local build stuff
output
# Python venv
.venv
__pycache__
#Nltk
.nltk_data
## Status dir stuff
status.fsfe.org/*/data*/*

View File

@ -14,10 +14,12 @@ make \
libc-bin \
wget \
procps \
node-less \
python3 \
python3-bs4
python3-venv \
python3-pip
WORKDIR /fsfe-websites
ENTRYPOINT ["bash", "./build.sh" ]
ENTRYPOINT ["python3", "./build.py" ]

157
Makefile
View File

@ -1,157 +0,0 @@
# -----------------------------------------------------------------------------
# Makefile for FSFE website build, phase 1
# -----------------------------------------------------------------------------
# This Makefile is executed in the root of the source directory tree, and
# creates some .xml and xhtml files as well as some symlinks, all of which
# serve as input files in phase 2. The whole phase 1 runs within the source
# directory tree and does not touch the target directory tree at all.
# -----------------------------------------------------------------------------
.PHONY: all .FORCE
.FORCE:
# This will be overwritten in the command line running this Makefile.
build_env = development
languages = none
# -----------------------------------------------------------------------------
# Build search index
# -----------------------------------------------------------------------------
# This step runs a Python tool that creates an index of all news and
# articles. It extracts titles, teaser, tags, dates and potentially more.
# The result will be fed into a JS file.
.PHONY: searchindex
all: searchindex
searchindex:
python3 tools/index-website.py
# -----------------------------------------------------------------------------
# Update CSS files
# -----------------------------------------------------------------------------
# This step recompiles the less files into the final CSS files to be
# distributed to the web server.
ifneq ($(build_env),development)
websites:=$(shell find . -mindepth 2 -maxdepth 2 -type d -regex "./[a-z\.]+\.[a-z]+/look")
all: $(foreach dir,$(websites), $(dir)/fsfe.min.css $(dir)/valentine.min.css)
$(dir $@)%.min.css: $(shell find $(dir $@) -name '*.less')
echo "* Compiling $@"
lessc "$*.less" -x "$@"
endif
# -----------------------------------------------------------------------------
# Update XSL stylesheets
# -----------------------------------------------------------------------------
# This step updates (actually: just touches) all XSL files which depend on
# another XSL file that has changed since the last build run. The phase 2
# Makefile then only has to consider the directly used stylesheet as a
# prerequisite for building each file and doesn't have to worry about other
# stylesheets imported into that one.
# This must run before the "dive into subdirectories" step, because in the news
# and events directories, the XSL files, if updated, will be copied for the
# per-year archives.
.PHONY: stylesheets
all: stylesheets
stylesheets: $(SUBDIRS)
tools/update_stylesheets.sh
# -----------------------------------------------------------------------------
# Dive into subdirectories
# -----------------------------------------------------------------------------
SUBDIRS := $(shell find . -regex "./[a-z\.]+\.[a-z]+/.*/Makefile" | xargs dirname)
all: $(SUBDIRS)
$(SUBDIRS): .FORCE
echo "* Preparing subdirectory $@"
$(MAKE) --silent --directory=$@ languages="$(languages)"
# -----------------------------------------------------------------------------
# Create XML symlinks
# -----------------------------------------------------------------------------
# After this step, the following symlinks will exist:
# * global/data/texts/.texts.<lang>.xml for each language
# * global/data/topbanner/.topbanner.<lang>.xml for each language
# Each of these symlinks will point to the corresponding file without a dot at
# the beginning of the filename, if present, and to the English version
# otherwise. This symlinks make sure that phase 2 can easily use the right file
# for each language, also as a prerequisite in the Makefile.
TEXTS_LINKS := $(foreach lang,$(languages),global/data/texts/.texts.$(lang).xml)
all: $(TEXTS_LINKS)
global/data/texts/.texts.%.xml: .FORCE
if [ -f global/data/texts/texts.$*.xml ]; then \
ln -sf texts.$*.xml $@; \
else \
ln -sf texts.en.xml $@; \
fi
TOPBANNER_LINKS := $(foreach lang,$(languages),global/data/topbanner/.topbanner.$(lang).xml)
all: $(TOPBANNER_LINKS)
global/data/topbanner/.topbanner.%.xml: .FORCE
if [ -f global/data/topbanner/topbanner.$*.xml ]; then \
ln -sf topbanner.$*.xml $@; \
else \
ln -sf topbanner.en.xml $@; \
fi
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# The following steps are handled in an external script, because the list of
# files to generate is not known when the Makefile starts - some new tags might
# be introduced when generating the .xml files in the news/* subdirectories.
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# -----------------------------------------------------------------------------
# Create XSL symlinks
# -----------------------------------------------------------------------------
# After this step, each directory with source files for HTML pages contains a
# symlink named .default.xsl and pointing to the default.xsl "responsible" for
# this directory. These symlinks make it easier for the phase 2 Makefile to
# determine which XSL script should be used to build a HTML page from a source
# file.
.PHONY: default_xsl
all: default_xsl
default_xsl:
tools/update_defaultxsls.sh
# -----------------------------------------------------------------------------
# Update local menus
# -----------------------------------------------------------------------------
# After this step, all .localmenu.??.xml files will be up to date.
.PHONY: localmenus
all: localmenus
localmenus: $(SUBDIRS)
tools/update_localmenus.sh "$(languages)"
# -----------------------------------------------------------------------------
# Update XML filelists
# -----------------------------------------------------------------------------
# After this step, the following files will be up to date:
# * tags/tagged-<tags>.en.xhtml for each tag used. Apart from being
# automatically created, these are regular source files for HTML pages, and
# in phase 2 are built into pages listing all news items and events for a
# tag.
# * tags/.tags.??.xml with a list of the tags useed.
# * <dir>/.<base>.xmllist for each <dir>/<base>.sources as well as for each
# tags/tagged-<tags>.en.xhtml. These files are used in phase 2 to include the
# correct XML files when generating the HTML pages. It is taken care that
# these files are only updated whenever their content actually changes, so
# they can serve as a prerequisite in the phase 2 Makefile.
.PHONY: xmllists
all: xmllists
xmllists: $(SUBDIRS)
tools/update_xmllists.sh "$(languages)"

View File

@ -79,18 +79,19 @@ Alterations to build scripts or the files used site-wide will result in near ful
### Native
We can install the required dependencies manually using our preferred package manager. Alternatively, Nix users can run `nix-shell` to enter a shell with the required build dependencies.
The required binary names are
If installing manually, the required binary names are
```
realpath rsync xsltproc xmllint sed find egrep grep wc make tee date iconv wget shuf python3
realpath rsync xsltproc xmllint sed find egrep grep wc make tee date iconv wget shuf python3 pip3
```
The package names for Debian, are
The package names for Debian are
```
bash bash-completion coreutils diffutils findutils inotify-tools libxml2-utils libxslt make procps python3 rsync
bash bash-completion coreutils diffutils findutils inotify-tools libxml2-utils libxslt make procps python3 python3-pip python3-venv rsync
```
The python dependencies are installed as part of the build process. They can be found in `requirements.txt`.
After installing the dependencies one way or another, we can build and serve the pages.
The pages can be built and served by running `./build.sh`. Try `--help` for more information. The simple web server used lacks the features of `apache` which used on the FSFE web servers. This is why no index is automatically selected form and directory and other behaviors.
The pages can be built and served by running `./build.py`. Try `--help` for more information. The simple web server used lacks the features of `apache`, which is used on the FSFE web servers. This is why, among other behavioural differences, no index page is automatically selected for a directory.
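For example (an illustrative sketch; the flags below are defined in `build.py`, see `--help` for the full list):
```
# Build with the default settings and serve the result on localhost
./build.py --serve
# Force a full rebuild with more verbose logging
./build.py --full --log-level DEBUG
```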
### Docker
Simply running `docker compose run --service-ports build --serve` should build the webpages and make them available over localhost.

154
build.py Executable file
View File

@ -0,0 +1,154 @@
#!/usr/bin/env python3
import argparse
import logging
import os
import subprocess
import sys
from pathlib import Path
from tools.serve_websites import serve_websites
logger = logging.getLogger(__name__)
if __name__ == "__main__":
"""
Main process of the website builder
"""
# Change to the dir the script is in.
os.chdir(os.path.dirname(__file__))
parser = argparse.ArgumentParser(
description="Python script to handle building of the fsfe webpage"
)
parser.add_argument(
"--target",
dest="target",
help="Directory to build websites into.",
type=str,
default="./output/final",
)
parser.add_argument(
"--log-level",
dest="log_level",
type=str,
default="INFO",
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
help="Set the logging level (default: INFO)",
)
parser.add_argument(
"--full",
dest="full",
help="Force a full rebuild of all webpages.",
action="store_true",
)
parser.add_argument(
"--update",
dest="update",
help="Update the repo as part of the build.",
action="store_true",
)
parser.add_argument(
"--languages",
dest="languages",
help="Languages to build website in.",
type=str,
)
# parser.add_argument(
# "--status",
# dest="status",
# help="Store status reports.",
# action="store_true",
# )
parser.add_argument(
"--status-dir",
dest="status_dir",
help="Directory to store status reports in.",
type=Path,
)
# parser.add_argument(
# "--stage",
# dest="stage",
# help="Perform a dry run, not altering anything on the server, but printing messages as though it is.",
# action="store_true",
# )
parser.add_argument(
"--stage-dir",
dest="stage_dir",
help="Directory to store build status updates in",
type=Path,
)
parser.add_argument(
"--test",
dest="test",
help="Enable some testing features that test for worse scenarios, but hamper performance.",
action="store_true",
)
parser.add_argument(
"--serve",
dest="serve",
help="Serve the webpages after rebuild",
action="store_true",
)
args = parser.parse_args()
logging.basicConfig(format="* %(message)s", level=args.log_level)
Path("./output").mkdir(parents=True, exist_ok=True)
if not (args.full and args.update):
command = (
"build_run"
if not (args.full or args.update)
else "build_into"
if args.full
else "git_build_into"
)
else:
logger.critical("Cannot do a full rebuild and an update at once, exiting")
sys.exit(1)
if not args.status_dir:
args.status_dir = (
f'{args.target.removesuffix("/")}/status.fsfe.org/fsfe.org/data'
)
logger.debug(f"Args: {args}")
to_run = (
[
"./build/build_main.sh",
command,
args.target,
]
+ (
[
"--stage-dir",
str(args.stage_dir),
]
if args.stage_dir
else []
)
+ (
[
"--status-dir",
str(args.status_dir),
]
if args.status_dir
else []
)
+ (
[
"--languages",
args.languages,
]
if args.languages
else []
)
)
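# As an illustration (assuming the defaults above), running "./build.py --full --languages en"
# would resolve to roughly:
# ["./build/build_main.sh", "build_into", "./output/final",
# "--status-dir", "./output/final/status.fsfe.org/fsfe.org/data",
# "--languages", "en"]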
logger.debug(f"Subprocess command: {to_run}")
env = dict(os.environ)
env["LOGLEVEL"] = args.log_level
if args.test:
env["TEST"] = str(args.test).upper()
logger.debug(f"Env Vars being set: {env}")
build = subprocess.run(to_run, env=env)
if build.returncode != 0:
logger.critical("Build process has failed, Exiting!")
sys.exit(1)
if args.serve:
serve_websites(args.target, 2000, 100)

View File

@ -1,48 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
usage() {
cat <<-EOF
# build.sh Usage
## General
This script is a wrapper script over ./build/build_main.sh that provides nicer option names, and the options to serve the files.
For documentation on the build script itself see ./build/README.md
## Flags
### -f | --full
Perform a full rebuild of the webpages.
### -s | --serve
Serve the build webpages over localhost.
### --
Everything after this is passed directly to build_main.
See ./build/README.md for valid options.
EOF
exit 1
}
command="build_run"
serve=""
extra_args=""
while [ "$#" -gt 0 ]; do
case "$1" in
--full | -f)
command="build_into" && shift 1
;;
--serve | -s)
serve="true" && shift 1
;;
--)
shift 1
while [ "$#" -gt 0 ]; do
extra_args+="$1 "
shift 1
done
;;
*)
usage
;;
esac
done
mkdir -p ./output
./build/build_main.sh "$command" ./output/final --statusdir ./output/final/status.fsfe.org/fsfe.org/data $extra_args
if [[ "$serve" ]]; then
python3 ./serve-websites.py
fi

2
build/__init__.py Normal file
View File

@ -0,0 +1,2 @@
# __init__.py is a special Python file that allows a directory to become
# a Python package so it can be accessed using the 'import' statement.

View File

@ -22,7 +22,7 @@ check_dependencies() {
}
# Check dependencies for all kinds of build envs (e.g. development, fsfe.org)
check_dependencies realpath rsync xsltproc xmllint sed find egrep grep wc make tee date iconv wget shuf python3
check_dependencies realpath rsync xsltproc xmllint sed find egrep grep wc make tee date iconv wget shuf python3 pip3
if ! make --version | grep -q "GNU Make 4"; then
echo "The build script requires GNU Make 4.x"

View File

@ -29,6 +29,10 @@ dir_maker() {
# The actual build
buildrun() {
echo "Setting up python deps!"
python3 -m venv "$basedir/.venv" || die "Failed to set up python venv!"
source "$basedir/.venv/bin/activate" || die "Failed to activate python venv!"
pip3 install -r "$basedir/requirements.txt" --quiet || die "Failed to install dependencies"
set -o pipefail
printf %s "$start_time" >"$(logname start_time)"
@ -40,7 +44,7 @@ buildrun() {
{
echo "Starting phase 1" &&
make --silent --directory="$basedir" build_env="${build_env}" languages="$languages" 2>&1 &&
python3 "$basedir"/phase1.py "$languages" 2>&1 &&
echo "Finishing phase 1" ||
die "Error during phase 1"
} | t_logstatus phase_1 || exit 1

62
build/lib.py Normal file
View File

@ -0,0 +1,62 @@
import logging
from pathlib import Path
logger = logging.getLogger(__name__)
def keys_exists(element: dict, *keys: str) -> bool:
"""
Check if *keys (nested) exists in `element` (dict).
"""
if not isinstance(element, dict):
raise AttributeError("keys_exists() expects dict as first argument.")
if len(keys) == 0:
raise AttributeError("keys_exists() expects at least two arguments, one given.")
_element = element
for key in keys:
try:
_element = _element[key]
except KeyError:
return False
return True
def sort_dict(dict: dict) -> dict:
"""
Sort dict by keys
"""
return {key: val for key, val in sorted(dict.items(), key=lambda ele: ele[0])}
def update_if_changed(path: Path, content: str) -> None:
"""
Compare the contents of the file at path with the given content.
If the file does not exist,
or its contents do not match the given content,
write the content to the file.
"""
if not path.exists() or path.read_text() != content:
logger.info(f"Updating {path}")
path.write_text(content)
def touch_if_newer_dep(file: Path, deps: list[Path]) -> None:
"""
Takes a filepath and a list of paths of its dependencies.
If any of the dependencies has been altered more recently than the file,
touch the file.
Essentially a simple reimplementation of make's dependency handling for build targets.
"""
if any(dep.stat().st_mtime > file.stat().st_mtime for dep in deps):
logger.info(f"Touching {file}")
file.touch()
def delete_file(file: Path) -> None:
"""
Delete given file using pathlib
"""
logger.info(f"Removing file {file}")
file.unlink()
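A minimal usage sketch of these helpers (illustrative only; paths and data are made up):
```python
from pathlib import Path

from build.lib import keys_exists, sort_dict, update_if_changed

conf = {"news": {"en": "News"}}
assert keys_exists(conf, "news", "en")      # conf["news"]["en"] exists
assert not keys_exists(conf, "news", "de")  # missing nested key -> False

print(sort_dict({"b": 2, "a": 1}))  # {'a': 1, 'b': 2}

# Only rewrites the file (and thus bumps its mtime) when the content differs
update_if_changed(Path("/tmp/example.xmllist"), "news/example\n")
```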

2
build/phase1/__init__.py Normal file
View File

@ -0,0 +1,2 @@
# __init__.py is a special Python file that allows a directory to become
# a Python package so it can be accessed using the 'import' statement.

View File

@ -0,0 +1,36 @@
import logging
import multiprocessing
from itertools import product
from pathlib import Path
logger = logging.getLogger(__name__)
def _do_symlinking(type: str, lang: str) -> None:
"""
Helper function that creates a single global symlink; kept standalone so it can be run via multiprocessing
"""
target = (
Path(f"global/data/{type}/{type}.{lang}.xml")
if Path(f"global/data/{type}/{type}.{lang}.xml").exists()
else Path(f"global/data/{type}/{type}.en.xml")
)
source = Path(f"global/data/{type}/.{type}.{lang}.xml")
if not source.exists():
source.symlink_to(target.relative_to(source.parent))
def global_symlinks(languages: list[str]) -> None:
"""
After this step, the following symlinks will exist:
* global/data/texts/.texts.<lang>.xml for each language
* global/data/topbanner/.topbanner.<lang>.xml for each language
Each of these symlinks will point to the corresponding file without a dot at
the beginning of the filename, if present, and to the English version
otherwise. These symlinks make sure that phase 2 can easily use the right file
for each language, also as a prerequisite in the Makefile.
"""
logger.info("Creating global symlinks")
types = ["texts", "topbanner"]
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
pool.starmap(_do_symlinking, product(types, languages))
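For illustration, the resulting links can be inspected like this (hypothetical snippet, assuming the build ran with German in the language list):
```python
from pathlib import Path

link = Path("global/data/texts/.texts.de.xml")
if link.is_symlink():
    # Points to texts.de.xml if a German file exists, otherwise to texts.en.xml
    print(link, "->", link.readlink())
```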

View File

@ -0,0 +1,119 @@
# SPDX-FileCopyrightText: 2020 Free Software Foundation Europe e.V. <https://fsfe.org>
# SPDX-License-Identifier: GPL-3.0-or-later
# Build an index for the search engine based on the article titles and tags
import json
import logging
import multiprocessing
from pathlib import Path
import iso639
import lxml.etree as etree
import nltk
from nltk.corpus import stopwords as nltk_stopwords
from build.lib import update_if_changed
logger = logging.getLogger(__name__)
def _find_teaser(document: etree.ElementTree) -> str:
"""
Find a suitable teaser for indexation
Get all the paragraphs in <body> and return the first which contains more
than 10 words
:document: The parsed lxml ElementTree document
:returns: The text of the teaser or an empty string
"""
for p in document.xpath("//body//p"):
if p.text and len(p.text.strip().split(" ")) > 10:
return p.text
return ""
def _process_file(file: Path, stopwords: set[str]) -> dict:
"""
Generate the search index entry for a given file and set of stopwords
"""
logger.debug(f"Processing file {file}")
xslt_root = etree.parse(file)
tags = map(
lambda tag: tag.get("key"),
filter(lambda tag: tag.get("key") != "front-page", xslt_root.xpath("//tag")),
)
return {
"url": f'/{file.with_suffix(".html")}',
"tags": " ".join(tags),
"title": xslt_root.xpath("//html//title")[0].text
if xslt_root.xpath("//html//title")
else "",
"teaser": " ".join(
w
for w in _find_teaser(xslt_root).strip().split(" ")
if w.lower() not in stopwords
),
"type": "news" if "news/" in str(file) else "page",
# Get the date of the file if it has one
"date": xslt_root.xpath("//news[@newsdate]").get("newsdate")
if xslt_root.xpath("//news[@newsdate]")
else None,
}
def index_websites(languages: list[str]) -> None:
"""
Generate a search index for all sites that have a search/search.js file
"""
logger.info("Creating search indexes")
# Download all stopwords
nltkdir = "./.nltk_data"
nltk.data.path = [nltkdir] + nltk.data.path
nltk.download("stopwords", download_dir=nltkdir, quiet=True)
# Iterate over sites
for site in filter(
lambda path: path.joinpath("search/search.js").exists(),
Path(".").glob("?*.??*"),
):
logger.debug(f"Indexing {site}")
# Get all xhtml files in languages to be processed
# Create a list of tuples
# The first element of each tuple is the file and the second is a set of stopwords for that language
# Use iso639 to get the English name of the language from the two-letter ISO 639-1 code we use to mark files.
# Then if that language has stopwords from nltk, use those stopwords.
files_with_stopwords = sorted(
list(
map(
lambda file: (
file,
set(
nltk_stopwords.words(
iso639.Language.from_part1(
file.suffixes[0].removeprefix(".")
).name.lower()
)
)
if iso639.Language.from_part1(
file.suffixes[0].removeprefix(".")
).name.lower()
in nltk_stopwords.fileids()
else set(),
),
filter(
lambda file: file.suffixes[0].removeprefix(".") in languages,
Path(site).glob("**/*.??.xhtml"),
),
)
),
key=lambda tuple: tuple[0],
)
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
articles = pool.starmap(_process_file, files_with_stopwords)
update_if_changed(
Path(f"{site}/search/index.js"),
"var pages = " + json.dumps(articles, ensure_ascii=False),
)
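The generated `search/index.js` contains a single `var pages = [...]` assignment whose entries follow the shape returned by `_process_file` above. A hypothetical entry (all values invented for illustration) looks like:
```python
example_entry = {
    "url": "/fsfe.org/news/2024/news-20241101-01.en.html",
    "tags": "policy sustainability",
    "title": "Example news title",
    "teaser": "First paragraph longer than ten words, with stopwords stripped out",
    "type": "news",        # "news" if the path contains "news/", otherwise "page"
    "date": "2024-11-01",  # from the <news newsdate="..."> attribute, else None
}
```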

View File

@ -0,0 +1,21 @@
import logging
import subprocess
from pathlib import Path
logger = logging.getLogger(__name__)
def prepare_subdirectories(languages: list[str]) -> None:
"""
Find any makefiles in subdirectories and run them
"""
logger.info("Preparing Subdirectories")
for makefile in Path("").glob("?*.?*/**/Makefile"):
subprocess.run(
[
"make",
"--silent",
f"--directory={makefile.parent}",
f'languages="{" ".join(languages)}"',
]
)

134
build/phase1/run.py Normal file
View File

@ -0,0 +1,134 @@
# -----------------------------------------------------------------------------
# script for FSFE website build, phase 1
# -----------------------------------------------------------------------------
# This script is executed in the root of the source directory tree, and
# creates some .xml and xhtml files as well as some symlinks, all of which
# serve as input files in phase 2. The whole phase 1 runs within the source
# directory tree and does not touch the target directory tree at all.
# -----------------------------------------------------------------------------
import logging
from os import environ
logger = logging.getLogger(__name__)
def phase1_run(languages: list[str]):
"""
Run all the necessary sub functions for phase1.
"""
# If in test mode
if environ.get("TEST", "FALSE") == "TRUE":
logger.info("Testing mode, typechecking enabled")
from typeguard import install_import_hook
# Must be above imports
install_import_hook("build.phase1.global_symlinks")
install_import_hook("build.phase1.index_website")
install_import_hook("build.phase1.prepare_subdirectories")
install_import_hook("build.phase1.update_css")
install_import_hook("build.phase1.update_defaultxsls")
install_import_hook("build.phase1.update_localmenus")
install_import_hook("build.phase1.update_stylesheets")
install_import_hook("build.phase1.update_tags")
install_import_hook("build.phase1.update_xmllists")
install_import_hook("build.phase1.update_xmllists")
install_import_hook("build.phase1.update_xmllists")
from build.phase1.global_symlinks import global_symlinks
from build.phase1.index_website import index_websites
from build.phase1.prepare_subdirectories import prepare_subdirectories
from build.phase1.update_css import update_css
from build.phase1.update_defaultxsls import update_defaultxsls
from build.phase1.update_localmenus import update_localmenus
from build.phase1.update_stylesheets import update_stylesheets
from build.phase1.update_tags import update_tags
from build.phase1.update_xmllists import update_xmllists
# -----------------------------------------------------------------------------
# Build search index
# -----------------------------------------------------------------------------
# This step runs a Python tool that creates an index of all news and
# articles. It extracts titles, teaser, tags, dates and potentially more.
# The result will be fed into a JS file.
index_websites(languages)
# -----------------------------------------------------------------------------
# Update CSS files
# -----------------------------------------------------------------------------
# This step recompiles the less files into the final CSS files to be
# distributed to the web server.
update_css()
# -----------------------------------------------------------------------------
# Update XSL stylesheets
# -----------------------------------------------------------------------------
# This step updates (actually: just touches) all XSL files which depend on
# another XSL file that has changed since the last build run. The phase 2
# Makefile then only has to consider the directly used stylesheet as a
# prerequisite for building each file and doesn't have to worry about other
# stylesheets imported into that one.
# This must run before the "dive into subdirectories" step, because in the news
# and events directories, the XSL files, if updated, will be copied for the
# per-year archives.
update_stylesheets()
# -----------------------------------------------------------------------------
# Dive into subdirectories
# -----------------------------------------------------------------------------
# Find any makefiles in subdirectories and run them
prepare_subdirectories(languages)
# -----------------------------------------------------------------------------
# Create XML symlinks
# -----------------------------------------------------------------------------
# After this step, the following symlinks will exist:
# * global/data/texts/.texts.<lang>.xml for each language
# * global/data/topbanner/.topbanner.<lang>.xml for each language
# Each of these symlinks will point to the corresponding file without a dot at
# the beginning of the filename, if present, and to the English version
# otherwise. These symlinks make sure that phase 2 can easily use the right file
# for each language, also as a prerequisite in the Makefile.
global_symlinks(languages)
# -----------------------------------------------------------------------------
# Create XSL symlinks
# -----------------------------------------------------------------------------
# After this step, each directory with source files for HTML pages contains a
# symlink named .default.xsl and pointing to the default.xsl "responsible" for
# this directory. These symlinks make it easier for the phase 2 Makefile to
# determine which XSL script should be used to build a HTML page from a source
# file.
update_defaultxsls()
# -----------------------------------------------------------------------------
# Update local menus
# -----------------------------------------------------------------------------
# After this step, all .localmenu.??.xml files will be up to date.
update_localmenus(languages)
# -----------------------------------------------------------------------------
# Update tags
# -----------------------------------------------------------------------------
# After this step, the following files will be up to date:
# * tags/tagged-<tags>.en.xhtml for each tag used. Apart from being
# automatically created, these are regular source files for HTML pages, and
# in phase 2 are built into pages listing all news items and events for a
# tag.
# * tags/.tags.??.xml with a list of the tags used.
update_tags(languages)
# -----------------------------------------------------------------------------
# Update XML filelists
# -----------------------------------------------------------------------------
# After this step, the following files will be up to date:
# * <dir>/.<base>.xmllist for each <dir>/<base>.sources as well as for each
# $site/tags/tagged-<tags>.en.xhtml. These files are used in phase 2 to include the
# correct XML files when generating the HTML pages. It is taken care that
# these files are only updated whenever their content actually changes, so
# they can serve as a prerequisite in the phase 2 Makefile.
update_xmllists(languages)
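`build_main.sh` calls this via `python3 "$basedir"/phase1.py "$languages"` (see the change above). `phase1.py` itself is not shown in this excerpt; a minimal sketch of such an entry point, assuming the languages arrive as one space-separated argument, could look like:
```python
#!/usr/bin/env python3
# Hypothetical wrapper, not shown in this excerpt
import logging
import sys

from build.phase1.run import phase1_run

if __name__ == "__main__":
    logging.basicConfig(format="* %(message)s", level="INFO")
    # e.g. python3 phase1.py "en de fr"
    languages = sys.argv[1].split() if len(sys.argv) > 1 else []
    phase1_run(languages)
```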

View File

@ -0,0 +1,44 @@
import logging
import subprocess
from pathlib import Path
import minify
from build.lib import update_if_changed
logger = logging.getLogger(__name__)
def update_css() -> None:
"""
If any less files have been changed, update the css.
Compile less found at website/look/(fsfe.less|valentine.less)
Then minify it, and place it in the expected location for the build process.
"""
logger.info("Updating css")
for folder in Path("").glob("?*.?*/look"):
for name in ["fsfe", "valentine"]:
if folder.joinpath(name + ".less").exists() and (
not folder.joinpath(name + ".min.css").exists()
or any(
[
path.stat().st_mtime
> folder.joinpath(name + ".min.css").stat().st_mtime
for path in folder.glob("**/*.less")
]
)
):
logger.info(f"Compiling {name}.less")
result = subprocess.run(
[
"lessc",
str(folder.joinpath(name + ".less")),
],
capture_output=True,
# Get output as str instead of bytes
universal_newlines=True,
)
update_if_changed(
folder.joinpath(name + ".min.css"),
minify.string("text/css", result.stdout),
)

View File

@ -0,0 +1,36 @@
import logging
import multiprocessing
from pathlib import Path
logger = logging.getLogger(__name__)
def _do_symlinking(directory: Path) -> None:
"""
In each dir, place a .default.xsl symlink pointing to the nearest default.xsl
"""
working_dir = directory
if not directory.joinpath(".default.xsl").exists():
while not working_dir.joinpath("default.xsl").exists():
working_dir = working_dir.parent
directory.joinpath(".default.xsl").symlink_to(
working_dir.joinpath("default.xsl").resolve()
)
def update_defaultxsls() -> None:
"""
Place a .default.xsl into each directory containing source files for
HTML pages (*.xhtml). These .default.xsl are symlinks to the first
available actual default.xsl found when climbing the directory tree
upwards; this is the XSL stylesheet to be used for building the HTML
files from this directory.
"""
logger.info("Updating default xsl's")
# Get a set of all directories containing .xhtml source files
directories = set(map(lambda path: path.parent, Path(".").glob("**/*.*.xhtml")))
# Do all directories asynchronously
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
pool.map(_do_symlinking, directories)

116
build/phase1/update_localmenus.py Executable file
View File

@ -0,0 +1,116 @@
import logging
import multiprocessing
import textwrap
from pathlib import Path
import lxml.etree as etree
logger = logging.getLogger(__name__)
def _write_localmenus(
dir: str, files_by_dir: dict[str, list[Path]], languages: list[str]
) -> None:
"""
Write localmenus for a given directory
"""
base_files = sorted(
list(
set(
map(
lambda filter_file: filter_file.with_suffix("").with_suffix(""),
files_by_dir[dir],
)
)
)
)
for lang in languages:
file = Path(dir).joinpath(f".localmenu.{lang}.xml")
logger.info(f"Creating {file}")
file.write_text(
textwrap.dedent("""\
<?xml version="1.0"?>
<feed>
""")
)
with file.open("a") as working_file:
for base_file in base_files:
tmpfile = (
base_file.with_suffix(f".{lang}").with_suffix(".xhtml")
if base_file.with_suffix(f".{lang}").with_suffix(".xhtml").exists()
else base_file.with_suffix(".en.xhtml")
if base_file.with_suffix(".en.xhtml").exists()
else None
)
if not tmpfile:
continue
xslt_root = etree.parse(tmpfile)
for localmenu in xslt_root.xpath("//localmenu"):
working_file.write(
'\n<localmenuitem set="'
+ (
str(localmenu.xpath("./@set")[0])
if localmenu.xpath("./@set") != []
else "default"
)
+ '" id="'
+ (
str(localmenu.xpath("./@id")[0])
if localmenu.xpath("./@id") != []
else "default"
)
+ f'" link="/{Path(*Path(base_file).parts[1:])}.html">'
+ localmenu.text
+ "</localmenuitem>"
)
working_file.write(
textwrap.dedent("""\
\n
</feed>
""")
)
def update_localmenus(languages: list[str]) -> None:
"""
Update all the .localmenu.*.xml files containing the local menus.
"""
logger.info("Updating local menus")
# Get a dict of all source files containing local menus
files_by_dir = {}
for file in filter(
lambda path: etree.parse(path).xpath("//localmenu")
and "-template" not in str(path),
Path(".").glob("**/*.??.xhtml"),
):
xslt_root = etree.parse(file)
dir = xslt_root.xpath("//localmenu/@dir")
dir = dir[0] if dir else str(file.parent.relative_to(Path(".")))
if dir not in files_by_dir:
files_by_dir[dir] = set()
files_by_dir[dir].add(file)
for dir in files_by_dir:
files_by_dir[dir] = sorted(list(files_by_dir[dir]))
# If any of the source files has been updated, rebuild all .localmenu.*.xml
dirs = filter(
lambda dir: (
any(
(
(not Path(dir).joinpath(".localmenu.en.xml").exists())
or (
file.stat().st_mtime
> Path(dir).joinpath(".localmenu.en.xml").stat().st_mtime
)
)
for file in files_by_dir[dir]
)
),
files_by_dir,
)
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
pool.starmap(
_write_localmenus, [(dir, files_by_dir, languages) for dir in dirs]
)

View File

@ -0,0 +1,47 @@
import logging
import multiprocessing
import re
from pathlib import Path
from lxml import etree
from build.lib import touch_if_newer_dep
logger = logging.getLogger(__name__)
def _update_sheet(file: Path) -> None:
"""
Update (touch) a given XSL file if any of the XSL files it imports have been updated
"""
xslt_root = etree.parse(file)
imports = map(
lambda imp: Path(file)
.parent.joinpath(imp.get("href"))
.resolve()
.relative_to(Path(".").resolve()),
xslt_root.xpath(
"//xsl:import", namespaces={"xsl": "http://www.w3.org/1999/XSL/Transform"}
),
)
touch_if_newer_dep(file, imports)
def update_stylesheets() -> None:
"""
This function touches all XSL files
which depend on another XSL file that has changed since the last build run.
The phase 2 Makefile then only has to consider the
directly used stylesheet as a prerequisite for building each file and doesn't
have to worry about other stylesheets imported into that one.
"""
logger.info("Updating XSL stylesheets")
banned = re.compile(r"(\.venv/.*)|(.*\.default\.xsl$)")
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
pool.map(
_update_sheet,
filter(
lambda file: re.match(banned, str(file)) is None,
Path(".").glob("**/*.xsl"),
),
)

187
build/phase1/update_tags.py Executable file
View File

@ -0,0 +1,187 @@
import logging
import multiprocessing
import textwrap
from pathlib import Path
from xml.sax.saxutils import escape
import lxml.etree as etree
from build.lib import (
delete_file,
keys_exists,
sort_dict,
update_if_changed,
)
logger = logging.getLogger(__name__)
def _update_tag_pages(site: Path, tag: str) -> None:
"""
Update the xhtml pages and xmllists for a given tag
"""
taggedfile = Path(f"{site}/tags/tagged.en.xhtml")
content = taggedfile.read_text().replace("XXX_TAGNAME_XXX", tag)
update_if_changed(Path(f"{site}/tags/tagged-{tag}.en.xhtml"), content)
def _update_tag_sets(
site: Path,
lang: str,
filecount: dict[str, dict[str, int]],
files_by_tag: dict[str, list[Path]],
tags_by_lang: dict[str, dict[str, str]],
) -> None:
"""
Update the .tags.??.xml tagset xmls for a given tag
"""
taglist = textwrap.dedent("""\
<?xml version="1.0" encoding="UTF-8"?>
<tagset>
""")
for section in ["news", "events"]:
for tag in files_by_tag:
count = filecount[section][tag]
label = (
tags_by_lang[lang][tag]
if keys_exists(tags_by_lang, lang, tag) and tags_by_lang[lang][tag]
else tags_by_lang["en"][tag]
if keys_exists(tags_by_lang, "en", tag) and tags_by_lang["en"][tag]
else tag
)
if count > 0:
taglist = taglist + textwrap.dedent(f"""\
<tag section="{section}" key="{tag}" count="{count}">{label}</tag>
""")
taglist = taglist + textwrap.dedent("""\
</tagset>
""")
update_if_changed(Path(f"{site}/tags/.tags.{lang}.xml"), taglist)
def update_tags(languages: list[str]) -> None:
"""
Update Tag pages, xmllists and xmls
Creates/update the following files:
* */tags/tagged-<tags>.en.xhtml for each tag used. Apart from being
automatically created, these are regular source files for HTML pages, and
in phase 2 are built into pages listing all news items and events for a
tag.
* */tags/.tags.??.xml with a list of the tags used.
Changing or removing tags in XML files is also considered, in which case a
file is removed from the .xmllist files.
When a tag has been removed from the last XML file where it has been used,
the tagged-* files are correctly deleted.
"""
for site in filter(
lambda path: path.joinpath("tags").exists(),
Path(".").glob("?*.??*"),
):
logger.info(f"Updating tags for {site}")
# Create a complete and current map of which tag is used in which files
files_by_tag = {}
tags_by_lang = {}
# Fill out files_by_tag and tags_by_lang
for file in filter(
lambda file:
# Not in tags dir of a site
site.joinpath("tags") not in file.parents
# Has a tag element
and etree.parse(file).xpath("//tag"),
site.glob("**/*.xml"),
):
xslt_root = etree.parse(file)
for tag in xslt_root.xpath("//tag"):
# Get the key attribute, and filter out some invalid chars
key = (
tag.get("key")
.replace("/", "-")
.replace(" ", "-")
.replace(":", "-")
.strip()
)
# Get the label, and strip it.
label = str(
escape(tag.text.strip()) if tag.text and tag.text.strip() else None
)
# Load into the dicts
if key not in files_by_tag:
files_by_tag[key] = set()
files_by_tag[key].add(file.with_suffix("").with_suffix(""))
lang = file.with_suffix("").suffix.removeprefix(".")
if lang not in tags_by_lang:
tags_by_lang[lang] = {}
tags_by_lang[lang][key] = (
tags_by_lang[lang][key]
if key in tags_by_lang[lang] and tags_by_lang[lang][key]
else label
)
# Sort dicts to ensure that they are stable between runs
files_by_tag = sort_dict(files_by_tag)
for tag in files_by_tag:
files_by_tag[tag] = sorted(files_by_tag[tag])
tags_by_lang = sort_dict(tags_by_lang)
for lang in tags_by_lang:
tags_by_lang[lang] = sort_dict(tags_by_lang[lang])
# Now we have the necessary data, begin
logger.info("Removing files for removed tags")
tagfiles_to_delete = filter(
lambda path: not any([(tag in str(path)) for tag in files_by_tag]),
list(Path(f"{site}/tags/").glob("tagged-*.en.xhtml"))
+ list(Path(f"{site}/tags/").glob(".tagged-*.xmllist")),
)
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
pool.map(delete_file, tagfiles_to_delete)
logger.info("Updating tag pages")
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
pool.starmap(
_update_tag_pages,
[(site, tag) for tag in files_by_tag],
)
logger.info("Updating tag lists")
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
pool.starmap(
update_if_changed,
[
(
Path(f"{site}/tags/.tagged-{tag}.xmllist"),
(
"\n".join(map(lambda file: str(file), files_by_tag[tag]))
+ "\n"
),
)
for tag in files_by_tag
],
)
logger.info("Updating tag sets")
# Get count of files with each tag in each section
filecount = {}
for section in ["news", "events"]:
filecount[section] = {}
for tag in files_by_tag:
filecount[section][tag] = len(
list(
filter(
lambda path: section in str(path.parent),
files_by_tag[tag],
)
)
)
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
pool.starmap(
_update_tag_sets,
[
(site, lang, filecount, files_by_tag, tags_by_lang)
for lang in [lang for lang in tags_by_lang if lang in languages]
],
)

162
build/phase1/update_xmllists.py Executable file
View File

@ -0,0 +1,162 @@
import datetime
import fnmatch
import logging
import multiprocessing
import re
from pathlib import Path
import lxml.etree as etree
from build.lib import (
touch_if_newer_dep,
update_if_changed,
)
logger = logging.getLogger(__name__)
def _update_for_base(
base: Path, all_xml: set[Path], nextyear: str, thisyear: str, lastyear: str
) -> None:
"""
Update the xmllist for a given base file
"""
matching_files = set()
# If sources exist
if base.with_suffix(".sources").exists():
# Load every file that matches the pattern
# If a tag is included in the pattern, the file must contain that tag
with base.with_suffix(".sources").open(mode="r") as file:
for line in file:
pattern = (
re.sub(r":\[.*\]$", "*", line)
.replace("$nextyear", nextyear)
.replace("$thisyear", thisyear)
.replace("$lastyear", lastyear)
.strip()
)
if len(pattern) <= 0:
print("Pattern too short, continue!")
continue
tag = (
re.match(r":\[(.*)\]$", line).group().strip()
if re.match(r":\[(.*)\]$", line)
else ""
)
for line in filter(
lambda line:
# Matches glob pattern
fnmatch.fnmatchcase(str(line), pattern)
# contains tag if tag in pattern
and (
etree.parse(file).find(f"//tag[@key='{tag}']")
if tag != ""
else True
)
# Not just matching an empty line
and len(str(line)) > 0,
all_xml,
):
matching_files.add(str(line))
for file in Path("").glob(f"{base}.??.xhtml"):
xslt_root = etree.parse(file)
for module in xslt_root.xpath("//module"):
matching_files.add(f'global/data/modules/{module.get("id")}'.strip())
matching_files = sorted(matching_files)
update_if_changed(
Path(f"{base.parent}/.{base.name}.xmllist"), "\n".join(matching_files) + "\n"
)
def _update_module_xmllists(languages: list[str]) -> None:
"""
Update .xmllist files for .sources and .xhtml containing <module>s
"""
logger.info("Updating XML lists")
# Store current dir
for site in map(
lambda path: str(path),
filter(lambda path: path.is_dir(), Path(".").glob("?*.??*")),
):
logger.info(f"Updating xmllists for {site}")
# Get all the bases and stuff before multithreading the update bit
all_xml = set(
map(
lambda path: path.with_suffix("").with_suffix(""),
filter(
lambda path: path.with_suffix("").suffix.removeprefix(".")
in languages,
Path(site).glob("**/*.*.xml"),
),
)
)
source_bases = set(
map(
lambda path: path.with_suffix(""),
Path(site).glob("**/*.sources"),
)
)
module_bases = set(
map(
lambda path: path.with_suffix("").with_suffix(""),
filter(
lambda path: path.with_suffix("").suffix.removeprefix(".")
in languages
and etree.parse(path).xpath("//module"),
Path(site).glob("**/*.*.xhtml"),
),
)
)
all_bases = source_bases | module_bases
nextyear = str(datetime.datetime.today().year + 1)
thisyear = str(datetime.datetime.today().year)
lastyear = str(datetime.datetime.today().year - 1)
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
pool.starmap(
_update_for_base,
[(base, all_xml, nextyear, thisyear, lastyear) for base in all_bases],
)
def _check_xmllist_deps(file: Path) -> None:
"""
If any of the sources in an xmllist are newer than it, touch the xmllist
"""
xmls = set()
with file.open(mode="r") as fileobj:
for line in fileobj:
for newfile in Path("").glob(line + ".??.xml"):
xmls.add(newfile)
touch_if_newer_dep(file, xmls)
def _touch_xmllists_with_updated_deps(languages: list[str]) -> None:
"""
Touch all .xmllist files where one of the contained files has changed
"""
logger.info("Checking contents of XML lists")
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
pool.map(_check_xmllist_deps, Path("").glob("./**/.*.xmllist"))
def update_xmllists(languages: list[str]) -> None:
"""
Update XML filelists (*.xmllist)
Creates/update the following files:
* <dir>/.<base>.xmllist for each <dir>/<base>.sources as well as for each
fsfe.org/tags/tagged-<tags>.en.xhtml. These files are used