Install python deps using venv during the build process
Convert the entirety of phase1 to python

This commit is contained in:
parent 372d718888
commit ccc40ab9a5

5 .gitignore (vendored)
@@ -17,6 +17,11 @@ fsfe.org/tags/tagged-*.en.xhtml
fsfe.org/tags/.tags.??.xml
# Local build stuff
output
# Python venv
.venv
__pycache__
#Nltk
.nltk_data

## Status dir stuff
status.fsfe.org/*/data*/*
@@ -14,10 +14,12 @@ make \
libc-bin \
wget \
procps \
node-less \
python3 \
python3-bs4
python3-venv \
python3-pip

WORKDIR /fsfe-websites
ENTRYPOINT ["bash", "./build.sh" ]
ENTRYPOINT ["python3", "./build.py" ]
157 Makefile
@@ -1,157 +0,0 @@
|
||||
# -----------------------------------------------------------------------------
|
||||
# Makefile for FSFE website build, phase 1
|
||||
# -----------------------------------------------------------------------------
|
||||
# This Makefile is executed in the root of the source directory tree, and
|
||||
# creates some .xml and xhtml files as well as some symlinks, all of which
|
||||
# serve as input files in phase 2. The whole phase 1 runs within the source
|
||||
# directory tree and does not touch the target directory tree at all.
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
.PHONY: all .FORCE
|
||||
.FORCE:
|
||||
|
||||
# This will be overwritten in the command line running this Makefile.
|
||||
build_env = development
|
||||
languages = none
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Build search index
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# This step runs a Python tool that creates an index of all news and
|
||||
# articles. It extracts titles, teaser, tags, dates and potentially more.
|
||||
# The result will be fed into a JS file.
|
||||
|
||||
.PHONY: searchindex
|
||||
all: searchindex
|
||||
searchindex:
|
||||
python3 tools/index-website.py
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Update CSS files
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# This step recompiles the less files into the final CSS files to be
|
||||
# distributed to the web server.
|
||||
|
||||
ifneq ($(build_env),development)
|
||||
websites:=$(shell find . -mindepth 2 -maxdepth 2 -type d -regex "./[a-z\.]+\.[a-z]+/look")
|
||||
all: $(foreach dir,$(websites), $(dir)/fsfe.min.css $(dir)/valentine.min.css)
|
||||
$(dir $@)%.min.css: $(shell find $(dir $@) -name '*.less')
|
||||
echo "* Compiling $@"
|
||||
lessc "$*.less" -x "$@"
|
||||
endif
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Update XSL stylesheets
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# This step updates (actually: just touches) all XSL files which depend on
|
||||
# another XSL file that has changed since the last build run. The phase 2
|
||||
# Makefile then only has to consider the directly used stylesheet as a
|
||||
# prerequisite for building each file and doesn't have to worry about other
|
||||
# stylesheets imported into that one.
|
||||
# This must run before the "dive into subdirectories" step, because in the news
|
||||
# and events directories, the XSL files, if updated, will be copied for the
|
||||
# per-year archives.
|
||||
|
||||
.PHONY: stylesheets
|
||||
all: stylesheets
|
||||
stylesheets: $(SUBDIRS)
|
||||
tools/update_stylesheets.sh
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Dive into subdirectories
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
SUBDIRS := $(shell find . -regex "./[a-z\.]+\.[a-z]+/.*/Makefile" | xargs dirname)
|
||||
|
||||
all: $(SUBDIRS)
|
||||
$(SUBDIRS): .FORCE
|
||||
echo "* Preparing subdirectory $@"
|
||||
$(MAKE) --silent --directory=$@ languages="$(languages)"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Create XML symlinks
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# After this step, the following symlinks will exist:
|
||||
# * global/data/texts/.texts.<lang>.xml for each language
|
||||
# * global/data/topbanner/.topbanner.<lang>.xml for each language
|
||||
# Each of these symlinks will point to the corresponding file without a dot at
|
||||
# the beginning of the filename, if present, and to the English version
|
||||
# otherwise. These symlinks make sure that phase 2 can easily use the right file
|
||||
# for each language, also as a prerequisite in the Makefile.
|
||||
|
||||
TEXTS_LINKS := $(foreach lang,$(languages),global/data/texts/.texts.$(lang).xml)
|
||||
|
||||
all: $(TEXTS_LINKS)
|
||||
global/data/texts/.texts.%.xml: .FORCE
|
||||
if [ -f global/data/texts/texts.$*.xml ]; then \
|
||||
ln -sf texts.$*.xml $@; \
|
||||
else \
|
||||
ln -sf texts.en.xml $@; \
|
||||
fi
|
||||
|
||||
TOPBANNER_LINKS := $(foreach lang,$(languages),global/data/topbanner/.topbanner.$(lang).xml)
|
||||
|
||||
all: $(TOPBANNER_LINKS)
|
||||
global/data/topbanner/.topbanner.%.xml: .FORCE
|
||||
if [ -f global/data/topbanner/topbanner.$*.xml ]; then \
|
||||
ln -sf topbanner.$*.xml $@; \
|
||||
else \
|
||||
ln -sf topbanner.en.xml $@; \
|
||||
fi
|
||||
|
||||
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
# The following steps are handled in an external script, because the list of
|
||||
# files to generate is not known when the Makefile starts - some new tags might
|
||||
# be introduced when generating the .xml files in the news/* subdirectories.
|
||||
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Create XSL symlinks
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# After this step, each directory with source files for HTML pages contains a
|
||||
# symlink named .default.xsl and pointing to the default.xsl "responsible" for
|
||||
# this directory. These symlinks make it easier for the phase 2 Makefile to
|
||||
# determine which XSL script should be used to build a HTML page from a source
|
||||
# file.
|
||||
|
||||
.PHONY: default_xsl
|
||||
all: default_xsl
|
||||
default_xsl:
|
||||
tools/update_defaultxsls.sh
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Update local menus
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# After this step, all .localmenu.??.xml files will be up to date.
|
||||
|
||||
.PHONY: localmenus
|
||||
all: localmenus
|
||||
localmenus: $(SUBDIRS)
|
||||
tools/update_localmenus.sh "$(languages)"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Update XML filelists
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# After this step, the following files will be up to date:
|
||||
# * tags/tagged-<tags>.en.xhtml for each tag used. Apart from being
|
||||
# automatically created, these are regular source files for HTML pages, and
|
||||
# in phase 2 are built into pages listing all news items and events for a
|
||||
# tag.
|
||||
# * tags/.tags.??.xml with a list of the tags used.
|
||||
# * <dir>/.<base>.xmllist for each <dir>/<base>.sources as well as for each
|
||||
# tags/tagged-<tags>.en.xhtml. These files are used in phase 2 to include the
|
||||
# correct XML files when generating the HTML pages. It is taken care that
|
||||
# these files are only updated whenever their content actually changes, so
|
||||
# they can serve as a prerequisite in the phase 2 Makefile.
|
||||
|
||||
.PHONY: xmllists
|
||||
all: xmllists
|
||||
xmllists: $(SUBDIRS)
|
||||
tools/update_xmllists.sh "$(languages)"
|
11 README.md
@@ -79,18 +79,19 @@ Alterations to build scripts or the files used site-wide will result in near full

### Native
We can either install the required dependencies manually using our preferred package manager, or, if you are a Nix user, run `nix-shell` to enter a shell with the required build dependencies.

The required binary names are
If installing manually, the required binary names are
```
realpath rsync xsltproc xmllint sed find egrep grep wc make tee date iconv wget shuf python3
realpath rsync xsltproc xmllint sed find egrep grep wc make tee date iconv wget shuf python3 pip3
```
The package names for Debian, are
The package names for Debian are
```
bash bash-completion coreutils diffutils findutils inotify-tools libxml2-utils libxslt make procps python3 rsync
bash bash-completion coreutils diffutils findutils inotify-tools libxml2-utils libxslt make procps python3 python3-pip python3-venv rsync
```
The Python dependencies are installed as part of the build process. They can be found in `requirements.txt`.

After getting the dependencies one way or another, we can actually build and serve the pages.

The pages can be built and served by running `./build.sh`. Try `--help` for more information. The simple web server used lacks the features of `apache` which used on the FSFE web servers. This is why no index is automatically selected form and directory and other behaviors.
The pages can be built and served by running `./build.py`. Try `--help` for more information. The simple web server used lacks the features of the `apache` server used on the FSFE web servers; this is why, for example, no index is automatically selected for each directory, among other differing behaviors.

### Docker
Simply running `docker compose run --service-ports build --serve` should build the webpages and make them available over localhost.
154 build.py (Executable file)
@@ -0,0 +1,154 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from tools.serve_websites import serve_websites
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
if __name__ == "__main__":
|
||||
"""
|
||||
Main process of the website builder
|
||||
"""
|
||||
# Change to the dir the script is in.
|
||||
os.chdir(os.path.dirname(__file__))
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Python script to handle building of the fsfe webpage"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--target",
|
||||
dest="target",
|
||||
help="Directory to build websites into.",
|
||||
type=str,
|
||||
default="./output/final",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--log-level",
|
||||
dest="log_level",
|
||||
type=str,
|
||||
default="INFO",
|
||||
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
|
||||
help="Set the logging level (default: INFO)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--full",
|
||||
dest="full",
|
||||
help="Force a full rebuild of all webpages.",
|
||||
action="store_true",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--update",
|
||||
dest="update",
|
||||
help="Update the repo as part of the build.",
|
||||
action="store_true",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--languages",
|
||||
dest="languages",
|
||||
help="Languages to build website in.",
|
||||
type=str,
|
||||
)
|
||||
# parser.add_argument(
|
||||
# "--status",
|
||||
# dest="status",
|
||||
# help="Store status reports.",
|
||||
# action="store_true",
|
||||
# )
|
||||
parser.add_argument(
|
||||
"--status-dir",
|
||||
dest="status_dir",
|
||||
help="Directory to store status reports in.",
|
||||
type=Path,
|
||||
)
|
||||
# parser.add_argument(
|
||||
# "--stage",
|
||||
# dest="stage",
|
||||
# help="Perform a dry run, not altering anything on the server, but printing messages as though it is.",
|
||||
# action="store_true",
|
||||
# )
|
||||
parser.add_argument(
|
||||
"--stage-dir",
|
||||
dest="stage_dir",
|
||||
help="Directory to store build status updates in",
|
||||
type=Path,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--test",
|
||||
dest="test",
|
||||
help="Enable some testing features that test for worse scenarios, but hamper performance.",
|
||||
action="store_true",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--serve",
|
||||
dest="serve",
|
||||
help="Serve the webpages after rebuild",
|
||||
action="store_true",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
logging.basicConfig(format="* %(message)s", level=args.log_level)
|
||||
Path("./output").mkdir(parents=True, exist_ok=True)
|
||||
if args.full and args.update:
logger.critical("Cannot do a full rebuild and an update at once, exiting")
sys.exit(1)
elif args.full:
command = "build_into"
elif args.update:
command = "git_build_into"
else:
command = "build_run"
|
||||
if not args.status_dir:
|
||||
args.status_dir = (
|
||||
f'{args.target.removesuffix("/")}/status.fsfe.org/fsfe.org/data'
|
||||
)
|
||||
logger.debug(f"Args: {args}")
|
||||
to_run = (
|
||||
[
|
||||
"./build/build_main.sh",
|
||||
command,
|
||||
args.target,
|
||||
]
|
||||
+ (
|
||||
[
|
||||
"--stage-dir",
|
||||
str(args.stage_dir),
|
||||
]
|
||||
if args.stage_dir
|
||||
else []
|
||||
)
|
||||
+ (
|
||||
[
|
||||
"--status-dir",
|
||||
str(args.status_dir),
|
||||
]
|
||||
if args.status_dir
|
||||
else []
|
||||
)
|
||||
+ (
|
||||
[
|
||||
"--languages",
|
||||
args.languages,
|
||||
]
|
||||
if args.languages
|
||||
else []
|
||||
)
|
||||
)
|
||||
logger.debug(f"Subprocess command: {to_run}")
|
||||
env = dict(os.environ)
|
||||
env["LOGLEVEL"] = args.log_level
|
||||
if args.test:
|
||||
env["TEST"] = str(args.test).upper()
|
||||
logger.debug(f"Env Vars being set: {env}")
|
||||
build = subprocess.run(to_run, env=env)
|
||||
if build.returncode == 1:
|
||||
logger.critical("Build process has failed, Exiting!")
|
||||
sys.exit(1)
|
||||
if args.serve:
|
||||
serve_websites(args.target, 2000, 100)
|
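To make the argument plumbing above concrete, here is a reconstructed example (ours, not captured from a run) of the command that `build.py` assembles for `./build.py --full --languages "en de"`, following the list-building logic above:

```python
# Reconstructed illustration of the subprocess command assembled above
# for: ./build.py --full --languages "en de"
target = "./output/final"                       # default --target
status_dir = f'{target.removesuffix("/")}/status.fsfe.org/fsfe.org/data'
to_run = [
    "./build/build_main.sh",
    "build_into",                               # --full selects a full rebuild
    target,
    "--status-dir", status_dir,                 # derived default status dir
    "--languages", "en de",
]
# build_main.sh is then run with LOGLEVEL exported into its environment.
```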
48 build.sh
@@ -1,48 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
usage() {
|
||||
cat <<-EOF
|
||||
# build.sh Usage
|
||||
## General
|
||||
This script is a wrapper script over ./build/build_main.sh that provides nicer option names, and the options to serve the files.
|
||||
For documentation on the build script itself see ./build/README.md
|
||||
## Flags
|
||||
### -f | --full
|
||||
Perform a full rebuild of the webpages.
|
||||
### -s | --serve
|
||||
Serve the build webpages over localhost.
|
||||
### --
|
||||
Everything after this is passed directly to build_main.
|
||||
See ./build/README.md for valid options.
|
||||
EOF
|
||||
exit 1
|
||||
}
|
||||
command="build_run"
|
||||
serve=""
|
||||
extra_args=""
|
||||
while [ "$#" -gt 0 ]; do
|
||||
case "$1" in
|
||||
--full | -f)
|
||||
command="build_into" && shift 1
|
||||
;;
|
||||
--serve | -s)
|
||||
serve="true" && shift 1
|
||||
;;
|
||||
--)
|
||||
shift 1
|
||||
while [ "$#" -gt 0 ]; do
|
||||
extra_args+="$1 "
|
||||
shift 1
|
||||
done
|
||||
;;
|
||||
*)
|
||||
usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
mkdir -p ./output
|
||||
./build/build_main.sh "$command" ./output/final --statusdir ./output/final/status.fsfe.org/fsfe.org/data $extra_args
|
||||
if [[ "$serve" ]]; then
|
||||
python3 ./serve-websites.py
|
||||
fi
|
2 build/__init__.py (Normal file)
@@ -0,0 +1,2 @@
# __init__.py is a special Python file that allows a directory to become
# a Python package so it can be accessed using the 'import' statement.
@@ -22,7 +22,7 @@ check_dependencies() {
}

# Check dependencies for all kinds of build envs (e.g. development, fsfe.org)
check_dependencies realpath rsync xsltproc xmllint sed find egrep grep wc make tee date iconv wget shuf python3
check_dependencies realpath rsync xsltproc xmllint sed find egrep grep wc make tee date iconv wget shuf python3 pip3

if ! make --version | grep -q "GNU Make 4"; then
echo "The build script requires GNU Make 4.x"

@@ -29,6 +29,10 @@ dir_maker() {

# The actual build
buildrun() {
echo "Setting up python deps!"
python3 -m venv "$basedir/.venv" || die "Failed to set up python venv!"
source "$basedir/.venv/bin/activate" || die "Failed to activate python venv!"
pip3 install -r "$basedir/requirements.txt" --quiet || die "Failed to install dependencies"
set -o pipefail

printf %s "$start_time" >"$(logname start_time)"

@@ -40,7 +44,7 @@ buildrun() {

{
echo "Starting phase 1" &&
make --silent --directory="$basedir" build_env="${build_env}" languages="$languages" 2>&1 &&
python3 "$basedir"/phase1.py "$languages" 2>&1 &&
echo "Finishing phase 1" ||
die "Error during phase 1"
} | t_logstatus phase_1 || exit 1
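The venv bootstrap added to `buildrun()` above could equally be expressed in Python; a minimal sketch, assuming the same `.venv` and `requirements.txt` locations (the helper name and the use of `venv.EnvBuilder` are ours, not part of this commit):

```python
# Illustrative sketch of the venv bootstrap performed by build_main.sh above.
import subprocess
import venv
from pathlib import Path

def bootstrap_venv(basedir: Path) -> Path:
    """Create basedir/.venv if needed and install requirements.txt into it."""
    venv_dir = basedir / ".venv"
    if not venv_dir.exists():
        venv.EnvBuilder(with_pip=True).create(venv_dir)
    pip = venv_dir / "bin" / "pip3"
    subprocess.run(
        [str(pip), "install", "--quiet", "-r", str(basedir / "requirements.txt")],
        check=True,  # fail loudly, mirroring the shell "die"
    )
    return venv_dir
```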
62 build/lib.py (Normal file)
@@ -0,0 +1,62 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def keys_exists(element: dict, *keys: str) -> bool:
|
||||
"""
|
||||
Check if *keys (nested) exists in `element` (dict).
|
||||
"""
|
||||
if not isinstance(element, dict):
|
||||
raise AttributeError("keys_exists() expects dict as first argument.")
|
||||
if len(keys) == 0:
|
||||
raise AttributeError("keys_exists() expects at least two arguments, one given.")
|
||||
|
||||
_element = element
|
||||
for key in keys:
|
||||
try:
|
||||
_element = _element[key]
|
||||
except KeyError:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def sort_dict(dict: dict) -> dict:
|
||||
"""
|
||||
Sort dict by keys
|
||||
"""
|
||||
return {key: val for key, val in sorted(dict.items(), key=lambda ele: ele[0])}
|
||||
|
||||
|
||||
def update_if_changed(path: Path, content: str) -> None:
|
||||
"""
|
||||
Compare the content of the file at path with the content.
|
||||
If the file does not exist,
|
||||
or its contents do not match content,
|
||||
write content to the file.
|
||||
"""
|
||||
if not path.exists() or path.read_text() != content:
|
||||
logger.info(f"Updating {path}")
|
||||
path.write_text(content)
|
||||
|
||||
|
||||
def touch_if_newer_dep(file: Path, deps: list[Path]) -> None:
|
||||
"""
|
||||
Takes a filepath and a list of paths to its dependencies.
|
||||
If any of the dependencies has been altered more recently than the file,
|
||||
touch the file.
|
||||
|
||||
Essentially a simple reimplementation of make's dependency handling for build targets.
|
||||
"""
|
||||
if any(dep.stat().st_mtime > file.stat().st_mtime for dep in deps):
|
||||
logger.info(f"Touching {file}")
|
||||
file.touch()
|
||||
|
||||
|
||||
def delete_file(file: Path) -> None:
|
||||
"""
|
||||
Delete given file using pathlib
|
||||
"""
|
||||
logger.info(f"Removing file {file}")
|
||||
file.unlink()
|
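A short, hypothetical usage sketch of the helpers above (throwaway paths, run from the repository root so that `build.lib` is importable):

```python
# Hypothetical usage of build/lib.py; paths are examples only.
import tempfile
from pathlib import Path

from build.lib import keys_exists, sort_dict, touch_if_newer_dep, update_if_changed

tags = {"en": {"policy": "Policy"}}
assert keys_exists(tags, "en", "policy")                # nested key lookup
assert list(sort_dict({"b": 1, "a": 2})) == ["a", "b"]  # stable key order

with tempfile.TemporaryDirectory() as tmp:
    dep = Path(tmp) / "article.en.xml"
    dep.write_text("<news/>")
    xmllist = Path(tmp) / ".news.xmllist"
    # Only (re)written when the content differs, so the mtime stays useful
    # as a make prerequisite in phase 2.
    update_if_changed(xmllist, "article\n")
    # Touch the list if any of the referenced XML files is newer than it.
    touch_if_newer_dep(xmllist, [dep])
```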
2 build/phase1/__init__.py (Normal file)
@@ -0,0 +1,2 @@
# __init__.py is a special Python file that allows a directory to become
# a Python package so it can be accessed using the 'import' statement.
36 build/phase1/global_symlinks.py (Normal file)
@@ -0,0 +1,36 @@
|
||||
import logging
|
||||
import multiprocessing
|
||||
from itertools import product
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _do_symlinking(type: str, lang: str) -> None:
|
||||
"""
|
||||
Helper function for doing all of the global symlinking that is suitable for multithreading
|
||||
"""
|
||||
target = (
|
||||
Path(f"global/data/{type}/{type}.{lang}.xml")
|
||||
if Path(f"global/data/{type}/{type}.{lang}.xml").exists()
|
||||
else Path(f"global/data/{type}/{type}.en.xml")
|
||||
)
|
||||
source = Path(f"global/data/{type}/.{type}.{lang}.xml")
|
||||
if not source.exists():
|
||||
source.symlink_to(target.relative_to(source.parent))
|
||||
|
||||
|
||||
def global_symlinks(languages: list[str]) -> None:
|
||||
"""
|
||||
After this step, the following symlinks will exist:
|
||||
* global/data/texts/.texts.<lang>.xml for each language
|
||||
* global/data/topbanner/.topbanner.<lang>.xml for each language
|
||||
Each of these symlinks will point to the corresponding file without a dot at
|
||||
the beginning of the filename, if present, and to the English version
|
||||
otherwise. These symlinks make sure that phase 2 can easily use the right file
|
||||
for each language, also as a prerequisite in the Makefile.
|
||||
"""
|
||||
logger.info("Creating global symlinks")
|
||||
types = ["texts", "topbanner"]
|
||||
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
|
||||
pool.starmap(_do_symlinking, product(types, languages))
|
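A small illustration of the outcome (file names invented; only the English-fallback rule is taken from the code above):

```python
# Running from the repository root, assuming texts.en.xml exists but
# texts.fr.xml does not:
from build.phase1.global_symlinks import global_symlinks

global_symlinks(["en", "fr"])
# Resulting symlinks (illustrative):
#   global/data/texts/.texts.en.xml         -> texts.en.xml
#   global/data/texts/.texts.fr.xml         -> texts.en.xml   (English fallback)
#   global/data/topbanner/.topbanner.en.xml -> topbanner.en.xml
```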
119 build/phase1/index_website.py (Normal file)
@@ -0,0 +1,119 @@
|
||||
# SPDX-FileCopyrightText: 2020 Free Software Foundation Europe e.V. <https://fsfe.org>
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
# Build an index for the search engine based on the article titles and tags
|
||||
|
||||
import json
|
||||
import logging
|
||||
import multiprocessing
|
||||
from pathlib import Path
|
||||
|
||||
import iso639
|
||||
import lxml.etree as etree
|
||||
import nltk
|
||||
from nltk.corpus import stopwords as nltk_stopwords
|
||||
|
||||
from build.lib import update_if_changed
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _find_teaser(document: etree.ElementTree) -> str:
|
||||
"""
|
||||
Find a suitable teaser for indexation
|
||||
|
||||
Get all the paragraphs in <body> and return the first which contains more
|
||||
than 10 words
|
||||
|
||||
:document: The parsed lxml ElementTree document
|
||||
:returns: The text of the teaser or an empty string
|
||||
"""
|
||||
for p in document.xpath("//body//p"):
|
||||
if p.text and len(p.text.strip().split(" ")) > 10:
|
||||
return p.text
|
||||
return ""
|
||||
|
||||
|
||||
def _process_file(file: Path, stopwords: set[str]) -> dict:
|
||||
"""
|
||||
Generate the search index entry for a given file and set of stopwords
|
||||
"""
|
||||
logger.debug(f"Processing file {file}")
|
||||
xslt_root = etree.parse(file)
|
||||
tags = map(
|
||||
lambda tag: tag.get("key"),
|
||||
filter(lambda tag: tag.get("key") != "front-page", xslt_root.xpath("//tag")),
|
||||
)
|
||||
return {
|
||||
"url": f'/{file.with_suffix(".html")}',
|
||||
"tags": " ".join(tags),
|
||||
"title": xslt_root.xpath("//html//title")[0].text
|
||||
if xslt_root.xpath("//html//title")
|
||||
else "",
|
||||
"teaser": " ".join(
|
||||
w
|
||||
for w in _find_teaser(xslt_root).strip().split(" ")
|
||||
if w.lower() not in stopwords
|
||||
),
|
||||
"type": "news" if "news/" in str(file) else "page",
|
||||
# Get the date of the file if it has one
|
||||
"date": xslt_root.xpath("//news[@newsdate]").get("newsdate")
|
||||
if xslt_root.xpath("//news[@newsdate]")
|
||||
else None,
|
||||
}
|
||||
|
||||
|
||||
def index_websites(languages: list[str]) -> None:
|
||||
"""
|
||||
Generate a search index for all sites that have a search/search.js file
|
||||
"""
|
||||
logger.info("Creating search indexes")
|
||||
# Download all stopwords
|
||||
nltkdir = "./.nltk_data"
|
||||
nltk.data.path = [nltkdir] + nltk.data.path
|
||||
nltk.download("stopwords", download_dir=nltkdir, quiet=True)
|
||||
# Iterate over sites
|
||||
for site in filter(
|
||||
lambda path: path.joinpath("search/search.js").exists(),
|
||||
Path(".").glob("?*.??*"),
|
||||
):
|
||||
logger.debug(f"Indexing {site}")
|
||||
|
||||
# Get all xhtml files in languages to be processed
|
||||
# Create a list of tuples
|
||||
# The first element of each tuple is the file and the second is a set of stopwords for that language
|
||||
# Use iso639 to get the english name of the language from the two letter iso639-1 code we use to mark files.
|
||||
# Then if that language has stopwords from nltk, use those stopwords.
|
||||
files_with_stopwords = sorted(
|
||||
list(
|
||||
map(
|
||||
lambda file: (
|
||||
file,
|
||||
set(
|
||||
nltk_stopwords.words(
|
||||
iso639.Language.from_part1(
|
||||
file.suffixes[0].removeprefix(".")
|
||||
).name.lower()
|
||||
)
|
||||
)
|
||||
if iso639.Language.from_part1(
|
||||
file.suffixes[0].removeprefix(".")
|
||||
).name.lower()
|
||||
in nltk_stopwords.fileids()
|
||||
else set(),
|
||||
),
|
||||
filter(
|
||||
lambda file: file.suffixes[0].removeprefix(".") in languages,
|
||||
Path(site).glob("**/*.??.xhtml"),
|
||||
),
|
||||
)
|
||||
),
|
||||
key=lambda tuple: tuple[0],
|
||||
)
|
||||
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
|
||||
articles = pool.starmap(_process_file, files_with_stopwords)
|
||||
|
||||
update_if_changed(
|
||||
Path(f"{site}/search/index.js"),
|
||||
"var pages = " + json.dumps(articles, ensure_ascii=False),
|
||||
)
|
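For orientation, one entry of the generated `search/index.js` array (`var pages = [...]`) has roughly this shape; the values below are invented, the keys come from `_process_file` above:

```python
# Invented example of a single search index entry produced above.
example_entry = {
    "url": "/news/2024/news-20240101-01.en.html",
    "tags": "community policy",
    "title": "Example news title",
    "teaser": "First paragraph longer than ten words with stopwords removed",
    "type": "news",        # "news" for files under news/, otherwise "page"
    "date": "2024-01-01",  # newsdate attribute, or None when absent
}
```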
21 build/phase1/prepare_subdirectories.py (Normal file)
@@ -0,0 +1,21 @@
|
||||
import logging
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def prepare_subdirectories(languages: list[str]) -> None:
|
||||
"""
|
||||
Find any makefiles in subdirectories and run them
|
||||
"""
|
||||
logger.info("Preparing Subdirectories")
|
||||
for makefile in Path("").glob("?*.?*/**/Makefile"):
|
||||
subprocess.run(
|
||||
[
|
||||
"make",
|
||||
"--silent",
|
||||
f"--directory={makefile.parent}",
|
||||
f'languages="{" ".join(languages)}"',
|
||||
]
|
||||
)
|
134 build/phase1/run.py (Normal file)
@@ -0,0 +1,134 @@
|
||||
# -----------------------------------------------------------------------------
|
||||
# script for FSFE website build, phase 1
|
||||
# -----------------------------------------------------------------------------
|
||||
# This script is executed in the root of the source directory tree, and
|
||||
# creates some .xml and xhtml files as well as some symlinks, all of which
|
||||
# serve as input files in phase 2. The whole phase 1 runs within the source
|
||||
# directory tree and does not touch the target directory tree at all.
|
||||
# -----------------------------------------------------------------------------
|
||||
import logging
|
||||
from os import environ
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def phase1_run(languages: list[str]):
|
||||
"""
|
||||
Run all the necessary sub functions for phase1.
|
||||
"""
|
||||
# If in test mode
|
||||
if environ.get("TEST", "FALSE") == "TRUE":
|
||||
logger.info("Testing mode, typechecking enabled")
|
||||
from typeguard import install_import_hook
|
||||
|
||||
# Must be above imports
|
||||
install_import_hook("build.phase1.global_symlinks")
|
||||
install_import_hook("build.phase1.index_website")
|
||||
install_import_hook("build.phase1.prepare_subdirectories")
|
||||
install_import_hook("build.phase1.update_css")
|
||||
install_import_hook("build.phase1.update_defaultxsls")
|
||||
install_import_hook("build.phase1.update_localmenus")
|
||||
install_import_hook("build.phase1.update_stylesheets")
|
||||
install_import_hook("build.phase1.update_tags")
|
||||
install_import_hook("build.phase1.update_xmllists")
|
||||
install_import_hook("build.phase1.update_xmllists")
|
||||
install_import_hook("build.phase1.update_xmllists")
|
||||
|
||||
from build.phase1.global_symlinks import global_symlinks
|
||||
from build.phase1.index_website import index_websites
|
||||
from build.phase1.prepare_subdirectories import prepare_subdirectories
|
||||
from build.phase1.update_css import update_css
|
||||
from build.phase1.update_defaultxsls import update_defaultxsls
|
||||
from build.phase1.update_localmenus import update_localmenus
|
||||
from build.phase1.update_stylesheets import update_stylesheets
|
||||
from build.phase1.update_tags import update_tags
|
||||
from build.phase1.update_xmllists import update_xmllists
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Build search index
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# This step runs a Python tool that creates an index of all news and
|
||||
# articles. It extracts titles, teaser, tags, dates and potentially more.
|
||||
# The result will be fed into a JS file.
|
||||
index_websites(languages)
|
||||
# -----------------------------------------------------------------------------
|
||||
# Update CSS files
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# This step recompiles the less files into the final CSS files to be
|
||||
# distributed to the web server.
|
||||
update_css()
|
||||
# -----------------------------------------------------------------------------
|
||||
# Update XSL stylesheets
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# This step updates (actually: just touches) all XSL files which depend on
|
||||
# another XSL file that has changed since the last build run. The phase 2
|
||||
# Makefile then only has to consider the directly used stylesheet as a
|
||||
# prerequisite for building each file and doesn't have to worry about other
|
||||
# stylesheets imported into that one.
|
||||
# This must run before the "dive into subdirectories" step, because in the news
|
||||
# and events directories, the XSL files, if updated, will be copied for the
|
||||
# per-year archives.
|
||||
|
||||
update_stylesheets()
|
||||
# -----------------------------------------------------------------------------
|
||||
# Dive into subdirectories
|
||||
# -----------------------------------------------------------------------------
|
||||
# Find any makefiles in subdirectories and run them
|
||||
prepare_subdirectories(languages)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Create XML symlinks
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# After this step, the following symlinks will exist:
|
||||
# * global/data/texts/.texts.<lang>.xml for each language
|
||||
# * global/data/topbanner/.topbanner.<lang>.xml for each language
|
||||
# Each of these symlinks will point to the corresponding file without a dot at
|
||||
# the beginning of the filename, if present, and to the English version
|
||||
# otherwise. These symlinks make sure that phase 2 can easily use the right file
|
||||
# for each language, also as a prerequisite in the Makefile.
|
||||
global_symlinks(languages)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Create XSL symlinks
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# After this step, each directory with source files for HTML pages contains a
|
||||
# symlink named .default.xsl and pointing to the default.xsl "responsible" for
|
||||
# this directory. These symlinks make it easier for the phase 2 Makefile to
|
||||
# determine which XSL script should be used to build a HTML page from a source
|
||||
# file.
|
||||
|
||||
update_defaultxsls()
|
||||
# -----------------------------------------------------------------------------
|
||||
# Update local menus
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# After this step, all .localmenu.??.xml files will be up to date.
|
||||
|
||||
update_localmenus(languages)
|
||||
# -----------------------------------------------------------------------------
|
||||
# Update tags
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# After this step, the following files will be up to date:
|
||||
# * tags/tagged-<tags>.en.xhtml for each tag used. Apart from being
|
||||
# automatically created, these are regular source files for HTML pages, and
|
||||
# in phase 2 are built into pages listing all news items and events for a
|
||||
# tag.
|
||||
# * tags/.tags.??.xml with a list of the tags used.
|
||||
update_tags(languages)
|
||||
# -----------------------------------------------------------------------------
|
||||
# Update XML filelists
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# After this step, the following files will be up to date:
|
||||
# * <dir>/.<base>.xmllist for each <dir>/<base>.sources as well as for each
|
||||
# $site/tags/tagged-<tags>.en.xhtml. These files are used in phase 2 to include the
|
||||
# correct XML files when generating the HTML pages. It is taken care that
|
||||
# these files are only updated whenever their content actually changes, so
|
||||
# they can serve as a prerequisite in the phase 2 Makefile.
|
||||
update_xmllists(languages)
|
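`build_main.sh` above now calls `python3 "$basedir"/phase1.py "$languages"`; that entry script is not shown in this diff, but a minimal wrapper consistent with `phase1_run()` could look like the following (only the file name and the single space-separated languages argument are taken from the shell call, the rest is an assumption):

```python
#!/usr/bin/env python3
# Hypothetical phase1.py entry point; not part of this excerpt.
import logging
import os
import sys

from build.phase1.run import phase1_run

if __name__ == "__main__":
    logging.basicConfig(
        format="* %(message)s", level=os.environ.get("LOGLEVEL", "INFO")
    )
    # build_main.sh passes the languages as one space-separated argument.
    languages = sys.argv[1].split() if len(sys.argv) > 1 else ["en"]
    phase1_run(languages)
```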
44 build/phase1/update_css.py (Normal file)
@@ -0,0 +1,44 @@
|
||||
import logging
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import minify
|
||||
|
||||
from build.lib import update_if_changed
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def update_css() -> None:
|
||||
"""
|
||||
If any less files have been changed, update the css.
|
||||
Compile less found at website/look/(fsfe.less|valentine.less)
|
||||
Then minify it, and place it in the expected location for the build process.
|
||||
"""
|
||||
logger.info("Updating css")
|
||||
for folder in Path("").glob("?*.?*/look"):
|
||||
for name in ["fsfe", "valentine"]:
|
||||
if folder.joinpath(name + ".less").exists() and (
|
||||
not folder.joinpath(name + ".min.css").exists()
|
||||
or any(
|
||||
[
|
||||
path.stat().st_mtime
|
||||
> folder.joinpath(name + ".min.css").stat().st_mtime
|
||||
for path in folder.glob("**/*.less")
|
||||
]
|
||||
)
|
||||
):
|
||||
logger.info(f"Compiling {name}.less")
|
||||
result = subprocess.run(
|
||||
[
|
||||
"lessc",
|
||||
str(folder.joinpath(name + ".less")),
|
||||
],
|
||||
capture_output=True,
|
||||
# Get output as str instead of bytes
|
||||
universal_newlines=True,
|
||||
)
|
||||
update_if_changed(
|
||||
folder.joinpath(name + ".min.css"),
|
||||
minify.string("text/css", result.stdout),
|
||||
)
|
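The rebuild condition above boils down to a freshness check on the `.less` sources; restated in isolation (the helper name is ours, not part of the commit):

```python
# Standalone restatement of the rebuild rule used in update_css() above.
from pathlib import Path

def css_needs_rebuild(look_dir: Path, name: str) -> bool:
    """True if <name>.less exists and <name>.min.css is missing or older
    than any .less file below look_dir."""
    source = look_dir / f"{name}.less"
    target = look_dir / f"{name}.min.css"
    if not source.exists():
        return False
    if not target.exists():
        return True
    return any(
        less.stat().st_mtime > target.stat().st_mtime
        for less in look_dir.glob("**/*.less")
    )
```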
36 build/phase1/update_defaultxsls.py (Executable file)
@@ -0,0 +1,36 @@
|
||||
import logging
|
||||
import multiprocessing
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _do_symlinking(directory: Path) -> None:
|
||||
"""
|
||||
In each dir, place a .default.xsl symlink pointing to the nearest default.xsl
|
||||
"""
|
||||
working_dir = directory
|
||||
if not directory.joinpath(".default.xsl").exists():
|
||||
while not working_dir.joinpath("default.xsl").exists():
|
||||
working_dir = working_dir.parent
|
||||
directory.joinpath(".default.xsl").symlink_to(
|
||||
working_dir.joinpath("default.xsl").resolve()
|
||||
)
|
||||
|
||||
|
||||
def update_defaultxsls() -> None:
|
||||
"""
|
||||
Place a .default.xsl into each directory containing source files for
|
||||
HTML pages (*.xhtml). These .default.xsl are symlinks to the first
|
||||
available actual default.xsl found when climbing the directory tree
|
||||
upwards; it is the xsl stylesheet to be used for building the HTML
|
||||
files from this directory.
|
||||
"""
|
||||
logger.info("Updating default xsl's")
|
||||
|
||||
# Get a set of all directories containing .xhtml source files
|
||||
directories = set(map(lambda path: path.parent, Path(".").glob("**/*.*.xhtml")))
|
||||
|
||||
# Do all directories asynchronously
|
||||
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
|
||||
pool.map(_do_symlinking, directories)
|
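A short worked example of the resolution above (the directory layout is invented):

```python
# Invented example tree:
#   fsfe.org/default.xsl
#   fsfe.org/about/          (xhtml sources, no default.xsl of its own)
#   fsfe.org/news/default.xsl
#
# After running update_defaultxsls():
#   fsfe.org/about/.default.xsl -> fsfe.org/default.xsl      (nearest ancestor)
#   fsfe.org/news/.default.xsl  -> fsfe.org/news/default.xsl (local stylesheet)
from build.phase1.update_defaultxsls import update_defaultxsls

update_defaultxsls()
```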
116 build/phase1/update_localmenus.py (Executable file)
@@ -0,0 +1,116 @@
|
||||
import logging
|
||||
import multiprocessing
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
|
||||
import lxml.etree as etree
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _write_localmenus(
|
||||
dir: str, files_by_dir: dict[str, list[Path]], languages: list[str]
|
||||
) -> None:
|
||||
"""
|
||||
Write localmenus for a given directory
|
||||
"""
|
||||
base_files = sorted(
|
||||
list(
|
||||
set(
|
||||
map(
|
||||
lambda filter_file: filter_file.with_suffix("").with_suffix(""),
|
||||
files_by_dir[dir],
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
for lang in languages:
|
||||
file = Path(dir).joinpath(f".localmenu.{lang}.xml")
|
||||
logger.info(f"Creating {file}")
|
||||
file.write_text(
|
||||
textwrap.dedent("""\
|
||||
<?xml version="1.0"?>
|
||||
|
||||
<feed>
|
||||
""")
|
||||
)
|
||||
with file.open("a") as working_file:
|
||||
for base_file in base_files:
|
||||
tmpfile = (
|
||||
base_file.with_suffix(f".{lang}").with_suffix(".xhtml")
|
||||
if base_file.with_suffix(f".{lang}").with_suffix(".xhtml").exists()
|
||||
else base_file.with_suffix(".en.xhtml")
|
||||
if base_file.with_suffix(".en.xhtml").exists()
|
||||
else None
|
||||
)
|
||||
if not tmpfile:
|
||||
continue
|
||||
xslt_root = etree.parse(tmpfile)
|
||||
for localmenu in xslt_root.xpath("//localmenu"):
|
||||
working_file.write(
|
||||
'\n<localmenuitem set="'
|
||||
+ (
|
||||
str(localmenu.xpath("./@set")[0])
|
||||
if localmenu.xpath("./@set") != []
|
||||
else "default"
|
||||
)
|
||||
+ '" id="'
|
||||
+ (
|
||||
str(localmenu.xpath("./@id")[0])
|
||||
if localmenu.xpath("./@id") != []
|
||||
else "default"
|
||||
)
|
||||
+ f'" link="/{Path(*Path(base_file).parts[1:])}.html">'
|
||||
+ localmenu.text
|
||||
+ "</localmenuitem>"
|
||||
)
|
||||
|
||||
working_file.write(
|
||||
textwrap.dedent("""\
|
||||
\n
|
||||
</feed>
|
||||
""")
|
||||
)
|
||||
|
||||
|
||||
def update_localmenus(languages: list[str]) -> None:
|
||||
"""
|
||||
Update all the .localmenu.*.xml files containing the local menus.
|
||||
"""
|
||||
logger.info("Updating local menus")
|
||||
# Get a dict of all source files containing local menus
|
||||
files_by_dir = {}
|
||||
for file in filter(
|
||||
lambda path: etree.parse(path).xpath("//localmenu")
|
||||
and "-template" not in str(path),
|
||||
Path(".").glob("**/*.??.xhtml"),
|
||||
):
|
||||
xslt_root = etree.parse(file)
|
||||
dir = xslt_root.xpath("//localmenu/@dir")
|
||||
dir = dir[0] if dir else str(file.parent.relative_to(Path(".")))
|
||||
if dir not in files_by_dir:
|
||||
files_by_dir[dir] = set()
|
||||
files_by_dir[dir].add(file)
|
||||
for dir in files_by_dir:
|
||||
files_by_dir[dir] = sorted(list(files_by_dir[dir]))
|
||||
|
||||
# If any of the source files has been updated, rebuild all .localmenu.*.xml
|
||||
dirs = filter(
|
||||
lambda dir: (
|
||||
any(
|
||||
(
|
||||
(not Path(dir).joinpath(".localmenu.en.xml").exists())
|
||||
or (
|
||||
file.stat().st_mtime
|
||||
> Path(dir).joinpath(".localmenu.en.xml").stat().st_mtime
|
||||
)
|
||||
)
|
||||
for file in files_by_dir[dir]
|
||||
)
|
||||
),
|
||||
files_by_dir,
|
||||
)
|
||||
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
|
||||
pool.starmap(
|
||||
_write_localmenus, [(dir, files_by_dir, languages) for dir in dirs]
|
||||
)
|
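Based on the writer above, a generated `.localmenu.en.xml` looks roughly like this (menu entries invented):

```python
# Invented example of the output written by _write_localmenus above.
EXAMPLE_LOCALMENU = """<?xml version="1.0"?>

<feed>

<localmenuitem set="default" id="overview" link="/about/about.html">Overview</localmenuitem>
<localmenuitem set="default" id="team" link="/about/team.html">Team</localmenuitem>

</feed>
"""
```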
47 build/phase1/update_stylesheets.py (Executable file)
@@ -0,0 +1,47 @@
|
||||
import logging
|
||||
import multiprocessing
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from build.lib import touch_if_newer_dep
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _update_sheet(file: Path) -> None:
|
||||
"""
|
||||
Update a given xsl file if any of its dependant xsl files have been updated
|
||||
"""
|
||||
xslt_root = etree.parse(file)
|
||||
imports = map(
|
||||
lambda imp: Path(file)
|
||||
.parent.joinpath(imp.get("href"))
|
||||
.resolve()
|
||||
.relative_to(Path(".").resolve()),
|
||||
xslt_root.xpath(
|
||||
"//xsl:import", namespaces={"xsl": "http://www.w3.org/1999/XSL/Transform"}
|
||||
),
|
||||
)
|
||||
touch_if_newer_dep(file, imports)
|
||||
|
||||
|
||||
def update_stylesheets() -> None:
|
||||
"""
|
||||
This script is called from the phase 1 Makefile and touches all XSL files
|
||||
which depend on another XSL file that has changed since the last build run.
|
||||
The phase 2 Makefile then only has to consider the
|
||||
directly used stylesheet as a prerequisite for building each file and doesn't
|
||||
have to worry about other stylesheets imported into that one.
|
||||
"""
|
||||
logger.info("Updating XSL stylesheets")
|
||||
banned = re.compile(r"(\.venv/.*)|(.*\.default\.xsl$)")
|
||||
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
|
||||
pool.map(
|
||||
_update_sheet,
|
||||
filter(
|
||||
lambda file: re.match(banned, str(file)) is None,
|
||||
Path(".").glob("**/*.xsl"),
|
||||
),
|
||||
)
|
187 build/phase1/update_tags.py (Executable file)
@@ -0,0 +1,187 @@
|
||||
import logging
|
||||
import multiprocessing
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
import lxml.etree as etree
|
||||
|
||||
from build.lib import (
|
||||
delete_file,
|
||||
keys_exists,
|
||||
sort_dict,
|
||||
update_if_changed,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _update_tag_pages(site: Path, tag: str) -> None:
|
||||
"""
|
||||
Update the xhtml pages and xmllists for a given tag
|
||||
"""
|
||||
taggedfile = Path(f"{site}/tags/tagged.en.xhtml")
|
||||
content = taggedfile.read_text().replace("XXX_TAGNAME_XXX", tag)
|
||||
update_if_changed(taggedfile, content)
|
||||
|
||||
|
||||
def _update_tag_sets(
|
||||
site: Path,
|
||||
lang: str,
|
||||
filecount: dict[str, dict[str, int]],
|
||||
files_by_tag: dict[str, list[Path]],
|
||||
tags_by_lang: dict[str, dict[str, str]],
|
||||
) -> None:
|
||||
"""
|
||||
Update the .tags.??.xml tagset xmls for a given tag
|
||||
"""
|
||||
taglist = textwrap.dedent("""\
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<tagset>
|
||||
""")
|
||||
for section in ["news", "events"]:
|
||||
for tag in files_by_tag:
|
||||
count = filecount[section][tag]
|
||||
label = (
|
||||
tags_by_lang[lang][tag]
|
||||
if keys_exists(tags_by_lang, lang, tag) and tags_by_lang[lang][tag]
|
||||
else tags_by_lang["en"][tag]
|
||||
if keys_exists(tags_by_lang, "en", tag) and tags_by_lang["en"][tag]
|
||||
else tag
|
||||
)
|
||||
if count > 0:
|
||||
taglist = taglist + textwrap.dedent(f"""\
|
||||
<tag section="{section}" key="{tag}" count="{count}">{label}</tag>
|
||||
""")
|
||||
taglist = taglist + textwrap.dedent("""\
|
||||
</tagset>
|
||||
""")
|
||||
update_if_changed(Path(f"{site}/tags/.tags.{lang}.xml"), taglist)
|
||||
|
||||
|
||||
def update_tags(languages: list[str]) -> None:
|
||||
"""
|
||||
Update Tag pages, xmllists and xmls
|
||||
|
||||
Creates/update the following files:
|
||||
|
||||
* */tags/tagged-<tags>.en.xhtml for each tag used. Apart from being
|
||||
automatically created, these are regular source files for HTML pages, and
|
||||
in phase 2 are built into pages listing all news items and events for a
|
||||
tag.
|
||||
|
||||
* */tags/.tags.??.xml with a list of the tags used.
|
||||
|
||||
Changing or removing tags in XML files is also considered, in which case a
|
||||
file is removed from the .xmllist files.
|
||||
|
||||
When a tag has been removed from the last XML file where it has been used,
|
||||
the tagged-* are correctly deleted.
|
||||
"""
|
||||
for site in filter(
|
||||
lambda path: path.joinpath("tags").exists(),
|
||||
Path(".").glob("?*.??*"),
|
||||
):
|
||||
logger.info(f"Updating tags for {site}")
|
||||
# Create a complete and current map of which tag is used in which files
|
||||
files_by_tag = {}
|
||||
tags_by_lang = {}
|
||||
# Fill out files_by_tag and tags_by_lang
|
||||
for file in filter(
|
||||
lambda file:
|
||||
# Not in tags dir of a site
|
||||
site.joinpath("tags") not in file.parents
|
||||
# Has a tag element
|
||||
and etree.parse(file).xpath("//tag"),
|
||||
site.glob("**/*.xml"),
|
||||
):
|
||||
xslt_root = etree.parse(file)
|
||||
for tag in xslt_root.xpath("//tag"):
|
||||
# Get the key attribute, and filter out some invalid chars
|
||||
key = (
|
||||
tag.get("key")
|
||||
.replace("/", "-")
|
||||
.replace(" ", "-")
|
||||
.replace(":", "-")
|
||||
.strip()
|
||||
)
|
||||
# Get the label, and strip it.
|
||||
label = (
escape(tag.text.strip()) if tag.text and tag.text.strip() else None
)
|
||||
# Load into the dicts
|
||||
if key not in files_by_tag:
|
||||
files_by_tag[key] = set()
|
||||
files_by_tag[key].add(file.with_suffix("").with_suffix(""))
|
||||
lang = file.with_suffix("").suffix.removeprefix(".")
|
||||
if lang not in tags_by_lang:
|
||||
tags_by_lang[lang] = {}
|
||||
tags_by_lang[lang][key] = (
|
||||
tags_by_lang[lang][key]
|
||||
if key in tags_by_lang[lang] and tags_by_lang[lang][key]
|
||||
else label
|
||||
)
|
||||
# Sort dicts to ensure that they are stable between runs
|
||||
files_by_tag = sort_dict(files_by_tag)
|
||||
for tag in files_by_tag:
|
||||
files_by_tag[tag] = sorted(files_by_tag[tag])
|
||||
tags_by_lang = sort_dict(tags_by_lang)
|
||||
for lang in tags_by_lang:
|
||||
tags_by_lang[lang] = sort_dict(tags_by_lang[lang])
|
||||
|
||||
# Now we have the necessary data, begin
|
||||
logger.info("Removing files for removed tags")
|
||||
tagfiles_to_delete = filter(
|
||||
lambda path: not any([(tag in str(path)) for tag in files_by_tag]),
|
||||
list(Path(f"{site}/tags/").glob("tagged-*.en.xhtml"))
|
||||
+ list(Path(f"{site}/tags/").glob(".tagged-*.xmllist")),
|
||||
)
|
||||
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
|
||||
pool.map(delete_file, tagfiles_to_delete)
|
||||
|
||||
logger.info("Updating tag pages")
|
||||
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
|
||||
pool.starmap(
|
||||
_update_tag_pages,
|
||||
[(site, tag) for tag in files_by_tag],
|
||||
)
|
||||
|
||||
logger.info("Updating tag lists")
|
||||
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
|
||||
pool.starmap(
|
||||
update_if_changed,
|
||||
[
|
||||
(
|
||||
Path(f"{site}/tags/.tagged-{tag}.xmllist"),
|
||||
(
|
||||
"\n".join(map(lambda file: str(file), files_by_tag[tag]))
|
||||
+ "\n"
|
||||
),
|
||||
)
|
||||
for tag in files_by_tag
|
||||
],
|
||||
)
|
||||
|
||||
logger.info("Updating tag sets")
|
||||
# Get count of files with each tag in each section
|
||||
filecount = {}
|
||||
for section in ["news", "events"]:
|
||||
filecount[section] = {}
|
||||
for tag in files_by_tag:
|
||||
filecount[section][tag] = len(
|
||||
list(
|
||||
filter(
|
||||
lambda path: section in str(path.parent),
|
||||
files_by_tag[tag],
|
||||
)
|
||||
)
|
||||
)
|
||||
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
|
||||
pool.starmap(
|
||||
_update_tag_sets,
|
||||
[
|
||||
(site, lang, filecount, files_by_tag, tags_by_lang)
|
||||
for lang in [lang for lang in tags_by_lang if lang in languages]
|
||||
],
|
||||
)
|
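From the tagset writer above, a generated `.tags.<lang>.xml` has this shape (tags and counts invented):

```python
# Invented example of the tagset written by _update_tag_sets above.
EXAMPLE_TAGSET = """<?xml version="1.0" encoding="UTF-8"?>

<tagset>
<tag section="news" key="policy" count="12">Policy</tag>
<tag section="events" key="policy" count="3">Policy</tag>
</tagset>
"""
```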
162 build/phase1/update_xmllists.py (Executable file)
@@ -0,0 +1,162 @@
|
||||
import datetime
|
||||
import fnmatch
|
||||
import logging
|
||||
import multiprocessing
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import lxml.etree as etree
|
||||
|
||||
from build.lib import (
|
||||
touch_if_newer_dep,
|
||||
update_if_changed,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _update_for_base(
|
||||
base: Path, all_xml: set[Path], nextyear: str, thisyear: str, lastyear: str
|
||||
) -> None:
|
||||
"""
|
||||
Update the xmllist for a given base file
|
||||
"""
|
||||
matching_files = set()
|
||||
# If sources exist
|
||||
if base.with_suffix(".sources").exists():
|
||||
# Load every file that matches the pattern
|
||||
# If a tag is included in the pattern, the file must contain that tag
|
||||
with base.with_suffix(".sources").open(mode="r") as file:
|
||||
for line in file:
|
||||
pattern = (
|
||||
re.sub(r":\[.*\]$", "*", line)
|
||||
.replace("$nextyear", nextyear)
|
||||
.replace("$thisyear", thisyear)
|
||||
.replace("$lastyear", lastyear)
|
||||
.strip()
|
||||
)
|
||||
if len(pattern) <= 0:
|
||||
print("Pattern too short, continue!")
|
||||
continue
|
||||
tag = (
|
||||
re.match(r":\[(.*)\]$", line).group().strip()
|
||||
if re.match(r":\[(.*)\]$", line)
|
||||
else ""
|
||||
)
|
||||
for line in filter(
|
||||
lambda line:
|
||||
# Matches glob pattern
|
||||
fnmatch.fnmatchcase(str(line), pattern)
|
||||
# contains tag if tag in pattern
|
||||
and (
|
||||
etree.parse(file).find(f"//tag[@key='{tag}']")
|
||||
if tag != ""
|
||||
else True
|
||||
)
|
||||
# Not just matching an empty line
|
||||
and len(str(line)) > 0,
|
||||
all_xml,
|
||||
):
|
||||
matching_files.add(str(line))
|
||||
|
||||
for file in Path("").glob(f"{base}.??.xhtml"):
|
||||
xslt_root = etree.parse(file)
|
||||
for module in xslt_root.xpath("//module"):
|
||||
matching_files.add(f'global/data/modules/{module.get("id")}'.strip())
|
||||
matching_files = sorted(matching_files)
|
||||
update_if_changed(
|
||||
Path(f"{base.parent}/.{base.name}.xmllist"), "\n".join(matching_files) + "\n"
|
||||
)
|
||||
|
||||
|
||||
def _update_module_xmllists(languages: list[str]) -> None:
|
||||
"""
|
||||
Update .xmllist files for .sources and .xhtml containing <module>s
|
||||
"""
|
||||
logger.info("Updating XML lists")
|
||||
# Store current dir
|
||||
for site in map(
|
||||
lambda path: str(path),
|
||||
filter(lambda path: path.is_dir(), Path(".").glob("?*.??*")),
|
||||
):
|
||||
logger.info(f"Updating xmllists for {site}")
|
||||
# Get all the bases and stuff before multithreading the update bit
|
||||
all_xml = set(
|
||||
map(
|
||||
lambda path: path.with_suffix("").with_suffix(""),
|
||||
filter(
|
||||
lambda path: path.with_suffix("").suffix.removeprefix(".")
|
||||
in languages,
|
||||
Path(site).glob("**/*.*.xml"),
|
||||
),
|
||||
)
|
||||
)
|
||||
source_bases = set(
|
||||
map(
|
||||
lambda path: path.with_suffix(""),
|
||||
Path(site).glob("**/*.sources"),
|
||||
)
|
||||
)
|
||||
module_bases = set(
|
||||
map(
|
||||
lambda path: path.with_suffix("").with_suffix(""),
|
||||
filter(
|
||||
lambda path: path.with_suffix("").suffix.removeprefix(".")
|
||||
in languages
|
||||
and etree.parse(path).xpath("//module"),
|
||||
Path(site).glob("**/*.*.xhtml"),
|
||||
),
|
||||
)
|
||||
)
|
||||
all_bases = source_bases | module_bases
|
||||
nextyear = str(datetime.datetime.today().year + 1)
|
||||
thisyear = str(datetime.datetime.today().year)
|
||||
lastyear = str(datetime.datetime.today().year - 1)
|
||||
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
|
||||
pool.starmap(
|
||||
_update_for_base,
|
||||
[(base, all_xml, nextyear, thisyear, lastyear) for base in all_bases],
|
||||
)
|
||||
|
||||
|
||||
def _check_xmllist_deps(file: Path) -> None:
|
||||
"""
|
||||
If any of the sources in an xmllist are newer than it, touch the xmllist
|
||||
"""
|
||||
xmls = set()
|
||||
with file.open(mode="r") as fileobj:
|
||||
for line in fileobj:
|
||||
for newfile in Path("").glob(line + ".??.xml"):
|
||||
xmls.add(newfile)
|
||||
touch_if_newer_dep(file, xmls)
|
||||
|
||||
|
||||
def _touch_xmllists_with_updated_deps(languages: list[str]) -> None:
|
||||
"""
|
||||
Touch all .xmllist files where one of the contained files has changed
|
||||
"""
|
||||
logger.info("Checking contents of XML lists")
|
||||
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
|
||||
pool.map(_check_xmllist_deps, Path("").glob("./**/.*.xmllist"))
|
||||
|
||||
|
||||
def update_xmllists(languages: list[str]) -> None:
|
||||
"""
|
||||
Update XML filelists (*.xmllist)
|
||||
|
||||
Creates/update the following files:
|
||||
|
||||
* <dir>/.<base>.xmllist for each <dir>/<base>.sources as well as for each
|
||||
fsfe.org/tags/tagged-<tags>.en.xhtml. These files are used