Revert "refactor: use lxml instead of string templating in process_file. (#5146)"
All checks were successful
continuous-integration/drone/pr Build is passing
All checks were successful
continuous-integration/drone/pr Build is passing
This reverts commit c4b7f0f33c.
This commit is contained in:
@@ -14,61 +14,76 @@ from build.lib.misc import get_basename, get_version, lang_from_filename
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _get_xmls(file: Path, parser: etree.XMLParser) -> etree.Element:
|
||||
def _include_xml(file: Path) -> str:
|
||||
"""
|
||||
include second level elements of a given XML file
|
||||
this emulates the behaviour of the original
|
||||
build script which wasn't able to load top
|
||||
level elements from any file
|
||||
"""
|
||||
elements = []
|
||||
work_str = ""
|
||||
if file.exists():
|
||||
tree = etree.parse(file, parser)
|
||||
tree = etree.parse(file)
|
||||
root = tree.getroot()
|
||||
# Remove <version> because the filename attribute would otherwise be added
|
||||
# to this element instead of the actual content element.
|
||||
for elem in root.xpath("version"):
|
||||
root.remove(elem)
|
||||
# Iterate over all elements in root node, add a filename attribute and
|
||||
# then append the string to work_str
|
||||
for elem in root.xpath("*"):
|
||||
elem.set("filename", get_basename(file))
|
||||
elements.append(elem)
|
||||
# and then we return the element
|
||||
return elements
|
||||
work_str += etree.tostring(elem, encoding="utf-8").decode("utf-8")
|
||||
|
||||
return work_str
|
||||
|
||||
|
||||
def _get_attributes(file: Path) -> dict:
|
||||
def _get_attributes(file: Path) -> str:
|
||||
"""
|
||||
get attributes of top level element in a given
|
||||
XHTML file
|
||||
"""
|
||||
work_str = ""
|
||||
tree = etree.parse(file)
|
||||
root = tree.getroot()
|
||||
attributes = root.items()
|
||||
return dict(attributes)
|
||||
attributes = root.attrib
|
||||
for attrib in attributes:
|
||||
work_str += f'{attrib}="{attributes[attrib]}"\n'
|
||||
|
||||
return work_str
|
||||
|
||||
|
||||
def _get_trlist(file: Path) -> etree.Element:
|
||||
def _list_langs(file: Path) -> str:
|
||||
"""
|
||||
list all languages a file exists in by globbing up
|
||||
the shortname (i.e. file path with file ending omitted)
|
||||
output is readily formatted for inclusion
|
||||
in xml stream
|
||||
"""
|
||||
trlist = etree.Element("trlist")
|
||||
for path in file.parent.glob(f"{get_basename(file)}.??{file.suffix}"):
|
||||
tr = etree.SubElement(trlist, "tr", id=lang_from_filename(path))
|
||||
tr.text = (
|
||||
Path(f"global/languages/{lang_from_filename(path)}").read_text().strip()
|
||||
return "\n".join(
|
||||
list(
|
||||
map(
|
||||
lambda path: (
|
||||
f'<tr id="{lang_from_filename(path)}">'
|
||||
+ (
|
||||
Path(f"global/languages/{lang_from_filename(path)}")
|
||||
.read_text()
|
||||
.strip()
|
||||
)
|
||||
+ "</tr>"
|
||||
),
|
||||
file.parent.glob(f"{get_basename(file)}.??{file.suffix}"),
|
||||
)
|
||||
)
|
||||
return trlist
|
||||
)
|
||||
|
||||
|
||||
def _get_set(action_file: Path, lang: str, parser: etree.XMLParser) -> etree.Element:
|
||||
def _auto_sources(action_file: Path, lang: str) -> str:
|
||||
"""
|
||||
import elements from source files, add file name
|
||||
attribute to first element included from each file
|
||||
"""
|
||||
doc_set = etree.Element("set")
|
||||
work_str = ""
|
||||
list_file = action_file.with_stem(
|
||||
f".{action_file.with_suffix('').stem}"
|
||||
).with_suffix(".xmllist")
|
||||
@@ -81,28 +96,21 @@ def _get_set(action_file: Path, lang: str, parser: etree.XMLParser) -> etree.Ele
|
||||
if path.with_suffix(f".{lang}.xml").exists()
|
||||
else path.with_suffix(".en.xml")
|
||||
)
|
||||
doc_set.extend(_get_xmls(path_xml, parser))
|
||||
work_str += _include_xml(path_xml)
|
||||
|
||||
return doc_set
|
||||
return work_str
|
||||
|
||||
|
||||
def _get_document(
|
||||
action_lang: str, action_file: Path, lang: str, parser: etree.XMLParser
|
||||
) -> etree.Element:
|
||||
document = etree.Element(
|
||||
"document", language=action_lang, **_get_attributes(action_file)
|
||||
)
|
||||
document.append(_get_set(action_file, lang, parser))
|
||||
document.extend(_get_xmls(action_file, parser))
|
||||
return document
|
||||
|
||||
|
||||
def _build_xmlstream(infile: Path, parser: etree.XMLParser) -> etree.Element:
|
||||
def _build_xmlstream(infile: Path):
|
||||
"""
|
||||
assemble the xml stream for feeding into xsltproc
|
||||
the expected shortname and language flag indicate
|
||||
a single xhtml page to be built
|
||||
"""
|
||||
# TODO
|
||||
# Ideally this would use lxml to construct an object instead of string templating.
|
||||
# Should be a little faster, and also guarantees that it's valid XML
|
||||
|
||||
logger.debug(f"infile: {infile}")
|
||||
shortname = infile.with_suffix("")
|
||||
lang = lang_from_filename(infile)
|
||||
@@ -126,6 +134,7 @@ def _build_xmlstream(infile: Path, parser: etree.XMLParser) -> etree.Element:
|
||||
topbanner_xml = Path(f"global/data/topbanner/.topbanner.{lang}.xml")
|
||||
texts_xml = Path(f"global/data/texts/.texts.{lang}.xml")
|
||||
date = str(datetime.now().date())
|
||||
# time = str(datetime.now().time())
|
||||
action_lang = ""
|
||||
translation_state = ""
|
||||
|
||||
@@ -150,29 +159,41 @@ def _build_xmlstream(infile: Path, parser: etree.XMLParser) -> etree.Element:
|
||||
|
||||
action_file = shortname.with_suffix(f".{action_lang}{infile.suffix}")
|
||||
logger.debug(f"action_file: {action_file}")
|
||||
# Create the root element
|
||||
page = etree.Element(
|
||||
"buildinfo",
|
||||
date=date,
|
||||
original=original_lang,
|
||||
filename=f"/{str(shortname.with_suffix('')).removeprefix('/')}",
|
||||
fileurl=f"/{shortname.relative_to(shortname.parts[0]).with_suffix('')}",
|
||||
dirname=f"/{shortname.parent}/",
|
||||
language=lang,
|
||||
translation_state=translation_state,
|
||||
)
|
||||
|
||||
# Add the subelements
|
||||
page.append(_get_trlist(infile))
|
||||
|
||||
page.extend(_get_xmls(topbanner_xml, parser))
|
||||
|
||||
page.extend(_get_xmls(Path("global/data/texts/texts.en.xml"), parser))
|
||||
|
||||
page.extend(_get_xmls(texts_xml, parser))
|
||||
|
||||
page.append(_get_document(action_lang, action_file, lang, parser))
|
||||
return page
|
||||
result_str = f"""
|
||||
<buildinfo
|
||||
date="{date}"
|
||||
original="{original_lang}"
|
||||
filename="/{str(shortname.with_suffix("")).removeprefix("/")}"
|
||||
fileurl="/{shortname.relative_to(shortname.parts[0]).with_suffix("")}"
|
||||
dirname="/{shortname.parent}/"
|
||||
language="{lang}"
|
||||
translation_state="{translation_state}"
|
||||
>
|
||||
<trlist>
|
||||
{_list_langs(infile)}
|
||||
</trlist>
|
||||
<topbanner>
|
||||
{_include_xml(topbanner_xml)}
|
||||
</topbanner>
|
||||
<textsetbackup>
|
||||
{_include_xml(Path("global/data/texts/texts.en.xml"))}
|
||||
</textsetbackup>
|
||||
<textset>
|
||||
{_include_xml(texts_xml)}
|
||||
</textset>
|
||||
<document
|
||||
language="{action_lang}"
|
||||
{_get_attributes(action_file)}
|
||||
>
|
||||
<set>
|
||||
{_auto_sources(action_file, lang)}
|
||||
</set>
|
||||
{_include_xml(action_file)}
|
||||
</document>
|
||||
</buildinfo>
|
||||
"""
|
||||
return result_str
|
||||
|
||||
|
||||
def process_file(infile: Path, processor: Path) -> str:
|
||||
@@ -181,54 +202,41 @@ def process_file(infile: Path, processor: Path) -> str:
|
||||
"""
|
||||
logger.debug(f"Processing {infile}")
|
||||
lang = lang_from_filename(infile)
|
||||
parser = etree.XMLParser(remove_blank_text=True, remove_comments=True)
|
||||
xmlstream = _build_xmlstream(infile, parser)
|
||||
xslt_tree = etree.parse(processor.resolve(), parser)
|
||||
xmlstream = _build_xmlstream(infile)
|
||||
xslt_tree = etree.parse(processor.resolve())
|
||||
transform = etree.XSLT(xslt_tree)
|
||||
result = transform(xmlstream)
|
||||
result = str(transform(etree.XML(xmlstream)))
|
||||
# And now a bunch of regexes to fix some links.
|
||||
# xx is the language code in all comments
|
||||
try:
|
||||
for linkelem in result.xpath("//*[@href]"):
|
||||
# remove any spurious whitespace
|
||||
linkelem.set(
|
||||
"href",
|
||||
linkelem.get("href").strip(),
|
||||
)
|
||||
# Remove https://fsfe.org (or https://test.fsfe.org)
|
||||
# from the start of all links
|
||||
linkelem.set(
|
||||
"href",
|
||||
re.sub(
|
||||
r"""^(https?://(test\.)?fsfe\.org)""",
|
||||
"",
|
||||
linkelem.get("href"),
|
||||
flags=re.IGNORECASE,
|
||||
),
|
||||
)
|
||||
# Change links from /foo/bar.html into /foo/bar.xx.html
|
||||
# Change links from foo/bar.html into foo/bar.xx.html
|
||||
# Same for .rss and .ics links
|
||||
linkelem.set(
|
||||
"href",
|
||||
re.sub(
|
||||
r"""^(/?([^:>]+/)?[^:/.]{3,}\.)(html|rss|ics)""",
|
||||
rf"""\1{lang}.\3""",
|
||||
linkelem.get("href"),
|
||||
flags=re.IGNORECASE,
|
||||
),
|
||||
)
|
||||
# Change links from /foo/bar/ into /foo/bar/index.xx.html
|
||||
# Change links from foo/bar/ into foo/bar/index.xx.html
|
||||
linkelem.set(
|
||||
"href",
|
||||
re.sub(
|
||||
r"""^(/?[^:>]+/)$""",
|
||||
rf"""\1index.{lang}.html""",
|
||||
linkelem.get("href"),
|
||||
flags=re.IGNORECASE,
|
||||
),
|
||||
)
|
||||
except AssertionError:
|
||||
logger.debug(f"Output generated for file {infile} is not valid xml")
|
||||
|
||||
# TODO
|
||||
# Probably a faster way to do this
|
||||
# Maybe iterating through all <a> tags with lxml?
|
||||
# Once buildxmlstream generates an xml object that should be faster.
|
||||
|
||||
# Remove https://fsfe.org (or https://test.fsfe.org) from the start of all links
|
||||
result = re.sub(
|
||||
r"""href\s*=\s*("|')(https?://(test\.)?fsfe\.org)([^>])\1""",
|
||||
r"""href=\1\3\1""",
|
||||
result,
|
||||
flags=re.MULTILINE | re.IGNORECASE,
|
||||
)
|
||||
# Change links from /foo/bar.html into /foo/bar.xx.html
|
||||
# Change links from foo/bar.html into foo/bar.xx.html
|
||||
# Same for .rss and .ics links
|
||||
result = re.sub(
|
||||
r"""href\s*=\s*("|')(/?([^:>]+/)?[^:/.]+\.)(html|rss|ics)(#[^>]*)?\1""",
|
||||
rf"""href=\1\2{lang}.\4\5\1""",
|
||||
result,
|
||||
flags=re.MULTILINE | re.IGNORECASE,
|
||||
)
|
||||
# Change links from /foo/bar/ into /foo/bar/index.xx.html
|
||||
# Change links from foo/bar/ into foo/bar/index.xx.html
|
||||
result = re.sub(
|
||||
r"""href\s*=\s*("|')(/?[^:>]+/)\1""",
|
||||
rf"""href=\1\2index.{lang}.html\1""",
|
||||
result,
|
||||
flags=re.MULTILINE | re.IGNORECASE,
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
@@ -16,5 +16,5 @@ def full() -> None:
|
||||
"""
|
||||
logger.info("Performing a full rebuild, git cleaning")
|
||||
run_command(
|
||||
["git", "clean", "-fdx", "--exclude", "/.venv", "--exclude", "/.nltk_data"],
|
||||
["git", "clean", "-fdx", "--exclude", "/.venv"],
|
||||
)
|
||||
|
||||
@@ -45,7 +45,7 @@ def _run_process(
|
||||
logger.debug(f"Building {target_file}")
|
||||
result = process_file(source_file, processor)
|
||||
target_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
result.write_output(target_file)
|
||||
target_file.write_text(result)
|
||||
|
||||
|
||||
def _process_dir(
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<!-- ====================================================================== -->
|
||||
|
||||
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
||||
<xsl:output method="text" encoding="utf-8"/>
|
||||
<xsl:output method="text" encoding="UTF-8"/>
|
||||
|
||||
<xsl:template match="version">
|
||||
<xsl:value-of select="."/>
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
||||
<xsl:import href="xslt/drm_info_head.xsl" />
|
||||
<xsl:import href="xslt/drm_info_body.xsl" />
|
||||
<xsl:output method="html" omit-xml-declaration="yes" encoding="utf-8" doctype-system="about:legacy-compat" />
|
||||
<xsl:output method="html" omit-xml-declaration="yes" encoding="utf-8" indent="yes" doctype-system="about:legacy-compat" />
|
||||
<xsl:include href="../build/xslt/fsfe_document.xsl" />
|
||||
<xsl:include href="../build/xslt/fsfe_nolocal.xsl" />
|
||||
</xsl:stylesheet>
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
xmlns:str="http://exslt.org/strings"
|
||||
extension-element-prefixes="str">
|
||||
|
||||
<xsl:output method="text" encoding="utf-8" />
|
||||
<xsl:output method="text" encoding="UTF-8" indent="no" />
|
||||
<xsl:strip-space elements="body"/>
|
||||
|
||||
<!-- new line template -->
|
||||
|
||||
@@ -4,7 +4,8 @@
|
||||
|
||||
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
||||
|
||||
<xsl:output method="xml" encoding="utf-8" />
|
||||
<xsl:output method="xml" encoding="UTF-8" omit-xml-declaration="yes"
|
||||
indent="yes" />
|
||||
|
||||
<!-- ============= -->
|
||||
<!-- Link handling -->
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
<xsl:include href="../build/xslt/peertube.xsl" />
|
||||
|
||||
<!-- HTML 5 compatibility doctype, since our XSLT parser doesn't support disabling output escaping -->
|
||||
<xsl:output method="html" encoding="utf-8" doctype-system="about:legacy-compat" />
|
||||
<xsl:output method="html" encoding="utf-8" indent="yes" doctype-system="about:legacy-compat" />
|
||||
|
||||
<!-- EXTRACT / DESCRIPTION of each page -->
|
||||
<xsl:variable name="metadesc">
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||
xmlns:atom="http://www.w3.org/2005/Atom">
|
||||
|
||||
<xsl:output method="xml" encoding="utf-8"/>
|
||||
<xsl:output method="xml" encoding="utf-8" indent="yes"/>
|
||||
|
||||
<!-- ====== -->
|
||||
<!-- Months -->
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
||||
<xsl:import href="podcast.rss.xsl" />
|
||||
|
||||
<xsl:output method="xml" encoding="utf-8" />
|
||||
<xsl:output method="xml" encoding="utf-8" indent="yes" />
|
||||
|
||||
<xsl:template match="/">
|
||||
<xsl:apply-templates select="/buildinfo/document">
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
<xsl:import href="../../build/xslt/gettext.xsl" />
|
||||
|
||||
<xsl:output method="xml" encoding="utf-8"/>
|
||||
<xsl:output method="xml" encoding="utf-8" indent="yes"/>
|
||||
|
||||
<!-- ====== -->
|
||||
<!-- Months -->
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
||||
|
||||
<xsl:output method="xml" encoding="utf-8" />
|
||||
<xsl:output method="xml" encoding="UTF-8" indent="yes"/>
|
||||
|
||||
<xsl:param name="link"/>
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
||||
<xsl:import href="xslt/pdfreaders_head.xsl" />
|
||||
<xsl:import href="xslt/pdfreaders_body.xsl" />
|
||||
<xsl:output method="html" omit-xml-declaration="yes" encoding="utf-8" doctype-system="about:legacy-compat" />
|
||||
<xsl:output method="html" omit-xml-declaration="yes" encoding="utf-8" indent="yes" doctype-system="about:legacy-compat" />
|
||||
|
||||
<xsl:include href="../build/xslt/fsfe_document.xsl" />
|
||||
<xsl:include href="xslt/pdfreaders_list.xsl" />
|
||||
|
||||
13
shell.nix
13
shell.nix
@@ -5,7 +5,6 @@
|
||||
},
|
||||
}:
|
||||
let
|
||||
inherit (pkgs) lib;
|
||||
treefmt-nixSrc = builtins.fetchTarball "https://github.com/numtide/treefmt-nix/archive/refs/heads/master.tar.gz";
|
||||
treefmt-nix = import treefmt-nixSrc;
|
||||
in
|
||||
@@ -17,8 +16,6 @@ in
|
||||
(with pkgs; [
|
||||
# For getting python deps
|
||||
uv
|
||||
# Need to use a nix python to prevent ssl certs issues
|
||||
python312
|
||||
# needed by lxml
|
||||
libxslt
|
||||
libxml2
|
||||
@@ -59,13 +56,5 @@ in
|
||||
pkgs:
|
||||
(with pkgs; [
|
||||
]);
|
||||
runScript = pkgs.writeShellScript "fsfe-website-env" ''
|
||||
set -euo pipefail
|
||||
# Force uv to use Python interpreter from venv
|
||||
export UV_PYTHON="${lib.getExe pkgs.python312}";
|
||||
# Prevent uv from downloading managed Pythons
|
||||
export UV_PYTHON_DOWNLOADS="never"
|
||||
uv venv
|
||||
bash --rcfile .venv/bin/activate "$@"
|
||||
'';
|
||||
# runScript = '''';
|
||||
}).env
|
||||
|
||||
Reference in New Issue
Block a user