feat/overview (#5435)
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
- Fixes issues with the XML structure pre-commit hook - Moves compare-files to lib - Adds an overview page on status.fsfe.org to see which files have different structures Co-authored-by: Darragh Elliott <me@delliott.net> Reviewed-on: #5435 Co-authored-by: delliott <delliott@fsfe.org> Co-committed-by: delliott <delliott@fsfe.org>
This commit was merged in pull request #5435.
This commit is contained in:
86
build/fsfe_website_build/lib/checks.py
Normal file
86
build/fsfe_website_build/lib/checks.py
Normal file
@@ -0,0 +1,86 @@
|
||||
# SPDX-FileCopyrightText: Free Software Foundation Europe e.V. <https://fsfe.org>
|
||||
#
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from lxml import etree
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def compare_files(
    file1: Path,
    file2: Path,
    attr_whitelist: set[str] | None = None,
    _path: str = "",
) -> list[str]:
    """
    Parse two XML files and compare the structure of their root elements.

    Args:
        file1: Path of the reference file.
        file2: Path of the file compared against ``file1``.
        attr_whitelist: Attribute names whose values are allowed to differ;
            passed through to ``compare_elements``.
        _path: Location prefix passed through to ``compare_elements``.

    Returns:
        The list of error strings produced by ``compare_elements`` for the
        two root elements (empty when nothing was reported).

    Note:
        If either file is not well-formed XML the parser error is logged at
        critical level and the process is terminated via ``sys.exit(1)``;
        no exception is returned to the caller.
    """
    try:
        t1, t2 = etree.parse(file1), etree.parse(file2)
    except etree.XMLSyntaxError as e:
        logger.critical("XML parse error: %s", e)
        sys.exit(1)

    # Forward _path so a caller-supplied prefix shows up in reported locations.
    return compare_elements(t1.getroot(), t2.getroot(), attr_whitelist, _path)
|
||||
|
||||
|
||||
def compare_elements(
|
||||
elem1: etree.Element,
|
||||
elem2: etree.Element,
|
||||
attr_whitelist: set[str] | None = None,
|
||||
_path: str = "",
|
||||
) -> list[str]:
|
||||
"""
|
||||
Recursively compare two XML elements.
|
||||
Returns a list of short, informative error strings.
|
||||
"""
|
||||
if attr_whitelist is None:
|
||||
attr_whitelist = set()
|
||||
|
||||
errors: list[str] = []
|
||||
tag_path = f"{_path}/{elem1.tag}" if _path else elem1.tag
|
||||
|
||||
# tag mismatch
|
||||
if elem1.tag != elem2.tag:
|
||||
errors.append(f"Tag mismatch at {tag_path}: {elem1.tag} ≠ {elem2.tag}")
|
||||
return errors # if tags differ, stop descending
|
||||
|
||||
# attribute deltas
|
||||
attributes_of_elem1 = dict(elem1.attrib.items())
|
||||
attributes_of_elem2 = dict(elem2.attrib.items())
|
||||
|
||||
only_in_elem1 = set(attributes_of_elem1) - set(attributes_of_elem2)
|
||||
only_in_elem2 = set(attributes_of_elem2) - set(attributes_of_elem1)
|
||||
common = set(attributes_of_elem1) & set(attributes_of_elem2)
|
||||
|
||||
if only_in_elem1 or only_in_elem2:
|
||||
errors.append(
|
||||
f"Attribute delta at <{elem1.tag}>"
|
||||
f" only 1: {list(only_in_elem1)} only 2: {list(only_in_elem2)}"
|
||||
)
|
||||
for key in common:
|
||||
if (
|
||||
attributes_of_elem1[key] != attributes_of_elem2[key]
|
||||
and key not in attr_whitelist
|
||||
):
|
||||
error_msg = (
|
||||
f"Attribute value diff at <{elem1.tag} {key}>:"
|
||||
f" {attributes_of_elem1[key]!r} ≠ {attributes_of_elem2[key]!r}"
|
||||
)
|
||||
errors.append(error_msg)
|
||||
|
||||
# child count
|
||||
kids1 = list(elem1)
|
||||
kids2 = list(elem2)
|
||||
if len(kids1) != len(kids2):
|
||||
errors.append(f"Child count at <{elem1.tag}>: {len(kids1)} ≠ {len(kids2)}")
|
||||
return errors # if counts differ, stop descending
|
||||
|
||||
# and then recurse into children
|
||||
for idx, (child1, child2) in enumerate(zip(kids1, kids2, strict=False), start=1):
|
||||
errors.extend(
|
||||
compare_elements(child1, child2, attr_whitelist, _path=f"{tag_path}[{idx}]")
|
||||
)
|
||||
|
||||
return errors
|
||||
@@ -51,6 +51,12 @@
|
||||
<a href="translations/">Translation status</a>
|
||||
</li>
|
||||
</ul>
|
||||
<h2>Files with mismatched xml structure</h2>
|
||||
<ul>
|
||||
<li>
|
||||
<a href="xml_structure/">XML structure status</a>
|
||||
</li>
|
||||
</ul>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
|
||||
6
status.fsfe.org/xml_structure/__init__.py
Normal file
6
status.fsfe.org/xml_structure/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
# SPDX-FileCopyrightText: Free Software Foundation Europe e.V. <https://fsfe.org>
|
||||
#
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
# __init__.py is a special Python file that allows a directory to become
|
||||
# a Python package so it can be accessed using the 'import' statement.
|
||||
32
status.fsfe.org/xml_structure/default.xsl
Normal file
32
status.fsfe.org/xml_structure/default.xsl
Normal file
@@ -0,0 +1,32 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
|
||||
<xsl:import href="../../fsfe.org/fsfe.xsl"/>
|
||||
<xsl:template match="xml-structure-status">
|
||||
<div class="xml-structure-status">
|
||||
<h2>Status</h2>
|
||||
<xsl:for-each select="/buildinfo/document/set/master">
|
||||
<xsl:sort select="@name" order="ascending"/>
|
||||
<details>
|
||||
<summary>
|
||||
<xsl:value-of select="@name"/>
|
||||
</summary>
|
||||
<ul>
|
||||
<xsl:for-each select="detail">
|
||||
<li>
|
||||
<a>
|
||||
<xsl:attribute name="href">
|
||||
<xsl:text>https://git.fsfe.org/FSFE/fsfe-website/src/branch/master/</xsl:text>
|
||||
<xsl:value-of select="@name"/>
|
||||
</xsl:attribute>
|
||||
<xsl:value-of select="@name"/>
|
||||
</a>
|
||||
<xsl:text>: </xsl:text>
|
||||
<xsl:value-of select="@error"/>
|
||||
</li>
|
||||
</xsl:for-each>
|
||||
</ul>
|
||||
</details>
|
||||
</xsl:for-each>
|
||||
</div>
|
||||
</xsl:template>
|
||||
</xsl:stylesheet>
|
||||
13
status.fsfe.org/xml_structure/index.en.xhtml
Normal file
13
status.fsfe.org/xml_structure/index.en.xhtml
Normal file
@@ -0,0 +1,13 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<html external="true">
|
||||
<version>1</version>
|
||||
<head>
|
||||
<title>FSFE XML Structure Status</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>XML Structure Status</h1>
|
||||
<p>Here we have the output of a simple script to generate a log of what files have unmatched xml structure.</p>
|
||||
<p>The aim of this is to make it easier to have some idea of what files we should be working on.</p>
|
||||
<xml-structure-status/>
|
||||
</body>
|
||||
</html>
|
||||
1
status.fsfe.org/xml_structure/index.sources
Normal file
1
status.fsfe.org/xml_structure/index.sources
Normal file
@@ -0,0 +1 @@
|
||||
status.fsfe.org/xml_structure/data/*:[]
|
||||
98
status.fsfe.org/xml_structure/subdir.py
Normal file
98
status.fsfe.org/xml_structure/subdir.py
Normal file
@@ -0,0 +1,98 @@
|
||||
# SPDX-FileCopyrightText: Free Software Foundation Europe e.V. <https://fsfe.org>
|
||||
#
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
import logging
|
||||
import multiprocessing
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
from fsfe_website_build.lib.checks import compare_files
|
||||
from fsfe_website_build.lib.misc import (
|
||||
get_basepath,
|
||||
get_version,
|
||||
lang_from_filename,
|
||||
run_command,
|
||||
update_if_changed,
|
||||
)
|
||||
from lxml import etree
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _job(
    master: Path, other: Path, whitelist: set[str]
) -> tuple[Path, Path, str] | None:
    """
    Compare one file against its master; worker function for ``starmap``.

    Args:
        master: The file the comparison is made against.
        other: The file being checked.
        whitelist: Attribute names passed on to ``compare_files``.

    Returns:
        ``None`` when the two files report different versions or when no
        difference was found; otherwise ``(master, other, message)`` where
        ``message`` is the differences joined with ``"; "``.
    """
    if get_version(master) != get_version(other):
        return None
    try:
        errs = compare_files(master, other, whitelist)
    except SystemExit:
        # compare_files() logs the parser error and calls sys.exit() on
        # malformed XML. Letting SystemExit escape would end this pool worker
        # without handing a result back, so the parent's starmap() could wait
        # indefinitely. Report the file instead.
        return (master, other, "XML parse error")
    return (master, other, "; ".join(errs)) if errs else None
|
||||
|
||||
|
||||
def run(source: Path, languages: list[str], processes: int, working_dir: Path) -> None:  # noqa: ARG001
    """
    Build the xml-structure log that is displayed on the status page.

    Git-tracked ``.xhtml``/``.xml`` files in the requested languages are
    grouped by base path, every non-English file is compared with the English
    file of its group, and the reported differences are written to
    ``<working_dir>/data/xml-structure-status.en.xml``.

    Args:
        source: Unused here.
        languages: Only files whose language is in this list are processed.
        processes: Number of worker processes for the comparison pool.
        working_dir: Directory below which the ``data/`` output dir is created.
    """
    target_dir = working_dir.joinpath("data/")
    logger.debug("Building index of status of xml structure into dir %s", target_dir)
    all_git_tracked_files = run_command(
        ["git", "ls-files", "-z"],
    )

    # Split on null bytes, strip and then parse into path
    tracked_paths = (
        Path(line.strip()) for line in all_git_tracked_files.split("\x00")
    )
    # Sorted, because set iteration order depends on hashing and may differ
    # between runs; a stable order keeps the generated XML identical for
    # identical input.
    all_files = sorted(
        {
            path
            for path in tracked_paths
            if path.suffix in [".xhtml", ".xml"]
            and len(path.suffixes) >= 2  # noqa: PLR2004
            and lang_from_filename(path) in languages
        }
    )
    whitelist = {"alt"}
    groups: defaultdict[Path, list[Path]] = defaultdict(list)
    for path in all_files:
        groups[get_basepath(path)].append(path)

    tasks: list[tuple[Path, Path, set[str]]] = []
    for basepath, paths in groups.items():
        # The English file of a group is the reference for all others.
        master = next(
            (path for path in paths if lang_from_filename(path) == "en"),
            None,
        )
        if not master:
            logger.debug("No english translation of %s - skipped", basepath)
            continue
        tasks.extend((master, path, whitelist) for path in paths if path != master)

    with multiprocessing.Pool(processes) as pool:
        filtered_results = [
            result for result in pool.starmap(_job, tasks) if result is not None
        ]

    # Build dict: master: list of (other, message)
    tree: dict[Path, list[tuple[Path, str]]] = defaultdict(list)
    for master, other, message in filtered_results:
        tree[master].append((other, message))

    # Generate XML
    work_file = target_dir.joinpath("xml-structure-status.en.xml")
    target_dir.mkdir(parents=True, exist_ok=True)

    root = etree.Element("xml-structure-status")
    version_el = etree.SubElement(root, "version")
    version_el.text = "1"

    for master, details in tree.items():
        master_el = etree.SubElement(root, "master", name=str(master))
        for other, msg in details:
            etree.SubElement(
                master_el,
                "detail",
                name=str(other),
                error=msg,
            )

    xml_bytes = etree.tostring(root, xml_declaration=True, encoding="utf-8")
    # NOTE(review): presumably only rewrites the file when the content
    # differs - confirm against update_if_changed.
    update_if_changed(work_file, xml_bytes.decode("utf-8"))
|
||||
@@ -6,117 +6,29 @@ import sys
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
from fsfe_website_build.lib.checks import (
|
||||
compare_files,
|
||||
)
|
||||
from fsfe_website_build.lib.misc import (
|
||||
get_basepath,
|
||||
get_version,
|
||||
lang_from_filename,
|
||||
)
|
||||
from lxml import etree
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def compare_elements(
|
||||
elem1: etree.Element,
|
||||
elem2: etree.Element,
|
||||
attr_whitelist: set[str] | None = None,
|
||||
_path: str = "",
|
||||
) -> list[str]:
|
||||
"""
|
||||
Recursively compare two XML elements.
|
||||
Returns a list of short, informative error strings.
|
||||
"""
|
||||
if attr_whitelist is None:
|
||||
attr_whitelist = set()
|
||||
|
||||
errors: list[str] = []
|
||||
tag_path = f"{_path}/{elem1.tag}" if _path else elem1.tag
|
||||
|
||||
# tag mismatch
|
||||
if elem1.tag != elem2.tag:
|
||||
errors.append(f"Tag mismatch at {tag_path}: {elem1.tag} ≠ {elem2.tag}")
|
||||
return errors # if tags differ, stop descending
|
||||
|
||||
# attribute deltas
|
||||
attributes_of_elem1 = dict(elem1.attrib.items())
|
||||
attributes_of_elem2 = dict(elem2.attrib.items())
|
||||
|
||||
only_in_elem1 = set(attributes_of_elem1) - set(attributes_of_elem2)
|
||||
only_in_elem2 = set(attributes_of_elem2) - set(attributes_of_elem1)
|
||||
common = set(attributes_of_elem1) & set(attributes_of_elem2)
|
||||
|
||||
if only_in_elem1 or only_in_elem2:
|
||||
errors.append(
|
||||
f"Attribute delta at <{elem1.tag}>"
|
||||
f" only 1: {list(only_in_elem1)} only 2: {list(only_in_elem2)}"
|
||||
)
|
||||
for key in common:
|
||||
if (
|
||||
attributes_of_elem1[key] != attributes_of_elem2[key]
|
||||
and key not in attr_whitelist
|
||||
):
|
||||
error_msg = (
|
||||
f"Attribute value diff at <{elem1.tag} {key}>:"
|
||||
f" {attributes_of_elem1[key]!r} ≠ {attributes_of_elem2[key]!r}"
|
||||
)
|
||||
errors.append(error_msg)
|
||||
|
||||
# child count
|
||||
kids1 = list(elem1)
|
||||
kids2 = list(elem2)
|
||||
if len(kids1) != len(kids2):
|
||||
errors.append(f"Child count at <{elem1.tag}>: {len(kids1)} ≠ {len(kids2)}")
|
||||
return errors # if counts differ, stop descending
|
||||
|
||||
# and then recurse into children
|
||||
for idx, (child1, child2) in enumerate(zip(kids1, kids2, strict=False), start=1):
|
||||
errors.extend(
|
||||
compare_elements(child1, child2, attr_whitelist, _path=f"{tag_path}[{idx}]")
|
||||
)
|
||||
|
||||
return errors
|
||||
|
||||
|
||||
def _job(master: Path, other: Path, whitelist: set[str]) -> str | None:
    """
    Compare one file against its master; worker function for ``starmap``.

    Returns:
        ``None`` when the two files report different versions or when no
        difference was found; otherwise a one-line string starting with the
        path of ``other`` followed by the differences (joined with ``"; "``)
        or by ``ERROR`` and the failure reason.
    """
    try:
        if get_version(master) != get_version(other):
            # Differing versions are treated as "nothing to report".
            return None
        errs = compare_files(master, other, whitelist)
        return f"{other}: {'; '.join(errs)}" if errs else None
    except SystemExit:
        # compare_files() logs the parser error and calls sys.exit() on
        # malformed XML; SystemExit is not an Exception subclass, so it needs
        # its own handler to be reported instead of ending the pool worker.
        return f"{other}: ERROR XML parse error"
    except Exception as e:
        return f"{other}: ERROR {e}"
|
||||
|
||||
|
||||
def compare_two_files(file1: Path, file2: Path, whitelist: set[str]) -> None:
    """
    Compare the XML structure of two files and report the outcome.

    Returns normally when the files report different versions (the
    comparison is then considered okay) or when no difference is found.
    Terminates the process with status 2 when the version check raises
    ``ValueError``, and with status 1 on an XML syntax error or when
    differences are found.
    """
    try:
        versions = (get_version(file1), get_version(file2))
    except ValueError as e:
        logger.critical("Version check failed: %s", e)
        sys.exit(2)

    if versions[0] != versions[1]:
        logger.info("Files are different versions, considering comparison okay")
        return

    try:
        first_tree = etree.parse(file1)
        second_tree = etree.parse(file2)
    except etree.XMLSyntaxError as e:
        logger.critical("XML parse error: %s", e)
        sys.exit(1)

    differences = compare_elements(
        first_tree.getroot(), second_tree.getroot(), whitelist
    )
    if not differences:
        logger.info("XML files match in structure and attributes.")
        return

    logger.warning("Differences found:\n%s", "\n".join(differences))
    sys.exit(1)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Compare XML structure and attributes. "
|
||||
@@ -172,9 +84,9 @@ def main() -> None:
|
||||
tasks.extend((master, path, args.whitelist) for path in paths if path != master)
|
||||
|
||||
with multiprocessing.Pool(processes=args.jobs) as pool:
|
||||
filtered_results = (
|
||||
filtered_results = [
|
||||
result for result in pool.starmap(_job, tasks) if result is not None
|
||||
)
|
||||
]
|
||||
if filtered_results:
|
||||
for result in filtered_results:
|
||||
logger.info(result)
|
||||
|
||||
Reference in New Issue
Block a user