fix: clean xml comparison messages (#5626)
All checks were successful
continuous-integration/drone/push Build is passing

Some better messages, and more whitelisting.

Co-authored-by: Darragh Elliott <me@delliott.net>
Reviewed-on: #5626
Co-authored-by: delliott <delliott@fsfe.org>
Co-committed-by: delliott <delliott@fsfe.org>
This commit was merged in pull request #5626.
This commit is contained in:
2026-01-15 16:06:04 +00:00
committed by tobiasd
parent 59513c6b2d
commit 5801952388
2 changed files with 46 additions and 37 deletions

View File

@@ -85,7 +85,9 @@ def compare_elements(
# tag mismatch
if elem1.tag != elem2.tag:
errors.append(f"Tag mismatch at {tag_path}: {elem1.tag}{elem2.tag}")
errors.append(
f"Tag of elements at <{tag_path}> differ: {elem1.tag}{elem2.tag}"
)
return errors # if tags differ, stop descending
# attribute deltas
@@ -98,14 +100,14 @@ def compare_elements(
if only_in_elem1 or only_in_elem2:
errors.append(
f"Attribute delta at <{elem1.tag}>"
f" only 1: {only_in_elem1} only 2: {only_in_elem2}"
f"Attributes of element <{elem1.tag}> differ: "
f"{only_in_elem1} {only_in_elem2}"
)
for key in common:
if attributes_of_elem1[key] != attributes_of_elem2[key]:
error_msg = (
f"Attribute value diff at <{elem1.tag} {key}>:"
f" {attributes_of_elem1[key]!r}{attributes_of_elem2[key]!r}"
f"Attribute '{key}' of element <{elem1.tag}> differs: "
f"{attributes_of_elem1[key]!r}{attributes_of_elem2[key]!r}"
)
errors.append(error_msg)
@@ -113,11 +115,13 @@ def compare_elements(
kids1 = list(elem1)
kids2 = list(elem2)
if len(kids1) != len(kids2):
errors.append(f"Child count at <{elem1.tag}>: {len(kids1)}{len(kids2)}")
errors.append(
f"Number of children of <{elem1.tag}> differs: {len(kids1)}{len(kids2)}"
)
return errors # if counts differ, stop descending
# and then recurse into children
for idx, (child1, child2) in enumerate(zip(kids1, kids2, strict=False), start=1):
for idx, (child1, child2) in enumerate(zip(kids1, kids2, strict=False)):
errors.extend(
compare_elements(
child1, child2, xpaths_to_ignore=(), _path=f"{tag_path}[{idx}]"

View File

@@ -19,58 +19,64 @@ from fsfe_website_build.lib.checks import (
from fsfe_website_build.lib.misc import (
get_basepath,
get_version,
lang_from_filename,
)
logger = logging.getLogger(__name__)
def _job(master: Path, other: Path, whitelist: set[str]) -> str | None:
"""Return a one-line result string for starmap."""
"""Return a result string for starmap."""
try:
if get_version(master) != get_version(other):
return None
errs = compare_files(master, other, whitelist)
return f"{other}: {'; '.join(errs)}" if errs else None
return (
f"There were differences between {master} and {other}:\n{'\n'.join(errs)}"
if errs
else None
)
except Exception as e:
return f"{other}: ERROR {e}"
return f"Exception occurred comparing {master} and {other}: ERROR {e}"
def main() -> None:
"""Check that the passed files match xml structure across languages."""
parser = argparse.ArgumentParser(
description="Compare XML structure and attributes. "
"Use --multi for space-separated list + parallel compares."
"Use --multi for space-separated list + parallel compares.",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument("files", nargs="*", help="XML file(s)")
parser.add_argument(
"-w",
"--whitelist",
type=lambda attributes: set(attributes.split(",")),
nargs="+",
type=str,
default=[
"//img[@alt]", # Image alt text
"//track[@srclang]", # Languages, used in some track elements
"//track[@label]", # Language label, used in some track elements
"//discussion/@href", # Mastodon links can be in different langs
"/html/translator", # the translator
"//discussion[@href]", # Mastodon links can be in different langs
"//image[@alt]", # Image alt text for title image
"//profileimage[@alt]", # Profilemage alt text for about/people images
"//image/@alt", # Image alt text for title image
"//img/@alt", # Image alt text
"//input[@name='language']", # Input language types
"//profileimage/@alt", # Profilemage alt text for about/people images
"//track/@label", # Language label, used in some track elements
"//track/@srclang", # Languages, used in some track elements
],
help="Comma-separated list xpaths that we then ignore.",
help="XPATHS that we then ignore.",
)
parser.add_argument(
"--log-level",
type=str,
default="INFO",
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
help="Set the logging level (default: INFO)",
help="Set the logging level",
)
parser.add_argument(
"-j",
"--jobs",
type=int,
default=multiprocessing.cpu_count(),
help="Parallel workers (multi mode)",
help="Parallel workers",
)
args = parser.parse_args()
logging.basicConfig(
@@ -79,25 +85,24 @@ def main() -> None:
level=args.log_level,
)
groups: defaultdict[Path, list[Path]] = defaultdict(list)
groups: defaultdict[tuple[Path, str], list[Path]] = defaultdict(list)
for file in args.files:
path = Path(file).resolve()
groups[get_basepath(path)].append(path)
groups[(get_basepath(path), path.suffix)].append(path)
tasks: list[tuple[Path, Path, set[str]]] = []
for basepath, paths in groups.items():
master = next(
(
path
for path in paths
if len(path.suffixes) >= 2 and lang_from_filename(path) == "en" # noqa: PLR2004
),
None,
)
if not master:
logger.warning("No english translation of %s - skipped", basepath)
continue
tasks.extend((master, path, args.whitelist) for path in paths if path != master)
for path_data, paths in groups.items():
basepath, suffix = path_data
if (master := basepath.parent / f"{basepath.name}.en{suffix}").exists():
tasks.extend(
(master, path, args.whitelist) for path in paths if path != master
)
else:
logger.warning(
"No english translation of %s with suffix %s exists - skipping",
basepath,
suffix,
)
with multiprocessing.Pool(processes=args.jobs) as pool:
filtered_results = [