fix: clean xml comparison messages (#5626)
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
Some better messages, and more whitelisting. Co-authored-by: Darragh Elliott <me@delliott.net> Reviewed-on: #5626 Co-authored-by: delliott <delliott@fsfe.org> Co-committed-by: delliott <delliott@fsfe.org>
This commit was merged in pull request #5626.
This commit is contained in:
@@ -85,7 +85,9 @@ def compare_elements(
|
||||
|
||||
# tag mismatch
|
||||
if elem1.tag != elem2.tag:
|
||||
errors.append(f"Tag mismatch at {tag_path}: {elem1.tag} ≠ {elem2.tag}")
|
||||
errors.append(
|
||||
f"Tag of elements at <{tag_path}> differ: {elem1.tag} ≠ {elem2.tag}"
|
||||
)
|
||||
return errors # if tags differ, stop descending
|
||||
|
||||
# attribute deltas
|
||||
@@ -98,14 +100,14 @@ def compare_elements(
|
||||
|
||||
if only_in_elem1 or only_in_elem2:
|
||||
errors.append(
|
||||
f"Attribute delta at <{elem1.tag}>"
|
||||
f" only 1: {only_in_elem1} only 2: {only_in_elem2}"
|
||||
f"Attributes of element <{elem1.tag}> differ: "
|
||||
f"{only_in_elem1} ≠ {only_in_elem2}"
|
||||
)
|
||||
for key in common:
|
||||
if attributes_of_elem1[key] != attributes_of_elem2[key]:
|
||||
error_msg = (
|
||||
f"Attribute value diff at <{elem1.tag} {key}>:"
|
||||
f" {attributes_of_elem1[key]!r} ≠ {attributes_of_elem2[key]!r}"
|
||||
f"Attribute '{key}' of element <{elem1.tag}> differs: "
|
||||
f"{attributes_of_elem1[key]!r} ≠ {attributes_of_elem2[key]!r}"
|
||||
)
|
||||
errors.append(error_msg)
|
||||
|
||||
@@ -113,11 +115,13 @@ def compare_elements(
|
||||
kids1 = list(elem1)
|
||||
kids2 = list(elem2)
|
||||
if len(kids1) != len(kids2):
|
||||
errors.append(f"Child count at <{elem1.tag}>: {len(kids1)} ≠ {len(kids2)}")
|
||||
errors.append(
|
||||
f"Number of children of <{elem1.tag}> differs: {len(kids1)} ≠ {len(kids2)}"
|
||||
)
|
||||
return errors # if counts differ, stop descending
|
||||
|
||||
# and then recurse into children
|
||||
for idx, (child1, child2) in enumerate(zip(kids1, kids2, strict=False), start=1):
|
||||
for idx, (child1, child2) in enumerate(zip(kids1, kids2, strict=False)):
|
||||
errors.extend(
|
||||
compare_elements(
|
||||
child1, child2, xpaths_to_ignore=(), _path=f"{tag_path}[{idx}]"
|
||||
|
||||
@@ -19,58 +19,64 @@ from fsfe_website_build.lib.checks import (
|
||||
from fsfe_website_build.lib.misc import (
|
||||
get_basepath,
|
||||
get_version,
|
||||
lang_from_filename,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _job(master: Path, other: Path, whitelist: set[str]) -> str | None:
|
||||
"""Return a one-line result string for starmap."""
|
||||
"""Return a result string for starmap."""
|
||||
try:
|
||||
if get_version(master) != get_version(other):
|
||||
return None
|
||||
errs = compare_files(master, other, whitelist)
|
||||
return f"{other}: {'; '.join(errs)}" if errs else None
|
||||
return (
|
||||
f"There were differences between {master} and {other}:\n{'\n'.join(errs)}"
|
||||
if errs
|
||||
else None
|
||||
)
|
||||
except Exception as e:
|
||||
return f"{other}: ERROR {e}"
|
||||
return f"Exception occurred comparing {master} and {other}: ERROR {e}"
|
||||
|
||||
|
||||
def main() -> None:
|
||||
"""Check that the passed files match xml structure across languages."""
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Compare XML structure and attributes. "
|
||||
"Use --multi for space-separated list + parallel compares."
|
||||
"Use --multi for space-separated list + parallel compares.",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
)
|
||||
parser.add_argument("files", nargs="*", help="XML file(s)")
|
||||
parser.add_argument(
|
||||
"-w",
|
||||
"--whitelist",
|
||||
type=lambda attributes: set(attributes.split(",")),
|
||||
nargs="+",
|
||||
type=str,
|
||||
default=[
|
||||
"//img[@alt]", # Image alt text
|
||||
"//track[@srclang]", # Languages, used in some track elements
|
||||
"//track[@label]", # Language label, used in some track elements
|
||||
"//discussion/@href", # Mastodon links can be in different langs
|
||||
"/html/translator", # the translator
|
||||
"//discussion[@href]", # Mastodon links can be in different langs
|
||||
"//image[@alt]", # Image alt text for title image
|
||||
"//profileimage[@alt]", # Profilemage alt text for about/people images
|
||||
"//image/@alt", # Image alt text for title image
|
||||
"//img/@alt", # Image alt text
|
||||
"//input[@name='language']", # Input language types
|
||||
"//profileimage/@alt", # Profilemage alt text for about/people images
|
||||
"//track/@label", # Language label, used in some track elements
|
||||
"//track/@srclang", # Languages, used in some track elements
|
||||
],
|
||||
help="Comma-separated list xpaths that we then ignore.",
|
||||
help="XPATHS that we then ignore.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--log-level",
|
||||
type=str,
|
||||
default="INFO",
|
||||
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
|
||||
help="Set the logging level (default: INFO)",
|
||||
help="Set the logging level",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-j",
|
||||
"--jobs",
|
||||
type=int,
|
||||
default=multiprocessing.cpu_count(),
|
||||
help="Parallel workers (multi mode)",
|
||||
help="Parallel workers",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
logging.basicConfig(
|
||||
@@ -79,25 +85,24 @@ def main() -> None:
|
||||
level=args.log_level,
|
||||
)
|
||||
|
||||
groups: defaultdict[Path, list[Path]] = defaultdict(list)
|
||||
groups: defaultdict[tuple[Path, str], list[Path]] = defaultdict(list)
|
||||
for file in args.files:
|
||||
path = Path(file).resolve()
|
||||
groups[get_basepath(path)].append(path)
|
||||
groups[(get_basepath(path), path.suffix)].append(path)
|
||||
|
||||
tasks: list[tuple[Path, Path, set[str]]] = []
|
||||
for basepath, paths in groups.items():
|
||||
master = next(
|
||||
(
|
||||
path
|
||||
for path in paths
|
||||
if len(path.suffixes) >= 2 and lang_from_filename(path) == "en" # noqa: PLR2004
|
||||
),
|
||||
None,
|
||||
)
|
||||
if not master:
|
||||
logger.warning("No english translation of %s - skipped", basepath)
|
||||
continue
|
||||
tasks.extend((master, path, args.whitelist) for path in paths if path != master)
|
||||
for path_data, paths in groups.items():
|
||||
basepath, suffix = path_data
|
||||
if (master := basepath.parent / f"{basepath.name}.en{suffix}").exists():
|
||||
tasks.extend(
|
||||
(master, path, args.whitelist) for path in paths if path != master
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"No english translation of %s with suffix %s exists - skipping",
|
||||
basepath,
|
||||
suffix,
|
||||
)
|
||||
|
||||
with multiprocessing.Pool(processes=args.jobs) as pool:
|
||||
filtered_results = [
|
||||
|
||||
Reference in New Issue
Block a user