feat: typehint dicts better (#5450)
All checks were successful
continuous-integration/drone/push Build is passing

use defaultdict for dicts

Co-authored-by: Darragh Elliott <me@delliott.net>
Reviewed-on: #5450
This commit was merged in pull request #5450.
This commit is contained in:
2025-10-31 13:00:37 +00:00
parent e63feefb86
commit 55038b216a
5 changed files with 40 additions and 52 deletions

View File

@@ -7,6 +7,7 @@
import logging
import subprocess
from pathlib import Path
from typing import Any, cast
from lxml import etree
@@ -29,9 +30,9 @@ def keys_exists(element: dict, *keys: str) -> bool:
return True
def sort_dict(in_dict: dict) -> dict:
def sort_dict[Dict: dict[Any, Any]](in_dict: Dict) -> Dict:
"""Sort dict by keys."""
return dict(sorted(in_dict.items(), key=lambda ele: ele[0]))
return cast("Dict", dict(sorted(in_dict.items(), key=lambda ele: ele[0])))
def update_if_changed(path: Path, content: str) -> None:

View File

@@ -9,11 +9,12 @@ After this step, all .localmenu.??.xml files will be up to date.
import logging
import multiprocessing.pool
from collections import defaultdict
from pathlib import Path
from lxml import etree
from fsfe_website_build.lib.misc import get_basepath, update_if_changed
from fsfe_website_build.lib.misc import get_basepath, sort_dict, update_if_changed
logger = logging.getLogger(__name__)
@@ -87,24 +88,21 @@ def update_localmenus(
"""Update all the .localmenu.*.xml files containing the local menus."""
logger.info("Updating local menus")
# Get a dict of all source files containing local menus
files_by_dir = {}
files_by_dir: dict[str, set[Path]] = defaultdict(set)
for file in filter(
lambda path: "-template" not in str(path),
lambda path: "-template" not in path.name,
source_dir.glob("**/*.??.xhtml"),
):
xslt_root = etree.parse(file)
if xslt_root.xpath("//localmenu"):
directory = xslt_root.xpath("//localmenu/@dir")
directory = (
str(source.joinpath(directory[0]))
if directory
else str(file.parent.resolve())
directory_xpath = xslt_root.xpath("//localmenu/@dir")
directory = str(
source.joinpath(directory_xpath[0])
if directory_xpath
else file.parent.resolve()
)
if directory not in files_by_dir:
files_by_dir[directory] = set()
files_by_dir[directory].add(file)
for directory, files in files_by_dir.items():
files_by_dir[directory] = sorted(files)
files_by_dir = sort_dict(files_by_dir)
# If any of the source files has been updated, rebuild all .localmenu.*.xml
dirs = filter(

View File

@@ -10,6 +10,7 @@ which is useful to prevent reparsing the XSL multiple times.
import logging
import multiprocessing.pool
from collections import defaultdict
from itertools import product
from pathlib import Path
@@ -84,15 +85,8 @@ def process_files(
) -> None:
"""Build .html, .rss and .ics files from .xhtml sources."""
logger.info("Processing xhtml, rss, ics files")
# generate a set of unique processing xsls
xsl_files = {
processor.resolve().relative_to(source_dir.parent.resolve())
for processor in source_dir.glob("**/*.xsl")
}
process_files_dict = {}
for processor in xsl_files:
process_files_dict[processor] = set()
process_files_dict: dict[Path, set[Path]] = defaultdict(set)
# This gathers all the simple xhtml files for generating xhtml output
for file in source_dir.glob("**/*.*.xhtml"):

View File

@@ -6,6 +6,8 @@
import logging
import multiprocessing.pool
from collections import defaultdict
from functools import partial
from pathlib import Path
from fsfe_website_build.lib.misc import (
@@ -35,7 +37,7 @@ def _update_tag_sets(
lang: str,
filecount: dict[str, dict[str, int]],
files_by_tag: dict[str, list[Path]],
tags_by_lang: dict[str, dict[str, str]],
tags_by_lang: dict[str, dict[str, str | None]],
) -> None:
"""Update the .tags.??.xml tagset xmls for a given tag."""
# Add uout toplevel element
@@ -85,8 +87,10 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
with multiprocessing.Pool(processes) as pool:
logger.debug("Updating tags for %s", working_dir)
# Create a complete and current map of which tag is used in which files
files_by_tag = {}
tags_by_lang = {}
files_by_tag: dict[str, set[Path]] = defaultdict(set)
tags_by_lang: dict[str, dict[str, str | None]] = defaultdict(
partial(defaultdict, None)
)
# Fill out files_by_tag and tags_by_lang
for file in filter(
lambda file:
@@ -107,21 +111,14 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
label = tag.text.strip() if tag.text and tag.text.strip() else None
# Load into the dicts
if key not in files_by_tag:
files_by_tag[key] = set()
files_by_tag[key].add(get_basepath(file))
lang = lang_from_filename(file)
if lang not in tags_by_lang:
tags_by_lang[lang] = {}
tags_by_lang[lang][key] = (
tags_by_lang[lang][key]
if key in tags_by_lang[lang] and tags_by_lang[lang][key]
else label
)
if key not in tags_by_lang[lang] or not tags_by_lang[lang][key]:
tags_by_lang[lang][key] = label
# Sort dicts to ensure that they are stable between runs
files_by_tag = sort_dict(files_by_tag)
for tag in files_by_tag:
files_by_tag[tag] = sorted(files_by_tag[tag])
files_by_tag[tag] = set(sorted(files_by_tag[tag])) # noqa: C414
tags_by_lang = sort_dict(tags_by_lang)
for lang in tags_by_lang:
tags_by_lang[lang] = sort_dict(tags_by_lang[lang])
@@ -146,9 +143,8 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
logger.debug("Updating tag sets")
# Get count of files with each tag in each section
filecount = {}
filecount: dict[str, dict[str, int]] = defaultdict(partial(defaultdict, int))
for section in ["news", "events"]:
filecount[section] = {}
for tag in files_by_tag:
filecount[section][tag] = len(
list(

View File

@@ -7,6 +7,7 @@
import datetime
import logging
import multiprocessing
from collections import defaultdict
from pathlib import Path
from fsfe_website_build.lib.misc import (
@@ -80,7 +81,7 @@ def _get_text_ids(file: Path) -> list[str]:
def _create_overview(
target_dir: Path,
data: dict[str, dict[int, list[dict]]],
data: dict[str, dict[str, list[dict[str, str]]]],
) -> None:
work_file = target_dir.joinpath("langs.en.xml")
if not target_dir.exists():
@@ -179,7 +180,7 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
(Path(line.strip()) for line in all_git_tracked_files.split("\x00")),
),
)
priorities_and_searches = {
priorities_and_searches: dict[str, list[str]] = {
"1": [
"**/fsfe.org/index.en.xhtml",
"**/fsfe.org/freesoftware/freesoftware.en.xhtml",
@@ -202,29 +203,27 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
with multiprocessing.Pool(processes) as pool:
# Generate our file lists by priority
# Super hardcoded unfortunately
files_by_priority = {}
files_by_priority: dict[str, list[Path]] = defaultdict(list)
for file in all_files_with_translations:
for priority, searches in priorities_and_searches.items():
if priority not in files_by_priority:
files_by_priority[priority] = []
# If any search matches,
# add it to that priority and skip all subsequent priorities
if any(file.full_match(search) for search in searches):
files_by_priority[priority].append(file)
continue
files_by_lang_by_prio = {}
files_by_lang_by_prio: dict[str, dict[str, list[dict[str, str]]]] = defaultdict(
lambda: defaultdict(list)
)
for lang in languages:
files_by_lang_by_prio[lang] = {}
for priority in sorted(files_by_priority.keys()):
files_by_lang_by_prio[lang][priority] = list(
filter(
lambda result: result is not None,
pool.starmap(
_generate_translation_data,
[(lang, file) for file in files_by_priority[priority]],
),
),
files_by_lang_by_prio[lang][priority].extend(
result
for result in pool.starmap(
_generate_translation_data,
[(lang, file) for file in files_by_priority[priority]],
)
if result is not None
)
# sadly single threaded, as only one file being operated on