feat: typehint dicts better (#5450)
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
Use defaultdict for dicts. Co-authored-by: Darragh Elliott <me@delliott.net>. Reviewed-on: #5450
This commit was merged in pull request #5450.
This commit is contained in:
@@ -7,6 +7,7 @@
|
||||
import logging
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Any, cast
|
||||
|
||||
from lxml import etree
|
||||
|
||||
@@ -29,9 +30,9 @@ def keys_exists(element: dict, *keys: str) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def sort_dict(in_dict: dict) -> dict:
|
||||
def sort_dict[Dict: dict[Any, Any]](in_dict: Dict) -> Dict:
|
||||
"""Sort dict by keys."""
|
||||
return dict(sorted(in_dict.items(), key=lambda ele: ele[0]))
|
||||
return cast("Dict", dict(sorted(in_dict.items(), key=lambda ele: ele[0])))
|
||||
|
||||
|
||||
def update_if_changed(path: Path, content: str) -> None:
|
||||
|
||||
@@ -9,11 +9,12 @@ After this step, all .localmenu.??.xml files will be up to date.
|
||||
|
||||
import logging
|
||||
import multiprocessing.pool
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from fsfe_website_build.lib.misc import get_basepath, update_if_changed
|
||||
from fsfe_website_build.lib.misc import get_basepath, sort_dict, update_if_changed
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -87,24 +88,21 @@ def update_localmenus(
|
||||
"""Update all the .localmenu.*.xml files containing the local menus."""
|
||||
logger.info("Updating local menus")
|
||||
# Get a dict of all source files containing local menus
|
||||
files_by_dir = {}
|
||||
files_by_dir: dict[str, set[Path]] = defaultdict(set)
|
||||
for file in filter(
|
||||
lambda path: "-template" not in str(path),
|
||||
lambda path: "-template" not in path.name,
|
||||
source_dir.glob("**/*.??.xhtml"),
|
||||
):
|
||||
xslt_root = etree.parse(file)
|
||||
if xslt_root.xpath("//localmenu"):
|
||||
directory = xslt_root.xpath("//localmenu/@dir")
|
||||
directory = (
|
||||
str(source.joinpath(directory[0]))
|
||||
if directory
|
||||
else str(file.parent.resolve())
|
||||
directory_xpath = xslt_root.xpath("//localmenu/@dir")
|
||||
directory = str(
|
||||
source.joinpath(directory_xpath[0])
|
||||
if directory_xpath
|
||||
else file.parent.resolve()
|
||||
)
|
||||
if directory not in files_by_dir:
|
||||
files_by_dir[directory] = set()
|
||||
files_by_dir[directory].add(file)
|
||||
for directory, files in files_by_dir.items():
|
||||
files_by_dir[directory] = sorted(files)
|
||||
files_by_dir = sort_dict(files_by_dir)
|
||||
|
||||
# If any of the source files has been updated, rebuild all .localmenu.*.xml
|
||||
dirs = filter(
|
||||
|
||||
@@ -10,6 +10,7 @@ which is useful to prevent reparsing the XSL multiple times.
|
||||
|
||||
import logging
|
||||
import multiprocessing.pool
|
||||
from collections import defaultdict
|
||||
from itertools import product
|
||||
from pathlib import Path
|
||||
|
||||
@@ -84,15 +85,8 @@ def process_files(
|
||||
) -> None:
|
||||
"""Build .html, .rss and .ics files from .xhtml sources."""
|
||||
logger.info("Processing xhtml, rss, ics files")
|
||||
# generate a set of unique processing xsls
|
||||
xsl_files = {
|
||||
processor.resolve().relative_to(source_dir.parent.resolve())
|
||||
for processor in source_dir.glob("**/*.xsl")
|
||||
}
|
||||
|
||||
process_files_dict = {}
|
||||
for processor in xsl_files:
|
||||
process_files_dict[processor] = set()
|
||||
process_files_dict: dict[Path, set[Path]] = defaultdict(set)
|
||||
|
||||
# This gathers all the simple xhtml files for generating xhtml output
|
||||
for file in source_dir.glob("**/*.*.xhtml"):
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
|
||||
import logging
|
||||
import multiprocessing.pool
|
||||
from collections import defaultdict
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
|
||||
from fsfe_website_build.lib.misc import (
|
||||
@@ -35,7 +37,7 @@ def _update_tag_sets(
|
||||
lang: str,
|
||||
filecount: dict[str, dict[str, int]],
|
||||
files_by_tag: dict[str, list[Path]],
|
||||
tags_by_lang: dict[str, dict[str, str]],
|
||||
tags_by_lang: dict[str, dict[str, str | None]],
|
||||
) -> None:
|
||||
"""Update the .tags.??.xml tagset xmls for a given tag."""
|
||||
# Add the toplevel element
|
||||
@@ -85,8 +87,10 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
|
||||
with multiprocessing.Pool(processes) as pool:
|
||||
logger.debug("Updating tags for %s", working_dir)
|
||||
# Create a complete and current map of which tag is used in which files
|
||||
files_by_tag = {}
|
||||
tags_by_lang = {}
|
||||
files_by_tag: dict[str, set[Path]] = defaultdict(set)
|
||||
tags_by_lang: dict[str, dict[str, str | None]] = defaultdict(
|
||||
partial(defaultdict, None)
|
||||
)
|
||||
# Fill out files_by_tag and tags_by_lang
|
||||
for file in filter(
|
||||
lambda file:
|
||||
@@ -107,21 +111,14 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
|
||||
label = tag.text.strip() if tag.text and tag.text.strip() else None
|
||||
|
||||
# Load into the dicts
|
||||
if key not in files_by_tag:
|
||||
files_by_tag[key] = set()
|
||||
files_by_tag[key].add(get_basepath(file))
|
||||
lang = lang_from_filename(file)
|
||||
if lang not in tags_by_lang:
|
||||
tags_by_lang[lang] = {}
|
||||
tags_by_lang[lang][key] = (
|
||||
tags_by_lang[lang][key]
|
||||
if key in tags_by_lang[lang] and tags_by_lang[lang][key]
|
||||
else label
|
||||
)
|
||||
if key not in tags_by_lang[lang] or not tags_by_lang[lang][key]:
|
||||
tags_by_lang[lang][key] = label
|
||||
# Sort dicts to ensure that they are stable between runs
|
||||
files_by_tag = sort_dict(files_by_tag)
|
||||
for tag in files_by_tag:
|
||||
files_by_tag[tag] = sorted(files_by_tag[tag])
|
||||
files_by_tag[tag] = set(sorted(files_by_tag[tag])) # noqa: C414
|
||||
tags_by_lang = sort_dict(tags_by_lang)
|
||||
for lang in tags_by_lang:
|
||||
tags_by_lang[lang] = sort_dict(tags_by_lang[lang])
|
||||
@@ -146,9 +143,8 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
|
||||
|
||||
logger.debug("Updating tag sets")
|
||||
# Get count of files with each tag in each section
|
||||
filecount = {}
|
||||
filecount: dict[str, dict[str, int]] = defaultdict(partial(defaultdict, int))
|
||||
for section in ["news", "events"]:
|
||||
filecount[section] = {}
|
||||
for tag in files_by_tag:
|
||||
filecount[section][tag] = len(
|
||||
list(
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
import datetime
|
||||
import logging
|
||||
import multiprocessing
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
from fsfe_website_build.lib.misc import (
|
||||
@@ -80,7 +81,7 @@ def _get_text_ids(file: Path) -> list[str]:
|
||||
|
||||
def _create_overview(
|
||||
target_dir: Path,
|
||||
data: dict[str, dict[int, list[dict]]],
|
||||
data: dict[str, dict[str, list[dict[str, str]]]],
|
||||
) -> None:
|
||||
work_file = target_dir.joinpath("langs.en.xml")
|
||||
if not target_dir.exists():
|
||||
@@ -179,7 +180,7 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
|
||||
(Path(line.strip()) for line in all_git_tracked_files.split("\x00")),
|
||||
),
|
||||
)
|
||||
priorities_and_searches = {
|
||||
priorities_and_searches: dict[str, list[str]] = {
|
||||
"1": [
|
||||
"**/fsfe.org/index.en.xhtml",
|
||||
"**/fsfe.org/freesoftware/freesoftware.en.xhtml",
|
||||
@@ -202,29 +203,27 @@ def run(source: Path, languages: list[str], processes: int, working_dir: Path) -
|
||||
with multiprocessing.Pool(processes) as pool:
|
||||
# Generate our file lists by priority
|
||||
# Super hardcoded unfortunately
|
||||
files_by_priority = {}
|
||||
files_by_priority: dict[str, list[Path]] = defaultdict(list)
|
||||
for file in all_files_with_translations:
|
||||
for priority, searches in priorities_and_searches.items():
|
||||
if priority not in files_by_priority:
|
||||
files_by_priority[priority] = []
|
||||
# If any search matches,
|
||||
# add it to that priority and skip all subsequent priorities
|
||||
if any(file.full_match(search) for search in searches):
|
||||
files_by_priority[priority].append(file)
|
||||
continue
|
||||
|
||||
files_by_lang_by_prio = {}
|
||||
files_by_lang_by_prio: dict[str, dict[str, list[dict[str, str]]]] = defaultdict(
|
||||
lambda: defaultdict(list)
|
||||
)
|
||||
for lang in languages:
|
||||
files_by_lang_by_prio[lang] = {}
|
||||
for priority in sorted(files_by_priority.keys()):
|
||||
files_by_lang_by_prio[lang][priority] = list(
|
||||
filter(
|
||||
lambda result: result is not None,
|
||||
pool.starmap(
|
||||
_generate_translation_data,
|
||||
[(lang, file) for file in files_by_priority[priority]],
|
||||
),
|
||||
),
|
||||
files_by_lang_by_prio[lang][priority].extend(
|
||||
result
|
||||
for result in pool.starmap(
|
||||
_generate_translation_data,
|
||||
[(lang, file) for file in files_by_priority[priority]],
|
||||
)
|
||||
if result is not None
|
||||
)
|
||||
|
||||
# sadly single-threaded, as only one file is being operated on
|
||||
|
||||
Reference in New Issue
Block a user