feat: use compiled regex (#5537)
continuous-integration/drone/push Build is passing

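Compile the regular expressions once with `re.compile` and reuse the compiled patterns, instead of passing the pattern strings to `re.sub`/`re.match` on every call. This should be a little bit faster.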

Co-authored-by: Darragh Elliott <me@delliott.net>
Reviewed-on: #5537
Reviewed-by: tobiasd <tobiasd@fsfe.org>
Co-authored-by: delliott <delliott@fsfe.org>
Co-committed-by: delliott <delliott@fsfe.org>
This commit was merged in pull request #5537.
This commit is contained in:
2025-12-04 05:52:02 +00:00
committed by tobiasd
parent 401e4e31cd
commit 034ed3ecd5
3 changed files with 36 additions and 30 deletions
+20 -27
View File
@@ -206,36 +206,29 @@ def process_file(source: Path, infile: Path, transform: etree.XSLT) -> str:
result = transform(xmlstream)
# And now a bunch of regexes to fix some links.
# xx is the language code in all comments
# Change links from /foo/bar.html into /foo/bar.xx.html
# Change links from foo/bar.html into foo/bar.xx.html
# Same for .rss and .ics links
link_lang_regex = re.compile(
r"""^(/?([^:>]+/)?[^:/.]{3,}\.)(html|rss|ics)""",
flags=re.IGNORECASE,
)
# Change links from /foo/bar/ into /foo/bar/index.xx.html
# Change links from foo/bar/ into foo/bar/index.xx.html
link_index_regex = re.compile(
r"""^(/?[^:>]+/)$""",
flags=re.IGNORECASE,
)
try:
for linkelem in result.xpath("//*[@href]"):
new_href = linkelem.get("href")
# remove any spurious whitespace
linkelem.set(
"href",
linkelem.get("href").strip(),
)
# Change links from /foo/bar.html into /foo/bar.xx.html
# Change links from foo/bar.html into foo/bar.xx.html
# Same for .rss and .ics links
linkelem.set(
"href",
re.sub(
r"""^(/?([^:>]+/)?[^:/.]{3,}\.)(html|rss|ics)""",
rf"""\1{lang}.\3""",
linkelem.get("href"),
flags=re.IGNORECASE,
),
)
# Change links from /foo/bar/ into /foo/bar/index.xx.html
# Change links from foo/bar/ into foo/bar/index.xx.html
linkelem.set(
"href",
re.sub(
r"""^(/?[^:>]+/)$""",
rf"""\1index.{lang}.html""",
linkelem.get("href"),
flags=re.IGNORECASE,
),
)
new_href = new_href.strip()
new_href = link_lang_regex.sub(rf"""\1{lang}.\3""", new_href)
new_href = link_index_regex.sub(rf"""\1index.{lang}.html""", new_href)
linkelem.set("href", new_href)
except AssertionError:
logger.debug("Output generated for file %s is not valid xml", infile)
return str(result)
@@ -47,7 +47,7 @@ def update_stylesheets(source_dir: Path, pool: multiprocessing.pool.Pool) -> Non
pool.map(
_update_sheet,
filter(
lambda file: re.match(banned, str(file)) is None,
lambda file: banned.match(str(file)) is None,
source_dir.glob("**/*.xsl"),
),
)
@@ -38,6 +38,7 @@ def _update_for_base( # noqa: PLR0913
source: Path,
base: Path,
all_xml: set[Path],
source_wildcard_sub_pattern: re.Pattern[str],
nextyear: str,
thisyear: str,
lastyear: str,
@@ -51,7 +52,7 @@ def _update_for_base( # noqa: PLR0913
with base.with_suffix(".sources").open(mode="r") as file:
for line in file:
pattern = (
re.sub(r"(\*)?:\[.*\]$", "*", line)
source_wildcard_sub_pattern.sub("*", line)
.replace("$nextyear", nextyear)
.replace("$thisyear", thisyear)
.replace("$lastyear", lastyear)
@@ -124,12 +125,24 @@ def _update_module_xmllists(
if lang_from_filename(path) in languages and etree.parse(path).xpath("//module")
}
all_bases = source_bases | module_bases
source_wildcard_sub_pattern: re.Pattern[str] = re.compile(r"(\*)?:\[.*\]$")
nextyear = str(datetime.datetime.today().year + 1)
thisyear = str(datetime.datetime.today().year)
lastyear = str(datetime.datetime.today().year - 1)
pool.starmap(
_update_for_base,
((source, base, all_xml, nextyear, thisyear, lastyear) for base in all_bases),
(
(
source,
base,
all_xml,
source_wildcard_sub_pattern,
nextyear,
thisyear,
lastyear,
)
for base in all_bases
),
)