fix: newsletter search issue,
All checks were successful
continuous-integration/drone/pr Build is passing

Improve index-website script logic
Remove in-file scripting for search, and introduce translation JavaScript files
This commit is contained in:
Darragh Elliott 2024-11-27 17:44:09 +00:00
parent 5d19dad7c8
commit 61914ecb0d
6 changed files with 156 additions and 189 deletions

View File

@ -6,7 +6,7 @@
<title>Search</title>
<script type="text/javascript" src="/scripts/lunr-2.3.9.min.js"></script>
<script src="index.js"></script>
<script type="text/javascript" src="index.js"></script>
</head>
<body class="toplevel">
@ -21,8 +21,8 @@
<p>
The search crawls through all site titles, teasers and tags, but
not the full article text. You will see maximum 15 results, sorted
in news and pages. The case of your term does not matter. If you do
not the full article text. You will see a maximum of 15 results each for
news and pages. The case of your term does not matter. If you do
not find what you were looking for, please try a variation of the
terms, or different words, and use the <a href="#tips">advanced
search features</a>.
@ -93,103 +93,6 @@
</li>
</ul>
<script>
/*
@licstart The following is the entire license notice for the
JavaScript code in this page.
Copyright (C) 2020 Free Software Foundation Europe
The JavaScript code in this page is free software: you can
redistribute it and/or modify it under the terms of the GNU
General Public License (GNU GPL) as published by the Free Software
Foundation, either version 3 of the License.
The code is distributed WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
As additional permission under GNU GPL version 3 section 7, you
may distribute non-source (e.g., minimized or compacted) forms of
that code without the copy of the GNU GPL normally required by
section 4, provided you include this license notice and a URL
through which recipients can access the Corresponding Source.
@licend The above is the entire license notice
for the JavaScript code in this page.
*/
</script>
<script>
// Inline search script: reads the q URL parameter, builds a lunr index over
// the global pages array and renders the matches into #search_results.
const searchString = new URLSearchParams(window.location.search).get('q');
// Languages whose pages may be listed: the document language plus English.
const locals = [document.documentElement.getAttribute("lang")];
if (!locals.includes('en')) {
locals.push('en');
}
const $target = document.querySelector('#search_results');
if (searchString) {
// Populate the field with any existing search string
document.querySelector('#search').value = searchString;
// Look-up table from page URL (used as the lunr ref below) to the page record
const pagesByURL = pages.reduce((acc, curr) => {
acc[curr.url] = curr;
return acc;
}, {});
// Build the index on every page load; title and tags are boosted over teaser and type.
index = lunr(function() {
this.pipeline.remove(lunr.stopWordFilter);
this.pipeline.remove(lunr.trimmer);
this.field("title", { boost: 10 });
this.field("tags", { boost: 5 });
this.field("teaser");
this.field("type");
this.ref("url");
pages.forEach(function (page) {
this.add(page)
}, this)
});
// Do the search and filter out results not from the current local or English
let matches = index.search(searchString).filter(p => locals.some(local => p.ref.includes(local + ".html")));
// Renders a list of lunr matches as an HTML list of links; the date is
// appended in parentheses when the page record has one.
function display_result(matches) {
// workaround xsl XML tag parsing madness
return '&lt;ul&gt;' + matches.map(p => {
title = pagesByURL[p.ref].title;
date = pagesByURL[p.ref].date;
if (date) {
return '<li>' + '<a href='&apos;+p.ref+&apos;'>'+title+'</a>'+' (' + date + ')</li>';
} else {
return '<li><a href='&apos;+p.ref+&apos;'>' + title + ' </a></li>';
}
}).join('') + '&lt;/ul&gt;';
}
if (matches.length > 0) {
// The cap of 15 is applied to the combined list, before it is split into
// news and pages, so one section can crowd out the other.
matches = matches.slice(0, 15);
// Partition: refs containing the text news go to the first array,
// everything else to the second.
let [news, pages] = matches.reduce(([true_arr, false_arr], m)=> {
if (m.ref.includes('news') === false)
// return true_arr and append m to false_arr
return [true_arr, [...false_arr, m]]
else
return [[...true_arr,m], false_arr]
}, [[],[]]);
if (news.length > 0) {
// NOTE(review): this comparator returns a boolean rather than a
// negative, zero or positive number - confirm the resulting order is the intended one.
news = news.sort((a, b) => pagesByURL[a.ref].date &lt; pagesByURL[b.ref].date);
// NOTE(review): the translation elements are presumably replaced with
// localized text when the page is built - confirm.
$target.innerHTML = '<h3><translation id="news" /></h3>' + display_result(news);
}
if (pages.length > 0) {
$target.innerHTML += '<h3><translation id="pages" /></h3>' + display_result(pages);
}
} else {
$target.innerHTML = '<p><translation id="search/notfound" /></p>';
}
} else {
$target.innerHTML = '<p><translation id="search/empty" /></p>';
}
</script>
<script type="text/javascript" src="search.js"></script>
</body>
</html>

132
fsfe.org/search/search.js Normal file
View File

@ -0,0 +1,132 @@
/*
@licstart The following is the entire license notice for the
JavaScript code in this page.
Copyright (C) 2020 Free Software Foundation Europe
The JavaScript code in this page is free software: you can
redistribute it and/or modify it under the terms of the GNU
General Public License (GNU GPL) as published by the Free Software
Foundation, either version 3 of the License.
The code is distributed WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
As additional permission under GNU GPL version 3 section 7, you
may distribute non-source (e.g., minimized or compacted) forms of
that code without the copy of the GNU GPL normally required by
section 4, provided you include this license notice and a URL
through which recipients can access the Corresponding Source.
@licend The above is the entire license notice
for the JavaScript code in this page.
*/
/*
 * Client-side search for the search page.
 *
 * 1. Probes for strings.<lang>.js matching the document language and falls
 *    back to the English strings when that request does not succeed.
 * 2. Imports the chosen strings module (news_text, pages_text,
 *    no_results_text, empty_query_text - all HTML snippets).
 * 3. Builds a lunr index over the global `pages` array, runs the `q` URL
 *    parameter against it and writes the result lists into #search_results,
 *    split into news and other pages with at most 15 entries each.
 *
 * Expects the globals `lunr` and `pages` to have been defined by scripts
 * loaded before this one.
 */
fetch(`strings.${document.documentElement.lang}.js`)
  .then((response) =>
    // Only use the page language when its strings file really exists.
    response.ok ? document.documentElement.lang : "en",
  )
  .then((language) => import(`./strings.${language}.js`))
  .then((texts) => {
    const MAX_RESULTS_PER_SECTION = 15;
    const searchString = new URLSearchParams(window.location.search).get("q");
    // Languages whose pages may be listed: the document language plus English.
    const locals = [document.documentElement.getAttribute("lang")];
    if (!locals.includes("en")) {
      locals.push("en");
    }
    const $target = document.querySelector("#search_results");

    if (!searchString) {
      $target.innerHTML = texts.empty_query_text;
      return;
    }

    // Populate the field with any existing search string
    document.querySelector("#search").value = searchString;

    // Look-up table from page URL (the lunr ref) to the full page record
    const pagesByURL = pages.reduce((acc, curr) => {
      acc[curr.url] = curr;
      return acc;
    }, {});

    const index = lunr(function () {
      this.pipeline.remove(lunr.stopWordFilter);
      this.pipeline.remove(lunr.trimmer);
      this.field("title", { boost: 10 });
      this.field("tags", { boost: 5 });
      this.field("teaser");
      this.field("type");
      this.ref("url");
      pages.forEach(function (page) {
        this.add(page);
      }, this);
    });

    let matches;
    try {
      matches = index.search(searchString);
    } catch (error) {
      // lunr rejects syntactically invalid queries (e.g. an unknown field
      // prefix); ask the user to rephrase instead of leaving the page blank.
      if (!(error instanceof lunr.QueryParseError)) {
        throw error;
      }
      $target.innerHTML = texts.no_results_text;
      return;
    }

    // Drop results that are neither in the current language nor in English
    matches = matches.filter((match) =>
      locals.some((local) => match.ref.includes(local + ".html")),
    );

    if (matches.length === 0) {
      $target.innerHTML = texts.no_results_text;
      return;
    }

    // Newest first. Array.prototype.sort needs a negative/zero/positive
    // number; a boolean comparator does not produce a reliable order.
    const byDateDescending = (a, b) => {
      const dateA = pagesByURL[a.ref].date;
      const dateB = pagesByURL[b.ref].date;
      if (dateA < dateB) return 1;
      if (dateA > dateB) return -1;
      return 0;
    };

    // Renders matches as an HTML list of links, with the date in parentheses
    // when the page record has one.
    // NOTE(review): title and ref are inserted without HTML escaping; this
    // assumes the generated index only contains trusted, markup-safe text.
    const displayResult = (results) =>
      "<ul>" +
      results
        .map((result) => {
          const { title, date } = pagesByURL[result.ref];
          const linkStart = '<li><a href="' + result.ref + '">' + title;
          return date
            ? linkStart + "</a> (" + date + ")</li>"
            : linkStart + " </a></li>";
        })
        .join("") +
      "</ul>";

    // Anything whose URL mentions "news" is listed as news, the rest as pages.
    const newsMatches = matches.filter((match) => match.ref.includes("news"));
    const pageMatches = matches.filter((match) => !match.ref.includes("news"));

    let html = "";
    if (newsMatches.length > 0) {
      const newestNews = newsMatches
        .sort(byDateDescending)
        .slice(0, MAX_RESULTS_PER_SECTION);
      html += texts.news_text + displayResult(newestNews);
    }
    if (pageMatches.length > 0) {
      html +=
        texts.pages_text +
        displayResult(pageMatches.slice(0, MAX_RESULTS_PER_SECTION));
    }
    $target.innerHTML = html;
  })
  .catch((error) => {
    // Surface load/initialisation failures instead of an unhandled rejection.
    console.error("Search could not be initialised:", error);
  });

View File

@ -4,7 +4,7 @@
<head>
<title>Zoek</title>
<script type="text/javascript" src="/scripts/lunr-2.3.9.min.js"></script>
<script src="index.js"></script>
<script type="text/javascript" src="index.js"></script>
</head>
<body class="toplevel">
<h1>Zoek</h1>
@ -78,91 +78,7 @@
teken in de bevindingen kan verschillen van uw zoekterm.
</li>
</ul>
<script>
/*
@licstart The following is the entire license notice for the
JavaScript code in this page.
Copyright (C) 2020 Free Software Foundation Europe
The JavaScript code in this page is free software: you can
redistribute it and/or modify it under the terms of the GNU
General Public License (GNU GPL) as published by the Free Software
Foundation, either version 3 of the License.
The code is distributed WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
As additional permission under GNU GPL version 3 section 7, you
may distribute non-source (e.g., minimized or compacted) forms of
that code without the copy of the GNU GPL normally required by
section 4, provided you include this license notice and a URL
through which recipients can access the Corresponding Source.
@licend The above is the entire license notice
for the JavaScript code in this page.
*/
</script>
<script>
// Inline search script: reads the q URL parameter, builds a lunr index over
// the global pages array and renders the matches into #search_results.
const searchString = new URLSearchParams(window.location.search).get('q');
// Languages whose pages may be listed: the document language plus English.
const locals = [document.documentElement.getAttribute("lang")];
if (!locals.includes('en')) {
locals.push('en');
}
const $target = document.querySelector('#search_results');
if (searchString) {
// Populate the field with any existing search string
document.querySelector('#search').value = searchString;
// Look-up table from page URL (used as the lunr ref below) to the page record
const pagesByURL = pages.reduce((acc, curr) => {
acc[curr.url] = curr;
return acc;
}, {});
// Build the index on every page load; title and tags are boosted over teaser and type.
index = lunr(function() {
this.pipeline.remove(lunr.stopWordFilter);
this.pipeline.remove(lunr.trimmer);
this.field("title", { boost: 10 });
this.field("tags", { boost: 5 });
this.field("teaser");
this.field("type");
this.ref("url");
pages.forEach(function (page) {
this.add(page)
}, this)
});
// Do the search and filter out results not from the current local or English
let matches = index.search(searchString).filter(p => locals.some(local => p.ref.includes(local + ".html")));
// Renders a list of lunr matches as an HTML list of links; the date is
// appended in parentheses when the page record has one.
function display_result(matches) {
// workaround xsl XML tag parsing madness
return '&lt;ul&gt;' + matches.map(p => {
title = pagesByURL[p.ref].title;
date = pagesByURL[p.ref].date;
if (date) {
return '<li>' + '<a href='&apos;+p.ref+&apos;'>'+title+'</a>'+' (' + date + ')</li>';
} else {
return '<li><a href='&apos;+p.ref+&apos;'>' + title + ' </a></li>';
}
}).join('') + '&lt;/ul&gt;';
}
if (matches.length > 0) {
// The cap of 15 is applied to the combined list, before it is split into
// news and pages, so one section can crowd out the other.
matches = matches.slice(0, 15);
// Partition: refs containing the text news go to the first array,
// everything else to the second.
let [news, pages] = matches.reduce(([true_arr, false_arr], m)=> {
if (m.ref.includes('news') === false)
// return true_arr and append m to false_arr
return [true_arr, [...false_arr, m]]
else
return [[...true_arr,m], false_arr]
}, [[],[]]);
if (news.length > 0) {
// NOTE(review): this comparator returns a boolean rather than a
// negative, zero or positive number - confirm the resulting order is the intended one.
news = news.sort((a, b) => pagesByURL[a.ref].date &lt; pagesByURL[b.ref].date);
// NOTE(review): the translation elements are presumably replaced with
// localized text when the page is built - confirm.
$target.innerHTML = '<h3><translation id="news" /></h3>' + display_result(news);
}
if (pages.length > 0) {
$target.innerHTML += '<h3><translation id="pages" /></h3>' + display_result(pages);
}
} else {
$target.innerHTML = '<p><translation id="search/notfound" /></p>';
}
} else {
$target.innerHTML = '<p><translation id="search/empty" /></p>';
}
</script>
<script type="text/javascript" src="search.js"></script>
</body>
<translator>André Ockers</translator>
</html>

View File

@ -0,0 +1,7 @@
// English UI strings for the search page, exported as HTML snippets that the
// search script inserts into the result area as-is.

// Heading above the list of news hits.
export const news_text = '<h3 id="id-news">News</h3>';

// Heading above the list of all other page hits.
export const pages_text = '<h3 id="id-pages">Pages</h3>';

// Shown when the query matched nothing.
export const no_results_text =
  "<p>No search results found. Please rephrase your query.</p>";

// Shown when the page is opened without a query.
export const empty_query_text = "<p>Your search query is empty.</p>";

View File

@ -0,0 +1,6 @@
// Dutch UI strings for the search page, exported as HTML snippets that the
// search script inserts into the result area as-is.

// Heading above the list of news hits.
const news_text = '<h3 id="id-nieuws">Nieuws</h3>';
// Heading above the list of all other page hits. Plural ("Pagina's"), in line
// with the English "Pages" and with the heading's own id; double quotes are
// escaped so the apostrophe can stay in the text.
const pages_text = "<h3 id=\"id-paginas\">Pagina's</h3>";
// Shown when the query matched nothing.
const no_results_text =
  "<p>Geen zoekresultaten gevonden. Formuleer uw vraag alstublieft opnieuw.</p>";
// Shown when the page is opened without a query.
const empty_query_text = "<p>Uw zoekopdracht is leeg.</p>";
export { news_text, pages_text, no_results_text, empty_query_text };

View File

@ -11,6 +11,9 @@ import logging
from os.path import abspath
from os import environ, sched_getaffinity
import time
import os
# Work from inside the fsfe.org/ directory so that relative paths used later
# in this script are resolved from there.
os.chdir("fsfe.org")
# Wall-clock start of the run; compared against the end time to report how
# long indexing took.
start_time = time.time()
print("* Creating search index")
@ -69,7 +72,7 @@ def process_file(filename: str):
]
articles.append(
{
"url": "https://fsfe.org/" + filename.removeprefix("fsfe.org/").replace("xhtml", "html"),
"url": "https://fsfe.org/" + filename.replace("xhtml", "html"),
"tags": " ".join(tags),
"title": file_parsed.title.text,
"teaser": " ".join(
@ -98,7 +101,7 @@ p.join()
end_time = time.time()
logger.info("Indexation done in {} seconds!".format(int(end_time - start_time)))
index_path = "fsfe.org/search/index.js"
index_path = "search/index.js"
# Make a JS file that can be directly loaded
# TODO find an easy way to load local JSON file from JavaScript
with open(index_path, "w", encoding="utf-8") as fh: