fsfe-website/tools/githooks/pre-commit

603 lines
20 KiB
Bash
Executable File

#!/usr/bin/env bash
# =============================================================================
# Check dependencies
# =============================================================================
# Collect every required program that cannot be found via `command -v`.
# Names are prepended, so the resulting list is in reverse order and carries
# a trailing space.
deperrors=''
for depend in git xmllint sed file grep bash perl mediainfo curl mktemp; do
  if ! command -v "$depend" >/dev/null 2>&1; then
    deperrors="$depend $deperrors"
  fi
done

# Abort before any check runs if something is missing.
# The message is written by duplicating file descriptor 2 (">&2") instead of
# opening /dev/stderr by path: opening the path can fail when stderr is not a
# re-openable file (for example a socket), and a failed redirection attached
# to the whole `if` would skip the test and the `exit 1` altogether.
if [[ -n "$deperrors" ]]; then
  cat >&2 <<EOF
The githook script depends on some other programs to function. Not all of
those programs could be located on your system. Please use your package
manager to install the following programs: $deperrors
Your commit has therefore been aborted.
EOF
  exit 1
fi
# =============================================================================
# Define variables
# =============================================================================

# Work out which files have to be checked, depending on the first argument.
case "$1" in
ci-pr)
  # CI pipeline: take the files that differ between origin/master and HEAD,
  # i.e. what the PR would change once merged, and print them for the log.
  files_all=$(git diff-tree --name-only -r origin/master HEAD)
  echo "Checking the following files:"
  echo "${files_all}"
  echo
  ;;
*)
  # Local run: only the staged files are checked.
  files_all=$(git diff --staged --name-only)
  ;;
esac

# Line count of the file list (denominator of the progress display) and the
# running index of the file currently being checked.
total="$(echo "$files_all" | wc -l)"
count=0

# One pair per check:
#   RETURN_<check>  number of failures found
#   FILES_<check>   "|"-separated list of the entries to display in the report
RETURN_SYNTAX=0         FILES_SYNTAX=""
RETURN_HTML=0           FILES_HTML=""
RETURN_TAGS_NEW=0       FILES_TAGS_NEW=""
RETURN_TAGS_MISMATCH=0  FILES_TAGS_MISMATCH=""
RETURN_ENC=0            FILES_ENC=""
RETURN_NAME=0           FILES_NAME=""
RETURN_NEWSDATE=0       FILES_NEWSDATE=""
RETURN_VERSION_PRES=0   FILES_VERSION_PRES=""
RETURN_VERSION_INT=0    FILES_VERSION_INT=""
RETURN_ABS_LINK=0       FILES_ABS_LINK=""
RETURN_FIX_LANG=0       FILES_FIX_LANG=""
RETURN_CSS_ELEMENT=0    FILES_CSS_ELEMENT=""
RETURN_CSS_ATTR=0       FILES_CSS_ATTR=""
RETURN_IMGRATIO=0       FILES_IMGRATIO=""
# =============================================================================
# Check all files for different things
# =============================================================================

#######################################
# Classify an image's aspect ratio from its pixel dimensions.
# The accepted window is 1769..1785 for 1000 * width / height, i.e. 16:9
# (1.777) with a buffer of about 2px per side on an 800 x 450px image.
# Arguments:
#   $1 - image width, $2 - image height (as printed by mediainfo)
# Returns:
#   0 if the ratio lies inside the accepted window
#   1 if the ratio lies outside of it
#   2 if the dimensions are unusable (empty, non-numeric, or zero height)
#######################################
imgratio_check() {
  local width=$1 height=$2 ratio
  if ! [[ $width =~ ^[0-9]+$ && $height =~ ^[0-9]+$ ]]; then
    return 2
  fi
  # Force base 10 so that a leading zero is not read as an octal number
  width=$((10#$width))
  height=$((10#$height))
  if ((height == 0)); then
    return 2
  fi
  # Multiply by 1000 because bash only knows integer arithmetic
  ratio=$((1000 * width / height))
  if ((ratio < 1769 || ratio > 1785)); then
    return 1
  fi
  return 0
}

# The file list is read line by line, so names containing spaces or glob
# characters stay intact. It is fed through file descriptor 3 to keep stdin
# untouched for the commands inside the loop.
while IFS= read -r f <&3; do
  # An empty file list still yields a single empty line; skip it
  [[ -n "$f" ]] || continue

  count=$((count + 1))
  # Progress display, only for local (non-CI) runs
  if [[ "$1" != "ci-pr" ]]; then
    echo -ne "pre-commit check: [$count/$total]\r"
  fi

  # ---------------------------------------------------------------------------
  # XML syntax
  # ---------------------------------------------------------------------------
  fileregex="(\.xhtml$|\.xml$|\.xsl$)"
  if [[ ( $f =~ $fileregex ) && ( -e "$f" ) ]]; then
    # xmllint prints its findings itself; only the failure is recorded here
    if ! xmllint --noout --nonet "${f}"; then
      RETURN_SYNTAX=$((RETURN_SYNTAX + 1))
      FILES_SYNTAX="${FILES_SYNTAX}|${f}"
    fi
  fi

  # ---------------------------------------------------------------------------
  # HTML files
  # ---------------------------------------------------------------------------
  # Every existing file ending in .html is reported
  fileregex="(\.html$)"
  if [[ ( $f =~ $fileregex ) && ( -e "$f" ) ]]; then
    RETURN_HTML=$((RETURN_HTML + 1))
    FILES_HTML="${FILES_HTML}|${f}"
  fi

  # ---------------------------------------------------------------------------
  # New tags
  # ---------------------------------------------------------------------------
  fileregex="^(news/|events/).*(\.xhtml$|\.xml$|\.xsl$)"
  if [[ ( $f =~ $fileregex ) && ( -e "$f" ) ]]; then
    hit=0
    tags=""
    # go through all tags in this file
    # make only a new line a field separator to support tags with spaces
    # inside (which is not recommended)
    OLDIFS=$IFS # save IFS, usually " \t\n"
    IFS=$'\n'
    for tag in $(grep -Ei '<tag(\s|\>)' "${f}" | perl -pe 's/.*<tag key="(.+?)".*/\1/'); do
      # check if this tag does exist in any other news/event item; the
      # current file is filtered out as a fixed string, not as a regex
      if ! git grep -ilE "<tag key=\"${tag}\"" news/ events/ | grep -vqF -- "${f}"; then
        hit=1
        tags="${tag}, ${tags}"
        RETURN_TAGS_NEW=$((RETURN_TAGS_NEW + 1))
      fi
    done
    IFS=$OLDIFS # reset IFS
    # if any new tag has been found, enlist them
    if [ $hit -ne 0 ]; then
      tags="${tags%, }"
      FILES_TAGS_NEW="${FILES_TAGS_NEW}|${f} (new tag(s): ${tags})"
    fi
  fi

  # ---------------------------------------------------------------------------
  # Tags mismatch between EN and translations
  # ---------------------------------------------------------------------------
  fileregex="^(news/|events/).*(\.xhtml$|\.xml$|\.xsl$)"
  if [[ ( $f =~ $fileregex ) && ( -e "$f" ) ]]; then
    # Only check non-english files
    if [[ ! $f =~ \.en\. ]]; then
      # Get file extension
      ext="${f##*.}"
      # Get base file name (without the ".<lang>.$ext" part)
      base=$(echo "${f}" | sed -E "s/\.[a-z][a-z]\.${ext}//")
      # skip the TAGS_MISMATCH check if no english original exists
      if [[ -e "$base.en.$ext" ]]; then
        # Extract tags from the translated and the English file, and sort them
        tags_trans="$(grep -Ei '<tag(\s|\>)' "${f}" | perl -pe 's/.*<tag key="(.+?)".*/\1/' | sort)"
        tags_en="$(grep -Ei '<tag(\s|\>)' "${base}.en.${ext}" | perl -pe 's/.*<tag key="(.+?)".*/\1/' | sort)"
        # Compare the two lists, and output tags that are not present in one of
        # the files. `-3` strips away the list of tags that are common. So the
        # output should be empty normally
        if [[ $(comm -3 <(echo "$tags_en") <(echo "$tags_trans")) ]]; then
          RETURN_TAGS_MISMATCH=$((RETURN_TAGS_MISMATCH + 1))
          FILES_TAGS_MISMATCH="${FILES_TAGS_MISMATCH}|${f}"
        fi
      fi
    fi
  fi

  # ---------------------------------------------------------------------------
  # Encoding
  # ---------------------------------------------------------------------------
  fileregex="(\.xhtml$|\.xml$|\.xsl$)"
  if [[ ( $f =~ $fileregex ) && ( -e "$f" ) ]]; then
    # accepted encodings as reported by `file`
    regex="(utf-8|us-ascii)"
    if ! [[ $(file -b --mime-encoding "${f}") =~ $regex ]]; then
      RETURN_ENC=$((RETURN_ENC + 1))
      FILES_ENC="${FILES_ENC}|${f}"
    fi
  fi

  # ---------------------------------------------------------------------------
  # Naming and newsdate attribute mistakes in news/events
  # ---------------------------------------------------------------------------
  fileregex="^(news/20[0-9]{2}/|news/nl/|news/podcast/|events/20[0-9]{2}/).*(\.xhtml$|\.xml$)"
  if [[ ( $f =~ $fileregex ) && ( -e "$f" ) ]]; then
    filename="$(basename "${f}")"
    # file naming scheme
    regex="^((nl-20[0-9]{4})|episode-(special-)?[0-9]{1,3}|(news|event)-20[0-9]{6}-[0-9]{2})\.[a-z]{2}\.(xml|xhtml)$"
    if ! [[ $filename =~ $regex ]]; then
      RETURN_NAME=$((RETURN_NAME + 1))
      FILES_NAME="${FILES_NAME}|${f}"
    fi
    # newsdate attribute scheme; only checked if the attribute is present
    regex="^20[0-9]{2}-[0-9]{2}-[0-9]{2}$"
    if grep -qE "<html\s*newsdate=\".*?>" "${f}"; then
      newsdate=$(grep -E "<html\s*newsdate=\".*?>" "${f}" | perl -pe 's/.*newsdate="(.+?)".*/\1/')
      if ! [[ $newsdate =~ $regex ]]; then
        RETURN_NEWSDATE=$((RETURN_NEWSDATE + 1))
        FILES_NEWSDATE="${FILES_NEWSDATE}|${f}"
      fi
    fi
  fi

  # ---------------------------------------------------------------------------
  # Version tag presence + integer check
  # ---------------------------------------------------------------------------
  fileregex="(\.xhtml$|\.xml$)"
  if [[ ( $f =~ $fileregex ) && ( -e "$f" ) ]]; then
    # check whether version tag is present directly below the root element
    if ! xmllint --xpath "/*/version" "${f}" &>/dev/null; then
      RETURN_VERSION_PRES=$((RETURN_VERSION_PRES + 1))
      FILES_VERSION_PRES="${FILES_VERSION_PRES}|${f}"
    else
      # check whether it's a positive integer
      if ! [[ $(xmllint --xpath "/*/version/text()" "${f}") =~ ^[0-9]+$ ]]; then
        RETURN_VERSION_INT=$((RETURN_VERSION_INT + 1))
        FILES_VERSION_INT="${FILES_VERSION_INT}|${f}"
      fi
    fi
  fi

  # ---------------------------------------------------------------------------
  # Check absolute links to fsfe.org
  # ---------------------------------------------------------------------------
  fileregex="(\.xhtml$|\.xml$)"
  exclude="^(drm.info|pdfreaders|global|cgi-bin|build|scripts)"
  if [[ ( $f =~ $fileregex ) && ( ! $f =~ $exclude ) && ( -e "$f" ) ]]; then
    # sed puts every href attribute on a line of its own before grepping
    if xmllint --xpath "//a/@href" "${f}" 2>/dev/null | sed -E 's/([^\r\n]) (href=)/\1\n \2/g' | grep -qE "https?://fsfe(urope)?.org"; then
      RETURN_ABS_LINK=$((RETURN_ABS_LINK + 1))
      FILES_ABS_LINK="${FILES_ABS_LINK}|${f}"
    fi
  fi

  # ---------------------------------------------------------------------------
  # Check for links with fixed language
  # ---------------------------------------------------------------------------
  fileregex="(\.xhtml$|\.xml$)"
  exclude="^(cgi-bin|build)"
  if [[ ( $f =~ $fileregex ) && ( ! $f =~ $exclude ) && ( -e "$f" ) ]]; then
    if xmllint --xpath "//a/@href" "${f}" 2>/dev/null | sed -E 's/([^\r\n]) (href=)/\1\n \2/g' | grep -qE "\"(https?://fsfe(urope)?.org)?/.+?\.[a-z]{2}(\.html)?(#.+?)?\""; then
      RETURN_FIX_LANG=$((RETURN_FIX_LANG + 1))
      FILES_FIX_LANG="${FILES_FIX_LANG}|${f}"
    fi
  fi

  # ---------------------------------------------------------------------------
  # Check for <style> elements
  # ---------------------------------------------------------------------------
  fileregex="(\.xhtml$|\.xml$)"
  if [[ ( $f =~ $fileregex ) && ( -e "$f" ) ]]; then
    # the xmllint call succeeding is taken as "a <style> element exists"
    if xmllint --xpath "//style" "${f}" &>/dev/null; then
      RETURN_CSS_ELEMENT=$((RETURN_CSS_ELEMENT + 1))
      FILES_CSS_ELEMENT="${FILES_CSS_ELEMENT}|${f}"
    fi
  fi

  # ---------------------------------------------------------------------------
  # Check for style attributes
  # ---------------------------------------------------------------------------
  fileregex="(\.xhtml$|\.xml$)"
  if [[ ( $f =~ $fileregex ) && ( -e "$f" ) ]]; then
    if xmllint --xpath "//@style" "${f}" &>/dev/null; then
      RETURN_CSS_ATTR=$((RETURN_CSS_ATTR + 1))
      FILES_CSS_ATTR="${FILES_CSS_ATTR}|${f}"
    fi
  fi

  # ---------------------------------------------------------------------------
  # Check for ratio of preview image
  # ---------------------------------------------------------------------------
  # Note: we also check events, could carry images in the future
  fileregex="^(news/|events/).*(\.xhtml$|\.xml$)"
  if [[ ( $f =~ $fileregex ) && ( -e "$f" ) ]]; then
    imgratio_status=""
    imageurl=$(xmllint --xpath "string(//image/@url)" "${f}")
    if [[ -n "${imageurl}" ]]; then
      # Remote images need to be downloaded
      if grep -Eq "^http(s)?://" <<< "${imageurl}"; then
        image=$(mktemp --suffix fsfe-imgratio)
        # Download file, and handle failed download
        if ! curl -s -f "${imageurl}" > "${image}"; then
          imgratio_status="failed"
        else
          imgratio_status="remote"
        fi
      # Local files need absolute path
      else
        imgratio_status="local"
        image="$(git rev-parse --show-toplevel)${imageurl}"
      fi

      if [[ "${imgratio_status}" == "failed" ]]; then
        RETURN_IMGRATIO=$((RETURN_IMGRATIO + 1))
        FILES_IMGRATIO="${FILES_IMGRATIO}|${f} (Image could not be downloaded from ${imageurl})"
      else
        # Run ratio checks
        height=$(mediainfo --Output='Image;%Height%' "${image}")
        width=$(mediainfo --Output='Image;%Width%' "${image}")
        imgratio_check "${width}" "${height}"
        case $? in
        0) ;;
        1)
          RETURN_IMGRATIO=$((RETURN_IMGRATIO + 1))
          FILES_IMGRATIO="${FILES_IMGRATIO}|${f} ($imageurl)"
          ;;
        *)
          # Missing or unreadable image: report it instead of letting an
          # arithmetic error (empty operand, division by zero) abort the
          # whole loop and leave the remaining files unchecked
          RETURN_IMGRATIO=$((RETURN_IMGRATIO + 1))
          FILES_IMGRATIO="${FILES_IMGRATIO}|${f} (Image dimensions could not be determined for ${imageurl})"
          ;;
        esac
      fi

      # delete temporary file unless local graphic
      if [[ ${imgratio_status} != "local" ]]; then
        rm -f -- "${image}"
      fi
    fi
  fi
done 3<<< "$files_all"
# Terminate the progress line written during the checks
printf '\n'

# =============================================================================
# Report based on failed checks
# =============================================================================

# Render a "|"-separated list as one " - entry" bullet per line.
# Arguments: $1 - the list, each entry preceded by "|"
bullet_list() {
  echo "$1" | sed -E -e "s/\|/\n - /g"
}

# Each section below is printed to stderr only if its counter is above zero.

if ((RETURN_SYNTAX > 0)); then
  cat >&2 <<EOF
=========================
|| [CRIT] SYNTAX ERROR ||
=========================
${RETURN_SYNTAX} files failed the XML syntax check!
$(bullet_list "${FILES_SYNTAX}")
The error log above will help you to identify the error. Read it from
the top. The numbers behind the file name point to the line number in
the file where the error has been detected.
Check this line and its surroundings for XML/HTML tags that have not
been closed correctly, errors with special characters like "&", or
other syntactical mistakes.
EOF
fi

if ((RETURN_HTML > 0)); then
  cat >&2 <<EOF
=================================
|| [CRIT] FILE EXTENSION ERROR ||
=================================
${RETURN_HTML} files in your commit have a wrong file extension.
The following files have ".html" as file extensions:
$(bullet_list "${FILES_HTML}")
This creates problems with our build system. It is supposed to create
the final HTML files from the .xhtml source files. So please make
sure you follow the format for the XHTML files, and to name them
accordingly.
EOF
fi

if ((RETURN_TAGS_NEW > 0)); then
  cat >&2 <<EOF
====================================
|| [WARN] NEW / DUPLICATED TAG(S) ||
====================================
Your commit introduced $RETURN_TAGS_NEW tag(s) which did not exist before in
our news or event items!
$(bullet_list "${FILES_TAGS_NEW}")
Please make sure that you use already used tags, and only introduce a
new tag e.g. if it's about a new campaign that will be more often
mentioned in news or events. If you feel unsure, please ask
<web@lists.fsfe.org>.
Here you will find the currently used tags:
https://fsfe.org/tags/tags.html
Please make another commit to replace a new tag with an already
existing one unless you are really sure. Thank you.
EOF
fi

if ((RETURN_TAGS_MISMATCH > 0)); then
  cat >&2 <<EOF
====================================
|| [CRIT] MISMATCHED TAG(S) ||
====================================
Your commit introduced $RETURN_TAGS_MISMATCH files whose tags do not match its
English original!
$(bullet_list "${FILES_TAGS_MISMATCH}")
Please make sure that the tags of the original English files match the tags of
its translations. You can check this link to learn how tags are being used:
https://wiki.fsfe.org/TechDocs/Mainpage/Editing#Tags
Mismatched tags can create issues with the tags that are hard to debug. Please
fix this.
EOF
fi

if ((RETURN_ENC > 0)); then
  cat >&2 <<EOF
================================
|| [CRIT] FILE ENCODING ERROR ||
================================
${RETURN_ENC} files in your commit have a wrong encoding.
The following files are not UTF-8 encoded:
$(bullet_list "${FILES_ENC}")
For the FSFE website, we strongly prefer UTF-8 encoded files.
Everything else creates problems. Please change the file encoding in
your text editor or with a special tool.
EOF
fi

if ((RETURN_NAME > 0)); then
  cat >&2 <<EOF
==============================
|| [CRIT] FILE NAMING ERROR ||
==============================
The following files do not follow the naming scheme for news and
events:
$(bullet_list "${FILES_NAME}")
The scheme is:
* "news-20YYMMDD-01.en.xhtml" for news
* "nl-20YYMM.en.xhtml" for newsletters
* "episode-N.en.xhtml" for podcast episodes
* "event-20YYMMDD-01.en.xml" for events
If there is more than one news item per date, count the "-01"
onwards. Of course, the ".en" can also be the code for another
language we support.
EOF
fi

if ((RETURN_NEWSDATE > 0)); then
  cat >&2 <<EOF
=====================================
|| [CRIT] NEWSDATE ATTRIBUTE ERROR ||
=====================================
The following files do not follow the scheme for the newsdate attribute:
$(bullet_list "${FILES_NEWSDATE}")
The scheme is "20YY-MM-DD", so the respective line should look
something like this: <html newsdate="2020-01-01">
EOF
fi

if ((RETURN_VERSION_PRES > 0)); then
  cat >&2 <<EOF
================================
|| [CRIT] MISSING VERSION TAG ||
================================
The following files do not contain the mandatory <version> tag, or it is at
the wrong level:
$(bullet_list "${FILES_VERSION_PRES}")
Please read more about outdated translations and the version tag here:
https://wiki.fsfe.org/TechDocs/Mainpage/Translations/
EOF
fi

if ((RETURN_VERSION_INT > 0)); then
  cat >&2 <<EOF
==============================
|| [CRIT] WRONG VERSION TAG ||
==============================
The following files contain a malformed <version> tag:
$(bullet_list "${FILES_VERSION_INT}")
The version tag may only be a positive integer (so a number, no decimals or
alphabetic characters)!
Please read more about outdated translations and the version tag here:
https://wiki.fsfe.org/TechDocs/Mainpage/Translations/
EOF
fi

if ((RETURN_ABS_LINK > 0)); then
  cat >&2 <<EOF
======================================
|| [CRIT] ABSOLUTE LINK TO fsfe.org ||
======================================
The following files contain a absolute links to fsfe.org:
$(bullet_list "${FILES_ABS_LINK}")
Please do not use links containing "https://fsfe.org". So instead of
<a href="https://fsfe.org/freesoftware/">link</a>
you should use:
<a href="/freesoftware">link</a>
More information about the why and how:
https://wiki.fsfe.org/TechDocs/Mainpage/Editing/BestPractices#No_absolute_links_to_fsfe.org
EOF
fi

if ((RETURN_FIX_LANG > 0)); then
  cat >&2 <<EOF
================================
|| [CRIT] FIXED LANGUAGE LINK ||
================================
The following files contain links with fixed language:
$(bullet_list "${FILES_FIX_LANG}")
Please do not preset a language in your links to resources on fsfe.org.
So instead of:
<a href="/contribute/web.en.html">link</a>
you should use:
<a href="/contribute/web.html">link</a>
More information about the why and how:
https://wiki.fsfe.org/TechDocs/Mainpage/Editing/BestPractices#No_fixed_language_in_internal_links
EOF
fi

if ((RETURN_CSS_ELEMENT > 0)); then
  cat >&2 <<EOF
=================================
|| [WARN] INLINE CSS (<style>) ||
=================================
The following files contain inline CSS as <style> elements:
$(bullet_list "${FILES_CSS_ELEMENT}")
Please do not use <style> elements to define CSS rules for a file.
More information why this is bad style, and what to do instead:
https://wiki.fsfe.org/TechDocs/Mainpage/Editing/BestPractices#No_in-line_CSS
EOF
fi

if ((RETURN_CSS_ATTR > 0)); then
  cat >&2 <<EOF
==================================
|| [WARN] INLINE CSS (style="") ||
==================================
The following files contain inline CSS as style attributes:
$(bullet_list "${FILES_CSS_ATTR}")
Please do not use style attributes to design an element. So instead of:
<p style="color: red;">text</p>
use CSS classes instead, or create them if necessary.
More information why this is bad style, and what to do instead:
https://wiki.fsfe.org/TechDocs/Mainpage/Editing/BestPractices#No_in-line_CSS
EOF
fi

if ((RETURN_IMGRATIO > 0)); then
  cat >&2 <<EOF
========================================
|| [CRIT] Invalid preview image ratio ||
========================================
The following files' preview image ratio is not 16:9. This is mandatory for
news items:
$(bullet_list "${FILES_IMGRATIO}")
More information on preview images:
https://wiki.fsfe.org/TechDocs/Mainpage/Editing#Preview_Image
EOF
fi
# Total number of failures that block the commit.
# NOTE(review): RETURN_HTML is reported under a [CRIT] banner but is not part
# of this sum, so a commit with only .html findings still passes. Confirm
# whether that is intended before changing it.
EXIT_CRIT=$((RETURN_SYNTAX + RETURN_TAGS_MISMATCH + RETURN_ENC + RETURN_NAME + \
  RETURN_NEWSDATE + RETURN_VERSION_PRES + RETURN_VERSION_INT + \
  RETURN_ABS_LINK + RETURN_FIX_LANG + RETURN_IMGRATIO))
# Total number of findings that are reported but do not block the commit
EXIT_WARN=$((RETURN_TAGS_NEW + RETURN_CSS_ELEMENT + RETURN_CSS_ATTR))

if ((EXIT_CRIT > 0)); then
  cat >&2 <<EOF
----------------------------------
The commit will be aborted. You have to fix the critical problems
first. These are marked with [CRIT].
EOF
elif ((EXIT_WARN > 0)); then
  cat >&2 <<EOF
----------------------------------
Your commit contains a few problematic, but not critical problems,
therefore it will be executed anyway. However, please fix them in a
following commit! They are marked with [WARN].
EOF
fi

# The exit status is the number of critical failures. Only the low 8 bits of
# an exit status reach the parent process, so a count of exactly 256 (or any
# multiple) would be seen as 0 and let the commit through; cap it at 255.
if ((EXIT_CRIT > 255)); then
  exit 255
fi
exit "$EXIT_CRIT"