Completely rewrote tagmap generation
All checks were successful
the build was successful

Fixes #63
Fixes #591
This commit is contained in:
Reinhard Müller 2019-03-07 23:34:10 +01:00
parent 072a679bcf
commit 4d1c76951d
8 changed files with 172 additions and 146 deletions

100
Makefile
View File

@ -1,3 +1,16 @@
# -----------------------------------------------------------------------------
# Makefile for "premake" step
# -----------------------------------------------------------------------------
# This Makefile creates some .xml and xhtml files which serve as source files
# in the main build run. It is executed in the source directory, before the
# Makefile for the main build run is constructed and executed.
#
# It also touches all the .sources files which refer to added, modified, or
# deleted .xml files. This way, we avoid that in the main build Makefile each
# .html file has to list a long and changing list of .xml prerequisites - it is
# sufficient to just have the .sources file as a prerequisite.
# -----------------------------------------------------------------------------
.PHONY: all .FORCE
.FORCE:
all:
@ -17,7 +30,7 @@ all: $(SUBDIRS)
# Handle local menus
# -----------------------------------------------------------------------------
MENUSOURCES := $(shell find -name '*.xhtml' |xargs grep -l '<localmenu.*</localmenu>' )
MENUSOURCES := $(shell find -name '*.xhtml' | xargs grep -l '<localmenu.*</localmenu>' )
localmenuinfo.en.xml: ./tools/buildmenu.xsl $(MENUSOURCES)
{ printf '<localmenuset>'; \
@ -47,65 +60,48 @@ d_year.en.xml: $(if $(findstring $(YEAR),$(shell cat d_year.en.xml)),,.FORCE)
all: d_year.en.xml d_month.en.xml d_day.en.xml
# -----------------------------------------------------------------------------
# Update .sources files
# Generate tag maps
# -----------------------------------------------------------------------------
# use shell globbing to work around faulty globbing in gnu make
SOURCEDIRS = $(shell ls -d `sed -rn 's;^(.*/)[^/]*:(\[.*\])$$;\1;gp' $@`)
SOURCEREQS = $(shell ./build/source_globber.sh sourceglobs $@ |sed 's;$$;.??.xml;g' )
# Generation of tag maps is handled in an external script which generates
# tools/tagmaps/*.map, tags/tagged-*.en.xhtml, and tags/tagged-*.sources. The
# tag map files cannot be targets in this Makefile, because the list of map files
# is not known when the Makefile starts - some new tags might be created when
# generating the .xml files in the news/generated_xml directory.
tagmaps: $(SUBDIRS)
@build/make_tagmaps.sh
all: $(shell find ./ -name '*.sources')
all: tagmaps
.PHONY: tagmaps
# -----------------------------------------------------------------------------
# generate tag maps
# Touch .sources files for which the web pages must be rebuilt
# -----------------------------------------------------------------------------
# We only have to look at the English files; all translations will have the
# same tags. This speeds up the process and brings the list below the length
# limit for a command line (for now).
TAGMAP := $(shell find ./ -name '*.en.xml' \
| xargs ./build/source_globber.sh map_tags \
| sed -r "s;';'\'';g; s;[^ ]+;'&';g;" \
)
TAGNAMES := $(shell printf '%s\n' $(TAGMAP) \
| sed '/\...\.xml$$/d' \
| grep -vE '[\$%/:()]' \
| sort -u \
)
MAPNAMES := $(shell printf 'tools/tagmaps/%s.map ' $(TAGNAMES))
INDEXNAMES := $(shell printf 'tags/tagged-%s.en.xhtml ' $(TAGNAMES))
INDEXSOURCES := $(shell printf 'tags/tagged-%s.sources ' $(TAGNAMES))
all: $(INDEXNAMES)
tags/tagged-%.en.xhtml: tags/tagged.en.xhtml
cp $< $@
# We update a tagmap whenever any of the XML files mentioned therein *or* a
# translation of such an XML file changes. Following that, the matching
# .sources file is also updated, which causes a rebuild of the taglist page.
all: $(INDEXSOURCES)
tags/tagged-%.sources: tools/tagmaps/%.map
printf '%s:[$*]\n' 'news/*/news' news/generated_xml/ news/nl/nl 'events/*/event' >$@
printf 'd_day:[]' >>$@
MAPREQS = $(shell printf '%s ' $(TAGMAP) \
| sed -r 's;[^ ]+\...\.xml;\n&;g' \
| grep ' $*' \
| cut -d' ' -f1 \
| sed -r 's;\.en\.xml;.??.xml;' \
)
all: $(MAPNAMES)
# -----------------------------------------------------------------------------
# Second Expansion rules
# -----------------------------------------------------------------------------
# Secondary expansion means that the SOURCEDIRS and SOURCEREQS variables will
# be executed once for each target, and it allows us to use the variable $@
# within the expression.
.SECONDEXPANSION:
%.sources: $$(SOURCEDIRS) $$(SOURCEREQS) | $(MAPNAMES) $(INDEXSOURCES)
# This variable contains all the directories listed in the .sources file. It is
# added to the prerequisites so that the removal of a file from such a
# directory also triggers a rebuild of the web pages which have included the
# now removed file. However, we explicitly exclude "." (the root source
# directory) because that also contains a lot of other files.
SOURCEDIRS = $(shell ls -d `sed -rn 's;^(.*/)[^/]*:(\[.*\])$$;\1;gp' $@` | grep -v '^\.$$')
# This variable contains all the actual .xml files covered by the .sources
# file. It obviously is a prerequisite because a page has to be rebuilt if any
# of the .xml files included into it has changed.
SOURCEREQS = $(shell ./build/source_globber.sh sourceglobs $@ | sed 's;$$;.??.xml;g' )
# We simply touch the .sources file. The corresponding .xhtml files in all
# languages depend on the .sources file, so all languages will be rebuilt in
# the main build run.
%.sources: $$(SOURCEDIRS) $$(SOURCEREQS) | tagmaps
touch $@
tools/tagmaps/%.map: $$(MAPREQS) | $(SUBDIRS)
printf '%s\n' $^ > $@
# The .sources files in the tags directory are already handled by
# make_tagmaps.sh
all: $(shell find * -name '*.sources' -not -path 'tags/*')

View File

@ -8,9 +8,6 @@ if [ -z "$inc_arguments" ]; then
while [ "$#" -gt 0 ]; do
case "$1" in
--legacyglobs)
readonly legacyglobs=true
;;
-s|--statusdir|--status-dir)
[ "$#" -gt 0 ] && shift 1 && statusdir="$1"
;;
@ -57,11 +54,6 @@ if [ -z "$inc_arguments" ]; then
[ "$#" -gt 0 ] && shift 1 && processor="$1"
[ "$#" -gt 0 ] && shift 1 && olang="$1"
;;
map_tags)
command="$1$command"
shift 1
break
;;
sourceglobs)
command="$1$command"
[ "$#" -gt 0 ] && shift 1 && sourcesfile="$1"
@ -113,7 +105,6 @@ if [ -z "$inc_arguments" ]; then
sourceglobs) [ -z "$sourcesfile" ] && die "Missing .sources file" ;;
lang_sources) [ -z "$sourceglobfile" -o -z "$lang" ] && die "Need source globfile and language" ;;
cast_refglobs) [ -z "$globfile" -o -z "$reffile" ] && die "Need globfile and reffile" ;;
map_tags) true;;
wakeup_news) true;;
*help*) print_help; exit 0 ;;
*) die "Urecognised command or no command given" ;;

View File

@ -66,7 +66,6 @@ case "$command" in
svn_build_into
fi ;;
build_into) build_into ;;
map_tags) map_tags "$@";;
process_file) process_file "$workfile" "$processor" "$olang" ;;
build_xmlstream) build_xmlstream "$(get_shortname "$workfile")" "$(get_language "$workfile")" "$olang" ;;
tree_maker) tree_maker "$tree" "$target" ;;

98
build/make_tagmaps.sh Executable file
View File

@ -0,0 +1,98 @@
#!/bin/sh
# -----------------------------------------------------------------------------
# Update tagmaps (tools/tagmaps/*.map) and tag list pages (tags/tagged-*)
# -----------------------------------------------------------------------------
# This script collects all <tag> content from all XML files in the source
# directory, and from that creates or updates the following files:
#
# tools/tagmaps/<tag>.map - a list of all XML files containing that tag. It is
#   used by the "premake" Makefile via "build/source_globber.sh sourceglobs" to
#   determine the XML files covered by each .sources file.
#
# tags/tagged-<tag>.en.xhtml - a source file which will be built by the
#   standard build process into a web page listing all news and events with
#   this tag.
#
# tags/tagged-<tag>.sources - the pattern list of XML files to be included when
#   building that web page.
#
# Each of these files is only touched when the actual file list for a tag
# changes, so the makefile can determine which taglist web pages must be
# rebuilt.
#
# Changing or removing tags in XML files is also considered, in which case a
# file is removed from the map, and the taglist web page source files are
# touched so the build script will rebuild the corresponding web page.
#
# When a tag has been removed from the last XML file where it has been used,
# all the files listed above are deleted.
#
# The script must be run from the root of the source directory: all paths
# below (build/, tools/, tags/) are relative to the current directory.
# -----------------------------------------------------------------------------

set -e

echo "Updating tag maps"

# -----------------------------------------------------------------------------
# Create a private, empty temporary directory
# -----------------------------------------------------------------------------
# A unique directory (instead of a fixed /tmp/tagmaps) keeps concurrent runs
# from clobbering each other's maps. The traps remove it even when the script
# aborts because of "set -e" or a signal.

tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/tagmaps.XXXXXX")
trap 'rm -rf "${tmpdir}"' EXIT
trap 'exit 1' HUP INT TERM

# The comparison and copy steps below need the target directory to exist.
mkdir -p tools/tagmaps

# -----------------------------------------------------------------------------
# Create a complete and current map of which tag is used in which files
# -----------------------------------------------------------------------------

echo "* Collecting list of files for each tag"

# grep -l preselects the files containing a closing </tag>, so xsltproc is only
# started for files that can contribute a tag. Reading the sorted list line by
# line keeps file names intact even if they contain blanks or glob characters.
find * -name '*.xml' -exec grep -l '</tag>' {} + | sort | while IFS= read -r xml_file; do
  # stdin is redirected so xsltproc can never swallow the file list above
  xsltproc build/xslt/get_tags.xsl "${xml_file}" < /dev/null | while IFS= read -r raw_tag; do
    # Normalise the tag name: drop blanks, '+', '/', '_' and '-', then
    # lowercase. The '-' has to come last in the set: written as ' +-/_', tr
    # reads '+-/' as a range and would also delete ',' and '.'.
    tag=$(printf '%s\n' "${raw_tag}" | tr -d ' +/_-' | tr '[:upper:]' '[:lower:]')
    case "${tag}" in
      '')
        # empty <tag> element (or one consisting only of stripped characters)
        continue
        ;;
      .*)
        # would create a hidden map file which the loops below cannot see
        echo "  Warning: ignoring tag '${raw_tag}' in ${xml_file}" >&2
        continue
        ;;
    esac
    printf '%s\n' "${xml_file}" >> "${tmpdir}/${tag}.map"
  done
done

# -----------------------------------------------------------------------------
# Update only those map files where a change has happened (an XML file has been
# added or removed) so make can later see what has changed since the last build
# -----------------------------------------------------------------------------

echo "* Checking for updated tags"

for new_map in "${tmpdir}"/*.map; do
  # If no tag was found at all, the glob stays unexpanded; skip it.
  [ -f "${new_map}" ] || continue
  map_file=$(basename "${new_map}")
  # cmp fails both when the maps differ and when the old map does not exist
  # yet (new tag); in both cases the files for this tag are (re)written.
  if ! cmp -s "${new_map}" "tools/tagmaps/${map_file}" 2>/dev/null; then
    tag=$(basename "${map_file}" .map)
    echo " * Tag ${tag} has been updated."
    cp "${new_map}" "tools/tagmaps/${map_file}"
    cp "tags/tagged.en.xhtml" "tags/tagged-${tag}.en.xhtml"
    # NOTE(review): the events pattern uses the prefix "event" (singular), as
    # in the Makefile rule this script replaces; "events/*/events" looked like
    # a typo - confirm against the naming of the event XML files.
    {
      echo "events/*/event:[${tag}]"
      echo "news/*/news:[${tag}]"
      echo "news/generated_xml/:[${tag}]"
      echo "news/nl/nl:[${tag}]"
      echo "d_day:[]"
    } > "tags/tagged-${tag}.sources"
  fi
done

# -----------------------------------------------------------------------------
# Remove the map files for tags which have been completely deleted
# -----------------------------------------------------------------------------

echo "* Checking for deleted tags"

for old_map in tools/tagmaps/*.map; do
  # Unexpanded glob: there are no map files yet.
  [ -f "${old_map}" ] || continue
  map_file=$(basename "${old_map}")
  if [ ! -f "${tmpdir}/${map_file}" ]; then
    tag=$(basename "${map_file}" .map)
    echo " * Tag ${tag} has been deleted."
    # -f: with "set -e", a page source that is already gone must not abort
    # the cleanup of the remaining files.
    rm -f "tools/tagmaps/${map_file}" \
          "tags/tagged-${tag}.en.xhtml" \
          "tags/tagged-${tag}.sources"
  fi
done

# -----------------------------------------------------------------------------
# Remove the temporary directory
# -----------------------------------------------------------------------------

echo "* Cleaning up"

rm -rf "${tmpdir}"

View File

@ -232,9 +232,6 @@ PROCFLAGS = --source "$basedir" --statusdir "$statusdir" --domain "$domain"
INPUTDIR = $input
OUTPUTDIR = $output
# cannot store find results in variable because it will result in too many arguments for the shell
# \${INPUTDIR}/tagmap: \$(shell find "$basedir" -name '*.[a-z][a-z].xml')
# find "$basedir" -name '*.[a-z][a-z].xml' |xargs \${PGLOBBER} \${PROCFLAGS} map_tags >\${INPUTDIR}/tagmap
MakeHead
forcelog Make_globs; Make_globs="$(logname Make_globs)"

View File

@ -6,7 +6,6 @@ basedir="${0%/*}/.."
. "$basedir/build/arguments.sh"
case "$command" in
map_tags) map_tags "$@";;
sourceglobs) sourceglobs "$sourcesfile" ;;
lang_sources) lang_sources "$sourceglobfile" "$lang" ;;
cast_refglobs) cast_refglobs "$globfile" "$reffile" ;;

View File

@ -1,61 +1,10 @@
#!/bin/bash
inc_sources=true
[ -z "$inc_misc" ] && . "$basedir/build/misc.sh"
[ -z "$inc_xmlfiles" ] && . "$basedir/build/xmlfiles.sh"
validate_tagmap(){
tagmap="$basedir/tagmap"
sed -rn 's;^(.*\.xml) +.*$;\1;p' "$tagmap" |while read fn; do
[ -f "$fn" ] || touch -cd@0 "$tagmap"
done
}
map_tags(){
grep -l '</tag>' "$@" \
| while read xml; do
printf '%s ' "$xml"
unicat "$xml" \
| sed -rn '# Normalise XML (strip comments, unify white-spaces)
:X; $bY; N; bX; :Y;
s;[\n\t ]+; ;g;
s; ?([</>]) ?;\1;g
s;<!([^>]|<[^>]*>)*>;;g
# Loop over <tags> section
s;.*<tags( [^>])?>(.+)</tags>.*;\2;
tK; b; :K;
# Collect new format tags
/<tag key="[^"]+"(\/>|>[^<]*<\/tag>)/{
H; s;.*<tag key="([^"]+)"(/>|>[^<]*</tag>).*;\1;
x; s;(.*)<tag key="([^"]+)"(/>|>[^<]*</tag>);\1;
bK;
}
# Collect old format tags
/<tag( [^>]+)?>([^<]+)<\/tag>/{
H; s;.*<tag( [^>]+)?>([^<]+)</tag>.*;\2;
x; s;(.*)<tag( [^>]+)?>([^<]+)</tag>;\1;
bK;
}
H;x;
# Loop end
# delete junk (non-tag content in the tags section)
s;\n+[^\n]*$;;
# normalise tagnames
y;ABCDEFGHIJKLMNOPQRSTUVWXYZ;abcdefghijklmnopqrstuvwxyz;
s;[-_+ /];;g
# put tags in one line and print
s;(\n|$); ;g; p;
'
done
}
tagging_sourceglobs(){
sourceglobs(){
# read a .sources file and glob up referenced xml files for processing in list_sources
sourcesfile="$1"
@ -80,33 +29,6 @@ tagging_sourceglobs(){
| sort -u
}
legacy_sourceglobs(){
# read a .sources file and glob up referenced
# source files for processing in list_sources
sourcesfile="$1"
if [ -f "$sourcesfile" ]; then
sed -rn 's;:global$;*.[a-z][a-z].xml;gp' "$sourcesfile" \
| while read glob; do
echo "$basedir/"$glob
done \
| sed -rn 's:\.[a-z]{2}\.xml( |$):\n:gp' \
| sort -u
fi
}
[ -z "$inc_misc" ] && . "$basedir/build/misc.sh"
sourceglobs(){
if [ "$legacyglobs" = true ]; then
legacy_sourceglobs "$@"
elif [ -f "$1" ] && ! egrep -q '^.+:\[.*\]$' "$1"; then
debug "WARNING! File in legacy format: $1"
legacy_sourceglobs "$@"
else
tagging_sourceglobs "$@"
fi
}
list_sources(){
# read a .sources file and generate a list
# of all referenced xml files with preference

24
build/xslt/get_tags.xsl Normal file
View File

@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>

<!-- ====================================================================== -->
<!-- Tag extractor: prints the text of every <tag> element, one per line    -->
<!-- ====================================================================== -->
<!-- The stylesheet walks the complete input document. For each <tag>       -->
<!-- element it writes the string value of that element followed by a       -->
<!-- newline; nothing else from the document reaches the output.            -->
<!-- It is called by the script build/make_tagmaps.sh.                      -->
<!-- ====================================================================== -->

<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="text" encoding="UTF-8"/>

  <!-- Low priority catch-all: any attribute or node without a more specific
       template produces no output itself; processing just continues with
       its attributes and children. This replaces the built-in rule which
       would copy text nodes to the output. -->
  <xsl:template match="@*|node()" priority="-1">
    <xsl:apply-templates select="@*|node()"/>
  </xsl:template>

  <!-- A <tag> element: output its string value with a newline appended.
       The content of the element is not processed any further. -->
  <xsl:template match="tag">
    <xsl:value-of select="concat(., '&#10;')"/>
  </xsl:template>

</xsl:stylesheet>