Retain line breaks and spacing of XML/XHTML source files

svn path=/trunk/; revision=33309
This commit is contained in:
2016-05-09 20:45:56 +00:00
parent f83f643aa3
commit 489de90db4
4 changed files with 19 additions and 21 deletions
+1 -1
View File
@@ -203,7 +203,7 @@ xslt_dependencies(){
file="$1"
cat "$file" \
| tr '\n\t' ' ' \
| tr '\n\t\r' ' ' \
| sed -r 's;(<xsl:(include|import)[^>]*>);\n\1\n;g' \
| sed -nr '/<xsl:(include|import)[^>]*>/s;^.*href *= *"([^"]*)".*$;\1;gp'
}
+11 -11
View File
@@ -15,18 +15,18 @@ process_file(){
build_xmlstream "$shortname" "$lang" "$olang" \
| xsltproc "$processor" - \
| sed -r '
s;< *(a|link)( [^>]*)? href="https?://'"$domain"'/([^"]*)";<\1\2 href="/\3";g
s;< *(a|link)( [^>]*)? href='\''https?://'"$domain"'/([^'\'']*)'\'';<\1\2 href='\''/\3'\'';g
| sed -r ':X; N; $!bX;
s;<[\r\n\t ]*(a|link)([\r\n\t ][^>]*)?[\r\n\t ]href="https?://'"$domain"'/([^"]*)";<\1\2 href="/\3";g
s;<[\r\n\t ]*(a|link)([\r\n\t ][^>]*)?[\r\n\t ]href='\''https?://'"$domain"'/([^'\'']*)'\'';<\1\2 href='\''/\3'\'';g
s;< *(a|link)( [^>]*)? href="(https?://[^"]*)";<\1\2 href="#== norewrite ==\3";g
s;< *(a|link)( [^>]*)? href="([^#"])([^"]*/)?([^\./"]*\.)(html|rss|ics)(#[^"]*)?";<\1\2 href="\3\4\5'"$lang"'.\6\7";g
s;< *(a|link)( [^>]*)? href="([^#"]*/)(#[^"]*)?";<\1\2 href="\3index.'"$lang"'.html\4";g
s;< *(a|link)( [^>]*)? href="#== norewrite ==(https?://[^"]*)";<\1\2 href="\3";g
s;<[\r\n\t ]*(a|link)([\r\n\t ][^>]*)?[\r\n\t ]href="(https?://[^"]*)";<\1\2 href="#== norewrite ==\3";g
s;<[\r\n\t ]*(a|link)([\r\n\t ][^>]*)?[\r\n\t ]href="([^#"])([^"]*/)?([^\./"]*\.)(html|rss|ics)(#[^"]*)?";<\1\2 href="\3\4\5'"$lang"'.\6\7";g
s;<[\r\n\t ]*(a|link)([\r\n\t ][^>]*)?[\r\n\t ]href="([^#"]*/)(#[^"]*)?";<\1\2 href="\3index.'"$lang"'.html\4";g
s;<[\r\n\t ]*(a|link)([\r\n\t ][^>]*)?[\r\n\t ]href="#== norewrite ==(https?://[^"]*)";<\1\2 href="\3";g
s;< *(a|link)( [^>]*)? href='\''(https?://[^'\'']*)'\'';<\1\2 href='\''#== norewrite ==\3'\'';g
s;< *(a|link)( [^>]*)? href='\''([^#'\''])([^'\'']*/)?([^\./'\'']*\.)(html|rss|ics)(#[^'\'']*)?'\'';<\1\2 href='\''\3\4\5'"$lang"'.\6\7'\'';g
s;< *(a|link)( [^>]*)? href='\''([^#'\'']*/)(#[^'\'']*)?'\'';<\1\2 href='\''\3index.'"$lang"'.html\4'\'';g
s;< *(a|link)( [^>]*)? href='\''#== norewrite ==(https?://[^'\'']*)'\'';<\1\2 href='\''\3'\'';g
s;<[\r\n\t ]*(a|link)([\r\n\t ][^>]*)?[\r\n\t ]href='\''(https?://[^'\'']*)'\'';<\1\2 href='\''#== norewrite ==\3'\'';g
s;<[\r\n\t ]*(a|link)([\r\n\t ][^>]*)?[\r\n\t ]href='\''([^#'\''])([^'\'']*/)?([^\./'\'']*\.)(html|rss|ics)(#[^'\'']*)?'\'';<\1\2 href='\''\3\4\5'"$lang"'.\6\7'\'';g
s;<[\r\n\t ]*(a|link)([\r\n\t ][^>]*)?[\r\n\t ]href='\''([^#'\'']*/)(#[^'\'']*)?'\'';<\1\2 href='\''\3index.'"$lang"'.html\4'\'';g
s;<[\r\n\t ]*(a|link)([\r\n\t ][^>]*)?[\r\n\t ]href='\''#== norewrite ==(https?://[^'\'']*)'\'';<\1\2 href='\''\3'\'';g
'
}
+2 -4
View File
@@ -118,11 +118,9 @@ auto_sources(){
else
list_sources "$sourcesfile" "$lang"
fi | while read source; do
echo -n "$source\t"
include_xml "$source"
echo
printf '\n### filename="%s" ###\n%s' "$source" "$(include_xml "$source")"
done \
| sed -r 's:^([^\t]+)\t[^<]*(< *[^ >]+)([^>]*>):\2 filename="\1" \3:'
| sed -r ':X; N; $!bX; s;\n### (filename="[^\n"]+") ###\n[^<]*(<[^>]+)>;\2 \1>;g'
}
lang_sources(){
+5 -5
View File
@@ -16,8 +16,8 @@ include_xml(){
[ -z "$enc" ] && enc="UTF-8"
iconv -f "$enc" -t "UTF-8" "$file" \
| tr '\n\t\r' ' ' \
| sed -r 's:<(\?[xX][mM][lL]|!DOCTYPE) [^>]+>::g
| sed -r ':X; N; $!bX;
s:<(\?[xX][mM][lL]|!DOCTYPE)[[:space:]]+[^>]+>::g
s:<[^!][^>]*>::;
s:</[^>]*>([^<]*((<[^>]+/>|<!([^>]|<[^>]*>)*>|<\?[^>]+>)[^<]*)*)?$:\1:;'
fi
@@ -28,8 +28,8 @@ get_attributes(){
# XHTML file
file="$1"
cat "$file" \
| tr '\n\t\r' ' ' \
| sed -rn 's;^.*< *([xX]|[xX]?[hH][tT])[mM][lL] +([^>]*)>.*$;\2;p'
sed -rn ':X; N; $!bX;
s;^.*<[\n\t\r ]*([xX]|[xX]?[hH][tT])[mM][lL][\n\t\r ]+([^>]*)>.*$;\2;p' \
"$file"
}