done_progress_bar shan’t output a newline; fix from ⮡ tarent
[shellsnippets/shellsnippets.git] / mksh / roff2htm
index 1c8f751..abc9ea9 100644 (file)
@@ -1,8 +1,9 @@
-# $MirOS: src/scripts/roff2htm,v 1.59 2011/02/18 22:37:28 tg Exp $
+roff2htm_rcsid='$MirOS: src/scripts/roff2htm,v 1.88 2017/01/29 00:51:06 tg Exp $'
 # $ekkoBSD: catman2html.sh,v 1.2 2004/03/07 03:02:53 stephen Exp $
 #-
-# Copyright (c) 2004, 2005, 2006, 2007, 2011
-#      Thorsten “mirabilos” Glaser <tg@mirbsd.org>
+# Copyright (c) 2004, 2005, 2006, 2007, 2011, 2012, 2014, 2015,
+#              2016, 2017
+#      mirabilos <m@mirbsd.org>
 # Original version for ekkoBSD by:
 # Copyright (c) 2004
 #      Stephen Paskaluk <sap@mirbsd.org>
@@ -18,7 +19,7 @@
 #
 # Advertising materials mentioning features or use of this work must
 # display the following acknowledgement:
-#      This product includes material provided by Thorsten Glaser.
+#      This product includes material provided by mirabilos.
 #
 # This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to
 # the utmost extent permitted by applicable law, neither express nor
@@ -36,7 +37,7 @@
 # Note: this script assumes MirBSD filesystem interna: ino_t=uint32_t
 
 # check if mksh R31:2007/10/18 or up
-if [[ $KSH_VERSION = @(\@\(#\)MIRBSD KSH R)@(3[2-9]|[4-9][0-9]|[1-9][0-9][0-9])\ +([0-9])/+([0-9])/+([0-9])?(\ *) ]]; then
+if [[ $KSH_VERSION = @(\@\(#\)MIRBSD KSH R)@(3[2-9]|[4-9][0-9]|[1-9][0-9]+([0-9]))\ +([0-9])/+([0-9])/+([0-9])?(\ *) ]]; then
        i=0
 elif [[ $KSH_VERSION = @(\@\(#\)MIRBSD KSH R31)* ]]; then
        eval $(print "$KSH_VERSION" | sed 's#^.*R31 \([0-9]*\)/\([0-9]*\)/\([0-9]*\)\( .*\)*$#y=\1 m=\2 d=\3#')
@@ -62,100 +63,227 @@ roff2htm_gendate=$(date +"%F %T")         # current time
 set -A roff2htm_inodecache                     # inode cache (empty)
 roff2htm_machine=$(uname -m)                   # i386, sparc
 
-function do_convert {
-       typeset -i ws=0
-       sed -e 's/[      ]*$//g'                                        \
+function set_conversion_man {  # (re)define do_convert as a wrapper around do_convert_man
+       function do_convert {
+               do_convert_man "$@"  # all arguments are forwarded unchanged
+       }
+}
+function set_conversion_paper {  # (re)define do_convert as a wrapper around do_convert_paper
+       function do_convert {
+               do_convert_paper "$@"  # all arguments are forwarded unchanged
+       }
+}
+set_conversion_man  # default at load time: do_convert dispatches to do_convert_man
+
+function set_target_absolute {  # select an absolute URL as the prefix stored in roff2htm_rel
+       roff2htm_rel=https://www.mirbsd.org/  # global; unquoted where it is spliced into href attributes
+}
+function set_target_relative {  # select the parent directory as the prefix stored in roff2htm_rel
+       roff2htm_rel=../  # global; unquoted where it is spliced into href attributes
+}
+set_target_relative  # default at load time: roff2htm_rel is ../
+
+function do_convert_man {  # stdin to stdout filter: col -x, sed HTML markup rules, then blank-line squeezing
+       local -i _nl=0  # 1 while at least one empty line is pending output
+       col -x | sed \
            -e '/-$/N
 {
-s/\([0-9A-z][-.,0-9A-z]*\)-\n\(  *\)\([0-9A-z][-.,0-9A-z]*([1-9][A-z]*)\)\([^ ]*\) /\1\3\4\
+s/\([0-9A-z][-.,0-9A-z:]*\)-\n\(  *\)\([0-9A-z][-.,0-9A-z:]*([1-9][A-z]*)\)\([^ ]*\) /\1\3\4\
 \2/
 }'                                                                     \
+           -e 's#<\b_#≤#g' -e 's#>\b_#≥#g'                           \
+           -e 'y#&<>#Áþÿ#'                                             \
+                                                                       \
+           -e 's#[Oo]\b[Oo]\b+\b+#•#g'                                  \
+           -e 's#_\b|\b|#_\b|\b_\b|#g'                                      \
+           -e 's#+\b_#±#g'                                             \
+                                                                       \
+           -e 's#^[A-z][\b 0-9A-z]*$#</pre><h2>&</h2><pre>#'            \
+           -e 's#^  \([A-z][\b -%'\''-;=?-~]*\)$#</pre><h3>\1</h3><pre>#'       \
+                                                                       \
+           -e 's#\([^~<>\80-¿][\80-¿]*\)\b~#\1Ì\85#g'                         \
+           -e 's#\([^\b]\)~\b_#\1_̅\b #g'                                        \
+           -e 's#\([^\b]\)\([^<>_\80-¿][\80-¿]*\)\([Ì\85]*\)\b_#\1_\2\b\3#g'    \
+           -e 's#\([^<>\80-¿]\)[\80-¿]*\([Ì\85]*\)\b\([^<>\80-¿][\80-¿]*\)#<\1<\3\2>#g'   \
+           -e 's#\(<_<\([^>_]*\)>\)\b\1#<G>\2</G>#g'                    \
+           -e 's#<_<\([^>_]*\)>#<i>\1</i>#g'                           \
+           -e 's#<.<\([^>]*\)>#<b>\1</b>#g'                            \
+           -e 's#\b##g'                                                 \
+                                                                       \
+           -e '/<h[23]/s#</*[biG]>##g'                                 \
+           -e 's#</\([biG]\)><\1>##g'                                  \
+           -e 's#</\([biG]\)>\([- -*./:;?@^_~]*\)<\1>#\2#g'            \
+           -e 's#\([\ 1- 0-9A-z]\)\([$/_-]*\)\(<[biG]>\)#\1\3\2#g'       \
+           -e 's#\(</[biG]>\)\([$/_-]*\)\([\ 1- 0-9A-z]\)#\2\1\3#g'      \
+           -e 's#\(</[biG]>\))\([\ 1- ,.0-9A-z]\)#)\1\2#g'               \
+                                                                       \
+           -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\(3p\))#<a href="'$roff2htm_rel'man\2/\1.htm">&</a>#g' \
+           -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\([1-9]\)\(/[/0-9A-Za-z]*\)*)#<a href="'$roff2htm_rel'man\2/\1.htm">&</a>#g' \
+           -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\([PSU][MS][DM]\))#<a href="'$roff2htm_rel'man\2/\1.htm">&</a>#g' \
+           -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\(PAPERS\))#<a href="'$roff2htm_rel'man\2/\1.htm">&</a>#g' \
+           -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(GNU)#<a href="'$roff2htm_rel'manINFO/\1.html">&</a>#g' \
+           -e 's#)\(</[biG]>\)\([\ 1- 0-9A-z]\)#\1)\2#g'                 \
+                                                                       \
+           -e 's/Á/\&#38;/g'                                           \
+           -e 's/þ/\&#60;/g'                                           \
+           -e 's/ÿ/\&#62;/g'                                           \
+                                                                       \
+           -e 's#<G>#<b><i>#g' -e 's#</G>#</i></b>#g'                  \
+           -e 's#</b><b>##g'                                           \
+                                                                       \
+           -e '1s#^#<pre>#'                                            \
+           -e '$s#$#</pre>#'                                           \
+           -e 's#<pre></pre>##g'                                       \
+           -e 's#</pre><pre>##g'                                       \
+       | while IFS= read -r line; do  # post-process the sed output one line at a time
+               if [[ -n $line ]]; then
+                       (( _nl )) && [[ $line != '</pre>'* ]] && print  # one empty line per pending run, none directly before a line starting with </pre>
+                       print -r -- "$line"  # the line itself, without backslash interpretation
+                       _nl=0  # nothing pending any more
+               else
+                       _nl=1  # remember the empty line instead of printing it right away
+               fi
+       done
+}
+
+function do_convert_paper {  # stdin to stdout filter: col -x, sed markup rules (the added lines hold no <h2>/<h3> rules), blank-line squeezing
+       local -i _nl=0  # 1 while at least one empty line is pending output
+       col -x | sed \
            -e '/-$/N
 {
-s/\([0-9A-z][-.,0-9A-z]*\)-\n\(  *\)\([0-9A-z][-.,0-9A-z]*([1-9][A-z]*)\)\([^ ]*\) /\1\3\4\
+s/\([0-9A-z][-.,0-9A-z:]*\)-\n\(  *\)\([0-9A-z][-.,0-9A-z:]*([1-9][A-z]*)\)\([^ ]*\) /\1\3\4\
 \2/
 }'                                                                     \
+           -e 's#<\b_#≤#g' -e 's#>\b_#≥#g'                           \
            -e 'y#&<>#Áþÿ#'                                             \
                                                                        \
-           -e '/^[A-Z]\b/s#.\b##g'                                       \
-           -e 's#^[A-Z][ ,A-Z0-9]*$#</pre><h2>&</h2><pre>#'            \
-           -e 's#^  \([A-Z][ \b,A-Z0-9]*\)$#</pre><h3>\1</h3><pre>#'    \
-                                                                       \
-           -e 's#_\b\([^\80-¿][\80-¿]*\)#<i>\1</i>#g'                       \
-           -e 's#[^\80-¿][\80-¿]*\b\([^\80-¿][\80-¿]*\)#<b>\1</b>#g'            \
+           -e 's#[Oo]\b[Oo]\b+\b+#•#g'                                  \
+           -e 's#_\b|\b|#_\b|\b_\b|#g'                                      \
+           -e 's#+\b_#±#g'                                             \
                                                                        \
-           -e 's#</\([bi]\)><\1>##g'                                   \
-           -e 's#</b>\b<b>[^\80-¿][\80-¿]*##g'                              \
-           -e 's#</b>\b[^\80-¿][\80-¿]*<b>##g'                              \
-           -e 's#\b[^\80-¿][\80-¿]*##g'                                     \
-           -e 's#_</i<b><</b>i>##g'                                    \
+           -e 's#\([^~<>\80-¿][\80-¿]*\)\b~#\1Ì\85#g'                         \
+           -e 's#\([^\b]\)~\b_#\1_̅\b #g'                                        \
+           -e 's#\([^\b]\)\([^<>_\80-¿][\80-¿]*\)\([Ì\85]*\)\b_#\1_\2\b\3#g'    \
+           -e 's#\([^<>\80-¿]\)[\80-¿]*\([Ì\85]*\)\b\([^<>\80-¿][\80-¿]*\)#<\1<\3\2>#g'   \
+           -e 's#\(<_<\([^>_]*\)>\)\b\1#<G>\2</G>#g'                    \
+           -e 's#<_<\([^>_]*\)>#<i>\1</i>#g'                           \
+           -e 's#<.<\([^>]*\)>#<b>\1</b>#g'                            \
+           -e 's#\b##g'                                                 \
                                                                        \
-           -e 's#^\( \{2,3\}\)\([A-Z][ ,0-9A-z]*\)$#\1<b>\2</b>#'      \
+           -e 's#</\([biG]\)><\1>##g'                                  \
+           -e 's#</\([biG]\)>\([- -*./:;?@^_~]*\)<\1>#\2#g'            \
+           -e 's#\([\ 1- 0-9A-z]\)\([$/_-]*\)\(<[biG]>\)#\1\3\2#g'       \
+           -e 's#\(</[biG]>\)\([$/_-]*\)\([\ 1- 0-9A-z]\)#\2\1\3#g'      \
+           -e 's#\(</[biG]>\))\([\ 1- ,.0-9A-z]\)#)\1\2#g'               \
                                                                        \
-           -e '/^   /s#\(\([0-9A-z][-.,0-9A-z]*\)(\([1-9]\)[/0-9A-Za-z]*)\)#<a href=\"../man\3/\2.htm\" class=\"manlink\">\1</a>#g' \
-           -e '/^   /s#\(<i>\([0-9A-z][-.,0-9A-z]*\)</i>(\([1-9]\)[/0-9A-Za-z]*)\)#<a href=\"../man\3/\2.htm\" class=\"manlink\">\1</a>#g' \
-           -e '/^   /s#\(\([0-9A-z][-.,0-9A-z]*\)(\([PSU][MS][DM]\))\)#<a href=\"../man\3/\2.htm\" class=\"manlink\">\1</a>#g' \
-           -e '/^   /s#\(<i>\([0-9A-z][-.,0-9A-z]*\)</i>(\([PSU][MS][DM]\))\)#<a href=\"../man\3/\2.htm\" class=\"manlink\">\1</a>#g' \
-           -e '/^   /s#\(\([0-9A-z][-.,0-9A-z]*\)(\(PAPERS\))\)#<a href=\"../man\3/\2.htm\" class=\"manlink\">\1</a>#g' \
-           -e '/^   /s#\(<i>\([0-9A-z][-.,0-9A-z]*\)</i>(\(PAPERS\))\)#<a href=\"../man\3/\2.htm\" class=\"manlink\">\1</a>#g' \
-           -e '/^   /s#\(\([0-9A-z][-.,0-9A-z]*\)(GNU)\)#<a href=\"../manINFO/\2.htm\" class=\"manlink\">\1</a>#g' \
-           -e '/^   /s#\(<i>\([0-9A-z][-.,0-9A-z]*\)</i>(GNU)\)#<a href=\"../manINFO/\2.htm\" class=\"manlink\">\1</a>#g' \
-                                                                       \
-           -e 's#<b>+</b>\( *\)<b>o</b># \1•#'                               \
-           -e 's#<b>+</b>#•#'                                                \
-           -e 's#</\([bi]\)><\1>##g'                                   \
-           -e 's#</\([bi]\)>\([[:punct:][:space:]]*\)<\1>#\2#g'        \
-           -e 's#\([^[:punct:]]\)\([-!"#$%&'\''()*+,./:;=?@[\]^_`{|}~]*\)\(<[bi]>\)#\1\3\2#g'  \
-           -e 's#\(<i>[fh]t*p:[^<]*\)</i>/#\1/</i>#g'                  \
-           -e 's#\(<i>/[^<]*\)</i>/#\1/</i>#g'                         \
-           -e 's#<h3>*<b>*>#<h3>#g' -e 's#</b></h3>#</h3>#g'           \
+           -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\([1-9]\)[/0-9A-Za-z]*)#<a href="'$roff2htm_rel'man\2/\1.htm">&</a>#g' \
+           -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\([PSU][MS][DM]\))#<a href="'$roff2htm_rel'man\2/\1.htm">&</a>#g' \
+           -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(\(PAPERS\))#<a href="'$roff2htm_rel'man\2/\1.htm">&</a>#g' \
+           -e '/^ /s#\([0-9A-z][-.,0-9A-z:]*\)(GNU)#<a href="'$roff2htm_rel'manINFO/\1.html">&</a>#g' \
+           -e 's#)\(</[biG]>\)\([\ 1- 0-9A-z]\)#\1)\2#g'                 \
                                                                        \
            -e 's/Á/\&#38;/g'                                           \
            -e 's/þ/\&#60;/g'                                           \
            -e 's/ÿ/\&#62;/g'                                           \
                                                                        \
+           -e 's#<G>#<b><i>#g' -e 's#</G>#</i></b>#g'                  \
+           -e 's#</b><b>##g'                                           \
+                                                                       \
            -e '1s#^#<pre>#'                                            \
            -e '$s#$#</pre>#'                                           \
            -e 's#<pre></pre>##g'                                       \
            -e 's#</pre><pre>##g'                                       \
-           -e 's#<a href="../man'${1:-0}'/#<a href="#g'                \
        | while IFS= read -r line; do
                if [[ -n $line ]]; then
+                       (( _nl )) && [[ $line != '</pre>'* ]] && print  # one empty line per pending run, none directly before a line starting with </pre>
                        print -r -- "$line"
-                       ws=0
+                       _nl=0  # nothing pending any more
                else
-                       (( !ws++ )) && print
+                       _nl=1  # remember the empty line instead of printing it right away
                fi
        done
 }
 
 function output_header {
-       print '<?xml version="1.0" encoding="utf-8" ?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+       print '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"><head>
  <meta http-equiv="content-type" content="text/html; charset=utf-8" />
+ <meta name="MSSmartTagsPreventParsing" content="TRUE" />
  <title>RTFM '$1\($2')</title>
  <meta name="robots" content="index, follow" />
  <link rel="canonical" href="https://www.mirbsd.org/man'$roff2htm_machine/$1.$2'" />
+ <style type="text/css"><!--/*--><![CDATA[/*><!--*/
+       body {
+               background-color:#000000;
+               color:#666666;
+               font-family:serif;
+       }
+       a {
+               color:inherit;
+               text-decoration:none;
+               border-bottom:1px dashed;
+       }
+       a:visited {
+               text-decoration:none;
+               border-bottom:1px dotted;
+       }
+       a:hover {
+               text-decoration:none;
+               border-bottom:1px double;
+       }
+       pre {
+               line-height:1.12;
+               color:#FFBF00;
+       }
+       b {
+               color:#FFEF00;
+               font-weight:normal;
+       }
+       i {
+               font-style:normal;
+               border-bottom:1px solid #FFBF00;
+       }
+       b i,i b {
+               color:#FFEF00;
+               font-weight:normal;
+               font-style:normal;
+               border-bottom:1px solid #FFEF00;
+       }
+       h1 {
+               color:#FFEF00;
+               font-size:xx-large;
+               font-family:serif;
+       }
+       h2 {
+               color:#FFFFFF;
+               font-size:x-large;
+               font-family:sans-serif;
+       }
+       h3 {
+               color:#CCCCCC;
+               font-size:large;
+               font-family:sans-serif;
+       }
+ /*]]>*/--></style>
 </head><body>
-<h1>MirOS Manual: <a href="../man'$2/$1'.htm" class="manlink">'$1\($2')</a></h1>'
+<h1>MirOS Manual: <a href="'$roff2htm_rel'man'$2/$1'.htm">'$1\($2')</a></h1>'  # heading link is built from roff2htm_rel, $2 (directory suffix) and $1 (file stem); presumably section and page name
 }
 
 function output_footer {
-       print '<hr /><p style="font-size:xx-small;">Generated on' \
-           $roff2htm_gendate 'by
<tt>$MirOS: src/scripts/roff2htm,v 1.59 2011/02/18 22:37:28 tg Exp $</tt></p>
-<p>These manual pages and other documentation are <a class="manlink"
- href="../man7/BSD-Licence.htm">copyrighted</a> by their respective writers;
+       print '
+<hr /><p style="font-size:xx-small;">Generated on' $roff2htm_gendate by \
'<tt>'$roff2htm_rcsid'</tt></p>
+<p>These manual pages and other documentation are <a
+ href="'$roff2htm_rel'man7/BSD-Licence.htm">copyrighted</a> by their respective writers;
  their source is available at our <a href="http://cvs.mirbsd.de/">CVSweb</a>,
- AnonCVS, and other mirrors. The rest is Copyright Â© 2002â\80\922011 <a
- href="https://www.mirbsd.org/">The MirOS Project</a>, Germany.<br /><i
- style="font-size:3pt;"> This product includes material provided by Thorsten
Glaser.</i></p>
+ AnonCVS, and other mirrors. The rest is Copyright Â© 2002â\80\932017 <a
+ href="https://www.mirbsd.org/">The MirOS Project</a>, Germany.<br /><span
+ style="font-size:3pt; font-style:italic;">This product includes material
provided by <b>mirabilos</b>.</span></p>
 <p style="font-size:x-small;">This manual page’s HTML representation
  is supposed to be <a href="http://validator.w3.org/check/referer">valid
- XHTML/1.1</a>; if not, please send a bug report – diffs preferred.</p>
+ XHTML/1.1</a>; if not, please send a bug report — diffs preferred.</p>
 </body></html>'
 }
 
@@ -174,8 +302,8 @@ function do_conversion_verbose {
 
 # convert_page /path/to/man.cat1 /targetpath
 function convert_page {
-       typeset fn=$1 page sect tn
-       typeset -Uui ino=$(stat -Lf %i $fn 2>/dev/null)
+       local fn=$1 page sect tn
+       local -Uui ino=$(stat -Lf %i $fn 2>/dev/null)
        page=${fn##*/}                  # basename
        page=${page%.0}                 # manual page name
        sect=${fn%/*}                   # dirname
@@ -190,7 +318,7 @@ function convert_page {
                # patch in the additional name(s)
                ed -s $2/$tn <<-EOF
                        /<title>/s#</title>#, $page($sect)&#
-                       /<h1>/s#</h1>#, <a href="../$tn" class="manlink">$page($sect)</a>&#
+                       /<h1>/s#</h1>#, <a href="$roff2htm_rel$tn">$page($sect)</a>&#
                        wq
                EOF
        else
@@ -208,10 +336,11 @@ function output_htaccess {
 
 # convert_all /path/to/share/man /targetpath
 function convert_all {
-       typeset tp=${2:-$(pwd)/mbsdman}         # target basepath
-       typeset x f
+       local tp=${2:-$(pwd)/mbsdman}           # target basepath
+       local x f
 
-       (find ${1:-/usr/share/man}/cat{[1-9],3p} -name \*.0 2>&- | sort -f) |&
+       (find ${1:-/usr/share/man}/cat{[1-9],3p} -name \*.0 2>/dev/null | \
+           sort -f) |&
        for x in 1 2 3 3p 4 5 6 7 8 9; do
                mkdir -p $tp/man$x      # one per section
                output_htaccess >$tp/man$x/.htaccess