:
##########################################################################
# Title      :	htmltitle - extract title from HTML document
# Author     :	Heiner Steven
# Date       :	1997-01-14
# Requires   :	nawk (or any awk providing "substr()")
# Category   :	HTML
# SCCS-Id.   :	@(#) htmltitle	1.2 04/02/18
##########################################################################
# Description
#	Prints the text found between "<title>" and "</title>" for each
#	HTML file named on the command line (or for standard input if no
#	file is given). Each title is terminated by a newline and, when
#	a file name is known, prefixed with "file: ". All other tags
#	inside the title are dropped.
#
# Changes
# 15.01.97 hs	SunOS nawk does not handle printf ("%c", c) right (0.2)
##########################################################################

PN=`basename "$0"`			# Program name
VER='1.2'

# Needs awk with "substr()". A caller-supplied $AWK always wins; otherwise
# prefer nawk and fall back to plain awk on systems without a nawk binary.
if [ -z "$AWK" ]
then
    if type nawk >/dev/null 2>&1
    then AWK=nawk
    else AWK=awk
    fi
fi

# Usage - print a short usage message to standard error and exit with 1
Usage () {
    echo >&2 "$PN - extract title from HTML document, $VER (hs '97)
usage: $PN [file ...]"
    exit 1
}

# Msg - print each argument as a separate "program: text" line on stderr
Msg () {
    for MsgLine
    do echo "$PN: $MsgLine" >&2
    done
}

# Fatal - print the arguments using Msg, then exit with 1
Fatal () { Msg "$@"; exit 1; }

# Parse options with the getopts builtin instead of the external getopt
# command: re-reading the output of getopt splits file names containing
# white space. The leading ":" selects silent mode, so unknown options
# end up in the "*" branch without a message from the shell itself.
while getopts :h Opt
do
    case "$Opt" in
	# your flags here
	h)	Usage;;
	*)	Usage;;
    esac
done
shift `expr $OPTIND - 1`		# Remaining arguments are file names

$AWK '
    {
	len = length ($0)
	for ( i=1; i<=len; i++ ) {
	    c = substr ($0, i, 1)
	    if ( c == "<" ) {
		# The patterns use plain "<" and ">": the escaped forms
		# "\<" and "\>" are word boundary operators in GNU awk,
		# which would keep the end tag from ever matching.
		if ( substr ($0, i, 7) ~ /^<[tT][iI][tT][lL][eE]>$/ ) {
		    # Start of title. Depending on the awk in use,
		    # FILENAME is "" or "-" for standard input; print
		    # the prefix only for a real file name.
		    if ( FILENAME != "" && FILENAME != "-" )
			printf "%s: ", FILENAME
		    InTitle = 1
		} else if ( substr ($0, i, 8) ~ /^<\/[tT][iI][tT][lL][eE]>$/ ) {
		    # End of title: terminate the output line
		    print ""
		    InTitle = 0
		}

		# Skip tag: stop on the closing ">" (or at end of line);
		# the increment of the for loop then steps past it
		while ( i <= len && substr ($0, i, 1) != ">" ) i++
		continue
	    }
	    # Appending "" forces a string; some nawk versions print
	    # single characters wrongly otherwise (see Changes, 0.2)
	    if ( InTitle ) printf ("%s", c "");
	}
    }
    END {
	if ( InTitle ) print ""		# terminating tag not found
    }
' "$@"