:
##########################################################################
# Shellscript:  newssub - top 10 news subjects
# Version    :  1.7
# Author     :  Heiner Steven
# Category   :  News
# Date       :  1995-10-02
# SCCS-Id.   :  @(#) newssub 1.7 04/02/18
##########################################################################
# Description
#       Creates a "top 10 news subjects" statistics for a given
#       newsgroup. The newsgroup articles are read directly from
#       the news spool directory.
#
# See also
#       newstop
##########################################################################

PN=`basename "$0"`                      # program name
VER='1.7'

# Both settings may be overridden from the environment.
: ${GREP:=egrep}
: ${NEWSSPOOL:=/var/spool/news}

# Search a "new" awk (nawk) or GNU awk.
# Only absolute PATH entries are considered: the script changes into the
# news spool directory further down, so a relative hit such as "./mawk"
# would no longer resolve there (and would execute whatever binary happens
# to sit in the directory the script was started from).
for awk in mawk gawk nawk
do
    for path in `echo "$PATH" | tr : ' '`
    do
        case "$path" in
            /*) ;;
            *)  continue;;
        esac
        [ -x "$path/$awk" ] || continue
        AWK=$path/$awk
        break 2
    done
done
: ${AWK:=awk}                           # nothing found: plain "awk"

# Usage - print a short help text to standard error and exit with status 1.
Usage () {
    echo >&2 "$PN - create newsgroup statistics $VER (stv '95)
usage: $PN newsgroup

News articles are searched in the spool directory NEWSSPOOL=$NEWSSPOOL.
The statistic are printed to standard output.

Example:
    $PN comp.unix.shell"
    exit 1
}

# Msg - print every argument as a separate line on standard error,
# each one prefixed with the program name.
Msg () {
    for i
    do echo "$PN: $i" >&2
    done
}

# Fatal - print the arguments using Msg, then exit with status 1.
Fatal () { Msg "$@"; exit 1; }

# Check arguments before setting them: the first getopt call only
# validates the command line, the second one normalizes it.
getopt h "$@" > /dev/null 2>&1 || Usage
set -- `getopt h "$@"`
while [ $# -gt 0 ]
do
    case "$1" in
        # your flags here
        --)     shift; break;;
        -h)     Usage;;
        -*)     Usage;;
        *)      break;;                 # first file name
    esac
    shift
done

[ $# -lt 1 ] && Usage
Group="$1"

# Scratch files; TMPDIR defaults to /tmp when it is unset or empty.
Files=${TMPDIR:=/tmp}/nt$$.f            # List of Files
List=$TMPDIR/nt$$.l                     # List for statistics
Subjects=$TMPDIR/nt$$.s                 # List of Subjects
Tmp=$TMPDIR/nt$$.t

# Remove the scratch files on exit; the listed signals end the script
# with status 2 (which in turn runs the exit trap).
trap 'rm -f "$Files" "$List" "$Subjects" "$Tmp" > /dev/null 2>&1' 0
trap "echo 'Signal'; exit 2" 1 2 3 13 15

# Map the group name to its spool directory: every "." becomes a "/".
GroupPath=`echo "$Group" | sed 's:\.:/:g'`
Dir="$NEWSSPOOL/$GroupPath"
[ -d "$Dir" -a -r "$Dir" -a -x "$Dir" ] ||
    Fatal "cannot read news group $Group" "($Dir)"

cd "$Dir" || exit $?

# Create list of files: regular files modified within the last 7 days,
# sorted so that the lists derived from it can be joined later.
find * -type f -mtime -7 -print | sort > "$Files" || exit $?
[ -s "$Files" ] || exit $?              # nothing to report on
# Create Article statistics, one tab separated record per article:
#       file size crosspost header quoted original signature
# A real tab is required as the separator because subjects (joined in
# below) contain blanks.
TAB=`printf '\t'`

while read Name
do
    # The group name is handed over with -v instead of being spliced
    # into the program text, so unusual characters cannot alter the
    # awk program.
    $AWK -v ThisGroup="$Group" '
    {
        len = length + 1                # Bytes including LF
        Size += len
    }
    NR==1, /^$/ {                       # Parse Header
        Header += len
        # Is article crossposted? Only the first Newsgroups: line is
        # evaluated, and only if it contains exactly one colon.
        if ( !GotNewsgroups && $0 ~ /^Newsgroups: / ) {
            if ( split ($0, tmp, ":") == 2 ) {
                Groups = tmp [2]
                gsub (/[ \t][ \t]*/, "", Groups)
                if ( split (Groups, GroupList, ",") > 0 ) {
                    # Appending "" forces a string comparison.
                    if ( (GroupList [1] "") == (ThisGroup "") ) {
                        Crosspost = 0
                    } else {
                        Crosspost = 1
                    }
                }
            }
            GotNewsgroups = 1
        }
        next
    }
    {                                   # Parse text body
        if ( $1 == "--" ) {
            SigLines++
        } else if ( SigLines ) {
            SigLines++
            # This may be a signature; keep its bytes separate until
            # END decides whether it really is one.
            if ( $1 ~ /^[:>|+-]/ ) {
                SigQuote += len
            } else {
                SigOrig += len
            }
        } else {
            if ( $1 ~ /^[:>|+-]/ ) {
                Quote += len
            } else {
                Orig += len
            }
        }
    }
    END {
        if ( SigLines > 20 ) {
            # This cannot be a signature, can it?
            # Count the lines as ordinary body text instead.
            Quote += SigQuote
            Orig += SigOrig
        } else {
            Sig = SigQuote + SigOrig
        }
        printf "%s\t%d\t%d\t%d\t%d\t%d\t%d\n",
            FILENAME, Size, Crosspost, Header, Quote, Orig, Sig
    }
    ' "$Name"
done < "$Files" > "$List" || exit $?

# Create subject list, one tab separated record per article:
#       file followup subject
# /dev/null is passed as an extra file so that grep always prints the
# file name, even if xargs hands it a single article.
xargs < "$Files" $GREP '^Subject: ' /dev/null |
    sed "s/:[ $TAB]*Subject: /$TAB/" |
    $AWK -F"$TAB" '{
        if ( $1 != OldFile ) {          # Only one Subject: per file
            S = $2                      # New subject
            if ( $2 ~ /^[rR][eE][ ]*:/ ) {
                Followup = 1
                gsub (/^[rR][eE][ ]*:[ ]*/, "", S)
            } else {
                Followup = 0
            }
            gsub (/[ ][ ]*/, " ", S)    # squeeze runs of blanks
            print $1 "\t" Followup "\t" S
            OldFile = $1
        }
    }' > "$Subjects" || exit $?

# Join articles and subject list on the file name:
#       file size crosspost header quote orig sig followup subject
join -t"$TAB" "$List" "$Subjects" > "$Tmp" || exit $?
# Aggregate the joined per-article records by subject.
# Format of Tmp (tab separated):
#       file size crosspost header quote orig sig followup subject
TAB=`printf '\t'`

# The result is sorted numerically in descending order by number of
# posts, then bytes, then the remaining fields. The -k keys replace the
# obsolete "+0 -1 +1 -2 +2" notation, which current sort implementations
# no longer accept.
$AWK -F"$TAB" '
    {
        Subject [$9]++
        Byte [$9] += $2
        Crosspost [$9] += $3
        Header [$9] += $4
        Quote [$9] += $5
        Orig [$9] += $6
        Sig [$9] += $7
        Followup [$9] += $8
    }
    END {
        for ( S in Subject ) {
            printf "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%s\n",
                Subject [S], Byte [S], Crosspost [S], Followup [S],
                Header [S], Quote [S], Orig [S], Sig [S], S
        }
    }
' "$Tmp" | sort -t"$TAB" -nbr -k1,1 -k2,2 -k3 > "$List"

# NOTE(review): "edate" is not a standard utility; presumably a local
# helper that prints the current date - confirm. date(1) is used when
# it is missing or fails, so the headline never ends in an empty date.
EndDate=`edate 2>/dev/null` || EndDate=`date`
echo "Top 10 subjects for the 7 day period ending $EndDate.
"

# Print the top 10 table followed by overall totals and averages.
# Format of List (tab separated):
#       Subjects Bytes Crossposts Followups Header Quote Orig Sig Subject
$AWK -F"$TAB" '
    BEGIN {
        print "posts kbytes subject"
    }
    {
        TSubjects++
        TArticles += $1
        TCrossposts += $3
        TFollowups += $4
        Header += $5
        Quote += $6
        Orig += $7
        Sig += $8

        # Only the first 10 records (the most active subjects) are listed.
        if ( n++ < 10 ) {
            printf "%3d %8.1f %-.63s\n", $1, $2 / 1024, $9
        }
    }
    END {
        printf "\n%d articles on %d subjects\n", TArticles, TSubjects
        if ( TArticles ) {
            printf " %d were followups (%.1f%%)\n",
                TFollowups, (TFollowups * 100) / TArticles
            printf " %d were crossposts (%.1f%%)\n",
                TCrossposts, (TCrossposts * 100) / TArticles
        }

        Total += Header + Quote + Orig + Sig
        printf "\n%.1f kbytes total\n", Total / 1024
        if ( Total ) {
            printf " headers: %6.1fkb %5.1f%%\n",
                Header / 1024, (Header * 100) / Total
            printf " quoted text: %6.1fkb %5.1f%%\n",
                Quote / 1024, (Quote * 100) / Total
            printf " original text: %6.1fkb %5.1f%%\n",
                Orig / 1024, (Orig * 100) / Total
            printf " signatures: %6.1fkb %5.1f%%\n",
                Sig / 1024, (Sig * 100) / Total
        }

        # Each average is guarded against a division by zero.
        printf "\nAverages:\n"
        if ( TSubjects )
            printf " %.1f articles / subject\n", TArticles / TSubjects
        if ( TArticles )
            printf " %.1f kbytes / article\n", (Total / 1024) / TArticles
        if ( TSubjects )
            printf " %.1f kbytes / subject\n", (Total / 1024) / TSubjects

        # Signature trailer; character code 39 is the apostrophe, which
        # cannot be written literally inside the quoted awk program.
        print "-- "
        printf "Your UNIX shell scripting resource: Heiner%cs SHELLdorado\n", 39
        print "http://www.shelldorado.com/"
    }
' "$List"