:
##########################################################################
# Shellscript:	newsweekly - top news subjects
# Author     :	Heiner Steven <heiner.steven@odn.de>
# Date       :	1996-06-11
# Category   :	News
# SCCS-Id.   :	@(#) newsweekly	1.4 02/02/26
##########################################################################
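# Description
#   For each newsgroup (leaf directory) below $NEWSSPOOL, print the most
#   frequent subjects of the articles modified during the last few days.
#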
# Changes
# 10.01.2000	hs	Search for "nawk" or "gawk" using search path (1.2)
##########################################################################

PN=`basename "$0"`			# Program name
VER='1.4'

: ${NEWSSPOOL:=/var/spool/news}
: ${GREP:=egrep}
# We need a "new" awk (or gawk) for the "gsub()" function. Search
# for it like the shell would
awk=
for path in `echo "$PATH" | sed 's/^:/.:/;s/:$/./;s/:/ /g'`
do
    if [ -x "$path/nawk" ]
    then awk=$path/nawk; break
    elif [ -x "$path/gawk" ]
    then awk=$path/gawk; break
    fi
done
: ${NAWK:=${awk:=awk}}

Days=7
Top=5					# Top $Top subjects
Threshold=3

Usage () {
    echo >&2 "$PN - top news subjects $VER (hs '96)
usage: $PN [-n cnt] [-p days] [-m threshhold]
    -n:    number of subjects to print for each newsgroup (default $Top)
    -m:    minimum number of subjects (default $Threshold)
    -p:    period (default is $Days days)"
    exit 1
}

Msg () {
    for i
    do echo "$PN: $i" >&2
    done
}

Fatal () { Msg "$@"; exit 1; }

set -- `getopt hn:p:m: "$@"`
while [ $# -gt 0 ]
do
    case "$1" in
	-n)	Top="$2"; shift;;
	-p)	Days="$2"; shift;;
	-m)	Threshold="$2"; shift;;
	--)	shift; break;;
	-h)	Usage;;
	-*)	Usage;;
	*)	break;;			# First file name
    esac
    shift
done

set -u	# DEBUG: no unset variables

[ -d "$NEWSSPOOL" ] ||
    Fatal "cannot read news spool directory: $NEWSSPOOL"

Subjects=${TMPDIR:=/tmp}/nw$$
trap 'rm -f "$Subjects" >/dev/null 2>&1' 0
trap "exit 2" 1 2 3 13 15

cd "$NEWSSPOOL"
find . -follow -type d -depth -print |
    sort |
    # Only get leaf directories
    $NAWK '
	{
	    if ( !index ($0, LastDir) && LastDir != "" )
		print LastDir
	    LastDir = $0
	}
	END { if ( LastDir != "" ) print LastDir; }
    ' |
    while read Dir
    do
    	[ -d "$Dir" ] || continue
    (
	cd "$Dir"
	Group=`pwd | sed -e "s:^$NEWSSPOOL/::" -e 's:/:.:g'`

	find . -type f -mtime -$Days -print |
	    xargs $GREP '^Subject:[ 	]' /dev/null |
	    $NAWK -F: '!Name[$1]++' |	# Only the first subject of a file
	    cut -d: -f3- |		# Remove "name: Subject: ...."
	    $NAWK '
		{
		    sub (/^[ 	]*/, "")
		    sub (/[Rr][Ee]:[ 	]*/, "")
		    gsub (/[ 	][ 	]*/, " ")
		    ++Subject [$0]
		}
		END {
		    for ( Sub in Subject )
			if ( Subject [Sub] > 1 )
			    print Subject [Sub] "	" Sub
		}
	    ' |
	    sort -rn |
	    head -$Top > $Subjects
	    if [ `wc -l < "$Subjects"` -ge $Threshold ]
	    then
		echo "$Group:"
		cat "$Subjects"
		echo
	    fi
    )
    done
exit 0
