#!/bin/sh
##########################################################################
# Shellscript:  newstop - top 10 news posters
# Version    :  2.3
# Author     :  Heiner Steven
# Category   :  News
# Date       :  1995-09-24
# SCCS-Id.   :  @(#) newstop 2.3 04/02/18
##########################################################################
# Description
#    Writes a "top 10 posters" newsgroup statistics to
#    standard output. Directly accesses the news spool
#    directory.
#
# Environment
#    GREP       grep command used to extract "From:" lines
#    NEWSSPOOL  root of the news spool (default /var/spool/news)
#    NEWSSTAT   directory keeping per-group history (default ~/.newsstat)
#    TMPDIR     parent directory for the temporary work directory
#
# Notes
#    o  Uses "edate" to print a date
#
# See also
#    newssub
##########################################################################

PN=`basename "$0"`                      # Program name
VER='2.3'

# Field delimiter of every intermediate list. It is generated with printf
# instead of being typed literally, so that an editor or a copy/paste that
# converts tabs to blanks cannot silently break the field handling.
TAB=`printf '\t'`

# The only pattern used is '^From: ', which means the same in basic and
# extended regular expressions, so plain grep is a sufficient default.
: ${GREP:=grep}
: ${NEWSSPOOL:=/var/spool/news}
: ${NEWSSTAT:=$HOME/.newsstat}

# Search a "new" awk (nawk) or GNU awk.
# Only absolute PATH entries are considered: the script changes into the
# spool directory later on, which would invalidate a relative $AWK.
for awk in mawk gawk nawk
do
    for path in `echo "$PATH" | tr : ' '`
    do
        case "$path" in
            /*) ;;
            *)  continue;;
        esac
        [ -x "$path/$awk" ] || continue
        AWK=$path/$awk
        break 2
    done
done
: ${AWK:=awk}

Period=7                        # Number of days for one period
TopCnt=10                       # Number of posters to print

# Usage - print the help text to standard error and terminate with status 1.
Usage () {
    echo >&2 "$PN - create newsgroup statistics $VER (hs '95)
usage: $PN [-n number] [-d days] newsgroup
    -n:  number of authors to print (default is $TopCnt)
    -d:  length of a period (default is $Period days)

News articles are searched in the spool directory
NEWSSPOOL=$NEWSSPOOL. The statistic are printed to
standard output.

Example:
    $PN comp.unix.shell"
    exit 1
}

# Msg line... - write each argument as one "$PN: ..." line to standard error.
Msg () {
    for MsgLine
    do
        echo "$PN: $MsgLine" >&2
    done
}

# Fatal line... - like Msg, then terminate the script with status 1.
Fatal () { Msg "$@"; exit 1; }

# Option parsing. The getopts builtin keeps arguments containing blanks
# intact and reports unknown options itself; every unexpected case ends
# in Usage.
while getopts n:d:h Opt
do
    case "$Opt" in
        n)  TopCnt=$OPTARG;;
        d)  Period=$OPTARG;;
        *)  Usage;;
    esac
done
shift `expr $OPTIND - 1`

# Both option values are used as numbers later on (find -mtime, awk
# comparisons), so reject anything that is not a plain unsigned integer.
for Number in "$TopCnt" "$Period"
do
    case "$Number" in
        ''|*[!0-9]*)    Usage;;
    esac
done

[ $# -eq 1 ] || Usage

Group="$1"

TmpDir="${TMPDIR:=/tmp}/nt$$"           # Directory with all tmp. files
Authors="$TmpDir/authors"               # List of Authors
Files="$TmpDir/files"                   # List of Files
List="$TmpDir/list"                     # List for statistics
Sizes="$TmpDir/sizes"                   # List of file sizes
Tmp="$TmpDir/tmp"

NewsStat="$NEWSSTAT/$Group"             # Last statistics
Top="$NewsStat/top$TopCnt.authors.new"  # Last Top 10
OldAuthors="$NewsStat/authors"
NewAuthors="$TmpDir/newauthors"

[ -d "$NewsStat" ] || mkdir -p "$NewsStat" || exit $?

# Remove the work directory on every exit; signals are mapped to a plain
# exit so that the exit trap runs for them as well.
trap 'rm -rf "$TmpDir" >/dev/null 2>&1' 0
trap "exit 2" 1 2 3 13 15

[ -d "$TmpDir" ] || mkdir -p "$TmpDir" ||
    Fatal "cannot create temporary directory: $TmpDir"

# The group name maps to a spool path by turning each "." into a "/"
GroupPath=`echo "$Group" | sed 's:\.:/:g'`
Dir="$NEWSSPOOL/$GroupPath"
[ -d "$Dir" ] && [ -r "$Dir" ] && [ -x "$Dir" ] ||
    Fatal "cannot read news group $Group" "(Dir=$Dir)"

cd "$Dir" || exit $?

# Create file list: regular files modified within the last $Period days
find * -type f -mtime -$Period -print | sort > "$Files" || exit $?
[ -s "$Files" ] || exit 0               # Nothing posted: nothing to report

# Create list with file sizes (SYSV/BSD ls: 9 or 8 fields per line)
#       file <TAB> size
xargs < "$Files" ls -ld |
    $AWK 'NF == 8 || NF == 9 { print $NF "\t" $(NF-4) }' |
    sort -t"$TAB" -k1,1 > "$Sizes" || exit $?

# Create author list (there could be more than one "From:" line
# in each file - only take the first line)
#       file <TAB> author
# /dev/null is an extra operand that makes grep always print the file name.
xargs < "$Files" $GREP '^From: ' /dev/null |
    sed "s/:[ $TAB]*From: /$TAB/" |
    $AWK -F"$TAB" '++A[$1] == 1' > "$Authors" || exit $?

# Rewrite Author list in the format
#       file <TAB> address <TAB> realname
$AWK -F"$TAB" '
    {
        if ( $2 ~ /\(.*\)[ \t]*$/ ) {
            # Address format "heiner@hsysnbg.nbg.sub.org (Heiner Steven)"
            Name = $2; Addr = $2
            gsub (/.*\(/, "", Name)
            sub (/\)[ \t]*$/, "", Name)
            gsub (/[ \t]*\(.*\)[ \t]*/, "", Addr)
        } else if ( $2 ~ /<.*>[ \t]*$/ ) {
            # Address format "Heiner Steven <heiner@hsysnbg.nbg.sub.org>"
            Name = $2; Addr = $2
            sub (/[ \t]*<.*>.*/, "", Name)
            sub (/^"/, "", Name); sub (/"$/, "", Name)
            # Keep only the text between the angle brackets
            gsub (/.*<|>.*/, "", Addr)
        } else {
            # Address format "heiner@hsysnbg.nbg.sub.org"
            Name = $2; Addr = $2
        }
        # A bare "<address>" has no real name: fall back to the address
        if ( Name == "" ) Name = Addr
        print $1 "\t" Addr "\t" Name
    }
' "$Authors" | sort -t"$TAB" -k1,1 > "$Tmp" && mv "$Tmp" "$Authors"

# Combine sizes and authors; both inputs are sorted on the file name.
#       file <TAB> size <TAB> address <TAB> realname
join -t"$TAB" "$Sizes" "$Authors" > "$Tmp" || exit $?
# Format of "$Tmp" at this point, one line per article (tab separated):
#       FILE    SIZE    Addr    Realname

# Field delimiter of all lists handled below. The aggregation step writes
# its output with "\t", so every sort/cut/join has to use a tab as well.
TAB=`printf '\t'`

# Condense the article list into one line per author:
#       Posts   Bytes   Addr    Realname
# The result is sorted on the address column, which is the order the
# join further down requires.
$AWK -F"$TAB" '
    {
        Posts [$3]++
        Bytes [$3] += $2
        Name [$3] = $4
    }
    END {
        for ( Author in Posts ) {
            printf "%d\t%f\t%s\t%s\n", Posts [Author], Bytes [Author], Author, Name [Author]
        }
    }
' "$Tmp" | sort -t"$TAB" -k3,3 > "$List"

#
# Special handling for new posters:
# Get number of posters, articles and bytes.
#

# Create sorted author (address) list of the current period
cut -d"$TAB" -f3 < "$List" | sort -u > "$Authors"

# List only new authors: addresses that appear in none of the kept
# author lists of earlier runs. Without any history nobody counts as new.
if [ `ls "$OldAuthors".[1-3] 2>/dev/null | wc -l` -gt 0 ]
then
    sort -u "$OldAuthors".[1-3] | comm -23 "$Authors" - > "$NewAuthors"
else
    > "$NewAuthors"
fi

# Extract lines of new authors from list (match address against column 3)
join -t"$TAB" -1 1 -2 3 -o 2.1,2.2,2.3,2.4 "$NewAuthors" "$List" > "$Tmp"

# Put totals into shell variables. The values are printed with explicit
# formats so that large byte counts never come out in exponent notation.
set -- `$AWK -F"$TAB" '
    { n++; a += $1; b += $2 }
    END { printf "%d %d %.0f\n", n, a, b }
' "$Tmp"`
if [ $# -eq 3 ]
then
    NewPosters="$1"
    NewArticles="$2"
    NewBytes="$3"
else
    NewPosters=0
    NewArticles=0
    NewBytes=0
fi

# Age author files: list 2 becomes 1, list 3 becomes 2, and the current
# author list is stored as the new list 3.
i=1
while [ $i -le 2 ]
do
    old=`expr $i + 1`
    [ -f "$OldAuthors.$old" ] && mv "$OldAuthors.$old" "$OldAuthors.$i"
    i=$old
done
cp "$Authors" "$OldAuthors.3"

# Sort table by posts and bytes (numeric, descending)
sort -t"$TAB" -nbr -k1,1 -k2,2 -k3 "$List" > "$Tmp" && mv "$Tmp" "$List"

# Headline followed by one empty line. "edate" is an external helper; use
# the standard date command when it is not available.
echo "Top $TopCnt posters for the $Period day period ending `edate 2>/dev/null || date`."
echo

# Format of List:
#       Posts   Bytes   Address Name
# The shell values are handed over with -v instead of being spliced into
# the program text, so they can never be interpreted as awk code.
# (-v needs a POSIX awk, e.g. the mawk/gawk/nawk this script looks for.)
$AWK -F"$TAB" \
    -v TopCnt="$TopCnt" \
    -v NewPosters="$NewPosters" \
    -v NewArticles="$NewArticles" \
    -v NewBytes="$NewBytes" '
    BEGIN {
        # Force numeric interpretation of the values passed in
        TopCnt += 0; NewPosters += 0; NewArticles += 0; NewBytes += 0
        print "posts kbytes name address"
    }
    {
        # Totals over all posters of the group
        TPosters++
        TArticles += $1
        TBytes += $2
        # The input is sorted: the first TopCnt lines are the top posters
        if ( n < TopCnt ) {
            n++
            printf "%3d %8.1f %-28.28s %-.35s\n", $1, $2 / 1024, $4, $3
            Articles += $1
            Bytes += $2
        }
    }
    END {
        print "--- -----"
        printf "%3d %8.1f Total for top %d\n\n", Articles, Bytes / 1024, TopCnt

        print "Totals for the newsgroup:"
        printf " %d posters\n", TPosters
        printf " %d articles\n", TArticles
        printf " %.1f kbytes\n\n", TBytes / 1024

        # Every ratio is guarded against a zero divisor
        printf "The top %d accounted for:\n", TopCnt
        if ( TPosters )
            printf " %5.1f%% of the posters\n", n * 100 / TPosters
        if ( TArticles )
            printf " %5.1f%% of the articles\n", Articles * 100 / TArticles
        if ( TBytes )
            printf " %5.1f%% of the bytes\n\n", Bytes * 100 / TBytes

        print "Averages:"
        if ( TPosters )
            printf " %.1f articles / poster\n", TArticles / TPosters
        if ( TArticles )
            printf " %.1f kbytes / article\n", (TBytes / 1024) / TArticles
        if ( TPosters )
            printf " %.1f kbytes / poster\n", (TBytes / 1024) / TPosters

        if ( NewPosters > 0 ) {
            printf "\n%d people posted for the first time this period.\n", NewPosters
            printf "They went on to post %d articles altogether\n\n", NewArticles
            print "The new posters accounted for:"
            if ( TPosters )
                printf " %5.1f%% of the posters\n", NewPosters * 100 / TPosters
            if ( TArticles )
                printf " %5.1f%% of the articles\n", NewArticles * 100 / TArticles
            if ( TBytes )
                printf " %5.1f%% of the bytes\n\n", NewBytes * 100 / TBytes
        }

        # Signature; character code 39 is the apostrophe, which cannot be
        # written literally inside this single-quoted awk program.
        print "-- "
        printf "Your UNIX shell scripting resource: Heiner%cs SHELLdorado\n", 39
        print "http://www.shelldorado.com/"
    }
' "$List" | tee "$Top"