#! /usr/bin/ksh ########################################################################## # Title : search-swish.cgi - search using "swish-e" (http://swish-e.org) # Version : %I% # Author : Heiner Steven # Date : 2002-03-06 # Category : CGI # Requires : swish-search, urlgetopt # SCCS-Id. : %Z% %M% %I% %D% ########################################################################## # Description # External parameters # QUERY query string # # Note # Needs the following non-standard programs # urlgetopt, swish-e ########################################################################## PN=${0##*/} # Program name VER='%I%' ########################################################################## # Configuration section ########################################################################## [ -r ./setenv ] && . ./setenv : ${NAWK:=nawk} DEF_PAGESIZE=10 # number of matches per page #Debug=true # {false|true} Debug=false # {false|true} DocumentRoot=${DOCUMENT_ROOT:=/var/apache/htdocs} SwishIndex=$DocumentRoot/index.swish-e WEBROOT=http://www.shelldorado.com libbase=$DocumentRoot/search Header=$libbase/header.html Footer=$libbase/footer.html ########################################################################## # End configuration section ########################################################################## # For searching we will "eval" the following command line. Note that the # quoting is essential to preserve whitespace within arguments. searchcmd='swish-search -d " " -f "$SwishIndex" -w "$*"' # Mark the names of all variable we want to accept on the command line as # "tagged". Variables not listed will not be accepted. # Define new variables here! typeset -t QUERY= \ OLDQUERY= \ NEXTPAGE= \ NUM= \ START= # We need "urlgetopt", "swish-e" PATH=$PATH:$HOME/local/bin:$PWD:/bin export PATH function Usage { print -u2 "$PN - short description, $VER (hs '00)" if $iscgi then print "
Valid CGI arguments:
    QUERY	query string
    NUM		number of matches to list per page
    NEXTPAGE	{ \"|<\" | \"<<\" | \">>\" | \">|\" }
		for first, previous, next, or last page
" else print -u2 "usage: $PN query [...]" fi exit 1 } function Msg { print -u2 "$PN:" "$@" } function Fatal { Msg "$@"; exit 1; } ########################################################################## function isnumber { (( $# != 1 )) && Fatal "$0: wrong number of arguments" [[ $1 == +([0-9]) ]] } function min # num1 [num2 ...] { typeset minval=$1; shift while (( $# > 0 )) do (( $1 < $minval )) && minval=$1 shift done print -- "$minval" } function max # num1 [num2 ...] { typeset maxval=$1; shift while (( $# > 0 )) do (( $1 > $maxval )) && maxval=$1 shift done print -- "$maxval" } ########################################################################## function striphtml { sed -e 's/ $//;s/<[^>]*>//g' "$@" } ########################################################################## function urlencode { $NAWK ' BEGIN { # We assume an awk implementation that is just plain dumb. # We will convert an character to its ASCII value with the # table ord[], and produce two-digit hexadecimal output # without the printf("%02X") feature. 
EOL = "%0A" # "end of line" string (encoded) split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ") hextab [0] = 0 for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0 } { encoded = "" for ( i=1; i<=length ($0); ++i ) { c = substr ($0, i, 1) if ( c ~ /[a-zA-Z0-9.-]/ ) { encoded = encoded c # safe character } else if ( c == " " ) { encoded = encoded "+" # special handling } else { # unsafe character, encode it as a two-digit hex-number lo = ord [c] % 16 hi = int (ord [c] / 16); encoded = encoded "%" hextab [hi] hextab [lo] } } print encoded } ' "$@" } function htmlencode { sed 's/"/\"/g' < Search term EOT ((${MAX:-0} > 0)) || return cat <<-EOT 0 $START $MAX EOT } ########################################################################## function evalnavigation { if [[ -n $OLDQUERY && -n $QUERY && $QUERY != $OLDQUERY ]] then START=0 fi : ${START:=0} : ${NUM:=$DEF_PAGESIZE} : ${MAX:=0} case "$NEXTPAGE" in ("|<") ((START=0)) ;; ("<<") if (($START > $NUM)) then ((START=$START-$NUM)) else START=0 fi ;; (">>") if (( $START + $NUM <= $MAX)) then ((START=$START+$NUM)) fi ;; (">|") if (($MAX > $NUM)) then ((START=$MAX-$MAX%$NUM)) # Start of last page else ((START=0)) fi ;; esac } ########################################################################## function formathtml { $NAWK -F" " ' # Input: rankingurltitlefilesize function blockgraph(percent, p, q, ret) { p = int(percent % 101) q = 100 - percent ret = "" pstr = " " p " " qstr = " " ret = ret "" ret = ret "" ret = ret "" ret = ret "
" pstr "" qstr "
" return ret } BEGIN { cols = 2 nmatches = 0 webroot = "'"$WEBROOT"'" print "" } $1 ~ /^#/ { next } # ignore comments NF == 4 { ++nmatches ranking = $1 url = $2 title = $3 filesize = $4 percent = int(ranking/10) if ( (p = match (url, /[^\/]*$/)) > 0 ) { filename = substr (url, p) } else { filename = url } if ( title == filename ) { description = "" title "" } else { description = "" title " - " filename } if ( nmatches == 1 ) { print "" \ "" \ " " } graph = blockgraph(percent) print "" \ "" \ " " \ "" } END { print "
RelevanceDescription
" \ "
" graph "" description "
" } ' } ########################################################################## # Start of main program ########################################################################## integer START=0 # number of first match to display integer NUM=$DEF_PAGESIZE # maximum number of matches per page if [[ -n $REQUEST_METHOD ]] then iscgi=true else iscgi=false fi if $iscgi then calledname=${SCRIPT_NAME##*/} if [[ $calledname == $PN ]] then standalone=true else standalone=false fi fi contenttype=text/html if $iscgi then #$Debug && contenttype=text/plain $standalone && print "Content-type: $contenttype\n" #$Debug && export $Debug && print "
"

exec 2>&1	# from here on, stderr is merged into stdout
#set -x

    # Fetch the URL-encoded form data into QUERY_STRING. For POST it is
    # read from standard input; for GET (and any other method) nothing is
    # read and QUERY_STRING is used as it already exists.
    case "$REQUEST_METHOD" in
	(POST)
	    # A POST body normally carries no trailing newline, so "read"
	    # reaches end-of-file and returns non-zero even though it has
	    # stored the data. Only give up (exit status 2) if nothing at
	    # all could be read. "IFS=" keeps the line unmodified.
	    IFS= read -r QUERY_STRING || [[ -n $QUERY_STRING ]] || exit 2
	    ;;
	(GET)	;;
	(*)	;;
    esac

    # Build the whitelist of accepted CGI parameters: for every variable
    # "x" that was given the "tagged" attribute (typeset -t) earlier in
    # the script, create a variable "VALID_FORM_x" with the value "true".
    # The validation loop further down only has to test whether
    # "VALID_<name>" is "true" to decide if "<name>" is acceptable.
    #
    # The marker names deliberately do *not* start with "FORM_": decoded
    # request parameters all get the "FORM_" prefix, so an attacker
    # cannot create a marker by passing it as an argument to this script.

    # "cut" strips a possible "=value" part, leaving only the names.
    for var in $(typeset -t | cut -d= -f1)
    do eval "VALID_FORM_$var=true"
    done

    # Discard every already existing variable whose name begins with
    # "FORM_" (e.g. one inherited from the environment), so that it
    # cannot be mistaken for a decoded request parameter later on.
    # Disable this loop if such variables are meant to supply defaults.

    for var in $(set | sed -e '/^FORM_/!d' -e 's/=.*//')
    do
	unset "$var"
    done

    # Decode the request: "urlgetopt" (external helper, run from the
    # current directory) is handed the raw QUERY_STRING and its output is
    # captured in "setvars", which is then executed with "eval". The
    # result is one shell variable per request parameter, named with the
    # prefix "FORM_"; e.g. "name=a" creates FORM_name with content "a".
    # NOTE(review): the exact meaning of the "-l" flag is not visible
    # here - check the urlgetopt documentation.
    # NOTE:
    #	THIS IS A SECURITY RISK, because if there is a bug in
    #	"urlgetopt", it may execute code a user provided!

#set -x
    setvars=$(./urlgetopt -l -p FORM_ "$QUERY_STRING")
    #print "setvars=|$setvars|"
    eval "$setvars"
#set +x

    # Validate every "FORM_" variable that now exists: reject parameters
    # that are not on the whitelist, sanitize the numeric ones, and copy
    # accepted values to the variable of the same name without the
    # "FORM_" prefix.

    for varname in $(set | grep ^FORM_ | cut -d= -f1)
    do
	# Paranoia: check if this really is a valid identifier name.
	# The name is interpolated into "eval" commands below.
	[[ $varname == [a-zA-Z_]+([a-zA-Z0-9_]) ]] ||
		Fatal "Internal Error: invalid variable name: $varname"

    	case "$varname" in
	    (FORM_START)
		# Offset of the first match to show; anything that is not
		# a plain string of digits is replaced with 0.
	    	isnumber "$FORM_START" || FORM_START=0
		START=$FORM_START
		;;

	    (FORM_NUM)
		# Matches per page; anything that is not a plain string of
		# digits is replaced with 0.
	    	isnumber "$FORM_NUM" || FORM_NUM=0
		NUM=$FORM_NUM
		;;

	    (*)
	    	# No special handling for this variable; just check
		# if it is a valid one

		# vname becomes "VALID_FORM_x"; the second eval expands the
		# variable of that name and tests whether it is "true".
		eval "vname=\"VALID_$varname\""
		if eval "[[ \"\${$vname:-false}\" != true ]]"
		then
		    print -u2 "invalid variable name: $varname"
		    unset $varname
		    Usage
		    # not reached
		fi

		# Copy the value to a variable with the same name as the
		# CGI argument, but without the "FORM_" prefix, e.g. the
		# value of "FORM_QUERY" is copied to the variable "QUERY".
		# Only the variable *name* is interpolated into the eval
		# text (it was checked above); the value is expanded by
		# eval itself inside double quotes.

		# DANGER
#set -vx
		eval "${varname#FORM_}=\"\$$varname\""
#set +vx
		;;
	esac
    done

    # Split the query into one positional parameter per search word.
    # The expansion is deliberately unquoted to get word splitting, but
    # file name generation must be off while it happens: otherwise a
    # term such as "*" or "foo*" would be replaced by matching file
    # names from the current directory. The previous noglob state is
    # preserved.
    case $- in
	(*f*)	set -- $QUERY ;;
	(*)	set -f; set -- $QUERY; set +f ;;
    esac
else
    # Command-line (non-CGI) invocation: process option flags. The
    # leading ":" in the option string makes getopts report an unknown
    # flag by setting "opt" to "?" instead of printing its own message.
    while getopts :h opt
    do
	case "$opt" in
	    # your flags here
	    #f)	argument=$OPTARG;;
	    (h)	Usage;;
	    (*)	Usage;;		# unknown flag
	esac
    done
    # Drop the parsed options so that "$@" holds only the query words.
    # The count is evaluated before it is handed to "shift"; passing the
    # bare expression "OPTIND-1" is not accepted by every shell.
    shift $((OPTIND - 1))
fi

#(( $# < 1 )) && exit 0

# Defaults in case the paging variables are empty at this point.
: ${NUM:=500}
: ${START:=0}

if (( $# > 0 ))
then
    # For the browsing feature (more precise: the "last page" button) to
    # work, we need the number of matches. "search-swish" returns this
    # number in the following format:
    #	# Number of hits: 25
    #
    # Our first search only is used to count the number of matches:

    nmatches=$(eval "$searchcmd -m 1" | egrep -i '^# Number of hits')
    nmatches=${nmatches##* }	# we only want the last column of the output

    # If the search failed or printed something unexpected, treat it as
    # "no matches" rather than feeding a non-number into arithmetic.
    [[ $nmatches == +([0-9]) ]] || nmatches=0
    MAX=$nmatches

    # An invalid NUM argument is turned into 0 during CGI argument
    # checking. evalnavigation computes "MAX % NUM" for the "last page"
    # button, so a page size of 0 must be replaced before calling it.
    (( NUM > 0 )) || NUM=20

    evalnavigation
fi

# Every response starts with the page header.
cat "$Header"

# The navigation block is printed in CGI mode only.
if $iscgi
then
    printnavigation
fi

# Without any search words there is nothing more to do.
if (( $# <= 0 ))
then
    exit 0
fi

# A non-positive page size falls back to 20 matches per page.
if (($NUM <= 0))
then
    NUM=20
fi

# Offsets to the first match are computed both as a "number of matches"
# and as a "number of pages". Internally page numbers are 0-based (this
# keeps the arithmetic simple); they are shown to the user 1-based.

# How many pages to list before and after the current page
integer surrpages=10
nmatches=${nmatches:-0}

if (( $nmatches < 1 ))
then
    print "


no matches found" else integer npages=$((nmatches/$NUM+1)) integer curpage=$(($START/$NUM)) integer startpage=$(max $(($curpage-$surrpages)) 0) integer lastpage=$(min $(($curpage+$surrpages)) $npages) #print "nmatches=$nmatches, npages=$npages" #print "
0 <= start $startpage <= $curpage <= last $lastpage <= $npages
" print "Page $(($curpage+1)) of $npages: " myurl="$SCRIPT_NAME?$(mkurl)" integer pi=$startpage while (($pi < $lastpage)) do ((offset=$pi*$NUM)) (($pi > $startpage)) && print -- "-" #print "DEBUG: page $currentpage: QUERY=$*&START=$offset&NUM=$NUM
" if (( $pi == $curpage )) then print "$(($curpage+1))" else print "$(($pi+1))" fi ((pi=$pi+1)) done print "
" eval "$searchcmd -b \"$START\" -m \"$NUM\"" | if $iscgi then formathtml else egrep -v '^# ' fi fi $Debug && print "
" print "

View source code" \ " of this shell script" cat "$Footer" exit 0