1#! /bin/sh
2## Web proxy, following the grand tradition of Web things being handled by
3## gross scripts.  Uses netcat to listen on a high port [default 8000],
4## picks apart requests and sends them on to the right place.  Point this
5## at the browser client machine you'll be coming from [to limit access to
6## only it], and point the browser's concept of an HTTP proxy to the
7## machine running this.  Takes a single argument of the client that will
8## be using it, and rejects connections from elsewhere.  LOGS the queries
9## to a configurable logfile, which can be an interesting read later on!
10## If the argument is "reset", the listener and logfile are cleaned up.
11##
12## This works surprisingly fast and well, for a shell script, although may
13## randomly fail when hammered by a browser that tries to open several
14## connections at once.  Drop the "maximum connections" in your browser if
15## this is a problem.
16##
17## A more degenerate case of this, or preferably a small C program that
18## does the same thing under inetd, could handle a small site's worth of
19## proxy queries.  Given the way browsers are evolving, proxies like this
20## can play an important role in protecting your own privacy.
21##
22## If you grabbed this in ASCII mode, search down for "eew" and make sure
23## the embedded-CR check is intact, or requests might hang.
24##
25## Doesn't handle POST forms.  Who cares, if you're just watching HTTV?
26## Dumbness here has a highly desirable side effect: it only sends the first
27## GET line, since that's all you really ever need to send, and suppresses
28## the other somewhat revealing trash that most browsers insist on sending.
29
# User-tunable settings -- set these as you wish.
# PORT: TCP port the proxy listens on; point the browser's HTTP proxy here.
PORT=8000
# LFILE: logfile spec -- a real file, or /dev/null if you don't care.
# As written it is this script's own invocation path with ".log" appended.
LFILE=${0}.log
# CFILE (optional): where to dump connect info (stderr of the relaunched
# listeners), so you can see if anything went wrong.  If it is left unset,
# the launch code further down substitutes /dev/null.
# CFILE=${0}.conn
# XNC (optional): extra args to the listener "nc", for instance
# "-s inside-net-addr".  It is expanded unquoted, so it may hold several words.
# XNC=''
38
# Prelaunch sanity check.  This has to be cheap, because nc re-runs this
# script for every relayed connection and the next listener must start fast.
# With neither a command-line argument nor a CLIENT inherited through the
# environment there is nobody to serve, so complain and bail.
if test -z "${1}${CLIENT}" ; then
  echo needs client hostname
  exit 1
fi
47
# Handler mode: no argument means nc started us via -e to serve one
# connection, with CLIENT handed down through the environment.
# Make like inetd, and run the next relayer process NOW.  All the redirection
# is necessary so this shell has NO remaining channel open to the net.
# The new listener hangs around for 10 minutes, and exits if no new
# connections arrive.  Using -n for speed, avoiding any DNS/port lookups.
# $XNC is deliberately unquoted so it can expand to several nc arguments.
# The listener's stderr goes to CFILE; fall back to /dev/null when CFILE is
# empty or unset, since an empty redirect target would abort the launch.
case "${1}" in
  "")
    nc -w 600 -n -l -p "$PORT" -e "$0" $XNC "$CLIENT" \
      < /dev/null > /dev/null 2> "${CFILE:-/dev/null}" &
  ;;
esac
58
# Launch / reset mode: we were started by hand with an argument, so this
# checking can be much slower than the per-connection path.
# Anything we create (logfile, connect-info file) is private to the owner.
umask 077

if test "$1" ; then
# if magic arg, just clean up and then hit our own port to cause server exit
  if test "$1" = "reset" ; then
    rm -f "$LFILE"
    test -f "$CFILE" && rm -f "$CFILE"
    nc -w 1 -n 127.0.0.1 "$PORT" < /dev/null > /dev/null 2>&1
    exit 0
  fi
# nc has to be able to re-exec this script via -e, so $0 must be a real file
  if test ! -f "$0" ; then
    echo "Oops, cannot find my own corporeal being"
    exit 1
  fi
# correct launch: set up client access control, passed along thru environment.
  CLIENT="$1"
  export CLIENT
  test "$CFILE" || CFILE=/dev/null
  export CFILE
  touch "$CFILE"
# tell us what happened during the last run, if possible
  if test -f "$CFILE" ; then
    echo "Last connection results:"
    cat "$CFILE"
  fi

# ping client machine and get its bare IP address: nc -v reports the peer as
# "name [a.b.c.d] port ...", and we keep what is inside the brackets.
# sed -n ... p emits only lines that really contain a bracketed address, so a
# failed lookup leaves CLIENT empty instead of filling it with nc's error
# text; "sed 1q" keeps just the first such line.
  CLIENT=$(nc -z -v -w 8 "$1" 22000 2>&1 \
    | sed -n -e 's/.*\[\(..*\)\].*/\1/p' | sed -e 1q)
  if test ! "$CLIENT" ; then
    echo "Can't find address of $1"
    exit 1
  fi

# if this was an initial launch, be informative about it
  echo "=== Launch: $CLIENT" >> "$LFILE"
  echo "Proxy running -- will accept connections on $PORT from $CLIENT"
  echo "  Logging queries to $LFILE"
  test -f "$CFILE" && echo "  and connection fuckups to $CFILE"

# and run the first listener, showing us output just for the first hit
# ($XNC is deliberately unquoted so it can expand to several nc arguments)
  nc -v -w 600 -n -l -p "$PORT" -e "$0" $XNC "$CLIENT" &
  exit 0
fi
98
# Fall here to handle a page.  stdin/stdout are the browser connection, and
# the request looks like:
# GET type://host.name:80/file/path HTTP/1.0
# Additional: trash
# More: trash
# <newline>

# A carriage return, generated here rather than embedded in the script so
# that ASCII-mode transfers and editors cannot mangle it.
cr=$(printf '\r')

# -r keeps backslashes in the request from being eaten by read
read -r x1 x2 x3 x4
printf '%s\n' "=== query: $x1 $x2 $x3 $x4" >> "$LFILE"
if test "$x4" ; then
  echo "extra junk after request: $x4"
  exit 0
fi
# Drop the leading "type://" and nuke questionable characters.  Only the
# scheme prefix is removed (anchored, non-greedy), so a "//" later in the
# path or query string cannot redirect the request to a different host.
hurl=$(printf '%s\n' "$x2" \
  | sed -e 's+^[^/]*//++' -e 's+[`'\''|$;<>{}\!*()"]++g')
# echo massaged hurl: $hurl >> $LFILE
# split up the request: host, port and file path
hh=$(printf '%s\n' "$hurl" | sed -e "s+/.*++" -e "s+:.*++")
# strip the path BEFORE looking for the port, so a colon inside the path is
# never mistaken for a port separator
hp=$(printf '%s\n' "$hurl" | sed -e "s+/.*++" -e "s+.*:++")
# no ":port" present leaves the bare host behind; "host:" leaves nothing
test "$hp" = "$hh" && hp=80
test "$hp" || hp=80
hf=$(printf '%s\n' "$hurl" | sed -e "s+[^/]*++")
# a URL with no path at all means the server root
test "$hf" || hf=/
# echo total split: $hh : $hp : $hf >> $LFILE
# suck in and log the entire request, because we're curious
# Fails on multipart stuff like forms; oh well...
if test "$x3" ; then
  while read -r xx ; do
    printf '%s\n' "${xx}" >> "$LFILE"
    test "${xx}" || break
# eew, buried returns, gross but necessary for CRLF line endings: a "blank"
# header line then consists of a lone carriage return
    test "${xx}" = "$cr" && break
  done
fi
# check for non-GET *after* we log the query...
if test "$x1" != "GET" ; then
  echo "sorry, this proxy only does GETs"
  exit 0
fi
# refuse to let the browser phone home to its vendor
if printf '%s\n' "$hh" | grep -F -i netscap > /dev/null ; then
  echo "access to Netscam's servers <b>DENIED.</b>"
  exit 0
fi
# Do it.  30 sec net-wait time oughta be *plenty*...
# Some servers have forgotten how to handle the simple-query syntax.
# If necessary, replace below with (echo "$x1 $hf" ; echo '') | nc...
printf '%s\n' "$x1 $hf" | nc -w 30 "$hh" "$hp" 2> /dev/null || \
  echo "oops, can't get to $hh : $hp".
printf '%s\n' "sent \"$x1 $hf\" to $hh : $hp" >> "$LFILE"
exit 0
139
140