#!/usr/bin/ksh93

########################################################################
#                                                                      #
#               This software is part of the ast package               #
#                 Copyright (c) 2008-2012 Roland Mainz                 #
#                      and is licensed under the                       #
#                 Eclipse Public License, Version 1.0                  #
#                    by AT&T Intellectual Property                     #
#                                                                      #
#                A copy of the License is available at                 #
#          http://www.eclipse.org/org/documents/epl-v10.html           #
#         (with md5 checksum b35adb5213ca9657e911e9befb180842)         #
#                                                                      #
#                                                                      #
#                 Roland Mainz <roland.mainz@nrubsig.org>              #
#                                                                      #
########################################################################

#
# Copyright (c) 2008, 2012, Roland Mainz. All rights reserved.
#

#
# Written by Roland Mainz <roland.mainz@nrubsig.org>
#

# The Solaris tools in /usr/bin are not POSIX-conformant, therefore the
# XPG6/XPG4 directories have to be searched first.
PATH='/usr/xpg6/bin:/usr/xpg4/bin:/bin:/usr/bin'
export PATH

# All math stuff has to run in the "C" locale, otherwise locales with an
# alternative radix point representation (e.g. ',' instead of '.' in
# de_DE.*-locales) cause problems. LC_NUMERIC must be pinned _before_ any
# floating-point constant is defined in this script.
# A non-empty LC_ALL is copied into the individual categories listed below
# and then removed, so that the LC_NUMERIC setting is the one that counts.
if [[ -n "${LC_ALL-}" ]] ; then
	LC_MONETARY="${LC_ALL}"
	LC_MESSAGES="${LC_ALL}"
	LC_COLLATE="${LC_ALL}"
	LC_CTYPE="${LC_ALL}"
	export LC_MONETARY LC_MESSAGES LC_COLLATE LC_CTYPE
	unset LC_ALL
fi
LC_NUMERIC='C'
export LC_NUMERIC


# Encode the fields of a form as a "multipart/form-data" request body.
#
# Arguments:
#   $1 - name of a compound variable with the following members:
#          boundary       (input)  boundary string separating the parts
#          form           (input)  indexed array of compound elements, each
#                                  providing the members "name" and "data"
#          content        (output) the encoded body. It is written with
#                                  "print" escape sequences ('\r\n' line
#                                  ends, every '\' of the data doubled), so
#                                  it must be sent with "print" and
#                                  _without_ the "-r" option
#          content_length (output) size of the complete body (including the
#                                  closing delimiter) in bytes, counted
#                                  after the escape sequences were expanded
# Returns:
#   0
function encode_multipart_form_data
{
	nameref formdata="$1"
	nameref content='formdata.content'
	integer numformelements=${#formdata.form[*]}
	integer i

	content=''

	# todo: add support to upload files
	for (( i=0 ; i < numformelements ; i++ )) ; do
		nameref element="formdata.form[${i}]"

		# MIME multipart bodies use CRLF as line terminator
		content+="--${formdata.boundary}\r\n"
		content+="Content-Disposition: form-data; name=\"${element.name}\"\r\n"
		content+="\r\n"
		# make sure we quote the '\' properly since we pass these data to one instance of
		# "print" when putting the content on the wire.
		content+="${element.data//\\/\\\\}\r\n" # fixme: may need encoding for non-ASCII data
	done

	# the closing delimiter is part of the body and therefore counts
	# towards the content length, too
	content+="--${formdata.boundary}--\r\n"

	# Count the bytes which really go over the wire: let "print" expand
	# the escape sequences and feed the result straight into "wc -c".
	# (A command substitution would strip trailing newlines and ${#var}
	# counts characters, not bytes.) The arithmetic expansion drops the
	# padding some "wc" implementations put around the number.
	formdata.content_length=$(( $(print -n -- "${content}" | wc -c) ))

	return 0
}


# Parse the head of a HTTP response (status line plus headers) from stdin.
#
# Arguments:
#   $1 - name of a compound variable which receives the members
#          statuscode  (integer) numeric status code
#          statusmsg             text following the status code
#          headers               indexed array with one header line per
#                                element (trailing '\r' removed)
#        and - _ONLY_ if the matching header exists -
#          content_length (integer), content_type, location,
#          transfer_encoding
# Input:
#   stdin - the response stream; reading stops after the blank line which
#           ends the header section, the callers read the body afterwards
# Returns:
#   0 on success, 1 if the status line is malformed (message on stderr)
function parse_http_response
{
	nameref response="$1"
	typeset h statuscode statusmsg s
	integer i

	# we use '\r' as additional IFS to filter the final '\r'
	IFS=$' \t\r' read -r h statuscode statusmsg  # read HTTP/1.[01] <code>
	[[ "${h}" != ~(Eil)HTTP/ ]]           && { print -u2 -f $"%s: HTTP/ header missing\n" "$0" ; return 1 ; }
	[[ "${statuscode}" != ~(Elr)[0-9]+ ]] && { print -u2 -f $"%s: invalid status code\n"  "$0" ; return 1 ; }

	# "10#" forces base 10 so that leading zeros are not taken as octal
	integer response.statuscode="10#${statuscode}"
	typeset response.statusmsg="${statusmsg}"

	typeset -a response.headers

	# collect headers
	while IFS='' read -r s ; do
		# strip '\r' at the end
		s="${s/~(Er)$'\r'/}"

		# An empty line terminates the header section. Testing _after_
		# the '\r' was removed accepts both CRLF and bare LF line ends;
		# otherwise a LF-only reply would have its body eaten as headers.
		[[ "${s}" == '' ]] && break

		response.headers+=( "${s}" )
	done

	for (( i=0 ; i < ${#response.headers[@]} ; i++ )) ; do
		s="${response.headers[i]}"
		# add compound variable fields _ONLY_ on _demand_ if the
		# matching headers exist
		# (the whitespace between ':' and the value is optional, hence
		# "[[:blank:]]*")
		case "${s}" in
			~(Eli)Content-Length:[[:blank:]]*[0-9]+)
				integer response.content_length="10#${s/~(Eli)Content-Length:[[:blank:]]*/}"
				;;
			~(Eli)Content-Type:[[:blank:]]*)
				typeset response.content_type="${s/~(Eli)Content-Type:[[:blank:]]*/}"
				;;
			~(Eli)Location:[[:blank:]]*)
				typeset response.location="${s/~(Eli)Location:[[:blank:]]*/}"
				;;
			~(Eli)Transfer-Encoding:[[:blank:]]*)
				typeset response.transfer_encoding="${s/~(Eli)Transfer-Encoding:[[:blank:]]*/}"
				;;
		esac
	done

	return 0
}

# Copy the body of a HTTP response from stdin to stdout.
#
# Arguments:
#   $1 - value of the "Transfer-Encoding" header (may be empty). If it is
#        "chunked" (compared case-insensitively, whitespace ignored) the
#        chunk framing is removed, otherwise stdin is copied verbatim.
# Input:
#   stdin  - the response stream, positioned at the first byte of the body
# Output:
#   stdout - the decoded body
# Returns:
#   0
function cat_http_body
{
	# "-l" folds the value to lower case
	typeset -l emode="$1"
	typeset sizeline=''
	typeset -i chunksize=0

	emode="${emode//[[:space:]]/}"

	if [[ "${emode}" == 'chunked' ]] ; then
		while IFS='' read -r sizeline ; do
			sizeline="${sizeline//$'\r'/}"
			# drop chunk extensions ("<size>;name=value") and any padding
			sizeline="${sizeline%%;*}"
			sizeline="${sizeline//[[:blank:]]/}"

			# the line end which follows the data of a chunk shows up as
			# an empty line here - skip it
			[[ "${sizeline}" != '' ]] || continue

			# anything which is not a pure hex number ends the decoding
			[[ "${sizeline}" != *[![:xdigit:]]* ]] || break
			chunksize="16#${sizeline}"

			# a chunk of size 0 marks the end of the body; whatever
			# follows it is not copied
			(( chunksize > 0 )) || break

			# bs=1 makes sure exactly ${chunksize} bytes are consumed
			dd bs=1 count="${chunksize}" 2>'/dev/null'
		done
	else
		cat
	fi

	return 0
}

# Ask the bit.ly service for a short alias of an URL.
#
# Arguments:
#   $1 - name of the variable which receives the short URL
#   $2 - the long URL; it is put into the query string unchanged
# Returns:
#   0 if a short URL was obtained, 1 otherwise (diagnostics go to stderr)
function request_bitly
{
	nameref res_short_url=${1}
	typeset inputurl="${2}"

	# site setup
	typeset url_host='api.bit.ly'
	typeset url_path='/v3/shorten'
	typeset url="http://${url_host}${url_path}"
	typeset -r bitly_apikey=(
		# no comment here, intentionally
		typeset -r -b user='ZmxleXRh'
		typeset -r -b key='Ul8yMDAxNDk2NDNiYmYyYmUwY2M0YTBhYThiM2E3NDg1Mg=='
	)

	# connection state
	integer netfd            # file descriptor of the http stream
	integer res
	compound httpresponse    # filled in by "parse_http_response"
	typeset request=''
	typeset response=''

	# "inputurl" is assumed to be a correctly encoded URL, i.e. it is
	# used without any further mangling
	url_path+="?login=${ printf '%B' bitly_apikey.user; }&apiKey=${ printf '%B' bitly_apikey.key; }&uri=${inputurl}&format=xml"

	# the request carries "print" escape sequences which are expanded
	# when it is written to the connection
	request="GET ${url_path} HTTP/1.1\r\n"
	request+="Host: ${url_host}\r\n"
	request+="User-Agent: ${http_user_agent}\r\n"
	request+='Connection: close\r\n'

	redirect {netfd}<> "/dev/tcp/${url_host}/80" 
	if (( $? != 0 )) ; then
		print -u2 -f $"%s: Could not open connection to %s.\n" "$0" "${url_host}"
		return 1
	fi

	# send the http GET; the extra '\r\n' ends the header section
	print -n -- "${request}\r\n" >&${netfd}

	# read the reply: first the head, then - if the head was fine - the body
	parse_http_response httpresponse <&${netfd}
	res=$?
	if (( res == 0 )) ; then
		response="${ cat_http_body "${httpresponse.transfer_encoding-}" <&${netfd} ; }"
		(( res+=$? ))
	fi

	# close connection
	redirect {netfd}<&-

	# success needs a 2xx status _and_ a reply carrying an <url> element
	if (( res == 0 && httpresponse.statuscode >= 200 && httpresponse.statuscode <= 299 )) && \
	   [[ "${response}" == ~(Elr).*(<url>)(.*)(<\/url>).* ]]; then
		# the statement below should really parse the XML...
		res_short_url="${response/~(Elr).*(<url>)(.*)(<\/url>).*/\2}"
		return 0
	fi

	print -u2 -f $"%s response was (%s,%s):\n%s\n" "${url_host}" "${httpresponse.statuscode-}" "${httpresponse.statusmsg-}" "${response}"
	return 1
}

# Ask the goo.gl service for a short alias of an URL.
#
# Arguments:
#   $1 - name of the variable which receives the short URL
#   $2 - the long URL; it is sent as the form field "url"
# Returns:
#   0 if a short URL was obtained, 1 otherwise (diagnostics go to stderr)
function request_googl
{
	nameref res_short_url=${1}
	typeset inputurl="${2}"

	# site setup
	typeset url_host='goo.gl'
	typeset url_path='/api/url'
	typeset url="http://${url_host}${url_path}"

	# connection state
	integer netfd            # file descriptor of the http stream
	integer res
	compound httpresponse    # filled in by "parse_http_response"
	typeset request=''
	typeset content=''
	typeset response=''

	# argument for "encode_multipart_form_data"
	compound mimeform=(
		# input
		typeset boundary
		compound -a form
		# output
		typeset content
		integer content_length
	)

	typeset -r boundary="--------shshorturl_${RANDOM}_Xksh93_Xfish_${RANDOM}_Yeats_${RANDOM}_Zchicken_${RANDOM}monster_--------"

	# build the form and let the encoder produce body + body length
	mimeform.boundary="${boundary}"
	# NOTE(review): the third field name ends in '=' ('auth_token=') - this
	# looks like a typo but is kept as-is; confirm against the service API
	mimeform.form=(
		( name='user'		    data='toolbar@google.com' )
		( name='url'		    data="${inputurl}" )
		( name='auth_token='	    data='shshorturl/ksh93/script' )
	)
	encode_multipart_form_data mimeform
	content="${mimeform.content}"

	# the request carries "print" escape sequences which are expanded
	# when it is written to the connection
	request="POST ${url_path} HTTP/1.1\r\n"
	request+="Host: ${url_host}\r\n"
	request+="User-Agent: ${http_user_agent}\r\n"
	request+='Connection: close\r\n'
	request+="Content-Type: multipart/form-data; boundary=${boundary}\r\n"
	request+="Content-Length: $(( mimeform.content_length ))\r\n"

	redirect {netfd}<> "/dev/tcp/${url_host}/80" 
	if (( $? != 0 )) ; then
		print -u2 -f $"%s: Could not open connection to %s.\n" "$0" "${url_host}"
		return 1
	fi

	# send the http POST: head, empty line, body
	{
		print -n -- "${request}\r\n"
		print -n -- "${content}\r\n"
	}  >&${netfd}

	# read the reply: first the head, then - if the head was fine - the body
	parse_http_response httpresponse <&${netfd}
	res=$?
	if (( res == 0 )) ; then
		response="${ cat_http_body "${httpresponse.transfer_encoding-}" <&${netfd} ; }"
		(( res+=$? ))
	fi

	# close connection
	redirect {netfd}<&-

	# success needs a 2xx status _and_ a reply carrying a "short_url" entry
	if (( res == 0 && httpresponse.statuscode >= 200 && httpresponse.statuscode <= 299 )) && \
	   [[ "${response}" == ~(Elri-g).*\"short_url\":\"http://goo.gl/.*\".* ]] ; then
		# the statement below should really parse the JSON...
		res_short_url="${response/~(Elri-g).*\"short_url\":\"(http:\/\/goo.gl\/.*)\".*/\1}"
		return 0
	fi

	print -u2 -f $"%s response was (%s,%s):\n%s\n" "${url_host}" "${httpresponse.statuscode-}" "${httpresponse.statusmsg-}" "${response}"
	return 1
}

# Ask the tinyurl.com service for a short alias of an URL.
#
# Arguments:
#   $1 - name of the variable which receives the short URL
#   $2 - the long URL; it is put into the query string unchanged
# Returns:
#   0 if a short URL was obtained, 1 otherwise (diagnostics go to stderr)
function request_tinyurl
{
	nameref res_short_url=${1}
	# site setup
	typeset url_host='tinyurl.com'
	typeset url_path='/api-create.php'
	integer netfd # http stream number
	typeset inputurl="${2}"
	compound httpresponse # http response
	typeset request=''
	integer res
	typeset response=''
	typeset candidate=''

	# we assume "inputurl" is a correctly encoded URL which doesn't
	# require any further mangling
	url_path+="?url=${inputurl}"

	request="GET ${url_path} HTTP/1.1\r\n"
	request+="Host: ${url_host}\r\n"
	request+="User-Agent: ${http_user_agent}\r\n"
	request+='Connection: close\r\n'

	redirect {netfd}<> "/dev/tcp/${url_host}/80" 
	(( $? != 0 )) && { print -u2 -f $"%s: Could not open connection to %s.\n" "$0" "${url_host}" ;  return 1 ; }

	# send http GET
	{
		print -n -- "${request}\r\n"
	}  >&${netfd}

	# process reply
	parse_http_response httpresponse <&${netfd} ; (( res=$? ))
	if (( res == 0 )) ; then
		response="${ cat_http_body "${httpresponse.transfer_encoding-}" <&${netfd} ; }" ; (( res+=$? ))
	fi

	# close connection
	redirect {netfd}<&-

	# The reply body is expected to be the short URL itself. Remove
	# surrounding whitespace and make sure that what is left really looks
	# like one http(s) URL - an empty body or an error page must not be
	# handed back as a "short URL".
	candidate="${response##+([[:space:]])}"
	candidate="${candidate%%+([[:space:]])}"

	if (( res == 0 && httpresponse.statuscode >= 200 && httpresponse.statuscode <= 299 )) && \
	   [[ "${candidate}" == ~(Elr)https?://[^[:space:]]+ ]] ; then
		res_short_url="${candidate}"
		return 0
	else
		print -u2 -f $"%s response was (%s,%s):\n%s\n" "${url_host}" "${httpresponse.statuscode-}" "${httpresponse.statusmsg-}" "${response}"
		return 1
	fi

	# not reached
}

# Ask the tr.im service for a short alias of an URL.
#
# Arguments:
#   $1 - name of the variable which receives the short URL
#   $2 - the long URL; it is put into the query string unchanged
# Returns:
#   0 if a short URL was obtained, 1 otherwise (diagnostics go to stderr)
function request_trimurl
{
	nameref res_short_url=${1}
	# site setup
	typeset url_host='api.tr.im'
	typeset url_path='/api/trim_url.xml'
	integer netfd # http stream number
	typeset inputurl="${2}"
	compound httpresponse # http response
	typeset request=''
	integer res
	typeset response=''

	# we assume "inputurl" is a correctly encoded URL which doesn't
	# require any further mangling
	url_path+="?url=${inputurl}"

	request="GET ${url_path} HTTP/1.1\r\n"
	request+="Host: ${url_host}\r\n"
	request+="User-Agent: ${http_user_agent}\r\n"
	request+='Connection: close\r\n'

	redirect {netfd}<> "/dev/tcp/${url_host}/80" 
	(( $? != 0 )) && { print -u2 -f $"%s: Could not open connection to %s.\n" "$0" "${url_host}" ;  return 1 ; }

	# send http GET
	{
		print -n -- "${request}\r\n"
	}  >&${netfd}

	# process reply
	parse_http_response httpresponse <&${netfd} ; (( res=$? ))
	if (( res == 0 )) ; then
		response="${ cat_http_body "${httpresponse.transfer_encoding-}" <&${netfd} ; }" ; (( res+=$? ))
	fi

	# close connection
	redirect {netfd}<&-

	# Only a 2xx reply which really carries an <url> element counts as
	# success. Without the pattern test a non-matching reply would pass
	# through the substitution below unchanged and be returned in whole
	# as the "short URL".
	if (( res == 0 && httpresponse.statuscode >= 200 && httpresponse.statuscode <= 299 )) && \
	   [[ "${response}" == ~(Elr).*(<url>)(.*)(<\/url>).* ]] ; then
		# the statement below should really parse the XML...
		res_short_url="${response/~(Elr).*(<url>)(.*)(<\/url>).*/\2}"
		return 0
	else
		print -u2 -f $"%s response was (%s,%s):\n%s\n" "${url_host}" "${httpresponse.statuscode-}" "${httpresponse.statusmsg-}" "${response}"
		return 1
	fi

	# not reached
}

# Run getopts once more with a synthetic '-?' argument and terminate the
# script with exit code 2.
# NOTE(review): presumably the '-?' makes getopts print the usage text
# built from ${shshorturl_usage} - confirm against the ksh93 manual.
# Reads the global variables "progname" and "shshorturl_usage".
function usage
{
	# restart the option parsing from scratch
	OPTIND=0
	getopts -a "${progname}" "${shshorturl_usage}" OPT '-?'
	exit 2
}

# Entry point: parse the command line and print the short alias of the
# given URL on stdout.
#
# Arguments:
#   the command line arguments of the script (options followed by exactly
#   one URL)
# Returns:
#   0 if a short URL was printed, non-zero otherwise
function main
{
	typeset	service_provider='goo.gl' # default if option "+A" is set
	bool	scanall=true
	typeset	shorturl
	integer	res

	# a provider given via the environment turns the scan mode off
	if [[ -v SHSHORTURL_SERVICE_PROVIDER ]] ; then
		service_provider="${SHSHORTURL_SERVICE_PROVIDER}"
		scanall=false
	fi

	while getopts -a "${progname}" "${shshorturl_usage}" OPT ; do
		case "${OPT}" in
			'P')	service_provider="${OPTARG}" ; scanall=false ;;
			'A')	scanall=true ;;
			'+A')	scanall=false ;;
			*)	usage ;;
		esac
	done
	shift $(( OPTIND-1 ))

	# expecting exactly one argument
	if (( $# != 1 )) ; then
		usage
	fi

	typeset url="$1"

	# single provider mode: ask only the selected provider and pass its
	# diagnostics and exit code through
	if (( !scanall )) ; then
		if [[ ! -v providerlist[${service_provider}] ]] ; then
			print -u2 -f $"%s: Unsupported service provider.\n" "${progname}"
			return 1
		fi

		${providerlist[${service_provider}]} shorturl "${url}"
		res=$?
		if (( res == 0 )) ; then
			printf '%s\n' "${shorturl}"
		fi
		return ${res}
	fi

	# scan mode: try one provider after another (with their output
	# silenced) and stop at the first one which delivers a short URL
	typeset provider # provider index

	for provider in "${!providerlist[@]}" ; do
		${providerlist[${provider}]} shorturl "${url}" 1>'/dev/null' 2>&1
		res=$?

		if (( res == 0 )) ; then
			printf '%s\n' "${shorturl}"
			return 0
		fi
	done

	# none of the providers succeeded
	return 1
}

# program start
# register the utilities below as shell builtins
builtin basename
builtin cat
builtin uname

# name of this script without the directory part (used in messages)
typeset progname="${ basename "${0}" ; }"

# no pathname expansion; referencing an unset variable is an error
set -o noglob
set -o nounset

# HTTP protocol client identifier
typeset -r http_user_agent="shshorturl/ksh93 (2012-04-12; ${ uname -s -r -p ; })"

# option description handed to "getopts" in "main" and "usage"
typeset -r shshorturl_usage=$'+
[-?\n@(#)\$Id: shshorturl (Roland Mainz) 2012-04-12 \$\n]
[-author?Roland Mainz <roland.mainz@nrubsig.org>]
[+NAME?shshorturl - create short alias URL from long URL]
[+DESCRIPTION?\bshshorturl\b is a small utility which passes a given URL
	to internet service which creates short aliases in the
	form of http://<servicename>/XXXXXXXX to redirect long URLs.]
[+?The first arg \burl\b describes a long URL which is transformed into
	a short alias.]
[P:provider?Service provider (\'bit.ly\', \'goo.gl\', \'tinyurl.com\' or \'tr.im\').
	The default can be set via the SHSHORTURL_SERVICE_PROVIDER environment
	variable. If this option is not set and the environment variable
	SHSHORTURL_SERVICE_PROVIDER does not exist all providers will be scanned
	until one returns a short url]:[provider]
[A:scanall?Scan all providers. This is the default.]

url

[+SEE ALSO?\bksh93\b(1), \brssread\b(1), \bshtwitter\b(1), http://bit.ly/, http://goo.gl/, http://www.tinyurl.com/, http://tr.im/]
'

# list of providers: maps the provider name (as given via option "-P" or
# SHSHORTURL_SERVICE_PROVIDER) to the function which talks to that service
typeset -r -A providerlist=(
	[bit.ly]='request_bitly'
	[goo.gl]='request_googl'
	[tinyurl.com]='request_tinyurl'
	[tr.im]='request_trimurl'
)
	
# run the script and hand the exit code of "main" to the caller
main "$@"
exit $?

# EOF.
