#!/usr/bin/env python
#
# GooSweep
# Robert Wesley McGrew
# wesleymcgrew@gmail.com
#
# Run without arguments and scroll down a bit for usage information
#
# Copyright 2005 Robert Wesley McGrew
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# Program version string; shown in the startup banner and in the
# title/heading of the generated HTML report.
version = '0.8'

import getopt
import sys
import socket
import google
import time
import SOAPpy

# NOTE: 'global' statements at module level have no effect in Python; they
# only serve here as a list of the module-wide state shared between the
# main program code and the functions below.  The names themselves are
# bound in the main program code, and any function that rebinds one of
# them must carry its own 'global' declaration.
global queries_performed 
global burst_size
global burst
global burst_start_time
global subnet
global subnet_supplied
global rdns

#########################################
#
# Usage information
#

def usage():
	"""Print the command-line help text to standard output."""
	# The text is kept flush-left on purpose: any indentation inside the
	# literal would show up in the printed output.
	help_text = """
Usage:
goosweep.py [-h num] [-r] [-b num] <[-d filename] | [-o report]> <-s subnet> 

-s Subnet should take the form of a dotted-quad where
   an asterisk may take the place of up to three of elements
   starting at the right.  For example:

        192.168.5.*
        192.168.*.*
        192.*.*.*

   If you do not specify this correctly, there really is
   no telling what will happen, as it's too much trouble
   to check to see if you're incapable of reading
   documentation.  If you need more flexibility
   in your sweep list, either script around this program
   or modify the generate_ip_list() function

-o <report> should be the name of the file you wish to
   contain the pretty HTML report of the sweep.  Excellent 
   for printing up in pentest reports!

-d Dumps a comma-delimited text version of the data
   to filename

-b Burst mode, with number of queries per burst
   Will perform num queries, and wait 24 hours until 
   the next burst.

   The Google API limits you to 1000 requests
   daily, so I recommend not exceeding that.  You may
   set less if you want to save some queries for another
   program.

-r Perform a reverse DNS lookup of each IP
   and google the resulting hostname as well (if one exists).

-h Write a hash mark every num hosts swept.  Use 0 to
   turn off hash marks.  Default is 8.
   """
	# A single parenthesised argument prints identically whether 'print'
	# is the Python 2 statement or the Python 3 function.
	print(help_text)

#######################################
#
# Modify this function if you need more
# flexibility in the list of hosts that
# are scanned
#

def generate_ip_list(subnet):
	"""Expand a dotted-quad pattern into the list of addresses it covers.

	subnet is a string of four dot-separated elements.  Each element is
	either a decimal number in the range 0-255 or an asterisk; asterisks
	may only occupy the right-most positions ('192.168.5.*',
	'192.168.*.*', ...).  An element that is an asterisk expands to every
	value from 0 through 255.

	Returns a list of dotted-quad strings, ordered with the right-most
	element varying fastest.  Note that every wildcard multiplies the
	length of the list by 256.

	Raises ValueError if the pattern does not have exactly four elements,
	if an element is not an integer in 0-255, or if a number follows an
	asterisk.
	"""
	octets = [part.strip() for part in subnet.split('.')]
	if len(octets) != 4:
		raise ValueError('subnet must have four dot-separated elements: ' + repr(subnet))

	# One half-open (start, stop) range per quad element.
	ranges = []
	wildcard_seen = False
	for octet in octets:
		if octet == '*':
			wildcard_seen = True
			ranges.append((0, 256))
			continue
		if wildcard_seen:
			# Only trailing wildcards are supported, e.g. 10.*.1.* is rejected.
			raise ValueError('wildcards must be the right-most elements: ' + repr(subnet))
		value = int(octet)  # raises ValueError for non-numeric text
		if value < 0 or value > 255:
			raise ValueError('element out of range 0-255: ' + repr(octet))
		ranges.append((value, value + 1))

	return [str(a) + '.' + str(b) + '.' + str(c) + '.' + str(d)
		for a in range(*ranges[0])
		for b in range(*ranges[1])
		for c in range(*ranges[2])
		for d in range(*ranges[3])]

#######################################
#
# Generates a report based on the
# results.  Modify to suit your needs.
#

def _escape_html(text):
	"""Return text with the characters &, <, > and " replaced by HTML entities."""
	return (text.replace('&', '&amp;')
		.replace('<', '&lt;')
		.replace('>', '&gt;')
		.replace('"', '&quot;'))

def generate_report(f, ip_list, results_ip, name_list, results_name, max):
	"""Write the sweep results to f as an HTML table.

	f            -- writable file-like object; it is not closed here
	ip_list      -- list of IP address strings, one table row each
	results_ip   -- hit count for each entry of ip_list (same order)
	name_list    -- hostname for each IP, '' when unknown; only read
	                when the module-level rdns flag is set
	results_name -- hit count for each hostname; only read when rdns
	                is set
	max          -- hit count that corresponds to a full-width bar in
	                the "Relative Popularity" column (the caller passes
	                the largest per-row total)

	Reads the module-level names version, subnet and rdns.
	"""
	search_url = 'http://www.google.com/search?hl=en&lr=&sa=G&q=%22'
	safe_subnet = _escape_html(subnet)

	f.write('<html>\n')
	f.write('<title>' + 'GooSweep ' + version + ' Results for ' + safe_subnet + '</title>\n')
	f.write('<body>\n')
	f.write('<h1>GoogleSweep ' + version + ' results for ' + safe_subnet + '</h1>\n')
	f.write('<h2>Report generated: ' + time.ctime() + '</h2>\n')
	f.write('<hr>\n')
	f.write('<table border=1>\n')
	f.write('<tr><td>IP Address</td> <td>Hits</td>')
	if rdns:
		f.write(' <td>Hostname</td> <td>Hits</td> <td>Total Hits</td>')
	f.write(' <td>Relative Popularity</td></tr>\n')

	for i, ip in enumerate(ip_list):
		safe_ip = _escape_html(ip)
		f.write('<tr><td><a href="' + search_url + safe_ip + '%22">')
		f.write(safe_ip + '</a></td> <td>' + str(results_ip[i]) + '</td>')
		total = results_ip[i]
		if rdns:
			if name_list[i] != '':
				# Hostnames come from reverse DNS, i.e. from whoever controls
				# the PTR record, so they must not reach the page unescaped.
				safe_name = _escape_html(name_list[i])
				f.write(' <td><a href="' + search_url + safe_name + '%22">')
				f.write(safe_name + '</a></td>')
			else:
				f.write(' <td><i>Unknown</i></td>')
			total += results_name[i]
			f.write(' <td>' + str(results_name[i]) + '</td> <td>' + str(total) + '</td>')
		# Bar width is quantised to 10-pixel steps, 500 pixels when total == max.
		# When nothing had any hits max is 0; draw an empty bar instead of
		# dividing by zero.  '//' keeps the division integral on Python 3 too.
		if max > 0:
			bar_width = ((total * 50) // max) * 10
		else:
			bar_width = 0
		f.write(' <td><table border=0><tr><td bgcolor="#FF0000" width=' + str(bar_width) + ' height=10></td></tr></table></td>')
		f.write('</tr>\n')

	f.write('</table>\n')
	f.write('<hr>\n')
	f.write('</body>\n')
	f.write('</html>\n')
	
#######################################
#
# Searches google and keeps track of 
# when to stop and start bursts
#

def search_google(term):
	"""Query Google for term and return the estimated number of results.

	Every call increments the module-level queries_performed counter.
	When the module-level burst flag is set, the first query of a burst
	records burst_start_time, and the query that brings the counter up
	to burst_size blocks until 24 hours (plus one minute) after the
	burst started.  The counter is then reset so that the next call
	begins a new burst.
	"""
	global queries_performed
	global burst
	global burst_start_time
	global burst_size

	results = google.doGoogleSearch(term)
	num_results = results.meta.estimatedTotalResultsCount
	queries_performed += 1
	if not burst:
		return num_results

	if queries_performed == 1:
		burst_start_time = time.time()
		print('')
		print('Began new burst at ' + time.ctime(burst_start_time))
	if queries_performed == burst_size:
		resume_time = burst_start_time + (60*60*24) + 60 # plus a minute or so fudge-factor
		print('')
		print('Burst ended at ' + time.ctime())
		print('Sleeping until ' + time.ctime(resume_time))
		# sleep() may return early, so loop until the deadline has really
		# passed, and never hand it a negative duration.
		while True:
			remaining = resume_time - time.time()
			if remaining <= 0:
				break
			time.sleep(remaining)
		# Start counting afresh; without this reset the counter would never
		# equal 1 or burst_size again and only the first burst would ever
		# be throttled.
		queries_performed = 0
	return num_results

#######################################
#
# Main program code
#

def _parse_count(flag, text, minimum):
	"""Return text as an int >= minimum, or print usage and exit with status 1."""
	try:
		value = int(text)
	except ValueError:
		value = None
	if value is None or value < minimum:
		print('Invalid value for ' + flag + ': ' + text)
		usage()
		sys.exit(1)
	return value

def _require_quota(total):
	"""Exit with status 1 if total queries exceed the daily quota and burst mode is off."""
	if total > DAILY_QUERY_LIMIT and not burst:
		print('Number of required API queries ' + str(total) + ' > allowed ' + str(DAILY_QUERY_LIMIT) + ' daily.  Use burst mode')
		sys.exit(1)

# Single parenthesised arguments print identically under the Python 2
# print statement and the Python 3 print function.
print('GooSweep v' + version)
print('Robert Wesley McGrew - wesleymcgrew@gmail.com')

# Daily request quota of the Google API, as stated in the usage text.
DAILY_QUERY_LIMIT = 1000

rdns = False
output_exists = False
dump = False
report = False
subnet_supplied = False
force = False  # not referenced anywhere in this script; kept as-is
show_hash = True
hash_num = 8
queries_performed = 0
burst_size = 0
burst = False
burst_start_time = 0

try:
	opts, args = getopt.gnu_getopt(sys.argv[1:],"h:b:d:o:s:r")
except getopt.GetoptError:
	usage()
	sys.exit(1)

for o, a in opts:
	if o == '-h':
		count = _parse_count('-h', a, 0)
		if count == 0:
			show_hash = False
		else:
			hash_num = count
	if o == '-r':
		rdns = True
	if o == '-b':
		burst = True
		# A burst of fewer than one query could never complete.
		burst_size = _parse_count('-b', a, 1)
	if o == '-d':
		dump = True
		dump_file = a
		output_exists = True
	if o == '-o':
		report = True
		report_file = a
		output_exists = True
	if o == '-s':
		subnet_supplied = True
		subnet = a

if args or not output_exists or not subnet_supplied:
	usage()
	sys.exit(1)

print('Building scan list...')

try:
	ip_list = generate_ip_list(subnet)
except ValueError:
	# Malformed -s argument (non-numeric element, missing element, ...).
	print('Invalid subnet: ' + subnet)
	usage()
	sys.exit(1)

print(str(len(ip_list)) + ' IP addresses')

# Check the quota before the (slow) reverse lookups, and again afterwards
# once the number of hostname queries is known.
num_queries = len(ip_list)
_require_quota(num_queries)

name_list = []
num_names = 0
if rdns:
	print('Performing reverse DNS lookup of all IP addresses...')
	for ip in ip_list:
		try:
			hostname, alias, ips = socket.gethostbyaddr(ip)
			num_names += 1
		except (socket.error, socket.herror, socket.gaierror):
			# No PTR record or the lookup failed: treat the host as unnamed.
			hostname = ''
		name_list.append(hostname)
	print(str(num_names) + ' hosts have names')
	num_queries += num_names

_require_quota(num_queries)

print('Performing ' + str(num_queries) + ' API queries total.')

hash_counter = 0
queries_performed = 0
max_hits = 0  # largest per-host total, used to scale the report bars
results_ip = []
results_name = []
dump_handle = None
if dump:
	try:
		dump_handle = open(dump_file,'w')
	except IOError:
		print('Could not open ' + dump_file + ' for writing.')
		sys.exit(1)

try:
	for i, ip in enumerate(ip_list):
		ip_hits = search_google(ip)
		results_ip.append(ip_hits)
		total_hits = ip_hits
		if rdns:
			if name_list[i] != '':
				name_hits = search_google(name_list[i])
			else:
				name_hits = 0
			results_name.append(name_hits)
			total_hits += name_hits

		if dump:
			if rdns:
				fields = [ip, str(ip_hits), name_list[i], str(name_hits), str(total_hits)]
			else:
				fields = [ip, str(ip_hits)]
			dump_handle.write(','.join(fields) + '\n')
			# Flush per row so partial results survive an interrupted sweep.
			dump_handle.flush()

		if total_hits > max_hits:
			max_hits = total_hits

		if show_hash:
			# Count first, then test, so a mark appears every hash_num hosts
			# rather than every hash_num + 1.
			hash_counter += 1
			if hash_counter >= hash_num:
				sys.stdout.write('#')
				sys.stdout.flush()
				hash_counter = 0
finally:
	if dump_handle is not None:
		dump_handle.close()

if report:
	print('')
	print('Generating report (' + report_file + ')')
	try:
		report_handle = open(report_file,'w')
	except IOError:
		print('Could not open ' + report_file + ' for writing.')
		sys.exit(1)
	try:
		generate_report(report_handle,ip_list,results_ip,name_list,results_name, max_hits)
	finally:
		report_handle.close()

print('')
print('Completed.')
