#!/usr/bin/env python
#
# YaSweep
# Robert Wesley McGrew
# wesleymcgrew@gmail.com
#
# Run without arguments and scroll down a bit for usage information
#
# Copyright 2005 Robert Wesley McGrew
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# Release string; shown in the startup banner and in the title/heading of the
# HTML report.
version = '0.95y'

import getopt
import sys
import socket
import time
import urllib
import sys
from xml.dom import minidom

# Names of the module-level state shared between the main program and the
# functions below.  A `global` statement at module scope has no effect in
# Python (every name assigned here is already global), so these lines only
# serve as an inventory; functions that rebind one of these names must still
# declare it `global` themselves.
global queries_performed 
global burst_size
global burst
global burst_start_time
global subnet
global subnet_supplied
global rdns
global appid
global appid_supplied
#########################################
#
# Usage information
#

def usage():
   """Print the command-line help text to standard output."""
   help_text = """
Usage:
yasweep.py [-h num] [-r] [-b num] <[-d filename] | [-o report]> <-a appid> <-s subnet> 

-s Subnet should take the form of a dotted-quad where
   an asterisk may take the place of up to three of elements
   starting at the right.  For example:

        192.168.5.*
        192.168.*.*
        192.*.*.*

   If you do not specify this correctly, there really is
   no telling what will happen, as it's too much trouble
   to check to see if you're incapable of reading
   documentation.  If you need more flexibility
   in your sweep list, either script around this program
   or modify the generate_ip_list() function

-a Yahoo AppID

-o <report> should be the name of the file you wish to
   contain the pretty HTML report of the sweep.  Excellent 
   for printing up in pentest reports!

-d Dumps a comma-delimited text version of the data
   to filename

-b Burst mode, with number of queries per burst
   Will perform num queries, and wait 24 hours until 
   the next burst.

   The Yahoo Search API limits you to 5000 requests
   daily, so I recommend not exceeding that.  You may
   set less if you want to save some queries for another
   program.

-r Perform a reverse DNS lookup of each IP
   and search the resulting hostname as well (if one exists).

-h Write a hash mark every num hosts swept.  Use 0 to
   turn off hash marks.  Default is 8.
   """
   # A single parenthesised argument prints the same bytes whether `print`
   # is a statement or a function.
   print(help_text)

#######################################
#
# Modify this function if you need more
# flexibility in the list of hosts that
# are scanned
#

def generate_ip_list(subnet):
   """Expand a dotted-quad pattern into the list of addresses it covers.

   subnet -- string of exactly four dot-separated elements.  Each element is
             either a decimal number in 0..255 or '*'.  Asterisks may only
             occupy the right-most one to three positions, e.g.
             '192.168.5.*', '192.168.*.*' or '192.*.*.*'.  A pattern with no
             asterisk yields a single address.

   Returns a list of dotted-quad strings in ascending order, with every
   wildcard position running from 0 through 255.

   Raises ValueError when the pattern is malformed: wrong element count,
   a non-numeric or out-of-range element, an asterisk followed by a number,
   or an asterisk in the first position.
   """
   parts = subnet.split('.')
   if len(parts) != 4:
      raise ValueError('subnet must have four dot-separated elements: %r'
                       % (subnet,))

   # Refuse to expand the whole IPv4 space: the list would hold 2**32 strings.
   if parts[0] == '*':
      raise ValueError('at most three elements may be wildcards: %r'
                       % (subnet,))

   # One iterable of candidate values per quad position.
   choices = []
   wildcard_seen = False
   for part in parts:
      if part == '*':
         wildcard_seen = True
         choices.append(range(0, 256))
         continue
      if wildcard_seen:
         raise ValueError('wildcards must be the right-most elements: %r'
                          % (subnet,))
      try:
         value = int(part)
      except ValueError:
         raise ValueError('invalid subnet element %r in %r' % (part, subnet))
      if value < 0 or value > 255:
         raise ValueError('subnet element %r is outside 0..255 in %r'
                          % (part, subnet))
      choices.append((value,))

   return ['%d.%d.%d.%d' % (a, b, c, d)
           for a in choices[0]
           for b in choices[1]
           for c in choices[2]
           for d in choices[3]]

#######################################
#
# Generates a report based on the
# results.  Modify to suit your needs.
#

def generate_report(f, ip_list, results_ip, name_list, results_name, max):
   """Write the sweep results to ``f`` as a self-contained HTML page.

   f            -- writable file-like object; only its write() method is used
   ip_list      -- swept IP addresses, one table row per entry
   results_ip   -- hit count for each entry of ip_list (same indexing)
   name_list    -- hostname for each IP, '' when unknown; only read when the
                   module-level ``rdns`` flag is true
   results_name -- hit count for each hostname; only read when ``rdns`` is true
   max          -- largest per-row total, used to scale the popularity bar so
                   that a row whose total equals ``max`` gets width 500.  When
                   it is 0 (nothing had any hits) every bar has width 0.

   Besides its arguments the function reads the module globals ``version``,
   ``subnet`` and ``rdns``.  Nothing is returned, and ``f`` is not closed.
   """
   # Function-scope import so this block carries its own dependency.
   from xml.sax.saxutils import escape

   def html_text(text):
      # Hostnames come from reverse DNS and the subnet from the command line;
      # neither may be allowed to inject markup or break out of an attribute.
      return escape(text, {'"': '&quot;'})

   search_prefix = 'http://search.yahoo.com/search?p=%22'
   safe_subnet = html_text(subnet)

   f.write('<html>\n')
   f.write('<title>' + 'YaSweep ' + version + ' Results for ' + safe_subnet + '</title>\n')
   f.write('<body>\n')
   f.write('<h1>YaSweep ' + version + ' results for ' + safe_subnet + '</h1>\n')
   f.write('<h2>Report generated: ' + time.ctime() + '</h2>\n')
   f.write('<hr>\n')
   f.write('<table border=1>\n')

   header = '<tr><td>IP Address</td> <td>Hits</td>'
   if rdns:
      header += ' <td>Hostname</td> <td>Hits</td> <td>Total Hits</td>'
   header += ' <td>Relative Popularity</td></tr>\n'
   f.write(header)

   for i, ip in enumerate(ip_list):
      safe_ip = html_text(ip)
      total = results_ip[i]
      cells = ['<tr><td><a href="' + search_prefix + safe_ip + '%22">'
               + safe_ip + '</a></td>',
               ' <td>' + str(results_ip[i]) + '</td>']
      if rdns:
         name = name_list[i]
         if name != '':
            safe_name = html_text(name)
            cells.append(' <td><a href="' + search_prefix + safe_name + '%22">'
                         + safe_name + '</a></td>')
         else:
            cells.append(' <td><i>Unknown</i></td>')
         total += results_name[i]
         cells.append(' <td>' + str(results_name[i]) + '</td> <td>'
                      + str(total) + '</td>')

      # Integer maths on purpose: the bar width moves in steps of 10.  A zero
      # maximum means no row had hits, so there is nothing to scale against.
      if max > 0:
         width = ((total * 50) // max) * 10
      else:
         width = 0
      cells.append(' <td><table border=0><tr><td bgcolor="#FF0000" width='
                   + str(width) + ' height=10></td></tr></table></td>')
      cells.append('</tr>\n')
      f.write(''.join(cells))

   f.write('</table>\n')
   f.write('<hr>\n')
   f.write('</body>\n')
   f.write('</html>\n')
   
#######################################
#
# Searches yahoo and keeps track of 
# when to stop and start bursts
#

def search_yahoo(term):
   """Return the number of Yahoo web-search hits for ``term`` as a phrase.

   term -- text to search for; it is wrapped in double quotes so the API
           treats it as an exact phrase.

   The request is retried until a response with a parsable
   ``totalResultsAvailable`` attribute is obtained, pausing briefly between
   attempts.  Ctrl-C and sys.exit() are never swallowed by the retry loop.

   Burst accounting (only when the module-level ``burst`` flag is true):
   the first query of a burst records ``burst_start_time``; once
   ``burst_size`` queries have completed the function sleeps until 24 hours
   (plus one minute of slack) after that start time, then resets the
   counter so that the next call begins a fresh burst.

   Reads the module globals ``appid``, ``burst`` and ``burst_size``; updates
   ``queries_performed`` and ``burst_start_time``.
   """
   global queries_performed
   global burst_start_time

   retry_delay = 2  # seconds between failed attempts, to avoid a busy loop

   url = 'http://api.search.yahoo.com/WebSearchService/V1/webSearch?'
   params = urllib.urlencode({
      'appid'    : appid,
      'query'    : '"' + term + '"',
      'results'  : 1,
      'adult_ok' : 1})

   num_results = 0
   while True:
      try:
         data = urllib.urlopen(url + params).read()
         dom = minidom.parseString(data)
         resultset = dom.getElementsByTagName('ResultSet')
         num_results = int(resultset.item(0).getAttribute('totalResultsAvailable'))
         break
      except (KeyboardInterrupt, SystemExit):
         # Let the user (or sys.exit) stop the program even mid-retry.
         raise
      except Exception:
         # Network failure or an unexpected response body: try again.
         time.sleep(retry_delay)

   queries_performed += 1
   if burst:
      if queries_performed == 1:
         burst_start_time = time.time()
         print('')
         print('Began new burst at ' + time.ctime(burst_start_time))
      if queries_performed == burst_size:
         wake_time = burst_start_time + (60*60*24) + 60 # plus a minute or so fudge-factor
         print('')
         print('Burst ended at ' + time.ctime())
         print('Sleeping until ' + time.ctime(wake_time))
         # sleep() may return early, so loop; never pass it a negative value.
         while True:
            remaining = wake_time - time.time()
            if remaining <= 0:
               break
            time.sleep(remaining)
         # Start counting again so the following query opens a new burst.
         queries_performed = 0
   return num_results

#######################################
#
# Main program code
#

print('YaSweep v' + version)
print('Robert Wesley McGrew - wesleymcgrew@gmail.com')

# Request quota per day, as documented in usage().
daily_query_limit = 5000

# Option defaults.  Several of these are module globals that search_yahoo()
# and generate_report() read directly, so their names must not change.
rdns = False
output_exists = False
dump = False
report = False
subnet_supplied = False
appid_supplied = False
show_hashes = True
hash_num = 8
queries_performed = 0
burst_size = 0
burst = False
burst_start_time = 0

try:
   # "a:" is required here, otherwise -a is rejected as an unknown option.
   opts, args = getopt.gnu_getopt(sys.argv[1:],"h:b:d:o:s:a:r")
except getopt.GetoptError:
   usage()
   sys.exit(1)

try:
   for o, a in opts:
      if o == '-h':
         requested = int(a)
         if requested < 0:
            raise ValueError('hash interval must not be negative')
         if requested == 0:
            show_hashes = False
         else:
            hash_num = requested
      if o == '-r':
         rdns = True
      if o == '-b':
         burst_size = int(a)
         if burst_size <= 0:
            raise ValueError('burst size must be positive')
         burst = True
      if o == '-d':
         dump = True
         dump_file = a
         output_exists = True
      if o == '-o':
         report = True
         report_file = a
         output_exists = True
      if o == '-s':
         subnet_supplied = True
         subnet = a
      if o == '-a':
         appid_supplied = True
         appid = a
except ValueError:
   # Non-numeric or out-of-range argument to -h / -b.
   usage()
   sys.exit(1)

if args or not output_exists or not subnet_supplied or not appid_supplied:
   usage()
   sys.exit(1)

print('Building scan list...')

try:
   ip_list = generate_ip_list(subnet)
except ValueError:
   print('Could not understand subnet ' + subnet)
   usage()
   sys.exit(1)

print(str(len(ip_list)) + ' IP addresses')

# Check the quota before the (slow) reverse lookups so we can fail fast.
num_queries = len(ip_list)
if num_queries > daily_query_limit and not burst:
   print('Number of required API queries ' + str(num_queries) + ' > allowed ' + str(daily_query_limit) + ' daily.  Use burst mode')
   sys.exit(1)

name_list = []
num_names = 0
if rdns:
   print('Performing reverse DNS lookup of all IP addresses...')
   for ip in ip_list:
      try:
         hostname, alias, ips = socket.gethostbyaddr(ip)
         num_names += 1
      except socket.error:
         # herror / gaierror are socket.error subclasses: no PTR record.
         hostname = ''
      name_list.append(hostname)
   print(str(num_names) + ' hosts have names')
   # Every resolved name costs one additional query.
   num_queries += num_names

if num_queries > daily_query_limit and not burst:
   print('Number of required API queries ' + str(num_queries) + ' > allowed ' + str(daily_query_limit) + ' daily.  Use burst mode')
   sys.exit(1)

print('Performing ' + str(num_queries) + ' API queries total.')

hash_counter = 0
queries_performed = 0
max_hits = 0
results_ip = []
results_name = []
if dump:
   try:
      dump_handle = open(dump_file,'w')
   except IOError:
      print('Could not open ' + dump_file + ' for writing.')
      sys.exit(1)

try:
   for i, ip in enumerate(ip_list):
      results_ip.append(search_yahoo(ip))
      row_total = results_ip[i]
      if rdns:
         if name_list[i] != '':
            results_name.append(search_yahoo(name_list[i]))
         else:
            results_name.append(0)
         row_total += results_name[i]

      if dump:
         if rdns:
            dump_handle.write(ip + ',' +
                              str(results_ip[i]) + ',' +
                              name_list[i] + ',' +
                              str(results_name[i]) + ',' +
                              str(row_total) + '\n')
         else:
            dump_handle.write(ip + ',' +
                              str(results_ip[i]) + '\n')
         # Flush per row: a sweep can run for days and may be interrupted.
         dump_handle.flush()

      if row_total > max_hits:
         max_hits = row_total

      if show_hashes:
         # Count first, then test, so a mark appears every hash_num hosts.
         hash_counter += 1
         if hash_counter >= hash_num:
            sys.stdout.write('#')
            sys.stdout.flush()
            hash_counter = 0
finally:
   if dump:
      dump_handle.close()

if report:
   print('')
   print('Generating report (' + report_file + ')')
   try:
      report_handle = open(report_file,'w')
   except IOError:
      print('Could not open ' + report_file + ' for writing.')
      sys.exit(1)
   try:
      generate_report(report_handle,ip_list,results_ip,name_list,results_name, max_hits)
   finally:
      report_handle.close()

print('')
print('Completed.')

