Preface
This is a backend scanner that uses multiple threads to probe a target site for its admin login page. In a few days I will write another post walking through it line by line. The doc.txt it uses can be found in my git project; it is a dictionary file (wordlist) I collected.
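The scanner expects the dictionary to hold one candidate path per line; paths whose extensions are on the exclude list are skipped. The entries below are illustrative guesses at the format, not actual lines from doc.txt:

admin/
admin/login.php
manager/index.asp
wp-login.php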
git clone https://github.com/littletrojan/SiteScaner.git
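After cloning, a typical run might look like the following (the script name here is an assumption based on the repository name; check the repo for the actual entry point). -t sets the thread count, -v turns on detailed output, -d swaps in a different dictionary, and -e appends extensions to skip:

cd SiteScaner
python3 SiteScaner.py -u http://target.example.com -t 10 -v

The full script: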
#!python3
import os
import sys
import threading
import queue
import urllib.request
import urllib.parse
from optparse import OptionParser

# Command-line options.
usage = "usage: %prog [options] arg1 arg2"
parser = OptionParser(usage=usage)
# Number of worker threads.
parser.add_option("-t", "--thread", dest="thread_num", type="int", default=1,
                  help="Number of threads to open (optional)")
# Target URL.
parser.add_option("-u", "--url", dest="url", type="string",
                  help="Target url to scan (required)")
# Verbose output.
parser.add_option("-v", dest="show_message", action="store_true", default=False,
                  help="Set this argument to show details (optional)")
# Dictionary file; the default doc.txt contains more than 40000 paths.
parser.add_option("-d", "--dict", dest="dictionary", metavar="FILE", default="doc.txt",
                  help="Path dictionary file (optional)")
# File extensions in the dictionary to exclude.
parser.add_option("-e", "--exclude", dest="exclude_list", action="append",
                  help="Extension file names you don't want to try, like .jpg .gif etc")

exclude_list = ['.jpg', '.gif', '.css', '.png', '.js']
user_make_thread_dead = False


class HTTPBackendScanner(object):
    result_list = []

    def __init__(self, threading_num, domain_name, showdetail, excludelist, dictionary):
        self.threading_num = threading_num
        self.domain_name = domain_name
        self.user_agent = "Mozilla/6.0"
        self.charset = "UTF8"
        self.showdetail = showdetail
        self.excludelist = excludelist
        self.q = queue.Queue()
        self.queue_num = 0
        self.dictionary = dictionary

    def DictParser(self):
        '''Before starting the crawler, parse the dictionary, drop the
        extensions we don't want, and put every remaining path in the queue.'''
        print("Starting to parse dictionary line by line...")
        with open(self.dictionary, "r") as lines:
            for line in lines:
                line = line.rstrip()
                if os.path.splitext(line)[1] not in self.excludelist:
                    self.queue_num += 1
                    self.q.put(line)
        print("There are %s paths in the list" % self.queue_num)

    def crawler(self):
        print("thread %s started" % threading.get_ident())
        # Keep pulling paths until the queue drains or the user asks to stop.
        while not self.q.empty() and not user_make_thread_dead:
            path = self.q.get()
            url = "%s%s" % (self.domain_name, path)
            headers = {'User-Agent': self.user_agent,
                       'Content-Type': self.charset}
            request = urllib.request.Request(url, headers=headers)
            try:
                # end='\t': we don't want a line break before the result prints.
                print("Trying to connect %s" % url, end='\t')
                response = urllib.request.urlopen(request)
                content = response.read()
                if len(content):
                    finalurl = response.geturl()
                    if finalurl != url:
                        # The server redirected us somewhere else.
                        if self.showdetail:
                            print("Detected redirection by server! We are in %s now" % finalurl)
                        else:
                            print("3xx Redirection")
                    else:
                        self.result_list.append(url)
                        print("Status[%s]:Path:%s" % (response.getcode(), url))
            except urllib.error.HTTPError as e:
                print(e.reason)
                if str(e.reason).lower() != "not found":
                    raise  # 404 is expected while brute-forcing; surface anything else

    def print_result(self):
        for result in self.result_list:
            print(result)

    def starting_thread(self):
        '''After the scan starts, all work is done by worker threads. The main
        thread then blocks on input so the user can stop the scan at any time.'''
        global user_make_thread_dead
        for i in range(self.threading_num):
            t = threading.Thread(target=self.crawler)
            t.daemon = True  # daemon threads die when the main thread exits
            t.start()
        input("Input any word to stop!\n")
        user_make_thread_dead = True

    def run(self):
        '''Scanner's main logic.'''
        self.starting_thread()
        self.print_result()


if __name__ == "__main__":
    (option, args) = parser.parse_args()
    if option.url is None:  # the url argument is required
        parser.print_help()
        sys.exit(1)
    print("Program started!")
    print(option.url)
    threading_num = option.thread_num  # number of worker threads
    domain_name = option.url + "/"  # so dictionary paths can be appended directly
    print("Going to parse url: " + domain_name + "\n")
    userinput = input("Type any word to continue, quit to quit, or help for help: ")
    if userinput.lower() == "quit":
        print("byebye")
        sys.exit(0)
    elif userinput.lower() == "help":
        parser.print_help()
    showdetail = option.show_message  # show details or not
    # Use the user-supplied exclude list if given, otherwise the default.
    excludelist = option.exclude_list if option.exclude_list else exclude_list
    dictionary = option.dictionary
    scanner = HTTPBackendScanner(threading_num, domain_name, showdetail, excludelist, dictionary)
    scanner.DictParser()
    scanner.run()
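The workers stop when the shared user_make_thread_dead flag flips to True. A plain global works, but the standard library's threading.Event exists for exactly this kind of hand-off and is harder to misuse. A minimal sketch of the same shutdown pattern using it (names are illustrative, not from the project):

import threading
import queue

stop_event = threading.Event()
q = queue.Queue()

def worker():
    # Pull paths until the queue drains or the user asks to stop.
    while not q.empty() and not stop_event.is_set():
        path = q.get()
        # ... probe the path here, as crawler() does ...

for _ in range(4):
    threading.Thread(target=worker, daemon=True).start()

input("Input any word to stop!\n")
stop_event.set()  # every worker observes this on its next loop check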