MrdodgerX

SecurityFocus Web Crawler WP and Joomla

Sep 26th, 2020
406
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 15.33 KB | None | 0 0
  1. from bs4 import BeautifulSoup as bs
  2. import requests
  3. from requests.adapters import HTTPAdapter
  4. # from requests.packages.urllib3.util.retry import Retry
  5. import random
  6. from threading import Thread
  7. import time
  8. import re
  9. import json
  10. from urllib3.util import Retry
  11.  
  12. import sys
  13. import argparse
  14.  
  15. parser = argparse.ArgumentParser(prog='getargy.py',add_help=False)
  16.  
  17. parser.add_argument('-h', '--help', action="store_true")
  18. parser.add_argument('-s', '--start')
  19. parser.add_argument('-e', '--end')
  20.  
  21. args = parser.parse_args()
  22.  
  23.  
  24. joomlacoredata = []
  25. drupaldata = []
  26. joomlacomponentdata = []
  27. otherdata = []
  28.  
# Pool of User-Agent strings; random_headers() picks one per request.
# Despite the name, the pool is not limited to desktop browsers: it also
# contains crawler, game-console, TV/streaming and Android strings.
desktop_agents = [
    # Chrome / Safari / Firefox on Windows and macOS, then two Kindle strings
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0',
    'Mozilla/5.0 (Linux; U; en-US) AppleWebKit/528.5+ (KHTML, like Gecko, Safari/528.5+) Version/4.0 Kindle/3.0 (screen 600x800; rotate)',
    'Mozilla/5.0 (X11; U; Linux armv7l like Android; en-us) AppleWebKit/531.2+ (KHTML, like Gecko) Version/5.0 Safari/533.2+ Kindle/3.0+',

    # Search-engine crawlers (Yahoo! Slurp, bingbot, Googlebot)
    'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)',
    'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)',
    'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',

    # Game consoles (Nintendo 3DS / Wii U, PlayStation Vita / 4, Xbox One)
    'Mozilla/5.0 (Nintendo 3DS; U; ; en) Version/1.7412.EU',
    'Mozilla/5.0 (PlayStation Vita 3.61) AppleWebKit/537.73 (KHTML, like Gecko) Silk/3.2',
    'Mozilla/5.0 (PlayStation 4 3.11) AppleWebKit/537.73 (KHTML, like Gecko)',
    'Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Xbox; Xbox One) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Mobile Safari/537.36 Edge/13.10586',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; XBOX_ONE_ED) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393',
    'Mozilla/5.0 (Nintendo WiiU) AppleWebKit/536.30 (KHTML, like Gecko) NX/3.0.4.2.12 NintendoBrowser/4.3.1.11264.US',

    # TV / streaming devices (Apple TV, Nexus Player, Roku, ...)
    'AppleTV5,3/9.1.1',
    'AppleTV6,2/11.1',
    'Dalvik/2.1.0 (Linux; U; Android 6.0.1; Nexus Player Build/MMB29T)',
    'Mozilla/5.0 (Linux; Android 5.1; AFTS Build/LMY47O) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/41.99900.2250.0242 Safari/537.36',
    'Mozilla/5.0 (Linux; U; Android 4.2.2; he-il; NEO-X5-116A Build/JDQ39) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30',
    'Roku4640X/DVP-7.70 (297.70E04154A)',
    'Mozilla/5.0 (CrKey armv7l 1.5.16041) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.0 Safari/537.36',

    # More desktop browsers (Ubuntu, Windows, macOS, Chrome OS, Edge)
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.111 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9',
    'Mozilla/5.0 (X11; CrOS x86_64 8172.45.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.64 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246',

    # Android devices
    'Mozilla/5.0 (Linux; Android 5.0.2; LG-V410/V41020c Build/LRX22G) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/34.0.1847.118 Safari/537.36',
    'Mozilla/5.0 (Linux; Android 4.4.3; KFTHWI Build/KTU84M) AppleWebKit/537.36 (KHTML, like Gecko) Silk/47.1.79 like Chrome/47.0.2526.80 Safari/537.36',
    'Mozilla/5.0 (Linux; Android 5.0.2; SAMSUNG SM-T550 Build/LRX22G) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/3.3 Chrome/38.0.2125.102 Safari/537.36',
    'Mozilla/5.0 (Linux; Android 7.0; SM-T827R4 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.116 Safari/537.36',
    'Mozilla/5.0 (Linux; Android 6.0.1; SHIELD Tablet K1 Build/MRA58K; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/55.0.2883.91 Safari/537.36',
    'Mozilla/5.0 (Linux; Android 6.0.1; SGP771 Build/32.2.A.0.253; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/52.0.2743.98 Safari/537.36',
    'Mozilla/5.0 (Linux; Android 7.0; Pixel C Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/52.0.2743.98 Safari/537.36'

]
  77.  
  78.  
  79. def random_headers():
  80.     return {'User-Agent': random.choice(desktop_agents),
  81.             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'}
  82.  
  83.  
  84. def getInfoSecuritFocus(id):
  85.     global jsondata
  86.     title = ""
  87.     bugtrag = ""
  88.     typevul = ""
  89.     cve = []
  90.     remote = 0
  91.     local = 0
  92.     publisheddate = 0
  93.     updateddate = 0
  94.     authorname = ""
  95.     vulvesion = []
  96.     notvul = []
  97.     session = requests.Session()
  98.     retry = Retry(connect=3, backoff_factor=0.5)
  99.     adapter = HTTPAdapter(max_retries=retry)
  100.     session.mount('http://', adapter)
  101.     session.mount('https://', adapter)
  102.    
  103.     try:
  104.         # time.sleep(2)
  105.         url = f"https://www.securityfocus.com/bid/{id}/info"
  106.         infoSF = session.get(url, headers=random_headers())
  107.         # print(infoSF.status_code)
  108.         if(infoSF.status_code==200):
  109.             print(url)
  110.        
  111.             allLowerHtml = infoSF.text.lower()
  112.  
  113.             soupWeb = bs(infoSF.text, 'html.parser')
  114.             titlename = soupWeb.find(class_="title").text.strip()
  115.             componentname = comtype(title)
  116.             vulcontent = soupWeb.find(id="vulnerability")
  117.             tables = vulcontent.find('table').find_all("td")
  118.             indexnum=0
  119.             for td in tables:
  120.                 # print(f"{td.text}: {id}")
  121.                 textdata = td.text.strip()
  122.                 if(indexnum==1):
  123.                     bugtrag = textdata
  124.                 elif(indexnum==3):
  125.                     typevul = textdata
  126.                 elif(indexnum==5):
  127.                     cve = findCVE(textdata)
  128.                 elif(indexnum==7):
  129.                     remote = yesno(textdata)
  130.                 elif(indexnum==9):
  131.                     local = yesno(textdata)
  132.                 elif(indexnum==11):
  133.                     publisheddate = textdata
  134.                 elif(indexnum==13):
  135.                     updateddate = textdata
  136.                 elif(indexnum==15):
  137.                     authorname = textdata
  138.                 elif(indexnum==17):
  139.                     vulvesion = versionDetect(textdata)
  140.                 elif(indexnum==20):
  141.                     notvul = versionDetect(textdata)
  142.                
  143.                 indexnum+=1
  144.                
  145.             if 'drupal' in allLowerHtml:
  146.                 securityfocusdb = {
  147.                     # "iscore": 0,
  148.                     "bugtragid": bugtrag,
  149.                     "vul_ver": vulvesion, #set as array
  150.                     "not_vul_ver": notvul, # set as array
  151.                     "name": titlename,
  152.                     "cve": cve, # set as array
  153.                     "remote": remote,
  154.                     "local": local,
  155.                     "author": authorname,
  156.                     "type_vul": typevul,
  157.                     "published": publisheddate,
  158.                     "updated": updateddate,
  159.                     "component": componentname,
  160.                     "ref": {
  161.                         # "EDB": "",
  162.                         "securityfocus": url,
  163.                         "cve": cveUrl(cve),
  164.                         "other": otherref(id)
  165.  
  166.                     }
  167.                 }
  168.                     # print(securityfocusdb)
  169.                 drupaldata.append(securityfocusdb)
  170.                     # return securityfocusdb
  171.                
  172.                
  173.             if 'joomla' in allLowerHtml:
  174.  
  175.                 if 'component' in allLowerHtml:
  176.                     securityfocusdb = {
  177.                         "bugtragid": bugtrag,
  178.                         "vul_ver": vulvesion, #set as array
  179.                         "not_vul_ver": notvul, # set as array
  180.                         "name": titlename,
  181.                         "cve": cve, # set as array
  182.                         "remote": remote,
  183.                         "local": local,
  184.                         "author": authorname,
  185.                         "type_vul": typevul,
  186.                         "published": publisheddate,
  187.                         "updated": updateddate,
  188.                         "component": componentname,
  189.                         "ref": {
  190.                             "securityfocus": url,
  191.                             "cve": cveUrl(cve),
  192.                             "other": otherref(id)
  193.  
  194.                         }
  195.                     }
  196.                     joomlacomponentdata.append(securityfocusdb)
  197.  
  198.                 else:
  199.                     securityfocusdb = {
  200.                         # "iscore": 1,
  201.                         "bugtragid": bugtrag,
  202.                         "vul_ver": vulvesion, #set as array
  203.                         "not_vul_ver": notvul, # set as array
  204.                         "name": titlename,
  205.                         "cve": cve, # set as array
  206.                         "remote": remote,
  207.                         "local": local,
  208.                         "author": authorname,
  209.                         "type_vul": typevul,
  210.                         "published": publisheddate,
  211.                         "updated": updateddate,
  212.                         "ref": {
  213.                             # "EDB": "",
  214.                             "securityfocus": url,
  215.                             "cve": cveUrl(cve),
  216.                             "other": otherref(id)
  217.  
  218.                         }
  219.                     }
  220.                     joomlacoredata.append(securityfocusdb)
  221.  
  222.             else:
  223.                 securityfocusdb = {
  224.                     # "iscore": 1,
  225.                     "bugtragid": bugtrag,
  226.                     "vul_ver": vulvesion, #set as array
  227.                     "not_vul_ver": notvul, # set as array
  228.                     "name": titlename,
  229.                     "cve": cve, # set as array
  230.                     "remote": remote,
  231.                     "local": local,
  232.                     "author": authorname,
  233.                     "type_vul": typevul,
  234.                     "published": publisheddate,
  235.                     "updated": updateddate,
  236.                     "ref": {
  237.                         # "EDB": "",
  238.                         "securityfocus": url,
  239.                         "cve": cveUrl(cve),
  240.                         "other": otherref(id)
  241.  
  242.                     }
  243.                 }
  244.                 otherdata.append(securityfocusdb)
  245.            
  246.             print("Done")
  247.         else:
  248.             print(infoSF.status_code)
  249.     except Exception as err:
  250.         print (err)
  251.  
  252.  
  253. def findCVE(text):
  254.     r1 = re.findall(r"CVE-\d{4}-\d{4,7}",text)
  255.     return r1
  256.  
  257. def yesno(text):
  258.     text = text.lower()
  259.     if "y" in text:
  260.         return 1
  261.     return 0
  262.  
  263. def versionDetect(text):
  264.     listversion = []
  265.     r1 = re.findall(r"(\d+(\.\d+){2})(\-[\w\d\.\-]*)?(\+[\w\d\.\-]*)?",text)
  266.     for r in r1:
  267.         listversion.append(r[0])
  268.     return listversion
  269.  
  270. def cveUrl(cve):
  271.     refcve = []
  272.     urlquery = "https://cve.mitre.org/cgi-bin/cvename.cgi?name="
  273.     for c in cve:
  274.         refcve.append(f"{urlquery}{c}")
  275.     return refcve
  276.  
  277. def comtype(title):
  278.     result = []
  279.     r1 = re.findall(r"""(?:'|").*(?:'|")""",title)
  280.     for r in r1:
  281.         result.append(r.strip("'".strip('"')))
  282.     return result
  283.    
  284.  
  285. def otherref(id):
  286.     listurl = []
  287.     url = f"https://www.securityfocus.com/bid/{id}/references"
  288.     infoSF = requests.get(url, headers=random_headers())
  289.     print(infoSF.status_code)
  290.     if(infoSF.status_code==200):
  291.  
  292.         soupWeb = bs(infoSF.text, 'html.parser')
  293.         vulcontent = soupWeb.find(id="vulnerability")
  294.         url = vulcontent.find_all('a', href=True)
  295.         # print(url)
  296.         for a in url:
  297.             # print(a['href'])
  298.             listurl.append(a['href'])
  299.         return listurl
  300.        
  301.     else:
  302.         return []
  303.  
def help():
    """Print the command-line usage summary (-h, -s, -e) to stdout."""
    # NOTE: this name shadows Python's built-in ``help`` within the module.
    print("""
   -h | --help\t - to get help
   -s | --start \t - start index search in www.securityfocus.com
   -e | --end \t - end index search in www.securityfocus.com
   """)
  310.  
  311.  
  312.  
  313.  
  314. if __name__ == "__main__":
  315.  
  316.     startrange = 0
  317.     endrange = 0
  318.  
  319.     parser = argparse.ArgumentParser(prog='getargy.py',add_help=False)
  320.  
  321.     parser.add_argument('-h', '--help', action="store_true")
  322.     parser.add_argument('-s', '--start', type=int)
  323.     parser.add_argument('-e', '--end', type=int)
  324.     # parser.add_argument('--search')
  325.     args = parser.parse_args()
  326.  
  327.     if len(sys.argv) == 1:
  328.         help()
  329.  
  330.     if args.help:
  331.         help()
  332.  
  333.     if args.start is not None:
  334.         startrange = args.start
  335.         if startrange == 0:
  336.             print("Error: Please set value more then 0")
  337.             sys.exit()
  338.  
  339.     if args.end is not None:
  340.         endrange = args.end
  341.         if endrange == 0:
  342.             print("Error: Please set value more then 0")
  343.             sys.exit()
  344.    
  345.     if(args.start is not None or args.end is not None):
  346.         if startrange > endrange:
  347.             print("Error: Start Value less then End Value")
  348.             sys.exit()
  349.         origin_time = time.time()
  350.         processes = list()
  351.         timetosleep = 0
  352.         # for i in range(1,107179):
  353.         for i in range(startrange,endrange):
  354.             processes.append(Thread(target=getInfoSecuritFocus, args=(i,)))
  355.            
  356.         for x in range(0, len(processes)):
  357.             if timetosleep < 40:
  358.                 timetosleep+=1
  359.                 processes[x].start()
  360.                
  361.             else:
  362.                 print("Sleeping")
  363.                 time.sleep(15)
  364.                 processes[x].start()
  365.                 timetosleep=0
  366.            
  367.         for x in range(0, len(processes)):
  368.             processes[x].join()
  369.        
  370.  
  371.         with open('joomlacoresecuritfocus.json', 'w') as outfile:  
  372.             json.dump(joomlacoredata, outfile)
  373.         with open('joomlacomponentsecuritfocus.json', 'w') as outfile:  
  374.             json.dump(joomlacomponentdata, outfile)
  375.         with open('drupalsecurityfocus.json', 'w') as outfile:  
  376.             json.dump(drupaldata, outfile)
  377.         with open('othersecurityfocus.json', 'w') as outfile:  
  378.             json.dump(otherdata, outfile)
  379.  
  380.         # # print(json.dumps(jsondata,indent=4))
  381.         time_interval = time.time() - origin_time
  382.         print("With Multiproc: ", time_interval)
  383.  
  384. # https://www.securityfocus.com/bid/99999/info
  385. # https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2019-7739 #
  386.  
Add Comment
Please, Sign In to add comment