Commit: initial
DanMcInerney committed Jun 23, 2014
1 parent 9dbf1d6 commit 99736db
Showing 16 changed files with 211 additions and 0 deletions.
11 changes: 11 additions & 0 deletions scrapy.cfg
@@ -0,0 +1,11 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# http://doc.scrapy.org/en/latest/topics/scrapyd.html

[settings]
default = xsscrapy.settings

[deploy]
#url = http://localhost:6800/
project = xss_spider
Empty file added xsscrapy/__init__.py
Empty file.
Binary file added xsscrapy/__init__.pyc
Binary file not shown.
10 changes: 10 additions & 0 deletions xsscrapy/items.py
@@ -0,0 +1,10 @@
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html

from scrapy.item import Item, Field

class Link(Item):
    url = Field()
    body = Field()
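For illustration only (not part of the commit): a minimal sketch of how the Link item would be filled and read, assuming Scrapy is installed; the URL and body values are placeholders.

from xsscrapy.items import Link

item = Link()
item['url'] = 'http://example.com/page'   # placeholder URL
item['body'] = '<html>...</html>'         # placeholder response body
print item['url'], len(item['body'])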
Binary file added xsscrapy/items.pyc
Binary file not shown.
10 changes: 10 additions & 0 deletions xsscrapy/middlewares.py
@@ -0,0 +1,10 @@
from xsscrapy.settings import USER_AGENT_LIST
import random
from scrapy import log

class RandomUserAgentMiddleware(object):
    ''' Use a random user-agent for each request '''
    def process_request(self, request, spider):
        ua = random.choice(USER_AGENT_LIST)
        if ua:
            request.headers.setdefault('User-Agent', ua)
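A minimal sketch (not part of the commit) of exercising RandomUserAgentMiddleware outside a full crawl, assuming Scrapy is installed and the xsscrapy package is importable; process_request ignores the spider argument, so None is passed here.

from scrapy.http import Request
from xsscrapy.middlewares import RandomUserAgentMiddleware

mw = RandomUserAgentMiddleware()
req = Request('http://example.com')   # placeholder URL
mw.process_request(req, None)         # picks a random User-Agent and sets the header
print req.headers.get('User-Agent')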
Binary file added xsscrapy/middlewares.pyc
Binary file not shown.
8 changes: 8 additions & 0 deletions xsscrapy/pipelines.py
@@ -0,0 +1,8 @@
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html

class XSS_pipeline(object):
    def process_item(self, item, spider):
        return item
Binary file added xsscrapy/pipelines.pyc
Binary file not shown.
28 changes: 28 additions & 0 deletions xsscrapy/settings.py
@@ -0,0 +1,28 @@
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
# http://doc.scrapy.org/en/latest/topics/settings.html
#

BOT_NAME = 'xsscrapy'

SPIDER_MODULES = ['xsscrapy.spiders']
NEWSPIDER_MODULE = 'xsscrapy.spiders'

# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36'
# Get a random user agent for each crawled page
USER_AGENT_LIST = ['Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36',
                   'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36',
                   'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14',
                   'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20100101 Firefox/29.0',
                   'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36',
                   'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:28.0) Gecko/20100101 Firefox/28.0']
DOWNLOADER_MIDDLEWARES = {'xsscrapy.middlewares.RandomUserAgentMiddleware': 400,
                          'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': None,}

# prevent duplicate link crawling
DUPEFILTER_CLASS = 'scrapy.dupefilter.RFPDupeFilter'

ITEM_PIPELINES = {'xsscrapy.pipelines.XSS_pipeline': 100} # 100 is the pipeline order (0-1000); lower values run earlier

Binary file added xsscrapy/settings.pyc
Binary file not shown.
4 changes: 4 additions & 0 deletions xsscrapy/spiders/__init__.py
@@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.
Binary file added xsscrapy/spiders/__init__.pyc
Binary file not shown.
Binary file added xsscrapy/spiders/tutorial_spider.pyc
Binary file not shown.
140 changes: 140 additions & 0 deletions xsscrapy/spiders/xss_spider.py
@@ -0,0 +1,140 @@
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.selector import Selector
from scrapy.http import Request

from xsscrapy.items import Link

from urlparse import urlparse, parse_qsl
import urllib
import re


class XSSspider(CrawlSpider):
    name = 'xss_spider'
    #allowed_domains = ['coin.co']
    #start_urls = ['http://coin.co']

    rules = (Rule(SgmlLinkExtractor(), callback='parse_url', follow=True), )

    def __init__(self, *args, **kwargs):
        # run using: scrapy crawl xss_spider -a url='http://kadira.com'
        super(XSSspider, self).__init__(*args, **kwargs)
        self.start_urls = [kwargs.get('url')]
        hostname = urlparse(self.start_urls[0]).hostname
        self.allowed_domains = ['.'.join(hostname.split('.')[-2:])] # wrapping the registered domain in a list lets subdomains be crawled too
        self.payloader = xss_payloader()

    def parse_url(self, response):
        item = Link()
        item['url'] = response.url
        payloaded_urls = self.payloader.run(item['url'])
        if payloaded_urls:
            return [Request(url, callback=self.find_xss_in_body) for url in payloaded_urls]

        #item['body'] = response.body
        return item

    def find_xss_in_body(self, response):
        delim = '9zqjx'
        body = response.body
        url = response.url
        tester = '"\'><()=;/:'
        if tester in body:
            print '------------------------- 100% vulnerable:', url

        allBetweenDelims = '%s(.*?)%s' % (delim, delim)
        matches = re.findall(allBetweenDelims, body)
        if len(matches) > 0:
            pass


class xss_payloader:
    ''' Find urls with parameters then return a list of urls with 1 xss payload per param '''

    def __init__(self):
        self.xssDelim = '9zqjx' # 'zqjx' has the fewest Google search results I could find for a 4-letter combo (47.2K)
        self.payloadTests = [self.xssDelim+'"\'><()=;/:'+self.xssDelim, # Normal check
                             self.xssDelim+'%22%27%3E%3C%28%29%3D%3B%2F%3A'+self.xssDelim, # Hex encoded
                             self.xssDelim+'&#34&#39&#62&#60&#40&#41&#61&#59&#47&#58'+self.xssDelim] # HTML encoded without semicolons

    def run(self, url):
        if '=' in url:
            payloaded_urls = self.checkForURLparams(url)
            return payloaded_urls

    def checkForURLparams(self, url):
        ''' Build payloaded versions of the URL, each with one parameter swapped for an XSS test payload '''
        payloaded_urls = []
        params = self.getURLparams(url)
        moddedParams = self.change_params(params)
        hostname, protocol, root_domain, path = self.url_processor(url)
        if hostname and protocol and path:
            for payload in moddedParams:
                for params in moddedParams[payload]:
                    joinedParams = urllib.urlencode(params, doseq=1) # doseq=1 re-encodes the (param, value) pairs into a query string
                    newURL = urllib.unquote(protocol+hostname+path+'?'+joinedParams)
                    payloaded_urls.append(newURL)
        return payloaded_urls

    def getURLparams(self, url):
        ''' Parse out the URL parameters '''
        parsedUrl = urlparse(url)
        fullParams = parsedUrl.query
        params = parse_qsl(fullParams) # parse_qsl rather than parse_qs in order to preserve parameter order
        return params

    def change_params(self, params):
        ''' Return a dict keyed by payload; each value is a list of parameter lists, each with one parameter replaced by that payload '''
        changedParams = []
        changedParam = False
        moddedParams = []
        allModdedParams = {}

        # Create a list of lists, each list will be the URL we will test
        # This preserves the order of the URL parameters and will also
        # test each parameter individually instead of all at once
        for payload in self.payloadTests:
            allModdedParams[payload] = []
            for x in xrange(0, len(params)):
                for p in params:
                    param = p[0]
                    value = p[1]
                    # If a parameter has not been modified yet
                    if param not in changedParams and changedParam == False:
                        newValue = payload
                        changedParams.append(param)
                        p = (param, newValue)
                        moddedParams.append(p)
                        changedParam = True
                    else:
                        moddedParams.append(p)

                # Reset so we can step through again and change a diff param
                allModdedParams[payload].append(moddedParams)

                changedParam = False
                moddedParams = []

            # Reset the list of changed params each time a new payload is attempted
            changedParams = []

        return allModdedParams

    def url_processor(self, url):
        ''' Get the url domain, protocol, and hostname using urlparse '''
        try:
            parsed_url = urlparse(url)
            # Get the path
            path = parsed_url.path
            # Get the protocol
            protocol = parsed_url.scheme+'://'
            # Get the hostname (includes subdomains)
            hostname = parsed_url.hostname
            # Get root domain
            root_domain = '.'.join(hostname.split('.')[-2:])
        except:
            print '[-] Could not parse url:', url
            return

        return (hostname, protocol, root_domain, path)
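To make the per-parameter substitution concrete, a minimal sketch (not part of the commit) of running xss_payloader on its own, assuming the xsscrapy package is importable; the URL is illustrative. With two query parameters and three payloadTests, run() returns six payloaded URLs, each with exactly one parameter replaced.

from xsscrapy.spiders.xss_spider import xss_payloader

payloader = xss_payloader()
urls = payloader.run('http://example.com/search?q=test&page=1')   # illustrative URL
for u in urls:
    print u   # one payloaded URL per (parameter, payload) pair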
Binary file added xsscrapy/spiders/xss_spider.pyc
Binary file not shown.
