Skip to content

Commit

Permalink
removed JS escaped chars too early, also allowed empty params to be payloaded
Browse files Browse the repository at this point in the history
  • Loading branch information
Dan McInerney committed Dec 4, 2014
1 parent f4061a6 commit 60542c3
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 34 deletions.
59 changes: 32 additions & 27 deletions xsscrapy/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def process_item(self, item, spider):
# matches with semicolon which sometimes cuts results off
sc_full_match = '%s.{0,80}?%s;9' % (delim, delim)
#chars_between_delims = '%s(.*?)%s' % (delim, delim)
chars_between_delims = '%s(.{0,80}?)%s' % (delim, delim)
#chars_between_delims = '%s(.{0,80}?)%s' % (delim, delim)

# Quick sqli check based on DSSS
dbms, regex = self.sqli_check(body, meta['orig_body'])
Expand Down Expand Up @@ -204,40 +204,40 @@ def xss_logic(self, injection, meta, resp_url, error):
# Unpack the injection
#tag_index, tag, attr, attr_val, payload, unfiltered_chars, line = injection
# get_unfiltered_chars() can only return a string 0+ characters, but never None
unfiltered_chars = injection[5]
reflected_chars = injection[5]
payload = injection[4]
if ';' in unfiltered_chars:
if ';' in reflected_chars:
payload += ';9'
# injection[6] sometimes == '<p' maybe?
# It happened POSTing chatRecord to http://service.taobao.com/support/minerva/robot_save_chat_record.htm
# that page returned a Connection: Close header and no body
line = injection[6]+payload
item_found = None

# get_unfiltered_chars() always returns a string
if len(unfiltered_chars) > 0:
# get_reflected_chars() always returns a string
if len(reflected_chars) > 0:
chars_payloads = self.get_breakout_chars(injection, resp_url)
# get_breakout_chars() always returns a dict, never None
if len(chars_payloads) > 0:
sugg_payloads = []
for chars in chars_payloads:
if set(chars).issubset(set(unfiltered_chars)):
# Get rid of possible payloads with > in them if > not in unfiltered_chars
if set(chars).issubset(set(reflected_chars)):
# Get rid of possible payloads with > in them if > not in reflected_chars
item_found = True
for possible_payload in chars_payloads[chars]:
if '>' not in unfiltered_chars:
if '>' not in reflected_chars:
if '>' in possible_payload:
continue
sugg_payloads.append(possible_payload)

if item_found:
return self.make_item(meta, resp_url, line, unfiltered_chars, sugg_payloads)
return self.make_item(meta, resp_url, line, reflected_chars, sugg_payloads)

def get_breakout_chars(self, injection, resp_url):
''' Returns either None if no breakout chars were found
or a list of sets of potential breakout characters '''

tag_index, tag, attr, attr_val, payload, unfiltered_chars, line = injection
tag_index, tag, attr, attr_val, payload, reflected_chars, line = injection
pl_delim = payload[:7]
#full_match = '%s.*?%s' % (pl_delim, pl_delim)
full_match = '%s.{0,80}?%s' % (pl_delim, pl_delim)
Expand Down Expand Up @@ -639,7 +639,7 @@ def combine_regex_lxml(self, lxml_injs, full_matches, scolon_matches, body, mism
split_body = body[:match_offset]
# split the body at the tag, then take the last fragment
# which is closest to the injection point as regexed
line_no_tag = split_body.split(tag_delim)[-1].replace('\\"', '').replace("\\'", "")
line_no_tag = split_body.split(tag_delim)[-1]#.replace('\\"', '').replace("\\'", "")
line = tag_delim + line_no_tag
# Sometimes it may split wrong, in which case we drop that lxml match
if line_no_tag.startswith('<doctype') or line_no_tag.startswith('<html'):
Expand All @@ -654,11 +654,12 @@ def combine_regex_lxml(self, lxml_injs, full_matches, scolon_matches, body, mism
attr_val = attr_dict[a]
break

unfiltered_chars = self.get_unfiltered_chars(payload, pl_delim, scolon_matches, match_offset)
#unfiltered_chars = self.get_unfiltered_chars(payload, pl_delim, scolon_matches, match_offset)
reflected_chars = self.get_reflected_chars(tag, attr, payload, pl_delim, scolon_matches, match_offset)
# Common false+ shows only "> as unfiltered if script parses the chars between 2 unrelated delim strs
if unfiltered_chars == '">':
unfiltered_chars = ''
all_inj_data[match_offset] = [tag_index, tag, attr, attr_val, payload, unfiltered_chars, line]
if reflected_chars == '">':
reflected_chars = ''
all_inj_data[match_offset] = [tag_index, tag, attr, attr_val, payload, reflected_chars, line]

return all_inj_data

Expand Down Expand Up @@ -860,41 +861,45 @@ def unescape_payload(self, payload):

return payload

def get_reflected_chars(self, tag, attr, payload, delim, scolon_matches, match_offset):
    ''' Return the payload's test characters that survived (were reflected)
    in the response at this injection point.

    Strips the delimiter from the reflected payload, then removes
    JS-escaped characters only when the injection landed inside a script
    tag or an event-handler attribute -- stripping them for every context
    (as the old get_unfiltered_chars did) threw away valid reflections
    too early. The ; character is removed in the JS context because it
    also shows up in HTML-encoded entities; it is re-added below only if
    the ;9 marker was found at this exact injection offset.

    Always returns a string, possibly empty. '''
    # Reduce delim+test_str+delim down to just the reflected test chars
    reflected = payload.replace(delim, '')

    if tag == 'script' or attr in self.event_attributes():
        # JS context: escaped quotes/angles/slashes are neutralized by the
        # app's output encoding, so they don't count as reflected
        for escaped in ("\\'", '\\"', ';', '\\>', '\\<', '\\/'):
            reflected = reflected.replace(escaped, '')
    else:
        # Plain HTML context: just drop stray backslashes
        reflected = reflected.replace('\\', '')

    # Re-add ; when the ;9 marker matched at this same string offset,
    # since scolon_matches only exist when ;9 was found in the body
    for scolon_match in scolon_matches:
        if match_offset == scolon_match[0]:
            reflected += ';'
            break

    # Returning the (possibly empty) string directly is equivalent to the
    # old "if len(...) > 0 ... else return ''" tail
    return reflected

Expand Down
22 changes: 15 additions & 7 deletions xsscrapy/spiders/xss_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def __init__(self, *args, **kwargs):
hostname = urlparse(self.start_urls[0]).hostname
# With subdomains
self.allowed_domains = [hostname] # adding [] around the value seems to allow it to crawl subdomain of value
self.delim = '9zqjx'
self.delim = '1zqjx'
# semi colon goes on end because sometimes it cuts stuff off like
# gruyere or the second cookie delim
self.test_str = '\'"(){}<x>:/'
Expand Down Expand Up @@ -179,6 +179,7 @@ def parse_resp(self, response):

# Test URL variables with xss strings
payloaded_urls, url_delim_str = self.make_URLs(orig_url, payload) # list of tuples where item[0]=url, and item[1]=changed param
print 'URL:', payloaded_urls, url_delim_str
if payloaded_urls:
url_reqs = self.make_url_reqs(orig_url, payloaded_urls, url_delim_str)
if url_reqs:
Expand Down Expand Up @@ -325,7 +326,7 @@ def make_URLs(self, url, payload):
delim_str = self.delim + two_rand_letters
payload = delim_str + payload + delim_str + ';9'

if '=' in url:
if '=' in url and '?' in url:
# If URL has variables, payload them
payloaded_urls = self.payload_url_vars(url, payload)
else:
Expand Down Expand Up @@ -362,19 +363,27 @@ def payload_url_vars(self, url, payload):
continue

for p in params:
if p[1] == payload:
if payload in p[1]:
changed_value = p[0]

payloaded_urls.append((newURL, changed_value, payload))

# Payload the path, like: example.com/page1.php?param=val becomes example.com/page1.php/FUZZCHARS/?param=val
payloaded_urls.append(self.payload_path(url))

if len(payloaded_urls) > 0:
return payloaded_urls

# def payload_path(self, url):
# ''' Payload the path, like: example.com/page1.php?param=val becomes example.com/page1.php/FUZZCHARS/?param=val '''
# parsed = urlparse(url)

def getURLparams(self, url):
    ''' Extract the query-string parameters of url as an ordered list of
    (name, value) tuples. Uses parse_qsl rather than parse_qs so the
    original parameter order is preserved; blank values are kept so that
    empty params can still be payloaded. '''
    query = urlparse(url).query
    return parse_qsl(query, keep_blank_values=True)

def change_params(self, params, payload):
Expand All @@ -394,9 +403,8 @@ def change_params(self, params, payload):
value = p[1]
# If a parameter has not been modified yet
if param not in changedParams and changedParam == False:
newValue = payload
changedParams.append(param)
p = (param, newValue)
p = (param, value+payload)
moddedParams.append(p)
changedParam = param
else:
Expand All @@ -410,7 +418,7 @@ def change_params(self, params, payload):
moddedParams = []

# Reset the list of changed params each time a new payload is attempted
changedParams = []
#changedParams = []

if len(allModdedParams) > 0:
return allModdedParams
Expand Down

0 comments on commit 60542c3

Please sign in to comment.