# -*- coding: utf-8 -*-
# -------------------------------------------------------------------------------
# Name:         sfp_junkfiles
# Purpose:      From Spidering, identifies backup and temporary files.
#
# Author:       Steve Micallef <[email protected]>
#
# Created:      23/08/2014
# Copyright:    (c) Steve Micallef 2014
# Licence:      GPL
# -------------------------------------------------------------------------------

from sflib import SpiderFoot, SpiderFootPlugin, SpiderFootEvent
import random


class sfp_junkfiles(SpiderFootPlugin):
    """Junk Files:Footprint:Crawling and Scanning:slow,errorprone,invasive:Looks for old/temporary and other similar files."""

    # Default options
    opts = {
        'fileexts': ['tmp', 'bak', 'old'],
        'urlextstry': ['asp', 'php', 'jsp'],
        'files': ["old", "passwd", ".htaccess", ".htpasswd",
                  "Thumbs.db", "backup"],
        'dirs': ['zip', 'tar.gz', 'tgz', 'tar'],
        'skipfake': True
    }

    # Option descriptions
    optdescs = {
        'fileexts': "File extensions to append to URLs when probing for leftover copies.",
        'urlextstry': "Only probe URLs ending in these extensions with the file extensions above.",
        'files': "Try to fetch each of these files from the directory of the URL.",
        'dirs': "Try to fetch the containing folder with these extensions.",
        'skipfake': "Try to fetch an obviously fake page and, if no 404 is returned, stop trying that host for junk files. Good for avoiding false positives on servers that return content for pages that don't exist."
    }

    # State tracked across events for the duration of a scan
    results = list()
    hosts = list()
    skiphosts = list()
    bases = list()

    def setup(self, sfc, userOpts=dict()):
        self.sf = sfc
        self.results = list()
        self.hosts = list()
        self.skiphosts = list()
        self.bases = list()
        self.__dataSource__ = "Target Website"

        for opt in userOpts.keys():
            self.opts[opt] = userOpts[opt]

    # What events is this module interested in for input
    def watchedEvents(self):
        return ["LINKED_URL_INTERNAL"]

    # What events this module produces
    # This is to support the end user in selecting modules based on events
    # produced.
    def producedEvents(self):
        return ["JUNK_FILE"]
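
    # LINKED_URL_INTERNAL events are typically emitted by the spidering
    # module, so this module only does anything on targets being crawled.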
    # Handle events sent to this module
    def handleEvent(self, event):
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data
        host = self.sf.urlBaseUrl(eventData)

        self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

        if eventData in self.results:
            return None
        else:
            self.results.append(eventData)

        if self.opts['skipfake'] and host in self.skiphosts:
            self.sf.debug("Skipping " + host + " because it doesn't return 404s.")
            return None

        # Fetch an obviously non-existent page; if the server doesn't answer
        # with a 404, it returns content for any URL, and probing it for junk
        # files would only produce false positives.
        if host not in self.hosts and self.opts['skipfake']:
            fetch = host + "/" + str(random.randint(0, 99999999)) + ".html"
            res = self.sf.fetchUrl(fetch, headOnly=True,
                                   timeout=self.opts['_fetchtimeout'],
                                   useragent=self.opts['_useragent'])
            if res['code'] != "404":
                self.skiphosts.append(host)
                self.hosts.append(host)
                return None

        if host not in self.hosts:
            self.hosts.append(host)
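
        # Three probes follow for each new URL: (1) the URL itself with a
        # backup-style extension appended, (2) the parent directory fetched
        # as an archive file, and (3) well-known junk filenames fetched from
        # inside the parent directory.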
        # http://www/blah/abc.php -> try http://www/blah/abc.php.[fileexts]
        for ext in self.opts['urlextstry']:
            if "." + ext + "?" in eventData or "." + ext + "#" in eventData or \
                    eventData.endswith("." + ext):
                bits = eventData.split("?")
                for x in self.opts['fileexts']:
                    if self.checkForStop():
                        return None

                    self.sf.debug("Trying " + x + " against " + eventData)
                    fetch = bits[0] + "." + x
                    if fetch not in self.results:
                        self.results.append(fetch)
                    else:
                        self.sf.debug("Skipping, already fetched.")
                        continue

                    res = self.sf.fetchUrl(fetch, headOnly=True,
                                           timeout=self.opts['_fetchtimeout'],
                                           useragent=self.opts['_useragent'],
                                           sizeLimit=10000000)
                    if res['realurl'] != fetch:
                        self.sf.debug("Skipping because " + res['realurl'] +
                                      " isn't the fetched URL of " + fetch)
                        continue
                    if res['code'] == "200":
                        evt = SpiderFootEvent("JUNK_FILE", fetch,
                                              self.__name__, event)
                        self.notifyListeners(evt)
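
        # The remaining probes operate on the URL's parent directory, so
        # each directory is only processed once per scan.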
        base = self.sf.urlBaseDir(eventData)
        if base in self.bases:
            return None
        else:
            self.bases.append(base)

        # don't do anything with the root directory of a site
        self.sf.debug("Base: " + base + ", event: " + eventData)
        if base == eventData + "/" or base == eventData:
            return None

        # http://www/blah/abc.html -> try http://www/blah.[dirs]
        for dirfile in self.opts['dirs']:
            if self.checkForStop():
                return None

            # a slash count of 3 means the base is just http://www/,
            # the site root, which was excluded above
            if base.count('/') == 3:
                self.sf.debug("Skipping base url.")
                continue

            self.sf.debug("Trying " + dirfile + " against " + eventData)
            fetch = base[0:len(base) - 1] + "." + dirfile
            if fetch not in self.results:
                self.results.append(fetch)
            else:
                self.sf.debug("Skipping, already fetched.")
                continue

            res = self.sf.fetchUrl(fetch, headOnly=True,
                                   timeout=self.opts['_fetchtimeout'],
                                   useragent=self.opts['_useragent'])
            if res['realurl'] != fetch:
                self.sf.debug("Skipping because " + res['realurl'] +
                              " isn't the fetched URL of " + fetch)
                continue
            if res['code'] == "200":
                evt = SpiderFootEvent("JUNK_FILE", fetch,
                                      self.__name__, event)
                self.notifyListeners(evt)

        # http://www/blah/abc.html -> try http://www/blah/[files]
        for f in self.opts['files']:
            if self.checkForStop():
                return None

            self.sf.debug("Trying " + f + " against " + eventData)
            fetch = base + f
            if fetch not in self.results:
                self.results.append(fetch)
            else:
                self.sf.debug("Skipping, already fetched.")
                continue

            res = self.sf.fetchUrl(fetch, headOnly=True,
                                   timeout=self.opts['_fetchtimeout'],
                                   useragent=self.opts['_useragent'])
            if res['realurl'] != fetch:
                self.sf.debug("Skipping because " + res['realurl'] +
                              " isn't the fetched URL of " + fetch)
                continue
            if res['code'] == "200":
                evt = SpiderFootEvent("JUNK_FILE", fetch,
                                      self.__name__, event)
                self.notifyListeners(evt)

        return None

# End of sfp_junkfiles class
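
# Illustration (comments only, not executed): for a crawled URL such as
# http://example.com/app/login.php, the probes generated would be, e.g.:
#   http://example.com/app/login.php.tmp   (fileexts appended to the URL)
#   http://example.com/app.zip             (dirs against the parent directory)
#   http://example.com/app/passwd          (files inside the parent directory)
# example.com is a hypothetical host, not something taken from the module.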