Skip to content

Commit

Permalink
More advanced GUI - Auth for password protected sites
Browse files Browse the repository at this point in the history
The new GUI lets the user browse for a file, and asks for authentication details if a site requires a password.
The results are output to a scrollable, selectable list. They are also saved to a text file (error_results.txt).
  • Loading branch information
Merton committed Jul 17, 2018
1 parent c42cede commit 75897ac
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 35 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Python files
__pycache__
*.pyc
*.xml

# Application files
error_results.txt
94 changes: 85 additions & 9 deletions gui.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,107 @@
import linker
import tkinter as tk
from tkinter import filedialog, ttk

class Application(tk.Frame):
class AuthDialog():
    """Pop-up window that asks the user for a username and password.

    The dialog is a ``tk.Toplevel`` child of *parent*. Pressing "Enter"
    stores the typed credentials on ``self.auth`` as a
    ``(username, password)`` tuple and destroys the window. A caller can
    block on ``parent.wait_window(dialog.win)`` and then read
    ``dialog.auth``; it is still the empty tuple if the window was closed
    without pressing the button.
    """

    # Result before (or without) submission: an empty, falsy tuple.
    auth = ()

    def __init__(self, parent):
        """Create the top-level window and lay out its widgets.

        Args:
            parent: The widget that owns this dialog.
        """
        self.win = tk.Toplevel(parent)
        self.win.wm_title("Authentication Required")

        self.u = tk.Label(self.win, text="Username")
        self.u.grid(row=0, column=0)

        self.auth_user = tk.Entry(self.win)
        self.auth_user.grid(row=0, column=1)

        self.p = tk.Label(self.win, text="Password")
        self.p.grid(row=1, column=0)

        # Mask the typed characters so the password is not shown on screen.
        self.auth_pass = tk.Entry(self.win, show="*")
        self.auth_pass.grid(row=1, column=1)

        # The button gets its own row; placing it at row 1 / column 0 would
        # stack it on top of the "Password" label.
        self.b = ttk.Button(self.win, text="Enter", command=self.return_auth)
        self.b.grid(row=2, column=0, columnspan=2)

    def return_auth(self):
        """Store the entered credentials on ``self.auth`` and close the dialog."""
        self.auth = self.auth_user.get(), self.auth_pass.get()
        self.win.destroy()


class LinkerGUI(tk.Frame):
    """Main application frame for the broken-link checker.

    Lets the user type or browse for a sitemap file, runs
    ``linker.check_links`` on it and lists every reported broken link in a
    list box. When the checker answers with ``401`` the user is asked for
    credentials through ``AuthDialog`` and the check is repeated with them.
    """

    PAD_X = 25
    PAD_Y = 25
    user = ''
    pswd = ''

    def __init__(self, master=None):
        """Attach the frame to *master* and build its widgets."""
        tk.Frame.__init__(self, master)
        self.grid()
        self.createWidgets()

    def get_broken_links(self):
        """Check the selected file and show the broken links in the list box.

        ``linker.check_links`` returns either the integer ``401`` (the site
        needs authentication) or a list of ``(url, error, location)`` tuples.
        """
        self.results.delete(0, tk.END)
        filename = self.file_input.get()
        if not filename:
            print("No file specified!")
            return

        errors = linker.check_links(filename)
        if errors == 401:
            print("Auth required")
            popup = AuthDialog(self)
            # Block until the dialog window has been destroyed.
            self.wait_window(popup.win)
            # The credentials are deliberately not echoed to the terminal.
            errors = linker.check_links(filename, popup.auth)

        if errors == 401:
            # Wrong credentials, or the dialog was closed without submitting:
            # there is no result list to iterate over.
            self.results.insert(tk.END, "Authentication failed - no links were checked.")
            return

        for url, error, location in errors:
            # Label and value are joined into one string so each detail takes
            # a single row; extra positional arguments to Listbox.insert
            # would each become a separate row.
            self.results.insert(tk.END, "===== BROKEN LINK DETECTED ======")
            self.results.insert(tk.END, "Broken Link path: {}".format(url))
            self.results.insert(tk.END, "Error Code: {}".format(error))
            self.results.insert(tk.END, "Location of broken URL: {}".format(location))
            self.results.insert(tk.END, "================================")
            self.results.insert(tk.END, " ")

    def browse_file(self):
        """Open a file chooser and put the chosen path into the entry field."""
        self.filename = filedialog.askopenfilename(
            initialdir="/",
            title="Select file",
            filetypes=(("XML Files", "*.xml"), ("all files", "*.*")),
        )
        # An empty result means the chooser was cancelled; keep the old text.
        if self.filename:
            # Replace the previous path instead of prepending to it.
            self.file_input.delete(0, tk.END)
            self.file_input.insert(0, self.filename)

    def createWidgets(self):
        """Create and lay out the label, entry, result list and buttons."""
        # Set title of window
        self.winfo_toplevel().title("Linker")

        self.filename_label = tk.Label(self, text="Filename")
        self.filename_label.grid(row=0)

        self.file_input = tk.Entry(self)
        self.file_input.grid(row=0, column=1)

        self.results = tk.Listbox(self)
        self.results.config(width=70)
        self.results.grid(row=1, column=0, columnspan=3)

        # ====== Buttons ====== #
        # Browse
        self.browse = tk.Button(self)
        self.browse["text"] = "Browse..."
        self.browse["command"] = self.browse_file
        self.browse.grid(row=0, column=2)

        # Quit
        self.QUIT = tk.Button(self, text="QUIT", fg="red", command=self.quit)
        self.QUIT.grid(row=3, column=0)

        # Enter
        self.enter = tk.Button(self)
        self.enter["text"] = "Enter"
        self.enter["command"] = self.get_broken_links
        self.enter.grid(row=3, column=2)

def run():
    """Create the Tk root window, attach the LinkerGUI frame and run its main loop."""
    root = tk.Tk()
    # Only LinkerGUI is instantiated; the former `Application` class was
    # renamed and no longer exists.
    app = LinkerGUI(master=root)
    app.mainloop()
87 changes: 64 additions & 23 deletions linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import requests
from bs4 import BeautifulSoup as Soup

def check_links(site_map_file):
def check_links(site_map_file, auth=None):
tree = ET.parse(site_map_file)
root = tree.getroot()
url_count = len(list(root))
Expand All @@ -11,23 +11,43 @@ def check_links(site_map_file):
broken_links = []
# Every checked page URL, including the links from its <a> tags
checked_links = []

site_url = root[0][0].text
print(site_url)

try:
if auth:
print(auth)
r = requests.get(site_url, auth=(auth[0], auth[1]))
else:
r = requests.get(site_url)
except:
print("Could not reach site")
quit

if r.status_code == 401:
return 401
# Loop over every <url> tag from the site map
for index, page in enumerate(root):
for page_index, page in enumerate(root):
# Get the contents of the <loc> tag
url = page[0].text
# Encoded to include multi-lang urls
url.encode('utf-8')
print('{} of {} | Checking url [{}]'.format(index + 1, url_count, url))

if url not in checked_links and "/assets" not in url:
if (url not in checked_links) and ("/assets" not in url):
print('Page {} of {} | Checking url [{}]'.format(page_index + 1, url_count, url))

try:
r = requests.get(url)
if auth:
r = requests.get(url, auth=(auth[0], auth[1]))
else:
r = requests.get(url)

except:
print("Uh oh, something went wrong checking {}".format(url))
broken_links.append((url, "Unknown error", url))

status_code = r.status_code

if status_code != 200:
print('Non-OK response ({}) on url: {}'.format(url,status_code))
broken_links.append((url, status_code, url))
Expand All @@ -39,31 +59,55 @@ def check_links(site_map_file):
links = soup.find_all('a')
links_count = len(links)

for index, link in enumerate(links):
# Checks every link's href (<a/>) on the page
for link_index, link in enumerate(links):
link_url = link.get('href')
print('{} of {} | Checking url [{}]'.format(index + 1, links_count, link_url))

if link_url not in checked_links:
checked_links.append(link_url)

# Allows for links that are relative, ie - /contact
if link_url and not (link_url.startswith("http") or link_url.startswith("mailto:")):
if link_url.startswith("/"):
link_url = site_url[0:-1] + link_url
else:
link_url = site_url + link_url

print('Page {} of {} | Link {} of {} | Checking url [{}]'.format(page_index + 1, url_count, link_index + 1, links_count, link_url))
try:
r = requests.get(link_url)
if auth:
r = requests.get(link_url, auth=(auth[0],auth[1]))
else:
r = requests.get(link_url)
except:
print("Uh oh, something went wrong checking {}".format(link_url))
if link_url == '':
link_url = link
broken_links.append((link_url, "Unknown error", url))

status_code = r.status_code
if status_code != 200:
print('Non-OK response ({}) on link_url: {}'.format(link_url,status_code))
print(link)
broken_links.append((link_url, status_code, url))

checked_links.append(link_url)
else:
print('Link already checked')
continue

for url, error, location in broken_links:
print( "Error: " + str(error), " => URL: " + url, "Location: " + location)


else:
continue
# Outputs results to a file and terminal, returns results
with open('error_results.txt', 'w') as file:
for url, error, location in broken_links:
broken_link = """
===== BROKEN LINK ============
Broken Link Path: {}
Location: {}
Error: {}
==============================
""".format(str(url), str(location), str(error))
print( "Error: ", str(error), " => URL: ", str(url), "Location: ", str(location))
file.write(broken_link)


return broken_links

# Allows command line running
def run():
print("Enter sitemap url - ie, https://www.google.com/sitemap.xml [Leave blank to use local file]:")
url = input()
Expand All @@ -82,9 +126,6 @@ def run():
raise SystemExit("Could not download the xml file, please try again.")


check_links(site_map_file)


if __name__ == "__main__":
run()

5 changes: 2 additions & 3 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import gui

# Start the graphical front end. The GUI calls into the link checker itself,
# so the command-line prompt (linker.run) is not started from here.
gui.run()

0 comments on commit 75897ac

Please sign in to comment.