Skip to content

Commit 123dab9

Browse files
GitHub user scraper (avinashkranjan#994)
1 parent fbfad71 commit 123dab9

File tree

3 files changed

+222
-0
lines changed

3 files changed

+222
-0
lines changed

Github-User-Scraper/README.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Github Trending Users Fetcher
2+
Running this script lets you fetch trending GitHub users for a chosen language (Python, Java, etc.) and time frame (daily, weekly, or monthly trending users).
3+
4+
## Setup instructions
5+
In order to run this script, you need to have Python and pip installed on your system. After you're done installing Python and pip, run the following command from your terminal to install the requirements from the same folder (directory) of the project.
6+
```
7+
pip install -r requirements.txt
8+
```
9+
After installing all the requirements for the project, open a terminal in the project folder and run
10+
```
11+
python scraper.py
12+
```
13+
or
14+
```
15+
python3 scraper.py
16+
```
17+
depending on how Python is installed on your system. Make sure that you run the command from the same virtual environment in which the required modules are installed.
18+
19+
## Output
20+
21+
The user can choose the language they want to scrape trending users for and the time frame as well.
22+
23+
![Github Trending User Fetcher](https://i.postimg.cc/yxbTG2zL/trending.png)
24+
25+
## Author
26+
[Ayush Jain](https://github.com/Ayushjain2205)

Github-User-Scraper/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
requests
2+
beautifulsoup4

Github-User-Scraper/scraper.py

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
import requests
2+
from bs4 import BeautifulSoup
3+
import tkinter as tk
4+
from tkinter import ttk
5+
from tkinter import font as tkFont
6+
from tkinter import messagebox, simpledialog
7+
import sqlite3
8+
from sqlite3 import Error
9+
import time
10+
import datetime
11+
12+
# Maps each date-range label offered in the GUI to the value placed in the
# `since` query parameter of the trending URL.
dates = {
    'Today': 'daily',
    'This week': 'weekly',
    'This month': 'monthly',
}
14+
15+
# Function to connect to the SQL Database
16+
def sql_connection():
    """Open (creating it if necessary) the SQLite database of scraped users.

    The database file ``githubUsers.db`` is located next to this script, so
    the program works from whatever directory it is launched in.  When it is
    launched from the parent of ``Github-User-Scraper`` this is the same file
    that the former working-directory-relative path referred to.

    Returns:
        A sqlite3.Connection on success, or None when the database could not
        be opened (the error is printed in that case).
    """
    from pathlib import Path  # local import keeps this block self-contained

    db_path = Path(__file__).resolve().parent / 'githubUsers.db'
    try:
        return sqlite3.connect(str(db_path))
    except Error as err:
        # Print the actual failure; printing the exception class itself
        # gave no hint about what went wrong.
        print(err)
        return None
22+
23+
24+
# Function to create table
25+
def sql_table(con):
    """Create the ``users`` table in the given database if it is missing.

    Args:
        con: open sqlite3 connection.
    """
    create_stmt = "CREATE TABLE IF NOT EXISTS users(name text, profile_link text, date_range text, repo text, repo_lang text, repo_link text)"
    cur = con.cursor()
    cur.execute(create_stmt)
    con.commit()
30+
31+
# Open the database once at start-up and make sure the table exists; the
# connection is then used by scrape_users() and show_results() through the
# module-level name `con`.
con = sql_connection()
if con is None:
    # sql_connection() signals failure by returning None; stop with a clear
    # message instead of failing in sql_table() with an AttributeError.
    raise SystemExit("Could not open the users database.")
sql_table(con)
34+
35+
# Function to insert into table
36+
def sql_insert(con, entities):
    """Insert one scraped user into the ``users`` table and commit.

    Args:
        con: open sqlite3 connection.
        entities: sequence of six values in the order
            (name, profile_link, date_range, repo, repo_lang, repo_link).
    """
    insert_stmt = 'INSERT INTO users(name, profile_link, date_range, repo, repo_lang, repo_link) VALUES(?, ?, ?, ?, ?, ?)'
    cur = con.cursor()
    cur.execute(insert_stmt, entities)
    con.commit()
41+
42+
# Function to fetch data from DB
43+
def _fit(value, width):
    """Return *value* as text short enough for a column *width* wide.

    Text that would fill the column completely is cut and suffixed with
    "..." so that at least one space separates it from the next column.
    NULL database values are rendered as an empty string.
    """
    text = "" if value is None else str(value)
    if len(text) < width:
        return text
    return text[:width - 4] + "..."


def sql_fetch(con):
    """Return all stored users as a fixed-width text table.

    Rows are selected with DISTINCT and ordered by descending rowid.

    Args:
        con: open sqlite3 connection holding the ``users`` table.

    Returns:
        The formatted table (a header line plus one line per user, every
        line terminated by a newline), or " " when the query fails or the
        table is empty.  An information dialog is shown in the empty case.
    """
    # (title, width, alignment) of every column, in table column order.
    # Every value is truncated to its own column width so rows stay aligned.
    columns = (
        ("Name", 30, "<"),
        ("Profile Link", 40, "<"),
        ("Date Range", 30, "<"),
        ("Top Repo", 30, "<"),
        ("Repo Lang", 20, "^"),
        ("Repo Link", 30, "<"),
    )

    cursorObj = con.cursor()
    try:
        cursorObj.execute('SELECT DISTINCT * FROM users ORDER BY rowid DESC')  # SQL search query
    except Error as err:
        print("Could not read users from the database: {}".format(err))
        # Same "nothing to show" value as the empty-table case, so callers
        # always receive a string.
        return " "
    rows = cursorObj.fetchall()

    # Show messagebox in case of empty DB
    if not rows:
        messagebox.showinfo("Alert", "No users scraped yet!")
        return " "

    # Header titles are centred in their columns.
    lines = ["".join("{:^{}}".format(title, width) for title, width, _ in columns)]
    for row in rows:
        lines.append("".join(
            "{:{}{}}".format(_fit(value, width), align, width)
            for value, (_, width, align) in zip(row, columns)))
    return "\n".join(lines) + "\n"
78+
79+
80+
# Function to generate URL based on choice
81+
def get_URL(url_lang=None, url_date=None):
    """Build the GitHub trending-developers URL for a language and period.

    Args:
        url_lang: language name; defaults to the current selection of the
            language combobox.
        url_date: value for the ``since`` query parameter; defaults to the
            entry of ``dates`` for the current date combobox selection.

    Returns:
        The URL as a string.
    """
    from urllib.parse import quote  # local import keeps this block self-contained

    if url_lang is None:
        url_lang = language.get()
    if url_date is None:
        url_date = dates[date.get()]

    # Percent-encode the language: a raw '#' (as in "C#") would start the URL
    # fragment and swallow the ?since= query.  '+' is left as-is so "C++"
    # produces the same URL as before.
    lang_slug = quote(url_lang.lower(), safe='+')
    return 'https://github.com/trending/developers/{}?since={}'.format(lang_slug, url_date)
86+
87+
def scrape_users():
    """Scrape the trending developers for the selected options into the DB.

    Reads the language and date range from the comboboxes, downloads the
    trending page, and stores one row per listed developer via sql_insert()
    while advancing the progress bar.  Message boxes report a failed
    download, an empty result, or success.
    """
    url_lang = language.get()
    date_range = date_helper()
    url = get_URL()

    try:
        # The timeout keeps the GUI from blocking forever on a stalled
        # connection; HTTP error statuses are reported instead of parsed.
        page = requests.get(url, timeout=30)
        page.raise_for_status()
    except requests.RequestException as err:
        messagebox.showerror("Error", "Could not fetch trending users:\n{}".format(err))
        return

    # Start scraping resultant html data
    soup = BeautifulSoup(page.content, 'html.parser')
    users = soup.find_all('article', {'class': 'Box-row d-flex'})
    if not users:
        messagebox.showinfo("Alert", "No trending users found for this selection.")
        return

    # Spread the bar over all users instead of a fixed +10 per user.
    # NOTE(review): assumes the progress bar keeps ttk's default maximum of
    # 100 (none is configured where the widget is created) - confirm.
    step = 100 / len(users)
    try:
        for user in users:
            progress['value'] += step
            window.update_idletasks()

            name_heading = user.find('h1', {'class': 'h3 lh-condensed'})
            name_anchor = name_heading.find('a') if name_heading is not None else None
            if name_anchor is None:
                # Not a recognisable developer card; nothing to store.
                continue
            name = name_heading.text.strip()
            profile_link = 'https://github.com{}'.format(name_anchor['href'])

            # A card without a repository heading is stored with empty repo
            # fields instead of aborting the whole scrape.
            repo_heading = user.find('h1', {'class': 'h4 lh-condensed'})
            repo_anchor = repo_heading.find('a') if repo_heading is not None else None
            if repo_anchor is None:
                repo, repo_link = '', ''
            else:
                repo = repo_heading.text.strip()
                repo_link = 'https://github.com{}'.format(repo_anchor['href'])

            entities = (name, profile_link, date_range, repo, url_lang, repo_link)
            sql_insert(con, entities)
    finally:
        # set progress bar back to 0, also when an insert fails
        progress['value'] = 0
        window.update_idletasks()

    messagebox.showinfo("Success!", "Users scrapped successfully!")
110+
111+
def show_results():
    """Replace the contents of the results text box with the stored users."""
    # sql_fetch() may return None when its query fails; fall back to empty
    # text rather than handing None to the widget.
    display_text = sql_fetch(con) or ""

    # Unlock the widget only for the duration of the update, and lock it
    # again even if the update raises.
    query_label.config(state=tk.NORMAL)
    try:
        query_label.delete("1.0", "end")
        query_label.insert("1.0", display_text)
    finally:
        query_label.config(state=tk.DISABLED)
117+
118+
def date_helper(date_range_type=None, today=None):
    """Describe the period covered by a scrape as display text.

    Args:
        date_range_type: 'daily', 'weekly', or any other value (handled as
            monthly).  Defaults to the entry of ``dates`` for the current
            date combobox selection.
        today: reference ``datetime.date``; defaults to the current date.

    Returns:
        'DD/MM/YYYY' for daily; 'DD/MM/YYYY - DD/MM/YYYY' (seven days before
        *today* up to *today*) for weekly; otherwise the full month name of
        *today*.
    """
    if date_range_type is None:
        date_range_type = dates[date.get()]
    if today is None:
        # Read the clock once so both ends of a weekly range are derived
        # from the same day.
        today = datetime.date.today()

    day_format = "%d/%m/%Y"
    if date_range_type == 'daily':
        return today.strftime(day_format)
    if date_range_type == 'weekly':
        from_date = today - datetime.timedelta(days=7)
        return "{} - {}".format(from_date.strftime(day_format),
                                today.strftime(day_format))
    return today.strftime("%B")
132+
133+
134+
# Creating tkinter window
window = tk.Tk()
window.title('Github Trending User Fetcher')
window.geometry('1400x1000')
window.configure(bg='white')

style = ttk.Style()
style.configure('my.TButton', font=('Helvetica', 16))
style.configure('my.TFrame', background='white')

# label text for title
ttk.Label(window, text="Trending Github Users",
          background='white', foreground="Blue",
          font=("Helvetica", 30, 'bold')).grid(row=0, column=1)

# labels for the comboboxes
ttk.Label(window, text="Select Language:", background='white',
          font=("Helvetica", 15)).grid(column=0, row=5, padx=10, pady=25)
ttk.Label(window, text="Select Date range:", background='white',
          font=("Helvetica", 15)).grid(column=2, row=5, padx=10, pady=25)

# Combobox creation
language = ttk.Combobox(
    window, width=30, state='readonly', font="Helvetica 15")
date = ttk.Combobox(
    window, width=20, state='readonly', font="Helvetica 15")

# Button creation
scrape_btn = ttk.Button(window, text="Scrape Users!", style='my.TButton', command=scrape_users)

display_btn = ttk.Button(window, text="Display from DB", style='my.TButton', command=show_results)

# Adding combobox drop down list.
# Every entry needs its own comma: adjacent string literals are joined, and
# a missing comma after 'Dart' used to produce the single entry 'DartShell'.
language['values'] = (
    'C++', 'HTML', 'Java', 'Javascript', 'PHP', 'Python', 'Ruby', 'C#', 'C',
    'Dockerfile', 'JSON', 'Julia', 'Dart', 'Shell', 'Solidity', 'YAML',
)

# These labels are the keys of the `dates` dictionary.
date['values'] = ('Today', 'This week', 'This month')

# Progress bar
progress = ttk.Progressbar(window, orient="horizontal", length=200, mode="determinate")
progress.grid(row=5, column=5, pady=5, padx=15, ipadx=5)

language.grid(column=1, row=5, padx=10)
language.current(0)

date.grid(column=3, row=5, padx=10)
date.current(0)

scrape_btn.grid(row=5, column=4, pady=5, padx=15, ipadx=5)
display_btn.grid(row=7, column=2, pady=5, padx=15, ipadx=5)

frame = ttk.Frame(window, style='my.TFrame')
frame.place(relx=0.50, rely=0.18, relwidth=0.98, relheight=0.90, anchor="n")

# Text area in which show_results() lists the users stored in the database
query_label = tk.Text(frame, height="52", width="500", bg="alice blue")
query_label.grid(row=10, columnspan=2)

window.mainloop()

0 commit comments

Comments
 (0)