Skip to content

Commit a12f6ec

Browse files
committed
added script for downloading content from instagram
1 parent e0c71d9 commit a12f6ec

File tree

2 files changed

+249
-0
lines changed

2 files changed

+249
-0
lines changed

Instagram downloader/Readme.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Instagram Content Downloader
2+
3+
This Python script downloads the posts (images and videos) of an Instagram profile. Some of the packages used in this project are
4+
5+
- requests: used to make HTTP requests simpler and human friendly.
6+
- BeautifulSoup: used to parse HTML and XML documents.
7+
- And some others like selenium, time, os
8+
9+
## Setup instructions
10+
11+
- Clone the repo to your local machine
12+
- Go into the Instagram downloader folder and run `python downloader.py`
13+
- It will ask you for your username, your password and the username of the user whose posts you want to download
14+
- after running you will be able to download the required post
15+
16+
## Detailed explanation of script, if needed
17+
18+
This Python script makes use of different packages and modules such as BeautifulSoup, Selenium, os and requests to download content from Instagram, provided you have given a correct set of credentials. For a better understanding, take a look at the code itself.
19+
20+
## Output
21+
22+
- Providing correct details will ensure downloading of the required post
23+
<img src="https://i.imgur.com/Z107rdp.png">
24+
25+
## Disclaimers, if any
26+
27+
Do not forget to enter your username and password; otherwise the script will fail with an error.

Instagram downloader/downloader.py

Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
# import required modules
2+
from selenium import webdriver
3+
from selenium.webdriver.common.keys import Keys
4+
import selenium.common.exceptions
5+
import time
6+
from bs4 import BeautifulSoup as bs
7+
import requests
8+
import os
9+
10+
11+
# get instagram account credentials
# getpass reads the password without echoing it to the terminal
from getpass import getpass

username = input('Enter Your User Name ')
password = getpass('Enter Your Password ')

# assign URL
# profile page of the user whose posts will be downloaded
url = 'https://instagram.com/' + input('Enter User Name Of User For Downloading Posts ')
17+
18+
# Start the Chrome browser session
19+
def path():
    """Start a new Chrome session and store it in the global ``chrome``.

    The other functions of this script drive the browser through that
    module-level name, so this has to run before any of them.
    """
    global chrome
    # add the driver path to the constructor call if required
    chrome = webdriver.Chrome()
24+
25+
# Open the given URL in the browser
26+
def url_name(url):
    """Open ``url`` in the global Chrome session and pause for 4 seconds.

    url: address of the page to load.
    """
    chrome.get(url)

    # Fixed pause after navigation so that the page's elements have time
    # to appear before later lookups run (a lookup for an element that is
    # not there yet raises NoSuchElementException).
    time.sleep(4)
34+
35+
# Login to access post
36+
def login(username, your_password):
    """Log in through the page currently open in the global ``chrome``.

    username: account name typed into the "username" field.
    your_password: password typed into the "password" field.

    Clicks the element with class "L3NKy", fills in and submits the login
    form, then clicks the element with class "yWX7d" (the "Not Now"
    button). The element lookups are not guarded, so a missing element
    propagates as NoSuchElementException.
    """
    login_button = chrome.find_element_by_class_name("L3NKy")
    time.sleep(2)
    login_button.click()
    time.sleep(4)

    # fill in the username box
    user_box = chrome.find_element_by_name("username")
    user_box.send_keys(username)

    # fill in the password box and submit the form with the enter key
    password_box = chrome.find_element_by_name("password")
    password_box.send_keys(your_password)
    password_box.send_keys(Keys.RETURN)

    time.sleep(5.5)

    # click the "Not Now" button
    not_now_button = chrome.find_element_by_class_name("yWX7d")
    not_now_button.click()
    time.sleep(3)
62+
63+
# Function to get content of first post
64+
def first_post():
    """Click the first element with class "kIKUG" and pause for 2 seconds.

    The caller uses this to open the first post of the profile page.
    Raises NoSuchElementException when no such element exists.
    """
    # click() returns None, so there is nothing worth binding to a name
    chrome.find_element_by_class_name("kIKUG").click()
    time.sleep(2)
67+
68+
# Function to get next post
69+
def next_post():
    """Look up the arrow that leads to the next post.

    Returns the element with class "coreSpriteRightPaginationArrow", or
    0 when the page has no such element.
    """
    arrow_class = "coreSpriteRightPaginationArrow"
    try:
        return chrome.find_element_by_class_name(arrow_class)
    except selenium.common.exceptions.NoSuchElementException:
        return 0
76+
77+
# Download content of all posts
78+
def _save_post(user_name, index):
    """Save the post currently open in the browser as ``content<index>``.

    A single-item post is written to ``<user_name>/content<index>``. A
    post with several items is written item by item to
    ``<user_name>/content<index>.<n>``, clicking the chevron returned by
    nested_check() after each item until it is no longer found.

    Raises NoSuchElementException when the 'rQDP3' container is missing.
    """
    base_name = user_name + '/' + 'content' + str(index)
    chevron = nested_check()

    if not chevron:
        save_content('_97aPb', base_name)
        return

    count_img = 0
    while chevron:
        elem_img = chrome.find_element_by_class_name('rQDP3')
        save_multiple(base_name + '.' + str(count_img), elem_img)
        count_img += 1
        chevron.click()
        chevron = nested_check()

    # no chevron left: save the final item, taken from the end of the list
    save_multiple(base_name + '.' + str(count_img), elem_img, last_img_flag=1)


def download_allposts():
    """Download every post of the profile in the global ``url``.

    Opens the first post, then keeps clicking the element returned by
    next_post() until it returns a falsy value. Files are written to a
    folder named after the last path segment of ``url``, created when
    missing. Prints "finished" and returns when a post after the first
    one raises NoSuchElementException while being saved.
    """
    # open First Post
    first_post()

    # strip trailing slashes so the last path segment is never empty
    user_name = url.rstrip('/').split('/')[-1]

    # create the folder for this user unless it already exists
    os.makedirs(user_name, exist_ok=True)

    _save_post(user_name, 1)
    c = 2

    while True:
        next_el = next_post()

        # next_post() returns 0 when there is no further post
        if not next_el:
            break

        next_el.click()
        time.sleep(1.3)

        try:
            _save_post(user_name, c)
        except selenium.common.exceptions.NoSuchElementException:
            print("finished")
            return

        c += 1
148+
149+
# Function to save content of the current post
150+
def save_content(class_name, img_name):
    """Download the media of the current post and write it to ``img_name``.

    class_name: class of the element whose inner HTML holds the media.
    img_name: path of the file to write.

    The ``src`` of the first <video> in that element is used, falling
    back to the first <img>. When the element, the media tag or its
    ``src`` is missing, or the download returns an HTTP error status, a
    message is printed and nothing is written.
    """
    time.sleep(0.5)

    try:
        container = chrome.find_element_by_class_name(class_name)
    except selenium.common.exceptions.NoSuchElementException:
        print("Either This user has no images or you haven't followed this user or something went wrong")
        return

    soup = bs(container.get_attribute('innerHTML'), 'html.parser')

    # prefer a video, fall back to an image
    media = soup.find('video')
    if media is None:
        media = soup.find('img')

    link = media.get('src') if media is not None else None
    if not link:
        # previously a missing tag or attribute crashed with TypeError/KeyError
        print("No image or video source found for " + img_name)
        return

    response = requests.get(link)
    if not response.ok:
        # do not store an HTTP error page as if it were the media file
        print("Download failed with status " + str(response.status_code) + " for " + img_name)
        return

    with open(img_name, 'wb') as f:
        f.write(response.content)
    time.sleep(0.9)
174+
175+
# Function to save one item of a post that contains multiple images or videos
176+
def save_multiple(img_name, elem, last_img_flag=False):
    """Download one item of a multi-item post and write it to ``img_name``.

    img_name: path of the file to write.
    elem: element whose inner HTML contains the <ul> of <li> items.
    last_img_flag: when truthy the last <li> is used, otherwise the <li>
        in the middle of the list.

    The ``src`` of the item's <video> is used, falling back to its <img>.
    When the list, the media tag or its ``src`` is missing, or the
    download returns an HTTP error status, a message is printed and
    nothing is written.
    """
    time.sleep(1)
    soup = bs(elem.get_attribute('innerHTML'), 'html.parser')

    # only the first <ul> is considered
    first_list = soup.find('ul')
    list_images = first_list.find_all('li') if first_list is not None else []
    if not list_images:
        # previously an empty or missing list crashed with IndexError
        print("No items found for " + img_name)
        return

    if last_img_flag:
        user_image = list_images[-1]
    else:
        user_image = list_images[len(list_images) // 2]

    # prefer a video, fall back to an image
    media = user_image.find('video')
    if media is None:
        media = user_image.find('img')

    link = media.get('src') if media is not None else None
    if not link:
        print("No image or video source found for " + img_name)
        return

    response = requests.get(link)
    if not response.ok:
        # do not store an HTTP error page as if it were the media file
        print("Download failed with status " + str(response.status_code) + " for " + img_name)
        return

    with open(img_name, 'wb') as f:
        f.write(response.content)
200+
201+
# Function to check if the post is nested
202+
def nested_check():
    """Check whether the open post has a further item to step to.

    Waits one second, then returns the element with class
    'coreSpriteRightChevron ', or 0 when no such element is found.
    """
    time.sleep(1)
    try:
        chevron = chrome.find_element_by_class_name('coreSpriteRightChevron ')
    except selenium.common.exceptions.NoSuchElementException:
        return 0
    return chevron
211+
212+
# Driver Code
213+
path()
time.sleep(1)

try:
    url_name(url)

    login(username, password)

    download_allposts()
finally:
    # Always end the browser session, even when a step above raises;
    # quit() closes every window and ends the driver session.
    chrome.quit()

0 commit comments

Comments
 (0)