
Commit a4381f0

Merge pull request avinashkranjan#839 from Ayushjain2205/MonsterJobs-scraper
Monster jobs scraper
2 parents 096ac16 + 728ca44 commit a4381f0

File tree

3 files changed: +127 -0 lines changed


MonsterJobs Scraper/README.md

+41
@@ -0,0 +1,41 @@
# Monster Jobs Scraper

Running this script allows the user to scrape job openings from [Monster jobs](https://www.monsterindia.com), filtered by their choice of location, job role, company, or designation.

## Setup instructions

To run this script, you need Python and pip installed on your system. Once both are installed, run the following command from the project folder (directory) to install the requirements:

```
pip install -r requirements.txt
```

Because this script uses Selenium, you will also need to install the Chrome WebDriver from [this link](https://sites.google.com/a/chromium.org/chromedriver/downloads).

After satisfying all the requirements for the project, open a terminal in the project folder and run

```
python scraper.py
```

or

```
python3 scraper.py
```

depending on your Python version. Make sure that you run the command from the same virtual environment in which the required modules are installed.

## Output

The user is prompted to enter input describing the desired job search:

![User is asked for input](https://i.postimg.cc/tg270Zjs/monster-scraper-input.png)

The scraped jobs are stored in a CSV file named job_records.csv:

![Jobs saved in csv file](https://i.postimg.cc/x1gbQFGj/monster-scraper-output.png)

## Author

[Ayush Jain](https://github.com/Ayushjain2205)
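
As a quick sanity check after a run, the job_records.csv file described above can be read back with Python's standard csv module. This is an illustrative sketch only, not part of the scraper; it assumes the column names the script writes (Job Title, Company, Location, Job Description, URL), and `summarize` is a hypothetical helper name:

```python
import csv

def summarize(path="job_records.csv"):
    """Print a short preview of the scraped jobs and return the row count."""
    with open(path, newline="", encoding="utf8") as f:
        rows = list(csv.DictReader(f))  # one dict per scraped job
    for row in rows[:5]:
        print(f"{row['Job Title']} @ {row['Company']} ({row['Location']})")
    return len(rows)
```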

MonsterJobs Scraper/requirements.txt

+3
@@ -0,0 +1,3 @@
requests
beautifulsoup4
selenium

MonsterJobs Scraper/scraper.py

+83
@@ -0,0 +1,83 @@
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import csv

# Get chrome driver path
driver_path = input("Enter chrome driver path: ")

# Set up the CSV file to write data into
filename = "job_records.csv"
fields = ['Job Title', 'Company', 'Location', 'Job Description', 'URL']

# Prompt for the search mode until a valid choice is entered
while True:
    search_option = int(input(
        "Enter 1 - to search by location \nEnter 2 - to search by role, skill or company \nEnter 3 for both : "))
    if search_option == 1:
        location = input("Enter location : ")
        url = 'https://www.monsterindia.com/srp/results?locations={}'.format(
            location)
        break
    elif search_option == 2:
        job_type = input("Enter role, skill or company : ")
        url = 'https://www.monsterindia.com/srp/results?query={}'.format(
            job_type)
        break
    elif search_option == 3:
        location = input("Enter location : ")
        job_type = input("Enter role, skill or company : ")
        url = 'https://www.monsterindia.com/srp/results?query={}&locations={}'.format(
            job_type, location)
        break
    else:
        continue
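
One caveat with the URL construction in the loop above: input such as "software engineer" contains spaces and other characters that are not valid in a query string. A hedged improvement (not part of the original script) is to percent-encode the input with the standard library before formatting the URL; `build_search_url` is a hypothetical helper introduced here for illustration:

```python
from urllib.parse import quote_plus

def build_search_url(job_type=None, location=None):
    """Build a Monster search URL, percent-encoding user input.

    quote_plus turns spaces into '+' and escapes other characters
    that are unsafe in a query string.
    """
    base = "https://www.monsterindia.com/srp/results"
    params = []
    if job_type:
        params.append("query=" + quote_plus(job_type))
    if location:
        params.append("locations=" + quote_plus(location))
    return base + ("?" + "&".join(params) if params else "")
```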
# Initiating the webdriver. Parameter is the path of the webdriver.
driver = webdriver.Chrome(driver_path)
driver.get(url)

# This is just to ensure that the page is loaded
time.sleep(5)
html = driver.page_source

# Now apply bs4 to the rendered HTML
soup = BeautifulSoup(html, "html.parser")
job_divs = soup.find_all("div", {"class": "card-apply-content"})

with open(filename, 'w', newline='', encoding='utf8') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(fields)
    for job in job_divs:
        job_title_div = job.find('div', {"class": "job-tittle"})

        # Get job title
        job_title_holder = job_title_div.find('h3')
        job_title = job_title_holder.find('a').text.strip()

        # Get company name (some listings are posted anonymously)
        company_name_tag = job_title_div.find(
            'span', {"class": "company-name"})
        company_name = company_name_tag.find('a', {"class": "under-link"})
        if company_name is None:
            company_name = 'confidential'
        else:
            company_name = company_name.text

        # Get location
        company_location_tag = job_title_div.find('span', {"class": "loc"})
        company_location = company_location_tag.find('small').text.strip()

        # Get job description
        job_description = job.find('p', {"class": "job-descrip"}).text.strip()

        # Get job URL (hrefs are protocol-relative, so prepend "https:")
        job_url = "https:" + job_title_holder.find('a')['href']

        # Add the data as a row in the CSV file
        csvwriter.writerow(
            [job_title, company_name, company_location, job_description, job_url])

print("Job data successfully saved in job_records.csv")
driver.close()  # closing the webdriver
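
Once the script has produced job_records.csv, a short stdlib-only snippet can group the scraped rows, for example counting openings per company. This is an illustrative sketch assuming the column names written by the script; `jobs_per_company` is a hypothetical helper, not part of the scraper:

```python
import csv
from collections import Counter

def jobs_per_company(path="job_records.csv"):
    """Count scraped job openings per company."""
    with open(path, newline="", encoding="utf8") as f:
        return Counter(row["Company"] for row in csv.DictReader(f))
```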
