import csv
import time
from urllib.parse import quote

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
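# Dependency note: the script assumes Selenium 4+ and BeautifulSoup are
# installed, e.g.:
#   pip install selenium beautifulsoup4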

# Path to the ChromeDriver executable (its version must match the installed
# Chrome browser)
driver_path = input("Enter chrome driver path: ")

# CSV file to write the scraped data into
filename = "job_records.csv"
fields = ['Job Title', 'Company', 'Location', 'Job Description', 'URL']

# Prompt until a valid search option is entered
while True:
    try:
        search_option = int(input(
            "Enter 1 - to search by location \nEnter 2 - to search by role, skill or company \nEnter 3 - for both: "))
    except ValueError:
        continue
    if search_option == 1:
        location = input("Enter location: ")
        url = f'https://www.monsterindia.com/srp/results?locations={quote(location)}'
        break
    elif search_option == 2:
        job_type = input("Enter role, skill or company: ")
        url = f'https://www.monsterindia.com/srp/results?query={quote(job_type)}'
        break
    elif search_option == 3:
        location = input("Enter location: ")
        job_type = input("Enter role, skill or company: ")
        url = ('https://www.monsterindia.com/srp/results'
               f'?query={quote(job_type)}&locations={quote(location)}')
        break
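# Example of a URL built above (assuming Monster India's /srp/results
# endpoint still accepts these parameters): searching "python" in "Pune"
# produces
#   https://www.monsterindia.com/srp/results?query=python&locations=Pune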

# Initialise the webdriver; Selenium 4 takes the driver path via a Service
# object
driver = webdriver.Chrome(service=Service(driver_path))
driver.get(url)

# Crude fixed wait to let the JavaScript-rendered results load
time.sleep(5)
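# A more robust alternative to the fixed sleep would be an explicit wait on
# the result cards (a sketch, assuming the cards keep the
# "card-apply-content" class):
#   from selenium.webdriver.common.by import By
#   from selenium.webdriver.support.ui import WebDriverWait
#   from selenium.webdriver.support import expected_conditions as EC
#   WebDriverWait(driver, 10).until(
#       EC.presence_of_element_located((By.CLASS_NAME, "card-apply-content")))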
html = driver.page_source

# Parse the rendered HTML with BeautifulSoup; each "card-apply-content" div
# is one job card on the results page
soup = BeautifulSoup(html, "html.parser")
job_divs = soup.find_all("div", {"class": "card-apply-content"})
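# Optional sanity check before parsing the cards:
#   print(f"Found {len(job_divs)} job cards")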

with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(fields)
    for job in job_divs:
        # "job-tittle" (sic) is the class name actually used in the site's
        # markup
        job_title_div = job.find('div', {"class": "job-tittle"})

        # Get job title
        job_title_link = job_title_div.find('h3').find('a')
        job_title = job_title_link.text.strip()

        # Get company name; some listings do not disclose it
        company_name_tag = job_title_div.find(
            'span', {"class": "company-name"})
        company_name = company_name_tag.find('a', {"class": "under-link"})
        if company_name is None:
            company_name = 'confidential'
        else:
            company_name = company_name.text.strip()

        # Get location
        company_location_tag = job_title_div.find('span', {"class": "loc"})
        company_location = company_location_tag.find('small').text.strip()

        # Get job description
        job_description = job.find('p', {"class": "job-descrip"}).text.strip()

        # Get job URL (the site's hrefs are protocol-relative, so prepend
        # the scheme)
        job_url = "https:" + job_title_link['href']

        # Add the record as a row in the CSV file
        csvwriter.writerow(
            [job_title, company_name, company_location, job_description, job_url])

print("Job data successfully saved in job_records.csv")
driver.quit()  # end the browser session
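# Example session (the script filename and paths are illustrative):
#   $ python monster_scraper.py
#   Enter chrome driver path: /usr/local/bin/chromedriver
#   ...
#   Job data successfully saved in job_records.csv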