|
import os

from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from fpdf import FPDF

# Run Chrome headless so no browser window is opened.
options = webdriver.ChromeOptions()
options.add_argument("--headless")

# "none" page-load strategy: driver.get() returns immediately instead of
# waiting for the full page; we wait explicitly for the elements we need.
capa = DesiredCapabilities.CHROME
capa["pageLoadStrategy"] = "none"

driver = webdriver.Chrome(desired_capabilities=capa, options=options)
baseurl = "https://www.codechef.com/problems"
wait = WebDriverWait(driver, 15)

# map to get url path segment from the problem difficulty the user types
problem_difficulty = {"Beginner": "school", "Easy": "easy", "Medium": "medium", "Hard": "hard", "Challenge": "challenge"}
| 23 | + |
# get_problems returns the names and urls of the problems in a category
def get_problems(category, no_of_problems):
    """Scrape the CodeChef listing page for `category`.

    Returns a dict mapping problem name -> problem url with at most
    `no_of_problems` entries (fewer when the page has fewer rows).
    Exits the process on a page-load timeout.
    """
    # A map to store problem name and problem url
    problem_info = {}
    try:
        driver.get(baseurl + '/' + category)
        # pageLoadStrategy is "none", so wait explicitly until the first
        # problem link in the table has rendered.
        wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='primary-content']/div/div[2]/div/div[2]/table/tbody/tr[1]/td[1]/div/a/b")))
    except TimeoutException:
        print("Couldn't fetch problem. Network issue or page slow to render. Try again")
        os._exit(-1)

    for problem_index in range(1, no_of_problems + 1):
        try:
            problem_name = driver.find_element_by_xpath("//*[@id='primary-content']/div/div[2]/div/div[2]/table/tbody/tr[{}]/td[1]/div/a/b".format(problem_index)).text
            problem_url = driver.find_element_by_xpath("//*[@id='primary-content']/div/div[2]/div/div[2]/table/tbody/tr[{}]/td[1]/div/a".format(problem_index)).get_attribute('href')
        except NoSuchElementException:
            # The page has fewer rows than requested; return what we have
            # instead of crashing with an unhandled exception.
            print("Only {} problems available in this category".format(problem_index - 1))
            break
        print(problem_name, " ", problem_url)
        problem_info[problem_name] = problem_url
    return problem_info
| 45 | + |
# get_problem_description returns content of the problem
def get_problem_description(problem_url, problem_name):
    """Scrape a single problem page.

    Returns a dict with keys 'title', 'statement', 'test_case' and 'url',
    or None when the page could not be scraped.
    """
    try:
        driver.get(problem_url)
        wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='problem-statement']/p[1]")))
        problem_title = problem_name
        problem_statement = driver.find_element_by_xpath("//*[@id='problem-statement']/p[1]").text
        problem_test_cases = driver.find_element_by_xpath("//*[@id='problem-statement']/pre[1]").text

        # Some pages split input and output into two <pre> blocks; when the
        # first block carries no "Output" section, label it as input and
        # append the second block as the expected output.
        if problem_test_cases.find("Output") == -1:
            problem_test_cases = "Input\n" + problem_test_cases
            problem_test_cases += "\nOutput\n"
            problem_test_cases += driver.find_element_by_xpath("//*[@id='problem-statement']/pre[2]").text

        # NOTE: the original code put this call under an EMPTY `else:` branch,
        # which is a syntax (indentation) error. pageLoadStrategy is "none",
        # so stop any further loading once we have everything we need.
        driver.execute_script("window.stop();")
        problem = {'title': problem_title, 'statement': problem_statement, 'test_case': problem_test_cases, 'url': problem_url}
        return problem

    # Handling exceptions
    except NoSuchElementException:
        print("Couldn't scrap the element, Unable to locate it")
        return None
    except TimeoutException:
        print("Couldn't scrap the element, Unable to locate it")
        return None
| 76 | + |
| 77 | + |
| 78 | + |
| 79 | + |
| 80 | + |
# storing the information in the pdf
def convert_to_pdf(problem):
    """Write `problem` (dict with 'title', 'statement', 'test_case', 'url')
    to '<title>.pdf' in the current directory."""
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=15)
    # FPDF's built-in fonts only support latin-1: replace anything outside it
    title = problem["title"].encode('latin-1', 'replace').decode('latin-1')
    statement = problem["statement"].encode('latin-1', 'replace').decode('latin-1')
    test_case = problem["test_case"].encode('latin-1', 'replace').decode('latin-1')
    url = problem["url"]
    # add sections to pdf
    pdf.cell(200, 10, txt=title, ln=1, align='C')
    pdf.multi_cell(200, 10, txt=statement, align='L')
    pdf.multi_cell(200, 10, txt=test_case, align='L')
    pdf.write(5, 'Problem_Link: ')
    # third argument makes the rendered url text a clickable link
    pdf.write(5, url, url)

    # Problem titles may contain characters that are illegal in filenames
    # (e.g. '/', ':', '?'); replace them before using the title as a name.
    safe_name = "".join(ch if ch not in '\\/*?:"<>|' else '_' for ch in title)
    pdf.output((safe_name or "problem") + ".pdf")
| 100 | + |
| 101 | + |
# main function
def main():
    """Prompt for a difficulty level and a problem count, then scrape the
    problems and save each one as a pdf."""
    category = input("Enter the difficulty level from the following \n Beginner \n Easy \n Medium \n Hard \n Challenge \n\n").strip()
    # Validate instead of letting a raw KeyError escape to the user.
    if category not in problem_difficulty:
        print("Unknown difficulty level: {}".format(category))
        return
    # Validate instead of letting a raw ValueError escape to the user.
    try:
        no_of_problems = int(input("\n Enter the number of problems to be scrapped: \n"))
    except ValueError:
        print("Please enter a whole number of problems")
        return
    info = get_problems(problem_difficulty[category], no_of_problems)
    for name, url in info.items():
        problem = get_problem_description(url, name)
        # Skip problems whose page could not be scraped.
        if problem is not None:
            convert_to_pdf(problem)
| 113 | + |
if __name__ == '__main__':
    try:
        main()
    finally:
        # quit() shuts down the whole browser session and the chromedriver
        # process; the original module-level driver.close() ran even on
        # import and only closed the window, leaking the driver process.
        driver.quit()
0 commit comments