Skip to content

Commit

Permalink
fixing linkedin
Browse files: browse the repository at this point in the history
  • Loading branch information
alexandruvesa committed Apr 22, 2024
1 parent 99b185b commit 3dc8818
Showing 1 changed file with 13 additions and 21 deletions.
34 changes: 13 additions & 21 deletions course/module-1/crawlers/linkedin.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -35,6 +35,13 @@ def extract(self, link: str, **kwargs):
"Education": self._scrape_education(link),
}

self.driver.get(link)
time.sleep(5)
button = self.driver.find_element(
By.CSS_SELECTOR, ".app-aware-link.profile-creator-shared-content-view__footer-action"
)
button.click()

# Scrolling and scraping posts
self.scroll_page()
soup = BeautifulSoup(self.driver.page_source, "html.parser")
Expand All @@ -51,12 +58,7 @@ def extract(self, link: str, **kwargs):
self.driver.close()

self.model.bulk_insert(
[
PostDocument(
platform="linkedin", content=post, author_id=kwargs.get("user")
)
for post in posts
]
[PostDocument(platform="linkedin", content=post, author_id=kwargs.get("user")) for post in posts]
)

logger.info(f"Finished scrapping data for profile: {link}")
Expand Down Expand Up @@ -92,9 +94,7 @@ def _get_page_content(self, url: str) -> BeautifulSoup:
time.sleep(5)
return BeautifulSoup(self.driver.page_source, "html.parser")

def _extract_posts(
self, post_elements: List[Tag], post_images: Dict[str, str]
) -> Dict[str, Dict[str, str]]:
def _extract_posts(self, post_elements: List[Tag], post_images: Dict[str, str]) -> Dict[str, Dict[str, str]]:
"""
Extracts post texts and combines them with their respective images.
Expand Down Expand Up @@ -133,16 +133,8 @@ def login(self):
"""Log in to LinkedIn."""
self.driver.get("https://www.linkedin.com/login")
if not settings.LINKEDIN_USERNAME and not settings.LINKEDIN_PASSWORD:
raise ImproperlyConfigured(
"LinkedIn scraper requires an valid account to perform extraction"
)
raise ImproperlyConfigured("LinkedIn scraper requires an valid account to perform extraction")

self.driver.find_element(By.ID, "username").send_keys(
settings.LINKEDIN_USERNAME
)
self.driver.find_element(By.ID, "password").send_keys(
settings.LINKEDIN_PASSWORD
)
self.driver.find_element(
By.CSS_SELECTOR, ".login__form_action_container button"
).click()
self.driver.find_element(By.ID, "username").send_keys(settings.LINKEDIN_USERNAME)
self.driver.find_element(By.ID, "password").send_keys(settings.LINKEDIN_PASSWORD)
self.driver.find_element(By.CSS_SELECTOR, ".login__form_action_container button").click()

0 comments on commit 3dc8818

Please sign in to comment.