Skip to content

Commit

Permalink
Merge
Browse files (browse the repository at this point in the history)
  • Loading branch information
BillSchumacher committed Apr 15, 2023
1 parent 9cf7227 commit 52bb22d
Showing 1 changed file with 8 additions and 6 deletions.
14 changes: 8 additions & 6 deletions autogpt/commands/web_requests.py
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
 from urllib.parse import urljoin, urlparse

 import requests
+from requests.compat import urljoin
 from requests import Response
 from bs4 import BeautifulSoup

@@ -134,19 +135,20 @@ def scrape_text(url: str) -> str:
     return text


-def extract_hyperlinks(soup: BeautifulSoup) -> List[Tuple[str, str]]:
+def extract_hyperlinks(soup: BeautifulSoup, base_url: str) -> List[Tuple[str, str]]:
     """Extract hyperlinks from a BeautifulSoup object

     Args:
         soup (BeautifulSoup): The BeautifulSoup object
+        base_url (str): The base URL

     Returns:
         List[Tuple[str, str]]: The extracted hyperlinks
     """
-    hyperlinks = []
-    for link in soup.find_all("a", href=True):
-        hyperlinks.append((link.text, link["href"]))
-    return hyperlinks
+    return [
+        (link.text, urljoin(base_url, link["href"]))
+        for link in soup.find_all("a", href=True)
+    ]


 def format_hyperlinks(hyperlinks: List[Tuple[str, str]]) -> List[str]:
@@ -183,7 +185,7 @@ def scrape_links(url: str) -> Union[str, List[str]]:
     for script in soup(["script", "style"]):
         script.extract()

-    hyperlinks = extract_hyperlinks(soup)
+    hyperlinks = extract_hyperlinks(soup, url)

     return format_hyperlinks(hyperlinks)

Expand Down

0 comments on commit 52bb22d

Please sign in to comment.