diff --git a/Pipfile b/Pipfile index 63f66f9..e25863b 100644 --- a/Pipfile +++ b/Pipfile @@ -24,6 +24,7 @@ requests-file = "*" pytest = "*" "e1839a8" = {path = ".", editable = true} sphinx = "*" +mypy = "*" [scripts] diff --git a/Pipfile.lock b/Pipfile.lock index a35c154..03398aa 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "9af93ac7145d6f8f0f24b28c59064699450344bdd45f18d3c4383647a1a08f03" + "sha256": "ef6f9504ed9751cf2f4c5aef06e59838981c79d84fa1d36fb5ce258d8dba189f" }, "host-environment-markers": { "implementation_name": "cpython", @@ -345,6 +345,13 @@ ], "version": "==1.0" }, + "mypy": { + "hashes": [ + "sha256:aa668809ae0dbec5e9feb8929f4b5e1f9318a0a397447fa2f38c382a2ed6a036", + "sha256:bd0c9a2fcf0c4f7a54a2b625f466fcc000d415f371298d96fa5d2acc69074aca" + ], + "version": "==0.560" + }, "packaging": { "hashes": [ "sha256:99276dc6e3a7851f32027a68f1095cd3f77c148091b092ea867a351811cfe388", @@ -371,6 +378,20 @@ ], "version": "==0.6.0" }, + "psutil": { + "hashes": [ + "sha256:82a06785db8eeb637b349006cc28a92e40cd190fefae9875246d18d0de7ccac8", + "sha256:4152ae231709e3e8b80e26b6da20dc965a1a589959c48af1ed024eca6473f60d", + "sha256:230eeb3aeb077814f3a2cd036ddb6e0f571960d327298cc914c02385c3e02a63", + "sha256:a3286556d4d2f341108db65d8e20d0cd3fcb9a91741cb5eb496832d7daf2a97c", + "sha256:94d4e63189f2593960e73acaaf96be235dd8a455fe2bcb37d8ad6f0e87f61556", + "sha256:c91eee73eea00df5e62c741b380b7e5b6fdd553891bee5669817a3a38d036f13", + "sha256:779ec7e7621758ca11a8d99a1064996454b3570154277cc21342a01148a49c28", + "sha256:8a15d773203a1277e57b1d11a7ccdf70804744ef4a9518a87ab8436995c31a4b", + "sha256:e2467e9312c2fa191687b89ff4bc2ad8843be4af6fb4dc95a7cc5f7d7a327b18" + ], + "version": "==5.4.3" + }, "py": { "hashes": [ "sha256:8cca5c229d225f8c1e3085be4fcf306090b00850fefad892f9d96c7b6e2f310f", @@ -378,6 +399,13 @@ ], "version": "==1.5.2" }, + "pyee": { + "hashes": [ + "sha256:47f8fa96d6dee61c82001831e1fbba55f3f808003a322d0e6653aa01c59f6b9e", + "sha256:4ec22817297b7024f89721cc34f790ee2767c5b5ca44284c565ee643abafbe32" + ], + "version": "==5.0.0" + }, "pygments": { "hashes": [ "sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d", @@ -397,6 +425,12 @@ ], "version": "==2.2.0" }, + "pyppeteer": { + "hashes": [ + "sha256:596929fb7d052048679081d3dc2a998cf065e936a752c7ba2392445d6e0e9706" + ], + "version": "==0.0.10" + }, "pyquery": { "hashes": [ "sha256:07987c2ed2aed5cba29ff18af95e56e9eb04a2249f42ce47bddfb37f487229a3", @@ -488,6 +522,29 @@ ], "version": "==1.9.1" }, + "typed-ast": { + "hashes": [ + "sha256:0948004fa228ae071054f5208840a1e88747a357ec1101c17217bfe99b299d58", + "sha256:25d8feefe27eb0303b73545416b13d108c6067b846b543738a25ff304824ed9a", + "sha256:c05b41bc1deade9f90ddc5d988fe506208019ebba9f2578c622516fd201f5863", + "sha256:519425deca5c2b2bdac49f77b2c5625781abbaf9a809d727d3a5596b30bb4ded", + "sha256:6de012d2b166fe7a4cdf505eee3aaa12192f7ba365beeefaca4ec10e31241a85", + "sha256:79b91ebe5a28d349b6d0d323023350133e927b4de5b651a8aa2db69c761420c6", + "sha256:a8034021801bc0440f2e027c354b4eafd95891b573e12ff0418dec385c76785c", + "sha256:f19f2a4f547505fe9072e15f6f4ae714af51b5a681a97f187971f50c283193b6", + "sha256:c9b060bd1e5a26ab6e8267fd46fc9e02b54eb15fffb16d112d4c7b1c12987559", + "sha256:2e214b72168ea0275efd6c884b114ab42e316de3ffa125b267e732ed2abda892", + "sha256:bc978ac17468fe868ee589c795d06777f75496b1ed576d308002c8a5756fb9ea", + "sha256:edb04bdd45bfd76c8292c4d9654568efaedf76fe78eb246dde69bdb13b2dad87", + "sha256:668d0cec391d9aed1c6a388b0d5b97cd22e6073eaa5fbaa6d2946603b4871efe", + "sha256:29464a177d56e4e055b5f7b629935af7f49c196be47528cc94e0a7bf83fbc2b9", + "sha256:8550177fa5d4c1f09b5e5f524411c44633c80ec69b24e0e98906dd761941ca46", + "sha256:3e0d5e48e3a23e9a4d1a9f698e32a542a4a288c871d33ed8df1b092a40f3a0f9", + "sha256:68ba70684990f59497680ff90d18e756a47bf4863c604098f10de9716b2c0bdd", + "sha256:57fe287f0cdd9ceaf69e7b71a2e94a24b5d268b35df251a88fef5cc241bf73aa" + ], + "version": "==1.1.0" + }, "urllib3": { "hashes": [ "sha256:06330f386d6e4b195fbfc736b297f58c5a892e4440e54d294d7004e3a9bbea1b", @@ -501,6 +558,27 @@ "sha256:55994787e93b411c2d659068b51b9998d9d0c05e0df188e6daf8f45836e1ea38" ], "version": "==1.19.0" + }, + "websockets": { + "hashes": [ + "sha256:f5192da704535a7cbf76d6e99c1ec4af7e8d1288252bf5a2385d414509ded0cf", + "sha256:0c31bc832d529dc7583d324eb6c836a4f362032a1902723c112cf57883488d8c", + "sha256:da7610a017f5343fdf765f4e0eb6fd0dfd08264ca1565212b110836d9367fc9c", + "sha256:fd81af8cf3e69f9a97f3a6c0623a0527de0f922c2df725f00cd7646d478af632", + "sha256:3d425ae081fb4ba1eef9ecf30472ffd79f8e868297ccc7a47993c96dbf2a819c", + "sha256:ebdd4f18fe7e3bea9bd3bf446b0f4117739478caa2c76e4f0fb72cc45b03cbd7", + "sha256:3859ca16c229ddb0fa21c5090e4efcb037c08ce69b0c1dfed6122c3f98cd0c22", + "sha256:d1a0572b6edb22c9208e3e5381064e09d287d2a915f90233fef994ee7a14a935", + "sha256:80188abdadd23edaaea05ce761dc9a2e1df31a74a0533967f0dcd9560c85add0", + "sha256:fecf51c13195c416c22422353b306dddb9c752e4b80b21e0fa1fccbe38246677", + "sha256:367ff945bc0950ad9634591e2afe50bf2222bc4fad1088a386c4bb700888026e", + "sha256:6df87698022aef2596bffdfecc96d656db59c8d719708c8a471daa815ee61656", + "sha256:341824d8c9ad53fc43cca3fa9407f294125fa258592f7676640396501448e57e", + "sha256:64896a6b3368c959b8096b655e46f03dfa65b96745249f374bd6a35705cc3489", + "sha256:1f3e5a52cab6daa3d432c7b0de0a14109be39d2bfaad033ee5de4a3d3e11dcdf", + "sha256:da4d4fbe059b0453e726d6d993760065d69b823a27efc3040402a6fcfe6a1ed9" + ], + "version": "==4.0.1" } } } diff --git a/requests_html.py b/requests_html.py index d1f11b7..7c60cbf 100644 --- a/requests_html.py +++ b/requests_html.py @@ -1,7 +1,7 @@ import asyncio from urllib.parse import urlparse, urlunparse from concurrent.futures._base import TimeoutError -from typing import List +from typing import Set import pyppeteer import requests @@ -16,43 +16,16 @@ from w3lib.encoding import html_to_unicode - - DEFAULT_ENCODING = 'utf-8' useragent = UserAgent() -class HTMLResponse(requests.Response): - """An HTML-enabled :class:`Response ` object. - Same as Requests class:`Response ` object, but with an - intelligent ``.html`` property added. - """ - - def __init__(self, *args, **kwargs): - super(HTMLResponse, self).__init__(*args, **kwargs) - self._html = None - - @property - def html(self) -> str: - if self._html: - return self._html - - self._html = HTML(url=self.url, html=self.text, default_encoding=self.encoding) - return self._html - - @classmethod - def _from_response(cls, response): - html_r = cls() - html_r.__dict__.update(response.__dict__) - return html_r - - class BaseParser: """A basic HTML/Element Parser, for Humans.""" - def __init__(self, *, element, default_encoding: str = None, html: str = None, url: str): + def __init__(self, *, element, default_encoding: str = None, html: str = None, url: str) -> None: self.element = element self.url = url self.skip_anchors = True @@ -69,7 +42,7 @@ def html(self) -> str: return etree.tostring(self.element, encoding='unicode').strip() @html.setter - def set_html(self, html): + def set_html(self, html: str) -> None: """Property setter for self.html.""" self._html = html @@ -148,7 +121,7 @@ def search_all(self, template: str): return [r for r in findall(template, self.html)] @property - def links(self) -> List[str]: + def links(self) -> Set[str]: """All found links on page, in as–is form.""" def gen(): for link in self.find('a'): @@ -164,7 +137,7 @@ def gen(): return set(g for g in gen()) @property - def absolute_links(self) -> List[str]: + def absolute_links(self) -> Set[str]: """All found links on page, in absolute form.""" def gen(): for link in self.links: @@ -275,6 +248,31 @@ async def _async_render(url: str): return self +class HTMLResponse(requests.Response): + """An HTML-enabled :class:`Response ` object. + Same as Requests class:`Response ` object, but with an + intelligent ``.html`` property added. + """ + + def __init__(self, *args, **kwargs): + super(HTMLResponse, self).__init__(*args, **kwargs) + self._html = None + + @property + def html(self) -> HTML: + if self._html: + return self._html + + self._html = HTML(url=self.url, html=self.text, default_encoding=self.encoding) + return self._html + + @classmethod + def _from_response(cls, response): + html_r = cls() + html_r.__dict__.update(response.__dict__) + return html_r + + def user_agent(style='chrome') -> str: """Returns a random user-agent, if not requested one of a specific style. Defaults to a Chrome-style User-Agent. @@ -301,7 +299,7 @@ def __init__(self, mock_browser=True, *args, **kwargs): self.hooks = {'response': self._handle_response} @staticmethod - def _handle_response(response, **kwargs) -> requests.Response: + def _handle_response(response, **kwargs) -> HTMLResponse: """Requests HTTP Response handler. Attaches .html property to Response objects. """