Skip to content

Commit

Permalink
Merge branch 'master' of github.com:kennethreitz/requests-html
Browse files (browse the repository at this point in the history)
  • Loading branch information
kennethreitz committed Mar 1, 2018
2 parents 6370ed4 + 636e21f commit 9c94d16
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions requests_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
DEFAULT_URL = 'https://example.org/'
DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8'

-useragent = UserAgent()
+useragent = None

# Typing.
_Find = Union[List['Element'], 'Element']
Expand Down Expand Up @@ -118,7 +118,7 @@ def lxml(self) -> HtmlElement:
"""`lxml <http://lxml.de>`_ representation of the
:class:`Element <Element>` or :class:`HTML <HTML>`.
"""
-return soup_parse(self.html)
+return soup_parse(self.html, features='html.parser')

@property
def text(self) -> _Text:
Expand Down Expand Up @@ -213,7 +213,7 @@ def gen():

try:
href = link.attrs['href'].strip()
-if href and not (href.startswith('#') and self.skip_anchors and href in ['javascript:;']):
+if href and not (href.startswith('#') and self.skip_anchors) and not href.startswith('javascript:'):
yield href
except KeyError:
pass
Expand Down Expand Up @@ -432,6 +432,9 @@ def user_agent(style='chrome') -> _UserAgent:
"""Returns a random user-agent, if not requested one of a specific
style. Defaults to a Chrome-style User-Agent.
"""
+global useragent
+if not useragent:
+useragent = UserAgent()

return useragent[style] if style else useragent.random

Expand Down

0 comments on commit 9c94d16

Please sign in to comment.