forked from parsing/python-readability
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathurl_helpers.py
52 lines (44 loc) · 1.15 KB
/
url_helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import logging

try:
    from urlparse import urljoin
    from urlparse import urlparse
except ImportError:  # Python 3 moved these helpers into urllib.parse
    from urllib.parse import urljoin
    from urllib.parse import urlparse
def host_for_url(url):
    """
    Return the network-location part of ``url``, or ``None`` if it has none.

    The value is the second component produced by ``urlparse`` (the netloc),
    so any port or credentials present in the URL are included verbatim.
    When that component is empty an error is logged and ``None`` is returned.

    >>> host_for_url('http://base/whatever/fdsh')
    'base'
    >>> host_for_url('invalid')
    """
    host = urlparse(url)[1]
    if not host:
        # Hand the URL to logging as an argument so the message is only
        # formatted if a handler actually emits the record.
        logging.error("could not extract host from URL: %r", url)
        return None
    return host
def absolute_url(url, base_href):
    """
    Resolve ``url`` against ``base_href`` and return the absolute URL.

    Surrounding whitespace is stripped from ``url`` first. A ``url`` that
    already carries a scheme is returned (stripped) unchanged; anything else
    is resolved with ``urljoin``, which also handles protocol-relative
    references (``//host/path``), ``..`` segments, and query-only or
    fragment-only references.

    >>> absolute_url('foo', 'http://base/whatever/ooo/fdsh')
    'http://base/whatever/ooo/foo'
    >>> absolute_url('foo/bar/', 'http://base')
    'http://base/foo/bar/'
    >>> absolute_url('/foo/bar', 'http://base/whatever/fdskf')
    'http://base/foo/bar'
    >>> absolute_url('\\n/foo/bar', 'http://base/whatever/fdskf')
    'http://base/foo/bar'
    >>> absolute_url('http://localhost/foo', 'http://base/whatever/fdskf')
    'http://localhost/foo'
    >>> absolute_url('//cdn/foo', 'http://base/whatever/fdskf')
    'http://cdn/foo'
    >>> absolute_url('?page=2', 'http://base/whatever/fdskf')
    'http://base/whatever/fdskf?page=2'
    """
    url = url.strip()
    if urlparse(url)[0]:
        # Already has a scheme (http:, mailto:, ...): hand it back as-is.
        return url
    # Use the standard resolver instead of gluing strings together; plain
    # concatenation mis-handles '//host/path', '?query', '#fragment' and ''.
    return urljoin(base_href, url)
if __name__ == "__main__":
    # Running the module directly executes the doctest examples embedded in
    # the docstrings of this file.
    from doctest import testmod

    testmod()