forked from binux/pyspider
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathresponse.py
165 lines (139 loc) · 4.95 KB
/
response.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8:
# Author: Binux<[email protected]>
# http://binux.me
# Created on 2012-11-02 11:16:02
import json
import chardet
from pyquery import PyQuery
from requests.structures import CaseInsensitiveDict
from requests.utils import get_encoding_from_headers, get_encodings_from_content
from requests import HTTPError
class Response(object):
    """Fetch result: status, URLs, headers, raw body and lazily decoded views.

    ``encoding``, ``text``, ``json`` and ``doc`` are computed on first access
    and cached on the instance in ``_encoding``, ``_text``, ``_json`` and
    ``_doc`` respectively.
    """

    # Text string type: ``unicode`` on Python 2, ``str`` on Python 3.
    _text_type = type(u'')

    def __init__(self):
        self.status_code = None
        self.url = None
        self.orig_url = None
        self.headers = CaseInsensitiveDict()
        self.content = ''
        self.cookies = {}
        self.error = None
        self.save = None
        self.time = 0

    def __repr__(self):
        # %s rather than %d: status_code is None until a status is assigned.
        return '<Response [%s]>' % self.status_code

    def __bool__(self):
        """Return the value of :attr:`ok` (Python 3 truth protocol)."""
        return self.ok

    def __nonzero__(self):
        """Return the value of :attr:`ok` (Python 2 truth protocol)."""
        return self.ok

    @property
    def ok(self):
        """True when :meth:`raise_for_status` does not raise ``HTTPError``."""
        try:
            self.raise_for_status()
        except HTTPError:
            return False
        return True

    @property
    def encoding(self):
        """Name of the encoding used to decode :attr:`content`.

        Resolution order: an explicitly assigned or previously detected
        value, the Content-Type header, a charset declared inside the
        content, chardet's guess, and finally ``'utf-8'``.  Returns
        ``'unicode'`` (uncached) when the content is already a text string.
        """
        if hasattr(self, '_encoding'):
            return self._encoding

        # Already-decoded content has no byte encoding to detect.
        if isinstance(self.content, self._text_type):
            return 'unicode'

        # Try charset from content-type.
        encoding = get_encoding_from_headers(self.headers)
        # NOTE(review): ISO-8859-1 is discarded, presumably because requests
        # reports it as a default for text/* without an explicit charset.
        if encoding == 'ISO-8859-1':
            encoding = None

        # Try charset declared inside the content.
        if not encoding:
            sniffable = self.content
            if not isinstance(sniffable, str):
                # Python 3 bytes: the sniffing helper matches with string
                # patterns, so hand it text.  latin-1 maps every byte to a
                # code point and therefore never fails.
                sniffable = sniffable.decode('latin-1')
            found = get_encodings_from_content(sniffable)
            encoding = found[0] if found else None

        # Fall back to auto-detected encoding.
        if not encoding and chardet is not None:
            encoding = chardet.detect(self.content)['encoding']

        # gb18030 is a superset of gb2312, so decode with the wider codec.
        if encoding and encoding.lower() == 'gb2312':
            encoding = 'gb18030'

        self._encoding = encoding or 'utf-8'
        return self._encoding

    @encoding.setter
    def encoding(self, value):
        self._encoding = value
        # Drop the cached text so it is re-decoded with the new encoding.
        self._text = None

    @property
    def text(self):
        """Content of the response as a text (unicode) string.

        Bytes are decoded with :attr:`encoding`, replacing undecodable
        sequences.  If that encoding is unknown to Python or is not a
        string (e.g. ``None`` was assigned), utf-8 is used instead.
        """
        cached = getattr(self, '_text', None)
        if cached:
            return cached
        if not self.content:
            return u''
        if isinstance(self.content, self._text_type):
            return self.content

        encoding = self.encoding
        try:
            content = self.content.decode(encoding, 'replace')
        except (LookupError, TypeError):
            # LookupError: unknown codec name (misspelling or similar).
            # TypeError: encoding is not a string, e.g. None.
            # Either way, decode blindly as utf-8.
            content = self.content.decode('utf-8', 'replace')

        self._text = content
        return content

    @property
    def json(self):
        """Content parsed as JSON, or None if parsing raises ValueError."""
        if hasattr(self, '_json'):
            return self._json
        try:
            self._json = json.loads(self.text or self.content)
        except ValueError:
            self._json = None
        return self._json

    @property
    def doc(self):
        """PyQuery object built from the content, links made absolute."""
        if hasattr(self, '_doc'):
            return self._doc
        doc = self._doc = PyQuery(self.text or self.content)
        doc.make_links_absolute(self.url)
        return doc

    def raise_for_status(self, allow_redirects=True):
        """Raise :class:`HTTPError` if the fetch failed; otherwise return None.

        A stored ``error`` takes precedence over the status code.  3xx codes
        raise only when ``allow_redirects`` is false; 4xx and 5xx always
        raise.  The raised exception carries this object as ``.response``.
        """
        if self.error:
            http_error = HTTPError(self.error)
        elif 300 <= self.status_code < 400 and not allow_redirects:
            http_error = HTTPError('%s Redirection' % (self.status_code))
        elif 400 <= self.status_code < 500:
            http_error = HTTPError('%s Client Error' % (self.status_code))
        elif 500 <= self.status_code < 600:
            http_error = HTTPError('%s Server Error' % (self.status_code))
        else:
            return

        http_error.response = self
        raise http_error

    def isok(self):
        """Method-call equivalent of the :attr:`ok` property."""
        return self.ok
def rebuild_response(r):
    """Build a :class:`Response` from the mapping ``r``.

    Each attribute is read with ``r.get``; missing keys fall back to the
    defaults listed below.  ``orig_url`` defaults to the resolved ``url``
    and ``headers`` is wrapped in a ``CaseInsensitiveDict``.
    """
    rebuilt = Response()

    # (attribute / key name, value used when the key is absent)
    plain_fields = (
        ('status_code', 599),
        ('url', ''),
        ('content', ''),
        ('cookies', {}),
        ('error', None),
        ('time', 0),
        ('save', None),
    )
    for field, fallback in plain_fields:
        setattr(rebuilt, field, r.get(field, fallback))

    rebuilt.headers = CaseInsensitiveDict(r.get('headers', {}))
    # Must come after ``url`` is set: it is the fallback for ``orig_url``.
    rebuilt.orig_url = r.get('orig_url', rebuilt.url)
    return rebuilt