Skip to content

Commit

Permalink
First six (Python 2/3 compatibility) commit: make the sqlite database tests pass
Browse files Browse the repository at this point in the history
fixed:
absolute_import
from six.moves.urllib import parse as urlparse
thread.get_ident() -> threading.current_thread().ident
basestring -> six.string_types
__metaclass__ -> add_metaclass
UserDict.DictMixin -> collections.Mapping (and support for new dict)
division
StringIO -> BytesIO
print_function
unicode -> six.text_type
__builtins__ -> six.moves.builtins
reload -> six.reload_module
dict.iteritems() -> six.iteritems(dict)
dict.iterkeys() -> for k in dict
dict.values() -> list(six.itervalues(dict))
dict.itervalues() -> six.itervalues(dict)
raise exc_type, exc_value, tb -> six.reraise(exc_type, exc_value, tb)
  • Loading branch information
binux committed Dec 13, 2014
1 parent dd0fc55 commit b502754
Show file tree
Hide file tree
Showing 26 changed files with 136 additions and 84 deletions.
3 changes: 2 additions & 1 deletion pyspider/database/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
# http://binux.me
# Created on 2014-10-08 15:04:08

import urlparse

from six.moves.urllib import parse as urlparse


def connect_database(url):
Expand Down
17 changes: 11 additions & 6 deletions pyspider/database/basedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@
# http://binux.me
# Created on 2012-08-30 17:43:49

from __future__ import unicode_literals, division, absolute_import

import logging
logger = logging.getLogger('database.basedb')

from six import itervalues


class BaseDB:

Expand All @@ -16,6 +20,7 @@ class BaseDB:
dbcur should be overwritten
'''
__tablename__ = None
placeholder = '%s'

@staticmethod
Expand Down Expand Up @@ -68,44 +73,44 @@ def _select2dic(self, tablename=None, what="*", where="", where_values=[],
def _replace(self, tablename=None, **values):
tablename = self.escape(tablename or self.__tablename__)
if values:
_keys = ", ".join(self.escape(k) for k in values.iterkeys())
_keys = ", ".join(self.escape(k) for k in values)
_values = ", ".join([self.placeholder, ] * len(values))
sql_query = "REPLACE INTO %s (%s) VALUES (%s)" % (tablename, _keys, _values)
else:
sql_query = "REPLACE INTO %s DEFAULT VALUES" % tablename
logger.debug("<sql: %s>", sql_query)

if values:
dbcur = self._execute(sql_query, values.values())
dbcur = self._execute(sql_query, list(itervalues(values)))
else:
dbcur = self._execute(sql_query)
return dbcur.lastrowid

def _insert(self, tablename=None, **values):
tablename = self.escape(tablename or self.__tablename__)
if values:
_keys = ", ".join((self.escape(k) for k in values.iterkeys()))
_keys = ", ".join((self.escape(k) for k in values))
_values = ", ".join([self.placeholder, ] * len(values))
sql_query = "INSERT INTO %s (%s) VALUES (%s)" % (tablename, _keys, _values)
else:
sql_query = "INSERT INTO %s DEFAULT VALUES" % tablename
logger.debug("<sql: %s>", sql_query)

if values:
dbcur = self._execute(sql_query, values.values())
dbcur = self._execute(sql_query, list(itervalues(values)))
else:
dbcur = self._execute(sql_query)
return dbcur.lastrowid

def _update(self, tablename=None, where="1=0", where_values=[], **values):
tablename = self.escape(tablename or self.__tablename__)
_key_values = ", ".join([
"%s = %s" % (self.escape(k), self.placeholder) for k in values.iterkeys()
"%s = %s" % (self.escape(k), self.placeholder) for k in values
])
sql_query = "UPDATE %s SET %s WHERE %s" % (tablename, _key_values, where)
logger.debug("<sql: %s>", sql_query)

return self._execute(sql_query, values.values() + list(where_values))
return self._execute(sql_query, list(itervalues(values)) + list(where_values))

def _delete(self, tablename=None, where="1=0", where_values=[]):
tablename = self.escape(tablename or self.__tablename__)
Expand Down
2 changes: 1 addition & 1 deletion pyspider/database/sqlite/projectdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import time

from sqlitebase import SQLiteMixin
from .sqlitebase import SQLiteMixin
from pyspider.database.base.projectdb import ProjectDB as BaseProjectDB
from pyspider.database.basedb import BaseDB

Expand Down
2 changes: 1 addition & 1 deletion pyspider/database/sqlite/resultdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import time
import json

from sqlitebase import SQLiteMixin, SplitTableMixin
from .sqlitebase import SQLiteMixin, SplitTableMixin
from pyspider.database.base.resultdb import ResultDB as BaseResultDB
from pyspider.database.basedb import BaseDB

Expand Down
4 changes: 2 additions & 2 deletions pyspider/database/sqlite/sqlitebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@
# Created on 2014-11-22 20:30:44

import time
import thread
import sqlite3
import threading


class SQLiteMixin(object):

@property
def dbcur(self):
pid = thread.get_ident()
pid = threading.current_thread().ident
if not (self.conn and pid == self.last_pid):
self.last_pid = pid
self.conn = sqlite3.connect(self.path, isolation_level=None)
Expand Down
2 changes: 1 addition & 1 deletion pyspider/database/sqlite/taskdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import time
import json

from sqlitebase import SQLiteMixin, SplitTableMixin
from .sqlitebase import SQLiteMixin, SplitTableMixin
from pyspider.database.base.taskdb import TaskDB as BaseTaskDB
from pyspider.database.basedb import BaseDB

Expand Down
9 changes: 6 additions & 3 deletions pyspider/fetcher/tornado_fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
# http://binux.me
# Created on 2012-12-17 11:07:19

from __future__ import unicode_literals

import six
import time
import json
import Queue
Expand Down Expand Up @@ -166,22 +169,22 @@ def http_fetch(self, url, task, callback):
track_headers = task.get('track', {}).get('fetch', {}).get('headers', {})
# proxy
if 'proxy' in task_fetch:
if isinstance(task_fetch['proxy'], basestring):
if isinstance(task_fetch['proxy'], six.string_types):
fetch['proxy_host'] = task_fetch['proxy'].split(":")[0]
fetch['proxy_port'] = int(task_fetch['proxy'].split(":")[1])
elif self.proxy and task_fetch.get('proxy', True):
fetch['proxy_host'] = self.proxy.split(":")[0]
fetch['proxy_port'] = int(self.proxy.split(":")[1])
# etag
if task_fetch.get('etag', True):
_t = task_fetch.get('etag') if isinstance(task_fetch.get('etag'), basestring) \
_t = task_fetch.get('etag') if isinstance(task_fetch.get('etag'), six.string_types) \
else track_headers.get('etag')
if _t:
fetch['headers'].setdefault('If-None-Match', _t)
# last modified
if task_fetch.get('last_modified', True):
_t = task_fetch.get('last_modifed') \
if isinstance(task_fetch.get('last_modifed'), basestring) \
if isinstance(task_fetch.get('last_modifed'), six.string_types) \
else track_headers.get('last-modified')
if _t:
fetch['headers'].setdefault('If-Modifed-Since', _t)
Expand Down
24 changes: 15 additions & 9 deletions pyspider/libs/base_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@
import inspect
import functools
import fractions

import six
from six import add_metaclass, iteritems

from pyspider.libs.log import LogFormatter
from pyspider.libs.url import quote_chinese, _build_url, _encode_params
from pyspider.libs.url import quote_chinese, _build_url, _encode_params, _encode_multipart_formdata
from pyspider.libs.utils import md5string, hide_me, pretty_unicode
from pyspider.libs.ListIO import ListO
from pyspider.libs.response import rebuild_response
Expand All @@ -35,7 +39,7 @@ def logstr(self):
result = []
formater = LogFormatter(color=False)
for record in self.logs:
if isinstance(record, basestring):
if isinstance(record, six.string_types):
result.append(pretty_unicode(record))
else:
if record.exc_info:
Expand Down Expand Up @@ -126,10 +130,13 @@ def __new__(cls, name, bases, attrs):
return newcls


@add_metaclass(BaseHandlerMeta)
class BaseHandler(object):
__metaclass__ = BaseHandlerMeta
crawl_config = {}
project_name = None
cron_jobs = []
min_tick = 0
__env__ = {'not_inited': True}

def _reset(self):
self._extinfo = {}
Expand Down Expand Up @@ -190,20 +197,19 @@ def _crawl(self, url, **kwargs):

if kwargs.get('callback'):
callback = kwargs['callback']
if isinstance(callback, basestring) and hasattr(self, callback):
if isinstance(callback, six.string_types) and hasattr(self, callback):
func = getattr(self, callback)
elif hasattr(callback, 'im_self') and callback.im_self is self:
func = callback
kwargs['callback'] = func.__name__
else:
raise NotImplementedError("self.%s() not implemented!" % callback)
if hasattr(func, '_config'):
for k, v in func._config.iteritems():
for k, v in iteritems(func._config):
kwargs.setdefault(k, v)

if hasattr(self, 'crawl_config'):
for k, v in self.crawl_config.iteritems():
kwargs.setdefault(k, v)
for k, v in iteritems(self.crawl_config):
kwargs.setdefault(k, v)

url = quote_chinese(_build_url(url.strip(), kwargs.get('params')))
if kwargs.get('files'):
Expand Down Expand Up @@ -296,7 +302,7 @@ def crawl(self, url, **kwargs):
taskid
'''

if isinstance(url, basestring):
if isinstance(url, six.string_types):
return self._crawl(url, **kwargs)
elif hasattr(url, "__iter__"):
result = []
Expand Down
29 changes: 23 additions & 6 deletions pyspider/libs/counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,19 @@
# http://binux.me
# Created on 2012-11-14 17:09:50

from __future__ import unicode_literals, division, absolute_import

import time
import cPickle
import logging
from collections import deque
from UserDict import DictMixin
try:
from UserDict import DictMixin
except ImportError:
from collections import Mapping as DictMixin

import six
from six import iteritems


class BaseCounter(object):
Expand Down Expand Up @@ -52,7 +60,7 @@ def event(self, value=1):

@property
def avg(self):
return float(self.sum) / len(self.values)
return self.sum / len(self.values)

@property
def sum(self):
Expand Down Expand Up @@ -163,6 +171,15 @@ def __getitem__(self, key):
else:
return CounterValue(self.manager, key)

def __len__(self):
return len(self.keys())

def __iter__(self):
return iter(self.keys())

def __contains__(self, key):
return key in self.keys()

def keys(self):
result = set()
for key in self.manager.counters:
Expand All @@ -173,7 +190,7 @@ def keys(self):

def to_dict(self, get_value=None):
result = {}
for key, value in self.iteritems():
for key, value in iteritems(self):
if isinstance(value, BaseCounter):
if get_value is not None:
value = getattr(value, get_value)
Expand All @@ -190,7 +207,7 @@ def __init__(self, cls=TimebaseAverageWindowCounter):
self.counters = {}

def event(self, key, value=1):
if isinstance(key, basestring):
if isinstance(key, six.string_types):
key = (key, )
assert isinstance(key, tuple), "event key type error"
if key not in self.counters:
Expand All @@ -199,7 +216,7 @@ def event(self, key, value=1):
return self

def value(self, key, value=1):
if isinstance(key, basestring):
if isinstance(key, six.string_types):
key = (key, )
assert isinstance(key, tuple), "event key type error"
if key not in self.counters:
Expand Down Expand Up @@ -238,7 +255,7 @@ def keys(self):
def to_dict(self, get_value=None):
self.trim()
result = {}
for key, value in self.iteritems():
for key, value in iteritems(self):
if isinstance(value, BaseCounter):
if get_value is not None:
value = getattr(value, get_value)
Expand Down
5 changes: 3 additions & 2 deletions pyspider/libs/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
# http://binux.me
# Created on 2012-10-24 16:08:17

import logging
import six
import sys
import time
import logging

try:
import curses
Expand Down Expand Up @@ -87,7 +88,7 @@ def format(self, record):
record.message = record.getMessage()
except Exception as e:
record.message = "Bad message (%r): %r" % (e, record.__dict__)
assert isinstance(record.message, basestring) # guaranteed by logging
assert isinstance(record.message, six.string_types) # guaranteed by logging
record.asctime = time.strftime(
"%y%m%d %H:%M:%S", self.converter(record.created))
prefix = '[%(levelname)1.1s %(asctime)s %(module)s:%(lineno)d]' % \
Expand Down
12 changes: 7 additions & 5 deletions pyspider/libs/pprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,12 @@
"""

from __future__ import print_function

import sys as _sys
import warnings

from cStringIO import StringIO as _StringIO
from io import BytesIO

__all__ = ["pprint", "pformat", "isreadable", "isrecursive", "saferepr",
"PrettyPrinter"]
Expand Down Expand Up @@ -122,7 +124,7 @@ def pprint(self, object):
self._stream.write("\n")

def pformat(self, object):
sio = _StringIO()
sio = BytesIO()
self._format(object, sio, 0, 0, {}, 0)
return sio.getvalue()

Expand Down Expand Up @@ -273,7 +275,7 @@ def _safe_repr(object, context, maxlevels, level):
except:
pass
qget = quotes.get
sio = _StringIO()
sio = BytesIO()
write = sio.write
for char in object:
if char.isalpha():
Expand Down Expand Up @@ -373,8 +375,8 @@ def _perfcheck(object=None):
t2 = time.time()
p.pformat(object)
t3 = time.time()
print "_safe_repr:", t2 - t1
print "pformat:", t3 - t2
print("_safe_repr:", t2 - t1)
print("pformat:", t3 - t2)

if __name__ == "__main__":
_perfcheck()
2 changes: 1 addition & 1 deletion pyspider/libs/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def __init__(self):
self.time = 0

def __repr__(self):
return '<Response [%d]>' % self.status_code
return u'<Response [%d]>' % self.status_code

def __bool__(self):
"""Returns true if :attr:`status_code` is 'OK'."""
Expand Down
Loading

0 comments on commit b502754

Please sign in to comment.