add setup.py for PyPI. Fixes vinta#2

dojekyll · Oct 3, 2013 · 197d7ba · 197d7ba
2 parents 79763ab + 5cd3692
commit 197d7ba
Show file tree

Hide file tree

Showing 7 changed files with 184 additions and 93 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,7 +1,10 @@
 *.py[cod]
 
-.coverage
-.tox
+*.egg
+*.egg-info
+dist
+build
+sdist
 
 .codeintel
 .DS_STORE
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -0,0 +1,9 @@
+.. :changelog:
+
+History
+-------
+
+1.0.0 (2013-10-03)
+++++++++++++++++++
+
+- Initial Release
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1,4 @@
+include LICENSE
+include HISTORY.rst
+include README.rst
+include requirements.txt
diff --git a/README.md b/README.md
diff --git a/README.rst b/README.rst
@@ -0,0 +1,102 @@
+Haul
+====
+
+Find thumbnails and original images from URL or HTML file.
+
+Installation
+------------
+
+On Ubuntu:
+
+.. code-block:: bash
+
+ $ sudo apt-get install build-essential python-dev libxml2-dev libxslt1-dev
+ $ pip install haul
+
+On Mac OS X:
+
+.. code-block:: bash
+
+ $ pip install haul
+
+Usage
+-----
+
+Find images from ``img src``, ``a href`` and even ``background-image``:
+
+.. code-block:: python
+
+ import haul
+
+ url = 'http://gibuloto.tumblr.com/post/62525699435/fuck-yeah'
+ result = haul.find_images(url)
+
+ print(result.image_urls)
+ """
+ output:
+ [
+ 'http://25.media.tumblr.com/3f5f10d7216f1dd5eacb5eb3e302286a/tumblr_mtpcwdzKBT1qh9n5lo1_500.png',
+ ...
+ 'http://24.media.tumblr.com/avatar_a3a119b674e2_16.png',
+ 'http://25.media.tumblr.com/avatar_9b04f54875e1_16.png',
+ 'http://31.media.tumblr.com/avatar_0acf8f9b4380_16.png',
+ ]
+ """
+
+Find original (or bigger size) images with ``extend=True``:
+
+.. code-block:: python
+
+ import haul
+
+ url = 'http://gibuloto.tumblr.com/post/62525699435/fuck-yeah'
+ result = haul.find_images(url, extend=True)
+
+ print(result.image_urls)
+ """
+ output:
+ [
+ 'http://25.media.tumblr.com/3f5f10d7216f1dd5eacb5eb3e302286a/tumblr_mtpcwdzKBT1qh9n5lo1_500.png',
+ ...
+ 'http://24.media.tumblr.com/avatar_a3a119b674e2_16.png',
+ 'http://25.media.tumblr.com/avatar_9b04f54875e1_16.png',
+ 'http://31.media.tumblr.com/avatar_0acf8f9b4380_16.png',
+ # bigger size, extended from above urls
+ 'http://25.media.tumblr.com/3f5f10d7216f1dd5eacb5eb3e302286a/tumblr_mtpcwdzKBT1qh9n5lo1_1280.png',
+ ...
+ 'http://24.media.tumblr.com/avatar_a3a119b674e2_128.png',
+ 'http://25.media.tumblr.com/avatar_9b04f54875e1_128.png',
+ 'http://31.media.tumblr.com/avatar_0acf8f9b4380_128.png',
+ ]
+ """
+
+Custom finder / extender pipeline:
+
+.. code-block:: python
+
+ from haul import Haul
+
+ IMAGE_FINDER_PIPELINE = (
+ 'haul.finders.pipeline.html.img_src_finder',
+ 'haul.finders.pipeline.css.background_image_finder',
+ )
+
+ GOOGLE_SITES_EXTENDER_PIPELINE = (
+ 'haul.extenders.pipeline.google.blogspot_s1600_extender',
+ 'haul.extenders.pipeline.google.ggpht_s1600_extender',
+ 'haul.extenders.pipeline.google.googleusercontent_s1600_extender',
+ )
+
+ url = 'http://fashion-fever.nl/dressing-up/'
+ h = Haul(parser='lxml',
+ finder_pipeline=IMAGE_FINDER_PIPELINE,
+ extender_pipeline=GOOGLE_SITES_EXTENDER_PIPELINE)
+ result = h.find_images(url, extend=True)
+
+Run Tests
+---------
+
+.. code-block:: bash
+
+ $ cd tests
+ $ python test.py
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,5 @@
 beautifulsoup4
 cssutils
+html5lib
 lxml
 requests
diff --git a/setup.py b/setup.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+"""Packaging script for Haul; ``python setup.py publish`` uploads an sdist."""
+
+import os
+import sys
+
+try:
+    from setuptools import setup
+except ImportError:
+    from distutils.core import setup
+
+if sys.argv[-1] == 'publish':
+    os.system('python setup.py sdist upload')
+    sys.exit()
+
+with open('README.rst') as readme, open('HISTORY.rst') as history:
+    long_description = readme.read() + '\n\n' + history.read()
+
+# Read requirements.txt by hand: pip.req is pip-internal API (not importable
+# since pip 10). Blank lines, comments and pip option lines are skipped.
+with open('requirements.txt') as reqs:
+    install_requires = [r.strip() for r in reqs if r.strip()[:1] not in ('', '#', '-')]
+
+packages = [
+    'haul',
+    'haul.finders',
+    'haul.finders.pipeline',
+    'haul.extenders',
+    'haul.extenders.pipeline',
+]
+
+setup(
+    name='haul',
+    version='1.0.0',
+    description='An Extensible Image Crawler',
+    long_description=long_description,
+    keywords='haul web image content scraper parser crawler',
+    author='Vinta Chen',
+    author_email='[email protected]',
+    url='https://github.com/vinta/Haul',
+    license='MIT',  # short identifier; the full text ships via MANIFEST.in
+    install_requires=install_requires,
+    include_package_data=True,
+    packages=packages,
+    zip_safe=False,
+    classifiers=[
+        'Development Status :: 3 - Alpha',
+        'Environment :: Web Environment',
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: MIT License',
+        'Natural Language :: English',
+        'Natural Language :: Chinese (Traditional)',
+        'Operating System :: OS Independent',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 2',
+        'Programming Language :: Python :: 2.7',
+        'Topic :: Internet :: WWW/HTTP',
+        'Topic :: Internet :: WWW/HTTP :: Dynamic Content',
+        'Topic :: Multimedia :: Graphics',
+        'Topic :: Software Development :: Libraries :: Python Modules',
+        'Topic :: Utilities',
+    ],
+)