Skip to content

Commit

Permalink
add def sanitize
Browse files Browse the repository at this point in the history
Move sanitize from sanitize.py to scrape.py
Underscore-hide globals.
  • Loading branch information
bluquar committed Jul 16, 2013
1 parent aa742a8 commit b85ed57
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,13 @@
from urllib import urlopen
import os
import datetime
from sanitize import sanitize
import string

REDDIT_API_SLEEP_TIME = 2.50
_REDDIT_API_SLEEP_TIME = 2.50
_VALID_CHARS = frozenset(''.join(("-_.() ", string.ascii_letters, string.digits)))

def sanitize(s):
    """Return a copy of `s` keeping only the characters in _VALID_CHARS.

    The allowed set is ASCII letters, ASCII digits, and the characters
    "-", "_", ".", "(", ")" and space. Every other character is dropped
    (not replaced), so the result may be shorter than the input or empty.
    The relative order of the kept characters is preserved.
    """
    # NOTE(review): presumably used to build safe local filenames for
    # download_and_save -- confirm against the callers in main().
    return ''.join(c for c in s if c in _VALID_CHARS)

def download_and_save(url, filename):
"""Saves the data at a given URL to a given local filename."""
Expand Down Expand Up @@ -72,7 +76,7 @@ def main():
percent = int((100 * n_so_far) / total_n)
alert("%d percent complete." % percent)

sleep(REDDIT_API_SLEEP_TIME) # Avoid offending the Reddit API Gods!)
sleep(_REDDIT_API_SLEEP_TIME) # Avoid offending the Reddit API Gods!)
alert("Completed web scrape.")

if __name__ == '__main__':
Expand Down

0 comments on commit b85ed57

Please sign in to comment.