|
| 1 | +import requests |
| 2 | +import json |
| 3 | +import os |
| 4 | +from bs4 import BeautifulSoup |
| 5 | + |
# Browser-like request headers: dianping.com blocks the default
# python-requests User-Agent, so every request in this module sends these.
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:39.0) Gecko/20100101 Firefox/39.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate',
    'Connection': 'keep-alive',
}
| 12 | + |
| 13 | + |
def bestRestaurant(cityId=1, rankType='popscore'):
    """Fetch a city's restaurant ranking from dianping's ajax endpoint.

    Args:
        cityId: dianping city id (see getCityId), e.g. 1.
        rankType: ranking flavour key (see getRankType), e.g. 'popscore'.

    Returns:
        The list stored under 'shopBeans' in the JSON response.

    Raises:
        requests.RequestException: on network failure or timeout.
        KeyError: if the response JSON has no 'shopBeans' key.
    """
    # Let requests build/encode the query string instead of %-interpolation.
    params = {
        'cityId': cityId,
        'shopType': 10,
        'rankType': rankType,
        'categoryId': 0,
    }
    # timeout added for consistency with restaurantList; without it a
    # stalled connection hangs forever.
    resp = requests.get('http://www.dianping.com/mylist/ajax/shoprank',
                        params=params, headers=headers, timeout=30)
    # Response.json() replaces json.loads(resp.text) and handles decoding.
    return resp.json()['shopBeans']
| 19 | + |
| 20 | + |
def getCityId():
    """Return a mapping from Chinese city name to dianping city id (string)."""
    city_ids = {
        '北京': '2',
        '上海': '1',
        '广州': '4',
        '深圳': '7',
        '成都': '8',
        '重庆': '9',
        '杭州': '3',
        '南京': '5',
        '沈阳': '18',
        '苏州': '6',
        '天津': '10',
        '武汉': '16',
        '西安': '17',
        '长沙': '344',
        '大连': '19',
        '济南': '22',
        '宁波': '11',
        '青岛': '21',
        '无锡': '13',
        '厦门': '15',
        '郑州': '160',
    }
    return city_ids
| 25 | + |
| 26 | + |
def getRankType():
    """Return a mapping from ranking label (Chinese) to dianping rankType key."""
    rank_types = {
        '最佳餐厅': 'score',
        '人气餐厅': 'popscore',
        '口味最佳': 'score1',
        '环境最佳': 'score2',
        '服务最佳': 'score3',
    }
    return rank_types
| 31 | + |
| 32 | + |
def dpindex(cityId=1, page=1):
    """Scrape one page of the dpindex ranking list for a city.

    Args:
        cityId: dianping city id (see getCityId).
        page: 1-based page number of the ranking list.

    Returns:
        A list of dicts with keys 'name', 'url', 'num', 'addr', 'index',
        one per <li> entry in the ranking container.

    Raises:
        requests.RequestException: on network failure or timeout.
        AttributeError: if the page layout changed and the expected
            container or fields are missing.
    """
    # Let requests encode the query string instead of %-interpolation,
    # and add a timeout (consistent with restaurantList) so a stalled
    # connection cannot hang forever.
    params = {'region': '', 'category': '', 'type': 'rank',
              'city': cityId, 'p': page}
    html = requests.get('http://dpindex.dianping.com/dpindex',
                        params=params, headers=headers, timeout=30).text
    container = BeautifulSoup(html, 'lxml').find(
        'div', attrs={'class': 'idxmain-subcontainer'})
    result = []
    for item in container.find_all('li'):
        result.append({
            'name': item.find('div', attrs={'class': 'field-name'}).get_text(),
            'url': item.find('a').get('href'),
            'num': item.find('div', attrs={'class': 'field-num'}).get_text(),
            'addr': item.find('div', attrs={'class': 'field-addr'}).get_text(),
            'index': item.find('div', attrs={'class': 'field-index'}).get_text(),
        })
    return result
| 49 | + |
| 50 | + |
def restaurantList(url):
    """Scrape a dianping shop-list page into a list of shop dicts.

    Args:
        url: full URL of a dianping list/search page that contains the
            'shop-all-list' container.

    Returns:
        A list of dicts with keys 'name', 'star', 'review-num',
        'mean-price', 'type', 'addr', 'score' and 'tags'.

    Raises:
        requests.RequestException: on network failure or timeout.
        AttributeError: if the page layout changed and required fields
            are missing.
    """
    html = requests.get(url, headers=headers, timeout=30).text
    # Strip newlines so get_text() values don't carry layout whitespace.
    html = html.replace('\r', '').replace('\n', '')
    items = BeautifulSoup(html, 'lxml').find(
        'div', id='shop-all-list').find_all('li')
    return [_parse_shop(item) for item in items]


def _parse_shop(item):
    """Extract one shop's fields from its <li> element of a shop list."""
    soup = item.find('div', attrs={'class': 'txt'})
    tit = soup.find('div', attrs={'class': 'tit'})
    comment = soup.find('div', attrs={'class': 'comment'})
    tag_addr = soup.find('div', attrs={'class': 'tag-addr'})
    shop = {
        'name': tit.find('a').get_text(),
        'star': comment.find('span').get('title'),
        'review-num': comment.find(
            'a', attrs={'class': 'review-num'}).get_text().replace('条点评', ''),
        'mean-price': comment.find('a', attrs={'class': 'mean-price'}).get_text(),
        'type': tag_addr.find('span', attrs={'class': 'tag'}).get_text(),
        'addr': tag_addr.find('span', attrs={'class': 'addr'}).get_text(),
    }
    # Per-dimension scores are optional on some pages: check for the
    # container explicitly instead of a bare except around the lookup.
    comment_list = soup.find('span', attrs={'class': 'comment-list'})
    spans = comment_list.find_all('span') if comment_list is not None else []
    shop['score'] = [s.get_text() for s in spans]
    # Promo icons carry their meaning in CSS class names; the container is
    # optional. BUGFIX: the original did `tags += i.get('class')` under a
    # bare except — when get('class') returned None, the except handler
    # itself raised on None[0], and an outer bare except silently dropped
    # every remaining tag. Explicit None check instead.
    tags = []
    promo = tit.find('div', attrs={'class': 'promo-icon'})
    if promo is not None:
        for anchor in promo.find_all('a'):
            css_classes = anchor.get('class')
            if css_classes:
                tags.extend(css_classes)
    shop['tags'] = tags
    return shop
0 commit comments