Skip to content

Commit

Permalink
Update ikun_basketball.py
Browse files Browse the repository at this point in the history
  • Loading branch information
lovevantt authored Jan 15, 2020
1 parent c560ddf commit a511e17
Showing 1 changed file with 18 additions and 12 deletions.
30 changes: 18 additions & 12 deletions ikun_basketball.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# coding=utf-8
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
Expand All @@ -6,7 +7,8 @@
from bs4 import BeautifulSoup
import xlwt

# Drive a visible Chrome instance. PhantomJS was removed here because it is
# deprecated in recent Selenium releases; the old call is kept for reference.
# browser = webdriver.PhantomJS()
browser = webdriver.Chrome()
# Shared explicit wait: poll up to 10 seconds for elements to load/become clickable.
WAIT = WebDriverWait(browser, 10)
# Fixed window size so CSS selectors match the desktop layout of the site.
browser.set_window_size(1400, 900)

Expand All @@ -29,11 +31,12 @@ def search():
browser.get("https://www.bilibili.com/")

# 被那个破登录遮住了
index = WAIT.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#primary_menu > ul > li.home > a")))
index.click()
# index = WAIT.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#primary_menu > ul > li.home > a")))
# index.click()

input = WAIT.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#banner_link > div > div > form > input")))
submit = WAIT.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="banner_link"]/div/div/form/button')))
input = WAIT.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#nav_searchform > input")))
submit = WAIT.until(EC.element_to_be_clickable(
(By.XPATH, '/html/body/div[2]/div/div[1]/div[1]/div/div[2]/div/form/div/button')))

input.send_keys('蔡徐坤 篮球')
submit.click()
Expand All @@ -42,10 +45,10 @@ def search():
print('跳转到新窗口')
all_h = browser.window_handles
browser.switch_to.window(all_h[1])

get_source()

total = WAIT.until(EC.presence_of_element_located((By.CSS_SELECTOR,
"#server-search-app > div.contain > div.body-contain > div > div.page-wrap > div > ul > li.page-item.last > button")))
"#all-list > div.flow-loader > div.page-wrap > div > ul > li.page-item.last > button")))
return int(total.text)
except TimeoutException:
return search()
Expand All @@ -55,10 +58,10 @@ def next_page(page_num):
try:
print('获取下一页数据')
next_btn = WAIT.until(EC.element_to_be_clickable((By.CSS_SELECTOR,
'#server-search-app > div.contain > div.body-contain > div > div.page-wrap > div > ul > li.page-item.next > button')))
'#all-list > div.flow-loader > div.page-wrap > div > ul > li.page-item.next > button')))
next_btn.click()
WAIT.until(EC.text_to_be_present_in_element((By.CSS_SELECTOR,
'#server-search-app > div.contain > div.body-contain > div > div.page-wrap > div > ul > li.page-item.active > button'),
'#all-list > div.flow-loader > div.page-wrap > div > ul > li.page-item.active > button'),
str(page_num)))
get_source()
except TimeoutException:
Expand All @@ -67,7 +70,7 @@ def next_page(page_num):


def save_to_excel(soup):
list = soup.find(class_='all-contain').find_all(class_='info')
list = soup.find(class_='video-list clearfix').find_all(class_='video-item matrix')

for item in list:
item_title = item.find('a').get('title')
Expand All @@ -93,9 +96,12 @@ def save_to_excel(soup):

def get_source():
    """Wait for the search-result page to render, then parse the current
    page's HTML with BeautifulSoup and hand it to save_to_excel().

    The result list is rendered client-side, so we block until the
    filter bar that accompanies it is present before reading page_source.
    """
    WAIT.until(EC.presence_of_element_located(
        (By.CSS_SELECTOR, '#all-list > div.flow-loader > div.filter-wrap')))
    html = browser.page_source
    soup = BeautifulSoup(html, 'lxml')
    print('到这')
    save_to_excel(soup)


Expand All @@ -113,4 +119,4 @@ def main():

if __name__ == '__main__':
    main()
    # NOTE(review): xlwt writes legacy .xls (BIFF) workbooks; saving with an
    # .xlsx extension likely produces a file Excel flags as mismatched —
    # TODO confirm and consider renaming to .xls or switching to openpyxl.
    book.save('蔡徐坤篮球.xlsx')

0 comments on commit a511e17

Please sign in to comment.