selenium套件更新

將套件更新到4.4.3版本，因此寫法全部都更新過
rifleak74 · Sep 23, 2022 · e91b182 · e91b182
1 parent 6c55c9a
commit e91b182
Showing 1 changed file with 37 additions and 36 deletions.
diff --git a/行銷人轉職爬蟲王實戰｜5大社群＋2大電商/2-3.Selenium實戰練習－UberEat爬蟲/2-3.Selenium實戰練習－UberEat爬蟲.py b/行銷人轉職爬蟲王實戰｜5大社群＋2大電商/2-3.Selenium實戰練習－UberEat爬蟲/2-3.Selenium實戰練習－UberEat爬蟲.py
@@ -1,93 +1,94 @@
 # -*- coding: utf-8 -*-
 """
 Created on Tue May  4 20:23:37 2021
-
 @author: Ivan
 課程教材：行銷人轉職爬蟲王實戰｜5大社群平台＋2大電商
 版權屬於「楊超霆」所有，若有疑問，可聯絡[email protected]
-
 第二章 進階皇蟲Selenium
 Selenium實戰練習－UberEat爬蟲
 """
-from selenium.webdriver import DesiredCapabilities
+# selenium，2022/9/17 將套件更新到4.4.3版本，因此寫法全部都更新過
 from selenium import webdriver
-from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service as ChromeService
+from webdriver_manager.chrome import ChromeDriverManager
 import time
 import pandas as pd
 
-# 設定基本參數
-desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
-#此處必須換成自己電腦的User-Agent
-desired_capabilities['phantomjs.page.customHeaders.User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'
-
-# PhantomJS driver 路徑
-driver = webdriver.PhantomJS(executable_path = 'phantomjs', desired_capabilities=desired_capabilities)
+# 自動下載ChromeDriver
+service = ChromeService(executable_path=ChromeDriverManager().install())
 
 # 關閉通知提醒
 chrome_options = webdriver.ChromeOptions()
 prefs = {"profile.default_content_setting_values.notifications" : 2}
 chrome_options.add_experimental_option("prefs",prefs)
 
 # 開啟瀏覽器
-driver = webdriver.Chrome('chromedriver',chrome_options=chrome_options)
+driver = webdriver.Chrome(service=service, options=chrome_options)  # Selenium 4: pass ChromeOptions via `options=`; `chrome_options=` is deprecated
+time.sleep(5)
 
-# 去到你想要的網頁
-driver.get("https://www.ubereats.com/tw")
+# 開啟網頁
+driver.get('https://www.ubereats.com/tw')
 time.sleep(3)
 
 
 
 #---------- 開始網頁的控制 ----------
 #--- 輸入外送地址
-getblock = driver.find_element_by_xpath('//*[@placeholder="輸入外送地址"]')
+getblock = driver.find_element(by=By.XPATH, value='//*[@placeholder="輸入外送地址"]')
 getblock.send_keys('中山北路二段1號') # 輸入地址
 time.sleep(1)
 getblock.send_keys('\ue007') # 按下Enter
 
 #--- 目標：爬下所有餐廳門市
 #方法1：利用class抓取
-len(driver.find_elements_by_class_name('lv'))
-len(driver.find_elements_by_class_name('g3'))
-len(driver.find_elements_by_class_name('ag'))
+#由於網站更新後，class都是程式自動產生、不好抓取，改抓取各餐點種類的大類別
+len(driver.find_elements(by=By.TAG_NAME, value='section'))
+
+# show出各個類別
+for i in driver.find_elements(by=By.TAG_NAME, value='section'):
+    # show 出所有店名
+    for j in i.find_elements(by=By.TAG_NAME, value='h3'):
+        print(j.text + '\n')
+
 
-for i in driver.find_elements_by_class_name('lv'):
-    print(i.text + '\n')
 
 #方法2：利用剝洋蔥方式
-location = '//main/div/div[3]/div[2]/div/div[4]/div['
-# driver.find_element_by_xpath('//main/div/div[3]/div[2]/div/div[4]/div[1]/div/a/h3')
-for i in range(1, 21):
-    print(driver.find_element_by_xpath(location + str(i) + ']/div/a/h3').text + ' ')
+location = '//section/div[2]/div[1]/li['
+for i in range(1, 4): # 只先 show 出3個看看
+    print(driver.find_element(by=By.XPATH, value=location + str(i) + ']/div/a/h3').text)
 
 doit = True
 i = 1
 while doit:
     try:
-        print(driver.find_element_by_xpath(location + str(i) + ']/div/a/h3').text + ' ')
+        print(driver.find_element(by=By.XPATH, value=location + str(i) + ']/div/a/h3').text)
     except:
         doit = False
         print(i)
     i = i + 1
 
+
+
 # 完整寫法
 restaurant = []
 restaurantURL = []
 deliveryCost = []
 spendTime = []
-location = '//main/div/div[3]/div[2]/div/div[4]/div['
+location = '//section/div[2]/div[1]/li['
 doit = True
 i = 1
 while doit:
     try:
-        restaurant.append(driver.find_element_by_xpath(location + str(i) + ']/div/a/h3').text)
-        restaurantURL.append(driver.find_element_by_xpath(location + str(i) + ']/div/a').get_attribute('href'))
-        deliveryCost.append(driver.find_element_by_xpath(location + str(i) +']/div/div/div/div[2]/div[2]/div[2]').text)
-        spendTime.append(driver.find_element_by_xpath(location + str(i) +']/div/div/div/div[2]/div[2]/div[3]').text)
+        restaurant.append(driver.find_element(by=By.XPATH, value=location + str(i) + ']/div/a/h3').text)
+        restaurantURL.append(driver.find_element(by=By.XPATH, value=location + str(i) + ']/div/a').get_attribute('href'))
+        deliveryCost.append(driver.find_element(by=By.XPATH, value=location + str(i) +']/div/div/div/div[2]/div[2]/div[2]').text)
+        spendTime.append(driver.find_element(by=By.XPATH, value=location + str(i) +']/div/div/div/div[2]/div[2]/div[2]').text)  # NOTE(review): identical XPath to deliveryCost above (the pre-change code used div[3]) — confirm against the live page
     except:
         doit = False
     print(i)
     i = i + 1
-    
+
 # 打包成CSV檔案
 dfData = pd.DataFrame({
     '店家名稱':restaurant,
@@ -101,9 +102,9 @@
 
 
 #--- 補充資源
-driver.find_element_by_id('batBeacon872942032971') # 通過ID
+driver.find_elements(by=By.ID, value='batBeacon872942032971') # 通過ID
 # <input type="text" class="form-control" id="usr" name='inportbox1'>
-driver.find_element_by_name('inportbox1') # 通過Name
-driver.find_element_by_link_text('GABA 元気の源 嘎吧 日式飯糰店') # 通過連結
-driver.find_element_by_tag_name('h1') # 通過標籤
-driver.find_element_by_css_selector('div.h3') #通過標籤CSS
+driver.find_elements(by=By.NAME, value='inportbox1') # 通過Name
+driver.find_elements(by=By.LINK_TEXT, value='GABA 元気の源 嘎吧 日式飯糰店') # 通過連結
+driver.find_elements(by=By.TAG_NAME, value='h1') # 通過標籤
+driver.find_elements(by=By.CSS_SELECTOR, value='div.h3') #通過標籤CSS