-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbbGlobus.py
91 lines (83 loc) · 3.2 KB
/
bbGlobus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# Benzín Brno - Globus - Natural - bbGlobus.py
# https://www.globus.cz/brno/cerpaci-stanice-a-myci-linka.html
# https://www.globus.cz/brno/sluzby-a-produkty/cerpaci-stanice-a-myci-linka
# 20.12.2023 - httpx misto requests ktery neumi asyncio
import asyncio
# extract - stahne stranku
async def extract(url, Key):
from bbCFG import bbCenaMsk, bbHeaders
# import requests
import pandas as pd
import sys
import httpx
# headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'}
# r = requests.get(url, headers)
# BeautifulSoup - ted nepotrebuji neni JavaScript
# soup = BeautifulSoup(r.content, 'html.parser')
r = await httpx.AsyncClient(http2=True, verify=False, max_redirects=9).get(url, headers=bbHeaders, follow_redirects=True)
# print('r.content', type(r.content), '\n', r.content)
Cena = 0
try:
# pd najde tabulky
table = pd.read_html(r.content)
# print(f'Total tables: {len(table)}')
except: # catch *all* exceptions # pylint: disable=bare-except
e = sys.exc_info()[0]
print("Error in bbGlobus.py: ", e)
return Cena
# tabulek je 6, zajima me c. 2.
df = table[1] # pandas.core.frame.DataFrame
# adding column name to the respective columns - https://bit.ly/3oxfuhN
try:
df.columns = ['Name', 'smazat', 'Cena']
except: # catch *all* exceptions # pylint: disable=bare-except
e = sys.exc_info()[0]
print("Error in bbGlobus.py: ", e)
return Cena
# smazani sloupce - https://bit.ly/3oBNYjk
del df['smazat']
# Cena zmena typu na float - https://bit.ly/3Bi79SL
df['Cena'] = df['Cena'].astype('float64')
# Prepocitani df tj. / 100
df['Cena'] = (df['Cena'] / 100)
# How to add a trailing zeros to a pandas dataframe column? - https://bit.ly/3D5zwUO
df['Cena'] = df['Cena'].map(bbCenaMsk.format)
# LookUp - nalezeni hodnoty Key - https://bit.ly/3DuT59p
Radek = df.loc[df['Name'] == Key]
# How to get a value from a Pandas DataFrame and not the index and object type - https://bit.ly/3BhmuDc
# Catching all exceptions without pylint error - https://bit.ly/3kvu86m
try:
Cena = Radek['Cena'].values[0]
except: # catch *all* exceptions - pylint: disable=bare-except
e = sys.exc_info()[0]
print("Error in bbGlobus.py: ", e)
Cena = 0
# => float
Cena = float(Cena)
return Cena
# test function
async def tGlobu(url=''):
from bbCFG import brint, bbProduct, bbNoUrl
brint('tGlobu:', 'url', url)
if bbProduct and (url != bbNoUrl):
return await Globu(url)
else:
return 29.9
# globus - vrati cenu za natual - https://www.globus.cz/brno/cerpaci-stanice-a-myci-linka.html
async def Globu(url=''):
if url == '':
# url = r'https://www.globus.cz/brno/cerpaci-stanice-a-myci-linka.html'
url = r'https://www.globus.cz/brno/sluzby-a-produkty/cerpaci-stanice-a-myci-linka'
Key = 'Drive 95'
Key = 'Natural 95' # zmena media - 26.10.2021 13:15
Cena = await extract(url, Key)
# print('Cena paliva -', Key, '- je:', Cena, ' type', type(Cena))
return Cena
# main
async def bbGlobus_main():
print('def Globu(): ', await Globu())
print('OkDone.')
# __name__
if __name__ == '__main__':
# bbGlobus_main()
asyncio.run(bbGlobus_main())