commit 051535008f (parent 813e3e5b42): "vieles" (various changes)
@@ -0,0 +1,96 @@
import toml
import os
import db_model as db
from peewee import PostgresqlDatabase
import datetime
from sshtunnel import SSHTunnelForwarder
import verdoppelungsrate
from dataclasses import dataclass
from dataclasses_json import dataclass_json
import scrape_bev


@dataclass_json
@dataclass
class CoronaStatistik:
    # One derived statistics row; dataclasses_json provides to_dict()
    # for the bulk insert in daten_senden().
    ts: datetime.datetime
    quelle: str
    country_region: str
    typ: str
    wert: float


def config_laden():
    configfile = os.path.join(SKRIPTPFAD, "config.toml")
    with open(configfile) as file:
        return toml.loads(file.read())


SKRIPTPFAD = os.path.abspath(os.path.dirname(__file__))
CONFIG = config_laden()


def verdoppelungsrate_bearbeiten(quelle, country):
    letzter_ts = verdoppelungsrate.letzten_eintrag_ermitteln(quelle, country)
    if letzter_ts is None:
        letzter_ts = datetime.datetime(1970, 1, 1)
    daten = verdoppelungsrate.letzte_verdopplungsraten_berechnen(quelle, country, letzter_ts)
    return daten


def alle_eintraege_abarbeiten():
    daten = []
    bev_zahlen = {}
    query_quelle = db.CoronaDaten.select(db.CoronaDaten.quelle).group_by(db.CoronaDaten.quelle)
    for data in query_quelle:
        if data.quelle == "rki_landkreis":
            continue
        query_country = db.CoronaDaten.select(db.CoronaDaten.country_region).where(
            db.CoronaDaten.quelle == data.quelle).group_by(db.CoronaDaten.country_region)
        for datensatz in query_country:
            daten_verdoppelungsrate = verdoppelungsrate_bearbeiten(data.quelle, datensatz.country_region)
            if daten_verdoppelungsrate:
                daten.extend(daten_verdoppelungsrate)
            # Population numbers are scraped once per country and cached.
            if datensatz.country_region not in bev_zahlen:
                bev_zahlen[datensatz.country_region] = scrape_bev.scrape_bev(datensatz.country_region)
            if bev_zahlen[datensatz.country_region]:
                daten_bev_prozent = scrape_bev.auslesen_unverarbeiteter_daten(data.quelle,
                                                                              datensatz.country_region, bev_zahlen,
                                                                              CoronaStatistik)
            else:
                daten_bev_prozent = None
            if daten_bev_prozent:
                daten.extend(daten_bev_prozent)

            # Flush in batches so a single insert does not grow without bound.
            if len(daten) > 5000:
                print("Sende Daten")
                daten_senden(daten)
                daten = []
    return daten


def daten_senden(daten):
    daten_dict = []
    for data in daten:
        daten_dict.append(data.to_dict())
    if daten_dict:  # avoid issuing an empty bulk insert
        db.CoronaStatistik.insert_many(daten_dict).execute()


def main():
    # Open an SSH tunnel to the database host and reach PostgreSQL via a local port.
    with SSHTunnelForwarder(
            (CONFIG["ssh"]["ip_server"], CONFIG["ssh"]["ssh_port"]), ssh_username=CONFIG["ssh"]["user"],
            ssh_password=CONFIG["ssh"]["pw"], remote_bind_address=('127.0.0.1', CONFIG["pg"]["pgport"])) as server:

        db.database.initialize(PostgresqlDatabase(CONFIG["pg"]["pgdb"],
                                                  user=CONFIG["pg"]["pguser"], password=CONFIG["pg"]["pgpw"],
                                                  host="127.0.0.1",
                                                  port=server.local_bind_port))
        db.create_tables()
        daten = alle_eintraege_abarbeiten()
        daten_senden(daten)
        db.database.close()


if __name__ == "__main__":
    main()
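
The CONFIG lookups above imply a config.toml with an [ssh] and a [pg] table next to the script. A minimal sketch of that layout, shown here as an inline TOML string: the key names are taken from the code, the values are invented placeholders, not part of the commit.

import toml

# Hypothetical layout of the config.toml read by config_laden(); values are placeholders.
BEISPIEL_CONFIG = toml.loads("""
[ssh]
ip_server = "203.0.113.10"
ssh_port = 22
user = "beispieluser"
pw = "geheim"

[pg]
pgdb = "corona"
pguser = "corona"
pgpw = "geheim"
pgport = 5432
""")

assert BEISPIEL_CONFIG["pg"]["pgport"] == 5432
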
@@ -1,16 +1,50 @@
import requests
from bs4 import BeautifulSoup

import db_model as db
import datetime


TYP = "prozent_bev"
URL = "https://countrymeters.info/en/"


def letzten_eintrag_ermitteln(quelle, country_region):
    ts = db.CoronaStatistik.select(
        db.CoronaStatistik.ts).where((db.CoronaStatistik.typ == TYP) &
                                     (db.CoronaStatistik.quelle == quelle) &
                                     (db.CoronaStatistik.country_region == country_region)
                                     ).order_by(db.CoronaStatistik.ts.desc()).limit(1).scalar()
    if ts is None:
        ts = datetime.datetime(1970, 1, 1)
    return ts


def auslesen_unverarbeiteter_daten(quelle, land, bev_zahl, CoronaStatistik):
    ts = letzten_eintrag_ermitteln(quelle, land)
    daten = []
    query = db.CoronaDaten.select(db.CoronaDaten.ts, db.CoronaDaten.confirmed).where(
        (db.CoronaDaten.quelle == quelle) & (db.CoronaDaten.country_region == land) & (
            db.CoronaDaten.ts > ts)).namedtuples()
    for datensatz in query:
        prozent_bev = datensatz.confirmed / bev_zahl[land] * 100
        prozent_bev = round(prozent_bev, 8)
        print(f"Im Land {land} {prozent_bev}% infiziert")
        daten.append(CoronaStatistik(datensatz.ts, quelle, land, TYP, prozent_bev))
    return daten

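
# Illustrative worked example of the share-of-population figure above (invented
# numbers, not from the original commit): 150000 confirmed cases with
# bev_zahl["Germany"] == 83000000 inhabitants give
# round(150000 / 83000000 * 100, 8) == 0.18072289 percent.

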
def scrape_bev(land):
    r = requests.get(f"{URL}{land}")
    soup = BeautifulSoup(r.text, features="html5lib")
    table = soup.find("div", {"id": "cp1"})
    try:
        value = table.get_text()
    except AttributeError:
        # find() returned None, i.e. the population counter was not on the page.
        value = None
    else:
        value = int(value.replace(",", ""))
    return value


if __name__ == "__main__":
    scrape_bev("Germany")
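
Both hunks import db_model as db, but that module is not part of this diff. A minimal sketch of what it presumably provides, inferred only from the calls above (db.database.initialize, db.create_tables, and the CoronaDaten / CoronaStatistik fields that are queried); the real schema very likely has more columns and may differ.

# Hypothetical db_model.py, reconstructed from this diff; not part of the commit.
from peewee import (CharField, DatabaseProxy, DateTimeField, FloatField,
                    IntegerField, Model)

# Proxy is initialized in main() with the tunnelled PostgresqlDatabase.
database = DatabaseProxy()


class BaseModel(Model):
    class Meta:
        database = database


class CoronaDaten(BaseModel):
    ts = DateTimeField()
    quelle = CharField()
    country_region = CharField()
    confirmed = IntegerField()


class CoronaStatistik(BaseModel):
    ts = DateTimeField()
    quelle = CharField()
    country_region = CharField()
    typ = CharField()
    wert = FloatField()


def create_tables():
    database.create_tables([CoronaDaten, CoronaStatistik])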