various things
parent 813e3e5b42 commit 051535008f
@@ -0,0 +1,96 @@
import toml
import os
import db_model as db
from peewee import PostgresqlDatabase
import datetime
from sshtunnel import SSHTunnelForwarder
import verdoppelungsrate
from dataclasses import dataclass
from dataclasses_json import dataclass_json
import scrape_bev


@dataclass_json
@dataclass
class CoronaStatistik:
    # One derived statistics record: timestamp, data source, country,
    # statistic type (e.g. doubling rate or share of population) and value.
    ts: datetime.datetime
    quelle: str
    country_region: str
    typ: str
    wert: float


def config_laden():
    # Read config.toml from the directory this script lives in.
    configfile = os.path.join(SKRIPTPFAD, "config.toml")
    with open(configfile) as file:
        return toml.loads(file.read())


SKRIPTPFAD = os.path.abspath(os.path.dirname(__file__))
CONFIG = config_laden()


def verdoppelungsrate_bearbeiten(quelle, country):
    # Find the last processed timestamp for this source/country and compute
    # the doubling rates for all entries newer than that.
    letzter_ts = verdoppelungsrate.letzten_eintrag_ermitteln(quelle, country)
    if letzter_ts is None:
        letzter_ts = datetime.datetime(1970, 1, 1)
    daten = verdoppelungsrate.letzte_verdopplungsraten_berechnen(quelle, country, letzter_ts)
    return daten


def alle_eintraege_abarbeiten():
    # Iterate over every data source and country in CoronaDaten and collect
    # the derived statistics (doubling rates and share of population).
    daten = []
    bev_zahlen = {}
    query_quelle = db.CoronaDaten.select(db.CoronaDaten.quelle).group_by(db.CoronaDaten.quelle)
    for data in query_quelle:
        if data.quelle == "rki_landkreis":
            continue
        query_country = db.CoronaDaten.select(db.CoronaDaten.country_region).where(
            db.CoronaDaten.quelle == data.quelle).group_by(db.CoronaDaten.country_region)
        for datensatz in query_country:
            daten_verdoppelungsrate = verdoppelungsrate_bearbeiten(data.quelle, datensatz.country_region)
            if daten_verdoppelungsrate:
                daten.extend(daten_verdoppelungsrate)
            # Scrape the population figure only once per country and cache it.
            if datensatz.country_region not in bev_zahlen:
                bev_zahlen[datensatz.country_region] = scrape_bev.scrape_bev(datensatz.country_region)
            if bev_zahlen[datensatz.country_region]:
                daten_bev_prozent = scrape_bev.auslesen_unverarbeiteter_daten(data.quelle,
                                                                              datensatz.country_region, bev_zahlen,
                                                                              CoronaStatistik)
            else:
                daten_bev_prozent = None
            if daten_bev_prozent:
                daten.extend(daten_bev_prozent)

            # Flush in batches so a single insert does not grow too large.
            if len(daten) > 5000:
                print("Sende Daten")
                daten_senden(daten)
                daten = []
    return daten


def daten_senden(daten):
    # Serialise the dataclass instances via dataclasses_json and bulk-insert them.
    if not daten:
        return
    daten_dict = []
    for data in daten:
        daten_dict.append(data.to_dict())
    db.CoronaStatistik.insert_many(daten_dict).execute()


def main():
    # Open an SSH tunnel to the database host and bind the peewee database
    # proxy to the tunnelled PostgreSQL connection.
    with SSHTunnelForwarder(
            (CONFIG["ssh"]["ip_server"], CONFIG["ssh"]["ssh_port"]), ssh_username=CONFIG["ssh"]["user"],
            ssh_password=CONFIG["ssh"]["pw"], remote_bind_address=('127.0.0.1', CONFIG["pg"]["pgport"])) as server:

        db.database.initialize(PostgresqlDatabase(CONFIG["pg"]["pgdb"],
                                                  user=CONFIG["pg"]["pguser"], password=CONFIG["pg"]["pgpw"],
                                                  host="127.0.0.1",
                                                  port=server.local_bind_port))
        db.create_tables()
        daten = alle_eintraege_abarbeiten()
        daten_senden(daten)
        db.database.close()


if __name__ == "__main__":
    main()

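Note: the script reads a config.toml next to it via config_laden(). That file is not part of this commit; judging only from the keys accessed in main(), it presumably follows a layout like the sketch below. All values are made-up placeholders, not taken from this repository.

# Hypothetical config.toml layout, inferred from the CONFIG[...] lookups above;
# every value is a placeholder.
import toml

BEISPIEL_CONFIG = toml.loads("""
[ssh]
ip_server = "203.0.113.10"  # host running the SSH server in front of PostgreSQL
ssh_port = 22
user = "deploy"
pw = "changeme"

[pg]
pgdb = "corona"
pguser = "corona"
pgpw = "changeme"
pgport = 5432
""")

# main() then reads e.g. CONFIG["ssh"]["ip_server"] and CONFIG["pg"]["pgport"].
assert BEISPIEL_CONFIG["ssh"]["ssh_port"] == 22
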
@@ -1,16 +1,50 @@
import requests
from bs4 import BeautifulSoup
import db_model as db
import datetime


countrys = ["Germany"]

# Statistic type written by this module: confirmed cases as a share of the population.
TYP = "prozent_bev"
URL = "https://countrymeters.info/en/"


def letzten_eintrag_ermitteln(quelle, country_region):
    # Return the timestamp of the newest "prozent_bev" record for this
    # source/country, or 1970-01-01 if nothing has been written yet.
    ts = db.CoronaStatistik.select(
        db.CoronaStatistik.ts).where((db.CoronaStatistik.typ == TYP) &
                                     (db.CoronaStatistik.quelle == quelle) &
                                     (db.CoronaStatistik.country_region == country_region)
                                     ).order_by(db.CoronaStatistik.ts.desc()).limit(1).scalar()
    if ts is None:
        ts = datetime.datetime(1970, 1, 1)
    return ts


def auslesen_unverarbeiteter_daten(quelle, land, bev_zahl, CoronaStatistik):
    # For every CoronaDaten row newer than the last processed one, express the
    # confirmed cases as a percentage of the country's population.
    ts = letzten_eintrag_ermitteln(quelle, land)
    daten = []
    query = db.CoronaDaten.select(db.CoronaDaten.ts, db.CoronaDaten.confirmed).where(
        (db.CoronaDaten.quelle == quelle) & (db.CoronaDaten.country_region == land) & (
            db.CoronaDaten.ts > ts)).namedtuples()
    for datensatz in query:
        prozent_bev = datensatz.confirmed / bev_zahl[land] * 100
        prozent_bev = round(prozent_bev, 8)
        print(f"Im Land {land} {prozent_bev}% infiziert")
        daten.append(CoronaStatistik(datensatz.ts, quelle, land, TYP, prozent_bev))
    return daten


def scrape_bev(land):
    # Fetch the country's current population figure from countrymeters.info;
    # the element with id "cp1" is expected to hold the live population counter.
    r = requests.get(f"{URL}{land}")
    soup = BeautifulSoup(r.text, features="html5lib")
    table = soup.find("div", {"id": "cp1"})
    print(table)
    try:
        value = table.get_text()
    except AttributeError:
        # Element not found (unknown country or changed page layout).
        value = None
    else:
        value = int(value.replace(",", ""))
    return value


if __name__ == "__main__":
    scrape_bev("Germany")

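db_model is imported as db in both files but is not part of this diff. Based only on how it is used here (a database proxy initialised in main(), create_tables(), and the CoronaDaten / CoronaStatistik tables that are queried and inserted into), a minimal sketch could look like the following; the field names come from the queries above, but the types, defaults and any further columns are assumptions, not taken from this commit.

# db_model.py -- hypothetical sketch, reconstructed only from its usage in this diff.
import datetime
from peewee import (DatabaseProxy, Model, CharField, DateTimeField,
                    FloatField, IntegerField)

# Proxy so that main() can bind the real PostgresqlDatabase at runtime
# via db.database.initialize(...).
database = DatabaseProxy()


class BaseModel(Model):
    class Meta:
        database = database


class CoronaDaten(BaseModel):
    # Raw scraped case numbers per source and country (assumed schema).
    ts = DateTimeField(default=datetime.datetime.now)
    quelle = CharField()
    country_region = CharField()
    confirmed = IntegerField()


class CoronaStatistik(BaseModel):
    # Derived statistics written by daten_senden() (assumed schema).
    ts = DateTimeField()
    quelle = CharField()
    country_region = CharField()
    typ = CharField()
    wert = FloatField()


def create_tables():
    # Create both tables if they do not exist yet.
    database.create_tables([CoronaDaten, CoronaStatistik], safe=True)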