vieles
							parent
							
								
									813e3e5b42
								
							
						
					
					
						commit
						051535008f
					
				@ -0,0 +1,96 @@
 | 
				
			|||||||
 | 
					import toml
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import db_model as db
 | 
				
			||||||
 | 
					from peewee import PostgresqlDatabase
 | 
				
			||||||
 | 
					import datetime
 | 
				
			||||||
 | 
					from sshtunnel import SSHTunnelForwarder
 | 
				
			||||||
 | 
					import verdoppelungsrate
 | 
				
			||||||
 | 
					from dataclasses import dataclass
 | 
				
			||||||
 | 
					from dataclasses_json import dataclass_json
 | 
				
			||||||
 | 
					import scrape_bev
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@dataclass_json
					@dataclass
					class CoronaStatistik:
					    """One derived statistic value for a source/region at a point in time.

					    Instances are created positionally elsewhere, so the field order
					    (ts, quelle, country_region, typ, wert) must not change.
					    ``daten_senden`` serialises instances through ``to_dict()``.
					    """

					    # Timestamp the value belongs to.
					    ts: datetime.datetime
					    # Name of the data source the value was derived from.
					    quelle: str
					    # Country or region the value refers to.
					    country_region: str
					    # Kind of statistic, e.g. "prozent_bev".
					    typ: str
					    # The numeric value itself.
					    wert: float
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def config_laden():
					    """Load ``config.toml`` from the script directory.

					    Returns:
					        The parsed configuration as returned by ``toml.loads``.

					    Raises:
					        OSError: if the file cannot be opened.
					    """
					    configfile = os.path.join(SKRIPTPFAD, "config.toml")
					    # TOML files are UTF-8 by specification; without an explicit
					    # encoding the platform default would be used, which can break
					    # non-ASCII values (e.g. passwords) on some systems.
					    with open(configfile, encoding="utf-8") as file:
					        return toml.loads(file.read())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Absolute directory that contains this script; config.toml is looked up here.
					SKRIPTPFAD = os.path.dirname(os.path.abspath(__file__))
					# Parsed configuration, loaded once at import time.
					CONFIG = config_laden()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def verdoppelungsrate_bearbeiten(quelle, country):
					    """Compute the doubling-rate entries still missing for one source/country.

					    Asks the ``verdoppelungsrate`` module for the timestamp of the last
					    stored entry and uses 1970-01-01 when there is none.

					    Returns:
					        Whatever ``letzte_verdopplungsraten_berechnen`` returns for the
					        given source, country and start timestamp.
					    """
					    startpunkt = verdoppelungsrate.letzten_eintrag_ermitteln(quelle, country)
					    if startpunkt is None:
					        # Nothing stored yet: start from the epoch.
					        startpunkt = datetime.datetime(1970, 1, 1)
					    return verdoppelungsrate.letzte_verdopplungsraten_berechnen(quelle, country, startpunkt)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def alle_eintraege_abarbeiten():
					    """Collect new statistics for every source/country pair in ``CoronaDaten``.

					    For each pair the doubling-rate entries and, when a population
					    figure could be scraped, the percent-of-population entries are
					    gathered. The source "rki_landkreis" is skipped. Whenever more than
					    5000 entries have accumulated they are written out via
					    ``daten_senden`` and the buffer starts over.

					    Returns:
					        The entries that have not been sent yet (possibly empty).
					    """
					    puffer = []
					    # Scraped population per country; failed lookups are cached as well so
					    # each country is requested at most once.
					    bev_zahlen = {}
					    quellen = db.CoronaDaten.select(db.CoronaDaten.quelle).group_by(db.CoronaDaten.quelle)
					    for quelle_zeile in quellen:
					        quelle = quelle_zeile.quelle
					        if quelle == "rki_landkreis":
					            continue
					        laender = (
					            db.CoronaDaten
					            .select(db.CoronaDaten.country_region)
					            .where(db.CoronaDaten.quelle == quelle)
					            .group_by(db.CoronaDaten.country_region)
					        )
					        for land_zeile in laender:
					            land = land_zeile.country_region

					            raten = verdoppelungsrate_bearbeiten(quelle, land)
					            if raten:
					                puffer.extend(raten)

					            if land not in bev_zahlen:
					                bev_zahlen[land] = scrape_bev.scrape_bev(land)
					            if bev_zahlen[land]:
					                anteile = scrape_bev.auslesen_unverarbeiteter_daten(
					                    quelle, land, bev_zahlen, CoronaStatistik)
					                if anteile:
					                    puffer.extend(anteile)

					            if len(puffer) > 5000:
					                print("Sende Daten")
					                daten_senden(puffer)
					                puffer = []
					    return puffer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def daten_senden(daten):
					    """Write the given statistic entries to the ``CoronaStatistik`` table.

					    Args:
					        daten: iterable of objects offering ``to_dict()``.

					    Returns:
					        None. Nothing is sent when ``daten`` is empty.
					    """
					    # The final call from main() may carry no rows at all; skip the
					    # database round trip instead of building an INSERT without values.
					    if not daten:
					        return
					    zeilen = [eintrag.to_dict() for eintrag in daten]
					    db.CoronaStatistik.insert_many(zeilen).execute()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def main():
					    """Open the SSH tunnel, bind the database and process all entries.

					    Connection settings come from the ``ssh`` and ``pg`` sections of
					    ``CONFIG``. PostgreSQL is reached through the local end of the
					    tunnel. The database is closed even when processing fails.
					    """
					    ssh = CONFIG["ssh"]
					    pg = CONFIG["pg"]
					    with SSHTunnelForwarder(
					            (ssh["ip_server"], ssh["ssh_port"]),
					            ssh_username=ssh["user"],
					            ssh_password=ssh["pw"],
					            remote_bind_address=('127.0.0.1', pg["pgport"])) as server:

					        db.database.initialize(PostgresqlDatabase(pg["pgdb"],
					                                                  user=pg["pguser"], password=pg["pgpw"],
					                                                  host="127.0.0.1",
					                                                  port=server.local_bind_port))
					        try:
					            db.create_tables()
					            daten = alle_eintraege_abarbeiten()
					            daten_senden(daten)
					        finally:
					            # Close before the tunnel is torn down, also on errors, so no
					            # connection is left dangling.
					            db.Database.close(db.database)


					if __name__ == "__main__":
					    main()
 | 
				
			||||||
@ -1,16 +1,50 @@
 | 
				
			|||||||
import requests
 | 
					import requests
 | 
				
			||||||
from bs4 import BeautifulSoup
 | 
					from bs4 import BeautifulSoup
 | 
				
			||||||
 | 
					import db_model as db
 | 
				
			||||||
 | 
					import datetime
 | 
				
			||||||
 | 
					
 | 
				
			||||||
countrys = ["Germany"]
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Value of the ``typ`` field for every entry this module creates and queries.
					TYP = "prozent_bev"
					# Base address of the country pages; the country name is appended to it.
					URL = "https://countrymeters.info/en/"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def letzten_eintrag_ermitteln(quelle, country_region):
					    """Return the newest stored timestamp of type ``TYP`` for a source/region.

					    Returns:
					        The latest ``ts`` found in ``CoronaStatistik``, or 1970-01-01 when
					        no matching row exists.
					    """
					    statistik = db.CoronaStatistik
					    bedingung = (
					        (statistik.typ == TYP)
					        & (statistik.quelle == quelle)
					        & (statistik.country_region == country_region)
					    )
					    neuester = (
					        statistik.select(statistik.ts)
					        .where(bedingung)
					        .order_by(statistik.ts.desc())
					        .limit(1)
					        .scalar()
					    )
					    return datetime.datetime(1970, 1, 1) if neuester is None else neuester
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def auslesen_unverarbeiteter_daten(quelle, land, bev_zahl, CoronaStatistik):
					    """Build percent-of-population entries for rows newer than the last stored one.

					    Args:
					        quelle: source name to filter ``CoronaDaten`` by.
					        land: country/region to filter by; also the key into ``bev_zahl``.
					        bev_zahl: mapping from country to its population figure.
					        CoronaStatistik: callable used to build each result entry; it is
					            called with (ts, quelle, land, TYP, value).

					    Returns:
					        List of the created entries, one per matching row.
					    """
					    seit = letzten_eintrag_ermitteln(quelle, land)
					    corona = db.CoronaDaten
					    offen = corona.select(corona.ts, corona.confirmed).where(
					        (corona.quelle == quelle) & (corona.country_region == land) & (corona.ts > seit)
					    ).namedtuples()

					    ergebnis = []
					    for zeile in offen:
					        # Confirmed cases as a percentage of the population, 8 decimals.
					        anteil = round(zeile.confirmed / bev_zahl[land] * 100, 8)
					        print(f"Im Land {land} {anteil}% infiziert")
					        ergebnis.append(CoronaStatistik(zeile.ts, quelle, land, TYP, anteil))
					    return ergebnis
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def scrape_bev(land):
    """Fetch the number shown in the ``cp1`` element of a country's page.

    The page is requested from ``URL`` with the country name appended.
    Presumably the element holds the current population counter
    (NOTE(review): confirm against the site).

    Args:
        land: country name as used in the site's URLs.

    Returns:
        The element's text as an int with thousands separators removed, or
        None when the element is missing or its text is not a number.

    Raises:
        requests.RequestException: on network failures, including timeout.
    """
    # Without a timeout a stalled server would block the whole run forever.
    r = requests.get(f"{URL}{land}", timeout=30)
    soup = BeautifulSoup(r.text, features="html5lib")
    table = soup.find("div", {"id": "cp1"})
    if table is None:
        # Unknown country or changed page layout.
        return None
    try:
        return int(table.get_text().replace(",", ""))
    except ValueError:
        # Element exists but does not contain a plain number; treat it like
        # a missing value so one odd page does not abort the caller's loop.
        return None


if __name__ == "__main__":
    scrape_bev("Germany")
 | 
				
			||||||
 | 
				
			|||||||
					Loading…
					
					
				
		Reference in New Issue