diff --git a/.gitignore b/.gitignore
index 6a18ad4..adcd7c8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -94,3 +94,6 @@ ENV/
 
 # Rope project settings
 .ropeproject
+
+# Config TOML
+config.toml
diff --git a/clone_online_to_offline_db.py b/clone_online_to_offline_db.py
new file mode 100644
index 0000000..fe5c2a2
--- /dev/null
+++ b/clone_online_to_offline_db.py
@@ -0,0 +1,50 @@
+import toml
+import os
+import db_model as db
+import requests
+import json
+from peewee import SqliteDatabase
+import datetime
+
+
+def config_laden():
+    configfile = os.path.join(SKRIPTPFAD, "config.toml")
+    with open(configfile) as file:
+        return toml.loads(file.read())
+
+
+SKRIPTPFAD = os.path.abspath(os.path.dirname(__file__))
+CONFIG = config_laden()
+
+
+def headers_erstellen():
+    headers = {"Authorization": "Bearer {token}".format(token=CONFIG["token"]),
+               "Prefer": "resolution=merge-duplicates"}
+    return headers
+
+
+def tabelle_auslesen(tabelle, headers):
+    url = f"{CONFIG['url']}{tabelle}"
+    r = requests.get(url, headers=headers)
+    erg = json.loads(r.text)
+    return erg
+
+
+def main():
+    # db_model exposes a peewee Proxy named "database"; bind it to SQLite here.
+    db.database.initialize(SqliteDatabase(os.path.join(SKRIPTPFAD, "corona.db3")))
+    db.create_tables()
+    headers = headers_erstellen()
+
+    erg = tabelle_auslesen(CONFIG["tablename_countryliste"], headers)
+    for data_dict in erg:
+        db.CoronaCountry.create(**data_dict)
+
+    erg = tabelle_auslesen(CONFIG["tablename_daten"], headers)
+    for data_dict in erg:
+        data_dict["ts"] = datetime.datetime.strptime(data_dict["ts"], "%Y-%m-%dT%H:%M:%S")
+        db.CoronaDaten.create(**data_dict)
+
+
+if __name__ == "__main__":
+    main()
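Note: every script in this patch loads a config.toml sitting next to the scripts (kept out of version control by the .gitignore entry above). A minimal sketch of its layout, reconstructed from the keys the code reads — all values are placeholders, and the two table-name values are assumptions:

token = "your-postgrest-jwt"                 # bearer token for PostgREST
url = "https://example.org/postgrest/"       # base URL; table names get appended
tablename_countryliste = "corona_country"    # assumed value
tablename_daten = "corona_daten"             # assumed value

[ssh]                                        # used by country_region_korrektur.py
ip_server = "203.0.113.1"
ssh_port = 22
user = "username"
pw = "password"

[pg]                                         # used by country_region_korrektur.py
pgdb = "corona"
pguser = "postgres"
pgpw = "password"
pgport = 5432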
diff --git a/country_region_korrektur.py b/country_region_korrektur.py
new file mode 100644
index 0000000..b05acf9
--- /dev/null
+++ b/country_region_korrektur.py
@@ -0,0 +1,90 @@
+import toml
+import os
+import db_model as db
+from peewee import IntegrityError, PostgresqlDatabase
+from sshtunnel import SSHTunnelForwarder
+
+
+def config_laden():
+    configfile = os.path.join(SKRIPTPFAD, "config.toml")
+    with open(configfile) as file:
+        return toml.loads(file.read())
+
+
+SKRIPTPFAD = os.path.abspath(os.path.dirname(__file__))
+CONFIG = config_laden()
+QUELLEN = ["jhu", "who", "rki"]
+
+
+def alle_country_vereinheitlichen():
+    print("Entering country unification")
+    country_dict = {}
+    for datensatz in db.CoronaCountry.select():
+        neuer_name = datensatz.country_region.lower().capitalize()
+        print(neuer_name)
+        try:
+            if neuer_name not in country_dict[datensatz.quelle]:
+                country_dict[datensatz.quelle].append(neuer_name)
+        except KeyError:
+            country_dict[datensatz.quelle] = [neuer_name]
+        with db.database.atomic() as transaction:
+            try:
+                db.CoronaDaten.update(country_region=neuer_name).where(
+                    db.CoronaDaten.country_region == datensatz.country_region).execute()
+                print(neuer_name, datensatz.country_region)
+            except IntegrityError:
+                print("Error")
+                transaction.rollback()
+    print("Loop finished")
+    db.CoronaCountry.delete().execute()
+    with db.database.atomic():
+        print("CountryCreate")
+        for quelle, countries in country_dict.items():
+            print(quelle, countries)
+            for country in countries:
+                db.CoronaCountry.create(country_region=country, quelle=quelle)
+
+
+def country_umbenennen():
+    quelle = input("Enter source: ")
+    name_altes_land = input("Enter the old name of the country: ")
+    name_neues_land = input("Enter the new name of the country: ")
+
+    anzahl_daten = db.CoronaCountry.select().where(
+        (db.CoronaCountry.country_region == name_neues_land) & (db.CoronaCountry.quelle == quelle)
+    ).count()
+
+    if anzahl_daten > 0:
+        # The new name already exists, so the old entry is simply dropped.
+        db.CoronaCountry.delete().where(
+            db.CoronaCountry.country_region == name_altes_land
+        ).execute()
+    else:
+        db.CoronaCountry.update(country_region=name_neues_land).where(
+            db.CoronaCountry.country_region == name_altes_land
+        ).execute()
+    try:
+        db.CoronaDaten.update(country_region=name_neues_land).where(
+            db.CoronaDaten.country_region == name_altes_land
+        ).execute()
+    except IntegrityError:
+        # Rows under the new name already exist; drop the now-duplicate old rows.
+        db.CoronaDaten.delete().where(
+            db.CoronaDaten.country_region == name_altes_land
+        ).execute()
+
+
+def main():
+    with SSHTunnelForwarder(
+            (CONFIG["ssh"]["ip_server"], CONFIG["ssh"]["ssh_port"]), ssh_username=CONFIG["ssh"]["user"],
+            ssh_password=CONFIG["ssh"]["pw"], remote_bind_address=('127.0.0.1', CONFIG["pg"]["pgport"])) as server:
+
+        db.database.initialize(PostgresqlDatabase(CONFIG["pg"]["pgdb"],
+                                                  user=CONFIG["pg"]["pguser"], password=CONFIG["pg"]["pgpw"],
+                                                  host="127.0.0.1",
+                                                  port=server.local_bind_port))
+        country_umbenennen()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/db_model.py b/db_model.py
new file mode 100644
index 0000000..3abe458
--- /dev/null
+++ b/db_model.py
@@ -0,0 +1,43 @@
+from peewee import (CompositeKey, DateTimeField, IntegerField, Model,
+                    Proxy, TextField)
+
+# Deferred database handle; each script binds it via database.initialize(...).
+database = Proxy()
+
+
+class UnknownField(object):
+    def __init__(self, *_, **__): pass
+
+
+class BaseModel(Model):
+    class Meta:
+        database = database
+
+
+class CoronaCountry(BaseModel):
+    country_region = TextField(primary_key=True)
+    quelle = TextField(null=True)
+
+    class Meta:
+        table_name = 'corona_country'
+
+
+class CoronaDaten(BaseModel):
+    active = IntegerField(null=True)
+    confirmed = IntegerField(null=True)
+    country_region = TextField()
+    deaths = IntegerField(null=True)
+    quelle = TextField(null=True)
+    recoverd = IntegerField(null=True)  # sic: matches the existing column name
+    ts = DateTimeField()
+
+    class Meta:
+        table_name = 'corona_daten'
+        indexes = (
+            (('ts', 'country_region'), True),
+        )
+        primary_key = CompositeKey('country_region', 'ts')
+
+
+def create_tables():
+    database.create_tables([CoronaCountry, CoronaDaten])
diff --git a/db_model_postgrest.py b/db_model_postgrest.py
new file mode 100644
index 0000000..198a7d0
--- /dev/null
+++ b/db_model_postgrest.py
@@ -0,0 +1,44 @@
+import datetime
+from dataclasses import dataclass
+from typing import Union
+
+import requests
+from dataclasses_json import dataclass_json
+
+
+@dataclass_json
+@dataclass
+class CoronaDaten:
+    ts: Union[datetime.datetime, str]
+    country_region: str
+    quelle: str
+    confirmed: int
+    deaths: int
+    recoverd: int  # sic: matches the column name in the database
+    active: int
+
+
+@dataclass_json
+@dataclass
+class CoronaCountry:
+    country_region: str
+    quelle: str
+
+
+def sende_daten(url, table, headers, daten):
+    url = f"{url}{table}"
+    for data in daten:
+        try:
+            r = requests.post(url, headers=headers, json=data.to_dict())
+        except AttributeError:
+            # Fall back for records that are not dataclass instances.
+            r = requests.post(url, headers=headers, data=data)
+        status_auswerten(r, data)
+
+
+def status_auswerten(r, data):
+    if r.status_code not in (200, 201):
+        print(f"Status code: {r.status_code}\nMessage: {r.text}")
+        print(data)
+    else:
+        print("Transferred successfully")
diff --git a/scrape_bev.py b/scrape_bev.py
new file mode 100644
index 0000000..95e0aa1
--- /dev/null
+++ b/scrape_bev.py
@@ -0,0 +1,19 @@
+import requests
+from bs4 import BeautifulSoup
+
+countries = ["Germany"]
+
+URL = "https://countrymeters.info/en/"
+
+
+def scrape_bev(land):
+    # Fetch the population page for the given country and print the counter div.
+    r = requests.get(f"{URL}{land}")
+    soup = BeautifulSoup(r.text, features="html5lib")
+    table = soup.find("div", {"id": "cp1"})
+    print(table)
+
+
+if __name__ == "__main__":
+    for land in countries:
+        scrape_bev(land)
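Note: db_model.py deliberately leaves the database unbound — peewee's Proxy is a placeholder that every entry point binds before the first query, as the main() functions above and below do. A minimal usage sketch (the SQLite filename is illustrative):

import db_model as db
from peewee import SqliteDatabase

# Bind the Proxy first; model access fails until initialize() has run.
db.database.initialize(SqliteDatabase("corona.db3"))
db.create_tables()
print(db.CoronaCountry.select().count())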
diff --git a/scrape_corona.py b/scrape_corona.py
new file mode 100644
index 0000000..cc46f92
--- /dev/null
+++ b/scrape_corona.py
@@ -0,0 +1,161 @@
+"""
+Sources:
+Johns Hopkins University:
+https://gisanddata.maps.arcgis.com/apps/opsdashboard/index.html#/bda7594740fd40299423467b48e9ecf6
+https://services1.arcgis.com/0MSEUqKaxRlEPj5g/arcgis/rest/services/ncov_cases/FeatureServer/1/query?f=json&where=Confirmed%20%3E%200&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&orderByFields=Confirmed%20desc%2CCountry_Region%20asc%2CProvince_State%20asc&resultOffset=0&resultRecordCount=250&cacheHint=true
+
+WHO:
+https://experience.arcgis.com/experience/685d0ace521648f8a5beeeee1b9125cd
+https://services.arcgis.com/5T5nSi527N4F7luB/arcgis/rest/services/Cases_by_country_Plg_V3/FeatureServer/0/query?f=json&where=1%3D1&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&orderByFields=cum_conf%20desc&resultOffset=0&resultRecordCount=125&cacheHint=true
+
+Germany, RKI:
+https://experience.arcgis.com/experience/478220a4c454480e823b17327b2bf1d4
+https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/Coronaf%C3%A4lle_in_den_Bundesl%C3%A4ndern/FeatureServer/0/query?f=json&where=1%3D1&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&orderByFields=faelle_100000_EW%20desc&resultOffset=0&resultRecordCount=50&cacheHint=true
+"""
+
+
+import json
+import requests
+import db_model_postgrest as db
+import toml
+import os
+import datetime
+
+QUELLEN = {
+    "jhu": "https://services1.arcgis.com/0MSEUqKaxRlEPj5g/arcgis/rest/services/ncov_cases/FeatureServer/1/query?f"
+           "=json&where=Confirmed%20%3E%200&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields"
+           "=*&orderByFields=Confirmed%20desc%2CCountry_Region%20asc%2CProvince_State%20asc&resultOffset=0"
+           "&resultRecordCount=250&cacheHint=true",
+    "who": "https://services.arcgis.com/5T5nSi527N4F7luB/arcgis/rest/services/Cases_by_country_Plg_V3/FeatureServer/0"
+           "/query?f=json&where=1%3D1&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields"
+           "=*&orderByFields=cum_conf%20desc&resultOffset=0&resultRecordCount=125&cacheHint=true",
+    "rki": "https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/Coronaf%C3%A4lle_in_den_Bundesl%C3"
+           "%A4ndern/FeatureServer/0/query?f=json&where=1%3D1&returnGeometry=false&spatialRel"
+           "=esriSpatialRelIntersects&outFields=*&orderByFields=faelle_100000_EW%20desc&resultOffset=0"
+           "&resultRecordCount=50&cacheHint=true"
+}
+
+
+def config_laden():
+    configfile = os.path.join(SKRIPTPFAD, "config.toml")
+    with open(configfile) as file:
+        return toml.loads(file.read())
+
+
+SKRIPTPFAD = os.path.abspath(os.path.dirname(__file__))
+CONFIG = config_laden()
+
+
+def daten_abrufen(url):
+    response = requests.get(url)
+    json_text = json.loads(response.text)
+    return json_text["features"]
+
+
+def jhu_daten_erstellen(quelle, daten):
+    # JHU reports one row per province/state, so the rows are aggregated per country.
+    daten_dict = {}
+    for data in daten:
+        datum = data["attributes"]
+        country = datum["Country_Region"].replace(" ", "")
+        if country in daten_dict:
+            datensatz = daten_dict[country]
+            datensatz.ts = max(datensatz.ts, datetime.datetime.fromtimestamp(datum["Last_Update"] / 1000))
+            datensatz.confirmed += datum["Confirmed"]
+            datensatz.deaths += datum["Deaths"]
+            datensatz.recoverd += datum["Recovered"]
+            datensatz.active = datensatz.confirmed - datensatz.deaths - datensatz.recoverd
+            daten_dict[country] = datensatz
+        else:
+            daten_dict[country] = db.CoronaDaten(
+                datetime.datetime.fromtimestamp(datum["Last_Update"] / 1000),
+                country.lower().capitalize(),
+                quelle,
+                datum["Confirmed"],
+                datum["Deaths"],
+                datum["Recovered"],
+                datum["Confirmed"] - datum["Deaths"] - datum["Recovered"]
+            )
+    return daten_dict
+
+
+def who_daten_erstellen(quelle, daten):
+    daten_dict = {}
+    for data in daten:
+        datum = data["attributes"]
+        # WHO provides no recovered/active numbers; -1 marks them as missing.
+        daten_dict[datum["ADM0_NAME"]] = db.CoronaDaten(
+            datetime.datetime.fromtimestamp(datum["DateOfReport"] / 1000),
+            datum["ADM0_NAME"].lower().capitalize(),
+            quelle,
+            datum["cum_conf"],
+            datum["cum_death"],
+            -1,
+            -1
+        )
+    return daten_dict
+
+
+def rki_daten_erstellen(quelle, daten):
+    daten_dict = {}
+    for data in daten:
+        datum = data["attributes"]
+        daten_dict[datum["LAN_ew_GEN"]] = db.CoronaDaten(
+            datetime.datetime.fromtimestamp(datum["Aktualisierung"] / 1000),
+            datum["LAN_ew_GEN"].lower().capitalize(),
+            quelle,
+            datum["Fallzahl"],
+            datum["Death"],
+            -1,
+            -1
+        )
+    return daten_dict
+
+
+def daten_erstellen(quelle, daten):
+    if quelle == "jhu":
+        daten_db = jhu_daten_erstellen(quelle, daten)
+    elif quelle == "who":
+        daten_db = who_daten_erstellen(quelle, daten)
+    elif quelle == "rki":
+        daten_db = rki_daten_erstellen(quelle, daten)
+    else:
+        raise ValueError("Unknown source")
+    return daten_db
+
+
+def daten_trennen(daten):
+    # Serialise the timestamps so the records can be sent as JSON.
+    daten_liste = []
+    for value in daten.values():
+        value.ts = value.ts.strftime("%Y-%m-%d %H:%M:%S")
+        daten_liste.append(value)
+    return daten_liste
+
+
+def country_liste_erstellen(quelle, daten):
+    country_liste = []
+    for key in daten:
+        country_liste.append(db.CoronaCountry(key, quelle))
+    return country_liste
+
+
+def headers_erstellen():
+    headers = {"Authorization": "Bearer {token}".format(token=CONFIG["token"]),
+               "Prefer": "resolution=ignore-duplicates"}
+    return headers
+
+
+def main():
+    headers = headers_erstellen()
+    for quelle, url in QUELLEN.items():
+        json_daten = daten_abrufen(url)
+        daten = daten_erstellen(quelle, json_daten)
+        country_liste = country_liste_erstellen(quelle, daten)
+        daten_server = daten_trennen(daten)
+        db.sende_daten(CONFIG["url"], CONFIG["tablename_countryliste"], headers, country_liste)
+        db.sende_daten(CONFIG["url"], CONFIG["tablename_daten"], headers, daten_server)
+
+
+if __name__ == "__main__":
+    main()
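Note: all three ArcGIS endpoints above return a JSON document whose records live under features[n].attributes; daten_abrufen() strips the outer layer and the *_daten_erstellen() functions read the attribute keys. Roughly, for the JHU source (the values are invented; the key names come from the code):

antwort = {
    "features": [
        {"attributes": {
            "Country_Region": "Germany",   # province rows share this value
            "Last_Update": 1584921600000,  # epoch milliseconds, hence the / 1000
            "Confirmed": 22213,
            "Deaths": 84,
            "Recovered": 209,
        }},
        # ... one entry per province/state ...
    ]
}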
diff --git a/verdoppelungsrate.py b/verdoppelungsrate.py
new file mode 100644
index 0000000..09c2b72
--- /dev/null
+++ b/verdoppelungsrate.py
@@ -0,0 +1,86 @@
+import toml
+import os
+import db_model as db
+from peewee import SqliteDatabase
+from peewee import fn
+
+
+def config_laden():
+    configfile = os.path.join(SKRIPTPFAD, "config.toml")
+    with open(configfile) as file:
+        return toml.loads(file.read())
+
+
+SKRIPTPFAD = os.path.abspath(os.path.dirname(__file__))
+CONFIG = config_laden()
+QUELLEN = ["jhu", "who", "rki"]
+
+
+def verdoppelungsrate_ermitteln(quelle, land, confirmed, confirmed_ts):
+    # Doubling time in seconds: the span between confirmed_ts and the most
+    # recent point at which the case count was still below half of confirmed.
+    halbe_infizierte = confirmed / 2
+    ts_halbe = db.CoronaDaten.select(db.CoronaDaten.ts).where(
+        (db.CoronaDaten.quelle == quelle) & (db.CoronaDaten.country_region == land) &
+        (db.CoronaDaten.confirmed < halbe_infizierte)).order_by(db.CoronaDaten.confirmed.desc()).limit(1).scalar()
+    try:
+        verdoppelungszeit = (confirmed_ts - ts_halbe).total_seconds()
+    except TypeError:
+        # No record below half of the current count exists yet.
+        verdoppelungszeit = None
+    return verdoppelungszeit
+
+
+def letzte_verdoppelungsrate(quelle, land):
+    # Relies on SQLite returning the matching row for a bare max() aggregate.
+    query = db.CoronaDaten.select(db.CoronaDaten.ts, fn.max(db.CoronaDaten.confirmed).alias("confirmed")).where(
+        (db.CoronaDaten.quelle == quelle) & (db.CoronaDaten.country_region == land)).dicts()
+    for datensatz in query:
+        verdoppelung = verdoppelungsrate_ermitteln(quelle, land, datensatz["confirmed"], datensatz["ts"])
+        print(verdoppelung)
+
+
+def durchschnittliche_verdoppelungsrate(quelle, land):
+    query = db.CoronaDaten.select(db.CoronaDaten.ts, db.CoronaDaten.confirmed).where(
+        (db.CoronaDaten.quelle == quelle) & (db.CoronaDaten.country_region == land)
+    ).namedtuples()
+
+    verdoppelungsraten = []
+    for datensatz in query:
+        verdoppelungsrate = verdoppelungsrate_ermitteln(quelle, land, datensatz.confirmed, datensatz.ts)
+        if verdoppelungsrate is not None:
+            verdoppelungsraten.append(verdoppelungsrate)
+    if not verdoppelungsraten:
+        print("No doubling time could be determined")
+        return
+    durchschnitt = sum(verdoppelungsraten) / len(verdoppelungsraten)
+    print(verdoppelungsrate_in_tagen(durchschnitt))
+
+
+def jede_verdoppelungsrate(quelle, land):
+    query = db.CoronaDaten.select(db.CoronaDaten.ts, db.CoronaDaten.confirmed).where(
+        (db.CoronaDaten.quelle == quelle) & (db.CoronaDaten.country_region == land)
+    ).order_by(db.CoronaDaten.ts.desc()).namedtuples()
+    for datensatz in query:
+        verdoppelungsrate = verdoppelungsrate_ermitteln(quelle, land, datensatz.confirmed, datensatz.ts)
+        print(f"Doubling time as of {datensatz.ts}: {verdoppelungsrate_in_tagen(verdoppelungsrate)}")
+
+
+def verdoppelungsrate_in_tagen(verdoppelungsrate):
+    try:
+        return verdoppelungsrate / (60 * 60 * 24)
+    except TypeError:
+        return None
+
+
+def main():
+    db.database.initialize(SqliteDatabase(os.path.join(SKRIPTPFAD, "corona.db3")))
+    for quelle in QUELLEN:
+        query = db.CoronaCountry.select().where(db.CoronaCountry.quelle == quelle)
+        for datensatz in query:
+            if quelle == "jhu" and datensatz.country_region == "Germany":
+                jede_verdoppelungsrate(quelle, datensatz.country_region)
+
+
+if __name__ == "__main__":
+    main()
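Note: a worked example of the doubling-time arithmetic in verdoppelungsrate_ermitteln() and verdoppelungsrate_in_tagen(), with invented numbers:

import datetime

confirmed = 22000                                     # current case count
confirmed_ts = datetime.datetime(2020, 3, 23, 0, 0)
# most recent timestamp at which confirmed was still below half (11000):
ts_halbe = datetime.datetime(2020, 3, 18, 12, 0)

sekunden = (confirmed_ts - ts_halbe).total_seconds()  # 388800.0
tage = sekunden / (60 * 60 * 24)                      # 4.5 -> cases doubled in ~4.5 days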