From 2776f92dace59db631c038fb6fcc771e1e1bc079 Mon Sep 17 00:00:00 2001
From: Hofei90 <29521028+hofei90@users.noreply.github.com>
Date: Mon, 6 Jan 2020 11:19:14 +0100
Subject: [PATCH] Initial Commit

---
 .gitignore              | 132 ++++++++++++++++++++++++++++++++++++++++
 abflussdaten_sammeln.py |  75 +++++++++++++++++++++++
 db_model.py             |  45 ++++++++++++++
 hnd_scraping.py         |  54 ++++++++++++++++
 hydris_data.py          |  92 ++++++++++++++++++++++++++++
 requirements.txt        |   5 ++
 vorlage_cfg_grenz.toml  |  69 +++++++++++++++++++++
 7 files changed, 472 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 abflussdaten_sammeln.py
 create mode 100644 db_model.py
 create mode 100644 hnd_scraping.py
 create mode 100644 hydris_data.py
 create mode 100644 requirements.txt
 create mode 100644 vorlage_cfg_grenz.toml

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..172ff7d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,132 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# Configuration file
+cfg_grenz.toml
\ No newline at end of file

diff --git a/abflussdaten_sammeln.py b/abflussdaten_sammeln.py
new file mode 100644
index 0000000..75f05ff
--- /dev/null
+++ b/abflussdaten_sammeln.py
@@ -0,0 +1,75 @@
+import os
+
+import pygeohash
+import toml
+
+import hnd_scraping
+import hydris_data
+import db_model as db
+
+
+def config_laden(configfile):
+    with open(configfile) as file:
+        return toml.loads(file.read())
+
+
+PFAD = os.path.abspath(os.path.dirname(__file__))
+CONFIG = config_laden(os.path.join(PFAD, "cfg_grenz.toml"))
+
+
+def daten_sammeln():
+    daten = []
+    for messstelle in CONFIG["hnd"]:
+        lat = CONFIG["hnd"][messstelle]["lat"]
+        lon = CONFIG["hnd"][messstelle]["lon"]
+        geohash = pygeohash.geohash.encode(lat, lon)
+        # max() on [timestamp, value] pairs picks the most recent measurement
+        durchfluss = max(hnd_scraping.scrap_messwerte_hnd(CONFIG["hnd"][messstelle]["url_q"]))
+        pegelstand = max(hnd_scraping.scrap_messwerte_hnd(CONFIG["hnd"][messstelle]["url_h"]))
+        daten.append({"ts": durchfluss[0], "geohash": geohash,
+                      "durchfluss": durchfluss[1], "pegelstand": pegelstand[1]})
+
+    hydris_daten = hydris_data.get_hydris_data("https://www.salzburg.gv.at/wasser/hydro/grafiken/data.json")
+    numbers = [204180, 204032, 203323, 204198, 203570, 203539]
+    hydris_daten_gefiltert = [hydris_data.get_station(hydris_daten, station_id=str(number))
+                              for number in numbers]
+    for station in hydris_daten_gefiltert:
+        geohash = pygeohash.geohash.encode(station.location.lat, station.location.lon)
+        daten.append({"ts": station.flow["15m.Cmd.RunOff"].timestamp, "geohash": geohash,
+                      "durchfluss": station.flow["15m.Cmd.RunOff"].value,
+                      "pegelstand": station.water_level["15m.Cmd.WiskiWeb"].value})
+    return daten
+
+
+def send_with_ssh() -> None:
+    db_adapter = CONFIG["db"]["db"]
+    port = CONFIG["db"][db_adapter]["port"]
+    # Optional dependency; only needed when ssh_tunnel is enabled in the config
+    from sshtunnel import SSHTunnelForwarder
+
+    config = config_laden(CONFIG["db"]["pfad_ssh_auth"])
+    with SSHTunnelForwarder(
+            (config["ip_server"], config["ssh_port"]),
+            ssh_username=config["user"],
+            ssh_password=config["pw"],
+            remote_bind_address=("127.0.0.1", port),
+    ) as server:
+        # Route the database connection through the tunnel's local endpoint
+        CONFIG["db"][db_adapter]["port"] = server.local_bind_port
+        init()
+
+
+def init() -> None:
+    db_adapter = CONFIG["db"]["db"]
+    db_ = db.init_db(CONFIG["db"][db_adapter]["database"], db_adapter, CONFIG["db"].get(db_adapter))
+    db.DB_PROXY.initialize(db_)
+    db.create_tables()
+    daten = daten_sammeln()
+    db.insert_many(daten)
+
+
+def main() -> None:
+    if CONFIG["db"]["ssh_tunnel"]:
+        send_with_ssh()
+    else:
+        init()
+
+
+if __name__ == "__main__":
+    main()

diff --git a/db_model.py b/db_model.py
new file mode 100644
index 0000000..c4fcaaf
--- /dev/null
+++ b/db_model.py
@@ -0,0 +1,45 @@
+import os
+from typing import Any, Dict, Optional
+
+import peewee
+
+SKRIPTPFAD = os.path.abspath(os.path.dirname(__file__))
+DB_PROXY = peewee.Proxy()
+
+
+class BaseModel(peewee.Model):
+    class Meta:
+        database = DB_PROXY
+
+
+class FlussDaten(BaseModel):
+    ts = peewee.DateTimeField()
+    geohash = peewee.CharField(12)
+    durchfluss = peewee.FloatField()
+    pegelstand = peewee.FloatField()
+
+
+def insert_many(daten):
+    FlussDaten.insert_many(daten).execute()
+
+
+def create_tables() -> None:
+    DB_PROXY.create_tables([FlussDaten])
+
+
+def init_db(name: str, type_: str = "sqlite", config: Optional[Dict[str, Any]] = None):
+    # Copy so the caller's config dict is not mutated
+    config = dict(config or {})
+    drivers = {
+        "sqlite": peewee.SqliteDatabase,
+        "mysql": peewee.MySQLDatabase,
+        "postgresql": peewee.PostgresqlDatabase,
+    }
+
+    try:
+        cls = drivers[type_]
+    except KeyError:
+        raise ValueError("Unknown database type: {}".format(type_)) from None
+    # "database" is already passed positionally as `name`; drop it from the
+    # kwargs and tolerate configs that do not carry the key at all
+    config.pop("database", None)
+    db = cls(name, **config)
+    return db
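
A minimal usage sketch of db_model's Proxy wiring (illustrative, not part of the
patch): DB_PROXY stays unbound until init_db supplies a concrete database, which
is what lets the model classes be declared before the adapter is chosen. The
sample record values below are invented.

    import datetime

    import db_model as db

    # Bind the proxy to a throwaway in-memory SQLite database
    database = db.init_db(":memory:", "sqlite")
    db.DB_PROXY.initialize(database)
    db.create_tables()

    # One row shaped like the dicts daten_sammeln() produces
    db.insert_many([{
        "ts": datetime.datetime(2020, 1, 6, 11, 0),
        "geohash": "u22e2u",   # invented geohash
        "durchfluss": 180.0,   # m³/s
        "pegelstand": 345.0,   # cm
    }])
    print(db.FlussDaten.select().count())  # -> 1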
diff --git a/hnd_scraping.py b/hnd_scraping.py
new file mode 100644
index 0000000..2b8c11e
--- /dev/null
+++ b/hnd_scraping.py
@@ -0,0 +1,54 @@
+import datetime
+
+import requests
+from bs4 import BeautifulSoup
+
+URLS = [
+    "https://www.hnd.bayern.de/pegel/inn/laufen-siegerstetter-keller-18602009/tabelle?methode=wasserstand&setdiskr=15",
+    "https://www.hnd.bayern.de/pegel/inn/laufen-siegerstetter-keller-18602009/tabelle?methode=abfluss&"]
+
+
+def convert_to_datetime(datum):
+    if datum is not None:
+        datum = datetime.datetime.strptime(datum.get_text(), "%d.%m.%Y %H:%M")
+    return datum
+
+
+def convert_to_value(messwert):
+    if messwert is not None:
+        messwert = messwert.get_text()
+        if "-" not in messwert:
+            messwert = float(messwert.replace(",", "."))
+        else:
+            # "--" marks a missing reading in the HND tables
+            messwert = None
+    return messwert
+
+
+def scrap_messwerte(url):
+    r = requests.get(url)
+    soup = BeautifulSoup(r.text, features="html5lib")
+    return soup
+
+
+def scrap_messwerte_hnd(url):
+    soup = scrap_messwerte(url)
+    datensaetze = []
+    for zeile in soup.find_all("tr"):
+        messwert = convert_to_value(zeile.find("td", {"class": "center"}))
+        datum = convert_to_datetime(zeile.find("td"))
+        if messwert is not None and datum is not None:
+            datensaetze.append([datum, messwert])
+    return datensaetze
+
+
+def main():
+    for url in URLS:
+        for datum, messwert in scrap_messwerte_hnd(url):
+            print(datum, messwert)
+
+
+if __name__ == "__main__":
+    main()

diff --git a/hydris_data.py b/hydris_data.py
new file mode 100644
index 0000000..69292c3
--- /dev/null
+++ b/hydris_data.py
@@ -0,0 +1,92 @@
+import datetime
+from typing import Any, Dict, Iterable, List, NamedTuple, Optional
+
+import requests
+
+
+class Measurement(NamedTuple):
+    timestamp: datetime.datetime  # in UTC
+    unit: str
+    value: float
+
+
+class Location(NamedTuple):
+    lat: Optional[float]
+    lon: Optional[float]
+    alt: Optional[float]
+
+
+class Station(NamedTuple):
+    id: str  # might contain not only digits, so no int
+    name: str
+    operator: Optional[str]
+    location: Location
+    flow: Optional[Dict[str, Measurement]]
+    water_level: Optional[Dict[str, Measurement]]
+    raw_data: Dict[str, Any]
+
+
+def _parse_measurements(values: dict, key: str) -> Optional[Dict[str, Measurement]]:
+    if key not in values:
+        return None
+    return {
+        name: Measurement(
+            # "dt" is a millisecond epoch; parse as UTC to match the annotation
+            timestamp=datetime.datetime.utcfromtimestamp(measurement["dt"] / 1000),
+            unit=measurement["unit"],
+            value=measurement["v"],
+        )
+        for name, measurement in values[key].items()
+        if measurement != {}
+    }
+
+
+def get_hydris_data(url: str) -> List[Station]:
+    data = requests.get(url).json()
+    return [
+        Station(
+            id=station["number"],
+            name=station["name"],
+            operator=station.get("Betreiber"),
+            location=Location(
+                lat=station.get("latlng", (None, None))[0],
+                lon=station.get("latlng", (None, None))[1],
+                alt=station.get("altitude"),
+            ),
+            flow=_parse_measurements(station["values"], "Q"),
+            water_level=_parse_measurements(station["values"], "W"),
+            raw_data=station,
+        )
+        for station in data
+    ]
+
+
+def get_station(data: Iterable[Station], station_id: str) -> Optional[Station]:
+    # A filter object is always truthy, so it cannot signal "no match";
+    # next() with a default returns the first hit or None instead
+    return next((station for station in data if station.id == station_id), None)
+
+
+def main() -> None:
+    data = get_hydris_data("https://www.salzburg.gv.at/wasser/hydro/grafiken/data.json")
+
+    numbers = [204180, 204032, 203323, 204198, 203570, 203539]
+    for number in numbers:
+        station = get_station(data, station_id=str(number))
+        if station is None:
+            continue
+
+        # e.g. Salzburg_Nonntaler Brücke / Salzach
+        print(station.name)
+
+        # e.g. HD Salzburg
+        print(station.operator)
+
+        # e.g. Location(lat=47.79805653502927, lon=13.054023495047648, alt=412.8)
+        print(station.location)
+
+        __import__("pprint").pprint(station.flow)
+        __import__("pprint").pprint(station.water_level)
+
+        # Raw station JSON data
+        # print(station.raw_data)
+
+
+if __name__ == "__main__":
+    main()
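
An offline sketch (illustrative) of how hnd_scraping's two converters behave:
the HTML fragment mirrors the shape the parsers expect, a <tr> whose first <td>
holds the timestamp and whose td.center holds the comma-decimal value. The
markup here is assumed, not copied from the live HND site.

    from bs4 import BeautifulSoup

    import hnd_scraping

    html = """
    <table><tr>
      <td>06.01.2020 11:15</td>
      <td class="center">123,4</td>
    </tr></table>
    """
    row = BeautifulSoup(html, "html5lib").find("tr")
    datum = hnd_scraping.convert_to_datetime(row.find("td"))
    messwert = hnd_scraping.convert_to_value(row.find("td", {"class": "center"}))
    print(datum, messwert)  # 2020-01-06 11:15:00 123.4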
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..bdb0c79
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+requests
+peewee
+beautifulsoup4
+html5lib
+pygeohash
+toml

diff --git a/vorlage_cfg_grenz.toml b/vorlage_cfg_grenz.toml
new file mode 100644
index 0000000..bec8e33
--- /dev/null
+++ b/vorlage_cfg_grenz.toml
@@ -0,0 +1,69 @@
+#
+# H = Wasserstand (water level)
+# Q = Abfluss (discharge)
+
+[hnd]
+[hnd.laufen]
+# Data for the import
+url_h = "https://www.hnd.bayern.de/pegel/inn/laufen-siegerstetter-keller-18602009/tabelle?methode=wasserstand&"
+url_q = "https://www.hnd.bayern.de/pegel/inn/laufen-siegerstetter-keller-18602009/tabelle?methode=abfluss&"
+text = "Laufen"
+lat = 47.939488
+lon = 12.932920
+
+[hnd.brodhausen]
+# Data for the import
+url_h = "https://www.hnd.bayern.de/pegel/inn/brodhausen-18666001/tabelle?methode=wasserstand&"
+url_q = "https://www.hnd.bayern.de/pegel/inn/brodhausen-18666001/tabelle?methode=abfluss&"
+text = "Brodhausen"
+lat = 47.847131
+lon = 12.945941
+
+
+[db]
+db = "postgresql"  # "postgresql" | "mysql" | "sqlite"
+ssh_tunnel = false  # The additional module 'sshtunnel' must be installed separately
+pfad_ssh_auth = ""  # Only relevant if ssh_tunnel is true
+
+[db.postgresql]
+database = ""
+user = ""
+password = ""
+host = "localhost"
+port = 5432  # PostgreSQL default; a bare `port =` is not valid TOML
+
+[db.mysql]
+database = ""
+user = ""
+password = ""
+host = ""
+port = 3306  # MySQL default; the SSH tunnel expects an integer here
+
+[db.sqlite]
+database = ":memory:"  # Path to database file or :memory:
+
+[Laufen.H.Stufe]
+600 = "Meldestufe 1"
+700 = "Meldestufe 2"
+750 = "Meldestufe 3"
+800 = "HW100"
+850 = "Meldestufe 4"
+
+[Laufen.H.Hinweis]
+600 = "Triebenbach: Beginnende Ueberflutung des Leitwerkes an der Surmuendung"
+670 = "Triebenbach: Beginnende Ueberflutung der Wirtschaftswege"
+740 = "Triebenbach: Anwesen Pointner wird ueberflutet"
+780 = "Mayerhofen: Anwesen Kettenberger und Prechtl ueberflutet"
+820 = "Obslaufen: Beginnende Ueberflutung der Steinernen Gasse und der B 20"
+
+[Laufen.Q.Stufe]
+1130 = "HQ1"
+1350 = "HQ2"
+1800 = "HQ5"
+2000 = "HQ10"
+2300 = "HQ20"
+2800 = "HQ50"
+3100 = "HQ100"
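
For orientation, one way the [db] block of a real cfg_grenz.toml might look
when filled in for a local PostgreSQL instance; every name and credential below
is an invented placeholder, not a value from this repository.

    [db]
    db = "postgresql"
    ssh_tunnel = false
    pfad_ssh_auth = ""

    [db.postgresql]
    database = "flussdaten"   # invented example name
    user = "grenz"
    password = "geheim"
    host = "localhost"
    port = 5432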