Initial Commit

master
Hofei90 5 years ago
parent 64d37ae0a1
commit 2776f92dac

.gitignore vendored

@ -0,0 +1,132 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# Configuration file
cfg_grenz.toml

@ -0,0 +1,75 @@
import os

import pygeohash
import toml

import db_model as db
import hnd_scraping
import hydris_data


def config_laden(configfile):
    """Load a TOML configuration file into a dict."""
    with open(configfile) as file:
        return toml.loads(file.read())


PFAD = os.path.abspath(os.path.dirname(__file__))
CONFIG = config_laden(os.path.join(PFAD, "cfg_grenz.toml"))


def daten_sammeln():
    """Collect the most recent flow and water-level readings from all sources."""
    daten = []
    # Bavarian gauges (HND): scrape the measurement tables station by station.
    # max() picks the most recent reading, since rows are [timestamp, value] pairs.
    for messstelle in CONFIG["hnd"]:
        lat = CONFIG["hnd"][messstelle]["lat"]
        lon = CONFIG["hnd"][messstelle]["lon"]
        geohash = pygeohash.geohash.encode(lat, lon)
        durchfluss = max(hnd_scraping.scrap_messwerte_hnd(CONFIG["hnd"][messstelle]["url_q"]))
        pegelstand = max(hnd_scraping.scrap_messwerte_hnd(CONFIG["hnd"][messstelle]["url_h"]))
        daten.append({"ts": durchfluss[0], "geohash": geohash,
                      "durchfluss": durchfluss[1], "pegelstand": pegelstand[1]})
    # Salzburg gauges (Hydris): fetch the JSON feed once and pick the wanted stations.
    hydris_daten = hydris_data.get_hydris_data("https://www.salzburg.gv.at/wasser/hydro/grafiken/data.json")
    numbers = [204180, 204032, 203323, 204198, 203570, 203539]
    hydris_daten_gefiltert = [hydris_data.get_station(hydris_daten, station_id=str(number))
                              for number in numbers]
    for station in hydris_daten_gefiltert:
        geohash = pygeohash.geohash.encode(station.location.lat, station.location.lon)
        daten.append({"ts": station.flow["15m.Cmd.RunOff"].timestamp, "geohash": geohash,
                      "durchfluss": station.flow["15m.Cmd.RunOff"].value,
                      "pegelstand": station.water_level["15m.Cmd.WiskiWeb"].value})
    return daten


def send_with_ssh() -> None:
    db_adapter = CONFIG["db"]["db"]
    port = CONFIG["db"][db_adapter]["port"]
    # sshtunnel is an optional dependency, so it is only imported when needed.
    from sshtunnel import SSHTunnelForwarder

    config = config_laden(CONFIG["db"]["pfad_ssh_auth"])
    with SSHTunnelForwarder(
        (config["ip_server"], config["ssh_port"]),
        ssh_username=config["user"],
        ssh_password=config["pw"],
        remote_bind_address=("127.0.0.1", port),
    ) as server:
        # Point the adapter at the local end of the tunnel, then proceed as usual.
        CONFIG["db"][db_adapter]["port"] = server.local_bind_port
        init()


def init() -> None:
    db_adapter = CONFIG["db"]["db"]
    db_ = db.init_db(CONFIG["db"][db_adapter]["database"], db_adapter, CONFIG["db"].get(db_adapter))
    db.DB_PROXY.initialize(db_)
    db.create_tables()
    daten = daten_sammeln()
    db.insert_many(daten)


def main() -> None:
    if CONFIG["db"]["ssh_tunnel"]:
        send_with_ssh()
    else:
        init()


if __name__ == "__main__":
    main()
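
send_with_ssh() loads the SSH credentials from a separate TOML file referenced by pfad_ssh_auth. The key names below are taken from the code above; the file itself is a hypothetical sketch and every value is a placeholder:

ip_server = "<server ip>"
ssh_port = 22
user = "<ssh user>"
pw = "<ssh password>"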

db_model.py

@ -0,0 +1,45 @@
import os
from typing import Any, Dict, Optional

import peewee

SKRIPTPFAD = os.path.abspath(os.path.dirname(__file__))
# Proxy, so the concrete database (sqlite/mysql/postgresql) can be chosen at runtime.
DB_PROXY = peewee.Proxy()


class BaseModel(peewee.Model):
    class Meta:
        database = DB_PROXY


class FlussDaten(BaseModel):
    """One reading per gauge: timestamp, location geohash, flow, and water level."""
    ts = peewee.DateTimeField()
    geohash = peewee.CharField(12)
    durchfluss = peewee.FloatField()
    pegelstand = peewee.FloatField()


def insert_many(daten):
    FlussDaten.insert_many(daten).execute()


def create_tables() -> None:
    DB_PROXY.create_tables([FlussDaten])


def init_db(name: str, type_: str = "sqlite", config: Optional[Dict[str, Any]] = None):
    config = config or {}
    drivers = {
        "sqlite": peewee.SqliteDatabase,
        "mysql": peewee.MySQLDatabase,
        "postgresql": peewee.PostgresqlDatabase,
    }
    try:
        cls = drivers[type_]
    except KeyError:
        raise ValueError("Unknown database type: {}".format(type_)) from None
    # "database" duplicates the positional name argument; drop it if present
    # (a bare `del` would raise KeyError for configs without that key).
    config.pop("database", None)
    db = cls(name, **config)
    return db
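
A minimal standalone sketch of the proxy pattern above, using an in-memory SQLite database; it mirrors what init() in the main script does, and all inserted values are made up:

import datetime

import db_model as db

# Initialize the proxy with a throwaway in-memory SQLite database.
database = db.init_db(":memory:", "sqlite", {"database": ":memory:"})
db.DB_PROXY.initialize(database)
db.create_tables()

# Insert one made-up reading (geohash and numbers are placeholders).
db.insert_many([{
    "ts": datetime.datetime(2020, 1, 1, 12, 0),
    "geohash": "u21j8vnhr5bp",
    "durchfluss": 123.4,
    "pegelstand": 56.7,
}])
print(db.FlussDaten.select().count())  # -> 1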

hnd_scraping.py

@ -0,0 +1,54 @@
import datetime

import requests
from bs4 import BeautifulSoup

URLS = [
    "https://www.hnd.bayern.de/pegel/inn/laufen-siegerstetter-keller-18602009/tabelle?methode=wasserstand&setdiskr=15",
    "https://www.hnd.bayern.de/pegel/inn/laufen-siegerstetter-keller-18602009/tabelle?methode=abfluss&",
]


def convert_to_datetime(datum):
    """Parse a timestamp cell like "01.01.2020 12:00"; pass None through."""
    if datum is not None:
        datum = datetime.datetime.strptime(datum.get_text(), "%d.%m.%Y %H:%M")
    return datum


def convert_to_value(messwert):
    """Parse a measurement cell into a float; cells containing "-" mean no value."""
    if messwert is not None:
        messwert = messwert.get_text()
        if "-" not in messwert:
            messwert = float(messwert.replace(",", "."))
        else:
            messwert = None
    return messwert


def scrap_messwerte(url):
    r = requests.get(url)
    return BeautifulSoup(r.text, features="html5lib")


def scrap_messwerte_hnd(url):
    """Return [timestamp, value] pairs scraped from an HND measurement table."""
    soup = scrap_messwerte(url)
    datensaetze = []
    for zeile in soup.find_all("tr"):
        messwert = convert_to_value(zeile.find("td", {"class": "center"}))
        datum = convert_to_datetime(zeile.find("td"))
        if messwert is not None and datum is not None:
            datensaetze.append([datum, messwert])
    return datensaetze


def main():
    for url in URLS:
        for datensatz in scrap_messwerte_hnd(url):
            print(datensatz)


if __name__ == "__main__":
    main()
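
An offline sketch of the row format scrap_messwerte_hnd() expects: timestamp in the first <td>, value in a <td class="center">. The HTML fragment is hypothetical, not captured from hnd.bayern.de:

from bs4 import BeautifulSoup

from hnd_scraping import convert_to_datetime, convert_to_value

html = """<table>
<tr><td>01.01.2020 12:00</td><td class="center">1,23</td></tr>
<tr><td>01.01.2020 12:15</td><td class="center">--</td></tr>
</table>"""
soup = BeautifulSoup(html, features="html5lib")
for zeile in soup.find_all("tr"):
    datum = convert_to_datetime(zeile.find("td"))
    messwert = convert_to_value(zeile.find("td", {"class": "center"}))
    # The second row prints None: "-" marks a missing value, which
    # scrap_messwerte_hnd() then filters out.
    print(datum, messwert)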

hydris_data.py

@ -0,0 +1,92 @@
import datetime
import pprint
from typing import Any, Dict, Iterable, List, NamedTuple, Optional

import requests


class Measurement(NamedTuple):
    timestamp: datetime.datetime  # in UTC
    unit: str
    value: float


class Location(NamedTuple):
    lat: Optional[float]
    lon: Optional[float]
    alt: Optional[float]


class Station(NamedTuple):
    id: str  # might contain not only digits, so no int
    name: str
    operator: Optional[str]
    location: Location
    flow: Optional[Dict[str, Measurement]]
    water_level: Dict[str, Measurement]
    raw_data: Dict[str, Any]


def _parse_measurements(values: dict, key: str) -> Optional[Dict[str, Measurement]]:
    if key not in values:
        return None
    return {
        name: Measurement(
            # "dt" is epoch milliseconds; interpret it as UTC, as documented above
            # (a naive fromtimestamp() would silently produce local time).
            timestamp=datetime.datetime.fromtimestamp(measurement["dt"] / 1000, tz=datetime.timezone.utc),
            unit=measurement["unit"],
            value=measurement["v"],
        )
        for name, measurement in values[key].items()
        if measurement != {}
    }


def get_hydris_data(url: str) -> List[Station]:
    data = requests.get(url).json()
    return [
        Station(
            id=station["number"],
            name=station["name"],
            operator=station.get("Betreiber"),
            location=Location(
                lat=station.get("latlng", (None, None))[0],
                lon=station.get("latlng", (None, None))[1],
                alt=station.get("altitude"),
            ),
            flow=_parse_measurements(station["values"], "Q"),
            water_level=_parse_measurements(station["values"], "W"),
            raw_data=station,
        )
        for station in data
    ]


def get_station(data: Iterable[Station], station_id: str) -> Optional[Station]:
    # Return the first matching station, or None. A truthiness check on a filter
    # object never triggers, and list(result)[0] raises IndexError on no match.
    return next((station for station in data if station.id == station_id), None)


def main() -> None:
    data = get_hydris_data("https://www.salzburg.gv.at/wasser/hydro/grafiken/data.json")
    numbers = [204180, 204032, 203323, 204198, 203570, 203539]
    for number in numbers:
        station = get_station(data, station_id=str(number))
        # e.g. "Salzburg_Nonntaler Brücke / Salzach"
        print(station.name)
        # e.g. "HD Salzburg"
        print(station.operator)
        # e.g. Location(lat=47.79805653502927, lon=13.054023495047648, alt=412.8)
        print(station.location)
        pprint.pprint(station.flow)
        pprint.pprint(station.water_level)
        # Raw station JSON data:
        # print(station.raw_data)


if __name__ == "__main__":
    main()
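
The shape of the upstream JSON can be read off the keys this module accesses. The fragment below is a hypothetical "values" block (units assumed), handy for exercising _parse_measurements() without hitting the live feed:

from hydris_data import _parse_measurements

# Hypothetical measurement block: "dt" is epoch milliseconds, "v" the value.
values = {
    "Q": {"15m.Cmd.RunOff": {"dt": 1577880000000, "unit": "m³/s", "v": 123.4}},
    "W": {"15m.Cmd.WiskiWeb": {"dt": 1577880000000, "unit": "cm", "v": 56.7}},
}
print(_parse_measurements(values, "Q"))  # one Measurement keyed "15m.Cmd.RunOff"
print(_parse_measurements(values, "X"))  # None: key not present in the feed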

requirements.txt

@ -0,0 +1,5 @@
requests
peewee
beautifulsoup4
html5lib  # parser backend requested in hnd_scraping (features="html5lib")
pygeohash
toml

@ -0,0 +1,69 @@
#
# H = Wasserstand (water level)
# Q = Abfluss (discharge)
[hnd]

[hnd.laufen]
# Data for the import
url_h = "https://www.hnd.bayern.de/pegel/inn/laufen-siegerstetter-keller-18602009/tabelle?methode=wasserstand&"
url_q = "https://www.hnd.bayern.de/pegel/inn/laufen-siegerstetter-keller-18602009/tabelle?methode=abfluss&"
text = "Laufen"
lat = 47.939488
lon = 12.932920

[hnd.brodhausen]
# Data for the import
url_h = "https://www.hnd.bayern.de/pegel/inn/brodhausen-18666001/tabelle?methode=wasserstand&"
url_q = "https://www.hnd.bayern.de/pegel/inn/brodhausen-18666001/tabelle?methode=abfluss&"
text = "Brodhausen"
lat = 47.847131
lon = 12.945941

[db]
db = "postgresql"  # "postgresql" | "mysql" | "sqlite"
ssh_tunnel = false  # The additional module 'sshtunnel' has to be installed manually
pfad_ssh_auth = "<pfad>"  # Only relevant if ssh_tunnel is true

[db.postgresql]
database = "<db>"
user = "<user>"
password = "<pw>"
host = "localhost"
port = <port>

[db.mysql]
database = ""
user = ""
password = ""
host = ""
port = ""

[db.sqlite]
database = ":memory:"  # Path to the database file, or :memory:

[Laufen.H.Stufe]
600 = "Meldestufe 1"
700 = "Meldestufe 2"
750 = "Meldestufe 3"
800 = "HW100"
850 = "Meldestufe 4"

[Laufen.H.Hinweis]
600 = "Triebenbach: Beginnende Ueberflutung des Leitwerkes an der Surmuendung"
670 = "Triebenbach: Beginnende Ueberflutung der Wirtschaftswege"
740 = "Triebenbach: Anwesen Pointner wird ueberflutet"
780 = "Mayerhofen: Anwesen Kettenberger und Prechtl ueberflutet"
820 = "Obslaufen: Beginnende Ueberflutung der Steinernen Gasse und der B 20"

[Laufen.Q.Stufe]
1130 = "HQ1"
1350 = "HQ2"
1800 = "HQ5"
2000 = "HQ10"
2300 = "HQ20"
2800 = "HQ50"
3100 = "HQ100"
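
The alert-level tables above parse into nested dicts; note that bare TOML integer keys such as 600 arrive as *string* keys in Python. A small sketch with a hypothetical lookup helper:

import toml

snippet = """
[Laufen.H.Stufe]
600 = "Meldestufe 1"
700 = "Meldestufe 2"
750 = "Meldestufe 3"
"""
cfg = toml.loads(snippet)

def stufe_fuer_pegel(stufen, pegel_cm):
    # Hypothetical helper: highest alert level reached at a given water level in cm.
    erreicht = [int(grenze) for grenze in stufen if int(grenze) <= pegel_cm]
    return stufen[str(max(erreicht))] if erreicht else None

print(stufe_fuer_pegel(cfg["Laufen"]["H"]["Stufe"], 720))  # -> "Meldestufe 2"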