From a2e96559cfd1ad6e65a57253073cf1c7a19b50b1 Mon Sep 17 00:00:00 2001
From: spertl
Date: Mon, 20 Apr 2026 19:56:19 +0000
Subject: [PATCH] XML Aufbereitung

---
 scripts/correct_xml_isybau.py  | 140 +++++++++++++++++++++++++++++++++
 scripts/validate_xml_isybau.py |  26 ++++++
 2 files changed, 166 insertions(+)
 create mode 100644 scripts/correct_xml_isybau.py
 create mode 100644 scripts/validate_xml_isybau.py

diff --git a/scripts/correct_xml_isybau.py b/scripts/correct_xml_isybau.py
new file mode 100644
index 0000000..fe3405a
--- /dev/null
+++ b/scripts/correct_xml_isybau.py
@@ -0,0 +1,140 @@
+"""Prepare an ISYBAU XML export in two stages.
+
+Stage 1 applies a fixed sequence of regex substitutions to the whole document
+and writes "<name>_new.xml".  Stage 2 rounds the numeric text of selected
+elements and writes "<name>_new2.xml".
+"""
+
+# Reference ogr2ogr command lines (kept for documentation only):
+# "C:\Program Files\QGIS 3.26.3\bin\Ogr2ogr.exe" -f GMLAS C:\temp-GIS\2026-04-08_export.xml C:\temp-GIS\fridolfing_kanal.gpkg
+#
+# "C:\Program Files\QGIS 3.26.3\bin\Ogr2ogr.exe" -f GML C:\temp-GIS\2026-04-08_export.xml C:\temp-GIS\fridolfing_kanal.gpkg -lco GML_ID=YES -lco XSD="C:\temp-GIS\_isy\2013\1_XML-Schema\1302-metadaten.xsd"
+#
+##"C:\Program Files\QGIS 3.26.3\bin"\ogr2ogr.exe -f GPKG %GPKGDatei% GMLAS:%IsybauXMLDatei% -oo XSD=C:\temp-GIS\_isy\2013\1_XML-Schema\1302-metadaten.xsd -forceNullable -oo CONFIG_FILE=C:\temp-GIS\_isy\gmlasconf.xml -oo REMOVE_UNUSED_LAYERS=YES -oo REMOVE_UNUSED_FIELDS=YES
+
+import re
+import xml.etree.ElementTree as ET
+
+filedir = "C:/temp-GIS/"
+xmlfile = "2026-04-08_export"
+
+# Stage 2 opens the stage-1 output as UTF-8, so stage 1 names the same
+# encoding explicitly instead of relying on the platform default.
+with open(filedir+xmlfile+".xml", "r", encoding="utf-8") as infile:
+    content = infile.read()
+
+# NOTE(review): the next three patterns are empty strings, so each
+# substitution leaves the document unchanged as written -- confirm the
+# intended patterns.
+pattern = re.compile(r"")
+content = pattern.sub("", content)
+
+pattern = re.compile(r"")
+content = pattern.sub("", content)
+
+pattern = re.compile(r"")
+content = pattern.sub("", content)
+
+pattern = re.compile(r"2017/07/01")
+content = pattern.sub("2017-07", content)
+
+pattern = re.compile(r" xsi:nil=\"true\"")
+content = pattern.sub("", content)
+
+# NOTE(review): as written this deletes every "0" character in the
+# document -- the pattern looks truncated; confirm before running.
+pattern = re.compile(r"0")
+content = pattern.sub("", content)
+
+xmlReplaceTag = ["Datenkollektive", "Geometrie", "Geometriedaten", "Knoten"]
+
+# NOTE(review): as written this strips only the opening tag (and the
+# whitespace before it) of each listed element and leaves the closing tag
+# in place -- confirm the intended pattern.
+for x in xmlReplaceTag:
+    pattern = re.compile(r"\s*<"+x+">")
+    content = pattern.sub("", content)
+
+# NOTE(review): as written this removes all whitespace from the document --
+# the pattern looks truncated; confirm before running.
+pattern = re.compile(r"\s*")
+content = pattern.sub("", content)
+
+###
+### Fix booleans
+###
+### 2026-04-14: in table _ogr_fields_metadata the field_type was changed from
+### boolean to string, because with boolean a NULL was output as False.
+###
+
+list_bool = ["Bautechnik", "Geometrie", "Sanierung", "Umfeld", "Inspektion", "Dichtheit", "Film", "Verfahren", "Rechennetz", "Gebiet", "Flaechen", "Belastung", "Berechnung", "Beobachtung", "SonderprofilVorhanden", "DrainageAngeschlossen","Einstieghilfe", "Uebergabeschacht", "Abdeckplatte", "Konus", "Uebergangsplatte", "Podest", "Uebergabepunkt", "Uebergabebauwerk", "Bypass", "Schwimmerabschluss", "ExistenzPumpe", "ExistenzUeberlauf", "ExistenzFiltersack", "Ueberschwemmungsgebiet", "Schmutzfaenger", "Dichtheitspruefung", "Reinigung", "Verbindung", "BDEZulaufDrainage", "DDEZulaufDrainage", "Pruefergebnis", "FilmpfadIstAbsolut", "VerlustansatzA110", "DruckdichterDeckel", "Rueckschlagklappe", "Bezugslinie"]
+
+
+def _bool_literal(match):
+    """Return the matched element with its 0/1 text spelled false/true."""
+    name = match.group(1)
+    word = "true" if match.group(2) == "1" else "false"
+    return f"<{name}>{word}</{name}>"
+
+
+# Map 0 to false and 1 to true.  One alternation handles every listed
+# element in a single pass, and the closing tag is required so that only an
+# element whose entire text is 0 or 1 is rewritten.
+bool_pattern = re.compile(
+    r"<(" + "|".join(re.escape(name) for name in list_bool) + r")>([01])</\1>"
+)
+content = bool_pattern.sub(_bool_literal, content)
+
+with open(filedir+xmlfile+"_new.xml", "w", encoding="utf-8") as outfile:
+    outfile.write(content)
+
+#######
+####### ROUNDING
+#######
+
+input_xml = filedir+xmlfile+"_new.xml"
+output_xml = filedir+xmlfile+"_new2.xml"
+
+# Number of decimal places to keep, per element name.
+tag_rules = {
+    "Rechtswert": 3,
+    "Hochwert": 3,
+    "Punkthoehe": 3,
+    "Schachttiefe": 2,
+    "Laenge": 2,
+    "Entfernung": 2,
+    "Rohrlaenge": 2,
+    "SohlhoeheZulauf": 3,
+    "SohlhoeheAblauf": 3,
+    "DMPLaenge": 2,
+}
+
+# Matches <tag>text</tag> where the text holds no nested markup.  The
+# closing tag (tied to the opening one by the back-reference) anchors the
+# match so that the group captures the element's complete text.
+value_pattern = re.compile(r"<(\w+)>([^<]*)</\1>")
+
+
+def process_line(line):
+    """Return *line* with the values of elements named in tag_rules rounded.
+
+    Elements that are not listed, or whose text is not a number, are left
+    unchanged.
+    """
+    def replace(match):
+        tag = match.group(1)
+        decimals = tag_rules.get(tag)
+        if decimals is None:
+            return match.group(0)  # no rounding rule for this element
+        try:
+            value = float(match.group(2))
+        except ValueError:
+            return match.group(0)  # not numeric: keep the original text
+        return f"<{tag}>{round(value, decimals)}</{tag}>"
+
+    return value_pattern.sub(replace, line)
+
+
+with open(input_xml, "r", encoding="utf-8") as infile, \
+        open(output_xml, "w", encoding="utf-8") as outfile:
+    outfile.writelines(process_line(line) for line in infile)
diff --git a/scripts/validate_xml_isybau.py b/scripts/validate_xml_isybau.py
new file mode 100644
index 0000000..2e6041d
--- /dev/null
+++ b/scripts/validate_xml_isybau.py
@@ -0,0 +1,26 @@
+"""Validate the prepared ISYBAU XML export against the metadata XSD."""
+
+from lxml import etree
+
+# Load the XML file
+with open("C:/temp-GIS/2026-04-08_export_new2.xml", "rb") as xml_file:
+    xml_doc = etree.parse(xml_file)
+
+# Load the XSD file
+with open('C:/temp-GIS/_isy/2017/1_XML-Schema/1707-metadaten.xsd', "rb") as xsd_file:
+    xsd_doc = etree.parse(xsd_file)
+
+# Build the schema
+xmlschema = etree.XMLSchema(xsd_doc)
+
+# Validate; on failure write one line per schema error to the log file
+if xmlschema.validate(xml_doc):
+    print("XML ist gültig ✅")
+else:
+    print("XML ist NICHT gültig ❌")
+    count = 0
+    with open("C:/temp-GIS/fehler.log", "w", encoding="utf-8") as log_file:
+        for count, error in enumerate(xmlschema.error_log, start=1):
+            log_file.write(f"Zeile: {error.line}, Spalte: {error.column}; ")
+            log_file.write(f"Fehler: {error.message}\n")
+    print("Anzahl Fehler: ", count)