# Source code for isogeo_xml_toolbelt.md_xml_fixer
# -*- coding: UTF-8 -*-
#! python3
from __future__ import absolute_import, print_function, unicode_literals
# -----------------------------------------------------------------------------
# Name: Metadata XML fixer
# Purpose: Add missing elements to XML ISO19139 files
# Python: 3.5+
# Author: Julien Moura
# Source: https://github.com/Guts/iso19139_xml_fixer
# Created: 08/09/2017
# -----------------------------------------------------------------------------
# #############################################################################
# ###### Libraries #########
# ##########################
# Standard library
import logging
from logging.handlers import RotatingFileHandler
from os import getcwd, listdir, mkdir, path
import sys
from xml.dom import minidom
from xml.etree import ElementTree as ET
# imports depending on Python version
if sys.version_info < (3, 0):
from io import open
else:
pass
# ##############################################################################
# ############ Globals ############
# #################################
# LOG
# Module logger: everything from DEBUG up goes to a rotating log file created
# in the current working directory; warnings issued through the ``warnings``
# module are redirected to the logging system as well.
logging.captureWarnings(True)
logger = logging.getLogger("XML_ISO19139_FIXER")
logger.setLevel(logging.DEBUG)

# timestamp || level || module || line number || message
log_form = logging.Formatter(
    "%(asctime)s || %(levelname)s || %(module)s || %(lineno)s || %(message)s"
)

# append mode, rotate once the file reaches 5 000 000 bytes, keep one backup
logfile = RotatingFileHandler(
    "LOG_XML_FIXER.log", mode="a", maxBytes=5000000, backupCount=1
)
logfile.setFormatter(log_form)
logfile.setLevel(logging.DEBUG)
logger.addHandler(logfile)

logger.info("============ START ================")
logger.info("Python version: {}".format(sys.version_info))

# Values written into the metadata by the fixer; edit them to customize a run.
ds_character_set = "utf-8"  # text of the MD_CharacterSetCode elements
ds_creation_date = "2015-09-08"  # dataset creation date, "Z" is appended later
ds_srs_code = "urn:ogc:def:crs:EPSG:2154"  # code written into RS_Identifier
ds_license_lbl = "Licence ouverte ETALAB 1.0"  # xlink:title of the licence
ds_license_url = "http://www.etalab.gouv.fr/licence-ouverte-open-licence"
# #############################################################################
# ########### Classes #############
# #################################
class MetadataXML19139Fixer(object):
    """ISO 19139 XML fixer.

    Holds the input/output folder paths, the ISO 19139 namespace map and the
    XML document being edited. The caller assigns the parsed document to
    ``tpl`` and its root element to ``tpl_root`` before calling the ``add_*``
    and ``fix_*`` methods, which all edit ``tpl_root`` in place.
    """

    # Location of the ISO 19139 codelist catalogue; "#<codelist name>" is
    # appended to address one specific codelist.
    _CODELISTS_URL = (
        "http://standards.iso.org/ittf/PubliclyAvailableStandards/"
        "ISO_19139_Schemas/resources/codelist/ML_gmxCodelists.xml"
    )

    def __init__(self):
        """Check the working folders and register the ISO 19139 namespaces."""
        # parsed document and its root element, both assigned by the caller
        self.tpl = None
        self.tpl_root = None
        self.check_folders()
        self.ns = self.add_namespaces()
        super(MetadataXML19139Fixer, self).__init__()

    def check_folders(self):
        """Check prerequisites.

        Sets ``fold_in`` and ``fold_out`` to the ``input`` and ``output``
        folders of the current working directory and creates the missing ones.

        :returns: 0 once both folders exist and the input folder is not empty
        :raises SystemExit: with status 1 when a folder cannot be created or
            when the input folder contains nothing to process
        """
        self.fold_in = path.join(getcwd(), "input")
        self.fold_out = path.join(getcwd(), "output")
        # input folder
        self._ensure_folder(self.fold_in, "Input")
        # input XML files
        if not listdir(self.fold_in):
            logger.error(
                "Input folder was not created, so "
                "there is not any XML file. Please "
                "copy your XML ISO19139 files inside."
            )
            # non-zero status so that calling scripts can detect the failure
            sys.exit(1)
        logger.info("Files are present in input folder.")
        # output folder
        self._ensure_folder(self.fold_out, "Output")
        # method ending
        return 0

    @staticmethod
    def _ensure_folder(folder, label):
        """Create ``folder`` if it is missing; ``label`` names it in the log.

        :raises SystemExit: with status 1 when the folder cannot be created
        """
        if path.isdir(folder):
            logger.info("%s folder already exists.", label)
            return
        try:
            mkdir(folder, 0o777)
        except OSError as err:
            logger.error(err)
            sys.exit(1)

    def add_namespaces(self):
        """Register the ISO 19139 namespace prefixes with ElementTree.

        :returns: dict mapping each prefix to its namespace URI
        """
        ns = {
            "gts": "http://www.isotc211.org/2005/gts",
            "gml": "http://www.opengis.net/gml",
            "xsi": "http://www.w3.org/2001/XMLSchema-instance",
            "gco": "http://www.isotc211.org/2005/gco",
            "gmd": "http://www.isotc211.org/2005/gmd",
            "gmx": "http://www.isotc211.org/2005/gmx",
            "srv": "http://www.isotc211.org/2005/srv",
            "xl": "http://www.w3.org/1999/xlink",
        }
        # registered prefixes are the ones used when the tree is serialized
        for prefix, uri in ns.items():
            ET.register_namespace(prefix, uri)
        return ns

    # -------- Helpers to build namespaced elements ---------------------------
    def _qname(self, prefixed_tag):
        """Expand ``prefix:local`` into ElementTree's ``{uri}local`` form.

        ElementTree only treats a tag as namespaced when it is written this
        way; an element created with a literal ``gmd:date`` tag is not matched
        by the namespace-aware ``find()`` / ``findall()`` calls of this class.

        :raises KeyError: when the prefix is not a key of ``self.ns``
        """
        prefix, _, local_name = prefixed_tag.partition(":")
        return "{{{}}}{}".format(self.ns[prefix], local_name)

    def _sub(self, parent, prefixed_tag):
        """Append a namespaced child to ``parent`` and return it."""
        return ET.SubElement(parent, self._qname(prefixed_tag))

    def _add_code_element(self, parent, prefixed_tag, value, text):
        """Append a codelist element to ``parent`` and return it.

        The ``codeList`` attribute points to the codelist named like the
        element itself (e.g. ``#CI_DateTypeCode``).
        """
        code = self._sub(parent, prefixed_tag)
        codelist_name = prefixed_tag.partition(":")[2]
        code.set("codeList", "{}#{}".format(self._CODELISTS_URL, codelist_name))
        code.set("codeListValue", value)
        code.text = text
        return code

    # -------- Methods to add missing XML parts ------------------------------
    def add_ds_creation_date(self, creation_date=None):
        """Add the dataset creation date into metadata XML.

        Under /MD_Metadata/identificationInfo/MD_DataIdentification/citation/CI_Citation

            <date>
              <CI_Date>
                <date>
                  <gco:Date>2010-07-07Z</gco:Date>
                </date>
                <dateType>
                  <CI_DateTypeCode codeList="...#CI_DateTypeCode" codeListValue="creation">creation</CI_DateTypeCode>
                </dateType>
              </CI_Date>
            </date>

        The new ``date`` element is appended as last child of ``CI_Citation``.
        Nothing is added (a warning is logged) when the document has no
        ``CI_Citation`` element.

        :param creation_date: date without timezone suffix, ``"Z"`` is
            appended to it. Defaults to the module-level ``ds_creation_date``.
        """
        if creation_date is None:
            creation_date = ds_creation_date
        ci_citation = self.get_md_ci_citation()
        if ci_citation is None:
            logger.warning("No CI_Citation element: creation date not added.")
            return
        # creating sub element structure
        parent_date = self._sub(ci_citation, "gmd:date")
        ci_date = self._sub(parent_date, "gmd:CI_Date")
        sub_date = self._sub(ci_date, "gmd:date")
        # date value
        value_date = self._sub(sub_date, "gco:Date")
        value_date.text = "{}Z".format(creation_date)
        # date type
        sub_date_type = self._sub(ci_date, "gmd:dateType")
        self._add_code_element(
            sub_date_type, "gmd:CI_DateTypeCode", "creation", "creation"
        )

    def add_md_character_set(self, character_set=None, code_list_value="utf8"):
        """Add the character set into metadata XML.

        Under /MD_Metadata
        AND /MD_Metadata/identificationInfo/MD_DataIdentification

            <characterSet>
              <MD_CharacterSetCode codeList="...#MD_CharacterSetCode" codeListValue="utf8">utf-8</MD_CharacterSetCode>
            </characterSet>

        Each ``characterSet`` element is appended as last child of its parent.
        When the document has no ``MD_DataIdentification`` element, only the
        root-level one is added and a warning is logged.

        :param character_set: text of the code element. Defaults to the
            module-level ``ds_character_set``.
        :param code_list_value: value of the ``codeListValue`` attribute
        """
        if character_set is None:
            character_set = ds_character_set
        # metadata root first, then data identification
        parents = [self.tpl_root]
        md_data_identification = self.get_md_data_identification()
        if md_data_identification is None:
            logger.warning(
                "No MD_DataIdentification element: character set only added "
                "at the metadata root."
            )
        else:
            parents.append(md_data_identification)
        for parent in parents:
            char_set = self._sub(parent, "gmd:characterSet")
            self._add_code_element(
                char_set, "gmd:MD_CharacterSetCode", code_list_value, character_set
            )

    def fix_srs(self, srs_code=None):
        """Fix Spatial Reference System.

        Under /MD_Metadata/referenceSystemInfo/MD_ReferenceSystem/referenceSystemIdentifier/RS_Identifier

            <RS_Identifier>
              <code>
                <gco:CharacterString>urn:ogc:def:crs:EPSG:2154</gco:CharacterString>
              </code>
            </RS_Identifier>

        Overwrites the text of the existing code and removes the ``codeSpace``
        child if there is one. Parts missing from the document are reported
        with a warning and left untouched.

        :param srs_code: text written into the code element. Defaults to the
            module-level ``ds_srs_code``.
        """
        if srs_code is None:
            srs_code = ds_srs_code
        rs_identifier = self.get_rs_identifier()
        if rs_identifier is None:
            logger.warning("No RS_Identifier element: SRS not fixed.")
            return
        # fix SRS syntax
        code = rs_identifier.find("gmd:code/gco:CharacterString", self.ns)
        if code is None:
            logger.warning("RS_Identifier has no code/CharacterString element.")
        else:
            code.text = srs_code
        # remove useless codeSpace; remove() would raise if handed None
        code_space = rs_identifier.find("gmd:codeSpace", self.ns)
        if code_space is not None:
            rs_identifier.remove(code_space)

    def fix_cgus(self, license_label=None, license_url=None):
        """Replace dataset usage conditions and limitations in metadata XML.

        Under /MD_Metadata/identificationInfo/MD_DataIdentification

            <resourceConstraints>
              <MD_Constraints>
                <useLimitation>
                  <gmx:Anchor xl:title="Licence ouverte ETALAB 1.0" xl:href="http://www.etalab.gouv.fr/licence-ouverte-open-licence" />
                </useLimitation>
              </MD_Constraints>
            </resourceConstraints>

        Every existing ``resourceConstraints`` child of the first
        ``MD_DataIdentification`` is removed, then the single block above is
        appended. Nothing is changed (a warning is logged) when the document
        has no ``MD_DataIdentification`` element.

        :param license_label: ``xlink:title`` of the anchor. Defaults to the
            module-level ``ds_license_lbl``.
        :param license_url: ``xlink:href`` of the anchor. Defaults to the
            module-level ``ds_license_url``.
        """
        if license_label is None:
            license_label = ds_license_lbl
        if license_url is None:
            license_url = ds_license_url
        rs_ident = self.get_md_data_identification()
        if rs_ident is None:
            logger.warning("No MD_DataIdentification element: CGUs not fixed.")
            return
        # clean up before adding; only direct children are looked up because
        # remove() raises ValueError for an element that is not a child
        for constraint in rs_ident.findall("gmd:resourceConstraints", self.ns):
            rs_ident.remove(constraint)
        # add new constraints
        constraint_use = self._sub(rs_ident, "gmd:resourceConstraints")
        md_constraint = self._sub(constraint_use, "gmd:MD_Constraints")
        use_limit = self._sub(md_constraint, "gmd:useLimitation")
        use_anchor = self._sub(use_limit, "gmx:Anchor")
        use_anchor.set(self._qname("xl:title"), license_label)
        use_anchor.set(self._qname("xl:href"), license_url)

    # -------- Methods to get XML parts --------------------------------------
    def get_md_data_identification(self):
        """Get the first MD_DataIdentification element, or None if absent."""
        pth_data_identification = (
            "gmd:identificationInfo/" "gmd:MD_DataIdentification"
        )
        return self.tpl_root.find(pth_data_identification, self.ns)

    def get_md_ci_citation(self):
        """Get the first CI_Citation element, or None if absent."""
        pth_ci_citation = (
            "gmd:identificationInfo/"
            "gmd:MD_DataIdentification/"
            "gmd:citation/gmd:CI_Citation"
        )
        return self.tpl_root.find(pth_ci_citation, self.ns)

    def get_rs_identifier(self):
        """Get the first RS_Identifier element, or None if absent."""
        pth_rs_identifer = (
            "gmd:referenceSystemInfo/"
            "gmd:MD_ReferenceSystem/"
            "gmd:referenceSystemIdentifier/"
            "gmd:RS_Identifier"
        )
        return self.tpl_root.find(pth_rs_identifer, self.ns)

    def get_rs_constraints(self):
        """Get the list of resourceConstraints elements (empty if none)."""
        pth_rs_constraints = (
            "gmd:identificationInfo/"
            "gmd:MD_DataIdentification/"
            "gmd:resourceConstraints"
        )
        return self.tpl_root.findall(pth_rs_constraints, self.ns)

    # -------- XML utils ----------------------------------------------------
    def prettify(self, elem):
        """Return a pretty-printed XML string for the Element.

        The element is serialized to UTF-8 bytes, parsed again with minidom
        and written back with a one-space indentation.
        """
        rough_string = ET.tostring(elem, "utf-8")
        reparsed = minidom.parseString(rough_string)
        return reparsed.toprettyxml(indent=" ")
# #############################################################################
# ### Stand alone execution #######
# #################################
if __name__ == "__main__":
    # Stand-alone run: apply every fix to each entry of the input folder and
    # write the result under the same name into the output folder.
    app = MetadataXML19139Fixer()
    for xml in listdir(app.fold_in):
        logger.info(xml)
        xml_path = path.join(app.fold_in, xml)
        # listdir() also returns sub-folders, which cannot be opened as files
        if not path.isfile(xml_path):
            logger.warning("Skipping %s: not a regular file.", xml)
            continue
        # opening and parsing the input
        try:
            with open(xml_path, "r", encoding="utf-8") as in_xml:
                app.tpl = ET.parse(in_xml)
        except (ET.ParseError, UnicodeDecodeError) as err:
            # one unreadable file must not abort the whole batch
            logger.error("Skipping %s: %s", xml, err)
            continue
        # getting the elements and sub-elements structure
        app.tpl_root = app.tpl.getroot()
        # fixes
        app.add_ds_creation_date()  # creation date
        app.add_md_character_set()  # character set
        app.fix_srs()  # SRS
        app.fix_cgus()  # CGUs
        # saving the output xml file; the namespace prefixes were already
        # registered when the fixer was instantiated
        app.tpl.write(
            path.join(app.fold_out, xml),
            encoding="utf-8",
            xml_declaration=True,
            method="xml",
        )