# Source code for isogeo_xml_toolbelt.readers.reader_iso19139
# -*- coding: utf-8 -*-
#! python3
"""
Isogeo XML Fixer - Metadata
Purpose: Read metadata stored as ISO 19139 XML into an object
Authors: First work by GeoBretagne on mdchecker - updated by Isogeo
Python: 3.6.x
"""
# #############################################################################
# ########## Libraries #############
# ##################################
# standard library
import datetime
import logging
import os
from pathlib import Path
from uuid import UUID
# 3rd party library
from lxml import etree
# submodules
from isogeo_xml_toolbelt.models import Contact
from isogeo_xml_toolbelt.utils import XmlUtils
# #############################################################################
# ########## Globals ###############
# ##################################
# logging: configure the root logger (INFO level) as soon as this module is imported
logging.basicConfig(level=logging.INFO)
# utils: single XmlUtils instance shared by every MetadataIso19139 object in this module
utils = XmlUtils()
# #############################################################################
# ########## Classes ###############
# ##################################
class MetadataIso19139(object):
    """Object representation of a metadata stored into XML respecting ISO 19139.

    The XML file is parsed once, at instantiation, and every value of interest
    is copied into an instance attribute. Values are looked up through the
    module-level ``utils`` helper (``XmlUtils``) with the namespaces declared in
    ``self.namespaces``.
    """

    def __init__(self, xml: Path):
        """Read and store the input XML metadata as an object.

        :param pathlib.Path xml: path to the XML file

        :raises TypeError: if `xml` is not a pathlib.Path instance
        """
        # lxml needs a str not a Path
        if not isinstance(xml, Path):
            raise TypeError("XML path must be a pathlib.Path instance.")
        self.xml_path = str(xml.resolve())

        # ensure namespaces declaration
        self.namespaces = {
            "gts": "http://www.isotc211.org/2005/gts",
            "gml": "http://www.opengis.net/gml",
            "xsi": "http://www.w3.org/2001/XMLSchema-instance",
            "gco": "http://www.isotc211.org/2005/gco",
            "gmd": "http://www.isotc211.org/2005/gmd",
            "gmx": "http://www.isotc211.org/2005/gmx",
            "srv": "http://www.isotc211.org/2005/srv",
            "xl": "http://www.w3.org/1999/xlink",
        }

        # parse xml
        self.md = etree.parse(self.xml_path)

        # identifiers
        self.filename = xml.name
        self.fileIdentifier = self._get_text(
            "/gmd:MD_Metadata/gmd:fileIdentifier/gco:CharacterString/text()"
        )
        self.MD_Identifier = self._get_text(
            "/gmd:MD_Metadata/gmd:identificationInfo/"
            "gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/"
            "gmd:identifier/gmd:MD_Identifier/gmd:code/gco:CharacterString/text()"
        )
        self.title = self._get_text(
            "/gmd:MD_Metadata/gmd:identificationInfo/"
            "gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/"
            "gmd:title/gco:CharacterString/text()"
        )
        self.OrganisationName = self._get_text(
            "/gmd:MD_Metadata/gmd:identificationInfo/"
            "gmd:MD_DataIdentification/gmd:pointOfContact/"
            "gmd:CI_ResponsibleParty/gmd:organisationName/gco:CharacterString/text()"
        )
        self.abstract = self._get_text(
            "/gmd:MD_Metadata/gmd:identificationInfo/"
            "gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString/text()"
        )

        # Process context and step
        self.processContext = self._get_text(
            "/gmd:MD_Metadata/gmd:dataQualityInfo/"
            "gmd:DQ_DataQuality/gmd:lineage"
            "/gmd:LI_Lineage/gmd:statement/gco:CharacterString/text()"
        )
        self.processStep = self._get_text(
            "/gmd:MD_Metadata/gmd:dataQualityInfo/"
            "gmd:DQ_DataQuality/gmd:lineage"
            "/gmd:LI_Lineage/gmd:processStep/gmd:LI_ProcessStep/gmd:description/"
            "gco:CharacterString/text()"
        )

        # update frequency
        self.updateFrequency = self._get_code_list_value(
            "/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification"
            "/gmd:resourceMaintenance/gmd:MD_MaintenanceInformation"
            "/gmd:maintenanceAndUpdateFrequency/gmd:MD_MaintenanceFrequencyCode"
        )

        # collection parent
        self.parentIdentifier = self._get_text(
            "/gmd:MD_Metadata/gmd:parentIdentifier/gco:CharacterString/text()"
        )

        # vector or raster
        self.storageType = self._get_code_list_value(
            "/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification/"
            "gmd:spatialRepresentationType/gmd:MD_SpatialRepresentationTypeCode"
        )

        # format
        self.formatName = self._get_text(
            "/gmd:MD_Metadata/gmd:distributionInfo/gmd:MD_Distribution/"
            "gmd:distributionFormat/gmd:MD_Format/gmd:name/gco:CharacterString/text()"
        )
        self.formatVersion = self._get_text(
            "/gmd:MD_Metadata/gmd:distributionInfo/gmd:MD_Distribution/"
            "gmd:distributionFormat/gmd:MD_Format/gmd:version/gco:CharacterString/text()"
        )

        # date or datetime ? Both encodings are read; gco:Date wins when present.
        dates_str = self._get_text(
            "/gmd:MD_Metadata/gmd:identificationInfo/"
            "gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/"
            "gmd:date/gmd:CI_Date/gmd:date/gco:Date/text()"
        )
        datetimes_str = self._get_text(
            "/gmd:MD_Metadata/gmd:identificationInfo/"
            "gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/"
            "gmd:date/gmd:CI_Date/gmd:date/gco:DateTime/text()"
        )
        if dates_str != "":
            self.date = utils.parse_string_for_max_date(dates_str)
        else:
            self.date = utils.parse_string_for_max_date(datetimes_str)

        # seems always datetime
        md_dates_str = self._get_text(
            "/gmd:MD_Metadata/gmd:dateStamp/gco:DateTime/text()"
        )
        self.md_date = utils.parse_string_for_max_date(md_dates_str)

        # contacts
        self.list_contacts = self.get_md_contacts()

        # keywords
        self.keywords = self.get_md_keywords()

        # bounding box (self.bbox is left empty here; the extent is carried by
        # lonmin / lonmax / latmin / latmax)
        self.bbox = []
        self._read_bounding_box()

        # relative XPath (no leading "/gmd:MD_Metadata"), kept as in the original
        self.geometry = self._get_code_list_value(
            "gmd:spatialRepresentationInfo/gmd:MD_VectorSpatialRepresentation/"
            "gmd:geometricObjects/gmd:MD_GeometricObjects/gmd:geometricObjectType/"
            "gmd:MD_GeometricObjectTypeCode"
        )

        # resolution for rasters
        self.resolution = self._get_text(
            "/gmd:MD_Metadata/gmd:identificationInfo/"
            "gmd:MD_DataIdentification/gmd:spatialResolution/"
            "gmd:MD_Resolution/gmd:distance/gco:Distance/text()"
        )

        # scale
        self.scale = self._get_text(
            "/gmd:MD_Metadata/gmd:identificationInfo/"
            "gmd:MD_DataIdentification/gmd:spatialResolution/"
            "gmd:MD_Resolution/gmd:equivalentScale/gmd:MD_RepresentativeFraction/"
            "gmd:denominator/gco:Integer/text()"
        )

        # SRS
        self.srs_code = self._get_text(
            "/gmd:MD_Metadata/gmd:referenceSystemInfo/gmd:MD_ReferenceSystem/"
            "gmd:referenceSystemIdentifier/gmd:RS_Identifier/gmd:code/"
            "gco:CharacterString/text()"
        )
        self.srs_codeSpace = self._get_text(
            "/gmd:MD_Metadata/gmd:referenceSystemInfo/gmd:MD_ReferenceSystem/"
            "gmd:referenceSystemIdentifier/gmd:RS_Identifier/gmd:codeSpace/"
            "gco:CharacterString/text()"
        )

        # feature count
        self.featureCount = self._get_text(
            "/gmd:MD_Metadata/gmd:spatialRepresentationInfo/"
            "gmd:MD_VectorSpatialRepresentation/gmd:geometricObjects/"
            "gmd:MD_GeometricObjects/gmd:geometricObjectCount/gco:Integer/text()"
        )

        # feature catalogs
        # NOTE(review): the "[19]" positional predicate only matches the 19th
        # gmd:contentInfo element - looks like a leftover from a copied XPath;
        # confirm before relaxing it.
        self.featureCatalogs = self._get_text(
            "/gmd:MD_Metadata/gmd:contentInfo[19]/gmd:MD_FeatureCatalogueDescription/"
            "gmd:featureCatalogueCitation/text()"
        )

    # -- PRIVATE HELPERS ------------------------------------------------------
    def _get_text(self, xpath: str):
        """Return the result of ``utils.xmlGetTextNodes`` for `xpath` on the parsed document.

        :param str xpath: XPath expression using the prefixes of ``self.namespaces``
        """
        return utils.xmlGetTextNodes(self.md, xpath, self.namespaces)

    def _get_code_list_value(self, xpath: str):
        """Return the result of ``utils.xmlGetTextTag`` for the ``codeListValue`` key.

        :param str xpath: XPath expression selecting the code list element
        """
        return utils.xmlGetTextTag(self.md, xpath, self.namespaces, "codeListValue")

    def _read_bounding_box(self) -> None:
        """Set lonmin, lonmax, latmin and latmax from the geographic bounding box.

        If any of the four bounds cannot be converted to a float, all four fall
        back to the whole-world extent (-180, 180, -90, 90).
        """
        bound_xpath = (
            "/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification/"
            "gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox/"
            "gmd:{}/gco:Decimal/text()"
        )
        bound_tags = (
            "westBoundLongitude",
            "eastBoundLongitude",
            "southBoundLatitude",
            "northBoundLatitude",
        )
        try:
            lonmin, lonmax, latmin, latmax = [
                float(self._get_text(bound_xpath.format(tag))) for tag in bound_tags
            ]
        except (TypeError, ValueError):
            # float() failed: value missing or not a single number. Only these
            # two errors are caught so that KeyboardInterrupt / SystemExit and
            # unrelated bugs are no longer silently swallowed.
            lonmin, lonmax, latmin, latmax = -180, 180, -90, 90
        self.lonmin = lonmin
        self.lonmax = lonmax
        self.latmin = latmin
        self.latmax = latmax

    # -- METHODS --------------------------------------------------------------
    def __repr__(self):
        return self.fileIdentifier

    def __str__(self):
        return self.fileIdentifier

    def get_md_contacts(self) -> list:
        """Return the contacts found in the metadata, each one as a dictionary.

        Children of ``gmd:contact`` come first, followed by children of the
        data identification ``gmd:pointOfContact``. Each node is wrapped into a
        ``Contact`` and converted with its ``asDict()`` method.
        """
        root = self.md.getroot()  # get xml root
        # the trailing "/" selects the children of the matched elements
        contact_paths = (
            "gmd:contact/",
            "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:pointOfContact/",
        )
        return [
            Contact(node, self.namespaces).asDict()
            for path in contact_paths
            for node in root.findall(path, self.namespaces)
        ]

    def get_md_keywords(self) -> list:
        """Return the descriptive keywords of the metadata as a list of strings.

        A keyword element may pack several keywords separated by ";": they are
        split apart. Surrounding whitespace is stripped and empty entries
        (empty element, trailing ";") are skipped.
        """
        md_keywords = list()
        root = self.md.getroot()  # get xml root
        # get keywords
        for kw in root.findall(
            "gmd:identificationInfo/"
            "gmd:MD_DataIdentification/"
            "gmd:descriptiveKeywords/"
            "gmd:MD_Keywords/gmd:keyword/gco:CharacterString",
            self.namespaces,
        ):
            # an empty <gco:CharacterString/> has no text at all
            if kw.text is None:
                continue
            # Handle packed values like <gco:CharacterString>cycles ; circulations douces ; vélo ; aménagements cyclables ; transport ; véloroute ;</gco:CharacterString>
            for part in kw.text.split(";"):
                cleaned = part.strip()
                if cleaned:
                    md_keywords.append(cleaned)
        return md_keywords

    def asDict(self) -> dict:
        """Return object as a structured dictionary key: value.

        Note that ``type`` and ``storageType`` both expose ``self.storageType``.
        """
        return {
            "filename": self.filename,
            "fileIdentifier": self.fileIdentifier,
            "MD_Identifier": self.MD_Identifier,
            "type": self.storageType,
            "title": self.title,
            "abstract": self.abstract,
            "processContext": self.processContext,
            "processStep": self.processStep,
            "updateFrequency": self.updateFrequency,
            "OrganisationName": self.OrganisationName,
            "keywords": self.keywords,
            "formatName": self.formatName,
            "formatVersion": self.formatVersion,
            "contacts": self.list_contacts,
            "md_date": self.md_date,
            "date": self.date,
            "geometry": self.geometry,
            "resolution": self.resolution,
            "scale": self.scale,
            "srs": "{}:{}".format(self.srs_codeSpace, self.srs_code),
            "latmin": self.latmin,
            "latmax": self.latmax,
            "lonmin": self.lonmin,
            "lonmax": self.lonmax,
            "featureCount": self.featureCount,
            "featureCatalogs": self.featureCatalogs,
            "storageType": self.storageType,
            "parentidentifier": self.parentIdentifier,
        }
# #############################################################################
# ### Stand alone execution #######
# #################################
if __name__ == "__main__":
    # Stand-alone run: parse every XML fixture found and print its title and scale.
    # Other fixture folders that can be used instead:
    # li_fixtures_xml = sorted(Path(r"tests/fixtures/").glob("**/*.xml"))
    # li_fixtures_xml = sorted(Path(r"input").glob("**/*.xml"))
    # Path segments are passed separately so the folder resolves on every OS
    # (a hard-coded backslash path is a single odd filename on POSIX).
    li_fixtures_xml = sorted(Path("tests", "fixtures", "orano_xml").glob("**/*.xml"))
    for xml_path in li_fixtures_xml:
        test = MetadataIso19139(xml=xml_path)
        # build the dictionary once per file instead of once per printed value
        md_as_dict = test.asDict()
        print(md_as_dict.get("title"), md_as_dict.get("scale"))
        # print(xml_path.resolve(), test.storageType)