"""
Descriptor holders and computation of short, unique aliases ("shortcuts")
for long model-path keys.

Created on 23 Jan. 2018

@author: michel
"""
from collections import Counter, OrderedDict
import copy
import pprint
import re
import sys
import traceback
import urllib

# The two imports below are not used anywhere in this module. They are kept
# for backward compatibility but guarded: `_ast.Param` does not exist on
# Python >= 3.9 and numpy may not be installed, and an unguarded failure
# would make the whole module unimportable.
try:
    from _ast import Param
except ImportError:
    Param = None
try:
    from numpy.core.defchararray import startswith
except ImportError:
    startswith = None


class FieldDescripor:
    """Hold the name and the index of a field."""

    def __init__(self, name, index):
        self.name = name
        self.index = index

    def __str__(self):
        return pprint.pformat(vars(self), indent=4, width=200)

    def __repr__(self):
        return pprint.pformat(vars(self), indent=4, width=200)


class FilterDescriptor:
    """Hold a filter (param, operand) and the field descriptor it is bound to."""

    def __init__(self, param, operand):
        self.param = param
        self.operand = operand
        # Placeholder descriptor until set_field_descriptor() is called.
        self.field_descriptor = FieldDescripor(None, None)

    def __str__(self):
        # width=1 makes pformat put every attribute on its own line.
        return pprint.pformat(vars(self), indent=4, width=1)

    def __repr__(self):
        return pprint.pformat(vars(self), indent=4, width=1)

    def set_field_descriptor(self, name, index):
        """Replace the field descriptor with a new one built from name/index."""
        self.field_descriptor = FieldDescripor(name, index)


class ValueDescripor:
    """Hold a value reference, its table id and optional field/filter descriptors."""

    def __init__(self, value_ref, table_id):
        self.value_ref = value_ref
        self.table_id = table_id
        self.multiplicity = None
        self.field_descriptor = None
        self.filter = None

    def set_field_descriptor(self, name, index):
        """Attach a FieldDescripor built from name/index."""
        self.field_descriptor = FieldDescripor(name, index)

    def set_filter(self, param, operand):
        """Attach a FilterDescriptor built from param/operand."""
        self.filter = FilterDescriptor(param, operand)

    def __str__(self):
        return "value_ref={!s} table_id={!s} mult={!s}".format(
            self.value_ref, self.table_id, self.multiplicity)

    def __repr__(self):
        return self.__str__()


class ShortCut(object):
    """
    Compute short, unique aliases for a set of path-like keys.

    A key is split on runs of '.', ':' and '#'. The shortcut of a key is made
    of its last segments joined with '.', lengthened one segment at a time
    until no two keys share the same shortcut.

    `self.map` maps each retained key to a dict with the entries
    "shortcut", "cut_level" (negative slice index into the segment list),
    "cardinality" (number of keys sharing the shortcut) and "nb_ele"
    (number of segments).
    """
    #
    # global resources
    #
    # Raw string: '.' needs no escape inside a character class, and "\." in a
    # plain string literal is an invalid escape sequence.
    path_separator = re.compile(r"[.:#]+")

    def __init__(self, keySet):
        """
        Register the keys to process.

        :param keySet: iterable of string keys; only those starting with '#'
            are retained (original author's note: this prevents computing
            shortcuts on shortcuts).
        """
        self.map = OrderedDict()
        # Starting with sorted keys ensures the shortcuts are properly sorted.
        for key in sorted(keySet):
            if key.startswith("#"):
                self.map[key] = {"shortcut": None, "cut_level": 0,
                                 "cardinality": 0, "nb_ele": 0}

    def __repr__(self):
        return "".join("{}: {!r}\n".format(key, entry)
                       for key, entry in self.map.items())

    def set_shortcuts(self):
        """
        Assign an initial shortcut to every key, then count the collisions.

        Original author's note: it is important to keep the insertion order
        to make sure vector data stay together and vectors can be rebuilt.
        """
        previous_key = None
        for key in sorted(self.map):
            segments = self.path_separator.split(key)
            if previous_key and key.startswith(previous_key) and len(segments) >= 3:
                # Two similar keys: they can be for the same quantity, so
                # keep one more segment to tell them apart.
                depth = 3
            elif len(segments) >= 2:
                # Build a shortcut more convenient than the full path.
                depth = 2
            else:
                depth = 1
            previous_key = key

            entry = self.map[key]
            entry["shortcut"] = '.'.join(segments[-depth:])
            entry["cut_level"] = -depth
            entry["cardinality"] = 0
            entry["nb_ele"] = len(segments)
        self.set_cardinalities()

    def set_cardinalities(self):
        """Store in each entry the number of keys sharing its shortcut."""
        # One counting pass instead of comparing every pair of entries.
        counts = Counter(entry["shortcut"] for entry in self.map.values())
        for entry in self.map.values():
            entry["cardinality"] = counts[entry["shortcut"]]

    def has_duplicated_shorcuts(self):
        """Return True when at least two keys share the same shortcut."""
        return any(entry["cardinality"] > 1 for entry in self.map.values())

    def remove_duplicated_shorcuts(self):
        """
        Lengthen every duplicated shortcut by one more leading segment.

        When no duplicated shortcut changes (all of them already span the
        whole key, which happens for keys differing only by their separator
        characters), the full key is used as shortcut instead. Keys are
        unique, so this guarantees that repeated calls end up without
        duplicates rather than looping forever.
        """
        progressed = False
        for key, entry in self.map.items():
            if entry["cardinality"] > 1:
                entry["cut_level"] -= 1
                segments = self.path_separator.split(key)
                longer = '.'.join(segments[entry["cut_level"]:])
                if longer != entry["shortcut"]:
                    progressed = True
                entry["shortcut"] = longer
        if not progressed:
            # Stalemate: the segment lists are identical, only the full key
            # can discriminate the remaining duplicates.
            for key, entry in self.map.items():
                if entry["cardinality"] > 1:
                    entry["shortcut"] = key
        self.set_cardinalities()

    def get_shortcuts(self):
        """
        Compute the shortcuts and return them.

        :return: OrderedDict mapping each unique shortcut to its full key.
            A shortcut that would start with '.' (it spans the whole key,
            whose first segment is empty) is prefixed with "XX".
        """
        self.set_shortcuts()
        while self.has_duplicated_shorcuts():
            self.remove_duplicated_shorcuts()

        for entry in self.map.values():
            if entry["shortcut"].startswith("."):
                entry["shortcut"] = "XX" + entry["shortcut"]

        retour = OrderedDict()
        for key, entry in self.map.items():
            retour[entry["shortcut"]] = key
        return retour


# Sample key sets kept for manual experiments; main() runs the last one.
_TIMESERIES_SAMPLE_KEYS = [
    "#timeseries:TimeSerie#root#timeseries:TimeSerie.dataSet#timeseries:dataset.DataSet.calib_level",
    "#timeseries:TimeSerie#root#timeseries:TimeSerie.dataSet#timeseries:dataset.DataSet.creator",
    "#timeseries:TimeSerie#root#timeseries:TimeSerie.dataSet#timeseries:dataset.DataSet.contributor",
    "#timeseries:TimeSerie#root#timeseries:TimeSerie.dataSet#timeseries:dataset.DataSet.publisher_did",
    "#timeseries:TimeSerie#root#timeseries:TimeSerie.dataSet#timeseries:dataset.DataSet.target",
    "#timeseries:spaceaxis.RefFrame#timeseries:spaceaxis.RefFrame.position",
    "#timeseries:spaceaxis.RefFrame#timeseries:spaceaxis.RefFrame.frame",
    "#timeseries:timeaxis.RefFrame#timeseries:timeaxis.RefFrame.position",
    "#timeseries:wavelengthaxis.RefBandJ#timeseries:wavelengthaxis.RefBand.filter",
    "#timeseries:wavelengthaxis.RefBandJ#timeseries:wavelengthaxis.RefBand.wavelength",
    "#timeseries:wavelengthaxis.RefBandH#timeseries:wavelengthaxis.RefBand.filter",
    "#timeseries:wavelengthaxis.RefBandH#timeseries:wavelengthaxis.RefBand.wavelength",
    "#timeseries:wavelengthaxis.RefBandK#timeseries:wavelengthaxis.RefBand.filter",
    "#timeseries:wavelengthaxis.RefBandK#timeseries:wavelengthaxis.RefBand.wavelength",
    "#timeseries:wavelengthaxis.RefBandL#timeseries:wavelengthaxis.RefBand.filter",
    "#timeseries:wavelengthaxis.RefBandL#timeseries:wavelengthaxis.RefBand.wavelength",
    "#timeseries:wavelengthaxis.RefBandM#timeseries:wavelengthaxis.RefBand.filter",
    "#timeseries:wavelengthaxis.RefBandM#timeseries:wavelengthaxis.RefBand.wavelength",
    "#timeseries:spaceaxis.RefPosition#timeseries:spaceaxis.RefPosition.latitude",
    "#timeseries:spaceaxis.RefPosition#timeseries:spaceaxis.RefPosition.longitude",
    "#timeseries:TimeSerie#root#timeseries:TimeSerie.dependantModelDescriptor#timeseries:dataset.DependantModelDescriptor.name",
    "#timeseries:TimeSerie#root#timeseries:TimeSerie.dependantModelDescriptor#timeseries:dataset.DependantModelDescriptor.ivoid",
    "#timeseries:TimeSerie#root#timeseries:TimeSerie.dependantModelDescriptor#timeseries:dataset.DependantModelDescriptor.url",
    "#timeseries:data.PointJ#timeseries:data.Point.timestamp",
    "#timeseries:data.PointJ#timeseries:data.Point.observable",
    "#timeseries:data.PointH#timeseries:data.Point.timestamp",
    "#timeseries:data.PointH#timeseries:data.Point.observable",
    "#timeseries:data.PointK#timeseries:data.Point.timestamp",
    "#timeseries:data.PointK#timeseries:data.Point.observable",
    "#timeseries:data.PointL#timeseries:data.Point.timestamp",
    "#timeseries:data.PointL#timeseries:data.Point.observable",
    "#timeseries:data.PointM#timeseries:data.Point.timestamp",
    "#timeseries:data.PointM#timeseries:data.Point.observable",
]

_CUBE_SAMPLE_KEYS = [
    '#ts:TSPoint.timeAxis#cube:MeasurementAxis.measure#meas:CoordMeasure.coord#coords:domain.time.JD.date',
    '#ts:TSPoint.timeAxis#cube:DataAxis.dependent',
    '#cube:NDPoint.observable#cube:MeasurementAxis.measure#ts:spec.LuminosityMeasure.type',
    '#cube:NDPoint.observable#cube:MeasurementAxis.measure#meas:CoordMeasure.coord#coords:PhysicalCoordValue.cval',
    '#coords:domain.space.SpaceFrame.refPosition#coords:domain.space.StdRefLocation.position',
    '#coords:domain.time.TimeFrame.refPosition#coords:domain.space.StdRefLocation.position',
    '#cube:DataProduct.dataset#ds:experiment.ObsDataset.calibLevel',
    '#cube:DataProduct.dataset#ds:experiment.ObsDataset.target#ds:experiment.BaseTarget.name',
    '#cube:DataProduct.dataset#ds:experiment.ObsDataset.target#ds:experiment.BaseTarget.description',
    '#cube:DataProduct.dataset#ds:experiment.ObsDataset.target#ds:experiment.BaseTarget.position#meas:CoordMeasure.coord#coords:domain.time.JD.date',
    '#cube:DataProduct.dataset#ds:experiment.ObsDataset.target#ds:experiment.BaseTarget.position#meas:CoordMeasure.coord#coords:Coordinate.frame#coords:domain.time.TimeFrame.refPosition#coords:domain.space.StdRefLocation.position',
    '#cube:DataProduct.dataset#ds:experiment.ObsDataset.target#ds:experiment.BaseTarget.position#meas:CoordMeasure.coord#coords:Coordinate.frame#coords:domain.time.TimeFrame.timescale',
    '#cube:DataProduct.dataset#ds:experiment.ObsDataset.obsConfig#ds:experiment.ObsConfig.bandpass',
    '#cube:DataProduct.dataset#ds:experiment.ObsDataset.obsConfig#ds:experiment.ObsConfig.datasource',
    '#cube:DataProduct.dataset#ds:experiment.ObsDataset.obsConfig#ds:experiment.ObsConfig.instrument#ds:experiment.Instrument.name',
    '#cube:DataProduct.dataset#ds:dataset.Dataset.dataProductType',
    '#cube:DataProduct.dataset#ds:dataset.Dataset.dataProductSubtype',
    '#cube:DataProduct.dataset#ds:dataset.Dataset.curation#ds:dataset.Curation.publisherDID',
    '#cube:DataProduct.dataset#ds:dataset.Dataset.curation#ds:dataset.Curation.version',
    '#cube:DataProduct.dataset#ds:dataset.Dataset.curation#ds:dataset.Curation.releaseDate',
    '#cube:DataProduct.dataset#ds:dataset.Dataset.curation#ds:dataset.Curation.rights',
    '#cube:DataProduct.dataset#ds:dataset.Dataset.curation#ds:dataset.Curation.publisher#ds:dataset.Publisher.publisherID',
    '#cube:DataProduct.dataset#ds:dataset.Dataset.curation#ds:dataset.Curation.publisher#ds:party.Role.party#ds:party.Individual.address',
    '#cube:DataProduct.dataset#ds:dataset.Dataset.curation#ds:dataset.Curation.publisher#ds:party.Role.party#ds:party.Individual.phone',
    '#cube:DataProduct.dataset#ds:dataset.Dataset.curation#ds:dataset.Curation.publisher#ds:party.Role.party#ds:party.Individual.email',
    '#cube:DataProduct.dataset#ds:dataset.Dataset.curation#ds:dataset.Curation.publisher#ds:party.Role.party#ds:party.Party.name',
    '#cube:DataProduct.dataset#ds:dataset.Dataset.dataID#ds:dataset.DataID.title',
    '#cube:DataProduct.dataset#ds:dataset.Dataset.dataID#ds:dataset.DataID.datasetID',
    '#cube:DataProduct.dataset#ds:dataset.Dataset.dataID#ds:dataset.DataID.creatorDID',
    '#cube:DataProduct.dataset#ds:dataset.Dataset.dataID#ds:dataset.DataID.version',
    '#cube:DataProduct.dataset#ds:dataset.Dataset.dataID#ds:dataset.DataID.date',
]

_NESTED_SAMPLE_KEYS = [
    '#coords:domain.time.TimeFrame.refPosition#coords:domain.space.StdRefLocation.position',
    '#cube:DataProduct.dataset#ds:experiment.ObsDataset.target#ds:experiment.BaseTarget.position#meas:CoordMeasure.coord#coords:Coordinate.frame#coords:domain.time.TimeFrame.refPosition#coords:domain.space.StdRefLocation.position',
]


def main():
    """Print the shortcuts computed for the nested sample key set."""
    # get_shortcuts() calls set_shortcuts() itself, no separate call needed.
    sc = ShortCut(_NESTED_SAMPLE_KEYS)
    print(sc.get_shortcuts())


if __name__ == "__main__":
    main()