>>> from simo.builder.lexicon.lexicon import LexiconDef
>>> tdf = open('../../simulator/xml/schemas/Typedefs_SIMO.xsd')
>>> typedef = tdf.read()
>>> tdf.close()
>>> sf = open('../../simulator/xml/schemas/lexicon.xsd')
>>> schema = sf.read()
>>> sf.close()
>>> xml1 = '''<rootlevel xmlns="http://www.simo-project.org/simo"
... xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
... xsi:schemaLocation="http://www.simo-project.org/simo
... ../schemas/lexicon.xsd">
... <name>simulation</name>
... <num_vars>
... <variable>
... <name>DIAM_CLASS_WIDTH</name>
... <description>Width of a single class...</description>
... </variable>
... </num_vars>
... <cat_vars/>
... <sublevels>
... <sublevel>
... <name>comp_unit</name>
... <type>static</type>
... <num_vars>
... <variable>
... <name>BA</name>
... <description>Basal area...</description>
... </variable>
... </num_vars>
... <cat_vars>
... <variable>
... <name>SC</name>
... <description>Site class...</description>
... <values>
... <enum>
... <value>1</value>
... <description>Very good</description>
... </enum>
... </values>
... </variable>
... </cat_vars>
... <text_vars>
... <variable>
... <name>Stand label</name>
... <description>Id text for the stand</description>
... </variable>
... </text_vars>
... <sublevels>
... <sublevel>
... <name>stratum</name>
... <type>dynamic</type>
... <num_vars>
... <variable>
... <name>N</name>
... <description>Number of stems...</description>
... </variable>
... </num_vars>
... <cat_vars>
... <variable>
... <name>SP</name>
... <description>Tree species...</description>
... <values>
... <enum>
... <value>1</value>
... <description>Pine</description>
... </enum>
... <enum>
... <value>2</value>
... <description>Spruce</description>
... </enum>
... </values>
... </variable>
... </cat_vars>
... <sublevels>
... <sublevel>
... <name>tree</name>
... <type>dynamic</type>
... <num_vars>
... <variable>
... <name>d</name>
... <description>Diameter...</description>
... </variable>
... </num_vars>
... <cat_vars/>
... </sublevel>
... </sublevels>
... </sublevel>
... </sublevels>
... </sublevel>
... <sublevel>
... <name>sample_plot</name>
... <type>static</type>
... <num_vars>
... <variable>
... <name>BA</name>
... <description>Basal area...</description>
... </variable>
... </num_vars>
... <cat_vars/>
... <sublevels>
... <sublevel>
... <name>sample_tree</name>
... <type>dynamic</type>
... <num_vars>
... <variable>
... <name>h</name>
... <description>Height...</description>
... </variable>
... </num_vars>
... <cat_vars/>
... </sublevel>
... </sublevels>
... </sublevel>
... </sublevels>
... </rootlevel>'''
>>> ldef1 = LexiconDef(typedef)
>>> ldef1.schema = schema
>>> try:
... ldef1.xml = xml1
... except ValueError, e:
... print e
xml name and xml content must be passed
>>> ldef1.xml = ('Test lexicon', xml1)
>>> ldef1.xml['Test lexicon'][0:10]
'<rootlevel'
Parse another lexicon with some additional variables
>>> xml2 = '''<rootlevel xmlns="http://www.simo-project.org/simo"
... xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
... xsi:schemaLocation="http://www.simo-project.org/simo
... ../schemas/lexicon.xsd">
... <name>simulation</name>
... <num_vars>
... <variable>
... <name>NUM_OF_CLASSES</name>
... <description>Number of size classes...</description>
... </variable>
... </num_vars>
... <cat_vars/>
... <sublevels>
... <sublevel>
... <name>comp_unit</name>
... <type>static</type>
... <num_vars>
... <variable>
... <name>D_gM</name>
... <description>Mean diameter...</description>
... </variable>
... </num_vars>
... <cat_vars>
... <variable>
... <name>DEVEL_CLASS</name>
... <description>Development class...</description>
... <values>
... <enum>
... <value>1</value>
... <description>Seedling stand</description>
... </enum>
... </values>
... </variable>
... </cat_vars>
... <text_vars>
... <variable>
... <name>Stand label</name>
... <description>Id text for the stand</description>
... </variable>
... </text_vars>
... <sublevels>
... <sublevel>
... <name>stratum</name>
... <type>dynamic</type>
... <num_vars>
... <variable>
... <name>H_gM</name>
... <description>Mean height...</description>
... </variable>
... </num_vars>
... <cat_vars/>
... </sublevel>
... </sublevels>
... </sublevel>
... <sublevel>
... <name>sample_plot</name>
... <type>static</type>
... <num_vars>
... <variable>
... <name>RADIUS</name>
... <description>Plot radius...</description>
... </variable>
... </num_vars>
... <cat_vars/>
... </sublevel>
... </sublevels>
... </rootlevel>'''
>>> ldef2 = LexiconDef(typedef)
>>> ldef2.schema = schema
>>> try:
... ldef2.xml = xml2
... except ValueError, e:
... print e
xml name and xml content must be passed
>>> ldef2.xml = ('Additional lexicon', xml2)
>>> ldef2.xml['Additional lexicon'][0:10]
'<rootlevel'
Parse yet another lexicon with incompatible data hierarchy
>>> xml3 = '''<rootlevel xmlns="http://www.simo-project.org/simo"
... xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
... xsi:schemaLocation="http://www.simo-project.org/simo
... ../schemas/lexicon.xsd">
... <name>simulation</name>
... <num_vars>
... <variable>
... <name>NUM_OF_CLASSES</name>
... <description>Number of size classes...</description>
... </variable>
... </num_vars>
... <cat_vars/>
... <sublevels>
... <sublevel>
... <name>comp_unit</name>
... <type>static</type>
... <num_vars>
... <variable>
... <name>D_gM</name>
... <description>Mean diameter...</description>
... </variable>
... </num_vars>
... <cat_vars>
... <variable>
... <name>DEVEL_CLASS</name>
... <description>Development class...</description>
... <values>
... <enum>
... <value>1</value>
... <description>Seedling stand</description>
... </enum>
... </values>
... </variable>
... </cat_vars>
... <text_vars>
... <variable>
... <name>Stand label</name>
... <description>Id text for the stand</description>
... </variable>
... </text_vars>
... <sublevels>
... <sublevel>
... <name>sample_tree</name>
... <type>dynamic</type>
... <num_vars>
... <variable>
... <name>h</name>
... <description>tree height...</description>
... </variable>
... </num_vars>
... <cat_vars/>
... </sublevel>
... </sublevels>
... </sublevel>
... <sublevel>
... <name>sample_plot</name>
... <type>static</type>
... <num_vars>
... <variable>
... <name>RADIUS</name>
... <description>Plot radius...</description>
... </variable>
... </num_vars>
... <cat_vars/>
... </sublevel>
... </sublevels>
... </rootlevel>'''
>>> ldef3 = LexiconDef(typedef)
>>> ldef3.schema = schema
>>> try:
... ldef3.xml = xml3
... except ValueError, e:
... print e
xml name and xml content must be passed
>>> ldef3.xml = ('Invalid lexicon', xml3)
>>> ldef3.xml['Invalid lexicon'][0:10]
'<rootlevel'
Class responsible for providing data level and attribute validation for all the other simobuilder classes. The validation is based on the lexicon XML document, which should define all the data levels, their hierarchy and all the allowed attributes for each level. Variable names must not exceed 30 characters, to maintain Oracle compatibility; a longer name raises an error in the lexicon.
The main task of Lexicon is to keep track of the correspondence between the names of levels and attributes and their indices in the data matrix. It also keeps a record of the hierarchy between the data levels.
The lexicon instance has two internal lexicon representations: the master lexicon, which holds the complete SIMO main lexicon containing all the data levels and their attributes; and the instance lexicon, which is limited to the data levels and attributes used in the particular simulation instance.
The instance lexicon is constructed stepwise when model chains etc. are being processed.
Changing the master lexicon will trigger object validation for all other classes tied to the lexicon:
>>> lex = ldef1.obj["Test lexicon"]
>>> for x, y in lex.level_ind.items(): print x, y
tree 3
simulation 0
sample_tree 5
sample_plot 4
stratum 2
comp_unit 1
>>> for x, y in lex.variable_ind.items():
... print x, y
tree {'d': None}
simulation {'DIAM_CLASS_WIDTH': None}
sample_tree {'h': None}
sample_plot {'BA': None}
stratum {'SP': None, 'N': None}
comp_unit {'SC': None, 'Stand label': None, 'BA': None}
>>> for x, y in lex.level_name.items():
... print x, y
0 simulation
1 comp_unit
2 stratum
3 tree
4 sample_plot
5 sample_tree
>>> for x, y in lex.variable_name.items():
... print x, y
0 {}
1 {}
2 {}
3 {}
4 {}
5 {}
>>> for x, y in lex.hierarchy.items():
... print x, y
0 {'ordinal': 0, 'lineage': set([0, 1]), 'children': [1, 4], 'parent': None}
1 {'ordinal': 1, 'lineage': set([0]), 'children': [2], 'parent': 0}
2 {'ordinal': 2, 'lineage': set([0]), 'children': [3], 'parent': 1}
3 {'ordinal': 3, 'lineage': set([0]), 'children': None, 'parent': 2}
4 {'ordinal': 1, 'lineage': set([1]), 'children': [5], 'parent': 0}
5 {'ordinal': 2, 'lineage': set([1]), 'children': None, 'parent': 4}
>>> for x, y in lex.active_variables.items():
... print x, y
tree {'d': False}
simulation {'DIAM_CLASS_WIDTH': False}
sample_tree {'h': False}
sample_plot {'BA': False}
stratum {'SP': False, 'N': False}
comp_unit {'SC': False, 'Stand label': False, 'BA': False}
>>> lex.get_variable_ind('tree', 'd')
(3, 0)
>>> lex.active_variables['tree']
{'d': True}
>>> lex.get_variable_ind('simulation', 'DIAM_CLASS_WIDTH')
(0, 0)
>>> lex.get_variable_ind('comp_unit', 'SC')
(1, 0)
>>> cd = lex.get_content_def()
>>> cd[('simulation', 0)]
[('DIAM_CLASS_WIDTH', 0)]
>>> cd[('comp_unit', 1)]
[('SC', 0)]
>>> lex.models
{}
>>> lex.add_model('prediction', 'testmodel')
>>> lex.add_model('aggregation', 'testmodel')
>>> lex.add_model('prediction', 'testmodel')
>>> lex.add_model('operation', 'testmodel1')
0
>>> lex.add_model('operation', 'testmodel2')
1
>>> lex.add_model('operation', 'testmodel2')
1
>>> lex.models['prediction']
set(['testmodel'])
>>> lex.models['aggregation']
set(['testmodel'])
>>> lex.models['operation']
set(['testmodel2', 'testmodel1'])
>>> l = lex.levels
>>> t = l['tree']
>>> var = t.variables['d']
>>> var.desc
'Diameter...'
>>> s = l['stratum']
>>> s.categorical
set(['SP'])
>>> s.numerical
set(['N'])
>>> sp = s.variables['SP']
>>> sp.name
'SP'
>>> sp.unit
>>> sp.maximum
>>> sp.minimum
>>> sp.desc
'Tree species...'
>>> sp.values
{1.0: 'Pine', 2.0: 'Spruce'}
>>> lex.reset_active_variables_and_models()
>>> lex.active_variables['tree']
{'d': False}
>>> lex.models
{}
>>> lex.operation_ind
{}
Parse a single level and store level information into lexicon.
Store variable indices, level indices etc. from a single level. IMPORTANT: any variable name longer than 30 characters will result in an error.
Validate level against lexicon:
>>> lex.get_level_ind('comp_unit')
1
>>> lex.get_level_ind('compunit')
Validate the variable name and level against the lexicon; create a new index if one does not already exist:
>>> lex.get_variable_ind('simulation', 'DIAM_CLASS_WIDTH')
(0, 0)
>>> lex.get_variable_ind('stratum', 'SP')
(2, 0)
>>> lex.get_variable_ind('stratum', 'N', True)
(None, None)
>>> lex.get_variable_ind('stratum', 'N')
(2, 1)
>>> lex.get_variable_ind('stratum', 'SP', True)
(2, 0)
>>> lex.get_variable_ind('no-level', 'SP')
(None, None)
>>> lex.get_variable_ind('stratum', 'no-variable')
(None, None)
Get level name:
>>> lex.get_level_name(2)
'stratum'
>>> lex.get_level_name(10)
Get variable name:
>>> lex.get_variable_name(1, 0)
'SC'
>>> lex.get_variable_name(10, 1)
>>> lex.get_variable_name(1, 10)
Check whether parentcandidate is a parent level of childcandidate level:
>>> lex.is_parent(0, 0)
False
>>> lex.is_parent(0, 1)
True
>>> lex.is_parent(0, 3)
True
>>> lex.is_parent(0, 5)
True
>>> lex.is_parent(1, 3)
True
>>> lex.is_parent(1, 5)
False
Check whether childcandidate is a child level of parentcandidate level:
>>> lex.is_child(0, 0)
False
>>> lex.is_child(1, 0)
True
>>> lex.is_child(3, 0)
True
>>> lex.is_child(5, 0)
True
>>> lex.is_child(3, 1)
True
>>> lex.is_child(5, 1)
False
>>> for x, y in lex.variable_ind.items():
... print x, y
tree {'d': 0}
simulation {'DIAM_CLASS_WIDTH': 0}
sample_tree {'h': None}
sample_plot {'BA': None}
stratum {'SP': 0, 'N': 1}
comp_unit {'SC': 0, 'Stand label': None, 'BA': None}
>>> for x, y in lex.variable_name.items():
... print x, y
0 {0: 'DIAM_CLASS_WIDTH'}
1 {0: 'SC'}
2 {0: 'SP', 1: 'N'}
3 {0: 'd'}
4 {}
5 {}
Make sure that self and another lexicon are compatible. This requires that the level names and the parent-child relations match between the two lexicons.
Parameters
lexicon -- lexicon instance to be merged to self
mylevel -- current level index for self
lexlevel -- current level index for lexicon
Match two lexicons with compatible data hierarchies
>>> lex2 = ldef2.obj["Additional lexicon"]
>>> lex._match_hierarchy(lex2, 0, 0)
True
Match two lexicons with incompatible data hierarchies
>>> lex3 = ldef3.obj["Invalid lexicon"]
>>> lex._match_hierarchy(lex3, 0, 0)
False
Merge self with another lexicon. This way lexicons can be built from a master lexicon and a number of additional lexicons.
Parameters
lexicon -- Lexicon instance to be joined with self
>>> from minimock import Mock
>>> logger = Mock('Logger')
Merge an additional lexicon into the master lexicon
>>> lex.merge(lex2, logger)
Called Logger.log(
'warning',
"variable 'Stand label' defined in both master and extra lexicons; master lexicon definition will be overwritten.")
True
Make sure that the new variables were actually added to the master lexicon
>>> lex.get_variable_ind('simulation', 'NUM_OF_CLASSES')
(0, 1)
>>> lex.get_variable_ind('comp_unit', 'D_gM')
(1, 1)
>>> lex.get_variable_ind('comp_unit', 'DEVEL_CLASS')
(1, 2)
>>> lex.get_variable_ind('stratum', 'H_gM')
(2, 2)
>>> lex.get_variable_ind('sample_plot', 'RADIUS')
(4, 0)
Check that all level and variable information was also merged
>>> lex.levels['comp_unit'].variables['BA'].desc
'Basal area...'
>>> lex.levels['comp_unit'].variables['DEVEL_CLASS'].desc
'Development class...'
>>> lex.levels['comp_unit'].variables['DEVEL_CLASS'].values
{1.0: 'Seedling stand'}
Try to merge an incompatible lexicon into the master lexicon
>>> lex.merge(lex3, logger)
Called Logger.log(
'error',
'mismatching data hierarchies, cannot merge lexicons')
False