Importing data from the ‘inlined’ format, i.e. the data for all levels is in one file:
>>> from simo.input.importdata import DataImporter
>>> execfile('input/test/mock4importdata.py')
>>> #from simo.input.test.mock4importdata import *
>>> imp = DataImporter(inputdb, mapping, importdate,
... logger, logname, lexicon, 100)
>>> imp.import_data('inlined', [inline], 'simulation')
...
...
Called DataDB.get_main_level()
Called Lexicon.get_level_name(None)
Called Logger.log_message('testlog', 'info', 'Importing data...')
Called DataDB.drop_id(u'stand1')
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
{'simulation': [(datetime.date(2009, 1, 6), {'oid': 'simulation', 'values': [], 'id': 'simulation', 'parent id': None})]},
0,
0)
Called DataDB.add_data_from_dictionary(
{'comp_unit': [(datetime.date(2009, 1, 6), {'oid': u'stand1', 'values': [('DEV_CLASS', 1), ('ORIG_DC', 1.0), ('MAIN_GROUP', 1), ('SOMETHING_ELSE', 99), ('Inventory_date', 733413), ('USE_RESTRICTION_SILVIC', '0'), ('USE_RESTRICTION_HARVEST', '0')], 'id': u'stand1', 'parent id': 'simulation'})]},
0,
0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
{'stratum': [(datetime.date(2009, 1, 6), {'parent level': 1, 'oid': u'stratum1_1', 'values': [('BA', 200.0), ('BT', u'test')], 'id': u'stand1-stratum1_1', 'parent id': u'stand1'})]},
0,
0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
{'stratum': [(datetime.date(2009, 1, 6), {'parent level': 1, 'oid': u'stratum1_2', 'values': [('BA', 22.0), ('BT', u'piece')], 'id': u'stand1-stratum1_2', 'parent id': u'stand1'})]},
0,
0)
Called DataDB.drop_id(u'stand2')
Called Logger.log_message(
'testlog',
'error',
u'REJECTING: comp_unit stand2; MAIN_GROUP (4) in [4, 5, 6, 7, 8]')
Called DataDB.row_count('simulation')
Called Logger.log_message(
'testlog',
'info',
'In total 2 simulation units processed')
Called Logger.log_message(
'testlog',
'info',
'In total 1 simulation units imported')
Called DataDB.db.commit()
Called DataDB.db.vacuum_analyze()
False
Importing data in the ‘by_level’ format, i.e. each data level has its own file:
>>> imp.import_data('by_level', by_level, 'simulation')
...
...
Called DataDB.get_main_level()
Called Lexicon.get_level_name(None)
Called Logger.log_message('testlog', 'info', 'Importing data...')
Called DataDB.drop_id(u'stand1')
Called DataDB.drop_id(u'stand2')
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
{'comp_unit': [(datetime.date(2009, 1, 1), {'oid': u'stand1', 'values': [('MAIN_GROUP', 1), ('SOMETHING_ELSE', 99), ('Inventory_date', 733408), ('USE_RESTRICTION_SILVIC', '0'), ('USE_RESTRICTION_HARVEST', '0')], 'id': u'stand1', 'parent id': 'simulation'})]},
0,
0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
{'comp_unit': [(datetime.date(2009, 12, 31), {'oid': u'stand2', 'values': [('MAIN_GROUP', 1), ('SOMETHING_ELSE', 99), ('Inventory_date', 733772), ('USE_RESTRICTION_SILVIC', '0'), ('USE_RESTRICTION_HARVEST', '0')], 'id': u'stand2', 'parent id': 'simulation'})]},
0,
0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
{'stratum': [(datetime.date(2009, 1, 1), {'parent level': 1, 'oid': u'stratum1_1', 'values': [('BA', 200.0), ('BT', u'oh')], 'id': u'stand1-stratum1_1', 'parent id': u'stand1'})]},
0,
0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
{'stratum': [(datetime.date(2009, 1, 1), {'parent level': 1, 'oid': u'stratum1_2', 'values': [('BA', 22.0), ('BT', u'which')], 'id': u'stand1-stratum1_2', 'parent id': u'stand1'})]},
0,
0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
{'stratum': [(datetime.date(2009, 12, 31), {'parent level': 1, 'oid': u'stratum2_1', 'values': [('BA', 31.0), ('BT', u'is')], 'id': u'stand2-stratum2_1', 'parent id': u'stand2'})]},
0,
0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
{'stratum': [(datetime.date(2009, 12, 31), {'parent level': 1, 'oid': u'stratum2_2', 'values': [('BA', 1.0), ('BT', u'infact')], 'id': u'stand2-stratum2_2', 'parent id': u'stand2'})]},
0,
0)
Called Logger.log_message(
'testlog',
'info',
'In total 2 simulation units processed')
Called Logger.log_message(
'testlog',
'info',
'In total 2 simulation units imported')
Called DataDB.db.commit()
Called DataDB.db.vacuum_analyze()
False
>>> imp.errors
set([])
With skip_first. When used as here, where the first row shouldn’t really be skipped, it results in orphaned lower data level objects in the database. This also tests id generation; the strata for stand2 have missing ids, so they’ll get ids 1 and 2:
>>> imp.import_data('inlined', [inline2], 'simulation', skip_first=True)
...
...
Called DataDB.get_main_level()
Called Lexicon.get_level_name(None)
Called Logger.log_message('testlog', 'info', 'Importing data...')
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
{'stratum': [(None, {'parent level': 1, 'oid': u'stratum1_1', 'values': [('BA', 200.0), ('BT', u'pretty')], 'id': u'stratum1_1', 'parent id': None})]},
0,
0)
Called DataDB.drop_id(u'stand2')
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
{'stratum': [(None, {'parent level': 1, 'oid': u'stratum1_2', 'values': [('BA', 22.0), ('BT', u'frekin')], 'id': u'stratum1_2', 'parent id': None})]},
0,
0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
{'comp_unit': [(datetime.date(2009, 12, 31), {'oid': u'stand2', 'values': [('MAIN_GROUP', 1), ('SOMETHING_ELSE', 99), ('Inventory_date', 733772), ('USE_RESTRICTION_SILVIC', '0'), ('USE_RESTRICTION_HARVEST', '0')], 'id': u'stand2', 'parent id': 'simulation'})]},
0,
0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
{'stratum': [(datetime.date(2009, 12, 31), {'parent level': 1, 'oid': '1', 'values': [('BA', 31.0), ('BT', u'hmm')], 'id': u'stand2-1', 'parent id': u'stand2'})]},
0,
0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
{'stratum': [(datetime.date(2009, 12, 31), {'parent level': 1, 'oid': '2', 'values': [('BA', 1.0), ('BT', u'wait')], 'id': u'stand2-2', 'parent id': u'stand2'})]},
0,
0)
Called Logger.log_message(
'testlog',
'info',
'In total 1 simulation units processed')
Called Logger.log_message(
'testlog',
'info',
'In total 1 simulation units imported')
Called DataDB.db.commit()
Called DataDB.db.vacuum_analyze()
False
Specifying a separator to be used instead of the default whitespace:
>>> imp.import_data('inlined', [inline3], 'simulation', separator=';')
...
...
Called...
'In total 2 simulation units imported')
Called DataDB.db.commit()
Called DataDB.db.vacuum_analyze()
False
By-level import of only one level, which is not the top level, using the given data date:
>>> from datetime import date
>>> data_date = date(2009, 5, 6)
>>> imp.import_data('by_level', by_level2, 'simulation', level_ind=[1],
... data_date=data_date, clear_old=False)
...
...
Called DataDB.get_main_level()
Called Lexicon.get_level_name(None)
Called Logger.log_message('testlog', 'info', 'Importing data...')
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
{'stratum': [(datetime.date(2009, 5, 6), {'parent level': 1, 'oid': u'stratum1_1', 'values': [('BA', 200.0), ('BT', u'oh')], 'id': u'stand1-stratum1_1', 'parent id': u'stand1'})]},
0,
0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
{'stratum': [(datetime.date(2009, 5, 6), {'parent level': 1, 'oid': u'stratum1_2', 'values': [('BA', 22.0), ('BT', u'which')], 'id': u'stand1-stratum1_2', 'parent id': u'stand1'})]},
0,
0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
{'stratum': [(datetime.date(2009, 5, 6), {'parent level': 1, 'oid': u'stratum2_1', 'values': [('BA', 31.0), ('BT', u'is')], 'id': u'stand2-stratum2_1', 'parent id': u'stand2'})]},
0,
0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
{'stratum': [(datetime.date(2009, 5, 6), {'parent level': 1, 'oid': u'stratum2_2', 'values': [('BA', 1.0), ('BT', u'infact')], 'id': u'stand2-stratum2_2', 'parent id': u'stand2'})]},
0,
0)
Called Logger.log_message(
'testlog',
'info',
'In total 0 simulation units processed')
Called Logger.log_message(
'testlog',
'info',
'In total 0 simulation units imported')
Called DataDB.db.commit()
Called DataDB.db.vacuum_analyze()
False
Construct a unique id for a top-level stand
>>> imp._construct_unique_id(1, '1', 'simulation', False)
'1'
Try to construct unique id with an invalid call
>>> imp._construct_unique_id(2, '1', 'stratum1_2', True)
Called Logger.log_message(
'testlog',
'error',
"no parent path available from 'stratum' to 'stratum'!")
'stratum1_2-1'
Reset oids, which would happen when calling import_data; otherwise the following call would generate an error, which it should not do:
>>> imp.oids = {}
Construct unique id for bottom level stratum
>>> imp._construct_unique_id(2, '1', 'stand1', True)
'stand1-1'
Parse date strings into datetime.date objects; an invalid date string logs an error and yields None.
>>> dates = ['230209', '23.07.09', '23-07-09', '23/07/09',
... '23072009', '23.07.2009', '23-07-2009', '23/07/2009',
... '2009-07-23', 'fail']
>>> [imp._parse_date(date) for date in dates]
Called Logger.log_message('testlog', 'error', "invalid date format 'fail'")
[datetime.date(2009, 2, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), None]
Parse dates with month-first order
>>> imp.month_first = True
>>> dates = ['022309', '07.23.09', '07-23-09', '07/23/09',
... '07232009', '07.23.2009', '07-23-2009', '07/23/2009',
... '2009-07-23', 'fail']
>>> [imp._parse_date(date) for date in dates]
[datetime.date(2009, 2, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), None]
Checks that the current line is valid and then splits the line with the given separator
Parameters
line -- input data line, string
sep -- column separator, string or None
Split some valid rows
>>> imp._split_row('1;2;3;4;5', ';')
[u'1', u'2', u'3', u'4', u'5']
>>> imp._split_row('1 2 3 4 5', ' ')
[u'1', u'2', u'3', u'4', u'5']
>>> imp._split_row('1  2  3  4  5', ' ')
[u'1', u'', u'2', u'', u'3', u'', u'4', u'', u'5']
>>> imp._split_row('1 2 3 4 5', None)
[u'1', u'2', u'3', u'4', u'5']
>>> imp._split_row('1\t2\t3\t4\t5', '\t')
[u'1', u'2', u'3', u'4', u'5']
Try to split some rows with mismatching line content and separator
>>> imp._split_row('1\t2\t3\t4\t5', ' ')
>>> imp._split_row('1 2 3 4 5', '\t')
>>> imp._split_row('1;2;3;4;5', ' ')
>>> imp._split_row('1;2;3;4;5', ',')
Split some invalid rows
>>> imp._split_row(' THIS IS AN ERRONEUS ROW ', '\t')
>>> imp._split_row(' THIS IS AN ERRONEUS ROW ', ';')
>>> imp._split_row(' ', ' ')
Still, some invalid rows are impossible to detect and block:
>>> imp._split_row(' THIS IS AN ERRONEUS ROW ', ' ')
[u'', u'THIS', u'IS', u'AN', u'ERRONEUS', u'ROW', u'', u'', u'']
Split a row containing UTF-8 encoded non-ASCII characters given as a byte string
>>> imp._split_row('Asdf;V\xc3\xa4\xc3\xa4n\xc3\xa4nen', ';')
[u'Asdf', u'V\xe4\xe4n\xe4nen']
Split a row containing ISO-8859-1 encoded characters given as a byte string WITHOUT that encoding being configured
>>> imp._split_row('Asdf;V\xe4\xe4n\xe4nen', ';')
Called Logger.log_message(
'testlog',
'error',
'Failed to decode import value V??n?nen')
[u'Asdf', u'V\ufffd\ufffdn\ufffdnen']
And then after adding that encoding
>>> imp.encodings = ['utf8', 'iso-8859-1']
>>> imp._split_row('Asdf;V\xe4\xe4n\xe4nen', ';')
[u'Asdf', u'V\xe4\xe4n\xe4nen']