Table Of Contents

Previous topic

db.py

Next topic

importops.py

This Page

importdata.py

class DataImporter(object):

Importing data from ‘inlined’ format; i.e., the data for all levels is in a single file:

>>> from simo.input.importdata import DataImporter
>>> execfile('input/test/mock4importdata.py')
>>> #from simo.input.test.mock4importdata import *
>>> imp = DataImporter(inputdb, mapping, importdate,
...                    logger, logname, lexicon, 100)
>>> imp.import_data('inlined', [inline], 'simulation')
...     
...     
Called DataDB.get_main_level()
Called Lexicon.get_level_name(None)
Called Logger.log_message('testlog', 'info', 'Importing data...')
Called DataDB.drop_id(u'stand1')
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
    {'simulation': [(datetime.date(2009, 1, 6), {'oid': 'simulation', 'values': [], 'id': 'simulation', 'parent id': None})]},
    0,
    0)
Called DataDB.add_data_from_dictionary(
    {'comp_unit': [(datetime.date(2009, 1, 6), {'oid': u'stand1', 'values': [('DEV_CLASS', 1), ('ORIG_DC', 1.0), ('MAIN_GROUP', 1), ('SOMETHING_ELSE', 99), ('Inventory_date', 733413), ('USE_RESTRICTION_SILVIC', '0'), ('USE_RESTRICTION_HARVEST', '0')], 'id': u'stand1', 'parent id': 'simulation'})]},
    0,
    0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
    {'stratum': [(datetime.date(2009, 1, 6), {'parent level': 1, 'oid': u'stratum1_1', 'values': [('BA', 200.0), ('BT', u'test')], 'id': u'stand1-stratum1_1', 'parent id': u'stand1'})]},
    0,
    0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
    {'stratum': [(datetime.date(2009, 1, 6), {'parent level': 1, 'oid': u'stratum1_2', 'values': [('BA', 22.0), ('BT', u'piece')], 'id': u'stand1-stratum1_2', 'parent id': u'stand1'})]},
    0,
    0)
Called DataDB.drop_id(u'stand2')
Called Logger.log_message(
    'testlog',
    'error',
    u'REJECTING: comp_unit stand2; MAIN_GROUP (4) in [4, 5, 6, 7, 8]')
Called DataDB.row_count('simulation')
Called Logger.log_message(
    'testlog',
    'info',
    'In total 2 simulation units processed')
Called Logger.log_message(
    'testlog',
    'info',
    'In total 1 simulation units imported')
Called DataDB.db.commit()
Called DataDB.db.vacuum_analyze()
False

Importing data in ‘by_level’ format; i.e., each data level has its own file:

>>> imp.import_data('by_level', by_level, 'simulation')
...     
...     
Called DataDB.get_main_level()
Called Lexicon.get_level_name(None)
Called Logger.log_message('testlog', 'info', 'Importing data...')
Called DataDB.drop_id(u'stand1')
Called DataDB.drop_id(u'stand2')
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
    {'comp_unit': [(datetime.date(2009, 1, 1), {'oid': u'stand1', 'values': [('MAIN_GROUP', 1), ('SOMETHING_ELSE', 99), ('Inventory_date', 733408), ('USE_RESTRICTION_SILVIC', '0'), ('USE_RESTRICTION_HARVEST', '0')], 'id': u'stand1', 'parent id': 'simulation'})]},
    0,
    0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
    {'comp_unit': [(datetime.date(2009, 12, 31), {'oid': u'stand2', 'values': [('MAIN_GROUP', 1), ('SOMETHING_ELSE', 99), ('Inventory_date', 733772), ('USE_RESTRICTION_SILVIC', '0'), ('USE_RESTRICTION_HARVEST', '0')], 'id': u'stand2', 'parent id': 'simulation'})]},
    0,
    0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
    {'stratum': [(datetime.date(2009, 1, 1), {'parent level': 1, 'oid': u'stratum1_1', 'values': [('BA', 200.0), ('BT', u'oh')], 'id': u'stand1-stratum1_1', 'parent id': u'stand1'})]},
    0,
    0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
    {'stratum': [(datetime.date(2009, 1, 1), {'parent level': 1, 'oid': u'stratum1_2', 'values': [('BA', 22.0), ('BT', u'which')], 'id': u'stand1-stratum1_2', 'parent id': u'stand1'})]},
    0,
    0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
    {'stratum': [(datetime.date(2009, 12, 31), {'parent level': 1, 'oid': u'stratum2_1', 'values': [('BA', 31.0), ('BT', u'is')], 'id': u'stand2-stratum2_1', 'parent id': u'stand2'})]},
    0,
    0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
    {'stratum': [(datetime.date(2009, 12, 31), {'parent level': 1, 'oid': u'stratum2_2', 'values': [('BA', 1.0), ('BT', u'infact')], 'id': u'stand2-stratum2_2', 'parent id': u'stand2'})]},
    0,
    0)
Called Logger.log_message(
    'testlog',
    'info',
    'In total 2 simulation units processed')
Called Logger.log_message(
    'testlog',
    'info',
    'In total 2 simulation units imported')
Called DataDB.db.commit()
Called DataDB.db.vacuum_analyze()
False

>>> imp.errors
set([])

With skip_first. If used as here, when the first row shouldn’t really be skipped, this results in orphaned lower-level data objects in the database. Also tests id generation; the strata for stand2 have missing ids, so they’ll get ids 1 and 2:

>>> imp.import_data('inlined', [inline2], 'simulation', skip_first=True)
...      
...      
Called DataDB.get_main_level()
Called Lexicon.get_level_name(None)
Called Logger.log_message('testlog', 'info', 'Importing data...')
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
    {'stratum': [(None, {'parent level': 1, 'oid': u'stratum1_1', 'values': [('BA', 200.0), ('BT', u'pretty')], 'id': u'stratum1_1', 'parent id': None})]},
    0,
    0)
Called DataDB.drop_id(u'stand2')
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
    {'stratum': [(None, {'parent level': 1, 'oid': u'stratum1_2', 'values': [('BA', 22.0), ('BT', u'frekin')], 'id': u'stratum1_2', 'parent id': None})]},
    0,
    0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
    {'comp_unit': [(datetime.date(2009, 12, 31), {'oid': u'stand2', 'values': [('MAIN_GROUP', 1), ('SOMETHING_ELSE', 99), ('Inventory_date', 733772), ('USE_RESTRICTION_SILVIC', '0'), ('USE_RESTRICTION_HARVEST', '0')], 'id': u'stand2', 'parent id': 'simulation'})]},
    0,
    0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
    {'stratum': [(datetime.date(2009, 12, 31), {'parent level': 1, 'oid': '1', 'values': [('BA', 31.0), ('BT', u'hmm')], 'id': u'stand2-1', 'parent id': u'stand2'})]},
    0,
    0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
    {'stratum': [(datetime.date(2009, 12, 31), {'parent level': 1, 'oid': '2', 'values': [('BA', 1.0), ('BT', u'wait')], 'id': u'stand2-2', 'parent id': u'stand2'})]},
    0,
    0)
Called Logger.log_message(
    'testlog',
    'info',
    'In total 1 simulation units processed')
Called Logger.log_message(
    'testlog',
    'info',
    'In total 1 simulation units imported')
Called DataDB.db.commit()
Called DataDB.db.vacuum_analyze()
False

Specifying a separator to be used instead of the default whitespace:

>>> imp.import_data('inlined', [inline3], 'simulation', separator=';')
...      
...      
Called...
    'In total 2 simulation units imported')
Called DataDB.db.commit()
Called DataDB.db.vacuum_analyze()
False

By-level import for only one level, which is not the top level, with the given data date:

>>> from datetime import date
>>> data_date = date(2009, 5, 6)
>>> imp.import_data('by_level', by_level2, 'simulation', level_ind=[1],
...                 data_date=data_date, clear_old=False)
...      
...      
Called DataDB.get_main_level()
Called Lexicon.get_level_name(None)
Called Logger.log_message('testlog', 'info', 'Importing data...')
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
    {'stratum': [(datetime.date(2009, 5, 6), {'parent level': 1, 'oid': u'stratum1_1', 'values': [('BA', 200.0), ('BT', u'oh')], 'id': u'stand1-stratum1_1', 'parent id': u'stand1'})]},
    0,
    0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
    {'stratum': [(datetime.date(2009, 5, 6), {'parent level': 1, 'oid': u'stratum1_2', 'values': [('BA', 22.0), ('BT', u'which')], 'id': u'stand1-stratum1_2', 'parent id': u'stand1'})]},
    0,
    0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
    {'stratum': [(datetime.date(2009, 5, 6), {'parent level': 1, 'oid': u'stratum2_1', 'values': [('BA', 31.0), ('BT', u'is')], 'id': u'stand2-stratum2_1', 'parent id': u'stand2'})]},
    0,
    0)
Called DataDB.row_count('simulation')
Called DataDB.add_data_from_dictionary(
    {'stratum': [(datetime.date(2009, 5, 6), {'parent level': 1, 'oid': u'stratum2_2', 'values': [('BA', 1.0), ('BT', u'infact')], 'id': u'stand2-stratum2_2', 'parent id': u'stand2'})]},
    0,
    0)
Called Logger.log_message(
    'testlog',
    'info',
    'In total 0 simulation units processed')
Called Logger.log_message(
    'testlog',
    'info',
    'In total 0 simulation units imported')
Called DataDB.db.commit()
Called DataDB.db.vacuum_analyze()
False

def _construct_unique_id(self, lind, oid, pid, bottom_level):

Construct a unique id for a top-level stand

>>> imp._construct_unique_id(1, '1', 'simulation', False)
'1'

Try to construct unique id with an invalid call

>>> imp._construct_unique_id(2, '1', 'stratum1_2', True)
Called Logger.log_message(
    'testlog',
    'error',
    "no parent path available from 'stratum' to 'stratum'!")
'stratum1_2-1'

Reset oids, which would happen when calling import_data, as otherwise the following call generates an error, which it should not do

>>> imp.oids = {}

Construct unique id for bottom level stratum

>>> imp._construct_unique_id(2, '1', 'stand1', True)
'stand1-1'

def _parse_date(self, datestr):

Parse a date string into a datetime object.

>>> dates = ['230209', '23.07.09', '23-07-09', '23/07/09',
...          '23072009', '23.07.2009', '23-07-2009', '23/07/2009',
...          '2009-07-23', 'fail']
>>> [imp._parse_date(date) for date in dates] 
Called Logger.log_message('testlog', 'error', "invalid date format 'fail'")
[datetime.date(2009, 2, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), None]

Parse dates with month-first order

>>> imp.month_first = True
>>> dates = ['022309', '07.23.09', '07-23-09', '07/23/09',
...          '07232009', '07.23.2009', '07-23-2009', '07/23/2009',
...          '2009-07-23', 'fail']
>>> [imp._parse_date(date) for date in dates] 
[datetime.date(2009, 2, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), datetime.date(2009, 7, 23), None]

def _split_row(self, line, sep):

Checks that the current line is valid and then splits the line with the given separator

Parameters

line -- input data line, string
sep -- column separator, string or None

Split some valid rows

>>> imp._split_row('1;2;3;4;5', ';')
[u'1', u'2', u'3', u'4', u'5']
>>> imp._split_row('1 2 3 4 5', ' ')
[u'1', u'2', u'3', u'4', u'5']
>>> imp._split_row('1  2  3  4  5', ' ')
[u'1', u'', u'2', u'', u'3', u'', u'4', u'', u'5']
>>> imp._split_row('1 2 3 4 5', None)
[u'1', u'2', u'3', u'4', u'5']
>>> imp._split_row('1\t2\t3\t4\t5', '\t')
[u'1', u'2', u'3', u'4', u'5']

Try to split some rows with mismatching line content and separator

>>> imp._split_row('1\t2\t3\t4\t5', ' ')
>>> imp._split_row('1 2 3 4 5', '\t')
>>> imp._split_row('1;2;3;4;5', ' ')
>>> imp._split_row('1;2;3;4;5', ',')

Split some invalid rows

>>> imp._split_row(' THIS IS AN ERRONEUS ROW   ', '\t')
>>> imp._split_row(' THIS IS AN ERRONEUS ROW   ', ';')
>>> imp._split_row('    ', ' ')

Still, some rows might be invalid but impossible to reject

>>> imp._split_row(' THIS IS AN ERRONEUS ROW   ', ' ')
[u'', u'THIS', u'IS', u'AN', u'ERRONEUS', u'ROW', u'', u'', u'']

Split a row with some unicode as ascii

>>> imp._split_row('Asdf;V\xc3\xa4\xc3\xa4n\xc3\xa4nen', ';')
[u'Asdf', u'V\xe4\xe4n\xe4nen']

Split a row with some iso-8859-1 as ascii WITHOUT the encoding

>>> imp._split_row('Asdf;V\xe4\xe4n\xe4nen', ';')
Called Logger.log_message(
    'testlog',
    'error',
    'Failed to decode import value V??n?nen')
[u'Asdf', u'V\ufffd\ufffdn\ufffdnen']

And then after adding that encoding

>>> imp.encodings = ['utf8', 'iso-8859-1']
>>> imp._split_row('Asdf;V\xe4\xe4n\xe4nen', ';')
[u'Asdf', u'V\xe4\xe4n\xe4nen']