Mirror of https://github.com/balkian/tsih.git

First commit

J. Fernando Sánchez 2021-10-15 17:50:24 +02:00
commit 70779fa0ad
13 changed files with 1291 additions and 0 deletions

.gitignore vendored Normal file

@@ -0,0 +1,5 @@
.*
*.pyc
__pycache__
build
dist

LICENSE Normal file

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2016 Jesús Manuel Sánchez Martínez - Grupo de Sistemas Inteligentes (GSI) DIT UPM
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

MANIFEST.in Normal file

@@ -0,0 +1,6 @@
include requirements.txt
include test-requirements.txt
include README.md
graft tsih
global-exclude __pycache__
global-exclude *.py[co]

README.md Normal file

@@ -0,0 +1,91 @@
# TSIH - A dict with a HISTory
`tsih.Dict` is a `UserDict` subclass that allows versioning, backed by a `sqlite3` database.
* Transparent operation
* Only changes (deltas) are stored.
* Forward-filling of values. A value is reused in future versions, unless it changes.
* Auto-versioning option (off by default), to produce a new version every time a value changes.
* Ability to store related entries as separate dictionaries. Each `tsih.Dict` has a `dict_name` that is used in the database to identify the dictionary.
* Tuple-based indexing. Get and set values by `dict_name`, `version` and `key`.
## Usage and examples
`tsih.Dict` objects can be used just like regular dictionaries:
```python
>>> from tsih import Dict
>>> a = Dict()
>>> a['test'] = True
>>> a
{'test': True}
>>> a.get('missing', 5)
5
>>> a['missing']
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
KeyError: 'missing'
```
But at any point, new versions can be produced:
```python
>>> a.version
0
>>> a['start'] = 'now'
>>> a
{'test': True, 'start': 'now'}
>>> a.version = 1
>>> a['start'] = 'one version ago'
>>> a
{'test': True, 'start': 'one version ago'}
```
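The `auto_version` option mentioned above bumps the version automatically on every assignment. A minimal sketch of this behavior, mirroring `tests/test_main.py`:
```python
>>> b = Dict(version=0, auto_version=True)
>>> b['text'] = 'hello'   # the version is bumped to 1 before recording
>>> b['text'] = 'world'   # ...and to 2 here
>>> b[(1, 'text')]
'hello'
>>> b[(2, 'text')]
'world'
```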
Previous values can be accessed using tuple keys, i.e., `(version, key)`:
```python
>>> a[(0, 'start')]
'now'
>>> a[(1, 'start')]
'one version ago'
```
Each version only "records" changes, but later versions (even if they don't exist yet) inherit unchanged values from the previous ones:
```python
>>> a[(5, 'start')]
'one version ago'
>>> a.version = 5
>>> # Until the value is changed
>>> a['start'] = '4 versions ago'
>>> a[(5, 'start')]
'4 versions ago'
```
You can access *every* state of the Dict by using `None` in place of the version and/or the key.
In that case, you get an iterator, which can be turned into a list explicitly or with the `.value()` method.
For example, here we get all the changes to the `start` key:
```python
>>> a[(None, 'start')].value()
[(0.0, 'now'), (1.0, 'one version ago'), (5.0, '4 versions ago')]
```
Similarly, to get the keys and values at a specific version:
```python
>>> list(a[(0, None)])
[('start', 'now'), ('test', True)]
```
Or, we can combine both to get the keys and values at every version:
```python
>>> a[(None, None)].value()
[(0.0, 'start', 'now'), (1.0, 'start', 'one version ago'), (5.0, 'start', '4 versions ago'), (0.0, 'test', True), (1.0, 'test', True), (5.0, 'test', True)]
```
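Dictionaries can also be given a name and backed by a file, so that their history outlives the object itself. A short sketch based on `tests/test_main.py` (the `robot` name and `history.sqlite` path are just illustrative):
```python
>>> d = Dict(name='robot', db_path='history.sqlite')
>>> d['text'] = 'hello'
>>> d.version = 25
>>> d['text'] = 'world'
>>> del d                 # closes the connection and flushes pending writes
>>> recovered = Dict(name='robot', db_path='history.sqlite')
>>> recovered[(24, 'text')]   # forward-filled from version 0
'hello'
>>> recovered[(25, 'text')]
'world'
```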
## Use cases
Tsih was originally part of the [Soil](https://github.com/gsi-upm/soil) Agent-Based Social Simulation framework, where both the environment and the agents need to keep track of state (i.e., attribute) changes.
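Internally, `tsih.Dict` delegates storage to the lower-level `History` class, which can also be used directly to record `(dict_id, t_step, key, value)` tuples for several dictionaries at once. A minimal sketch, based on `tests/test_history.py`:
```python
>>> from tsih import History
>>> h = History()               # no db_path: backed by a temporary sqlite file
>>> h.save_tuples([
...     ('a_0', 0, 'id', 'h'),
...     ('a_0', 1, 'id', 'e'),
... ])
>>> h['a_0', 0, 'id']
'h'
>>> h['a_0', 3, 'id']           # forward-filled from t_step 1
'e'
```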

requirements.txt Normal file (empty)

setup.cfg Normal file

@@ -0,0 +1,4 @@
[aliases]
test=pytest
[tool:pytest]
addopts = --verbose

setup.py Normal file

@@ -0,0 +1,57 @@
import os
import re
from setuptools import setup
from pathlib import Path
this_directory = Path(__file__).parent
long_description = (this_directory / "README.md").read_text()
version = ""
with open(os.path.join('tsih', '__init__.py')) as f:
version = re.search(
r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', f.read(), re.MULTILINE
).group(1)
assert version
def parse_requirements(filename):
""" load requirements from a pip requirements file """
with open(filename, 'r') as f:
lineiter = list(line.strip() for line in f)
return [line for line in lineiter if line and not line.startswith("#")]
install_reqs = parse_requirements("requirements.txt")
test_reqs = parse_requirements("test-requirements.txt")
extras_require = {}
# No optional extras are defined yet; 'all' simply aggregates every extras group.
extras_require['all'] = [dep for package in extras_require.values() for dep in package]
setup(
name='tsih',
packages=['tsih'], # this must be the same as the name above
version=version,
description=("A lightweight library to store an object's history into a SQL database"),
long_description=long_description,
long_description_content_type='text/markdown',
author='J. Fernando Sanchez',
author_email='jf.sanchez@upm.es',
url='https://github.com/balkian/tsih', # use the URL to the github repo
download_url='https://github.com/balkian/tsih/archive/{}.tar.gz'.format(
version),
keywords=['history', 'sql', 'records'],
classifiers=[
'Development Status :: 4 - Beta',
'Environment :: Console',
'Intended Audience :: Developers',
'License :: OSI Approved :: Apache Software License',
'Operating System :: MacOS :: MacOS X',
'Operating System :: Microsoft :: Windows',
'Operating System :: POSIX',
'Programming Language :: Python :: 3'],
install_requires=install_reqs,
extras_require=extras_require,
tests_require=test_reqs,
setup_requires=['pytest-runner', ],
include_package_data=True,
)

test-requirements.txt Normal file

@@ -0,0 +1 @@
pytest

tests/test_history.py Normal file

@@ -0,0 +1,227 @@
from unittest import TestCase
import os
import shutil
from glob import glob
from tsih import *
from tsih import utils
ROOT = os.path.abspath(os.path.dirname(__file__))
DBROOT = os.path.join(ROOT, 'testdb')
class TestHistory(TestCase):
def setUp(self):
if not os.path.exists(DBROOT):
os.makedirs(DBROOT)
def tearDown(self):
if os.path.exists(DBROOT):
shutil.rmtree(DBROOT)
def test_history(self):
"""
"""
tuples = (
('a_0', 0, 'id', 'h'),
('a_0', 1, 'id', 'e'),
('a_0', 2, 'id', 'l'),
('a_0', 3, 'id', 'l'),
('a_0', 4, 'id', 'o'),
('a_1', 0, 'id', 'v'),
('a_1', 1, 'id', 'a'),
('a_1', 2, 'id', 'l'),
('a_1', 3, 'id', 'u'),
('a_1', 4, 'id', 'e'),
('env', 1, 'prob', 1),
('env', 3, 'prob', 2),
('env', 5, 'prob', 3),
('a_2', 7, 'finished', True),
)
h = History()
h.save_tuples(tuples)
# assert h['env', 0, 'prob'] == 0
for i in range(1, 7):
assert h['env', i, 'prob'] == ((i-1)//2)+1
for i, k in zip(range(5), 'hello'):
assert h['a_0', i, 'id'] == k
for record, value in zip(h['a_0', None, 'id'], 'hello'):
t_step, val = record
assert val == value
for i, k in zip(range(5), 'value'):
assert h['a_1', i, 'id'] == k
for i in range(5, 8):
assert h['a_1', i, 'id'] == 'e'
for i in range(7):
assert h['a_2', i, 'finished'] == False
assert h['a_2', 7, 'finished']
def test_history_gen(self):
"""
"""
tuples = (
('a_1', 0, 'id', 'v'),
('a_1', 1, 'id', 'a'),
('a_1', 2, 'id', 'l'),
('a_1', 3, 'id', 'u'),
('a_1', 4, 'id', 'e'),
('env', 1, 'prob', 1),
('env', 2, 'prob', 2),
('env', 3, 'prob', 3),
('a_2', 7, 'finished', True),
)
h = History()
h.save_tuples(tuples)
for t_step, key, value in h['env', None, None]:
assert t_step == value
assert key == 'prob'
records = list(h[None, 7, None])
assert len(records) == 3
for i in records:
agent_id, key, value = i
if agent_id == 'a_1':
assert key == 'id'
assert value == 'e'
elif agent_id == 'a_2':
assert key == 'finished'
assert value
else:
assert key == 'prob'
assert value == 3
records = h['a_1', 7, None]
assert records['id'] == 'e'
def test_history_file(self):
"""
History should be saved to a file
"""
tuples = (
('a_1', 0, 'id', 'v'),
('a_1', 1, 'id', 'a'),
('a_1', 2, 'id', 'l'),
('a_1', 3, 'id', 'u'),
('a_1', 4, 'id', 'e'),
('env', 1, 'prob', 1),
('env', 2, 'prob', 2),
('env', 3, 'prob', 3),
('a_2', 7, 'finished', True),
)
db_path = os.path.join(DBROOT, 'test')
h = History(db_path=db_path)
h.save_tuples(tuples)
h.flush_cache()
assert os.path.exists(db_path)
# Recover the data
recovered = History(db_path=db_path)
assert recovered['a_1', 0, 'id'] == 'v'
assert recovered['a_1', 4, 'id'] == 'e'
# Using backup=True should create a backup copy, and initialize an empty history
newhistory = History(db_path=db_path, backup=True)
backuppaths = glob(db_path + '.backup*.sqlite')
assert len(backuppaths) == 1
backuppath = backuppaths[0]
assert newhistory.db_path == h.db_path
assert os.path.exists(backuppath)
assert len(newhistory[None, None, None]) == 0
def test_interpolation(self):
"""
Values for a key are valid until a new value is introduced at a later version
"""
tuples = (
('a_1', 0, 'id', 'a'),
('a_1', 4, 'id', 'b'),
)
db_path = os.path.join(DBROOT, 'test')
h = History(db_path=db_path)
h.save_tuples(tuples)
h.flush_cache()
assert os.path.exists(db_path)
assert h['a_1', 2, 'id'] == 'a'
# Recover the data
recovered = History(db_path=db_path)
assert recovered['a_1', 0, 'id'] == 'a'
assert recovered['a_1', 4, 'id'] == 'b'
assert recovered['a_1', 2, 'id'] == 'a'
def test_history_tuples(self):
"""
The data recovered should be equal to the one recorded.
"""
tuples = (
('a_1', 0, 'id', 'v'),
('a_1', 1, 'id', 'a'),
('a_1', 2, 'id', 'l'),
('a_1', 3, 'id', 'u'),
('a_1', 4, 'id', 'e'),
('env', 1, 'prob', 1),
('env', 2, 'prob', 2),
('env', 3, 'prob', 3),
('a_2', 7, 'finished', True),
)
h = History()
h.save_tuples(tuples)
recovered = list(h.to_tuples())
assert recovered
for i in recovered:
assert i in tuples
def test_stats(self):
"""
The data recovered should be equal to the one recorded.
"""
tuples = (
('a_1', 0, 'id', 'v'),
('a_1', 1, 'id', 'a'),
('a_1', 2, 'id', 'l'),
('a_1', 3, 'id', 'u'),
('a_1', 4, 'id', 'e'),
('env', 1, 'prob', 1),
('env', 2, 'prob', 2),
('env', 3, 'prob', 3),
('a_2', 7, 'finished', True),
)
stat_tuples = [
{'num_infected': 5, 'runtime': 0.2},
{'num_infected': 5, 'runtime': 0.2},
{'new': '40'},
]
h = History()
h.save_tuples(tuples)
for stat in stat_tuples:
h.save_stats(stat)
recovered = h.get_stats()
assert recovered
assert recovered[0]['num_infected'] == 5
assert recovered[1]['runtime'] == 0.2
assert recovered[2]['new'] == '40'
def test_unflatten(self):
ex = {'count.neighbors.3': 4,
'count.times.2': 4,
'count.total.4': 4,
'mean.neighbors': 3,
'mean.times': 2,
'mean.total': 4,
't_step': 2,
'trial_id': 'exporter_sim_trial_1605817956-4475424'}
res = utils.unflatten_dict(ex)
assert 'count' in res
assert all(x in res['count'] for x in ['times', 'total', 'neighbors'])
assert res['count']['times']['2'] == 4
assert 'mean' in res
assert all(x in res['mean'] for x in ['times', 'total', 'neighbors'])
assert 't_step' in res
assert 'trial_id' in res

tests/test_main.py Normal file

@@ -0,0 +1,79 @@
from unittest import TestCase
import os
import shutil
import pathlib
from tsih import Dict
ROOT = pathlib.Path(os.path.abspath(os.path.dirname(__file__)))
DBROOT = ROOT / 'testdb'
class TestTsih(TestCase):
def setUp(self):
if not os.path.exists(DBROOT):
os.makedirs(DBROOT)
def tearDown(self):
if os.path.exists(DBROOT):
shutil.rmtree(DBROOT)
def test_basic(self):
'''The data stored in each version should be retrievable'''
d = Dict()
d['text'] = 'hello'
d.version = 1
d['text'] = 'world'
assert d[(0, 'text')] == 'hello'
assert d[(1, 'text')] == 'world'
def test_auto_version(self):
'''Changing a value when `auto_version` is on should produce a new version automatically'''
d = Dict(version=0, auto_version=True)
d['text'] = 'hello'
d['text'] = 'world'
assert d[(1, 'text')] == 'hello'
assert d[(2, 'text')] == 'world'
def test_serialized(self):
'''
Using the same database should enable retrieving the values of a previous
dictionary.
'''
d = Dict(name='robot', db_path=DBROOT / 'basic.sqlite')
d['text'] = 'hello'
d.version = 25
d['text'] = 'world'
assert d[(0, 'text')] == 'hello'
assert d[(24, 'text')] == 'hello'
assert d[(25, 'text')] == 'world'
del d
recovered = Dict(name='robot', db_path=DBROOT / 'basic.sqlite')
assert recovered[(0, 'text')] == 'hello'
assert recovered[(24, 'text')] == 'hello'
assert recovered[(25, 'text')] == 'world'
def test_custom(self):
'''
Inheriting from the Dict class should not change the behavior.
'''
class CustomDict(Dict):
def __init__(self, *args, **kwargs):
super().__init__(*args, db_path=DBROOT / 'custom.sqlite', **kwargs)
d = CustomDict(name='robot')
d['text'] = 'hello'
d.version = 25
d['text'] = 'world'
assert d[(0, 'text')] == 'hello'
assert d[(24, 'text')] == 'hello'
assert d[(25, 'text')] == 'world'
del d
recovered = CustomDict(name='robot')
assert recovered[(0, 'text')] == 'hello'
assert recovered[(24, 'text')] == 'hello'
assert recovered[(26, 'text')] == 'world'

tsih/__init__.py Normal file

@@ -0,0 +1,444 @@
import time
import os
import pandas as pd
import sqlite3
import copy
import uuid
import logging
import pathlib
import tempfile
logger = logging.getLogger(__name__)
__version__ = '0.1.4'
from collections import UserDict, namedtuple
from . import serialization
from .utils import open_or_reuse, unflatten_dict
class Dict(UserDict):
def __init__(self, name=None, db_name=None, db_path=None, backup=False, readonly=False, version=0, auto_version=False):
super().__init__()
self.dict_name = name or 'anonymous_{}'.format(uuid.uuid1())
self._history = History(name=db_name, db_path=db_path, backup=backup, readonly=readonly)
self.version = version
self.auto_version = auto_version
def __delitem__(self, key):
if isinstance(key, tuple):
raise ValueError('Cannot remove past entries')
if self.auto_version:
self.version += 1
self.data[key] = None
def __getitem__(self, key):
if isinstance(key, tuple):
if len(key) < 3:
key = tuple([self.dict_name] + list(key))
self._history.flush_cache()
return self._history[key]
return self.data[key]
def __del__(self):
self._history.close()
def __setcurrent(self, key, value):
if self.auto_version:
self.version += 1
self.data[key] = value
self._history.save_record(dict_id=self.dict_name,
t_step=float(self.version),
key=key,
value=value)
def __setitem__(self, key, value):
if not isinstance(key, tuple):
self.__setcurrent(key, value)
else:
if len(key) < 3:
key = tuple([self.dict_name] + list(key))
k = Key(*key)
if k.t_step == self.version and k.dict_id == self.dict_name:
return self.__setcurrent(k.key, value)
self._history.save_record(*k,
value=value)
class History:
"""
Store and retrieve values from a sqlite database.
"""
def __init__(self, name=None, db_path=None, backup=False, readonly=False):
if readonly and (not os.path.exists(db_path)):
raise Exception('The DB file does not exist. Cannot open in read-only mode')
self._db = None
self._temp = db_path is None
self._stats_columns = None
self.readonly = readonly
if self._temp:
if not name:
name = time.time()
# The file will be deleted as soon as it's closed
# Normally, that will be on destruction
db_path = tempfile.NamedTemporaryFile(suffix='{}.sqlite'.format(name)).name
if backup and os.path.exists(db_path):
newname = db_path + '.backup{}.sqlite'.format(time.time())
os.rename(db_path, newname)
self.db_path = db_path
self.db = db_path
self._dtypes = {}
self._tups = []
if self.readonly:
return
with self.db:
logger.debug('Creating database {}'.format(self.db_path))
self.db.execute('''CREATE TABLE IF NOT EXISTS history (dict_id text, t_step real, key text, value text)''')
self.db.execute('''CREATE TABLE IF NOT EXISTS value_types (key text, value_type text)''')
self.db.execute('''CREATE TABLE IF NOT EXISTS stats (stat_id text)''')
self.db.execute('''CREATE UNIQUE INDEX IF NOT EXISTS idx_history ON history (dict_id, t_step, key);''')
@property
def db(self):
try:
self._db.cursor()
except (sqlite3.ProgrammingError, AttributeError):
self.db = None # Reset the database
return self._db
@db.setter
def db(self, db_path=None):
self._close()
db_path = db_path or self.db_path
if isinstance(db_path, str) or isinstance(db_path, pathlib.Path):
logger.debug('Connecting to database {}'.format(db_path))
self._db = sqlite3.connect(db_path)
self._db.row_factory = sqlite3.Row
else:
self._db = db_path
def __del__(self):
self._close()
def close(self):
self._close()
def _close(self):
if self._db is None:
return
self.flush_cache()
self._db.close()
self._db = None
def save_stats(self, stat):
if self.readonly:
logger.warning('DB in readonly mode; stats not saved')
return
if not stat:
return
with self.db:
if not self._stats_columns:
self._stats_columns = list(c['name'] for c in self.db.execute('PRAGMA table_info(stats)'))
for column, value in stat.items():
if column in self._stats_columns:
continue
dtype = 'text'
if not isinstance(value, str):
try:
float(value)
dtype = 'real'
int(value)
dtype = 'int'
except (ValueError, OverflowError):
pass
self.db.execute('ALTER TABLE stats ADD "{}" "{}"'.format(column, dtype))
self._stats_columns.append(column)
columns = ", ".join(map(lambda x: '"{}"'.format(x), stat.keys()))
values = ", ".join(['"{0}"'.format(col) for col in stat.values()])
query = "INSERT INTO stats ({columns}) VALUES ({values})".format(
columns=columns,
values=values
)
self.db.execute(query)
def get_stats(self, unflatten=True):
rows = self.db.execute("select * from stats").fetchall()
res = []
for row in rows:
d = {}
for k in row.keys():
if row[k] is None:
continue
d[k] = row[k]
if unflatten:
d = unflatten_dict(d)
res.append(d)
return res
@property
def dtypes(self):
self._read_types()
return {k:v[0] for k, v in self._dtypes.items()}
def save_tuples(self, tuples):
'''
Save a series of tuples, converting them to records if necessary
'''
self.save_records(Record(*tup) for tup in tuples)
def save_records(self, records):
'''
Save a collection of records
'''
for record in records:
if not isinstance(record, Record):
record = Record(*record)
self.save_record(*record)
def save_record(self, dict_id, t_step, key, value):
'''
Save a collection of records to the database.
Database writes are cached.
'''
if self.readonly:
raise Exception('DB in readonly mode')
if key not in self._dtypes:
self._read_types()
if key not in self._dtypes:
name = serialization.name(value)
serializer = serialization.serializer(name)
deserializer = serialization.deserializer(name)
self._dtypes[key] = (name, serializer, deserializer)
with self.db:
self.db.execute("replace into value_types (key, value_type) values (?, ?)", (key, name))
value = self._dtypes[key][1](value)
self._tups.append(Record(dict_id=dict_id,
t_step=t_step,
key=key,
value=value))
def flush_cache(self):
'''
Use a cache to save state changes to avoid opening a session for every change.
The cache will be flushed at the end of the simulation, and when history is accessed.
'''
if self.readonly:
raise Exception('DB in readonly mode')
logger.debug('Flushing cache {}'.format(self.db_path))
with self.db:
self.db.executemany("replace into history(dict_id, t_step, key, value) values (?, ?, ?, ?)", self._tups)
self._tups.clear()
def to_tuples(self):
self.flush_cache()
with self.db:
res = self.db.execute("select dict_id, t_step, key, value from history ").fetchall()
for r in res:
dict_id, t_step, key, value = r
if key not in self._dtypes:
self._read_types()
if key not in self._dtypes:
raise ValueError("Unknown datatype for {} and {}".format(key, value))
value = self._dtypes[key][2](value)
yield dict_id, t_step, key, value
def _read_types(self):
with self.db:
res = self.db.execute("select key, value_type from value_types ").fetchall()
for k, v in res:
serializer = serialization.serializer(v)
deserializer = serialization.deserializer(v)
self._dtypes[k] = (v, serializer, deserializer)
def __getitem__(self, key):
self.flush_cache()
key = Key(*key)
dict_ids = [key.dict_id] if key.dict_id is not None else []
t_steps = [key.t_step] if key.t_step is not None else []
keys = [key.key] if key.key is not None else []
df = self.read_sql(dict_ids=dict_ids,
t_steps=t_steps,
keys=keys)
r = Records(df, filter=key, dtypes=self._dtypes)
if r.resolved:
return r.value()
return r
def read_sql(self, keys=None, dict_ids=None, not_dict_ids=None, t_steps=None, convert_types=False, limit=-1):
self._read_types()
def escape_and_join(v):
if v is None:
return
return ",".join(map(lambda x: "\'{}\'".format(x), v))
filters = [("key in ({})".format(escape_and_join(keys)), keys),
("dict_id in ({})".format(escape_and_join(dict_ids)), dict_ids),
("dict_id not in ({})".format(escape_and_join(not_dict_ids)), not_dict_ids)
]
filters = list(k[0] for k in filters if k[1])
last_df = None
if t_steps:
# Convert negative indices into positive
if any(x<0 for x in t_steps):
max_t = int(self.db.execute("select max(t_step) from history").fetchone()[0])
t_steps = [t if t>0 else max_t+1+t for t in t_steps]
# We will be doing ffill interpolation, so we need to look for
# the last value before the minimum step in the query
min_step = min(t_steps)
last_filters = ['t_step < {}'.format(min_step),]
last_filters = last_filters + filters
condition = ' and '.join(last_filters)
last_query = '''
select h1.*
from history h1
inner join (
select dict_id, key, max(t_step) as t_step
from history
where {condition}
group by dict_id, key
) h2
on h1.dict_id = h2.dict_id and
h1.key = h2.key and
h1.t_step = h2.t_step
'''.format(condition=condition)
last_df = pd.read_sql_query(last_query, self.db)
filters.append("t_step >= '{}' and t_step <= '{}'".format(min_step, max(t_steps)))
condition = ''
if filters:
condition = 'where {} '.format(' and '.join(filters))
query = 'select * from history {} limit {}'.format(condition, limit)
df = pd.read_sql_query(query, self.db)
if last_df is not None:
df = pd.concat([df, last_df])
df_p = df.pivot_table(values='value', index=['t_step'],
columns=['key', 'dict_id'],
aggfunc='first')
for k, v in self._dtypes.items():
if k in df_p:
dtype, _, deserial = v
try:
df_p[k] = df_p[k].fillna(method='ffill').astype(dtype)
except (TypeError, ValueError):
# Avoid forward-filling unknown/incompatible types
continue
if t_steps:
df_p = df_p.reindex(t_steps, method='ffill')
return df_p.ffill()
def __getstate__(self):
state = dict(**self.__dict__)
del state['_db']
del state['_dtypes']
return state
def __setstate__(self, state):
self.__dict__ = state
self._dtypes = {}
self._db = None
def dump(self, f):
self._close()
for line in open_or_reuse(self.db_path, 'rb'):
f.write(line)
class Records():
def __init__(self, df, filter=None, dtypes=None):
if not filter:
filter = Key(dict_id=None,
t_step=None,
key=None)
self._df = df
self._filter = filter
self.dtypes = dtypes or {}
super().__init__()
def mask(self, tup):
res = ()
for i, k in zip(tup[:-1], self._filter):
if k is None:
res = res + (i,)
res = res + (tup[-1],)
return res
def filter(self, newKey):
f = list(self._filter)
for ix, i in enumerate(f):
if i is None:
f[ix] = newKey
self._filter = Key(*f)
@property
def resolved(self):
return sum(1 for i in self._filter if i is not None) == 3
def __iter__(self):
for column, series in self._df.iteritems():
key, dict_id = column
for t_step, value in series.iteritems():
r = Record(t_step=t_step,
dict_id=dict_id,
key=key,
value=value)
yield self.mask(r)
def value(self):
if self.resolved:
f = self._filter
try:
i = self._df[f.key][str(f.dict_id)]
ix = i.index.get_loc(f.t_step, method='ffill')
return i.iloc[ix]
except KeyError as ex:
return self.dtypes[f.key][2]()
return list(self)
def df(self):
return self._df
def __getitem__(self, k):
n = copy.copy(self)
n.filter(k)
if n.resolved:
return n.value()
return n
def __len__(self):
return len(self._df)
def __str__(self):
if self.resolved:
return str(self.value())
return '<Records for [{}]>'.format(self._filter)
Key = namedtuple('Key', ['dict_id', 't_step', 'key'])
Record = namedtuple('Record', 'dict_id t_step key value')
Stat = namedtuple('Stat', 'stat_id text')

tsih/serialization.py Normal file

@@ -0,0 +1,89 @@
import os
import logging
import ast
import sys
import importlib
from itertools import product, chain
logger = logging.getLogger('soil')
builtins = importlib.import_module('builtins')
def name(value, known_modules=[]):
'''Return a name that can be imported, to serialize/deserialize an object'''
if value is None:
return 'None'
if not isinstance(value, type): # Get the class name first
value = type(value)
tname = value.__name__
if hasattr(builtins, tname):
return tname
modname = value.__module__
if modname == '__main__':
return tname
if known_modules and modname in known_modules:
return tname
for kmod in known_modules:
if not kmod:
continue
module = importlib.import_module(kmod)
if hasattr(module, tname):
return tname
return '{}.{}'.format(modname, tname)
def serializer(type_):
if type_ != 'str' and hasattr(builtins, type_):
return repr
return lambda x: x
def serialize(v, known_modules=[]):
'''Get a text representation of an object.'''
tname = name(v, known_modules=known_modules)
func = serializer(tname)
return func(v), tname
def deserializer(type_, known_modules=[]):
if type(type_) != str: # Already deserialized
return type_
if type_ == 'str':
return lambda x='': x
if type_ == 'None':
return lambda x=None: None
if hasattr(builtins, type_): # Check if it's a builtin type
cls = getattr(builtins, type_)
return lambda x=None: ast.literal_eval(x) if x is not None else cls()
# Otherwise, see if we can find the module and the class
modules = known_modules or []
options = []
for mod in modules:
if mod:
options.append((mod, type_))
if '.' in type_: # Fully qualified module
module, type_ = type_.rsplit(".", 1)
options.append((module, type_))
errors = []
for modname, tname in options:
try:
module = importlib.import_module(modname)
cls = getattr(module, tname)
return getattr(cls, 'deserialize', cls)
except (ImportError, AttributeError) as ex:
errors.append((modname, tname, ex))
raise Exception('Could not find type {}. Tried: {}'.format(type_, errors))
def deserialize(type_, value=None, **kwargs):
'''Get an object from a text representation'''
if not isinstance(type_, str):
return type_
des = deserializer(type_, **kwargs)
if value is None:
return des
return des(value)

tsih/utils.py Normal file

@@ -0,0 +1,87 @@
import logging
import time
import os
from shutil import copyfile
from contextlib import contextmanager
logger = logging.getLogger('soil')
# logging.basicConfig()
# logger.setLevel(logging.INFO)
@contextmanager
def timer(name='task', pre="", function=logger.info, to_object=None):
start = time.time()
function('{}Starting {} at {}.'.format(pre, name,
time.strftime("%X", time.gmtime(start))))
yield start
end = time.time()
function('{}Finished {} at {} in {} seconds'.format(pre, name,
time.strftime("%X", time.gmtime(end)),
str(end-start)))
if to_object:
to_object.start = start
to_object.end = end
def safe_open(path, mode='r', backup=True, **kwargs):
outdir = os.path.dirname(path)
if outdir and not os.path.exists(outdir):
os.makedirs(outdir)
if backup and 'w' in mode and os.path.exists(path):
creation = os.path.getctime(path)
stamp = time.strftime('%Y-%m-%d_%H.%M.%S', time.localtime(creation))
backup_dir = os.path.join(outdir, 'backup')
if not os.path.exists(backup_dir):
os.makedirs(backup_dir)
newpath = os.path.join(backup_dir, '{}@{}'.format(os.path.basename(path),
stamp))
copyfile(path, newpath)
return open(path, mode=mode, **kwargs)
def open_or_reuse(f, *args, **kwargs):
try:
return safe_open(f, *args, **kwargs)
except (AttributeError, TypeError):
return f
def flatten_dict(d):
if not isinstance(d, dict):
return d
return dict(_flatten_dict(d))
def _flatten_dict(d, prefix=''):
if not isinstance(d, dict):
# print('END:', prefix, d)
yield prefix, d
return
if prefix:
prefix = prefix + '.'
for k, v in d.items():
# print(k, v)
res = list(_flatten_dict(v, prefix='{}{}'.format(prefix, k)))
# print('RES:', res)
yield from res
def unflatten_dict(d):
out = {}
for k, v in d.items():
target = out
if not isinstance(k, str):
target[k] = v
continue
tokens = k.split('.')
if len(tokens) < 2:
target[k] = v
continue
for token in tokens[:-1]:
if token not in target:
target[token] = {}
target = target[token]
target[tokens[-1]] = v
return out