First commit

master
J. Fernando Sánchez 2 years ago
commit 70779fa0ad

.gitignore

@@ -0,0 +1,5 @@
.*
*.pyc
__pycache__
build
dist

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2016 Jesús Manuel Sánchez Martínez - Grupo de Sistemas Inteligentes (GSI) DIT UPM
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

@@ -0,0 +1,6 @@
include requirements.txt
include test-requirements.txt
include README.md
graft tsih
global-exclude __pycache__
global-exclude *.py[co]

@@ -0,0 +1,91 @@
# TSIH - A dict with a HISTory
`tsih.Dict` is a `UserDict` subclass that adds versioning, backed by a `sqlite3` database.
* Transparent operation
* Only changes (deltas) are stored.
* Forward-filling of values. A value is reused in future versions, unless it changes.
* Auto-versioning option (off by default) that produces a new version every time a value changes.
* Ability to store related entries as separate dictionaries. Each `tsih.Dict` has a `dict_name` that is used in the database to identify the dictionary.
* Tuple-based indexing. Get and set values by `dict_name`, `version` and `key`.
## Usage and examples
`tsih.Dict` objects can be used just like regular dictionaries:
```python
>>> from tsih import Dict
>>> a = Dict()
>>> a['test'] = True
>>> a
{'test': True}
>>> a.get('missing', 5)
5
>>> a['missing']
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
KeyError: 'missing'
```
But at any point, new versions can be produced:
```python
>>> a.version
0
>>> a['start'] = 'now'
>>> a
{'test': True, 'start': 'now'}
>>> a.version = 1
>>> a['start'] = 'one version ago'
>>> a
{'test': True, 'start': 'one version ago'}
```
Previous values can be accessed using tuple keys, i.e., (version, key):
```python
>>> a[(0, 'start')]
'now'
>>> a[(1, 'start')]
'one version ago'
```
Each version only "records" changes, but later versions (even if they don't exist yet) inherit unchanged values from the previous ones:
```python
>>> a[(5, 'start')]
'one version ago'
>>> a.version = 5
>>> # Until the value is changed
>>> a['start'] = '4 versions ago'
>>> a[(5, 'start')]
'4 versions ago'
```
You can access *every* state of the Dict using `None` in place of the version and/or the key.
In that case, you get an iterator, which can be turned into a list explicitly or with the `.value()` method.
For example, here we get all the changes to the `start` key:
```python
>>> a[(None, 'start')].value()
[(0.0, 'now'), (1.0, 'one version ago'), (5.0, '4 versions ago')]
```
Similarly, to get the keys and values at a specific version:
```python
>>> list(a[(0, None)])
[('start', 'now'), ('test', True)]
```
Or, we can combine both to get the keys and values at every version:
```python
>>> a[(None, None)].value()
[(0.0, 'start', 'now'), (1.0, 'start', 'one version ago'), (5.0, 'start', '4 versions ago'), (0.0, 'test', True), (1.0, 'test', True), (5.0, 'test', True)]
```
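Versioning can also happen automatically, and a named dictionary can be persisted to (and recovered from) a sqlite file. The sketch below is illustrative rather than exhaustive: it uses the same arguments exercised by the test suite (`auto_version`, `name`, `db_path`), and the `example.sqlite` file name is arbitrary:
```python
>>> b = Dict(version=0, auto_version=True)
>>> b['state'] = 'started'    # the version is bumped to 1 before storing
>>> b['state'] = 'finished'   # bumped again, to 2
>>> b[(1, 'state')]
'started'
>>> b[(2, 'state')]
'finished'
>>> c = Dict(name='robot', db_path='example.sqlite')
>>> c['text'] = 'hello'
>>> del c   # pending changes are flushed to example.sqlite
>>> recovered = Dict(name='robot', db_path='example.sqlite')
>>> recovered[(0, 'text')]
'hello'
```
Note that the recovered dictionary starts out empty in memory; only tuple-based access reaches back into the database.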
## Use cases
Tsih was originally part of the [Soil](https://github.com/gsi-upm/soil) Agent-Based Social Simulation framework, where both the environment and the agents need to keep track of state (i.e., attribute) changes.

@@ -0,0 +1,4 @@
[aliases]
test=pytest
[tool:pytest]
addopts = --verbose

@@ -0,0 +1,57 @@
import os
import re
from setuptools import setup
from pathlib import Path
this_directory = Path(__file__).parent
long_description = (this_directory / "README.md").read_text()
version = ""
with open(os.path.join('tsih', '__init__.py')) as f:
version = re.search(
r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', f.read(), re.MULTILINE
).group(1)
assert version
def parse_requirements(filename):
""" load requirements from a pip requirements file """
with open(filename, 'r') as f:
lineiter = list(line.strip() for line in f)
return [line for line in lineiter if line and not line.startswith("#")]
install_reqs = parse_requirements("requirements.txt")
test_reqs = parse_requirements("test-requirements.txt")
extras_require={}
extras_require['all'] = [dep for package in extras_require.values() for dep in package]
setup(
name='tsih',
packages=['tsih'], # this must be the same as the name above
version=version,
description=("A lightweight library to store an object's history into a SQL database"),
long_description=long_description,
long_description_content_type='text/markdown',
author='J. Fernando Sanchez',
author_email='jf.sanchez@upm.es',
url='https://github.com/balkian/tsih', # use the URL to the github repo
download_url='https://github.com/balkian/tsih/archive/{}.tar.gz'.format(
version),
keywords=['history', 'sql', 'records'],
classifiers=[
'Development Status :: 4 - Beta',
'Environment :: Console',
'Intended Audience :: Developers',
'License :: OSI Approved :: Apache Software License',
'Operating System :: MacOS :: MacOS X',
'Operating System :: Microsoft :: Windows',
'Operating System :: POSIX',
'Programming Language :: Python :: 3'],
install_requires=install_reqs,
extras_require=extras_require,
tests_require=test_reqs,
setup_requires=['pytest-runner', ],
include_package_data=True,
)

@@ -0,0 +1,227 @@
from unittest import TestCase
import os
import shutil
from glob import glob
from tsih import *
from tsih import utils
ROOT = os.path.abspath(os.path.dirname(__file__))
DBROOT = os.path.join(ROOT, 'testdb')
class TestHistory(TestCase):
def setUp(self):
if not os.path.exists(DBROOT):
os.makedirs(DBROOT)
def tearDown(self):
if os.path.exists(DBROOT):
shutil.rmtree(DBROOT)
def test_history(self):
"""
"""
tuples = (
('a_0', 0, 'id', 'h'),
('a_0', 1, 'id', 'e'),
('a_0', 2, 'id', 'l'),
('a_0', 3, 'id', 'l'),
('a_0', 4, 'id', 'o'),
('a_1', 0, 'id', 'v'),
('a_1', 1, 'id', 'a'),
('a_1', 2, 'id', 'l'),
('a_1', 3, 'id', 'u'),
('a_1', 4, 'id', 'e'),
('env', 1, 'prob', 1),
('env', 3, 'prob', 2),
('env', 5, 'prob', 3),
('a_2', 7, 'finished', True),
)
h = History()
h.save_tuples(tuples)
# assert h['env', 0, 'prob'] == 0
for i in range(1, 7):
assert h['env', i, 'prob'] == ((i-1)//2)+1
for i, k in zip(range(5), 'hello'):
assert h['a_0', i, 'id'] == k
for record, value in zip(h['a_0', None, 'id'], 'hello'):
t_step, val = record
assert val == value
for i, k in zip(range(5), 'value'):
assert h['a_1', i, 'id'] == k
for i in range(5, 8):
assert h['a_1', i, 'id'] == 'e'
for i in range(7):
assert h['a_2', i, 'finished'] == False
assert h['a_2', 7, 'finished']
def test_history_gen(self):
"""
"""
tuples = (
('a_1', 0, 'id', 'v'),
('a_1', 1, 'id', 'a'),
('a_1', 2, 'id', 'l'),
('a_1', 3, 'id', 'u'),
('a_1', 4, 'id', 'e'),
('env', 1, 'prob', 1),
('env', 2, 'prob', 2),
('env', 3, 'prob', 3),
('a_2', 7, 'finished', True),
)
h = History()
h.save_tuples(tuples)
for t_step, key, value in h['env', None, None]:
assert t_step == value
assert key == 'prob'
records = list(h[None, 7, None])
assert len(records) == 3
for i in records:
agent_id, key, value = i
if agent_id == 'a_1':
assert key == 'id'
assert value == 'e'
elif agent_id == 'a_2':
assert key == 'finished'
assert value
else:
assert key == 'prob'
assert value == 3
records = h['a_1', 7, None]
assert records['id'] == 'e'
def test_history_file(self):
"""
History should be saved to a file
"""
tuples = (
('a_1', 0, 'id', 'v'),
('a_1', 1, 'id', 'a'),
('a_1', 2, 'id', 'l'),
('a_1', 3, 'id', 'u'),
('a_1', 4, 'id', 'e'),
('env', 1, 'prob', 1),
('env', 2, 'prob', 2),
('env', 3, 'prob', 3),
('a_2', 7, 'finished', True),
)
db_path = os.path.join(DBROOT, 'test')
h = History(db_path=db_path)
h.save_tuples(tuples)
h.flush_cache()
assert os.path.exists(db_path)
# Recover the data
recovered = History(db_path=db_path)
assert recovered['a_1', 0, 'id'] == 'v'
assert recovered['a_1', 4, 'id'] == 'e'
# Using backup=True should create a backup copy, and initialize an empty history
newhistory = History(db_path=db_path, backup=True)
backuppaths = glob(db_path + '.backup*.sqlite')
assert len(backuppaths) == 1
backuppath = backuppaths[0]
assert newhistory.db_path == h.db_path
assert os.path.exists(backuppath)
assert len(newhistory[None, None, None]) == 0
def test_interpolation(self):
"""
Values for a key are valid until a new value is introduced at a later version
"""
tuples = (
('a_1', 0, 'id', 'a'),
('a_1', 4, 'id', 'b'),
)
db_path = os.path.join(DBROOT, 'test')
h = History(db_path=db_path)
h.save_tuples(tuples)
h.flush_cache()
assert os.path.exists(db_path)
assert h['a_1', 2, 'id'] == 'a'
# Recover the data
recovered = History(db_path=db_path)
assert recovered['a_1', 0, 'id'] == 'a'
assert recovered['a_1', 4, 'id'] == 'b'
assert recovered['a_1', 2, 'id'] == 'a'
def test_history_tuples(self):
"""
The data recovered should be equal to the one recorded.
"""
tuples = (
('a_1', 0, 'id', 'v'),
('a_1', 1, 'id', 'a'),
('a_1', 2, 'id', 'l'),
('a_1', 3, 'id', 'u'),
('a_1', 4, 'id', 'e'),
('env', 1, 'prob', 1),
('env', 2, 'prob', 2),
('env', 3, 'prob', 3),
('a_2', 7, 'finished', True),
)
h = History()
h.save_tuples(tuples)
recovered = list(h.to_tuples())
assert recovered
for i in recovered:
assert i in tuples
def test_stats(self):
"""
The data recovered should be equal to the one recorded.
"""
tuples = (
('a_1', 0, 'id', 'v'),
('a_1', 1, 'id', 'a'),
('a_1', 2, 'id', 'l'),
('a_1', 3, 'id', 'u'),
('a_1', 4, 'id', 'e'),
('env', 1, 'prob', 1),
('env', 2, 'prob', 2),
('env', 3, 'prob', 3),
('a_2', 7, 'finished', True),
)
stat_tuples = [
{'num_infected': 5, 'runtime': 0.2},
{'num_infected': 5, 'runtime': 0.2},
{'new': '40'},
]
h = History()
h.save_tuples(tuples)
for stat in stat_tuples:
h.save_stats(stat)
recovered = h.get_stats()
assert recovered
assert recovered[0]['num_infected'] == 5
assert recovered[1]['runtime'] == 0.2
assert recovered[2]['new'] == '40'
def test_unflatten(self):
ex = {'count.neighbors.3': 4,
'count.times.2': 4,
'count.total.4': 4,
'mean.neighbors': 3,
'mean.times': 2,
'mean.total': 4,
't_step': 2,
'trial_id': 'exporter_sim_trial_1605817956-4475424'}
res = utils.unflatten_dict(ex)
assert 'count' in res
assert all(x in res['count'] for x in ['times', 'total', 'neighbors'])
assert res['count']['times']['2'] == 4
assert 'mean' in res
assert all(x in res['mean'] for x in ['times', 'total', 'neighbors'])
assert 't_step' in res
assert 'trial_id' in res

@@ -0,0 +1,79 @@
from unittest import TestCase
import os
import shutil
import pathlib
from tsih import Dict
ROOT = pathlib.Path(os.path.abspath(os.path.dirname(__file__)))
DBROOT = ROOT / 'testdb'
class TestTsih(TestCase):
def setUp(self):
if not os.path.exists(DBROOT):
os.makedirs(DBROOT)
def tearDown(self):
if os.path.exists(DBROOT):
shutil.rmtree(DBROOT)
def test_basic(self):
'''The data stored in each version should be retrievable'''
d = Dict()
d['text'] = 'hello'
d.version = 1
d['text'] = 'world'
assert d[(0, 'text')] == 'hello'
assert d[(1, 'text')] == 'world'
def test_auto_version(self):
'''Changing a value when `auto_version` is on should produce a new version automatically'''
d = Dict(version=0, auto_version=True)
d['text'] = 'hello'
d['text'] = 'world'
assert d[(1, 'text')] == 'hello'
assert d[(2, 'text')] == 'world'
def test_serialized(self):
'''
Using the same database should enable retrieving the values of a previous
dictionary.
'''
d = Dict(name='robot', db_path=DBROOT / 'basic.sqlite')
d['text'] = 'hello'
d.version = 25
d['text'] = 'world'
assert d[(0, 'text')] == 'hello'
assert d[(24, 'text')] == 'hello'
assert d[(25, 'text')] == 'world'
del d
recovered = Dict(name='robot', db_path=DBROOT / 'basic.sqlite')
assert recovered[(0, 'text')] == 'hello'
assert recovered[(24, 'text')] == 'hello'
assert recovered[(25, 'text')] == 'world'
def test_custom(self):
'''
Inheriting from the Dict class should not change the behavior.
'''
class CustomDict(Dict):
def __init__(self, *args, **kwargs):
super().__init__(*args, db_path=DBROOT / 'custom.sqlite', **kwargs)
d = CustomDict(name='robot')
d['text'] = 'hello'
d.version = 25
d['text'] = 'world'
assert d[(0, 'text')] == 'hello'
assert d[(24, 'text')] == 'hello'
assert d[(25, 'text')] == 'world'
del d
recovered = CustomDict(name='robot')
assert recovered[(0, 'text')] == 'hello'
assert recovered[(24, 'text')] == 'hello'
assert recovered[(26, 'text')] == 'world'

@@ -0,0 +1,444 @@
import time
import os
import pandas as pd
import sqlite3
import copy
import uuid
import logging
import pathlib
import tempfile
logger = logging.getLogger(__name__)
__version__ = '0.1.4'
from collections import UserDict, namedtuple
from . import serialization
from .utils import open_or_reuse, unflatten_dict
class Dict(UserDict):
def __init__(self, name=None, db_name=None, db_path=None, backup=False, readonly=False, version=0, auto_version=False):
super().__init__()
self.dict_name = name or 'anonymous_{}'.format(uuid.uuid1())
self._history = History(name=db_name, db_path=db_path, backup=backup, readonly=readonly)
self.version = version
self.auto_version = auto_version
def __delitem__(self, key):
if isinstance(key, tuple):
raise ValueError('Cannot remove past entries')
if self.auto_version:
self.version += 1
self.data[key] = None
def __getitem__(self, key):
if isinstance(key, tuple):
if len(key) < 3:
key = tuple([self.dict_name] + list(key))
self._history.flush_cache()
return self._history[key]
return self.data[key]
def __del__(self):
self._history.close()
def __setcurrent(self, key, value):
if self.auto_version:
self.version += 1
self.data[key] = value
self._history.save_record(dict_id=self.dict_name,
t_step=float(self.version),
key=key,
value=value)
def __setitem__(self, key, value):
if not isinstance(key, tuple):
self.__setcurrent(key, value)
else:
if len(key) < 3:
key = tuple([self.dict_name] + list(key))
k = Key(*key)
if k.t_step == self.version and k.dict_id == self.dict_name:
return self.__setcurrent(k.key, value)
self._history.save_record(*k,
value=value)
class History:
"""
Store and retrieve values from a sqlite database.
"""
def __init__(self, name=None, db_path=None, backup=False, readonly=False):
if readonly and (not os.path.exists(db_path)):
raise Exception('The DB file does not exist. Cannot open in read-only mode')
self._db = None
self._temp = db_path is None
self._stats_columns = None
self.readonly = readonly
if self._temp:
if not name:
name = time.time()
# NamedTemporaryFile is only used to generate a unique path; the temporary
# file itself disappears immediately, and sqlite creates a fresh database there
db_path = tempfile.NamedTemporaryFile(suffix='{}.sqlite'.format(name)).name
if backup and os.path.exists(db_path):
newname = db_path + '.backup{}.sqlite'.format(time.time())
os.rename(db_path, newname)
self.db_path = db_path
self.db = db_path
self._dtypes = {}
self._tups = []
if self.readonly:
return
with self.db:
logger.debug('Creating database {}'.format(self.db_path))
self.db.execute('''CREATE TABLE IF NOT EXISTS history (dict_id text, t_step real, key text, value text)''')
self.db.execute('''CREATE TABLE IF NOT EXISTS value_types (key text, value_type text)''')
self.db.execute('''CREATE TABLE IF NOT EXISTS stats (stat_id text)''')
self.db.execute('''CREATE UNIQUE INDEX IF NOT EXISTS idx_history ON history (dict_id, t_step, key);''')
@property
def db(self):
try:
self._db.cursor()
except (sqlite3.ProgrammingError, AttributeError):
self.db = None # Reset the database
return self._db
@db.setter
def db(self, db_path=None):
self._close()
db_path = db_path or self.db_path
if isinstance(db_path, str) or isinstance(db_path, pathlib.Path):
logger.debug('Connecting to database {}'.format(db_path))
self._db = sqlite3.connect(db_path)
self._db.row_factory = sqlite3.Row
else:
self._db = db_path
def __del__(self):
self._close()
def close(self):
self._close()
def _close(self):
if self._db is None:
return
self.flush_cache()
self._db.close()
self._db = None
def save_stats(self, stat):
if self.readonly:
print('DB in readonly mode')
return
if not stat:
return
with self.db:
if not self._stats_columns:
self._stats_columns = list(c['name'] for c in self.db.execute('PRAGMA table_info(stats)'))
for column, value in stat.items():
if column in self._stats_columns:
continue
dtype = 'text'
if not isinstance(value, str):
try:
float(value)
dtype = 'real'
int(value)
dtype = 'int'
except (ValueError, OverflowError):
pass
self.db.execute('ALTER TABLE stats ADD "{}" "{}"'.format(column, dtype))
self._stats_columns.append(column)
columns = ", ".join(map(lambda x: '"{}"'.format(x), stat.keys()))
values = ", ".join(['"{0}"'.format(col) for col in stat.values()])
query = "INSERT INTO stats ({columns}) VALUES ({values})".format(
columns=columns,
values=values
)
self.db.execute(query)
def get_stats(self, unflatten=True):
rows = self.db.execute("select * from stats").fetchall()
res = []
for row in rows:
d = {}
for k in row.keys():
if row[k] is None:
continue
d[k] = row[k]
if unflatten:
d = unflatten_dict(d)
res.append(d)
return res
@property
def dtypes(self):
self._read_types()
return {k:v[0] for k, v in self._dtypes.items()}
def save_tuples(self, tuples):
'''
Save a series of tuples, converting them to records if necessary
'''
self.save_records(Record(*tup) for tup in tuples)
def save_records(self, records):
'''
Save a collection of records
'''
for record in records:
if not isinstance(record, Record):
record = Record(*record)
self.save_record(*record)
def save_record(self, dict_id, t_step, key, value):
'''
Save a single record to the database.
Database writes are cached.
'''
if self.readonly:
raise Exception('DB in readonly mode')
if key not in self._dtypes:
self._read_types()
if key not in self._dtypes:
name = serialization.name(value)
serializer = serialization.serializer(name)
deserializer = serialization.deserializer(name)
self._dtypes[key] = (name, serializer, deserializer)
with self.db:
self.db.execute("replace into value_types (key, value_type) values (?, ?)", (key, name))
value = self._dtypes[key][1](value)
self._tups.append(Record(dict_id=dict_id,
t_step=t_step,
key=key,
value=value))
def flush_cache(self):
'''
Changes are buffered in memory to avoid opening a database session for every change.
The buffer is flushed whenever the history is read and when it is closed.
'''
if self.readonly:
raise Exception('DB in readonly mode')
logger.debug('Flushing cache {}'.format(self.db_path))
with self.db:
self.db.executemany("replace into history(dict_id, t_step, key, value) values (?, ?, ?, ?)", self._tups)
self._tups.clear()
def to_tuples(self):
self.flush_cache()
with self.db:
res = self.db.execute("select dict_id, t_step, key, value from history ").fetchall()
for r in res:
dict_id, t_step, key, value = r
if key not in self._dtypes:
self._read_types()
if key not in self._dtypes:
raise ValueError("Unknown datatype for {} and {}".format(key, value))
value = self._dtypes[key][2](value)
yield dict_id, t_step, key, value
def _read_types(self):
with self.db:
res = self.db.execute("select key, value_type from value_types ").fetchall()
for k, v in res:
serializer = serialization.serializer(v)
deserializer = serialization.deserializer(v)
self._dtypes[k] = (v, serializer, deserializer)
def __getitem__(self, key):
self.flush_cache()
key = Key(*key)
dict_ids = [key.dict_id] if key.dict_id is not None else []
t_steps = [key.t_step] if key.t_step is not None else []
keys = [key.key] if key.key is not None else []
df = self.read_sql(dict_ids=dict_ids,
t_steps=t_steps,
keys=keys)
r = Records(df, filter=key, dtypes=self._dtypes)
if r.resolved:
return r.value()
return r
def read_sql(self, keys=None, dict_ids=None, not_dict_ids=None, t_steps=None, convert_types=False, limit=-1):
self._read_types()
def escape_and_join(v):
if v is None:
return
return ",".join(map(lambda x: "\'{}\'".format(x), v))
filters = [("key in ({})".format(escape_and_join(keys)), keys),
("dict_id in ({})".format(escape_and_join(dict_ids)), dict_ids),
("dict_id not in ({})".format(escape_and_join(not_dict_ids)), not_dict_ids)
]
filters = list(k[0] for k in filters if k[1])
last_df = None
if t_steps:
# Convert negative indices into positive
if any(x<0 for x in t_steps):
max_t = int(self.db.execute("select max(t_step) from history").fetchone()[0])
t_steps = [t if t>0 else max_t+1+t for t in t_steps]
# We will be doing ffill interpolation, so we need to look for
# the last value before the minimum step in the query
min_step = min(t_steps)
last_filters = ['t_step < {}'.format(min_step),]
last_filters = last_filters + filters
condition = ' and '.join(last_filters)
last_query = '''
select h1.*
from history h1
inner join (
select dict_id, key, max(t_step) as t_step
from history
where {condition}
group by dict_id, key
) h2
on h1.dict_id = h2.dict_id and
h1.key = h2.key and
h1.t_step = h2.t_step
'''.format(condition=condition)
last_df = pd.read_sql_query(last_query, self.db)
filters.append("t_step >= '{}' and t_step <= '{}'".format(min_step, max(t_steps)))
condition = ''
if filters:
condition = 'where {} '.format(' and '.join(filters))
query = 'select * from history {} limit {}'.format(condition, limit)
df = pd.read_sql_query(query, self.db)
if last_df is not None:
df = pd.concat([df, last_df])
df_p = df.pivot_table(values='value', index=['t_step'],
columns=['key', 'dict_id'],
aggfunc='first')
for k, v in self._dtypes.items():
if k in df_p:
dtype, _, deserial = v
try:
df_p[k] = df_p[k].fillna(method='ffill').astype(dtype)
except (TypeError, ValueError):
# Avoid forward-filling unknown/incompatible types
continue
if t_steps:
df_p = df_p.reindex(t_steps, method='ffill')
return df_p.ffill()
def __getstate__(self):
state = dict(**self.__dict__)
del state['_db']
del state['_dtypes']
return state
def __setstate__(self, state):
self.__dict__ = state
self._dtypes = {}
self._db = None
def dump(self, f):
self._close()
for line in open_or_reuse(self.db_path, 'rb'):
f.write(line)
class Records():
def __init__(self, df, filter=None, dtypes=None):
if not filter:
filter = Key(dict_id=None,
t_step=None,
key=None)
self._df = df
self._filter = filter
self.dtypes = dtypes or {}
super().__init__()
def mask(self, tup):
res = ()
for i, k in zip(tup[:-1], self._filter):
if k is None:
res = res + (i,)
res = res + (tup[-1],)
return res
def filter(self, newKey):
f = list(self._filter)
for ix, i in enumerate(f):
if i is None:
f[ix] = newKey
self._filter = Key(*f)
@property
def resolved(self):
return sum(1 for i in self._filter if i is not None) == 3
def __iter__(self):
for column, series in self._df.iteritems():
key, dict_id = column
for t_step, value in series.iteritems():
r = Record(t_step=t_step,
dict_id=dict_id,
key=key,
value=value)
yield self.mask(r)
def value(self):
if self.resolved:
f = self._filter
try:
i = self._df[f.key][str(f.dict_id)]
ix = i.index.get_loc(f.t_step, method='ffill')
return i.iloc[ix]
except KeyError as ex:
return self.dtypes[f.key][2]()
return list(self)
def df(self):
return self._df
def __getitem__(self, k):
n = copy.copy(self)
n.filter(k)
if n.resolved:
return n.value()
return n
def __len__(self):
return len(self._df)
def __str__(self):
if self.resolved:
return str(self.value())
return '<Records for [{}]>'.format(self._filter)
Key = namedtuple('Key', ['dict_id', 't_step', 'key'])
Record = namedtuple('Record', 'dict_id t_step key value')
Stat = namedtuple('Stat', 'stat_id text')

@@ -0,0 +1,89 @@
import os
import logging
import ast
import sys
import importlib
from itertools import product, chain
logger = logging.getLogger('soil')
builtins = importlib.import_module('builtins')
def name(value, known_modules=[]):
'''Return a name that can be imported, to serialize/deserialize an object'''
if value is None:
return 'None'
if not isinstance(value, type): # Get the class name first
value = type(value)
tname = value.__name__
if hasattr(builtins, tname):
return tname
modname = value.__module__
if modname == '__main__':
return tname
if known_modules and modname in known_modules:
return tname
for kmod in known_modules:
if not kmod:
continue
module = importlib.import_module(kmod)
if hasattr(module, tname):
return tname
return '{}.{}'.format(modname, tname)
def serializer(type_):
if type_ != 'str' and hasattr(builtins, type_):
return repr
return lambda x: x
def serialize(v, known_modules=[]):
'''Get a text representation of an object.'''
tname = name(v, known_modules=known_modules)
func = serializer(tname)
return func(v), tname
def deserializer(type_, known_modules=[]):
if type(type_) != str: # Already deserialized
return type_
if type_ == 'str':
return lambda x='': x
if type_ == 'None':
return lambda x=None: None
if hasattr(builtins, type_): # Check if it's a builtin type
cls = getattr(builtins, type_)
return lambda x=None: ast.literal_eval(x) if x is not None else cls()
# Otherwise, see if we can find the module and the class
modules = known_modules or []
options = []
for mod in modules:
if mod:
options.append((mod, type_))
if '.' in type_: # Fully qualified module
module, type_ = type_.rsplit(".", 1)
options.append((module, type_))
errors = []
for modname, tname in options:
try:
module = importlib.import_module(modname)
cls = getattr(module, tname)
return getattr(cls, 'deserialize', cls)
except (ImportError, AttributeError) as ex:
errors.append((modname, tname, ex))
raise Exception('Could not find type {}. Tried: {}'.format(type_, errors))
def deserialize(type_, value=None, **kwargs):
'''Get an object from a text representation'''
if not isinstance(type_, str):
return type_
des = deserializer(type_, **kwargs)
if value is None:
return des
return des(value)

@@ -0,0 +1,87 @@
import logging
import time
import os
from shutil import copyfile
from contextlib import contextmanager
logger = logging.getLogger('soil')
# logging.basicConfig()
# logger.setLevel(logging.INFO)
@contextmanager
def timer(name='task', pre="", function=logger.info, to_object=None):
start = time.time()
function('{}Starting {} at {}.'.format(pre, name,
time.strftime("%X", time.gmtime(start))))
yield start
end = time.time()
function('{}Finished {} at {} in {} seconds'.format(pre, name,
time.strftime("%X", time.gmtime(end)),
str(end-start)))
if to_object:
to_object.start = start
to_object.end = end
def safe_open(path, mode='r', backup=True, **kwargs):
outdir = os.path.dirname(path)
if outdir and not os.path.exists(outdir):
os.makedirs(outdir)
if backup and 'w' in mode and os.path.exists(path):
creation = os.path.getctime(path)
stamp = time.strftime('%Y-%m-%d_%H.%M.%S', time.localtime(creation))
backup_dir = os.path.join(outdir, 'backup')
if not os.path.exists(backup_dir):
os.makedirs(backup_dir)
newpath = os.path.join(backup_dir, '{}@{}'.format(os.path.basename(path),
stamp))
copyfile(path, newpath)
return open(path, mode=mode, **kwargs)
def open_or_reuse(f, *args, **kwargs):
try:
return safe_open(f, *args, **kwargs)
except (AttributeError, TypeError):
return f
def flatten_dict(d):
if not isinstance(d, dict):
return d
return dict(_flatten_dict(d))
def _flatten_dict(d, prefix=''):
if not isinstance(d, dict):
# print('END:', prefix, d)
yield prefix, d
return
if prefix:
prefix = prefix + '.'
for k, v in d.items():
# print(k, v)
res = list(_flatten_dict(v, prefix='{}{}'.format(prefix, k)))
# print('RES:', res)
yield from res
def unflatten_dict(d):
out = {}
for k, v in d.items():
target = out
if not isinstance(k, str):
target[k] = v
continue
tokens = k.split('.')
if len(tokens) < 2:
target[k] = v
continue
for token in tokens[:-1]:
if token not in target:
target[token] = {}
target = target[token]
target[tokens[-1]] = v
return out