mirror of
https://github.com/balkian/tsih.git
synced 2024-12-22 05:58:13 +00:00
First commit
This commit is contained in:
commit
70779fa0ad
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
.*
|
||||||
|
*.pyc
|
||||||
|
__pycache__
|
||||||
|
build
|
||||||
|
dist
|
201
LICENSE
Normal file
201
LICENSE
Normal file
@ -0,0 +1,201 @@
|
|||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "{}"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright 2016 Jesús Manuel Sánchez Martínez - Grupo de Sistemas Inteligentes (GSI) DIT UPM
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
6
MANIFEST.in
Normal file
6
MANIFEST.in
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
include requirements.txt
|
||||||
|
include test-requirements.txt
|
||||||
|
include README.md
|
||||||
|
graft tsih
|
||||||
|
global-exclude __pycache__
|
||||||
|
global-exclude *.py[co]
|
91
README.md
Normal file
91
README.md
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
# TSIH - A dict with a HISTory
|
||||||
|
|
||||||
|
`tsih.Dict` is a type of `UserDict` that allows versioning, backed up by a `sqlite3` database.
|
||||||
|
|
||||||
|
* Transparent operation
|
||||||
|
* Only changes (deltas) are stored.
|
||||||
|
* Forward-filling of values. A value is reused in future versions, unless it changes.
|
||||||
|
* Auto-versioning option (off by default), to produce a new version every time a value change happens.
|
||||||
|
* Ability to store related entries as separate dictionaries. Each `tsih.Dict` has a `dict_name` that is used in the database to identify the dictionary.
|
||||||
|
* Tuple-based indexing. Get and set values by `dict_name`, `version` and `key`.
|
||||||
|
|
||||||
|
## Usage and examples
|
||||||
|
|
||||||
|
`tsih.Dict` objects can be used just like regular dictionaries:
|
||||||
|
|
||||||
|
```python
|
||||||
|
>>> from tsih import Dict
|
||||||
|
>>> a = Dict()
|
||||||
|
>>> a['test'] = True
|
||||||
|
>>> a
|
||||||
|
{'test': True}
|
||||||
|
>>> a.get('missing', 5)
|
||||||
|
5
|
||||||
|
>>> a['missing']
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "<stdin>", line 1, in <module>
|
||||||
|
KeyError: 'missing'
|
||||||
|
```
|
||||||
|
|
||||||
|
But at any point, new versions can be produced:
|
||||||
|
|
||||||
|
```python
|
||||||
|
>>> a.version
|
||||||
|
0
|
||||||
|
>>> a['start'] = 'now'
|
||||||
|
>>> a
|
||||||
|
{'test': True, 'start': 'now'}
|
||||||
|
>>> a.version = 1
|
||||||
|
>>> a['start'] = 'one version ago'
|
||||||
|
>>> a
|
||||||
|
{'test': True, 'start': 'one version ago'}
|
||||||
|
```
|
||||||
|
|
||||||
|
Previous values can be accessed using tuple keys, i.e., (version, key):
|
||||||
|
|
||||||
|
```python
|
||||||
|
>>> a[(0, 'start')]
|
||||||
|
'now'
|
||||||
|
>>> a[(1, 'start')]
|
||||||
|
'one version ago'
|
||||||
|
```
|
||||||
|
|
||||||
|
Each version only "records" changes, but later versions (even if they don't exist yet) inherit unchanged values from the previous ones:
|
||||||
|
|
||||||
|
```python
|
||||||
|
>>> a[(5, 'start')]
|
||||||
|
'one version ago'
|
||||||
|
>>> a.version = 5
|
||||||
|
>>> # Until the value is changed
|
||||||
|
>>> a['start'] = '4 versions ago'
|
||||||
|
>>> a[(5, 'start')]
|
||||||
|
'4 versions ago'
|
||||||
|
```
|
||||||
|
|
||||||
|
You can access *every* state of the Dict using `None` in place of the version and/or the key.
|
||||||
|
In that case, we will get an iterator, which we can turn into a list explicitly or with the `.value` method.
|
||||||
|
|
||||||
|
For example, here we get all the changes to the `start` key:
|
||||||
|
|
||||||
|
```python
|
||||||
|
>>> a[(None, 'start')].value() #
|
||||||
|
[(0.0, 'now'), (1.0, 'one version ago'), (5.0, '4 versions ago')]
|
||||||
|
```
|
||||||
|
|
||||||
|
Similarly, to get the keys and values at a specific version:
|
||||||
|
|
||||||
|
```python
|
||||||
|
>>> list(a[(0, None)])
|
||||||
|
[('start', 'now'), ('test', True)]
|
||||||
|
```
|
||||||
|
|
||||||
|
Or, we can combine both to get the keys and values at every version:
|
||||||
|
|
||||||
|
```python
|
||||||
|
>>> a[(None, None)].value()
|
||||||
|
[(0.0, 'start', 'now'), (1.0, 'start', 'one version ago'), (5.0, 'start', '4 versions ago'), (0.0, 'test', True), (1.0, 'test', True), (5.0, 'test', True)]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Use cases
|
||||||
|
|
||||||
|
Tsih was originally part of the [Soil](https://github.com/gsi-upm/soil) Agent-Based Social Simulation framework, where both the environment and the agents need to keep track of state (i.e., attribute) changes.
|
0
requirements.txt
Normal file
0
requirements.txt
Normal file
4
setup.cfg
Normal file
4
setup.cfg
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
[aliases]
|
||||||
|
test=pytest
|
||||||
|
[tool:pytest]
|
||||||
|
addopts = --verbose
|
57
setup.py
Normal file
57
setup.py
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
import os
|
||||||
|
import re
|
||||||
|
from setuptools import setup
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
this_directory = Path(__file__).parent
|
||||||
|
long_description = (this_directory / "README.md").read_text()
|
||||||
|
|
||||||
|
version = ""
|
||||||
|
with open(os.path.join('tsih', '__init__.py')) as f:
|
||||||
|
version = re.search(
|
||||||
|
r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', f.read(), re.MULTILINE
|
||||||
|
).group(1)
|
||||||
|
assert version
|
||||||
|
|
||||||
|
|
||||||
|
def parse_requirements(filename):
|
||||||
|
""" load requirements from a pip requirements file """
|
||||||
|
with open(filename, 'r') as f:
|
||||||
|
lineiter = list(line.strip() for line in f)
|
||||||
|
return [line for line in lineiter if line and not line.startswith("#")]
|
||||||
|
|
||||||
|
|
||||||
|
install_reqs = parse_requirements("requirements.txt")
|
||||||
|
test_reqs = parse_requirements("test-requirements.txt")
|
||||||
|
extras_require={}
|
||||||
|
extras_require['all'] = [dep for package in extras_require.values() for dep in package]
|
||||||
|
|
||||||
|
|
||||||
|
setup(
|
||||||
|
name='tsih',
|
||||||
|
packages=['tsih'], # this must be the same as the name above
|
||||||
|
version=version,
|
||||||
|
description=("A lightweight library to store an object's history into a SQL database"),
|
||||||
|
long_description=long_description,
|
||||||
|
long_description_content_type='text/markdown',
|
||||||
|
author='J. Fernando Sanchez',
|
||||||
|
author_email='jf.sanchez@upm.es',
|
||||||
|
url='https://github.com/balkian/tsih', # use the URL to the github repo
|
||||||
|
download_url='https://github.com/balkian/tsih/archive/{}.tar.gz'.format(
|
||||||
|
version),
|
||||||
|
keywords=['history', 'sql', 'records'],
|
||||||
|
classifiers=[
|
||||||
|
'Development Status :: 4 - Beta',
|
||||||
|
'Environment :: Console',
|
||||||
|
'Intended Audience :: Developers',
|
||||||
|
'License :: OSI Approved :: Apache Software License',
|
||||||
|
'Operating System :: MacOS :: MacOS X',
|
||||||
|
'Operating System :: Microsoft :: Windows',
|
||||||
|
'Operating System :: POSIX',
|
||||||
|
'Programming Language :: Python :: 3'],
|
||||||
|
install_requires=install_reqs,
|
||||||
|
extras_require=extras_require,
|
||||||
|
tests_require=test_reqs,
|
||||||
|
setup_requires=['pytest-runner', ],
|
||||||
|
include_package_data=True,
|
||||||
|
)
|
1
test-requirements.txt
Normal file
1
test-requirements.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
pytest
|
227
tests/test_history.py
Normal file
227
tests/test_history.py
Normal file
@ -0,0 +1,227 @@
|
|||||||
|
from unittest import TestCase
|
||||||
|
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
from glob import glob
|
||||||
|
|
||||||
|
from tsih import *
|
||||||
|
from tsih import utils
|
||||||
|
|
||||||
|
|
||||||
|
ROOT = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
DBROOT = os.path.join(ROOT, 'testdb')
|
||||||
|
|
||||||
|
|
||||||
|
class TestHistory(TestCase):
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
if not os.path.exists(DBROOT):
|
||||||
|
os.makedirs(DBROOT)
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
if os.path.exists(DBROOT):
|
||||||
|
shutil.rmtree(DBROOT)
|
||||||
|
|
||||||
|
def test_history(self):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
tuples = (
|
||||||
|
('a_0', 0, 'id', 'h'),
|
||||||
|
('a_0', 1, 'id', 'e'),
|
||||||
|
('a_0', 2, 'id', 'l'),
|
||||||
|
('a_0', 3, 'id', 'l'),
|
||||||
|
('a_0', 4, 'id', 'o'),
|
||||||
|
('a_1', 0, 'id', 'v'),
|
||||||
|
('a_1', 1, 'id', 'a'),
|
||||||
|
('a_1', 2, 'id', 'l'),
|
||||||
|
('a_1', 3, 'id', 'u'),
|
||||||
|
('a_1', 4, 'id', 'e'),
|
||||||
|
('env', 1, 'prob', 1),
|
||||||
|
('env', 3, 'prob', 2),
|
||||||
|
('env', 5, 'prob', 3),
|
||||||
|
('a_2', 7, 'finished', True),
|
||||||
|
)
|
||||||
|
h = History()
|
||||||
|
h.save_tuples(tuples)
|
||||||
|
# assert h['env', 0, 'prob'] == 0
|
||||||
|
for i in range(1, 7):
|
||||||
|
assert h['env', i, 'prob'] == ((i-1)//2)+1
|
||||||
|
|
||||||
|
|
||||||
|
for i, k in zip(range(5), 'hello'):
|
||||||
|
assert h['a_0', i, 'id'] == k
|
||||||
|
for record, value in zip(h['a_0', None, 'id'], 'hello'):
|
||||||
|
t_step, val = record
|
||||||
|
assert val == value
|
||||||
|
|
||||||
|
for i, k in zip(range(5), 'value'):
|
||||||
|
assert h['a_1', i, 'id'] == k
|
||||||
|
for i in range(5, 8):
|
||||||
|
assert h['a_1', i, 'id'] == 'e'
|
||||||
|
for i in range(7):
|
||||||
|
assert h['a_2', i, 'finished'] == False
|
||||||
|
assert h['a_2', 7, 'finished']
|
||||||
|
|
||||||
|
def test_history_gen(self):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
tuples = (
|
||||||
|
('a_1', 0, 'id', 'v'),
|
||||||
|
('a_1', 1, 'id', 'a'),
|
||||||
|
('a_1', 2, 'id', 'l'),
|
||||||
|
('a_1', 3, 'id', 'u'),
|
||||||
|
('a_1', 4, 'id', 'e'),
|
||||||
|
('env', 1, 'prob', 1),
|
||||||
|
('env', 2, 'prob', 2),
|
||||||
|
('env', 3, 'prob', 3),
|
||||||
|
('a_2', 7, 'finished', True),
|
||||||
|
)
|
||||||
|
h = History()
|
||||||
|
h.save_tuples(tuples)
|
||||||
|
for t_step, key, value in h['env', None, None]:
|
||||||
|
assert t_step == value
|
||||||
|
assert key == 'prob'
|
||||||
|
|
||||||
|
records = list(h[None, 7, None])
|
||||||
|
assert len(records) == 3
|
||||||
|
for i in records:
|
||||||
|
agent_id, key, value = i
|
||||||
|
if agent_id == 'a_1':
|
||||||
|
assert key == 'id'
|
||||||
|
assert value == 'e'
|
||||||
|
elif agent_id == 'a_2':
|
||||||
|
assert key == 'finished'
|
||||||
|
assert value
|
||||||
|
else:
|
||||||
|
assert key == 'prob'
|
||||||
|
assert value == 3
|
||||||
|
|
||||||
|
records = h['a_1', 7, None]
|
||||||
|
assert records['id'] == 'e'
|
||||||
|
|
||||||
|
def test_history_file(self):
|
||||||
|
"""
|
||||||
|
History should be saved to a file
|
||||||
|
"""
|
||||||
|
tuples = (
|
||||||
|
('a_1', 0, 'id', 'v'),
|
||||||
|
('a_1', 1, 'id', 'a'),
|
||||||
|
('a_1', 2, 'id', 'l'),
|
||||||
|
('a_1', 3, 'id', 'u'),
|
||||||
|
('a_1', 4, 'id', 'e'),
|
||||||
|
('env', 1, 'prob', 1),
|
||||||
|
('env', 2, 'prob', 2),
|
||||||
|
('env', 3, 'prob', 3),
|
||||||
|
('a_2', 7, 'finished', True),
|
||||||
|
)
|
||||||
|
db_path = os.path.join(DBROOT, 'test')
|
||||||
|
h = History(db_path=db_path)
|
||||||
|
h.save_tuples(tuples)
|
||||||
|
h.flush_cache()
|
||||||
|
assert os.path.exists(db_path)
|
||||||
|
|
||||||
|
# Recover the data
|
||||||
|
recovered = History(db_path=db_path)
|
||||||
|
assert recovered['a_1', 0, 'id'] == 'v'
|
||||||
|
assert recovered['a_1', 4, 'id'] == 'e'
|
||||||
|
|
||||||
|
# Using backup=True should create a backup copy, and initialize an empty history
|
||||||
|
newhistory = History(db_path=db_path, backup=True)
|
||||||
|
backuppaths = glob(db_path + '.backup*.sqlite')
|
||||||
|
assert len(backuppaths) == 1
|
||||||
|
backuppath = backuppaths[0]
|
||||||
|
assert newhistory.db_path == h.db_path
|
||||||
|
assert os.path.exists(backuppath)
|
||||||
|
assert len(newhistory[None, None, None]) == 0
|
||||||
|
|
||||||
|
def test_interpolation(self):
|
||||||
|
"""
|
||||||
|
Values for a key are valid until a new value is introduced at a later version
|
||||||
|
"""
|
||||||
|
tuples = (
|
||||||
|
('a_1', 0, 'id', 'a'),
|
||||||
|
('a_1', 4, 'id', 'b'),
|
||||||
|
)
|
||||||
|
db_path = os.path.join(DBROOT, 'test')
|
||||||
|
h = History(db_path=db_path)
|
||||||
|
h.save_tuples(tuples)
|
||||||
|
h.flush_cache()
|
||||||
|
assert os.path.exists(db_path)
|
||||||
|
|
||||||
|
assert h['a_1', 2, 'id'] == 'a'
|
||||||
|
# Recover the data
|
||||||
|
recovered = History(db_path=db_path)
|
||||||
|
assert recovered['a_1', 0, 'id'] == 'a'
|
||||||
|
assert recovered['a_1', 4, 'id'] == 'b'
|
||||||
|
assert recovered['a_1', 2, 'id'] == 'a'
|
||||||
|
|
||||||
|
def test_history_tuples(self):
|
||||||
|
"""
|
||||||
|
The data recovered should be equal to the one recorded.
|
||||||
|
"""
|
||||||
|
tuples = (
|
||||||
|
('a_1', 0, 'id', 'v'),
|
||||||
|
('a_1', 1, 'id', 'a'),
|
||||||
|
('a_1', 2, 'id', 'l'),
|
||||||
|
('a_1', 3, 'id', 'u'),
|
||||||
|
('a_1', 4, 'id', 'e'),
|
||||||
|
('env', 1, 'prob', 1),
|
||||||
|
('env', 2, 'prob', 2),
|
||||||
|
('env', 3, 'prob', 3),
|
||||||
|
('a_2', 7, 'finished', True),
|
||||||
|
)
|
||||||
|
h = History()
|
||||||
|
h.save_tuples(tuples)
|
||||||
|
recovered = list(h.to_tuples())
|
||||||
|
assert recovered
|
||||||
|
for i in recovered:
|
||||||
|
assert i in tuples
|
||||||
|
|
||||||
|
def test_stats(self):
|
||||||
|
"""
|
||||||
|
The data recovered should be equal to the one recorded.
|
||||||
|
"""
|
||||||
|
tuples = (
|
||||||
|
('a_1', 0, 'id', 'v'),
|
||||||
|
('a_1', 1, 'id', 'a'),
|
||||||
|
('a_1', 2, 'id', 'l'),
|
||||||
|
('a_1', 3, 'id', 'u'),
|
||||||
|
('a_1', 4, 'id', 'e'),
|
||||||
|
('env', 1, 'prob', 1),
|
||||||
|
('env', 2, 'prob', 2),
|
||||||
|
('env', 3, 'prob', 3),
|
||||||
|
('a_2', 7, 'finished', True),
|
||||||
|
)
|
||||||
|
stat_tuples = [
|
||||||
|
{'num_infected': 5, 'runtime': 0.2},
|
||||||
|
{'num_infected': 5, 'runtime': 0.2},
|
||||||
|
{'new': '40'},
|
||||||
|
]
|
||||||
|
h = History()
|
||||||
|
h.save_tuples(tuples)
|
||||||
|
for stat in stat_tuples:
|
||||||
|
h.save_stats(stat)
|
||||||
|
recovered = h.get_stats()
|
||||||
|
assert recovered
|
||||||
|
assert recovered[0]['num_infected'] == 5
|
||||||
|
assert recovered[1]['runtime'] == 0.2
|
||||||
|
assert recovered[2]['new'] == '40'
|
||||||
|
|
||||||
|
def test_unflatten(self):
|
||||||
|
ex = {'count.neighbors.3': 4,
|
||||||
|
'count.times.2': 4,
|
||||||
|
'count.total.4': 4,
|
||||||
|
'mean.neighbors': 3,
|
||||||
|
'mean.times': 2,
|
||||||
|
'mean.total': 4,
|
||||||
|
't_step': 2,
|
||||||
|
'trial_id': 'exporter_sim_trial_1605817956-4475424'}
|
||||||
|
res = utils.unflatten_dict(ex)
|
||||||
|
|
||||||
|
assert 'count' in res
|
||||||
|
assert all(x in res['count'] for x in ['times', 'total', 'neighbors'])
|
||||||
|
assert res['count']['times']['2'] == 4
|
||||||
|
assert 'mean' in res
|
||||||
|
assert all(x in res['mean'] for x in ['times', 'total', 'neighbors'])
|
||||||
|
assert 't_step' in res
|
||||||
|
assert 'trial_id' in res
|
79
tests/test_main.py
Normal file
79
tests/test_main.py
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
from unittest import TestCase
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
from tsih import Dict
|
||||||
|
|
||||||
|
|
||||||
|
ROOT = pathlib.Path(os.path.abspath(os.path.dirname(__file__)))
|
||||||
|
DBROOT = ROOT / 'testdb'
|
||||||
|
|
||||||
|
|
||||||
|
class TestTsih(TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
if not os.path.exists(DBROOT):
|
||||||
|
os.makedirs(DBROOT)
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
if os.path.exists(DBROOT):
|
||||||
|
shutil.rmtree(DBROOT)
|
||||||
|
|
||||||
|
def test_basic(self):
|
||||||
|
'''The data stored in each version should be retrievable'''
|
||||||
|
d = Dict()
|
||||||
|
d['text'] = 'hello'
|
||||||
|
d.version = 1
|
||||||
|
d['text'] = 'world'
|
||||||
|
assert d[(0, 'text')] == 'hello'
|
||||||
|
assert d[(1, 'text')] == 'world'
|
||||||
|
|
||||||
|
def test_auto_version(self):
|
||||||
|
'''Changing a value when `auto_version` is on should produce a new version automatically'''
|
||||||
|
d = Dict(version=0, auto_version=True)
|
||||||
|
d['text'] = 'hello'
|
||||||
|
d['text'] = 'world'
|
||||||
|
assert d[(1, 'text')] == 'hello'
|
||||||
|
assert d[(2, 'text')] == 'world'
|
||||||
|
|
||||||
|
def test_serialized(self):
|
||||||
|
'''
|
||||||
|
Using the same database should enable retrieving the values of a previous
|
||||||
|
dictionary.
|
||||||
|
'''
|
||||||
|
d = Dict(name='robot', db_path=DBROOT / 'basic.sqlite')
|
||||||
|
d['text'] = 'hello'
|
||||||
|
d.version = 25
|
||||||
|
d['text'] = 'world'
|
||||||
|
assert d[(0, 'text')] == 'hello'
|
||||||
|
assert d[(24, 'text')] == 'hello'
|
||||||
|
assert d[(25, 'text')] == 'world'
|
||||||
|
del d
|
||||||
|
|
||||||
|
recovered = Dict(name='robot', db_path=DBROOT / 'basic.sqlite')
|
||||||
|
assert recovered[(0, 'text')] == 'hello'
|
||||||
|
assert recovered[(24, 'text')] == 'hello'
|
||||||
|
assert recovered[(25, 'text')] == 'world'
|
||||||
|
|
||||||
|
def test_custom(self):
|
||||||
|
'''
|
||||||
|
Inheriting from the Dict class should not change the behavior.
|
||||||
|
'''
|
||||||
|
|
||||||
|
class CustomDict(Dict):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, db_path=DBROOT / 'custom.sqlite', **kwargs)
|
||||||
|
|
||||||
|
d = CustomDict(name='robot')
|
||||||
|
d['text'] = 'hello'
|
||||||
|
d.version = 25
|
||||||
|
d['text'] = 'world'
|
||||||
|
assert d[(0, 'text')] == 'hello'
|
||||||
|
assert d[(24, 'text')] == 'hello'
|
||||||
|
assert d[(25, 'text')] == 'world'
|
||||||
|
del d
|
||||||
|
|
||||||
|
recovered = CustomDict(name='robot')
|
||||||
|
assert recovered[(0, 'text')] == 'hello'
|
||||||
|
assert recovered[(24, 'text')] == 'hello'
|
||||||
|
assert recovered[(26, 'text')] == 'world'
|
444
tsih/__init__.py
Normal file
444
tsih/__init__.py
Normal file
@ -0,0 +1,444 @@
|
|||||||
|
import time
|
||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
import sqlite3
|
||||||
|
import copy
|
||||||
|
import uuid
|
||||||
|
import logging
|
||||||
|
import pathlib
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
__version__ = '0.1.4'
|
||||||
|
|
||||||
|
from collections import UserDict, namedtuple
|
||||||
|
|
||||||
|
from . import serialization
|
||||||
|
from .utils import open_or_reuse, unflatten_dict
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Dict(UserDict):
|
||||||
|
|
||||||
|
def __init__(self, name=None, db_name=None, db_path=None, backup=False, readonly=False, version=0, auto_version=False):
|
||||||
|
super().__init__()
|
||||||
|
self.dict_name = name or 'anonymous_{}'.format(uuid.uuid1())
|
||||||
|
self._history = History(name=db_name, db_path=db_path, backup=backup, readonly=readonly)
|
||||||
|
self.version = version
|
||||||
|
self.auto_version = auto_version
|
||||||
|
|
||||||
|
def __delitem__(self, key):
|
||||||
|
if isinstance(key, tuple):
|
||||||
|
raise ValueError('Cannot remove past entries')
|
||||||
|
if self.auto_version:
|
||||||
|
self.version += 1
|
||||||
|
self.data[key] = None
|
||||||
|
|
||||||
|
def __getitem__(self, key):
|
||||||
|
if isinstance(key, tuple):
|
||||||
|
if len(key) < 3:
|
||||||
|
key = tuple([self.dict_name] + list(key))
|
||||||
|
self._history.flush_cache()
|
||||||
|
return self._history[key]
|
||||||
|
|
||||||
|
return self.data[key]
|
||||||
|
|
||||||
|
def __del__(self):
|
||||||
|
self._history.close()
|
||||||
|
|
||||||
|
def __setcurrent(self, key, value):
|
||||||
|
if self.auto_version:
|
||||||
|
self.version += 1
|
||||||
|
self.data[key] = value
|
||||||
|
self._history.save_record(dict_id=self.dict_name,
|
||||||
|
t_step=float(self.version),
|
||||||
|
key=key,
|
||||||
|
value=value)
|
||||||
|
|
||||||
|
def __setitem__(self, key, value):
|
||||||
|
if not isinstance(key, tuple):
|
||||||
|
self.__setcurrent(key, value)
|
||||||
|
else:
|
||||||
|
if len(key) < 3:
|
||||||
|
key = tuple([self.dict_name] + list(key))
|
||||||
|
k = history.Key(*key)
|
||||||
|
if k.t_step == version and k.dict_id == self.dict_name:
|
||||||
|
return self.__setcurrent(key.key, key.value)
|
||||||
|
self._history.save_record(*k,
|
||||||
|
value=value)
|
||||||
|
|
||||||
|
|
||||||
|
class History:
|
||||||
|
"""
|
||||||
|
Store and retrieve values from a sqlite database.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, name=None, db_path=None, backup=False, readonly=False):
|
||||||
|
if readonly and (not os.path.exists(db_path)):
|
||||||
|
raise Exception('The DB file does not exist. Cannot open in read-only mode')
|
||||||
|
|
||||||
|
self._db = None
|
||||||
|
self._temp = db_path is None
|
||||||
|
self._stats_columns = None
|
||||||
|
self.readonly = readonly
|
||||||
|
|
||||||
|
if self._temp:
|
||||||
|
if not name:
|
||||||
|
name = time.time()
|
||||||
|
# The file will be deleted as soon as it's closed
|
||||||
|
# Normally, that will be on destruction
|
||||||
|
db_path = tempfile.NamedTemporaryFile(suffix='{}.sqlite'.format(name)).name
|
||||||
|
|
||||||
|
|
||||||
|
if backup and os.path.exists(db_path):
|
||||||
|
newname = db_path + '.backup{}.sqlite'.format(time.time())
|
||||||
|
os.rename(db_path, newname)
|
||||||
|
|
||||||
|
self.db_path = db_path
|
||||||
|
|
||||||
|
self.db = db_path
|
||||||
|
self._dtypes = {}
|
||||||
|
self._tups = []
|
||||||
|
|
||||||
|
|
||||||
|
if self.readonly:
|
||||||
|
return
|
||||||
|
|
||||||
|
with self.db:
|
||||||
|
logger.debug('Creating database {}'.format(self.db_path))
|
||||||
|
self.db.execute('''CREATE TABLE IF NOT EXISTS history (dict_id text, t_step real, key text, value text)''')
|
||||||
|
self.db.execute('''CREATE TABLE IF NOT EXISTS value_types (key text, value_type text)''')
|
||||||
|
self.db.execute('''CREATE TABLE IF NOT EXISTS stats (stat_id text)''')
|
||||||
|
self.db.execute('''CREATE UNIQUE INDEX IF NOT EXISTS idx_history ON history (dict_id, t_step, key);''')
|
||||||
|
|
||||||
|
@property
|
||||||
|
def db(self):
|
||||||
|
try:
|
||||||
|
self._db.cursor()
|
||||||
|
except (sqlite3.ProgrammingError, AttributeError):
|
||||||
|
self.db = None # Reset the database
|
||||||
|
return self._db
|
||||||
|
|
||||||
|
@db.setter
|
||||||
|
def db(self, db_path=None):
|
||||||
|
self._close()
|
||||||
|
db_path = db_path or self.db_path
|
||||||
|
if isinstance(db_path, str) or isinstance(db_path, pathlib.Path):
|
||||||
|
logger.debug('Connecting to database {}'.format(db_path))
|
||||||
|
self._db = sqlite3.connect(db_path)
|
||||||
|
self._db.row_factory = sqlite3.Row
|
||||||
|
else:
|
||||||
|
self._db = db_path
|
||||||
|
|
||||||
|
def __del__(self):
|
||||||
|
self._close()
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
self._close()
|
||||||
|
|
||||||
|
def _close(self):
|
||||||
|
if self._db is None:
|
||||||
|
return
|
||||||
|
self.flush_cache()
|
||||||
|
self._db.close()
|
||||||
|
self._db = None
|
||||||
|
|
||||||
|
def save_stats(self, stat):
|
||||||
|
if self.readonly:
|
||||||
|
print('DB in readonly mode')
|
||||||
|
return
|
||||||
|
if not stat:
|
||||||
|
return
|
||||||
|
with self.db:
|
||||||
|
if not self._stats_columns:
|
||||||
|
self._stats_columns = list(c['name'] for c in self.db.execute('PRAGMA table_info(stats)'))
|
||||||
|
|
||||||
|
for column, value in stat.items():
|
||||||
|
if column in self._stats_columns:
|
||||||
|
continue
|
||||||
|
dtype = 'text'
|
||||||
|
if not isinstance(value, str):
|
||||||
|
try:
|
||||||
|
float(value)
|
||||||
|
dtype = 'real'
|
||||||
|
int(value)
|
||||||
|
dtype = 'int'
|
||||||
|
except (ValueError, OverflowError):
|
||||||
|
pass
|
||||||
|
self.db.execute('ALTER TABLE stats ADD "{}" "{}"'.format(column, dtype))
|
||||||
|
self._stats_columns.append(column)
|
||||||
|
|
||||||
|
columns = ", ".join(map(lambda x: '"{}"'.format(x), stat.keys()))
|
||||||
|
values = ", ".join(['"{0}"'.format(col) for col in stat.values()])
|
||||||
|
query = "INSERT INTO stats ({columns}) VALUES ({values})".format(
|
||||||
|
columns=columns,
|
||||||
|
values=values
|
||||||
|
)
|
||||||
|
self.db.execute(query)
|
||||||
|
|
||||||
|
def get_stats(self, unflatten=True):
|
||||||
|
rows = self.db.execute("select * from stats").fetchall()
|
||||||
|
res = []
|
||||||
|
for row in rows:
|
||||||
|
d = {}
|
||||||
|
for k in row.keys():
|
||||||
|
if row[k] is None:
|
||||||
|
continue
|
||||||
|
d[k] = row[k]
|
||||||
|
if unflatten:
|
||||||
|
d = unflatten_dict(d)
|
||||||
|
res.append(d)
|
||||||
|
return res
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dtypes(self):
|
||||||
|
self._read_types()
|
||||||
|
return {k:v[0] for k, v in self._dtypes.items()}
|
||||||
|
|
||||||
|
def save_tuples(self, tuples):
|
||||||
|
'''
|
||||||
|
Save a series of tuples, converting them to records if necessary
|
||||||
|
'''
|
||||||
|
self.save_records(Record(*tup) for tup in tuples)
|
||||||
|
|
||||||
|
def save_records(self, records):
|
||||||
|
'''
|
||||||
|
Save a collection of records
|
||||||
|
'''
|
||||||
|
for record in records:
|
||||||
|
if not isinstance(record, Record):
|
||||||
|
record = Record(*record)
|
||||||
|
self.save_record(*record)
|
||||||
|
|
||||||
|
def save_record(self, dict_id, t_step, key, value):
|
||||||
|
'''
|
||||||
|
Save a collection of records to the database.
|
||||||
|
Database writes are cached.
|
||||||
|
'''
|
||||||
|
if self.readonly:
|
||||||
|
raise Exception('DB in readonly mode')
|
||||||
|
if key not in self._dtypes:
|
||||||
|
self._read_types()
|
||||||
|
if key not in self._dtypes:
|
||||||
|
name = serialization.name(value)
|
||||||
|
serializer = serialization.serializer(name)
|
||||||
|
deserializer = serialization.deserializer(name)
|
||||||
|
self._dtypes[key] = (name, serializer, deserializer)
|
||||||
|
with self.db:
|
||||||
|
self.db.execute("replace into value_types (key, value_type) values (?, ?)", (key, name))
|
||||||
|
value = self._dtypes[key][1](value)
|
||||||
|
|
||||||
|
self._tups.append(Record(dict_id=dict_id,
|
||||||
|
t_step=t_step,
|
||||||
|
key=key,
|
||||||
|
value=value))
|
||||||
|
|
||||||
|
def flush_cache(self):
|
||||||
|
'''
|
||||||
|
Use a cache to save state changes to avoid opening a session for every change.
|
||||||
|
The cache will be flushed at the end of the simulation, and when history is accessed.
|
||||||
|
'''
|
||||||
|
if self.readonly:
|
||||||
|
raise Exception('DB in readonly mode')
|
||||||
|
logger.debug('Flushing cache {}'.format(self.db_path))
|
||||||
|
with self.db:
|
||||||
|
self.db.executemany("replace into history(dict_id, t_step, key, value) values (?, ?, ?, ?)", self._tups)
|
||||||
|
self._tups.clear()
|
||||||
|
|
||||||
|
def to_tuples(self):
|
||||||
|
self.flush_cache()
|
||||||
|
with self.db:
|
||||||
|
res = self.db.execute("select dict_id, t_step, key, value from history ").fetchall()
|
||||||
|
for r in res:
|
||||||
|
dict_id, t_step, key, value = r
|
||||||
|
if key not in self._dtypes:
|
||||||
|
self._read_types()
|
||||||
|
if key not in self._dtypes:
|
||||||
|
raise ValueError("Unknown datatype for {} and {}".format(key, value))
|
||||||
|
value = self._dtypes[key][2](value)
|
||||||
|
yield dict_id, t_step, key, value
|
||||||
|
|
||||||
|
def _read_types(self):
|
||||||
|
with self.db:
|
||||||
|
res = self.db.execute("select key, value_type from value_types ").fetchall()
|
||||||
|
for k, v in res:
|
||||||
|
serializer = serialization.serializer(v)
|
||||||
|
deserializer = serialization.deserializer(v)
|
||||||
|
self._dtypes[k] = (v, serializer, deserializer)
|
||||||
|
|
||||||
|
def __getitem__(self, key):
|
||||||
|
self.flush_cache()
|
||||||
|
key = Key(*key)
|
||||||
|
dict_ids = [key.dict_id] if key.dict_id is not None else []
|
||||||
|
t_steps = [key.t_step] if key.t_step is not None else []
|
||||||
|
keys = [key.key] if key.key is not None else []
|
||||||
|
|
||||||
|
df = self.read_sql(dict_ids=dict_ids,
|
||||||
|
t_steps=t_steps,
|
||||||
|
keys=keys)
|
||||||
|
r = Records(df, filter=key, dtypes=self._dtypes)
|
||||||
|
if r.resolved:
|
||||||
|
return r.value()
|
||||||
|
return r
|
||||||
|
|
||||||
|
def read_sql(self, keys=None, dict_ids=None, not_dict_ids=None, t_steps=None, convert_types=False, limit=-1):
|
||||||
|
|
||||||
|
self._read_types()
|
||||||
|
|
||||||
|
def escape_and_join(v):
|
||||||
|
if v is None:
|
||||||
|
return
|
||||||
|
return ",".join(map(lambda x: "\'{}\'".format(x), v))
|
||||||
|
|
||||||
|
filters = [("key in ({})".format(escape_and_join(keys)), keys),
|
||||||
|
("dict_id in ({})".format(escape_and_join(dict_ids)), dict_ids),
|
||||||
|
("dict_id not in ({})".format(escape_and_join(not_dict_ids)), not_dict_ids)
|
||||||
|
]
|
||||||
|
filters = list(k[0] for k in filters if k[1])
|
||||||
|
|
||||||
|
last_df = None
|
||||||
|
if t_steps:
|
||||||
|
# Convert negative indices into positive
|
||||||
|
if any(x<0 for x in t_steps):
|
||||||
|
max_t = int(self.db.execute("select max(t_step) from history").fetchone()[0])
|
||||||
|
t_steps = [t if t>0 else max_t+1+t for t in t_steps]
|
||||||
|
|
||||||
|
# We will be doing ffill interpolation, so we need to look for
|
||||||
|
# the last value before the minimum step in the query
|
||||||
|
min_step = min(t_steps)
|
||||||
|
last_filters = ['t_step < {}'.format(min_step),]
|
||||||
|
last_filters = last_filters + filters
|
||||||
|
condition = ' and '.join(last_filters)
|
||||||
|
|
||||||
|
last_query = '''
|
||||||
|
select h1.*
|
||||||
|
from history h1
|
||||||
|
inner join (
|
||||||
|
select dict_id, key, max(t_step) as t_step
|
||||||
|
from history
|
||||||
|
where {condition}
|
||||||
|
group by dict_id, key
|
||||||
|
) h2
|
||||||
|
on h1.dict_id = h2.dict_id and
|
||||||
|
h1.key = h2.key and
|
||||||
|
h1.t_step = h2.t_step
|
||||||
|
'''.format(condition=condition)
|
||||||
|
last_df = pd.read_sql_query(last_query, self.db)
|
||||||
|
|
||||||
|
filters.append("t_step >= '{}' and t_step <= '{}'".format(min_step, max(t_steps)))
|
||||||
|
|
||||||
|
condition = ''
|
||||||
|
if filters:
|
||||||
|
condition = 'where {} '.format(' and '.join(filters))
|
||||||
|
query = 'select * from history {} limit {}'.format(condition, limit)
|
||||||
|
df = pd.read_sql_query(query, self.db)
|
||||||
|
if last_df is not None:
|
||||||
|
df = pd.concat([df, last_df])
|
||||||
|
|
||||||
|
df_p = df.pivot_table(values='value', index=['t_step'],
|
||||||
|
columns=['key', 'dict_id'],
|
||||||
|
aggfunc='first')
|
||||||
|
|
||||||
|
for k, v in self._dtypes.items():
|
||||||
|
if k in df_p:
|
||||||
|
dtype, _, deserial = v
|
||||||
|
try:
|
||||||
|
df_p[k] = df_p[k].fillna(method='ffill').astype(dtype)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
# Avoid forward-filling unknown/incompatible types
|
||||||
|
continue
|
||||||
|
if t_steps:
|
||||||
|
df_p = df_p.reindex(t_steps, method='ffill')
|
||||||
|
return df_p.ffill()
|
||||||
|
|
||||||
|
def __getstate__(self):
|
||||||
|
state = dict(**self.__dict__)
|
||||||
|
del state['_db']
|
||||||
|
del state['_dtypes']
|
||||||
|
return state
|
||||||
|
|
||||||
|
def __setstate__(self, state):
|
||||||
|
self.__dict__ = state
|
||||||
|
self._dtypes = {}
|
||||||
|
self._db = None
|
||||||
|
|
||||||
|
def dump(self, f):
|
||||||
|
self._close()
|
||||||
|
for line in open_or_reuse(self.db_path, 'rb'):
|
||||||
|
f.write(line)
|
||||||
|
|
||||||
|
class Records():
|
||||||
|
|
||||||
|
def __init__(self, df, filter=None, dtypes=None):
|
||||||
|
if not filter:
|
||||||
|
filter = Key(dict_id=None,
|
||||||
|
t_step=None,
|
||||||
|
key=None)
|
||||||
|
self._df = df
|
||||||
|
self._filter = filter
|
||||||
|
self.dtypes = dtypes or {}
|
||||||
|
super().__init__()
|
||||||
|
|
||||||
|
def mask(self, tup):
|
||||||
|
res = ()
|
||||||
|
for i, k in zip(tup[:-1], self._filter):
|
||||||
|
if k is None:
|
||||||
|
res = res + (i,)
|
||||||
|
res = res + (tup[-1],)
|
||||||
|
return res
|
||||||
|
|
||||||
|
def filter(self, newKey):
|
||||||
|
f = list(self._filter)
|
||||||
|
for ix, i in enumerate(f):
|
||||||
|
if i is None:
|
||||||
|
f[ix] = newKey
|
||||||
|
self._filter = Key(*f)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def resolved(self):
|
||||||
|
return sum(1 for i in self._filter if i is not None) == 3
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
for column, series in self._df.iteritems():
|
||||||
|
key, dict_id = column
|
||||||
|
for t_step, value in series.iteritems():
|
||||||
|
r = Record(t_step=t_step,
|
||||||
|
dict_id=dict_id,
|
||||||
|
key=key,
|
||||||
|
value=value)
|
||||||
|
yield self.mask(r)
|
||||||
|
|
||||||
|
def value(self):
|
||||||
|
if self.resolved:
|
||||||
|
f = self._filter
|
||||||
|
try:
|
||||||
|
i = self._df[f.key][str(f.dict_id)]
|
||||||
|
ix = i.index.get_loc(f.t_step, method='ffill')
|
||||||
|
return i.iloc[ix]
|
||||||
|
except KeyError as ex:
|
||||||
|
return self.dtypes[f.key][2]()
|
||||||
|
return list(self)
|
||||||
|
|
||||||
|
def df(self):
|
||||||
|
return self._df
|
||||||
|
|
||||||
|
def __getitem__(self, k):
|
||||||
|
n = copy.copy(self)
|
||||||
|
n.filter(k)
|
||||||
|
if n.resolved:
|
||||||
|
return n.value()
|
||||||
|
return n
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(self._df)
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
if self.resolved:
|
||||||
|
return str(self.value())
|
||||||
|
return '<Records for [{}]>'.format(self._filter)
|
||||||
|
|
||||||
|
Key = namedtuple('Key', ['dict_id', 't_step', 'key'])
|
||||||
|
Record = namedtuple('Record', 'dict_id t_step key value')
|
||||||
|
|
||||||
|
Stat = namedtuple('Stat', 'stat_id text')
|
89
tsih/serialization.py
Normal file
89
tsih/serialization.py
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
import os
|
||||||
|
import logging
|
||||||
|
import ast
|
||||||
|
import sys
|
||||||
|
import importlib
|
||||||
|
from itertools import product, chain
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger('soil')
|
||||||
|
|
||||||
|
|
||||||
|
builtins = importlib.import_module('builtins')
|
||||||
|
|
||||||
|
def name(value, known_modules=[]):
|
||||||
|
'''Return a name that can be imported, to serialize/deserialize an object'''
|
||||||
|
if value is None:
|
||||||
|
return 'None'
|
||||||
|
if not isinstance(value, type): # Get the class name first
|
||||||
|
value = type(value)
|
||||||
|
tname = value.__name__
|
||||||
|
if hasattr(builtins, tname):
|
||||||
|
return tname
|
||||||
|
modname = value.__module__
|
||||||
|
if modname == '__main__':
|
||||||
|
return tname
|
||||||
|
if known_modules and modname in known_modules:
|
||||||
|
return tname
|
||||||
|
for kmod in known_modules:
|
||||||
|
if not kmod:
|
||||||
|
continue
|
||||||
|
module = importlib.import_module(kmod)
|
||||||
|
if hasattr(module, tname):
|
||||||
|
return tname
|
||||||
|
return '{}.{}'.format(modname, tname)
|
||||||
|
|
||||||
|
|
||||||
|
def serializer(type_):
|
||||||
|
if type_ != 'str' and hasattr(builtins, type_):
|
||||||
|
return repr
|
||||||
|
return lambda x: x
|
||||||
|
|
||||||
|
|
||||||
|
def serialize(v, known_modules=[]):
|
||||||
|
'''Get a text representation of an object.'''
|
||||||
|
tname = name(v, known_modules=known_modules)
|
||||||
|
func = serializer(tname)
|
||||||
|
return func(v), tname
|
||||||
|
|
||||||
|
def deserializer(type_, known_modules=[]):
|
||||||
|
if type(type_) != str: # Already deserialized
|
||||||
|
return type_
|
||||||
|
if type_ == 'str':
|
||||||
|
return lambda x='': x
|
||||||
|
if type_ == 'None':
|
||||||
|
return lambda x=None: None
|
||||||
|
if hasattr(builtins, type_): # Check if it's a builtin type
|
||||||
|
cls = getattr(builtins, type_)
|
||||||
|
return lambda x=None: ast.literal_eval(x) if x is not None else cls()
|
||||||
|
# Otherwise, see if we can find the module and the class
|
||||||
|
modules = known_modules or []
|
||||||
|
options = []
|
||||||
|
|
||||||
|
for mod in modules:
|
||||||
|
if mod:
|
||||||
|
options.append((mod, type_))
|
||||||
|
|
||||||
|
if '.' in type_: # Fully qualified module
|
||||||
|
module, type_ = type_.rsplit(".", 1)
|
||||||
|
options.append ((module, type_))
|
||||||
|
|
||||||
|
errors = []
|
||||||
|
for modname, tname in options:
|
||||||
|
try:
|
||||||
|
module = importlib.import_module(modname)
|
||||||
|
cls = getattr(module, tname)
|
||||||
|
return getattr(cls, 'deserialize', cls)
|
||||||
|
except (ImportError, AttributeError) as ex:
|
||||||
|
errors.append((modname, tname, ex))
|
||||||
|
raise Exception('Could not find type {}. Tried: {}'.format(type_, errors))
|
||||||
|
|
||||||
|
|
||||||
|
def deserialize(type_, value=None, **kwargs):
|
||||||
|
'''Get an object from a text representation'''
|
||||||
|
if not isinstance(type_, str):
|
||||||
|
return type_
|
||||||
|
des = deserializer(type_, **kwargs)
|
||||||
|
if value is None:
|
||||||
|
return des
|
||||||
|
return des(value)
|
87
tsih/utils.py
Normal file
87
tsih/utils.py
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
import logging
|
||||||
|
import time
|
||||||
|
import os
|
||||||
|
|
||||||
|
from shutil import copyfile
|
||||||
|
|
||||||
|
from contextlib import contextmanager
|
||||||
|
|
||||||
|
logger = logging.getLogger('soil')
|
||||||
|
# logging.basicConfig()
|
||||||
|
# logger.setLevel(logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def timer(name='task', pre="", function=logger.info, to_object=None):
|
||||||
|
start = time.time()
|
||||||
|
function('{}Starting {} at {}.'.format(pre, name,
|
||||||
|
time.strftime("%X", time.gmtime(start))))
|
||||||
|
yield start
|
||||||
|
end = time.time()
|
||||||
|
function('{}Finished {} at {} in {} seconds'.format(pre, name,
|
||||||
|
time.strftime("%X", time.gmtime(end)),
|
||||||
|
str(end-start)))
|
||||||
|
if to_object:
|
||||||
|
to_object.start = start
|
||||||
|
to_object.end = end
|
||||||
|
|
||||||
|
|
||||||
|
def safe_open(path, mode='r', backup=True, **kwargs):
|
||||||
|
outdir = os.path.dirname(path)
|
||||||
|
if outdir and not os.path.exists(outdir):
|
||||||
|
os.makedirs(outdir)
|
||||||
|
if backup and 'w' in mode and os.path.exists(path):
|
||||||
|
creation = os.path.getctime(path)
|
||||||
|
stamp = time.strftime('%Y-%m-%d_%H.%M.%S', time.localtime(creation))
|
||||||
|
|
||||||
|
backup_dir = os.path.join(outdir, 'backup')
|
||||||
|
if not os.path.exists(backup_dir):
|
||||||
|
os.makedirs(backup_dir)
|
||||||
|
newpath = os.path.join(backup_dir, '{}@{}'.format(os.path.basename(path),
|
||||||
|
stamp))
|
||||||
|
copyfile(path, newpath)
|
||||||
|
return open(path, mode=mode, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def open_or_reuse(f, *args, **kwargs):
|
||||||
|
try:
|
||||||
|
return safe_open(f, *args, **kwargs)
|
||||||
|
except (AttributeError, TypeError):
|
||||||
|
return f
|
||||||
|
|
||||||
|
def flatten_dict(d):
|
||||||
|
if not isinstance(d, dict):
|
||||||
|
return d
|
||||||
|
return dict(_flatten_dict(d))
|
||||||
|
|
||||||
|
def _flatten_dict(d, prefix=''):
|
||||||
|
if not isinstance(d, dict):
|
||||||
|
# print('END:', prefix, d)
|
||||||
|
yield prefix, d
|
||||||
|
return
|
||||||
|
if prefix:
|
||||||
|
prefix = prefix + '.'
|
||||||
|
for k, v in d.items():
|
||||||
|
# print(k, v)
|
||||||
|
res = list(_flatten_dict(v, prefix='{}{}'.format(prefix, k)))
|
||||||
|
# print('RES:', res)
|
||||||
|
yield from res
|
||||||
|
|
||||||
|
|
||||||
|
def unflatten_dict(d):
|
||||||
|
out = {}
|
||||||
|
for k, v in d.items():
|
||||||
|
target = out
|
||||||
|
if not isinstance(k, str):
|
||||||
|
target[k] = v
|
||||||
|
continue
|
||||||
|
tokens = k.split('.')
|
||||||
|
if len(tokens) < 2:
|
||||||
|
target[k] = v
|
||||||
|
continue
|
||||||
|
for token in tokens[:-1]:
|
||||||
|
if token not in target:
|
||||||
|
target[token] = {}
|
||||||
|
target = target[token]
|
||||||
|
target[tokens[-1]] = v
|
||||||
|
return out
|
Loading…
Reference in New Issue
Block a user