Added history class

The environment no longer deals with history directly; it delegates that to a dedicated History class. The analysis module also uses History instances instead of either querying the database directly or creating a proxy environment. This should make it easier to change the implementation in the future. In fact, the change was motivated by the large size of the CSV files produced by previous versions. The new implementation stores results only as deltas, and it fills in any missing values when they are needed.
2025-08-23 11:52:19 +00:00 · 2018-05-04 10:01:49 +02:00
parent 73c90887e8
commit fc48ed7e09
19 changed files with 1469 additions and 2911 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,8 @@
+version: '3'
+services:
+  dev:
+    build: .
+    volumes:
+      - .:/usr/src/app
+    tty: true
+    entrypoint: /bin/bash
--- a/examples/NewsSpread.ipynb
+++ b/examples/NewsSpread.ipynb
--- a/examples/newsspread/NewsSpread.ipynb
+++ b/examples/newsspread/NewsSpread.ipynb
--- a/examples/tutorial/soil_tutorial.ipynb
+++ b/examples/tutorial/soil_tutorial.ipynb
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 nxsim
 simpy
-networkx
+networkx>=2.0
 numpy
 matplotlib
 pyyaml
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@ from setuptools import setup


 with open(os.path.join('soil', 'VERSION')) as f:
-    __version__ = f.read().strip()
+    __version__ = f.readlines()[0].strip()
    assert __version__


--- a/soil/VERSION
+++ b/soil/VERSION
@@ -1 +1 @@
-0.10.2
+0.11
--- a/soil/init.py
+++ b/soil/init.py
@@ -62,7 +62,7 @@ def main():
        simulation.run_from_config(args.file,
                                   dry_run=args.dry_run,
                                   dump=dump,
-                                   parallel=(not args.synchronous),
+                                   parallel=(not args.synchronous and not args.pdb),
                                   results_dir=args.output)
    except Exception as ex:
        if args.pdb:
--- a/soil/agents/CounterModel.py
+++ b/soil/agents/CounterModel.py
@@ -11,9 +11,9 @@ class CounterModel(BaseAgent):
        # Outside effects
        total = len(list(self.get_all_agents()))
        neighbors = len(list(self.get_neighboring_agents()))
-        self.state['times'] = self.state.get('times', 0) + 1
-        self.state['neighbors'] = neighbors
-        self.state['total'] = total
+        self['times'] = self.get('times', 0) + 1
+        self['neighbors'] = neighbors
+        self['total'] = total


 class AggregatedCounter(BaseAgent):
@@ -26,7 +26,7 @@ class AggregatedCounter(BaseAgent):
        # Outside effects
        total = len(list(self.get_all_agents()))
        neighbors = len(list(self.get_neighboring_agents()))
-        self.state['times'] = self.state.get('times', 0) + 1
-        self.state['neighbors'] = self.state.get('neighbors', 0) + neighbors
-        self.state['total'] = total = self.state.get('total', 0) + total
+        self['times'] = self.get('times', 0) + 1
+        self['neighbors'] = self.get('neighbors', 0) + neighbors
+        self['total'] = total = self.get('total', 0) + total
        self.debug('Running for step: {}. Total: {}'.format(self.now, total))
--- a/soil/agents/init.py
+++ b/soil/agents/init.py
@@ -14,7 +14,7 @@ import json

 from functools import wraps

-from .. import utils
+from .. import utils, history

 agent_types = {}

@@ -32,33 +32,67 @@ class BaseAgent(nxsim.BaseAgent, metaclass=MetaAgent):

    defaults = {}

-    def __init__(self, **kwargs):
+    def __init__(self, environment=None, agent_id=None, state=None,
+                 name='network_process', interval=None, **state_params):
+        # Check for REQUIRED arguments
+        assert environment is not None, TypeError('__init__ missing 1 required keyword argument: \'environment\'. '
+                                                  'Cannot be NoneType.')
+        # Initialize agent parameters
+        self.id = agent_id
+        self.name = name
+        self.state_params = state_params
+
+        # Global parameters
+        self.global_topology = environment.G
+        self.environment_params = environment.environment_params
+
+        # Register agent to environment
+        self.env = environment
+
        self._neighbors = None
        self.alive = True
-        state = deepcopy(self.defaults)
-        state.update(kwargs.pop('state', {}))
-        kwargs['state'] = state
-        super().__init__(**kwargs)
+        real_state = deepcopy(self.defaults)
+        real_state.update(state or {})
+        self._state = real_state
+        self.interval = interval
+
        if not hasattr(self, 'level'):
            self.level = logging.DEBUG
-        self.logger = logging.getLogger('{}-Agent-{}'.format(self.env.name, self.id))
+        self.logger = logging.getLogger('{}-Agent-{}'.format(self.env.name,
+                                                             self.id))
        self.logger.setLevel(self.level)

+        # initialize every time an instance of the agent is created
+        self.action = self.env.process(self.run())
+
+    @property
+    def state(self):
+        return self._state
+
+    @state.setter
+    def state(self, value):
+        for k, v in value.items():
+            self[k] = v

    def __getitem__(self, key):
+        """Return a state value, either current or historical.
+
+        A plain ``key`` is looked up in the current state and yields
+        ``None`` when it is missing. A ``(key, t_step)`` tuple is turned
+        into a ``history.Key`` for this agent and resolved through the
+        environment.
+        """
        if isinstance(key, tuple):
-            k, t_step = key
-            return self.env[self.id, t_step, k]
+            key, t_step = key
+            k = history.Key(key=key, t_step=t_step, agent_id=self.id)
+            return self.env[k]
        return self.state.get(key, None)

    def __delitem__(self, key):
-        del self.state[key]
+        self.state[key] = None

    def __contains__(self, key):
        return key in self.state

    def __setitem__(self, key, value):
+        """Set ``key`` in the current state and hand the same value to the
+        environment under a ``history.Key`` built from this agent's id and
+        ``self.now``."""
        self.state[key] = value
+        k = history.Key(t_step=self.now,
+                        agent_id=self.id,
+                        key=key)
+        self.env[k] = value

    def get(self, key, default=None):
        return self[key] if key in self else default
@@ -72,7 +106,12 @@ class BaseAgent(nxsim.BaseAgent, metaclass=MetaAgent):
            return None

    def run(self):
-        interval = self.env.interval
+        if self.interval is not None:
+            interval = self.interval
+        elif 'interval' in self:
+            interval = self['interval']
+        else:
+            interval = self.env.interval
        while self.alive:
            res = self.step()
            yield res or self.env.timeout(interval)
@@ -95,7 +134,7 @@ class BaseAgent(nxsim.BaseAgent, metaclass=MetaAgent):
            agents = self.global_topology.nodes()
        count = 0
        for agent in agents:
-            if state_id and state_id != self.global_topology.node[agent]['agent'].state['id']:
+            if state_id and state_id != self.global_topology.node[agent]['agent']['id']:
                continue
            count += 1
        return count
@@ -197,11 +236,13 @@ class FSM(BaseAgent, metaclass=MetaFSM):
    def __init__(self, *args, **kwargs):
        super(FSM, self).__init__(*args, **kwargs)
        if 'id' not in self.state:
-            self.state['id'] = self.default_state.id
+            if not self.default_state:
+                raise ValueError('No default state specified for {}'.format(self.id))
+            self['id'] = self.default_state.id

    def step(self):
        if 'id' in self.state:
-            next_state = self.state['id']
+            next_state = self['id']
        elif self.default_state:
            next_state = self.default_state.id
        else:
@@ -215,7 +256,7 @@ class FSM(BaseAgent, metaclass=MetaFSM):
            state = state.id
        if state not in self.states:
            raise ValueError('{} is not a valid state'.format(state))
-        self.state['id'] = state
+        self['id'] = state
        return state


--- a/soil/analysis.py
+++ b/soil/analysis.py
@@ -4,7 +4,7 @@ import glob
 import yaml
 from os.path import join

-from . import utils
+from . import utils, history


 def read_data(*args, group=False, **kwargs):
@@ -15,8 +15,9 @@ def read_data(*args, group=False, **kwargs):
        return list(iterable)


-def _read_data(pattern, keys=None, convert_types=False,
-               process=None, from_csv=False, **kwargs):
+def _read_data(pattern, *args, from_csv=False, process_args=None, **kwargs):
+    if not process_args:
+        process_args = {}
    for folder in glob.glob(pattern):
        config_file = glob.glob(join(folder, '*.yml'))[0]
        config = yaml.load(open(config_file))
@@ -24,19 +25,20 @@ def _read_data(pattern, keys=None, convert_types=False,
        if from_csv:
            for trial_data in sorted(glob.glob(join(folder,
                                                    '*.environment.csv'))):
-                df = read_csv(trial_data, convert_types=convert_types)
-                if process:
-                    df = process(df, **kwargs)
+                df = read_csv(trial_data, **kwargs)
                yield config_file, df, config
        else:
            for trial_data in sorted(glob.glob(join(folder, '*.db.sqlite'))):
-                df = read_sql(trial_data, convert_types=convert_types,
-                              keys=keys)
-                if process:
-                    df = process(df, **kwargs)
+                df = read_sql(trial_data, **kwargs)
                yield config_file, df, config


+def read_sql(db, *args, **kwargs):
+    '''
+    Open ``db`` through :class:`history.History` (with ``backup=False``) and
+    return the result of its ``read_sql`` for the given arguments.
+    '''
+    return history.History(db, backup=False).read_sql(*args, **kwargs)
+
+
 def read_csv(filename, keys=None, convert_types=False, **kwargs):
    '''
    Read a CSV in canonical form: ::
@@ -49,18 +51,7 @@ def read_csv(filename, keys=None, convert_types=False, **kwargs):
        df = convert_types_slow(df)
    if keys:
        df = df[df['key'].isin(keys)]
-    return df
-
-
-def read_sql(filename, keys=None, convert_types=False, limit=-1):
-    condition = ''
-    if keys:
-        k = map(lambda x: "\'{}\'".format(x), keys)
-        condition = 'where key in ({})'.format(','.join(k))
-    query = 'select * from history {} limit {}'.format(condition, limit)
-    df = pd.read_sql_query(query, 'sqlite:///{}'.format(filename))
-    if convert_types:
-        df = convert_types_slow(df)
+    df = process_one(df)
    return df


@@ -108,8 +99,9 @@ def get_types(df):
    return {k:v[0] for k,v in dtypes.iteritems()}


-def process_one(df, *keys, columns=['key'], values='value',
-                index=['t_step', 'agent_id'], aggfunc='first', **kwargs):
+def process_one(df, *keys, columns=['key', 'agent_id'], values='value',
+                fill=True, index=['t_step',],
+                aggfunc='first', **kwargs):
    '''
    Process a dataframe in canonical form ``(t_step, agent_id, key, value, value_type)`` into
    a dataframe with a column per key
@@ -119,35 +111,29 @@ def process_one(df, *keys, columns=['key'], values='value',
    if keys:
        df = df[df['key'].isin(keys)]

-    dtypes = get_types(df)
-
    df = df.pivot_table(values=values, index=index, columns=columns,
                        aggfunc=aggfunc, **kwargs)
-    df = df.fillna(0).astype(dtypes)
+    if fill:
+        df = fillna(df)
    return df


-def get_count_processed(df, *keys):
-    if keys:
-        df = df[list(keys)]
-    # p = df.groupby(level=0).apply(pd.Series.value_counts)
-    p = df.unstack().apply(pd.Series.value_counts, axis=1)
-    return p
-
-
 def get_count(df, *keys):
+    '''
+    Count, for every step (row), how many columns of each key hold each value.
+
+    ``df`` is expected to have two-level columns whose first level is the
+    key. When ``keys`` are given, only those first-level columns are
+    counted. The result has ``(key, value)`` columns and one row per row
+    of ``df``; combinations that do not occur in a row are 0.
+    '''
    if keys:
-        df = df[df['key'].isin(keys)]
-    p = df.groupby(by=['t_step', 'key', 'value']).size().unstack(level=[1,2]).fillna(0)
-    return p
+        df = df[list(keys)]
+    counts = pd.DataFrame()
+    # Selecting columns does not prune the column levels, so ``levels[0]``
+    # would still list the keys that were just dropped. Iterate over the
+    # first-level labels that are actually present instead.
+    for key in df.columns.get_level_values(0).unique():
+        g = df[key].apply(pd.Series.value_counts, axis=1).fillna(0)
+        # ``items`` is available in both old and current pandas releases,
+        # whereas ``iteritems`` has been removed.
+        for value, series in g.items():
+            counts[key, value] = series
+    counts.columns = pd.MultiIndex.from_tuples(counts.columns)
+    return counts


 def get_value(df, *keys, aggfunc='sum'):
    if keys:
-        df = df[df['key'].isin(keys)]
-    p = process_one(df, *keys)
-    p = p.groupby(level='t_step').agg(aggfunc)
-    return p
+        df = df[list(keys)]
+    return df.groupby(axis=1, level=0).agg(aggfunc, axis=1)


 def plot_all(*args, **kwargs):
@@ -175,4 +161,6 @@ def group_trials(trials, aggfunc=['mean', 'min', 'max', 'std']):
    return pd.concat(trials).groupby(level=0).agg(aggfunc).reorder_levels([2, 0,1] ,axis=1)


-
+def fillna(df):
+    '''Return a copy of ``df`` with gaps forward-filled down each column (``axis=0``).'''
+    return df.ffill(axis=0)
--- a/soil/environment.py
+++ b/soil/environment.py
@@ -5,16 +5,26 @@ import csv
 import random
 import simpy
 import tempfile
+import pandas as pd
 from copy import deepcopy
 from networkx.readwrite import json_graph

 import networkx as nx
 import nxsim

-from . import utils, agents
+from . import utils, agents, analysis, history


 class SoilEnvironment(nxsim.NetworkEnvironment):
+    """
+    The environment is key in a simulation. It contains the network topology,
+    a reference to network and environment agents, as well as the environment
+    params, which are used as shared state between agents.
+
+    The environment parameters and the state of every agent can be accessed
+    both by using the environment as a dictionary or with the environment's 
+    :meth:`soil.environment.SoilEnvironment.get` method.
+    """

    def __init__(self, name=None,
                 network_agents=None,
@@ -38,19 +48,21 @@ class SoilEnvironment(nxsim.NetworkEnvironment):
        self._env_agents = {}
        self.dry_run = dry_run
        self.interval = interval
+        self.dir_path = dir_path or tempfile.mkdtemp('soil-env')
+        self.get_path()
+        self._history = history.History(name=self.name if not dry_run else None,
+                                        dir_path=self.dir_path)
        # Add environment agents first, so their events get
        # executed before network agents
-        self['SEED'] = seed or time.time()
-        random.seed(self['SEED'])
-        self.process(self.save_state())
        self.environment_agents = environment_agents or []
        self.network_agents = network_agents or []
-        self.dir_path = dir_path or tempfile.mkdtemp('soil-env')
        if self.dry_run:
            self._db_path = ":memory:"
        else:
            self._db_path = os.path.join(self.get_path(), '{}.db.sqlite'.format(self.name))
        self.create_db(self._db_path)
+        self['SEED'] = seed or time.time()
+        random.seed(self['SEED'])

    def create_db(self, db_path=None):
        db_path = db_path or self._db_path
@@ -95,10 +107,8 @@ class SoilEnvironment(nxsim.NetworkEnvironment):
        if not network_agents:
            return
        for ix in self.G.nodes():
-            i = ix
-            node = self.G.node[i]
            agent, state = agents._agent_from_distribution(network_agents)
-            self.set_agent(i, agent_type=agent, state=state)
+            self.set_agent(ix, agent_type=agent, state=state)

    def set_agent(self, agent_id, agent_type, state=None):
        node = self.G.nodes[agent_id]
@@ -125,16 +135,21 @@ class SoilEnvironment(nxsim.NetworkEnvironment):
        return self.G.add_edge(agent1, agent2)

    def run(self, *args, **kwargs):
+        self._save_state()
        super().run(*args, **kwargs)
+        self._history.flush_cache()

    def _save_state(self, now=None):
        # for agent in self.agents:
        #     agent.save_state()
        utils.logger.debug('Saving state @{}'.format(self.now))
-        with self._db:
-            self._db.executemany("insert into history(agent_id, t_step, key, value, value_type) values (?, ?, ?, ?, ?)", self.state_to_tuples(now=now))
+        self._history.save_records(self.state_to_tuples(now=now))

    def save_state(self):
+        '''
+        :DEPRECATED:
+        Periodically save the state of the environment and the agents.
+        '''
        self._save_state()
        while self.peek() != simpy.core.Infinity:
            delay = max(self.peek() - self.now, self.interval)
@@ -149,64 +164,44 @@ class SoilEnvironment(nxsim.NetworkEnvironment):

    def __getitem__(self, key):
        if isinstance(key, tuple):
-            values = [("agent_id", key[0]),
-                      ("t_step", key[1]),
-                      ("key", key[2]),
-                      ("value", None),
-                      ("value_type", None)]
-            fields = list(k for k, v in values if v is None)
-            conditions = " and ".join("{}='{}'".format(k, v) for k, v in values if v is not None)
-
-            query = """SELECT {fields} from history""".format(fields=",".join(fields))
-            if conditions:
-                query = """{query} where {conditions}""".format(query=query,
-                                                                conditions=conditions)
-            with self._db:
-                rows = self._db.execute(query).fetchall()
-
-            utils.logger.debug(rows)
-            results = self.rows_to_dict(rows)
-            return results
+            self._history.flush_cache()
+            return self._history[key]

        return self.environment_params[key]

-    def rows_to_dict(self, rows):
-        if len(rows) < 1:
-            return None
-
-        level = len(rows[0])-2
-
-        if level == 0:
-            if len(rows) != 1:
-                raise ValueError('Cannot convert {} to dictionaries'.format(rows))
-            value, value_type = rows[0]
-            return utils.convert(value, value_type)
-
-        results = {}
-        for row in rows:
-            item = results
-            for i in range(level-1):
-                key = row[i]
-                if key not in item:
-                    item[key] = {}
-                item = item[key]
-            key, value, value_type = row[level-1:]
-            item[key] = utils.convert(value, value_type)
-        return results
-
    def __setitem__(self, key, value):
+        '''
+        Set an environment parameter, or record a value in the history.
+
+        A tuple key is interpreted as a :class:`history.Key` and only saved
+        to the history. Any other key updates ``environment_params`` and is
+        also recorded for agent ``'env'`` at the current time.
+        '''
+        if isinstance(key, tuple):
+            k = history.Key(*key)
+            self._history.save_record(*k,
+                                      value=value)
+            return
        self.environment_params[key] = value
+        self._history.save_record(agent_id='env',
+                                  t_step=self.now,
+                                  key=key,
+                                  value=value)

    def __contains__(self, key):
        return key in self.environment_params

    def get(self, key, default=None):
+        '''
+        Get the value of an environment attribute in a
+        given point in the simulation (history).
+        If key is an attribute name, this method returns
+        the current value.
+        To get values at other times, use a
+        :meth: `soil.history.Key` tuple.
+        '''
        return self[key] if key in self else default

    def get_path(self, dir_path=None):
        dir_path = dir_path or self.dir_path
        if not os.path.exists(dir_path):
-            os.makedirs(dir_path)
+            try:
+                os.makedirs(dir_path)
+            except FileExistsError:
+                pass
        return dir_path

    def get_agent(self, agent_id):
@@ -255,17 +250,19 @@ class SoilEnvironment(nxsim.NetworkEnvironment):
        if now is None:
            now = self.now
        for k, v in self.environment_params.items():
-            v, v_t = utils.repr(v)
-            yield 'env', now, k, v, v_t
+            yield history.Record(agent_id='env',
+                                 t_step=now,
+                                 key=k,
+                                 value=v)
        for agent in self.agents:
            for k, v in agent.state.items():
-                v, v_t = utils.repr(v)
-                yield agent.id, now, k, v, v_t
+                yield history.Record(agent_id=agent.id,
+                                     t_step=now,
+                                     key=k,
+                                     value=v)

    def history_to_tuples(self):
-        with self._db:
-            res = self._db.execute("select agent_id, t_step, key, value, value_type from history ").fetchall()
-        yield from res
+        return self._history.to_tuples()

    def history_to_graph(self):
        G = nx.Graph(self.G)
@@ -317,14 +314,10 @@ class SoilEnvironment(nxsim.NetworkEnvironment):
    def __getstate__(self):
        state = self.__dict__.copy()
        state['G'] = json_graph.node_link_data(self.G)
-        state['network_agents'] = agents.serialize_distribution(self.network_agents)
+        state['network_agents'] = agents._serialize_distribution(self.network_agents)
        state['environment_agents'] = agents._convert_agent_types(self.environment_agents,
                                                                 to_string=True)
        del state['_queue']
-        import inspect
-        for k, v in state.items():
-            if inspect.isgeneratorfunction(v):
-                print(k, v, type(v))
        return state

    def __setstate__(self, state):
--- a/soil/history.py
+++ b/soil/history.py
@@ -0,0 +1,231 @@
+import time
+import os
+import pandas as pd
+import sqlite3
+import copy
+from collections import UserDict, Iterable, namedtuple
+
+from . import utils
+
+
+class History:
+    """
+    Store and retrieve values from a sqlite database.
+
+    Values are kept as ``(agent_id, t_step, key, value)`` rows in a
+    ``history`` table. The type name of every key is kept in a
+    ``value_types`` table so that values can be converted back when they
+    are read. Records added through :meth:`save_record` are buffered in
+    memory until :meth:`flush_cache` writes them.
+    """
+
+    def __init__(self, db_path=None, name=None, dir_path=None, backup=True):
+        """
+        :param db_path: path of the sqlite file, or an object to be used as
+            the connection. When omitted it is derived from ``name`` and
+            ``dir_path``; without a ``name`` an in-memory database is used.
+        :param name: base name of the ``<name>.db.sqlite`` file.
+        :param dir_path: folder of the database file (defaults to the
+            current working directory).
+        :param backup: rename an already existing database file instead of
+            reusing it.
+        """
+        if db_path is None and name:
+            db_path = os.path.join(dir_path or os.getcwd(), '{}.db.sqlite'.format(name))
+
+        if db_path is None:
+            db_path = ":memory:"
+        elif backup and isinstance(db_path, str) and os.path.exists(db_path):
+            # Only real paths can be backed up; a connection object passed
+            # as db_path must not reach os.path.exists.
+            newname = db_path.replace('db.sqlite', 'backup{}.sqlite'.format(time.time()))
+            os.rename(db_path, newname)
+        self._db_path = db_path
+        if isinstance(db_path, str):
+            self._db = sqlite3.connect(db_path)
+        else:
+            # Anything that is not a string is used as the connection itself.
+            self._db = db_path
+
+        with self._db:
+            self._db.execute('''CREATE TABLE IF NOT EXISTS history (agent_id text, t_step int, key text, value text text)''')
+            self._db.execute('''CREATE TABLE IF NOT EXISTS value_types (key text, value_type text)''')
+            self._db.execute('''CREATE UNIQUE INDEX IF NOT EXISTS idx_history ON history (agent_id, t_step, key);''')
+        # key -> (type name, serializer, deserializer)
+        self._dtypes = {}
+        # Records buffered by save_record until the next flush_cache.
+        self._tups = []
+
+    def conversors(self, key):
+        """Get the ``(type name, serializer, deserializer)`` triple for a key.
+
+        Unknown keys trigger a reload of the ``value_types`` table; a
+        ``KeyError`` is raised if the key is still unknown afterwards.
+        """
+        if key not in self._dtypes:
+            self.read_types()
+        return self._dtypes[key]
+
+    @property
+    def dtypes(self):
+        """Mapping of every known key to the name of its value type."""
+        return {k: v[0] for k, v in self._dtypes.items()}
+
+    def save_tuples(self, tuples):
+        """Save an iterable of ``(agent_id, t_step, key, value)`` tuples."""
+        self.save_records(Record(*tup) for tup in tuples)
+
+    def save_records(self, records):
+        """
+        Write records to the database in a single transaction.
+
+        The first value seen for a key determines the type registered for
+        that key. Every value is passed through the serializer of its key
+        before it is stored, so that it can be deserialized on reads.
+        """
+        with self._db:
+            for rec in records:
+                if not isinstance(rec, Record):
+                    rec = Record(*rec)
+                if rec.key not in self._dtypes:
+                    name = utils.name(rec.value)
+                    serializer = utils.serializer(name)
+                    deserializer = utils.deserializer(name)
+                    self._dtypes[rec.key] = (name, serializer, deserializer)
+                    self._db.execute("replace into value_types (key, value_type) values (?, ?)", (rec.key, name))
+                # Store the serialized form. Without this, booleans reach
+                # the text column as '1'/'0' and both read back as True.
+                serialize = self._dtypes[rec.key][1]
+                self._db.execute("replace into history(agent_id, t_step, key, value) values (?, ?, ?, ?)",
+                                 (rec.agent_id, rec.t_step, rec.key, serialize(rec.value)))
+
+    def save_record(self, *args, **kwargs):
+        """Buffer one record (same arguments as ``Record``); the buffer is
+        written out once it holds more than 100 records."""
+        self._tups.append(Record(*args, **kwargs))
+        if len(self._tups) > 100:
+            self.flush_cache()
+
+    def flush_cache(self):
+        '''
+        Use a cache to save state changes to avoid opening a session for every change.
+        The cache will be flushed at the end of the simulation, and when history is accessed.
+        '''
+        self.save_records(self._tups)
+        self._tups = list()
+
+    def to_tuples(self):
+        """Yield every stored record as ``(agent_id, t_step, key, value)``
+        with the value converted by the deserializer of its key.
+
+        This is a generator: pending records are flushed when iteration
+        starts.
+        """
+        self.flush_cache()
+        with self._db:
+            res = self._db.execute("select agent_id, t_step, key, value from history ").fetchall()
+        for r in res:
+            agent_id, t_step, key, value = r
+            _, _, des = self.conversors(key)
+            yield agent_id, t_step, key, des(value)
+
+    def read_types(self):
+        """Load the ``value_types`` table into the conversors cache."""
+        with self._db:
+            res = self._db.execute("select key, value_type from value_types ").fetchall()
+        for k, v in res:
+            serializer = utils.serializer(v)
+            deserializer = utils.deserializer(v)
+            self._dtypes[k] = (v, serializer, deserializer)
+
+    def __getitem__(self, key):
+        """
+        Look up ``(agent_id, t_step, key)``; fields set to ``None`` are not
+        constrained. Returns the result of :meth:`Records.value`: a single
+        value when all three fields are given, a ``Records`` otherwise.
+        """
+        key = Key(*key)
+        agent_ids = [key.agent_id] if key.agent_id is not None else []
+        t_steps = [key.t_step] if key.t_step is not None else []
+        keys = [key.key] if key.key is not None else []
+
+        df = self.read_sql(agent_ids=agent_ids,
+                           t_steps=t_steps,
+                           keys=keys)
+        r = Records(df, filter=key, dtypes=self._dtypes)
+        return r.value()
+
+    def read_sql(self, keys=None, agent_ids=None, t_steps=None, convert_types=False, limit=-1):
+        """
+        Read the history into a dataframe indexed by ``t_step`` with
+        ``(key, agent_id)`` columns, forward-filling the gaps left by steps
+        in which a value did not change.
+
+        :param keys: only read these keys (all keys when empty).
+        :param agent_ids: only read these agents (all agents when empty).
+        :param t_steps: only return these steps. The last value stored
+            before the earliest requested step is also read, so that the
+            first rows can be filled.
+        :param convert_types: not used by this implementation.
+        :param limit: row limit of the main query (``-1`` means no limit).
+        """
+
+        self.read_types()
+
+        filters = []
+        params = []
+        for column, wanted in (('key', keys), ('agent_id', agent_ids)):
+            if not wanted:
+                continue
+            # Bind the values as text parameters instead of pasting them
+            # into the query, so a quote in a key cannot break the statement.
+            wanted = [str(w) for w in wanted]
+            filters.append('{} in ({})'.format(column, ','.join('?' * len(wanted))))
+            params.extend(wanted)
+
+        last_df = None
+        if t_steps:
+            # Look for the last value before the minimum step in the query
+            min_step = min(t_steps)
+            last_filters = ['t_step < {}'.format(min_step),]
+            last_filters = last_filters + filters
+            condition = ' and '.join(last_filters)
+
+            last_query = '''
+            select h1.*
+            from history h1
+            inner join (
+            select agent_id, key, max(t_step) as t_step
+            from history
+            where {condition}
+            group by agent_id, key
+            ) h2
+            on h1.agent_id = h2.agent_id  and
+               h1.key      = h2.key       and
+               h1.t_step   = h2.t_step
+            '''.format(condition=condition)
+            last_df = pd.read_sql_query(last_query, self._db, params=params or None)
+
+            filters.append("t_step >= '{}' and t_step <= '{}'".format(min_step, max(t_steps)))
+
+        condition = ''
+        if filters:
+            condition = 'where {} '.format(' and '.join(filters))
+        query = 'select * from history {} limit {}'.format(condition, limit)
+        df = pd.read_sql_query(query, self._db, params=params or None)
+        if last_df is not None:
+            df = pd.concat([df, last_df])
+
+        df_p = df.pivot_table(values='value', index=['t_step'],
+                              columns=['key', 'agent_id'],
+                              aggfunc='first')
+
+        for k, v in self._dtypes.items():
+            if k in df_p:
+                dtype, _, deserial = v
+                # Fill forward first; whatever is still missing (no earlier
+                # value) gets the default produced by the deserializer.
+                df_p[k] = df_p[k].ffill().fillna(deserial()).astype(dtype)
+        if t_steps:
+            df_p = df_p.reindex(t_steps, method='ffill')
+        return df_p.ffill()
+
+
+class Records():
+    """
+    View over a history dataframe (``t_step`` index, ``(key, agent_id)``
+    columns) that is narrowed down one field at a time.
+
+    The filter is a ``Key``; fields that are ``None`` are still unresolved.
+    Indexing fixes the next unresolved field, in ``Key`` order, and once
+    all three are fixed a single value is returned.
+    """
+
+    def __init__(self, df, filter=None, dtypes=None):
+        """
+        :param df: dataframe as returned by ``History.read_sql``.
+        :param filter: ``Key`` with the fields that are already fixed.
+        :param dtypes: mapping of key to ``(type name, serializer,
+            deserializer)``, used to build default values.
+        """
+        if not filter:
+            filter = Key(agent_id=None,
+                         t_step=None,
+                         key=None)
+        self._df = df
+        self._filter = filter
+        self.dtypes = dtypes or {}
+        super().__init__()
+
+    def mask(self, tup):
+        """Drop from a record the fields that the filter already fixes; the
+        value (last element) is always kept."""
+        res = ()
+        for i, k in zip(tup[:-1], self._filter):
+            if k is None:
+                res = res + (i,)
+        res = res + (tup[-1],)
+        return res
+
+    def filter(self, newKey):
+        """Fix the first unresolved field of the filter to ``newKey``."""
+        f = list(self._filter)
+        for ix, i in enumerate(f):
+            if i is None:
+                f[ix] = newKey
+                # Only one field is resolved per call. Without stopping
+                # here every unresolved field would receive the same value.
+                break
+        self._filter = Key(*f)
+
+    @property
+    def resolved(self):
+        """Whether agent, step and key are all fixed."""
+        return all(i is not None for i in self._filter)
+
+    def __iter__(self):
+        """Yield every cell of the dataframe as a masked record."""
+        for column, series in self._df.items():
+            key, agent_id = column
+            for t_step, value in series.items():
+                r = Record(t_step=t_step,
+                           agent_id=agent_id,
+                           key=key,
+                           value=value)
+                yield self.mask(r)
+
+    def value(self):
+        """
+        Return the stored value once the filter is fully resolved, taking
+        the closest earlier step when the exact one is missing. If there is
+        no such entry, the default built by the key's deserializer is
+        returned. While the filter is unresolved, return ``self``.
+        """
+        if self.resolved:
+            f = self._filter
+            try:
+                i = self._df[f.key][str(f.agent_id)]
+                # Position of the step, or of the closest earlier one.
+                # get_indexer reports "not found" as -1 instead of raising.
+                ix = i.index.get_indexer([f.t_step], method='ffill')[0]
+                if ix < 0:
+                    raise KeyError(f.t_step)
+                return i.iloc[ix]
+            except KeyError:
+                return self.dtypes[f.key][2]()
+        return self
+
+    def __getitem__(self, k):
+        """Return the value of a copy narrowed by ``k``; this object is
+        left untouched."""
+        n = copy.copy(self)
+        n.filter(k)
+        return n.value()
+
+    def __len__(self):
+        return len(self._df)
+
+
+# Address of a value in the history; a field may be None to leave it open.
+Key = namedtuple('Key', ['agent_id', 't_step', 'key'])
+# A stored value together with its address.
+Record = namedtuple('Record', 'agent_id t_step key value')
--- a/soil/simulation.py
+++ b/soil/simulation.py
@@ -20,7 +20,7 @@ class SoilSimulation(NetworkSimulation):
    """
    Subclass of nsim.NetworkSimulation with three main differences:
        1) agent type can be specified by name or by class.
-        2) instead of just one type, an network_agents can be used.
+        2) instead of just one type, a network agents distribution can be used.
           The distribution specifies the weight (or probability) of each
           agent type in the topology. This is an example distribution: ::

@@ -95,16 +95,16 @@ class SoilSimulation(NetworkSimulation):
    def run_simulation_gen(self, *args, parallel=False, dry_run=False,
                           **kwargs):
        p = Pool()
-        with utils.timer('simulation'):
+        with utils.timer('simulation {}'.format(self.name)):
            if parallel:
-                func = partial(self.run_trial, dry_run=dry_run,
+                func = partial(self.run_trial, dry_run=dry_run or self.dry_run,
                               return_env=not parallel, **kwargs)
                for i in p.imap_unordered(func, range(self.num_trials)):
                    yield i
            else:
                for i in range(self.num_trials):
-                    yield self.run_trial(i, dry_run=dry_run, **kwargs)
-            if not dry_run or self.dry_run:
+                    yield self.run_trial(i, dry_run=dry_run or self.dry_run, **kwargs)
+            if not (dry_run or self.dry_run):
                logger.info('Dumping results to {}'.format(self.dir_path))
                self.dump_pickle(self.dir_path)
                self.dump_yaml(self.dir_path)
@@ -192,7 +192,7 @@ class SoilSimulation(NetworkSimulation):
        return state


-def from_config(config, G=None):
+def from_config(config):
    config = list(utils.load_config(config))
    if len(config) > 1:
        raise AttributeError('Provide only one configuration')
@@ -201,9 +201,10 @@ def from_config(config, G=None):
    return sim


-def run_from_config(*configs, results_dir='soil_output', dump=None, timestamp=False,  **kwargs):
+def run_from_config(*configs, results_dir='soil_output', dry_run=False, dump=None, timestamp=False,  **kwargs):
    for config_def in configs:
-        for config, cpath in utils.load_config(config_def):
+        # logger.info("Found {} config(s)".format(len(ls)))
+        for config, _ in utils.load_config(config_def):
            name = config.get('name', 'unnamed')
            logger.info("Using config(s): {name}".format(name=name))

@@ -215,4 +216,4 @@ def run_from_config(*configs, results_dir='soil_output', dump=None, timestamp=Fa
            dir_path = os.path.join(results_dir, sim_folder)
            sim = SoilSimulation(dir_path=dir_path, dump=dump, **config)
            logger.info('Dumping results to {} : {}'.format(sim.dir_path, sim.dump))
-            results = sim.run_simulation(**kwargs)
+            sim.run_simulation(dry_run=dry_run, **kwargs)  # dry_run is a named parameter now, so it must be forwarded explicitly
--- a/soil/utils.py
+++ b/soil/utils.py
@@ -1,6 +1,7 @@
 import os
 import yaml
 import logging
+import importlib
 from time import time
 from glob import glob
 from random import random
@@ -72,13 +73,22 @@ def timer(name='task', pre="", function=logger.info, to_object=None):


 def repr(v):
-    if isinstance(v, bool):
-        v = "true" if v else ""
-        return v, bool.__name__
-    return v, type(v).__name__
+    tname = name(v)  # type name of the value, e.g. 'bool' or 'str'
+    func = serializer(tname)  # serializer dispatches on the type name, not on the value itself
+    return func(v), tname

-def convert(value, type_):
-    import importlib
+
+def name(v):
+    return type(v).__name__
+
+
+def serializer(type_):  # type_ is compared against a type-name string such as 'bool'
+    if type_ == 'bool':
+        return lambda x:  "true" if x else ""  # booleans become "true" or the empty string
+    return lambda x: x  # every other type name gets the value back unchanged
+
+
+def deserializer(type_):
    try:
        # Check if it's a builtin type
        module = importlib.import_module('builtins')
@@ -88,4 +98,8 @@ def convert(value, type_):
        module, type_ = type_.rsplit(".", 1)
        module = importlib.import_module(module)
        cls = getattr(module, type_)
-    return cls(value)
+    return cls
+
+
+def convert(value, type_):
+    return deserializer(type_)(value)
--- a/tests/test.csv
+++ b/tests/test.csv
@@ -0,0 +1,16 @@
+agent_id,t_step,key,value,value_type
+a0,0,hello,w,str
+a0,1,hello,o,str
+a0,2,hello,r,str
+a0,3,hello,l,str
+a0,4,hello,d,str
+a0,5,hello,!,str
+env,1,started,,bool
+env,2,started,True,bool
+env,7,started,,bool
+a0,0,hello,w,str
+a0,1,hello,o,str
+a0,2,hello,r,str
+a0,3,hello,l,str
+a0,4,hello,d,str
+a0,5,hello,!,str
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -0,0 +1,90 @@
+from unittest import TestCase
+
+import os
+import pandas as pd
+import yaml
+from functools import partial
+
+from os.path import join
+from soil import simulation, analysis, agents
+
+
+ROOT = os.path.abspath(os.path.dirname(__file__))
+
+
+class Ping(agents.FSM):
+
+    defaults = {
+        'count': 0,
+    }
+
+    @agents.default_state
+    @agents.state
+    def even(self):
+        self['count'] += 1
+        return self.odd
+
+    @agents.state
+    def odd(self):
+        self['count'] += 1
+        return self.even
+
+
+class TestAnalysis(TestCase):
+
+    # Code to generate a simple sqlite history
+    def setUp(self):
+        """
+        The initial states should be applied to the agent and the
+        agent should be able to update its state."""
+        config = {
+            'name': 'analysis',
+            'dry_run': True,
+            'seed': 'seed',
+            'network_params': {
+                'generator': 'complete_graph',
+                'n': 2
+            },
+            'agent_type': Ping,
+            'states': [{'interval': 1}, {'interval': 2}],
+            'max_time': 30,
+            'num_trials': 1,
+            'environment_params': {
+            }
+        }
+        s = simulation.from_config(config)
+        self.env = s.run_simulation()[0]
+
+    def test_saved(self):
+        env = self.env
+        assert env.get_agent(0)['count', 0] == 1
+        assert env.get_agent(0)['count', 29] == 30
+        assert env.get_agent(1)['count', 0] == 1
+        assert env.get_agent(1)['count', 29] == 15
+        assert env['env', 29, None]['SEED'] == env['env', 29, 'SEED']
+
+    def test_count(self):
+        env = self.env
+        df = analysis.read_sql(env._history._db)
+        res = analysis.get_count(df, 'SEED', 'id')
+        assert res['SEED']['seedanalysis_trial_0'].iloc[0] == 1
+        assert res['SEED']['seedanalysis_trial_0'].iloc[-1] == 1
+        assert res['id']['odd'].iloc[0] == 2
+        assert res['id']['even'].iloc[0] == 0
+        assert res['id']['odd'].iloc[-1] == 1
+        assert res['id']['even'].iloc[-1] == 1
+
+    def test_value(self):
+        env = self.env
+        df = analysis.read_sql(env._history._db)
+        res_sum = analysis.get_value(df, 'count')
+
+        assert res_sum['count'].iloc[0] == 2
+
+        import numpy as np
+        res_mean = analysis.get_value(df, 'count', aggfunc=np.mean)
+        assert res_mean['count'].iloc[0] == 1
+
+        res_total = analysis.get_value(df)
+
+        assert res_total['SEED'].iloc[0] == 'seedanalysis_trial_0'  # was a bare comparison that verified nothing
--- a/tests/test_history.py
+++ b/tests/test_history.py
@@ -0,0 +1,90 @@
+from unittest import TestCase
+
+import os
+import pandas as pd
+
+from soil import history, analysis
+
+
+ROOT = os.path.abspath(os.path.dirname(__file__))
+
+
+class TestHistory(TestCase):
+
+    def test_history(self):
+        """
+        """
+        tuples = (
+            ('a_0', 0, 'id', 'h',  ),
+            ('a_0', 1, 'id', 'e',  ),
+            ('a_0', 2, 'id', 'l',  ),
+            ('a_0', 3, 'id', 'l',  ),
+            ('a_0', 4, 'id', 'o',  ),
+            ('a_1', 0, 'id', 'v',  ),
+            ('a_1', 1, 'id', 'a',  ),
+            ('a_1', 2, 'id', 'l',  ),
+            ('a_1', 3, 'id', 'u',  ),
+            ('a_1', 4, 'id', 'e',  ),
+            ('env', 1, 'prob', 1),
+            ('env', 3, 'prob', 2),
+            ('env', 5, 'prob', 3),
+            ('a_2', 7, 'finished', True),
+        )
+        h = history.History()
+        h.save_tuples(tuples)
+        # assert h['env', 0, 'prob'] == 0
+        for i in range(1, 7):
+            assert h['env', i, 'prob'] == ((i-1)//2)+1
+
+
+        for i, k in zip(range(5), 'hello'):
+            assert h['a_0', i, 'id'] == k
+        for record, value in zip(h['a_0', None, 'id'], 'hello'):
+            t_step, val = record
+            assert val == value
+
+        for i, k in zip(range(5), 'value'):
+            assert h['a_1', i, 'id'] == k
+        for i in range(5, 8):
+            assert h['a_1', i, 'id'] == 'e'
+        for i in range(7):
+            assert h['a_2', i, 'finished'] == False
+        assert h['a_2', 7, 'finished']
+
+    def test_history_gen(self):
+        """
+        """
+        tuples = (
+            ('a_1', 0, 'id', 'v',  ),
+            ('a_1', 1, 'id', 'a',  ),
+            ('a_1', 2, 'id', 'l',  ),
+            ('a_1', 3, 'id', 'u',  ),
+            ('a_1', 4, 'id', 'e',  ),
+            ('env', 1, 'prob', 1),
+            ('env', 2, 'prob', 2),
+            ('env', 3, 'prob', 3),
+            ('a_2', 7, 'finished', True),
+        )
+        h = history.History()
+        h.save_tuples(tuples)
+        for t_step, key, value in h['env', None, None]:
+            assert t_step == value
+            assert key == 'prob'
+
+        records = list(h[None, 7, None])
+        assert len(records) == 3
+        for i in records:
+            agent_id, key, value = i
+            if agent_id == 'a_1':
+                assert key == 'id'
+                assert value == 'e'
+            elif agent_id == 'a_2':
+                assert key == 'finished'
+                assert value == True
+            else:
+                assert key == 'prob'
+                assert value == 3
+
+
+        records = h['a_1', 7, None]
+        assert records['id'] == 'e'
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -22,6 +22,7 @@ class TestMain(TestCase):
        Raise an exception otherwise.
        """
        config = {
+            'dry_run': True,
            'network_params': {
                'path': join(ROOT, 'test.gexf')
            }
@@ -31,6 +32,7 @@ class TestMain(TestCase):
        assert len(G) == 2
        with self.assertRaises(AttributeError):
            config = {
+                'dry_run': True,
                'network_params': {
                    'path': join(ROOT, 'unknown.extension')
                }
@@ -44,6 +46,7 @@ class TestMain(TestCase):
        should be used to generate a network
        """
        config = {
+            'dry_run': True,
            'network_params': {
                'generator': 'barabasi_albert_graph'
            }
@@ -58,6 +61,7 @@ class TestMain(TestCase):
    def test_empty_simulation(self):
        """A simulation with a base behaviour should do nothing"""
        config = {
+            'dry_run': True,
            'network_params': {
                'path': join(ROOT, 'test.gexf')
            },
@@ -74,11 +78,12 @@ class TestMain(TestCase):
        agent should be able to update its state."""
        config = {
            'name': 'CounterAgent',
+            'dry_run': True,
            'network_params': {
                'path': join(ROOT, 'test.gexf')
            },
            'agent_type': 'CounterModel',
-            'states': [{'neighbors': 10}, {'total': 12}],
+            'states': [{'times': 10}, {'times': 20}],
            'max_time': 2,
            'num_trials': 1,
            'environment_params': {
@@ -86,10 +91,10 @@ class TestMain(TestCase):
        }
        s = simulation.from_config(config)
        env = s.run_simulation(dry_run=True)[0]
-        assert env.get_agent(0)['neighbors', 0] == 10
-        assert env.get_agent(0)['neighbors', 1] == 1
-        assert env.get_agent(1)['total', 0] == 12
-        assert env.get_agent(1)['neighbors', 1] == 1
+        assert env.get_agent(0)['times', 0] == 11
+        assert env.get_agent(0)['times', 1] == 12
+        assert env.get_agent(1)['times', 0] == 21
+        assert env.get_agent(1)['times', 1] == 22

    def test_counter_agent_history(self):
        """
@@ -97,6 +102,7 @@ class TestMain(TestCase):
        """
        config = {
            'name': 'CounterAgent',
+            'dry_run': True,
            'network_params': {
                'path': join(ROOT, 'test.gexf')
            },
@@ -114,11 +120,10 @@ class TestMain(TestCase):
        env = s.run_simulation(dry_run=True)[0]
        for agent in env.network_agents:
            last = 0
-            assert len(agent[None, None]) == 11
-            for step, total in agent['total', None].items():
-                if step > 0:
-                    assert total == last + 2
-                    last = total
+            assert len(agent[None, None]) == 10
+            for step, total in sorted(agent['total', None]):
+                assert total == last + 2
+                last = total

    def test_custom_agent(self):
        """Allow for search of neighbors with a certain state_id"""
@@ -127,6 +132,7 @@ class TestMain(TestCase):
                self.state['neighbors'] = self.count_agents(state_id=0,
                                                            limit_neighbors=True)
        config = {
+            'dry_run': True,
            'network_params': {
                'path': join(ROOT, 'test.gexf')
            },
@@ -150,7 +156,8 @@ class TestMain(TestCase):
        config['network_params']['path'] = join(EXAMPLES,
                                                config['network_params']['path'])
        s = simulation.from_config(config)
-        env = s.run_simulation(dry_run=True)[0]
+        s.dry_run = True
+        env = s.run_simulation()[0]
        for a in env.network_agents:
            skill_level = a.state['skill_level']
            if a.id == 'Torvalds':
@@ -174,14 +181,15 @@ class TestMain(TestCase):
        with utils.timer('loading'):
            config = utils.load_file(join(EXAMPLES, 'complete.yml'))[0]
            s = simulation.from_config(config)
+            s.dry_run = True
        with utils.timer('serializing'):
            serial = s.to_yaml()
        with utils.timer('recovering'):
            recovered = yaml.load(serial)
        with utils.timer('deleting'):
            del recovered['topology']
-            del recovered['dry_run']
            del recovered['load_module']
+            del recovered['dry_run']
        assert config == recovered

    def test_configuration_changes(self):
@@ -191,6 +199,7 @@ class TestMain(TestCase):
        """
        config = utils.load_file('examples/complete.yml')[0]
        s = simulation.from_config(config)
+        s.dry_run = True
        for i in range(5):
            s.run_simulation(dry_run=True)
            nconfig = s.to_dict()
@@ -206,17 +215,14 @@ class TestMain(TestCase):
        pass

    def test_row_conversion(self):
-        sim = simulation.SoilSimulation()
        env = environment.SoilEnvironment(dry_run=True)
        env['test'] = 'test_value'
-        env._save_state(now=0)

        res = list(env.history_to_tuples())
        assert len(res) == len(env.environment_params)
-        assert ('env', 0, 'test', 'test_value', 'str') in res

+        env._now = 1
        env['test'] = 'second_value'
-        env._save_state(now=1)
        res = list(env.history_to_tuples())

        assert env['env', 0, 'test' ] == 'test_value'
@@ -1 +1 @@
 .10.2
 .11