Added history class

The environment no longer deals with history directly; it delegates that work to a dedicated History class. The analysis module also uses History instances instead of either querying the database directly or creating a proxy environment. This should make it easier to change the implementation in the future. In fact, the change was motivated by the large size of the CSV files produced by previous versions. The new implementation stores only deltas, and fills in any missing values when they are needed.
2026-02-23 07:08:16 +00:00 · 2018-05-04 10:01:49 +02:00
parent 73c90887e8
commit fc48ed7e09
19 changed files with 1469 additions and 2911 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,8 @@
 version: '3'
 services:
  dev:
    build: .
    volumes:
      - .:/usr/src/app
    tty: true
    entrypoint: /bin/bash
--- a/examples/NewsSpread.ipynb
+++ b/examples/NewsSpread.ipynb
--- a/examples/newsspread/NewsSpread.ipynb
+++ b/examples/newsspread/NewsSpread.ipynb
--- a/examples/tutorial/soil_tutorial.ipynb
+++ b/examples/tutorial/soil_tutorial.ipynb
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 nxsim
 simpy
-networkx
+networkx>=2.0
 numpy
 matplotlib
 pyyaml
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@ from setuptools import setup
 with open(os.path.join('soil', 'VERSION')) as f:
-    __version__ = f.read().strip()
+    __version__ = f.readlines()[0].strip()
    assert __version__
--- a/soil/VERSION
+++ b/soil/VERSION
@@ -1 +1 @@
-0.10.2
+0.11
--- a/soil/init.py
+++ b/soil/init.py
@@ -62,7 +62,7 @@ def main():
        simulation.run_from_config(args.file,
                                   dry_run=args.dry_run,
                                   dump=dump,
-                                   parallel=(not args.synchronous),
+                                   parallel=(not args.synchronous and not args.pdb),
                                   results_dir=args.output)
    except Exception as ex:
        if args.pdb:
--- a/soil/agents/CounterModel.py
+++ b/soil/agents/CounterModel.py
@@ -11,9 +11,9 @@ class CounterModel(BaseAgent):
        # Outside effects
        total = len(list(self.get_all_agents()))
        neighbors = len(list(self.get_neighboring_agents()))
-        self.state['times'] = self.state.get('times', 0) + 1
+        self['times'] = self.get('times', 0) + 1
-        self.state['neighbors'] = neighbors
+        self['neighbors'] = neighbors
-        self.state['total'] = total
+        self['total'] = total
 class AggregatedCounter(BaseAgent):
@@ -26,7 +26,7 @@ class AggregatedCounter(BaseAgent):
        # Outside effects
        total = len(list(self.get_all_agents()))
        neighbors = len(list(self.get_neighboring_agents()))
-        self.state['times'] = self.state.get('times', 0) + 1
+        self['times'] = self.get('times', 0) + 1
-        self.state['neighbors'] = self.state.get('neighbors', 0) + neighbors
+        self['neighbors'] = self.get('neighbors', 0) + neighbors
-        self.state['total'] = total = self.state.get('total', 0) + total
+        self['total'] = total = self.get('total', 0) + total
        self.debug('Running for step: {}. Total: {}'.format(self.now, total))
--- a/soil/agents/init.py
+++ b/soil/agents/init.py
@@ -14,7 +14,7 @@ import json
 from functools import wraps
-from .. import utils
+from .. import utils, history
 agent_types = {}
@@ -32,33 +32,67 @@ class BaseAgent(nxsim.BaseAgent, metaclass=MetaAgent):
    defaults = {}
-    def __init__(self, **kwargs):
+    def __init__(self, environment=None, agent_id=None, state=None,
                 name='network_process', interval=None, **state_params):
        # Check for REQUIRED arguments
        assert environment is not None, TypeError('__init__ missing 1 required keyword argument: \'environment\'. '
                                                  'Cannot be NoneType.')
        # Initialize agent parameters
        self.id = agent_id
        self.name = name
        self.state_params = state_params
        # Global parameters
        self.global_topology = environment.G
        self.environment_params = environment.environment_params
        # Register agent to environment
        self.env = environment
        self._neighbors = None
        self.alive = True
-        state = deepcopy(self.defaults)
+        real_state = deepcopy(self.defaults)
-        state.update(kwargs.pop('state', {}))
+        real_state.update(state or {})
-        kwargs['state'] = state
+        self._state = real_state
-        super().__init__(**kwargs)
+        self.interval = interval
        if not hasattr(self, 'level'):
            self.level = logging.DEBUG
-        self.logger = logging.getLogger('{}-Agent-{}'.format(self.env.name, self.id))
+        self.logger = logging.getLogger('{}-Agent-{}'.format(self.env.name,
                                                             self.id))
        self.logger.setLevel(self.level)
        # initialize every time an instance of the agent is created
        self.action = self.env.process(self.run())
    @property
    def state(self):
        return self._state
    @state.setter
    def state(self, value):
        for k, v in value.items():
            self[k] = v
    def __getitem__(self, key):
        if isinstance(key, tuple):
-            k, t_step = key
+            key, t_step = key
-            return self.env[self.id, t_step, k]
+            k = history.Key(key=key, t_step=t_step, agent_id=self.id)
            return self.env[k]
        return self.state.get(key, None)
    def __delitem__(self, key):
-        del self.state[key]
+        self.state[key] = None
    def __contains__(self, key):
        return key in self.state
    def __setitem__(self, key, value):
        self.state[key] = value
        k = history.Key(t_step=self.now,
                        agent_id=self.id,
                        key=key)
        self.env[k] = value
    def get(self, key, default=None):
        return self[key] if key in self else default
@@ -72,7 +106,12 @@ class BaseAgent(nxsim.BaseAgent, metaclass=MetaAgent):
            return None
    def run(self):
-        interval = self.env.interval
+        if self.interval is not None:
            interval = self.interval
        elif 'interval' in self:
            interval = self['interval']
        else:
            interval = self.env.interval
        while self.alive:
            res = self.step()
            yield res or self.env.timeout(interval)
@@ -95,7 +134,7 @@ class BaseAgent(nxsim.BaseAgent, metaclass=MetaAgent):
            agents = self.global_topology.nodes()
        count = 0
        for agent in agents:
-            if state_id and state_id != self.global_topology.node[agent]['agent'].state['id']:
+            if state_id and state_id != self.global_topology.node[agent]['agent']['id']:
                continue
            count += 1
        return count
@@ -197,11 +236,13 @@ class FSM(BaseAgent, metaclass=MetaFSM):
    def __init__(self, *args, **kwargs):
        super(FSM, self).__init__(*args, **kwargs)
        if 'id' not in self.state:
-            self.state['id'] = self.default_state.id
+            if not self.default_state:
                raise ValueError('No default state specified for {}'.format(self.id))
            self['id'] = self.default_state.id
    def step(self):
        if 'id' in self.state:
-            next_state = self.state['id']
+            next_state = self['id']
        elif self.default_state:
            next_state = self.default_state.id
        else:
@@ -215,7 +256,7 @@ class FSM(BaseAgent, metaclass=MetaFSM):
            state = state.id
        if state not in self.states:
            raise ValueError('{} is not a valid state'.format(state))
-        self.state['id'] = state
+        self['id'] = state
        return state
--- a/soil/analysis.py
+++ b/soil/analysis.py
@@ -4,7 +4,7 @@ import glob
 import yaml
 from os.path import join
-from . import utils
+from . import utils, history
 def read_data(*args, group=False, **kwargs):
@@ -15,8 +15,9 @@ def read_data(*args, group=False, **kwargs):
        return list(iterable)
-def _read_data(pattern, keys=None, convert_types=False,
+def _read_data(pattern, *args, from_csv=False, process_args=None, **kwargs):
-               process=None, from_csv=False, **kwargs):
+    if not process_args:
        process_args = {}
    for folder in glob.glob(pattern):
        config_file = glob.glob(join(folder, '*.yml'))[0]
        config = yaml.load(open(config_file))
@@ -24,19 +25,20 @@ def _read_data(pattern, keys=None, convert_types=False,
        if from_csv:
            for trial_data in sorted(glob.glob(join(folder,
                                                    '*.environment.csv'))):
-                df = read_csv(trial_data, convert_types=convert_types)
+                df = read_csv(trial_data, **kwargs)
                if process:
                    df = process(df, **kwargs)
                yield config_file, df, config
        else:
            for trial_data in sorted(glob.glob(join(folder, '*.db.sqlite'))):
-                df = read_sql(trial_data, convert_types=convert_types,
+                df = read_sql(trial_data, **kwargs)
                              keys=keys)
                if process:
                    df = process(df, **kwargs)
                yield config_file, df, config
 def read_sql(db, *args, **kwargs):
    h = history.History(db, backup=False)
    df = h.read_sql(*args, **kwargs)
    return df
 def read_csv(filename, keys=None, convert_types=False, **kwargs):
    '''
    Read a CSV in canonical form: ::
@@ -49,18 +51,7 @@ def read_csv(filename, keys=None, convert_types=False, **kwargs):
        df = convert_types_slow(df)
    if keys:
        df = df[df['key'].isin(keys)]
-    return df
+    df = process_one(df)
 def read_sql(filename, keys=None, convert_types=False, limit=-1):
    condition = ''
    if keys:
        k = map(lambda x: "\'{}\'".format(x), keys)
        condition = 'where key in ({})'.format(','.join(k))
    query = 'select * from history {} limit {}'.format(condition, limit)
    df = pd.read_sql_query(query, 'sqlite:///{}'.format(filename))
    if convert_types:
        df = convert_types_slow(df)
    return df
@@ -108,8 +99,9 @@ def get_types(df):
    return {k:v[0] for k,v in dtypes.iteritems()}
-def process_one(df, *keys, columns=['key'], values='value',
+def process_one(df, *keys, columns=['key', 'agent_id'], values='value',
-                index=['t_step', 'agent_id'], aggfunc='first', **kwargs):
+                fill=True, index=['t_step',],
                aggfunc='first', **kwargs):
    '''
    Process a dataframe in canonical form ``(t_step, agent_id, key, value, value_type)`` into
    a dataframe with a column per key
@@ -119,35 +111,29 @@ def process_one(df, *keys, columns=['key'], values='value',
    if keys:
        df = df[df['key'].isin(keys)]
    dtypes = get_types(df)
    df = df.pivot_table(values=values, index=index, columns=columns,
                        aggfunc=aggfunc, **kwargs)
-    df = df.fillna(0).astype(dtypes)
+    if fill:
        df = fillna(df)
    return df
 def get_count_processed(df, *keys):
    if keys:
        df = df[list(keys)]
    # p = df.groupby(level=0).apply(pd.Series.value_counts)
    p = df.unstack().apply(pd.Series.value_counts, axis=1)
    return p
 def get_count(df, *keys):
    if keys:
-        df = df[df['key'].isin(keys)]
+        df = df[list(keys)]
-    p = df.groupby(by=['t_step', 'key', 'value']).size().unstack(level=[1,2]).fillna(0)
+    counts = pd.DataFrame()
-    return p
+    for key in df.columns.levels[0]:
        g = df[key].apply(pd.Series.value_counts, axis=1).fillna(0)
        for value, series in g.iteritems():
            counts[key, value] = series
    counts.columns = pd.MultiIndex.from_tuples(counts.columns)
    return counts
 def get_value(df, *keys, aggfunc='sum'):
    if keys:
-        df = df[df['key'].isin(keys)]
+        df = df[list(keys)]
-    p = process_one(df, *keys)
+    return df.groupby(axis=1, level=0).agg(aggfunc, axis=1)
    p = p.groupby(level='t_step').agg(aggfunc)
    return p
 def plot_all(*args, **kwargs):
@@ -175,4 +161,6 @@ def group_trials(trials, aggfunc=['mean', 'min', 'max', 'std']):
    return pd.concat(trials).groupby(level=0).agg(aggfunc).reorder_levels([2, 0,1] ,axis=1)
-
+def fillna(df):
    new_df = df.ffill(axis=0)
    return new_df
--- a/soil/environment.py
+++ b/soil/environment.py
@@ -5,16 +5,26 @@ import csv
 import random
 import simpy
 import tempfile
 import pandas as pd
 from copy import deepcopy
 from networkx.readwrite import json_graph
 import networkx as nx
 import nxsim
-from . import utils, agents
+from . import utils, agents, analysis, history
 class SoilEnvironment(nxsim.NetworkEnvironment):
    """
    The environment is key in a simulation. It contains the network topology,
    a reference to network and environment agents, as well as the environment
    params, which are used as shared state between agents.
    The environment parameters and the state of every agent can be accessed
    both by using the environment as a dictionary or with the environment's 
    :meth:`soil.environment.SoilEnvironment.get` method.
    """
    def __init__(self, name=None,
                 network_agents=None,
@@ -38,19 +48,21 @@ class SoilEnvironment(nxsim.NetworkEnvironment):
        self._env_agents = {}
        self.dry_run = dry_run
        self.interval = interval
        self.dir_path = dir_path or tempfile.mkdtemp('soil-env')
        self.get_path()
        self._history = history.History(name=self.name if not dry_run else None,
                                        dir_path=self.dir_path)
        # Add environment agents first, so their events get
        # executed before network agents
        self['SEED'] = seed or time.time()
        random.seed(self['SEED'])
        self.process(self.save_state())
        self.environment_agents = environment_agents or []
        self.network_agents = network_agents or []
        self.dir_path = dir_path or tempfile.mkdtemp('soil-env')
        if self.dry_run:
            self._db_path = ":memory:"
        else:
            self._db_path = os.path.join(self.get_path(), '{}.db.sqlite'.format(self.name))
        self.create_db(self._db_path)
        self['SEED'] = seed or time.time()
        random.seed(self['SEED'])
    def create_db(self, db_path=None):
        db_path = db_path or self._db_path
@@ -95,10 +107,8 @@ class SoilEnvironment(nxsim.NetworkEnvironment):
        if not network_agents:
            return
        for ix in self.G.nodes():
            i = ix
            node = self.G.node[i]
            agent, state = agents._agent_from_distribution(network_agents)
-            self.set_agent(i, agent_type=agent, state=state)
+            self.set_agent(ix, agent_type=agent, state=state)
    def set_agent(self, agent_id, agent_type, state=None):
        node = self.G.nodes[agent_id]
@@ -125,16 +135,21 @@ class SoilEnvironment(nxsim.NetworkEnvironment):
        return self.G.add_edge(agent1, agent2)
    def run(self, *args, **kwargs):
        self._save_state()
        super().run(*args, **kwargs)
        self._history.flush_cache()
    def _save_state(self, now=None):
        # for agent in self.agents:
        #     agent.save_state()
        utils.logger.debug('Saving state @{}'.format(self.now))
-        with self._db:
+        self._history.save_records(self.state_to_tuples(now=now))
            self._db.executemany("insert into history(agent_id, t_step, key, value, value_type) values (?, ?, ?, ?, ?)", self.state_to_tuples(now=now))
    def save_state(self):
        '''
        :DEPRECATED:
        Periodically save the state of the environment and the agents.
        '''
        self._save_state()
        while self.peek() != simpy.core.Infinity:
            delay = max(self.peek() - self.now, self.interval)
@@ -149,64 +164,44 @@ class SoilEnvironment(nxsim.NetworkEnvironment):
    def __getitem__(self, key):
        if isinstance(key, tuple):
-            values = [("agent_id", key[0]),
+            self._history.flush_cache()
-                      ("t_step", key[1]),
+            return self._history[key]
                      ("key", key[2]),
                      ("value", None),
                      ("value_type", None)]
            fields = list(k for k, v in values if v is None)
            conditions = " and ".join("{}='{}'".format(k, v) for k, v in values if v is not None)
            query = """SELECT {fields} from history""".format(fields=",".join(fields))
            if conditions:
                query = """{query} where {conditions}""".format(query=query,
                                                                conditions=conditions)
            with self._db:
                rows = self._db.execute(query).fetchall()
            utils.logger.debug(rows)
            results = self.rows_to_dict(rows)
            return results
        return self.environment_params[key]
    def rows_to_dict(self, rows):
        if len(rows) < 1:
            return None
        level = len(rows[0])-2
        if level == 0:
            if len(rows) != 1:
                raise ValueError('Cannot convert {} to dictionaries'.format(rows))
            value, value_type = rows[0]
            return utils.convert(value, value_type)
        results = {}
        for row in rows:
            item = results
            for i in range(level-1):
                key = row[i]
                if key not in item:
                    item[key] = {}
                item = item[key]
            key, value, value_type = row[level-1:]
            item[key] = utils.convert(value, value_type)
        return results
    def __setitem__(self, key, value):
        if isinstance(key, tuple):
            k = history.Key(*key)
            self._history.save_record(*k,
                                      value=value)
            return
        self.environment_params[key] = value
        self._history.save_record(agent_id='env',
                                  t_step=self.now,
                                  key=key,
                                  value=value)
    def __contains__(self, key):
        return key in self.environment_params
    def get(self, key, default=None):
        '''
        Get the value of an environment attribute in a
        given point in the simulation (history).
        If key is an attribute name, this method returns
        the current value.
        To get values at other times, use a
        :meth: `soil.history.Key` tuple.
        '''
        return self[key] if key in self else default
    def get_path(self, dir_path=None):
        dir_path = dir_path or self.dir_path
        if not os.path.exists(dir_path):
-            os.makedirs(dir_path)
+            try:
                os.makedirs(dir_path)
            except FileExistsError:
                pass
        return dir_path
    def get_agent(self, agent_id):
@@ -255,17 +250,19 @@ class SoilEnvironment(nxsim.NetworkEnvironment):
        if now is None:
            now = self.now
        for k, v in self.environment_params.items():
-            v, v_t = utils.repr(v)
+            yield history.Record(agent_id='env',
-            yield 'env', now, k, v, v_t
+                                 t_step=now,
                                 key=k,
                                 value=v)
        for agent in self.agents:
            for k, v in agent.state.items():
-                v, v_t = utils.repr(v)
+                yield history.Record(agent_id=agent.id,
-                yield agent.id, now, k, v, v_t
+                                     t_step=now,
                                     key=k,
                                     value=v)
    def history_to_tuples(self):
-        with self._db:
+        return self._history.to_tuples()
            res = self._db.execute("select agent_id, t_step, key, value, value_type from history ").fetchall()
        yield from res
    def history_to_graph(self):
        G = nx.Graph(self.G)
@@ -317,14 +314,10 @@ class SoilEnvironment(nxsim.NetworkEnvironment):
    def __getstate__(self):
        state = self.__dict__.copy()
        state['G'] = json_graph.node_link_data(self.G)
-        state['network_agents'] = agents.serialize_distribution(self.network_agents)
+        state['network_agents'] = agents._serialize_distribution(self.network_agents)
        state['environment_agents'] = agents._convert_agent_types(self.environment_agents,
                                                                 to_string=True)
        del state['_queue']
        import inspect
        for k, v in state.items():
            if inspect.isgeneratorfunction(v):
                print(k, v, type(v))
        return state
    def __setstate__(self, state):
--- a/soil/history.py
+++ b/soil/history.py
@@ -0,0 +1,231 @@
 import time
 import os
 import pandas as pd
 import sqlite3
 import copy
 from collections import UserDict, Iterable, namedtuple
 from . import utils
 class History:
    """
    Store and retrieve values from a sqlite database.

    Values are kept as ``(agent_id, t_step, key, value)`` rows in a
    ``history`` table, and the type name of every key is kept in a
    ``value_types`` table.  Records added with :meth:`save_record` are
    buffered in memory and written in batches by :meth:`flush_cache`.
    """

    def __init__(self, db_path=None, name=None, dir_path=None, backup=True):
        """
        Open (or create) the database and make sure its tables exist.

        :param db_path: path to the sqlite file, or an already open
            connection.  When it is None and ``name`` is not given, an
            in-memory database is used.
        :param name: used to build ``<dir_path>/<name>.db.sqlite`` when
            ``db_path`` is None.
        :param dir_path: folder for the file built from ``name`` (defaults
            to the current working directory).
        :param backup: when True, an existing file at ``db_path`` is renamed
            (with a timestamp in its name) before the database is opened.
        """
        if db_path is None and name:
            db_path = os.path.join(dir_path or os.getcwd(),
                                   '{}.db.sqlite'.format(name))
        if db_path is None:
            db_path = ":memory:"
        elif backup and isinstance(db_path, str) and os.path.exists(db_path):
            # Only real paths can be backed up; db_path may also be an open
            # connection, which os.path.exists cannot handle.
            newname = db_path.replace('db.sqlite',
                                      'backup{}.sqlite'.format(time.time()))
            os.rename(db_path, newname)
        self._db_path = db_path
        if isinstance(db_path, str):
            self._db = sqlite3.connect(db_path)
        else:
            self._db = db_path

        with self._db:
            self._db.execute('''CREATE TABLE IF NOT EXISTS history (agent_id text, t_step int, key text, value text)''')
            self._db.execute('''CREATE TABLE IF NOT EXISTS value_types (key text, value_type text)''')
            self._db.execute('''CREATE UNIQUE INDEX IF NOT EXISTS idx_history ON history (agent_id, t_step, key);''')
        # key -> (type name, serializer, deserializer)
        self._dtypes = {}
        # Records waiting to be written by flush_cache
        self._tups = []

    def conversors(self, key):
        """Get the serializer and deserializer for a given key.

        Returns the ``(type name, serializer, deserializer)`` tuple of the
        key, reloading the known types from the database when the key has
        not been seen yet.  Raises KeyError if the key is still unknown.
        """
        if key not in self._dtypes:
            self.read_types()
        return self._dtypes[key]

    @property
    def dtypes(self):
        """Mapping of every known key to the name of its type."""
        return {k: v[0] for k, v in self._dtypes.items()}

    def save_tuples(self, tuples):
        """Save an iterable of ``(agent_id, t_step, key, value)`` tuples."""
        self.save_records(Record(*tup) for tup in tuples)

    def save_records(self, records):
        """
        Write records to the database in a single transaction.

        The type of a key is registered the first time the key is seen, and
        every value is passed through the serializer of its key before it is
        stored.
        """
        with self._db:
            for rec in records:
                if not isinstance(rec, Record):
                    rec = Record(*rec)
                if rec.key not in self._dtypes:
                    name = utils.name(rec.value)
                    self._dtypes[rec.key] = (name,
                                             utils.serializer(name),
                                             utils.deserializer(name))
                    self._db.execute("replace into value_types (key, value_type) values (?, ?)", (rec.key, name))
                # Store the serialized form, so the deserializer gets back
                # what the serializer produced.
                serialize = self._dtypes[rec.key][1]
                self._db.execute("replace into history(agent_id, t_step, key, value) values (?, ?, ?, ?)",
                                 (rec.agent_id, rec.t_step, rec.key, serialize(rec.value)))

    def save_record(self, *args, **kwargs):
        """
        Queue a record (same arguments as ``Record``) to be written later.

        The queue is flushed once it holds more than 100 records.
        """
        self._tups.append(Record(*args, **kwargs))
        if len(self._tups) > 100:
            self.flush_cache()

    def flush_cache(self):
        '''
        Use a cache to save state changes to avoid opening a session for every change.
        The cache will be flushed at the end of the simulation, and when history is accessed.
        '''
        self.save_records(self._tups)
        self._tups = list()

    def to_tuples(self):
        """
        Yield every stored record as ``(agent_id, t_step, key, value)``,
        with the value converted back by the deserializer of its key.
        """
        self.flush_cache()
        with self._db:
            res = self._db.execute("select agent_id, t_step, key, value from history ").fetchall()
        for agent_id, t_step, key, value in res:
            _, _, des = self.conversors(key)
            yield agent_id, t_step, key, des(value)

    def read_types(self):
        """Load the type of every key from the ``value_types`` table."""
        with self._db:
            res = self._db.execute("select key, value_type from value_types ").fetchall()
        for k, v in res:
            serializer = utils.serializer(v)
            deserializer = utils.deserializer(v)
            self._dtypes[k] = (v, serializer, deserializer)

    def __getitem__(self, key):
        """
        Look up values by ``(agent_id, t_step, key)``.

        Fields that are None are left unrestricted.  The result is whatever
        ``Records.value()`` returns for the given filter.
        """
        key = Key(*key)
        agent_ids = [key.agent_id] if key.agent_id is not None else []
        t_steps = [key.t_step] if key.t_step is not None else []
        keys = [key.key] if key.key is not None else []

        df = self.read_sql(agent_ids=agent_ids,
                           t_steps=t_steps,
                           keys=keys)
        r = Records(df, filter=key, dtypes=self._dtypes)
        return r.value()

    def read_sql(self, keys=None, agent_ids=None, t_steps=None, convert_types=False, limit=-1):
        """
        Read the history into a dataframe indexed by ``t_step`` with one
        ``(key, agent_id)`` column per stored series.

        :param keys: only read these keys (all keys when empty).
        :param agent_ids: only read these agents (all agents when empty).
        :param t_steps: only return these steps.  The last value stored
            before the smallest requested step is also read, so the
            requested steps can be forward-filled.
        :param convert_types: unused, kept for backward compatibility.
        :param limit: maximum number of rows read by the main query
            (-1 for no limit).
        """
        self.read_types()

        # All values are bound as query parameters instead of being pasted
        # into the SQL text, so quotes in keys or ids cannot break the query.
        filters = []
        params = []
        for column, wanted in (('key', keys), ('agent_id', agent_ids)):
            if not wanted:
                continue
            # Both columns are declared as text, so bind the values as text.
            wanted = [str(v) for v in wanted]
            filters.append('{} in ({})'.format(column,
                                               ','.join('?' * len(wanted))))
            params.extend(wanted)

        last_df = None
        if t_steps:
            # Look for the last value before the minimum step in the query
            min_step = min(t_steps)
            condition = ' and '.join(['t_step < ?'] + filters)

            last_query = '''
            select h1.*
            from history h1
            inner join (
            select agent_id, key, max(t_step) as t_step
            from history
            where {condition}
            group by agent_id, key
            ) h2
            on h1.agent_id = h2.agent_id  and
               h1.key      = h2.key       and
               h1.t_step   = h2.t_step
            '''.format(condition=condition)
            # Steps are bound as text: sqlite applies the numeric affinity
            # of the t_step column to them, and any number type (including
            # ones sqlite3 cannot bind directly) is accepted.
            last_df = pd.read_sql_query(last_query, self._db,
                                        params=[str(min_step)] + params)

            filters.append('t_step >= ? and t_step <= ?')
            params.extend([str(min_step), str(max(t_steps))])

        condition = ''
        if filters:
            condition = 'where {} '.format(' and '.join(filters))
        query = 'select * from history {} limit ?'.format(condition)
        df = pd.read_sql_query(query, self._db, params=params + [int(limit)])
        if last_df is not None:
            df = pd.concat([df, last_df])

        df_p = df.pivot_table(values='value', index=['t_step'],
                              columns=['key', 'agent_id'],
                              aggfunc='first')

        for k, v in self._dtypes.items():
            if k in df_p:
                dtype, _, deserial = v
                # Carry the last known value forward; steps before the first
                # value get the default of the type (deserializer called
                # without arguments).
                df_p[k] = df_p[k].ffill().fillna(deserial()).astype(dtype)
        if t_steps:
            df_p = df_p.reindex(t_steps, method='ffill')
        return df_p.ffill()
 class Records():
    """
    View over a history dataframe (``t_step`` index, ``(key, agent_id)``
    columns) that is narrowed down with a ``Key`` filter.

    Fields of the filter that are None are still unresolved.  Indexing a
    ``Records`` resolves the next unresolved field; once the three fields are
    set, the matching value is returned instead of a new ``Records``.
    """

    def __init__(self, df, filter=None, dtypes=None):
        """
        :param df: dataframe with a ``t_step`` index and ``(key, agent_id)``
            columns.
        :param filter: ``Key`` with the fields resolved so far (defaults to
            a key with every field unresolved).
        :param dtypes: mapping of key to a tuple whose third item is
            called without arguments to get a default value.
        """
        if not filter:
            filter = Key(agent_id=None,
                         t_step=None,
                         key=None)
        self._df = df
        self._filter = filter
        self.dtypes = dtypes or {}
        super().__init__()

    def mask(self, tup):
        """
        Drop from a ``(agent_id, t_step, key, value)`` tuple the fields that
        are already fixed by the filter.  The value is always kept.
        """
        res = ()
        for i, k in zip(tup[:-1], self._filter):
            if k is None:
                res = res + (i,)
        res = res + (tup[-1],)
        return res

    def filter(self, newKey):
        """Set the first unresolved field of the filter to ``newKey``."""
        f = list(self._filter)
        for ix, i in enumerate(f):
            if i is None:
                f[ix] = newKey
                # Resolve one field per call; the remaining ones are set by
                # further indexing.
                break
        self._filter = Key(*f)

    @property
    def resolved(self):
        """True when the three fields of the filter are set."""
        return sum(1 for i in self._filter if i is not None) == 3

    def __iter__(self):
        """Yield every stored value as a record masked by the filter."""
        for column, series in self._df.items():
            key, agent_id = column
            for t_step, value in series.items():
                r = Record(t_step=t_step,
                           agent_id=agent_id,
                           key=key,
                           value=value)
                yield self.mask(r)

    def value(self):
        """
        Return the value selected by the filter.

        When the filter is not fully resolved, the ``Records`` itself is
        returned.  Otherwise the result is the value at the last step that is
        not later than the requested one; if there is none (or the key or
        agent is missing) the default of the key's type is returned.
        """
        if not self.resolved:
            return self
        f = self._filter
        try:
            series = self._df[f.key][str(f.agent_id)]
            # Position of the last step <= f.t_step, or -1 when there is none
            ix = series.index.get_indexer([f.t_step], method='ffill')[0]
            if ix < 0:
                raise KeyError(f.t_step)
            return series.iloc[ix]
        except KeyError:
            return self.dtypes[f.key][2]()

    def __getitem__(self, k):
        """Resolve the next field with ``k`` on a copy and return its value."""
        n = copy.copy(self)
        n.filter(k)
        return n.value()

    def __len__(self):
        """Number of rows (time steps) of the underlying dataframe."""
        return len(self._df)
 # Address of a value in the history. History.__getitem__ and Records treat a
 # field that is None as not specified.
 Key = namedtuple('Key', ['agent_id', 't_step', 'key'])
 # A value together with the agent, time step and key it was stored under.
 Record = namedtuple('Record', 'agent_id t_step key value')
--- a/soil/simulation.py
+++ b/soil/simulation.py
@@ -20,7 +20,7 @@ class SoilSimulation(NetworkSimulation):
    """
    Subclass of nsim.NetworkSimulation with three main differences:
        1) agent type can be specified by name or by class.
-        2) instead of just one type, an network_agents can be used.
+        2) instead of just one type, a network agents distribution can be used.
           The distribution specifies the weight (or probability) of each
           agent type in the topology. This is an example distribution: ::
@@ -95,16 +95,16 @@ class SoilSimulation(NetworkSimulation):
    def run_simulation_gen(self, *args, parallel=False, dry_run=False,
                           **kwargs):
        p = Pool()
-        with utils.timer('simulation'):
+        with utils.timer('simulation {}'.format(self.name)):
            if parallel:
-                func = partial(self.run_trial, dry_run=dry_run,
+                func = partial(self.run_trial, dry_run=dry_run or self.dry_run,
                               return_env=not parallel, **kwargs)
                for i in p.imap_unordered(func, range(self.num_trials)):
                    yield i
            else:
                for i in range(self.num_trials):
-                    yield self.run_trial(i, dry_run=dry_run, **kwargs)
+                    yield self.run_trial(i, dry_run=dry_run or self.dry_run, **kwargs)
-            if not dry_run or self.dry_run:
+            if not (dry_run or self.dry_run):
                logger.info('Dumping results to {}'.format(self.dir_path))
                self.dump_pickle(self.dir_path)
                self.dump_yaml(self.dir_path)
@@ -192,7 +192,7 @@ class SoilSimulation(NetworkSimulation):
        return state
-def from_config(config, G=None):
+def from_config(config):
    config = list(utils.load_config(config))
    if len(config) > 1:
        raise AttributeError('Provide only one configuration')
@@ -201,9 +201,10 @@ def from_config(config, G=None):
    return sim
-def run_from_config(*configs, results_dir='soil_output', dump=None, timestamp=False,  **kwargs):
+def run_from_config(*configs, results_dir='soil_output', dry_run=False, dump=None, timestamp=False,  **kwargs):
    for config_def in configs:
-        for config, cpath in utils.load_config(config_def):
+        # logger.info("Found {} config(s)".format(len(ls)))
        for config, _ in utils.load_config(config_def):
            name = config.get('name', 'unnamed')
            logger.info("Using config(s): {name}".format(name=name))
@@ -215,4 +216,4 @@ def run_from_config(*configs, results_dir='soil_output', dump=None, timestamp=Fa
            dir_path = os.path.join(results_dir, sim_folder)
            sim = SoilSimulation(dir_path=dir_path, dump=dump, **config)
            logger.info('Dumping results to {} : {}'.format(sim.dir_path, sim.dump))
-            results = sim.run_simulation(**kwargs)
+            sim.run_simulation(**kwargs)
--- a/soil/utils.py
+++ b/soil/utils.py
@@ -1,6 +1,7 @@
 import os
 import yaml
 import logging
 import importlib
 from time import time
 from glob import glob
 from random import random
@@ -72,13 +73,22 @@ def timer(name='task', pre="", function=logger.info, to_object=None):
 def repr(v):
-    if isinstance(v, bool):
+    func = serializer(v)
-        v = "true" if v else ""
+    tname = name(v)
-        return v, bool.__name__
+    return func(v), tname
    return v, type(v).__name__
-def convert(value, type_):
+
-    import importlib
+def name(v):
    return type(v).__name__
 def serializer(type_):
    """Return a one-argument function that serializes values of *type_*.

    *type_* is compared against the type name ``'bool'``: for it, the
    returned function maps truthy values to ``"true"`` and falsy values to
    the empty string.  For anything else the returned function hands its
    argument back unchanged.
    """
    def _bool_to_text(value):
        return "true" if value else ""
    def _passthrough(value):
        return value
    return _bool_to_text if type_ == 'bool' else _passthrough
 def deserializer(type_):
    try:
        # Check if it's a builtin type
        module = importlib.import_module('builtins')
@@ -88,4 +98,8 @@ def convert(value, type_):
        module, type_ = type_.rsplit(".", 1)
        module = importlib.import_module(module)
        cls = getattr(module, type_)
-    return cls(value)
+    return cls
 def convert(value, type_):
    """Build a value from *value* using the callable that
    ``deserializer(type_)`` returns for the given type name."""
    loader = deserializer(type_)
    return loader(value)
--- a/tests/test.csv
+++ b/tests/test.csv
@@ -0,0 +1,16 @@
 agent_id,t_step,key,value,value_type
 a0,0,hello,w,str
 a0,1,hello,o,str
 a0,2,hello,r,str
 a0,3,hello,l,str
 a0,4,hello,d,str
 a0,5,hello,!,str
 env,1,started,,bool
 env,2,started,True,bool
 env,7,started,,bool
 a0,0,hello,w,str
 a0,1,hello,o,str
 a0,2,hello,r,str
 a0,3,hello,l,str
 a0,4,hello,d,str
 a0,5,hello,!,str
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -0,0 +1,90 @@
 from unittest import TestCase
 import os
 import pandas as pd
 import yaml
 from functools import partial
 from os.path import join
 from soil import simulation, analysis, agents
 ROOT = os.path.abspath(os.path.dirname(__file__))
 class Ping(agents.FSM):
    """Test agent with two states, ``even`` and ``odd``.

    Each state adds one to the agent's ``'count'`` entry and returns the
    other state.
    """
    # presumably the initial values of the agent state; confirm against agents.FSM
    defaults = {
        'count': 0,
    }
    # NOTE(review): `default_state` presumably marks `even` as the state the
    # agent starts in; confirm against the agents module.
    @agents.default_state
    @agents.state
    def even(self):
        """Increment ``'count'`` and return the ``odd`` state."""
        self['count'] += 1
        # presumably the FSM treats the returned state as the next one to run
        return self.odd
    @agents.state
    def odd(self):
        """Increment ``'count'`` and return the ``even`` state."""
        self['count'] += 1
        return self.even
 class TestAnalysis(TestCase):
    """Checks for the ``analysis`` helpers, run against the history of a
    short two-agent ``Ping`` simulation."""
    # Code to generate a simple sqlite history
    def setUp(self):
        """
        Run a single dry-run trial of the ``Ping`` simulation on a
        two-node complete graph and keep the resulting environment in
        ``self.env`` for the tests below."""
        config = {
            'name': 'analysis',
            'dry_run': True,
            'seed': 'seed',
            'network_params': {
                'generator': 'complete_graph',
                'n': 2
            },
            'agent_type': Ping,
            'states': [{'interval': 1}, {'interval': 2}],
            'max_time': 30,
            'num_trials': 1,
            'environment_params': {
            }
        }
        s = simulation.from_config(config)
        # run_simulation() returns one environment per trial; only one trial is configured.
        self.env = s.run_simulation()[0]
    def test_saved(self):
        """Agent and environment values can be read back by key and time step."""
        env = self.env
        assert env.get_agent(0)['count', 0] == 1
        assert env.get_agent(0)['count', 29] == 30
        assert env.get_agent(1)['count', 0] == 1
        assert env.get_agent(1)['count', 29] == 15
        # Asking for every key at a step and then indexing must agree with
        # asking for the single key directly.
        assert env['env', 29, None]['SEED'] == env['env', 29, 'SEED']
    def test_count(self):
        """``analysis.get_count`` reports per-value counts for the requested keys."""
        env = self.env
        df = analysis.read_sql(env._history._db)
        res = analysis.get_count(df, 'SEED', 'id')
        assert res['SEED']['seedanalysis_trial_0'].iloc[0] == 1
        assert res['SEED']['seedanalysis_trial_0'].iloc[-1] == 1
        assert res['id']['odd'].iloc[0] == 2
        assert res['id']['even'].iloc[0] == 0
        assert res['id']['odd'].iloc[-1] == 1
        assert res['id']['even'].iloc[-1] == 1
    def test_value(self):
        """``analysis.get_value`` aggregates a key, with an optional ``aggfunc``."""
        env = self.env
        df = analysis.read_sql(env._history._db)
        res_sum = analysis.get_value(df, 'count')
        assert res_sum['count'].iloc[0] == 2
        import numpy as np
        res_mean = analysis.get_value(df, 'count', aggfunc=np.mean)
        assert res_mean['count'].iloc[0] == 1
        res_total = analysis.get_value(df)
        # The comparison has to be asserted; as a bare expression it is
        # evaluated and discarded, so the check could never fail.
        assert res_total['SEED'].iloc[0] == 'seedanalysis_trial_0'
--- a/tests/test_history.py
+++ b/tests/test_history.py
@@ -0,0 +1,90 @@
 from unittest import TestCase
 import os
 import pandas as pd
 from soil import history, analysis
 ROOT = os.path.abspath(os.path.dirname(__file__))
 class TestHistory(TestCase):
    """Lookup behaviour of ``history.History`` after ``save_tuples``."""
    def test_history(self):
        """
        Single values can be read for any (agent, step, key), including
        steps for which no record of their own was saved.
        """
        saved = (
            *(('a_0', step, 'id', letter) for step, letter in enumerate('hello')),
            *(('a_1', step, 'id', letter) for step, letter in enumerate('value')),
            ('env', 1, 'prob', 1),
            ('env', 3, 'prob', 2),
            ('env', 5, 'prob', 3),
            ('a_2', 7, 'finished', True),
        )
        store = history.History()
        store.save_tuples(saved)
        # assert store['env', 0, 'prob'] == 0
        # 'prob' was only saved at steps 1, 3 and 5; steps in between must
        # report the most recent saved value.
        for step in range(1, 7):
            expected_prob = ((step-1)//2)+1
            assert store['env', step, 'prob'] == expected_prob
        for step, letter in enumerate('hello'):
            assert store['a_0', step, 'id'] == letter
        # Leaving the step out yields (t_step, value) records for the key.
        for (t_step, val), letter in zip(store['a_0', None, 'id'], 'hello'):
            assert val == letter
        for step, letter in enumerate('value'):
            assert store['a_1', step, 'id'] == letter
        # Past the last saved step the last value is still reported.
        for step in (5, 6, 7):
            assert store['a_1', step, 'id'] == 'e'
        for step in range(7):
            assert store['a_2', step, 'finished'] == False
        assert store['a_2', 7, 'finished']
    def test_history_gen(self):
        """
        Leaving out parts of the lookup key yields records for every
        matching step, agent or key.
        """
        saved = (
            *(('a_1', step, 'id', letter) for step, letter in enumerate('value')),
            ('env', 1, 'prob', 1),
            ('env', 2, 'prob', 2),
            ('env', 3, 'prob', 3),
            ('a_2', 7, 'finished', True),
        )
        store = history.History()
        store.save_tuples(saved)
        for t_step, key, value in store['env', None, None]:
            assert t_step == value
            assert key == 'prob'
        at_seven = list(store[None, 7, None])
        assert len(at_seven) == 3
        # Expected (key, value) per agent at step 7; any other agent is the
        # environment, whose last saved 'prob' is 3.
        expected = {'a_1': ('id', 'e'), 'a_2': ('finished', True)}
        for agent_id, key, value in at_seven:
            want_key, want_value = expected.get(agent_id, ('prob', 3))
            assert key == want_key
            assert value == want_value
        a1_state = store['a_1', 7, None]
        assert a1_state['id'] == 'e'
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -22,6 +22,7 @@ class TestMain(TestCase):
        Raise an exception otherwise.
        """
        config = {
            'dry_run': True,
            'network_params': {
                'path': join(ROOT, 'test.gexf')
            }
@@ -31,6 +32,7 @@ class TestMain(TestCase):
        assert len(G) == 2
        with self.assertRaises(AttributeError):
            config = {
            'dry_run': True,
                'network_params': {
                    'path': join(ROOT, 'unknown.extension')
                }
@@ -44,6 +46,7 @@ class TestMain(TestCase):
        should be used to generate a network
        """
        config = {
            'dry_run': True,
            'network_params': {
                'generator': 'barabasi_albert_graph'
            }
@@ -58,6 +61,7 @@ class TestMain(TestCase):
    def test_empty_simulation(self):
        """A simulation with a base behaviour should do nothing"""
        config = {
            'dry_run': True,
            'network_params': {
                'path': join(ROOT, 'test.gexf')
            },
@@ -74,11 +78,12 @@ class TestMain(TestCase):
        agent should be able to update its state."""
        config = {
            'name': 'CounterAgent',
            'dry_run': True,
            'network_params': {
                'path': join(ROOT, 'test.gexf')
            },
            'agent_type': 'CounterModel',
-            'states': [{'neighbors': 10}, {'total': 12}],
+            'states': [{'times': 10}, {'times': 20}],
            'max_time': 2,
            'num_trials': 1,
            'environment_params': {
@@ -86,10 +91,10 @@ class TestMain(TestCase):
        }
        s = simulation.from_config(config)
        env = s.run_simulation(dry_run=True)[0]
-        assert env.get_agent(0)['neighbors', 0] == 10
+        assert env.get_agent(0)['times', 0] == 11
-        assert env.get_agent(0)['neighbors', 1] == 1
+        assert env.get_agent(0)['times', 1] == 12
-        assert env.get_agent(1)['total', 0] == 12
+        assert env.get_agent(1)['times', 0] == 21
-        assert env.get_agent(1)['neighbors', 1] == 1
+        assert env.get_agent(1)['times', 1] == 22
    def test_counter_agent_history(self):
        """
@@ -97,6 +102,7 @@ class TestMain(TestCase):
        """
        config = {
            'name': 'CounterAgent',
            'dry_run': True,
            'network_params': {
                'path': join(ROOT, 'test.gexf')
            },
@@ -114,11 +120,10 @@ class TestMain(TestCase):
        env = s.run_simulation(dry_run=True)[0]
        for agent in env.network_agents:
            last = 0
-            assert len(agent[None, None]) == 11
+            assert len(agent[None, None]) == 10
-            for step, total in agent['total', None].items():
+            for step, total in sorted(agent['total', None]):
-                if step > 0:
+                assert total == last + 2
-                    assert total == last + 2
+                last = total
                    last = total
    def test_custom_agent(self):
        """Allow for search of neighbors with a certain state_id"""
@@ -127,6 +132,7 @@ class TestMain(TestCase):
                self.state['neighbors'] = self.count_agents(state_id=0,
                                                            limit_neighbors=True)
        config = {
            'dry_run': True,
            'network_params': {
                'path': join(ROOT, 'test.gexf')
            },
@@ -150,7 +156,8 @@ class TestMain(TestCase):
        config['network_params']['path'] = join(EXAMPLES,
                                                config['network_params']['path'])
        s = simulation.from_config(config)
-        env = s.run_simulation(dry_run=True)[0]
+        s.dry_run = True
        env = s.run_simulation()[0]
        for a in env.network_agents:
            skill_level = a.state['skill_level']
            if a.id == 'Torvalds':
@@ -174,14 +181,15 @@ class TestMain(TestCase):
        with utils.timer('loading'):
            config = utils.load_file(join(EXAMPLES, 'complete.yml'))[0]
            s = simulation.from_config(config)
            s.dry_run = True
        with utils.timer('serializing'):
            serial = s.to_yaml()
        with utils.timer('recovering'):
            recovered = yaml.load(serial)
        with utils.timer('deleting'):
            del recovered['topology']
            del recovered['dry_run']
            del recovered['load_module']
            del recovered['dry_run']
        assert config == recovered
    def test_configuration_changes(self):
@@ -191,6 +199,7 @@ class TestMain(TestCase):
        """
        config = utils.load_file('examples/complete.yml')[0]
        s = simulation.from_config(config)
        s.dry_run = True
        for i in range(5):
            s.run_simulation(dry_run=True)
            nconfig = s.to_dict()
@@ -206,17 +215,14 @@ class TestMain(TestCase):
        pass
    def test_row_conversion(self):
        sim = simulation.SoilSimulation()
        env = environment.SoilEnvironment(dry_run=True)
        env['test'] = 'test_value'
        env._save_state(now=0)
        res = list(env.history_to_tuples())
        assert len(res) == len(env.environment_params)
        assert ('env', 0, 'test', 'test_value', 'str') in res
        env._now = 1
        env['test'] = 'second_value'
        env._save_state(now=1)
        res = list(env.history_to_tuples())
        assert env['env', 0, 'test' ] == 'test_value'