WIP: exporters

2026-01-07 19:28:16 +00:00 · 2019-04-29 18:47:15 +02:00
parent 9bc036d185
commit d1006bd55c
16 changed files with 288 additions and 195 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,12 +9,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 The definition of the variables and their possible values (i.e., a problem in SALib terms), as well as a sampler function, can be provided.
 Soil uses this definition and the template to generate a set of configurations.
 * Simulation group names, to link related simulations. For now, they are only used to group all simulations in the same group under the same folder.
 * Exporters unify exporting/dumping results and other files to disk. If `dry_run` is set to `True`, exporters will write to stdout instead of a file (useful for testing/debugging).
 * Distribution exporter, to write statistics about values and value_counts in every simulation. The results are dumped to two CSV files.
 ### Changed
-* `dir_path` is now the directory for resources (modules, files), and the output dir has been renamed to `outdir`
+* `dir_path` is now the directory for resources (modules, files)
 * Environments and simulations do not export or write anything by default. That task is delegated to Exporters
 ### Removed
 * The output dir for environments and simulations (see Exporters)
 * DrawingAgent, because it wrote to disk and was not being used. We provide a partial alternative in the form of the GraphDrawing exporter. A complete alternative will be provided once the network at each state can be accessed by exporters.
 ### Fixed
 * Modules with custom agents/environments failed to load when they were run from outside the directory of the definition file. Modules are now loaded from the directory of the simulation file in addition to the working directory
 * Memory databases (in history) can now be shared between threads.
 * Testing all examples, not just subdirectories
 ## [0.13.8]
 ### Changed
--- a/examples/complete.yml
+++ b/examples/complete.yml
@@ -3,11 +3,9 @@ name: simple
 group: tests
 dir_path: "/tmp/"
 num_trials: 3
 dry_run: True
 max_time: 100
 interval: 1
 seed: "CompleteSeed!"
 dump: false
 network_params:
  generator: complete_graph
  n: 10
--- a/examples/custom_generator/custom_generator.yml
+++ b/examples/custom_generator/custom_generator.yml
@@ -2,7 +2,6 @@
 name: custom-generator
 description: Using a custom generator for the network
 num_trials: 3
 dry_run: True
 max_time: 100
 interval: 1
 network_params:
@@ -14,4 +13,4 @@ network_agents:
  - agent_type: CounterModel
    weight: 1
    state:
-      id: 0
+      id: 0
--- a/examples/custom_timeouts/custom_timeouts.py
+++ b/examples/custom_timeouts/custom_timeouts.py
@@ -29,8 +29,7 @@ if __name__ == '__main__':
    from soil import Simulation
    s = Simulation(network_agents=[{'ids': [0], 'agent_type': Fibonacci},
                                   {'ids': [1], 'agent_type': Odds}],
                   dry_run=True,
                   network_params={"generator": "complete_graph", "n": 2},
                   max_time=100,
                   )
-    s.run()
+    s.run(dry_run=True)
--- a/soil/init.py
+++ b/soil/init.py
@@ -57,18 +57,20 @@ def main():
    logging.info('Loading config file: {}'.format(args.file))
    try:
-        dump = []
+        exporters = list(args.exporter or [])
-        if not args.dry_run:
+        if args.csv:
-            if args.csv:
+            exporters.append('CSV')
-                dump.append('csv')
+        if args.graph:
-            if args.graph:
+            exporters.append('Gexf')
-                dump.append('gexf')
+        exp_params = {}
        if args.dry_run:
            exp_params['copy_to'] = sys.stdout
        simulation.run_from_config(args.file,
                                   dry_run=args.dry_run,
-                                   dump=dump,
+                                   exporters=exporters,
                                   exporters=args.exporter,
                                   parallel=(not args.synchronous),
-                                   outdir=args.output)
+                                   outdir=args.output,
                                   exporter_params=exp_params)
    except Exception:
        if args.pdb:
            pdb.post_mortem()
--- a/soil/agents/DrawingAgent.py
+++ b/soil/agents/DrawingAgent.py
@@ -1,18 +0,0 @@
 from . import BaseAgent
 import os.path
 import matplotlib
 import matplotlib.pyplot as plt
 import networkx as nx
 class DrawingAgent(BaseAgent):
    """
    Agent that draws the state of the network.
    """
    def step(self):
        # Outside effects
        f = plt.figure()
        nx.draw(self.env.G, node_size=10, width=0.2, pos=nx.spring_layout(self.env.G, scale=100), ax=f.add_subplot(111))
        f.savefig(os.path.join(self.env.get_path(), "graph-"+str(self.env.now)+".png"))
--- a/soil/agents/init.py
+++ b/soil/agents/init.py
@@ -515,4 +515,3 @@ from .ModelM2 import *
 from .SentimentCorrelationModel import *
 from .SISaModel import *
 from .CounterModel import *
 from .DrawingAgent import *
--- a/soil/analysis.py
+++ b/soil/analysis.py
@@ -34,7 +34,7 @@ def _read_data(pattern, *args, from_csv=False, process_args=None, **kwargs):
 def read_sql(db, *args, **kwargs):
-    h = history.History(db, backup=False)
+    h = history.History(db_path=db, backup=False)
    df = h.read_sql(*args, **kwargs)
    return df
--- a/soil/environment.py
+++ b/soil/environment.py
@@ -14,15 +14,13 @@ from networkx.readwrite import json_graph
 import networkx as nx
 import nxsim
-from . import serialization, agents, analysis, history
+from . import serialization, agents, analysis, history, utils
 # These properties will be copied when pickling/unpickling the environment
 _CONFIG_PROPS = [ 'name',
                 'states',
                 'default_state',
                 'interval',
                 'dry_run',
                 'outdir',
                 ]
 class Environment(nxsim.NetworkEnvironment):
@@ -43,8 +41,6 @@ class Environment(nxsim.NetworkEnvironment):
                 default_state=None,
                 interval=1,
                 seed=None,
                 dry_run=False,
                 outdir=None,
                 topology=None,
                 *args, **kwargs):
        self.name = name or 'UnnamedEnvironment'
@@ -56,13 +52,8 @@ class Environment(nxsim.NetworkEnvironment):
            topology = nx.Graph()
        super().__init__(*args, topology=topology, **kwargs)
        self._env_agents = {}
        self.dry_run = dry_run
        self.interval = interval
-        self.outdir = outdir or tempfile.mkdtemp('soil-env')
+        self._history = history.History(name=self.name,
        if not dry_run:
            self.get_path()
        self._history = history.History(name=self.name if not dry_run else None,
                                        outdir=self.outdir,
                                        backup=True)
        # Add environment agents first, so their events get
        # executed before network agents
@@ -167,8 +158,6 @@ class Environment(nxsim.NetworkEnvironment):
        self.log_stats()
    def _save_state(self, now=None):
        # for agent in self.agents:
        #     agent.save_state()
        serialization.logger.debug('Saving state @{}'.format(self.now))
        self._history.save_records(self.state_to_tuples(now=now))
@@ -222,15 +211,6 @@ class Environment(nxsim.NetworkEnvironment):
        '''
        return self[key] if key in self else default
    def get_path(self, outdir=None):
        outdir = outdir or self.outdir
        if not os.path.exists(outdir):
            try:
                os.makedirs(outdir)
            except FileExistsError:
                pass
        return outdir
    def get_agent(self, agent_id):
        return self.G.node[agent_id]['agent']
@@ -239,20 +219,15 @@ class Environment(nxsim.NetworkEnvironment):
            return list(self.agents)
        return [self.G.node[i]['agent'] for i in nodes]
-    def dump_csv(self, outdir=None):
+    def dump_csv(self, f):
-        csv_name = os.path.join(self.get_path(outdir),
+        with utils.open_or_reuse(f, 'w') as f:
                                '{}.environment.csv'.format(self.name))
        with open(csv_name, 'w') as f:
            cr = csv.writer(f)
            cr.writerow(('agent_id', 't_step', 'key', 'value'))
            for i in self.history_to_tuples():
                cr.writerow(i)
-    def dump_gexf(self, outdir=None):
+    def dump_gexf(self, f):
        G = self.history_to_graph()
        graph_path = os.path.join(self.get_path(outdir),
                                  self.name+".gexf")
        # Workaround for geometric models
        # See soil/soil#4
        for node in G.nodes():
@@ -260,9 +235,9 @@ class Environment(nxsim.NetworkEnvironment):
                G.node[node]['viz'] = {"position": {"x": G.node[node]['pos'][0], "y": G.node[node]['pos'][1], "z": 0.0}}
                del (G.node[node]['pos'])
-        nx.write_gexf(G, graph_path, version="1.2draft")
+        nx.write_gexf(G, f, version="1.2draft")
-    def dump(self, outdir=None, formats=None):
+    def dump(self, *args, formats=None, **kwargs):
        if not formats:
            return
        functions = {
@@ -271,10 +246,13 @@ class Environment(nxsim.NetworkEnvironment):
        }
        for f in formats:
            if f in functions:
-                functions[f](outdir)
+                functions[f](*args, **kwargs)
            else:
                raise ValueError('Unknown format: {}'.format(f))
    def dump_sqlite(self, f):
        '''Dump this environment's history into `f` by delegating to `self._history.dump`.'''
        return self._history.dump(f)
    def state_to_tuples(self, now=None):
        if now is None:
            now = self.now
@@ -338,7 +316,7 @@ class Environment(nxsim.NetworkEnvironment):
                G.add_node(agent.id, **attributes)
        return G
-    
+
    def stats(self):
        stats = {}
        stats['network'] = {}
--- a/soil/exporters.py
+++ b/soil/exporters.py
@@ -1,43 +1,174 @@
 from .serialization import deserialize
 import os
 import time
 from io import BytesIO
 import matplotlib.pyplot as plt
 import networkx as nx
 import pandas as pd
 from .serialization import deserialize
 from .utils import open_or_reuse, logger, timer
-def for_sim(simulation, names, dir_path=None):
+from . import utils
 def for_sim(simulation, names, *args, **kwargs):
    '''Return the set of exporters for a simulation, given the exporter names'''
    exporters = []
    for name in names:
        mod = deserialize(name, known_modules=['soil.exporters'])
-        exporters.append(mod(simulation))
+        exporters.append(mod(simulation, *args, **kwargs))
    return exporters
 class DryRunner(BytesIO):
    def __init__(self, fname, *args, copy_to=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.__fname = fname
        self.__copy_to = copy_to
-class Base:
+    def write(self, txt):
        if self.__copy_to:
            self.__copy_to.write('{}:::{}'.format(self.__fname, txt))
        try:
            super().write(txt)
        except TypeError:
            super().write(bytes(txt, 'utf-8'))
-    def __init__(self, simulation):
+    def close(self):
        logger.info('**Not** written to {} (dry run mode):\n\n{}\n\n'.format(self.__fname,
                                                                       self.getvalue().decode()))
        super().close()
 class Exporter:
    '''
    Interface for all exporters. It is not necessary, but it is useful
    if you don't plan to implement all the methods.
    '''
    def __init__(self, simulation, outdir=None, dry_run=None, copy_to=None):
        self.sim = simulation
        outdir = outdir or os.getcwd()
        self.outdir = os.path.join(outdir,
                                   simulation.group or '',
                                   simulation.name)
        self.dry_run = dry_run
        self.copy_to = copy_to
    def start(self):
-        pass
+        '''Method to call when the simulation starts'''
    def end(self):
-        pass
+        '''Method to call when the simulation ends'''
-    def env(self):
+    def trial_end(self, env):
-        pass
+        '''Method to call when a trial ends'''
    def output(self, f, mode='w', **kwargs):
        if self.dry_run:
            f = DryRunner(f, copy_to=self.copy_to)
        else:
            try:
                if not os.path.isabs(f):
                    f = os.path.join(self.outdir, f)
            except TypeError:
                pass
        return open_or_reuse(f, mode=mode, **kwargs)
-class Dummy(Base):
+class Default(Exporter):
    '''Default exporter. Writes CSV and sqlite results, as well as the simulation YAML'''
    def start(self):
-        with open(os.path.join(self.sim.outdir, 'dummy')) as f:
+        if not self.dry_run:
-            f.write('simulation started @ {}'.format(time.time()))
+            logger.info('Dumping results to %s', self.outdir)
            self.sim.dump_yaml(outdir=self.outdir)
        else:
            logger.info('NOT dumping results')
-    def env(self, env):
+    def trial_end(self, env):
-        with open(os.path.join(self.sim.outdir, 'dummy-trial-{}'.format(env.name))) as f:
+        if not self.dry_run:
            with timer('Dumping simulation {} trial {}'.format(self.sim.name,
                                                               env.name)):
                with self.output('{}.sqlite'.format(env.name), mode='wb') as f:
                    env.dump_sqlite(f)
 class CSV(Exporter):
    '''Exporter that dumps every finished trial to `<trial name>.csv`.'''
    def trial_end(self, env):
        '''Write the CSV dump of `env`; nothing is written in dry-run mode.'''
        if self.dry_run:
            return
        label = '[CSV] Dumping simulation {} trial {}'.format(self.sim.name,
                                                              env.name)
        csv_name = '{}.csv'.format(env.name)
        with timer(label):
            with self.output(csv_name) as f:
                env.dump_csv(f)
 class Gexf(Exporter):
    '''Exporter that dumps every finished trial to `<trial name>.gexf`.'''
    def trial_end(self, env):
        '''Write the GEXF dump of `env`; nothing is written in dry-run mode.'''
        if not self.dry_run:
            # The label previously read '[CSV]' (copy-paste from the CSV
            # exporter), which made the timing logs of both exporters
            # indistinguishable.
            with timer('[GEXF] Dumping simulation {} trial {}'.format(self.sim.name,
                                                                      env.name)):
                # Binary mode: the handle is passed on to env.dump_gexf.
                with self.output('{}.gexf'.format(env.name), mode='wb') as f:
                    env.dump_gexf(f)
 class Dummy(Exporter):
    def start(self):
        with self.output('dummy', 'w') as f:
            f.write('simulation started @ {}\n'.format(time.time()))
    def trial_end(self, env):
        with self.output('dummy', 'w') as f:
            for i in env.history_to_tuples():
-                f.write(','.join(i))
+                f.write(','.join(map(str, i)))
-
+                f.write('\n')
    def end(self):
-        with open(os.path.join(self.sim.outdir, 'dummy')) as f:
+        with self.output('dummy', 'a') as f:
-            f.write('simulation ended @ {}'.format(time.time()))
+            f.write('simulation ended @ {}\n'.format(time.time()))
 class Distribution(Exporter):
    '''
    Write the distribution of agent states at the end of each trial,
    the mean value, and its deviation.

    Per-trial values are accumulated in `self.means` and `self.counts`
    and written to `metrics.csv` and `counts.csv` when the simulation ends.
    '''
    def start(self):
        '''Reset the accumulators before any trial is processed.'''
        self.means = []
        self.counts = []
    def trial_end(self, env):
        '''Collect, for the last row of the trial's history, either the mean
        of each attribute or (when the mean raises TypeError) its value counts.'''
        df = env[None, None, None].df()
        ix = df.index[-1]
        # NOTE(review): assumes the columns are a MultiIndex whose first
        # level holds the attribute names -- confirm against Records.df.
        attrs = df.columns.levels[0]
        for a in attrs:
            t = df.loc[(ix, a)]
            try:
                self.means.append(('mean', a, t.mean()))
            except TypeError:
                # Series.iteritems() was removed in pandas 2.0; items() is
                # the equivalent spelling and is also available in older versions.
                for name, count in t.value_counts().items():
                    self.counts.append(('count', a, name, count))
    def end(self):
        '''Aggregate the collected values across trials and write both CSV files.'''
        dfm = pd.DataFrame(self.means, columns=['metric', 'key', 'value'])
        dfc = pd.DataFrame(self.counts, columns=['metric', 'key', 'value', 'count'])
        dfm = dfm.groupby(by=['key']).agg(['mean', 'std', 'count', 'median', 'max', 'min'])
        dfc = dfc.groupby(by=['key', 'value']).agg(['mean', 'std', 'count', 'median', 'max', 'min'])
        with self.output('counts.csv') as f:
            dfc.to_csv(f)
        with self.output('metrics.csv') as f:
            dfm.to_csv(f)
 class GraphDrawing(Exporter):
    '''Exporter that draws the network of every finished trial to a PNG file.'''
    def trial_end(self, env):
        '''Draw `env.G` with a spring layout and save it as `graph-<trial name>.png`.

        The image is written through `self.output`, like the other exporters.
        Nothing is drawn in dry-run mode, matching the CSV and Gexf exporters.
        '''
        if self.dry_run:
            return
        # Outside effects
        fig = plt.figure()
        try:
            nx.draw(env.G, node_size=10, width=0.2,
                    pos=nx.spring_layout(env.G, scale=100),
                    ax=fig.add_subplot(111))
            # The previous code opened the file in read mode and rebound the
            # figure's name to the file handle, so savefig was called on the
            # file object instead of the figure.
            with self.output('graph-{}.png'.format(env.name), mode='wb') as out:
                fig.savefig(out, format='png')
        finally:
            # Release the figure so they do not pile up across trials.
            plt.close(fig)
--- a/soil/history.py
+++ b/soil/history.py
@@ -4,6 +4,7 @@ import pandas as pd
 import sqlite3
 import copy
 import logging
 import tempfile
 logger = logging.getLogger(__name__)
@@ -17,16 +18,18 @@ class History:
    Store and retrieve values from a sqlite database.
    """
-    def __init__(self, db_path=None, name=None, outdir=None, backup=False):
+    def __init__(self, name=None, db_path=None, backup=False):
-        if db_path is None and name:
+        self._db = None
-            db_path = os.path.join(outdir or os.getcwd(),
+
-                                   '{}.db.sqlite'.format(name))
+        if db_path is None:
-        if db_path:
+            if not name:
-            if backup and os.path.exists(db_path):
+                name = time.time()
-                newname = db_path + '.backup{}.sqlite'.format(time.time())
+            _, db_path = tempfile.mkstemp(suffix='{}.sqlite'.format(name))
-                os.rename(db_path, newname)
+
-        else:
+        if backup and os.path.exists(db_path):
-            db_path = ":memory:"
+            newname = db_path + '.backup{}.sqlite'.format(time.time())
            os.rename(db_path, newname)
        self.db_path = db_path
        self.db = db_path
@@ -49,6 +52,7 @@ class History:
    @db.setter
    def db(self, db_path=None):
        self._close()
        db_path = db_path or self.db_path
        if isinstance(db_path, str):
            logger.debug('Connecting to database {}'.format(db_path))
@@ -56,6 +60,13 @@ class History:
        else:
            self._db = db_path
    def _close(self):
        if self._db is None:
            return
        self.flush_cache()
        self._db.close()
        self._db = None
    @property
    def dtypes(self):
        self.read_types()
@@ -110,7 +121,6 @@ class History:
            raise ValueError("Unknown datatype for {} and {}".format(key, value))
        return self._dtypes[key][2](value)
    def flush_cache(self):
        '''
        Use a cache to save state changes to avoid opening a session for every change.
@@ -154,8 +164,6 @@ class History:
            return r.value()
        return r
    def read_sql(self, keys=None, agent_ids=None, t_steps=None, convert_types=False, limit=-1):
        self.read_types()
@@ -214,16 +222,22 @@ class History:
        if t_steps:
            df_p = df_p.reindex(t_steps, method='ffill')
        return df_p.ffill()
-    
+
    def __getstate__(self):
        state = dict(**self.__dict__)
        del state['_db']
        del state['_dtypes']
        return state
-    
+
    def __setstate__(self, state):
        self.__dict__ = state
        self._dtypes = {}
        self._db = None
    def dump(self, f):
        self._close()
        for line in open(self.db_path, 'rb'):
            f.write(line)
 class Records():
@@ -274,10 +288,13 @@ class Records():
                i = self._df[f.key][str(f.agent_id)]
                ix = i.index.get_loc(f.t_step, method='ffill')
                return i.iloc[ix]
-            except KeyError:
+            except KeyError as ex:
                return self.dtypes[f.key][2]()
        return list(self)
    def df(self):
        '''Return the `_df` object held by these records, as-is (no copy).'''
        return self._df
    def __getitem__(self, k):
        n = copy.copy(self)
        n.filter(k)
@@ -293,6 +310,5 @@ class Records():
            return str(self.value())
        return '<Records for [{}]>'.format(self._filter)
 Key = namedtuple('Key', ['agent_id', 't_step', 'key'])
 Record = namedtuple('Record', 'agent_id t_step key value')
--- a/soil/serialization.py
+++ b/soil/serialization.py
@@ -2,16 +2,15 @@ import os
 import logging
 import ast
 import sys
 import yaml
 import importlib
 from glob import glob
 from random import random
 from copy import deepcopy
 from itertools import product, chain
 import yaml
 import networkx as nx
 from jinja2 import Template
 import networkx as nx
 logger = logging.getLogger('soil')
 logger.setLevel(logging.INFO)
@@ -36,6 +35,9 @@ def load_network(network_params, dir_path=None):
        return method(path, **kwargs)
    net_args = network_params.copy()
    if 'generator' not in net_args:
        return nx.Graph()
    net_gen = net_args.pop('generator')
    if dir_path not in sys.path:
@@ -51,6 +53,7 @@ def load_file(infile):
    with open(infile, 'r') as f:
        return list(chain.from_iterable(map(expand_template, load_string(f))))
 def load_string(string):
    yield from yaml.load_all(string)
@@ -91,7 +94,6 @@ def expand_template(config):
    blank = list(load_string(blank_str))
    if len(blank) > 1:
        raise ValueError('Templates must not return more than one configuration')
    if 'name' in blank[0]:
        raise ValueError('Templates cannot be named, use group instead')
--- a/soil/simulation.py
+++ b/soil/simulation.py
@@ -15,7 +15,7 @@ from nxsim import NetworkSimulation
 from . import serialization, utils, basestring, agents
 from .environment import Environment
-from .serialization import logger
+from .utils import logger
 from .exporters import for_sim as exporters_for_sim
@@ -65,8 +65,6 @@ class Simulation(NetworkSimulation):
        whose value indicates the state
    dir_path: str, optional
        Directory path to load simulation assets (files, modules...)
    outdir : str, optional
        Directory path to save simulation results
    seed : str, optional
        Seed to use for the random generator
    num_trials : int, optional
@@ -87,11 +85,11 @@ class Simulation(NetworkSimulation):
    def __init__(self, name=None, group=None, topology=None, network_params=None,
                 network_agents=None, agent_type=None, states=None,
-                 default_state=None, interval=1, dump=None, dry_run=False,
+                 default_state=None, interval=1, num_trials=1,
-                 outdir=None, num_trials=1, max_time=100,
+                 max_time=100, load_module=None, seed=None,
-                 load_module=None, seed=None, dir_path=None,
+                 dir_path=None, environment_agents=None,
-                 environment_agents=None, environment_params=None,
+                 environment_params=None, environment_class=None,
-                 environment_class=None, **kwargs):
+                 **kwargs):
        self.seed = str(seed) or str(time.time())
        self.load_module = load_module
@@ -101,18 +99,10 @@ class Simulation(NetworkSimulation):
        self.num_trials = num_trials
        self.max_time = max_time
        self.default_state = default_state or {}
        if not outdir:
            outdir = os.path.join(os.getcwd(),
                                       'soil_output')
        self.outdir = os.path.join(outdir,
                                   self.group or '',
                                   self.name)
        self.dir_path = dir_path or os.getcwd()
        self.interval = interval
        self.dump = dump
        self.dry_run = dry_run
-        sys.path += list(x for x in [self.outdir, os.getcwd(), self.dir_path] if x not in sys.path)
+        sys.path += list(x for x in [os.getcwd(), self.dir_path] if x not in sys.path)
        if topology is None:
            topology = serialization.load_network(network_params,
@@ -142,6 +132,7 @@ class Simulation(NetworkSimulation):
        return self.run(*args, **kwargs)
    def run(self, *args, **kwargs):
        '''Run the simulation and return the list of resulting environments'''
        return list(self._run_simulation_gen(*args, **kwargs))
    def _run_sync_or_async(self, parallel=False, *args, **kwargs):
@@ -152,7 +143,7 @@ class Simulation(NetworkSimulation):
                           **kwargs)
            for i in p.imap_unordered(func, range(self.num_trials)):
                if isinstance(i, Exception):
-                    logger.error('Trial failed:\n\t{}'.format(i.message))
+                    logger.error('Trial failed:\n\t%s', i.message)
                    continue
                yield i
        else:
@@ -162,29 +153,30 @@ class Simulation(NetworkSimulation):
                                     **kwargs)
    def _run_simulation_gen(self, *args, parallel=False, dry_run=False,
-                            exporters=None, **kwargs):
+                            exporters=None, outdir=None, exporter_params={}, **kwargs):
        logger.info('Using exporters: %s', exporters or [])
        logger.info('Output directory: %s', outdir)
        exporters = exporters_for_sim(self,
-                                      exporters or [])
+                                      exporters or [],
                                      dry_run=dry_run,
                                      outdir=outdir,
                                      **exporter_params)
        with utils.timer('simulation {}'.format(self.name)):
            if not (dry_run or self.dry_run):
                logger.info('Dumping results to {}'.format(self.outdir))
                self.dump_pickle(self.outdir)
                self.dump_yaml(self.outdir)
            else:
                logger.info('NOT dumping results')
            for exporter in exporters:
                exporter.start()
            for env in self._run_sync_or_async(*args, parallel=parallel,
-                                               dry_run=dry_run, **kwargs):
+                                               **kwargs):
                for exporter in exporters:
-                    exporter.env(env)
+                    exporter.trial_end(env)
                yield env
            for exporter in exporters:
                exporter.end()
    def get_env(self, trial_id = 0, **kwargs):
        '''Create an environment for a trial of the simulation'''
        opts = self.environment_params.copy()
        env_name = '{}_trial_{}'.format(self.name, trial_id)
        opts.update({
@@ -192,19 +184,17 @@ class Simulation(NetworkSimulation):
            'topology': self.topology.copy(),
            'seed': self.seed+env_name,
            'initial_time': 0,
            'dry_run': self.dry_run,
            'interval': self.interval,
            'network_agents': self.network_agents,
            'states': self.states,
            'default_state': self.default_state,
            'environment_agents': self.environment_agents,
            'outdir': self.outdir,
        })
        opts.update(kwargs)
        env = self.environment_class(**opts)
        return env
-    def run_trial(self, trial_id=0, until=None, dry_run=False, **opts):
+    def run_trial(self, trial_id=0, until=None, **opts):
        """Run a single trial of the simulation
        Parameters
@@ -214,13 +204,9 @@ class Simulation(NetworkSimulation):
        # Set-up trial environment and graph
        until = until or self.max_time
        env = self.get_env(trial_id = trial_id, **opts)
        dry_run = self.dry_run or dry_run
        # Set up agents on nodes
        with utils.timer('Simulation {} trial {}'.format(self.name, trial_id)):
            env.run(until)
        if self.dump and not dry_run:
            with utils.timer('Dumping simulation {} trial {}'.format(self.name, trial_id)):
                env.dump(formats = self.dump)
        return env
    def run_trial_exceptions(self, *args, **kwargs):
        '''
@@ -240,24 +226,25 @@ class Simulation(NetworkSimulation):
    def to_yaml(self):
        return yaml.dump(self.to_dict())
-    def dump_yaml(self, outdir = None, file_name = None):
+
-        outdir = outdir or self.outdir
+    def dump_yaml(self, f=None, outdir=None):
-        if not os.path.exists(outdir):
+        if not f and not outdir:
-            os.makedirs(outdir)
+            raise ValueError('specify a file or an output directory')
-        if not file_name:
+
-            file_name=os.path.join(outdir,
+        if not f:
-                                     '{}.dumped.yml'.format(self.name))
+            f = os.path.join(outdir, '{}.dumped.yml'.format(self.name))
-        with open(file_name, 'w') as f:
+
        with utils.open_or_reuse(f, 'w') as f:
            f.write(self.to_yaml())
-    def dump_pickle(self, outdir = None, pickle_name = None):
+    def dump_pickle(self, f=None, outdir=None):
-        outdir = outdir or self.outdir
+        if not outdir and not f:
-        if not os.path.exists(outdir):
+            raise ValueError('specify a file or an output directory')
-            os.makedirs(outdir)
+
-        if not pickle_name:
+        if not f:
-            pickle_name=os.path.join(outdir,
+            f = os.path.join(outdir,
-                                       '{}.simulation.pickle'.format(self.name))
+                             '{}.simulation.pickle'.format(self.name))
-        with open(pickle_name, 'wb') as f:
+        with utils.open_or_reuse(f, 'wb') as f:
            pickle.dump(self, f)
    def __getstate__(self):
@@ -279,8 +266,6 @@ class Simulation(NetworkSimulation):
    def __setstate__(self, state):
        self.__dict__ = state
        self.load_module = getattr(self, 'load_module', None)
        if self.outdir not in sys.path:
            sys.path += [self.outdir, os.getcwd()]
        if self.dir_path not in sys.path:
            sys.path += [self.dir_path, os.getcwd()]
        self.topology = json_graph.node_link_graph(state['topology'])
@@ -308,24 +293,14 @@ def from_config(conf_or_path):
    return sim
-def run_from_config(*configs, outdir=None, dump=None, timestamp=False,  **kwargs):
+def run_from_config(*configs, **kwargs):
    for config_def in configs:
        # logger.info("Found {} config(s)".format(len(ls)))
        for config, path in serialization.load_config(config_def):
            name = config.get('name', 'unnamed')
            logger.info("Using config(s): {name}".format(name=name))
            if timestamp:
                sim_folder = '{}_{}'.format(name,
                                            time.strftime("%Y-%m-%d_%H:%M:%S"))
            else:
                sim_folder = name
            if dump is not None:
                config['dump'] = dump
            dir_path = config.pop('dir_path', os.path.dirname(path))
            outdir = config.pop('outdir', outdir)
            sim = Simulation(dir_path=dir_path,
                             outdir=outdir,
                             **config)
            logger.info('Dumping results to {} : {}'.format(sim.outdir, sim.dump))
            sim.run_simulation(**kwargs)
--- a/soil/utils.py
+++ b/soil/utils.py
@@ -1,5 +1,6 @@
 import logging
 import time
 import os
 from contextlib import contextmanager
@@ -20,3 +21,17 @@ def timer(name='task', pre="", function=logger.info, to_object=None):
    if to_object:
        to_object.start = start
        to_object.end = end
 def safe_open(path, *args, **kwargs):
    '''Open `path` like the builtin `open`, creating its parent directory first.

    Extra positional and keyword arguments are forwarded to `open`.
    Raises TypeError when `path` is not path-like (raised by `os.path.dirname`).
    '''
    outdir = os.path.dirname(path)
    if outdir:
        # exist_ok avoids the check-then-create race when several trials
        # create the same output directory at the same time.
        os.makedirs(outdir, exist_ok=True)
    return open(path, *args, **kwargs)
 def open_or_reuse(f, *args, **kwargs):
    '''Open `f` with `safe_open`, or hand `f` back unchanged.

    `f` is returned as-is whenever `safe_open` raises TypeError, which is
    what happens when `f` is not path-like (e.g. an already open file object).
    '''
    try:
        handle = safe_open(f, *args, **kwargs)
    except TypeError:
        handle = f
    return handle
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -39,7 +39,6 @@ class TestAnalysis(TestCase):
        agent should be able to update its state."""
        config = {
            'name': 'analysis',
            'dry_run': True,
            'seed': 'seed',
            'network_params': {
                'generator': 'complete_graph',
@@ -53,7 +52,7 @@ class TestAnalysis(TestCase):
            }
        }
        s = simulation.from_config(config)
-        self.env = s.run_simulation()[0]
+        self.env = s.run_simulation(dry_run=True)[0]
    def test_saved(self):
        env = self.env
@@ -65,7 +64,7 @@ class TestAnalysis(TestCase):
    def test_count(self):
        env = self.env
-        df = analysis.read_sql(env._history._db)
+        df = analysis.read_sql(env._history.db_path)
        res = analysis.get_count(df, 'SEED', 'id')
        assert res['SEED']['seedanalysis_trial_0'].iloc[0] == 1
        assert res['SEED']['seedanalysis_trial_0'].iloc[-1] == 1
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -28,7 +28,6 @@ class TestMain(TestCase):
        Raise an exception otherwise.
        """
        config = {
            'dry_run': True,
            'network_params': {
                'path': join(ROOT, 'test.gexf')
            }
@@ -38,7 +37,6 @@ class TestMain(TestCase):
        assert len(G) == 2
        with self.assertRaises(AttributeError):
            config = {
            'dry_run': True,
                'network_params': {
                    'path': join(ROOT, 'unknown.extension')
                }
@@ -52,7 +50,6 @@ class TestMain(TestCase):
        should be used to generate a network
        """
        config = {
            'dry_run': True,
            'network_params': {
                'generator': 'barabasi_albert_graph'
            }
@@ -67,7 +64,6 @@ class TestMain(TestCase):
    def test_empty_simulation(self):
        """A simulation with a base behaviour should do nothing"""
        config = {
            'dry_run': True,
            'network_params': {
                'path': join(ROOT, 'test.gexf')
            },
@@ -84,7 +80,6 @@ class TestMain(TestCase):
        agent should be able to update its state."""
        config = {
            'name': 'CounterAgent',
            'dry_run': True,
            'network_params': {
                'path': join(ROOT, 'test.gexf')
            },
@@ -108,7 +103,6 @@ class TestMain(TestCase):
        """
        config = {
            'name': 'CounterAgent',
            'dry_run': True,
            'network_params': {
                'path': join(ROOT, 'test.gexf')
            },
@@ -134,7 +128,6 @@ class TestMain(TestCase):
    def test_custom_agent(self):
        """Allow for search of neighbors with a certain state_id"""
        config = {
            'dry_run': True,
            'network_params': {
                'path': join(ROOT, 'test.gexf')
            },
@@ -158,8 +151,7 @@ class TestMain(TestCase):
        config['network_params']['path'] = join(EXAMPLES,
                                                config['network_params']['path'])
        s = simulation.from_config(config)
-        s.dry_run = True
+        env = s.run_simulation(dry_run=True)[0]
        env = s.run_simulation()[0]
        for a in env.network_agents:
            skill_level = a.state['skill_level']
            if a.id == 'Torvalds':
@@ -183,14 +175,12 @@ class TestMain(TestCase):
        with utils.timer('loading'):
            config = serialization.load_file(join(EXAMPLES, 'complete.yml'))[0]
            s = simulation.from_config(config)
            s.dry_run = True
        with utils.timer('serializing'):
            serial = s.to_yaml()
        with utils.timer('recovering'):
            recovered = yaml.load(serial)
        with utils.timer('deleting'):
            del recovered['topology']
            del recovered['outdir']
        assert config == recovered
    def test_configuration_changes(self):
@@ -200,16 +190,14 @@ class TestMain(TestCase):
        """
        config = serialization.load_file(join(EXAMPLES, 'complete.yml'))[0]
        s = simulation.from_config(config)
        s.dry_run = True
        for i in range(5):
            s.run_simulation(dry_run=True)
            nconfig = s.to_dict()
            del nconfig['topology']
            del nconfig['outdir']
            assert config == nconfig
    def test_row_conversion(self):
-        env = Environment(dry_run=True)
+        env = Environment()
        env['test'] = 'test_value'
        res = list(env.history_to_tuples())
@@ -228,8 +216,8 @@ class TestMain(TestCase):
        from geometric models. We should work around it.
        """
        G = nx.random_geometric_graph(20, 0.1)
-        env = Environment(topology=G, dry_run=True)
+        env = Environment(topology=G)
-        env.dump_gexf('/tmp/dump-gexf')
+        env.dump_gexf('/tmp/dump-gexf/prueba.gexf')
    def test_save_graph(self):
        '''
@@ -239,7 +227,7 @@ class TestMain(TestCase):
        '''
        G = nx.cycle_graph(5)
        distribution = agents.calculate_distribution(None, agents.BaseAgent)
-        env = Environment(topology=G, network_agents=distribution, dry_run=True)
+        env = Environment(topology=G, network_agents=distribution)
        env[0, 0, 'testvalue'] = 'start'
        env[0, 10, 'testvalue'] = 'finish'
        nG = env.history_to_graph()
@@ -315,8 +303,9 @@ class TestMain(TestCase):
        recovered = pickle.loads(pickled)
        assert recovered.env.name == 'Test'
-        assert recovered['key'] == 'test'
+        assert list(recovered.env._history.to_tuples())
        assert recovered['key', 0] == 'test'
        assert recovered['key'] == 'test'
    def test_history(self):
        '''Test storing in and retrieving from history (sqlite)'''