From d1006bd55cdf2bc4b4fd86178d01d245a6d76913 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Fernando=20S=C3=A1nchez?= Date: Mon, 29 Apr 2019 18:47:15 +0200 Subject: [PATCH] WIP: exporters --- CHANGELOG.md | 11 +- examples/complete.yml | 2 - .../custom_generator/custom_generator.yml | 3 +- examples/custom_timeouts/custom_timeouts.py | 3 +- soil/__init__.py | 20 ++- soil/agents/DrawingAgent.py | 18 -- soil/agents/__init__.py | 1 - soil/analysis.py | 2 +- soil/environment.py | 46 ++--- soil/exporters.py | 165 ++++++++++++++++-- soil/history.py | 50 ++++-- soil/serialization.py | 12 +- soil/simulation.py | 103 +++++------ soil/utils.py | 15 ++ tests/test_analysis.py | 5 +- tests/test_main.py | 25 +-- 16 files changed, 287 insertions(+), 194 deletions(-) delete mode 100644 soil/agents/DrawingAgent.py diff --git a/CHANGELOG.md b/CHANGELOG.md index fee5fc6..c11473d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,12 +9,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), The definition of the variables and their possible values (i.e., a problem in SALib terms), as well as a sampler function, can be provided. Soil uses this definition and the template to generate a set of configurations. * Simulation group names, to link related simulations. For now, they are only used to group all simulations in the same group under the same folder. +* Exporters unify exporting/dumping results and other files to disk. If `dry_run` is set to `True`, exporters will write to stdout instead of a file (useful for testing/debugging). +* Distribution exporter, to write statistics about values and value_counts in every simulation. The results are dumped to two CSV files. ### Changed -* `dir_path` is now the directory for resources (modules, files), and the output dir has been renamed to `outdir` +* `dir_path` is now the directory for resources (modules, files) +* Environments and simulations do not export or write anything by default. That task is delegated to Exporters + +### Removed +* The output dir for environments and simulations (see Exporters) +* DrawingAgent, because it wrote to disk and was not being used. We provide a partial alternative in the form of the GraphDrawing exporter. A complete alternative will be provided once the network at each state can be accessed by exporters. ## Fixed * Modules with custom agents/environments failed to load when they were run from outside the directory of the definition file. Modules are now loaded from the directory of the simulation file in addition to the working directory +* Memory databases (in history) can now be shared between threads. +* Testing all examples, not just subdirectories ## [0.13.8] ### Changed diff --git a/examples/complete.yml b/examples/complete.yml index 0bd0383..ad563a4 100644 --- a/examples/complete.yml +++ b/examples/complete.yml @@ -3,11 +3,9 @@ name: simple group: tests dir_path: "/tmp/" num_trials: 3 -dry_run: True max_time: 100 interval: 1 seed: "CompleteSeed!" -dump: false network_params: generator: complete_graph n: 10 diff --git a/examples/custom_generator/custom_generator.yml b/examples/custom_generator/custom_generator.yml index e455a24..1f8fa36 100644 --- a/examples/custom_generator/custom_generator.yml +++ b/examples/custom_generator/custom_generator.yml @@ -2,7 +2,6 @@ name: custom-generator description: Using a custom generator for the network num_trials: 3 -dry_run: True max_time: 100 interval: 1 network_params: @@ -14,4 +13,4 @@ network_agents: - agent_type: CounterModel weight: 1 state: - id: 0 \ No newline at end of file + id: 0 diff --git a/examples/custom_timeouts/custom_timeouts.py b/examples/custom_timeouts/custom_timeouts.py index 8d36c02..75cfc91 100644 --- a/examples/custom_timeouts/custom_timeouts.py +++ b/examples/custom_timeouts/custom_timeouts.py @@ -29,8 +29,7 @@ if __name__ == '__main__': from soil import Simulation s = Simulation(network_agents=[{'ids': [0], 'agent_type': Fibonacci}, {'ids': [1], 'agent_type': Odds}], - dry_run=True, network_params={"generator": "complete_graph", "n": 2}, max_time=100, ) - s.run() \ No newline at end of file + s.run(dry_run=True) diff --git a/soil/__init__.py b/soil/__init__.py index 9b4b0c6..191ec9d 100644 --- a/soil/__init__.py +++ b/soil/__init__.py @@ -57,18 +57,20 @@ def main(): logging.info('Loading config file: {}'.format(args.file)) try: - dump = [] - if not args.dry_run: - if args.csv: - dump.append('csv') - if args.graph: - dump.append('gexf') + exporters = list(args.exporter or []) + if args.csv: + exporters.append('CSV') + if args.graph: + exporters.append('Gexf') + exp_params = {} + if args.dry_run: + exp_params['copy_to'] = sys.stdout simulation.run_from_config(args.file, dry_run=args.dry_run, - dump=dump, - exporters=args.exporter, + exporters=exporters, parallel=(not args.synchronous), - outdir=args.output) + outdir=args.output, + exporter_params=exp_params) except Exception: if args.pdb: pdb.post_mortem() diff --git a/soil/agents/DrawingAgent.py b/soil/agents/DrawingAgent.py deleted file mode 100644 index cd41534..0000000 --- a/soil/agents/DrawingAgent.py +++ /dev/null @@ -1,18 +0,0 @@ -from . import BaseAgent - -import os.path -import matplotlib -import matplotlib.pyplot as plt -import networkx as nx - - -class DrawingAgent(BaseAgent): - """ - Agent that draws the state of the network. - """ - - def step(self): - # Outside effects - f = plt.figure() - nx.draw(self.env.G, node_size=10, width=0.2, pos=nx.spring_layout(self.env.G, scale=100), ax=f.add_subplot(111)) - f.savefig(os.path.join(self.env.get_path(), "graph-"+str(self.env.now)+".png")) diff --git a/soil/agents/__init__.py b/soil/agents/__init__.py index fb90548..2584b77 100644 --- a/soil/agents/__init__.py +++ b/soil/agents/__init__.py @@ -515,4 +515,3 @@ from .ModelM2 import * from .SentimentCorrelationModel import * from .SISaModel import * from .CounterModel import * -from .DrawingAgent import * diff --git a/soil/analysis.py b/soil/analysis.py index 01d45a8..25e2782 100644 --- a/soil/analysis.py +++ b/soil/analysis.py @@ -34,7 +34,7 @@ def _read_data(pattern, *args, from_csv=False, process_args=None, **kwargs): def read_sql(db, *args, **kwargs): - h = history.History(db, backup=False) + h = history.History(db_path=db, backup=False) df = h.read_sql(*args, **kwargs) return df diff --git a/soil/environment.py b/soil/environment.py index 7efda1f..1341ca7 100644 --- a/soil/environment.py +++ b/soil/environment.py @@ -14,15 +14,13 @@ from networkx.readwrite import json_graph import networkx as nx import nxsim -from . import serialization, agents, analysis, history +from . import serialization, agents, analysis, history, utils # These properties will be copied when pickling/unpickling the environment _CONFIG_PROPS = [ 'name', 'states', 'default_state', 'interval', - 'dry_run', - 'outdir', ] class Environment(nxsim.NetworkEnvironment): @@ -43,8 +41,6 @@ class Environment(nxsim.NetworkEnvironment): default_state=None, interval=1, seed=None, - dry_run=False, - outdir=None, topology=None, *args, **kwargs): self.name = name or 'UnnamedEnvironment' @@ -56,13 +52,8 @@ class Environment(nxsim.NetworkEnvironment): topology = nx.Graph() super().__init__(*args, topology=topology, **kwargs) self._env_agents = {} - self.dry_run = dry_run self.interval = interval - self.outdir = outdir or tempfile.mkdtemp('soil-env') - if not dry_run: - self.get_path() - self._history = history.History(name=self.name if not dry_run else None, - outdir=self.outdir, + self._history = history.History(name=self.name, backup=True) # Add environment agents first, so their events get # executed before network agents @@ -167,8 +158,6 @@ class Environment(nxsim.NetworkEnvironment): self.log_stats() def _save_state(self, now=None): - # for agent in self.agents: - # agent.save_state() serialization.logger.debug('Saving state @{}'.format(self.now)) self._history.save_records(self.state_to_tuples(now=now)) @@ -222,15 +211,6 @@ class Environment(nxsim.NetworkEnvironment): ''' return self[key] if key in self else default - def get_path(self, outdir=None): - outdir = outdir or self.outdir - if not os.path.exists(outdir): - try: - os.makedirs(outdir) - except FileExistsError: - pass - return outdir - def get_agent(self, agent_id): return self.G.node[agent_id]['agent'] @@ -239,20 +219,15 @@ class Environment(nxsim.NetworkEnvironment): return list(self.agents) return [self.G.node[i]['agent'] for i in nodes] - def dump_csv(self, outdir=None): - csv_name = os.path.join(self.get_path(outdir), - '{}.environment.csv'.format(self.name)) - - with open(csv_name, 'w') as f: + def dump_csv(self, f): + with utils.open_or_reuse(f, 'w') as f: cr = csv.writer(f) cr.writerow(('agent_id', 't_step', 'key', 'value')) for i in self.history_to_tuples(): cr.writerow(i) - def dump_gexf(self, outdir=None): + def dump_gexf(self, f): G = self.history_to_graph() - graph_path = os.path.join(self.get_path(outdir), - self.name+".gexf") # Workaround for geometric models # See soil/soil#4 for node in G.nodes(): @@ -260,9 +235,9 @@ class Environment(nxsim.NetworkEnvironment): G.node[node]['viz'] = {"position": {"x": G.node[node]['pos'][0], "y": G.node[node]['pos'][1], "z": 0.0}} del (G.node[node]['pos']) - nx.write_gexf(G, graph_path, version="1.2draft") + nx.write_gexf(G, f, version="1.2draft") - def dump(self, outdir=None, formats=None): + def dump(self, *args, formats=None, **kwargs): if not formats: return functions = { @@ -271,10 +246,13 @@ class Environment(nxsim.NetworkEnvironment): } for f in formats: if f in functions: - functions[f](outdir) + functions[f](*args, **kwargs) else: raise ValueError('Unknown format: {}'.format(f)) + def dump_sqlite(self, f): + return self._history.dump(f) + def state_to_tuples(self, now=None): if now is None: now = self.now @@ -338,7 +316,7 @@ class Environment(nxsim.NetworkEnvironment): G.add_node(agent.id, **attributes) return G - + def stats(self): stats = {} stats['network'] = {} diff --git a/soil/exporters.py b/soil/exporters.py index 16bcd86..aa6f2dc 100644 --- a/soil/exporters.py +++ b/soil/exporters.py @@ -1,43 +1,174 @@ -from .serialization import deserialize import os import time +from io import BytesIO + +import matplotlib.pyplot as plt +import networkx as nx +import pandas as pd + +from .serialization import deserialize +from .utils import open_or_reuse, logger, timer -def for_sim(simulation, names, dir_path=None): +from . import utils + + +def for_sim(simulation, names, *args, **kwargs): + '''Return the set of exporters for a simulation, given the exporter names''' exporters = [] for name in names: mod = deserialize(name, known_modules=['soil.exporters']) - exporters.append(mod(simulation)) + exporters.append(mod(simulation, *args, **kwargs)) return exporters +class DryRunner(BytesIO): + def __init__(self, fname, *args, copy_to=None, **kwargs): + super().__init__(*args, **kwargs) + self.__fname = fname + self.__copy_to = copy_to + + def write(self, txt): + if self.__copy_to: + self.__copy_to.write('{}:::{}'.format(self.__fname, txt)) + try: + super().write(txt) + except TypeError: + super().write(bytes(txt, 'utf-8')) -class Base: + def close(self): + logger.info('**Not** written to {} (dry run mode):\n\n{}\n\n'.format(self.__fname, + self.getvalue().decode())) + super().close() - def __init__(self, simulation): + +class Exporter: + ''' + Interface for all exporters. It is not necessary, but it is useful + if you don't plan to implement all the methods. + ''' + + def __init__(self, simulation, outdir=None, dry_run=None, copy_to=None): self.sim = simulation + outdir = outdir or os.getcwd() + self.outdir = os.path.join(outdir, + simulation.group or '', + simulation.name) + self.dry_run = dry_run + self.copy_to = copy_to def start(self): - pass + '''Method to call when the simulation starts''' def end(self): - pass + '''Method to call when the simulation ends''' - def env(self): - pass + def trial_end(self, env): + '''Method to call when a trial ends''' + def output(self, f, mode='w', **kwargs): + if self.dry_run: + f = DryRunner(f, copy_to=self.copy_to) + else: + try: + if not os.path.isabs(f): + f = os.path.join(self.outdir, f) + except TypeError: + pass + return open_or_reuse(f, mode=mode, **kwargs) -class Dummy(Base): + +class Default(Exporter): + '''Default exporter. Writes CSV and sqlite results, as well as the simulation YAML''' def start(self): - with open(os.path.join(self.sim.outdir, 'dummy')) as f: - f.write('simulation started @ {}'.format(time.time())) + if not self.dry_run: + logger.info('Dumping results to %s', self.outdir) + self.sim.dump_yaml(outdir=self.outdir) + else: + logger.info('NOT dumping results') + + def trial_end(self, env): + if not self.dry_run: + with timer('Dumping simulation {} trial {}'.format(self.sim.name, + env.name)): + with self.output('{}.sqlite'.format(env.name), mode='wb') as f: + env.dump_sqlite(f) + + +class CSV(Exporter): + def trial_end(self, env): + if not self.dry_run: + with timer('[CSV] Dumping simulation {} trial {}'.format(self.sim.name, + env.name)): + with self.output('{}.csv'.format(env.name)) as f: + env.dump_csv(f) + - def env(self, env): - with open(os.path.join(self.sim.outdir, 'dummy-trial-{}'.format(env.name))) as f: +class Gexf(Exporter): + def trial_end(self, env): + if not self.dry_run: + with timer('[CSV] Dumping simulation {} trial {}'.format(self.sim.name, + env.name)): + with self.output('{}.gexf'.format(env.name), mode='wb') as f: + env.dump_gexf(f) + + +class Dummy(Exporter): + + def start(self): + with self.output('dummy', 'w') as f: + f.write('simulation started @ {}\n'.format(time.time())) + + def trial_end(self, env): + with self.output('dummy', 'w') as f: for i in env.history_to_tuples(): - f.write(','.join(i)) + f.write(','.join(map(str, i))) + f.write('\n') + + def end(self): + with self.output('dummy', 'a') as f: + f.write('simulation ended @ {}\n'.format(time.time())) +class Distribution(Exporter): + ''' + Write the distribution of agent states at the end of each trial, + the mean value, and its deviation. + ''' + + def start(self): + self.means = [] + self.counts = [] + + def trial_end(self, env): + df = env[None, None, None].df() + ix = df.index[-1] + attrs = df.columns.levels[0] + vc = {} + stats = {} + for a in attrs: + t = df.loc[(ix, a)] + try: + self.means.append(('mean', a, t.mean())) + except TypeError: + for name, count in t.value_counts().iteritems(): + self.counts.append(('count', a, name, count)) + def end(self): - with open(os.path.join(self.sim.outdir, 'dummy')) as f: - f.write('simulation ended @ {}'.format(time.time())) + dfm = pd.DataFrame(self.means, columns=['metric', 'key', 'value']) + dfc = pd.DataFrame(self.counts, columns=['metric', 'key', 'value', 'count']) + dfm = dfm.groupby(by=['key']).agg(['mean', 'std', 'count', 'median', 'max', 'min']) + dfc = dfc.groupby(by=['key', 'value']).agg(['mean', 'std', 'count', 'median', 'max', 'min']) + with self.output('counts.csv') as f: + dfc.to_csv(f) + with self.output('metrics.csv') as f: + dfm.to_csv(f) + +class GraphDrawing(Exporter): + + def trial_end(self, env): + # Outside effects + f = plt.figure() + nx.draw(env.G, node_size=10, width=0.2, pos=nx.spring_layout(env.G, scale=100), ax=f.add_subplot(111)) + with open('graph-{}.png'.format(env.name)) as f: + f.savefig(f) diff --git a/soil/history.py b/soil/history.py index 7be5ad3..41282f0 100644 --- a/soil/history.py +++ b/soil/history.py @@ -4,6 +4,7 @@ import pandas as pd import sqlite3 import copy import logging +import tempfile logger = logging.getLogger(__name__) @@ -17,16 +18,18 @@ class History: Store and retrieve values from a sqlite database. """ - def __init__(self, db_path=None, name=None, outdir=None, backup=False): - if db_path is None and name: - db_path = os.path.join(outdir or os.getcwd(), - '{}.db.sqlite'.format(name)) - if db_path: - if backup and os.path.exists(db_path): - newname = db_path + '.backup{}.sqlite'.format(time.time()) - os.rename(db_path, newname) - else: - db_path = ":memory:" + def __init__(self, name=None, db_path=None, backup=False): + self._db = None + + if db_path is None: + if not name: + name = time.time() + _, db_path = tempfile.mkstemp(suffix='{}.sqlite'.format(name)) + + if backup and os.path.exists(db_path): + newname = db_path + '.backup{}.sqlite'.format(time.time()) + os.rename(db_path, newname) + self.db_path = db_path self.db = db_path @@ -49,6 +52,7 @@ class History: @db.setter def db(self, db_path=None): + self._close() db_path = db_path or self.db_path if isinstance(db_path, str): logger.debug('Connecting to database {}'.format(db_path)) @@ -56,6 +60,13 @@ class History: else: self._db = db_path + def _close(self): + if self._db is None: + return + self.flush_cache() + self._db.close() + self._db = None + @property def dtypes(self): self.read_types() @@ -110,7 +121,6 @@ class History: raise ValueError("Unknown datatype for {} and {}".format(key, value)) return self._dtypes[key][2](value) - def flush_cache(self): ''' Use a cache to save state changes to avoid opening a session for every change. @@ -154,8 +164,6 @@ class History: return r.value() return r - - def read_sql(self, keys=None, agent_ids=None, t_steps=None, convert_types=False, limit=-1): self.read_types() @@ -214,16 +222,22 @@ class History: if t_steps: df_p = df_p.reindex(t_steps, method='ffill') return df_p.ffill() - + def __getstate__(self): state = dict(**self.__dict__) del state['_db'] del state['_dtypes'] return state - + def __setstate__(self, state): self.__dict__ = state self._dtypes = {} + self._db = None + + def dump(self, f): + self._close() + for line in open(self.db_path, 'rb'): + f.write(line) class Records(): @@ -274,10 +288,13 @@ class Records(): i = self._df[f.key][str(f.agent_id)] ix = i.index.get_loc(f.t_step, method='ffill') return i.iloc[ix] - except KeyError: + except KeyError as ex: return self.dtypes[f.key][2]() return list(self) + def df(self): + return self._df + def __getitem__(self, k): n = copy.copy(self) n.filter(k) @@ -293,6 +310,5 @@ class Records(): return str(self.value()) return ''.format(self._filter) - Key = namedtuple('Key', ['agent_id', 't_step', 'key']) Record = namedtuple('Record', 'agent_id t_step key value') diff --git a/soil/serialization.py b/soil/serialization.py index f00cf72..884e2f0 100644 --- a/soil/serialization.py +++ b/soil/serialization.py @@ -2,16 +2,15 @@ import os import logging import ast import sys -import yaml import importlib from glob import glob -from random import random -from copy import deepcopy from itertools import product, chain +import yaml +import networkx as nx + from jinja2 import Template -import networkx as nx logger = logging.getLogger('soil') logger.setLevel(logging.INFO) @@ -36,6 +35,9 @@ def load_network(network_params, dir_path=None): return method(path, **kwargs) net_args = network_params.copy() + if 'generator' not in net_args: + return nx.Graph() + net_gen = net_args.pop('generator') if dir_path not in sys.path: @@ -51,6 +53,7 @@ def load_file(infile): with open(infile, 'r') as f: return list(chain.from_iterable(map(expand_template, load_string(f)))) + def load_string(string): yield from yaml.load_all(string) @@ -91,7 +94,6 @@ def expand_template(config): blank = list(load_string(blank_str)) if len(blank) > 1: raise ValueError('Templates must not return more than one configuration') - if 'name' in blank[0]: raise ValueError('Templates cannot be named, use group instead') diff --git a/soil/simulation.py b/soil/simulation.py index a008e74..daade10 100644 --- a/soil/simulation.py +++ b/soil/simulation.py @@ -15,7 +15,7 @@ from nxsim import NetworkSimulation from . import serialization, utils, basestring, agents from .environment import Environment -from .serialization import logger +from .utils import logger from .exporters import for_sim as exporters_for_sim @@ -65,8 +65,6 @@ class Simulation(NetworkSimulation): whose value indicates the state dir_path: str, optional Directory path to load simulation assets (files, modules...) - outdir : str, optional - Directory path to save simulation results seed : str, optional Seed to use for the random generator num_trials : int, optional @@ -87,11 +85,11 @@ class Simulation(NetworkSimulation): def __init__(self, name=None, group=None, topology=None, network_params=None, network_agents=None, agent_type=None, states=None, - default_state=None, interval=1, dump=None, dry_run=False, - outdir=None, num_trials=1, max_time=100, - load_module=None, seed=None, dir_path=None, - environment_agents=None, environment_params=None, - environment_class=None, **kwargs): + default_state=None, interval=1, num_trials=1, + max_time=100, load_module=None, seed=None, + dir_path=None, environment_agents=None, + environment_params=None, environment_class=None, + **kwargs): self.seed = str(seed) or str(time.time()) self.load_module = load_module @@ -101,18 +99,10 @@ class Simulation(NetworkSimulation): self.num_trials = num_trials self.max_time = max_time self.default_state = default_state or {} - if not outdir: - outdir = os.path.join(os.getcwd(), - 'soil_output') - self.outdir = os.path.join(outdir, - self.group or '', - self.name) self.dir_path = dir_path or os.getcwd() self.interval = interval - self.dump = dump - self.dry_run = dry_run - sys.path += list(x for x in [self.outdir, os.getcwd(), self.dir_path] if x not in sys.path) + sys.path += list(x for x in [os.getcwd(), self.dir_path] if x not in sys.path) if topology is None: topology = serialization.load_network(network_params, @@ -142,6 +132,7 @@ class Simulation(NetworkSimulation): return self.run(*args, **kwargs) def run(self, *args, **kwargs): + '''Run the simulation and return the list of resulting environments''' return list(self._run_simulation_gen(*args, **kwargs)) def _run_sync_or_async(self, parallel=False, *args, **kwargs): @@ -152,7 +143,7 @@ class Simulation(NetworkSimulation): **kwargs) for i in p.imap_unordered(func, range(self.num_trials)): if isinstance(i, Exception): - logger.error('Trial failed:\n\t{}'.format(i.message)) + logger.error('Trial failed:\n\t%s', i.message) continue yield i else: @@ -162,29 +153,30 @@ class Simulation(NetworkSimulation): **kwargs) def _run_simulation_gen(self, *args, parallel=False, dry_run=False, - exporters=None, **kwargs): + exporters=None, outdir=None, exporter_params={}, **kwargs): + logger.info('Using exporters: %s', exporters or []) + logger.info('Output directory: %s', outdir) exporters = exporters_for_sim(self, - exporters or []) + exporters or [], + dry_run=dry_run, + outdir=outdir, + **exporter_params) + with utils.timer('simulation {}'.format(self.name)): - if not (dry_run or self.dry_run): - logger.info('Dumping results to {}'.format(self.outdir)) - self.dump_pickle(self.outdir) - self.dump_yaml(self.outdir) - else: - logger.info('NOT dumping results') for exporter in exporters: exporter.start() for env in self._run_sync_or_async(*args, parallel=parallel, - dry_run=dry_run, **kwargs): + **kwargs): for exporter in exporters: - exporter.env(env) + exporter.trial_end(env) yield env for exporter in exporters: exporter.end() def get_env(self, trial_id = 0, **kwargs): + '''Create an environment for a trial of the simulation''' opts = self.environment_params.copy() env_name = '{}_trial_{}'.format(self.name, trial_id) opts.update({ @@ -192,19 +184,17 @@ class Simulation(NetworkSimulation): 'topology': self.topology.copy(), 'seed': self.seed+env_name, 'initial_time': 0, - 'dry_run': self.dry_run, 'interval': self.interval, 'network_agents': self.network_agents, 'states': self.states, 'default_state': self.default_state, 'environment_agents': self.environment_agents, - 'outdir': self.outdir, }) opts.update(kwargs) env = self.environment_class(**opts) return env - def run_trial(self, trial_id=0, until=None, dry_run=False, **opts): + def run_trial(self, trial_id=0, until=None, **opts): """Run a single trial of the simulation Parameters @@ -214,13 +204,9 @@ class Simulation(NetworkSimulation): # Set-up trial environment and graph until = until or self.max_time env = self.get_env(trial_id = trial_id, **opts) - dry_run = self.dry_run or dry_run # Set up agents on nodes with utils.timer('Simulation {} trial {}'.format(self.name, trial_id)): env.run(until) - if self.dump and not dry_run: - with utils.timer('Dumping simulation {} trial {}'.format(self.name, trial_id)): - env.dump(formats = self.dump) return env def run_trial_exceptions(self, *args, **kwargs): ''' @@ -240,24 +226,25 @@ class Simulation(NetworkSimulation): def to_yaml(self): return yaml.dump(self.to_dict()) - def dump_yaml(self, outdir = None, file_name = None): - outdir = outdir or self.outdir - if not os.path.exists(outdir): - os.makedirs(outdir) - if not file_name: - file_name=os.path.join(outdir, - '{}.dumped.yml'.format(self.name)) - with open(file_name, 'w') as f: + + def dump_yaml(self, f=None, outdir=None): + if not f and not outdir: + raise ValueError('specify a file or an output directory') + + if not f: + f = os.path.join(outdir, '{}.dumped.yml'.format(self.name)) + + with utils.open_or_reuse(f, 'w') as f: f.write(self.to_yaml()) - def dump_pickle(self, outdir = None, pickle_name = None): - outdir = outdir or self.outdir - if not os.path.exists(outdir): - os.makedirs(outdir) - if not pickle_name: - pickle_name=os.path.join(outdir, - '{}.simulation.pickle'.format(self.name)) - with open(pickle_name, 'wb') as f: + def dump_pickle(self, f=None, outdir=None): + if not outdir and not f: + raise ValueError('specify a file or an output directory') + + if not f: + f = os.path.join(outdir, + '{}.simulation.pickle'.format(self.name)) + with utils.open_or_reuse(f, 'wb') as f: pickle.dump(self, f) def __getstate__(self): @@ -279,8 +266,6 @@ class Simulation(NetworkSimulation): def __setstate__(self, state): self.__dict__ = state self.load_module = getattr(self, 'load_module', None) - if self.outdir not in sys.path: - sys.path += [self.outdir, os.getcwd()] if self.dir_path not in sys.path: sys.path += [self.dir_path, os.getcwd()] self.topology = json_graph.node_link_graph(state['topology']) @@ -308,24 +293,14 @@ def from_config(conf_or_path): return sim -def run_from_config(*configs, outdir=None, dump=None, timestamp=False, **kwargs): +def run_from_config(*configs, **kwargs): for config_def in configs: # logger.info("Found {} config(s)".format(len(ls))) for config, path in serialization.load_config(config_def): name = config.get('name', 'unnamed') logger.info("Using config(s): {name}".format(name=name)) - if timestamp: - sim_folder = '{}_{}'.format(name, - time.strftime("%Y-%m-%d_%H:%M:%S")) - else: - sim_folder = name - if dump is not None: - config['dump'] = dump dir_path = config.pop('dir_path', os.path.dirname(path)) - outdir = config.pop('outdir', outdir) sim = Simulation(dir_path=dir_path, - outdir=outdir, **config) - logger.info('Dumping results to {} : {}'.format(sim.outdir, sim.dump)) sim.run_simulation(**kwargs) diff --git a/soil/utils.py b/soil/utils.py index a965a93..5fa67aa 100644 --- a/soil/utils.py +++ b/soil/utils.py @@ -1,5 +1,6 @@ import logging import time +import os from contextlib import contextmanager @@ -20,3 +21,17 @@ def timer(name='task', pre="", function=logger.info, to_object=None): if to_object: to_object.start = start to_object.end = end + + +def safe_open(path, *args, **kwargs): + outdir = os.path.dirname(path) + if outdir and not os.path.exists(outdir): + os.makedirs(outdir) + return open(path, *args, **kwargs) + + +def open_or_reuse(f, *args, **kwargs): + try: + return safe_open(f, *args, **kwargs) + except TypeError: + return f diff --git a/tests/test_analysis.py b/tests/test_analysis.py index a5b1e94..7150d9d 100644 --- a/tests/test_analysis.py +++ b/tests/test_analysis.py @@ -39,7 +39,6 @@ class TestAnalysis(TestCase): agent should be able to update its state.""" config = { 'name': 'analysis', - 'dry_run': True, 'seed': 'seed', 'network_params': { 'generator': 'complete_graph', @@ -53,7 +52,7 @@ class TestAnalysis(TestCase): } } s = simulation.from_config(config) - self.env = s.run_simulation()[0] + self.env = s.run_simulation(dry_run=True)[0] def test_saved(self): env = self.env @@ -65,7 +64,7 @@ class TestAnalysis(TestCase): def test_count(self): env = self.env - df = analysis.read_sql(env._history._db) + df = analysis.read_sql(env._history.db_path) res = analysis.get_count(df, 'SEED', 'id') assert res['SEED']['seedanalysis_trial_0'].iloc[0] == 1 assert res['SEED']['seedanalysis_trial_0'].iloc[-1] == 1 diff --git a/tests/test_main.py b/tests/test_main.py index 44a8150..286c1a8 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -28,7 +28,6 @@ class TestMain(TestCase): Raise an exception otherwise. """ config = { - 'dry_run': True, 'network_params': { 'path': join(ROOT, 'test.gexf') } @@ -38,7 +37,6 @@ class TestMain(TestCase): assert len(G) == 2 with self.assertRaises(AttributeError): config = { - 'dry_run': True, 'network_params': { 'path': join(ROOT, 'unknown.extension') } @@ -52,7 +50,6 @@ class TestMain(TestCase): should be used to generate a network """ config = { - 'dry_run': True, 'network_params': { 'generator': 'barabasi_albert_graph' } @@ -67,7 +64,6 @@ class TestMain(TestCase): def test_empty_simulation(self): """A simulation with a base behaviour should do nothing""" config = { - 'dry_run': True, 'network_params': { 'path': join(ROOT, 'test.gexf') }, @@ -84,7 +80,6 @@ class TestMain(TestCase): agent should be able to update its state.""" config = { 'name': 'CounterAgent', - 'dry_run': True, 'network_params': { 'path': join(ROOT, 'test.gexf') }, @@ -108,7 +103,6 @@ class TestMain(TestCase): """ config = { 'name': 'CounterAgent', - 'dry_run': True, 'network_params': { 'path': join(ROOT, 'test.gexf') }, @@ -134,7 +128,6 @@ class TestMain(TestCase): def test_custom_agent(self): """Allow for search of neighbors with a certain state_id""" config = { - 'dry_run': True, 'network_params': { 'path': join(ROOT, 'test.gexf') }, @@ -158,8 +151,7 @@ class TestMain(TestCase): config['network_params']['path'] = join(EXAMPLES, config['network_params']['path']) s = simulation.from_config(config) - s.dry_run = True - env = s.run_simulation()[0] + env = s.run_simulation(dry_run=True)[0] for a in env.network_agents: skill_level = a.state['skill_level'] if a.id == 'Torvalds': @@ -183,14 +175,12 @@ class TestMain(TestCase): with utils.timer('loading'): config = serialization.load_file(join(EXAMPLES, 'complete.yml'))[0] s = simulation.from_config(config) - s.dry_run = True with utils.timer('serializing'): serial = s.to_yaml() with utils.timer('recovering'): recovered = yaml.load(serial) with utils.timer('deleting'): del recovered['topology'] - del recovered['outdir'] assert config == recovered def test_configuration_changes(self): @@ -200,16 +190,14 @@ class TestMain(TestCase): """ config = serialization.load_file(join(EXAMPLES, 'complete.yml'))[0] s = simulation.from_config(config) - s.dry_run = True for i in range(5): s.run_simulation(dry_run=True) nconfig = s.to_dict() del nconfig['topology'] - del nconfig['outdir'] assert config == nconfig def test_row_conversion(self): - env = Environment(dry_run=True) + env = Environment() env['test'] = 'test_value' res = list(env.history_to_tuples()) @@ -228,8 +216,8 @@ class TestMain(TestCase): from geometric models. We should work around it. """ G = nx.random_geometric_graph(20, 0.1) - env = Environment(topology=G, dry_run=True) - env.dump_gexf('/tmp/dump-gexf') + env = Environment(topology=G) + env.dump_gexf('/tmp/dump-gexf/prueba.gexf') def test_save_graph(self): ''' @@ -239,7 +227,7 @@ class TestMain(TestCase): ''' G = nx.cycle_graph(5) distribution = agents.calculate_distribution(None, agents.BaseAgent) - env = Environment(topology=G, network_agents=distribution, dry_run=True) + env = Environment(topology=G, network_agents=distribution) env[0, 0, 'testvalue'] = 'start' env[0, 10, 'testvalue'] = 'finish' nG = env.history_to_graph() @@ -315,8 +303,9 @@ class TestMain(TestCase): recovered = pickle.loads(pickled) assert recovered.env.name == 'Test' - assert recovered['key'] == 'test' + assert list(recovered.env._history.to_tuples()) assert recovered['key', 0] == 'test' + assert recovered['key'] == 'test' def test_history(self): '''Test storing in and retrieving from history (sqlite)'''