From 6f7481769e57a98c00e0a73ac21f6de72f8603b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Fernando=20S=C3=A1nchez?= Date: Mon, 4 Apr 2022 16:00:54 +0200 Subject: [PATCH] WIP --- soil/__init__.py | 6 +- soil/agents/__init__.py | 13 ++- soil/config.py | 251 ++++++++++++++++++++++++++++++++++++++++ soil/datacollection.py | 10 +- soil/environment.py | 174 ++++++++-------------------- soil/exporters.py | 98 +++++++++++++--- soil/serialization.py | 24 +++- soil/simulation.py | 230 ++++++------------------------------ soil/stats.py | 23 ++-- 9 files changed, 467 insertions(+), 362 deletions(-) create mode 100644 soil/config.py diff --git a/soil/__init__.py b/soil/__init__.py index dc79354..44b548f 100644 --- a/soil/__init__.py +++ b/soil/__init__.py @@ -36,13 +36,13 @@ def main(): parser.add_argument('--module', '-m', type=str, help='file containing the code of any custom agents.') parser.add_argument('--dry-run', '--dry', action='store_true', - help='Do not store the results of the simulation.') + help='Do not store the results of the simulation to disk, show in terminal instead.') parser.add_argument('--pdb', action='store_true', help='Use a pdb console in case of exception.') parser.add_argument('--graph', '-g', action='store_true', - help='Dump GEXF graph. Defaults to false.') + help='Dump each trial\'s network topology as a GEXF graph. Defaults to false.') parser.add_argument('--csv', action='store_true', - help='Dump history in CSV format. Defaults to false.') + help='Dump all data collected in CSV format. Defaults to false.') parser.add_argument('--level', type=str, help='Logging level') parser.add_argument('--output', '-o', type=str, default="soil_output", diff --git a/soil/agents/__init__.py b/soil/agents/__init__.py index 7555207..623c21a 100644 --- a/soil/agents/__init__.py +++ b/soil/agents/__init__.py @@ -26,7 +26,14 @@ class DeadAgent(Exception): class BaseAgent(Agent): """ - A special Agent that keeps track of its state history. 
+ A special type of Mesa Agent that: + + * Can be used as a dictionary to access its state. + * Has logging built-in + * Can be given default arguments through a defaults class attribute, + which will be used on construction to initialize each agent's state + + Any attribute that is not preceded by an underscore (`_`) will also be added to its state. """ defaults = {} @@ -61,6 +68,9 @@ class BaseAgent(Agent): for (k, v) in kwargs.items(): setattr(self, k, v) + for (k, v) in getattr(self, 'defaults', {}).items(): + if not hasattr(self, k) or getattr(self, k) is None: + setattr(self, k, v) # TODO: refactor to clean up mesa compatibility @property @@ -79,7 +89,6 @@ class BaseAgent(Agent): def state(self): ''' Return the agent itself, which behaves as a dictionary. - Changes made to `agent.state` will be reflected in the history. This method shouldn't be used, but is kept here for backwards compatibility. ''' diff --git a/soil/config.py b/soil/config.py new file mode 100644 index 0000000..390be62 --- /dev/null +++ b/soil/config.py @@ -0,0 +1,251 @@ +import yaml +import os +import sys +import networkx as nx +import collections.abc + +from . import serialization, utils, basestring, agents + +class Config(collections.abc.Mapping): + """ + + 1) agent type can be specified by name or by class. + 2) instead of just one type, a network agents distribution can be used. + The distribution specifies the weight (or probability) of each + agent type in the topology. This is an example distribution: :: + + [ + {'agent_type': 'agent_type_1', + 'weight': 0.2, + 'state': { + 'id': 0 + } + }, + {'agent_type': 'agent_type_2', + 'weight': 0.8, + 'state': { + 'id': 1 + } + } + ] + + In this example, 20% of the nodes will be marked as type + 'agent_type_1'. + 3) if no initial state is given, each node's state will be set + to `{'id': 0}`. 
+ + Parameters + --------- + name : str, optional + name of the Simulation + group : str, optional + a group name can be used to link simulations + topology (optional): networkx.Graph instance or Node-Link topology as a dict or string (will be loaded with `json_graph.node_link_graph(topology)`). + network_params : dict + parameters used to create a topology with networkx, if no topology is given + network_agents : dict + definition of agents to populate the topology with + agent_type : NetworkAgent subclass, optional + Default type of NetworkAgent to use for nodes not specified in network_agents + states : list, optional + List of initial states corresponding to the nodes in the topology. Basic form is a list of integers + whose value indicates the state + dir_path: str, optional + Directory path to load simulation assets (files, modules...) + seed : str, optional + Seed to use for the random generator + num_trials : int, optional + Number of independent simulation runs + max_time : int, optional + Maximum step/time for each simulation + environment_params : dict, optional + Dictionary of globally-shared environmental parameters + environment_agents: dict, optional + Similar to network_agents. Distribution of Agents that control the environment + environment_class: soil.environment.Environment subclass, optional + Class for the environment. 
It defaults to soil.environment.Environment + """ + __slots__ = 'name', 'agent_type', 'group', 'network_agents', 'environment_agents', 'states', 'default_state', 'interval', 'network_params', 'seed', 'num_trials', 'max_time', 'topology', 'schedule', 'initial_time', 'environment_params', 'environment_class', 'dir_path', '_added_to_path' + + def __init__(self, name=None, + group=None, + agent_type='BaseAgent', + network_agents=None, + environment_agents=None, + states=None, + default_state=None, + interval=1, + network_params=None, + seed=None, + num_trials=1, + max_time=None, + topology=None, + schedule=None, + initial_time=0, + environment_params={}, + environment_class='soil.Environment', + dir_path=None): + + self.network_params = network_params + self.name = name or 'Unnamed' + self.seed = str(seed or name) + self.group = group or '' + self.num_trials = num_trials + self.max_time = max_time + self.default_state = default_state or {} + self.dir_path = dir_path or os.getcwd() + self.interval = interval + + self._added_to_path = list(x for x in [os.getcwd(), self.dir_path] if x not in sys.path) + sys.path += self._added_to_path + + self.topology = topology + + self.schedule = schedule + self.initial_time = initial_time + + + self.environment_class = environment_class + self.environment_params = dict(environment_params) + + #TODO: Check agent distro vs fixed agents + self.environment_agents = environment_agents or [] + + self.agent_type = agent_type + + self.network_agents = network_agents or {} + + self.states = states or {} + + + def validate(self): + agents._validate_states(self.states, + self._topology) + + def restore_path(self): + for added in self._added_to_path: + sys.path.remove(added) + + def to_yaml(self): + return yaml.dump(self.to_dict()) + + def dump_yaml(self, f=None, outdir=None): + if not f and not outdir: + raise ValueError('specify a file or an output directory') + + if not f: + f = os.path.join(outdir, '{}.dumped.yml'.format(self.name)) + + with 
utils.open_or_reuse(f, 'w') as f: + f.write(self.to_yaml()) + + def to_yaml(self): + return yaml.dump(self.to_dict()) + + # TODO: See note on getstate + def to_dict(self): + return self.__getstate__() + + def dump_yaml(self, f=None, outdir=None): + if not f and not outdir: + raise ValueError('specify a file or an output directory') + + if not f: + f = os.path.join(outdir, '{}.dumped.yml'.format(self.name)) + + with utils.open_or_reuse(f, 'w') as f: + f.write(self.to_yaml()) + + def __getitem__(self, key): + return getattr(self, key) + + def __iter__(self): + return (k for k in self.__slots__ if k[0] != '_') + + def __len__(self): + return len(self.__slots__) + + def dump_pickle(self, f=None, outdir=None): + if not outdir and not f: + raise ValueError('specify a file or an output directory') + + if not f: + f = os.path.join(outdir, + '{}.simulation.pickle'.format(self.name)) + with utils.open_or_reuse(f, 'wb') as f: + pickle.dump(self, f) + + # TODO: remove this. A config should be sendable regardless. 
Non-picklable objects could be computed via properties and the like + # def __getstate__(self): + # state={} + # for k, v in self.__dict__.items(): + # if k[0] != '_': + # state[k] = v + # state['topology'] = json_graph.node_link_data(self.topology) + # state['network_agents'] = agents.serialize_definition(self.network_agents, + # known_modules = []) + # state['environment_agents'] = agents.serialize_definition(self.environment_agents, + # known_modules = []) + # state['environment_class'] = serialization.serialize(self.environment_class, + # known_modules=['soil.environment'])[1] # func, name + # if state['load_module'] is None: + # del state['load_module'] + # return state + + # # TODO: remove, same as __getstate__ + # def __setstate__(self, state): + # self.__dict__ = state + # self.load_module = getattr(self, 'load_module', None) + # if self.dir_path not in sys.path: + # sys.path += [self.dir_path, os.getcwd()] + # self.topology = json_graph.node_link_graph(state['topology']) + # self.network_agents = agents.calculate_distribution(agents._convert_agent_types(self.network_agents)) + # self.environment_agents = agents._convert_agent_types(self.environment_agents, + # known_modules=[self.load_module]) + # self.environment_class = serialization.deserialize(self.environment_class, + # known_modules=[self.load_module, + # 'soil.environment', ]) # func, name + +class CalculatedConfig(Config): + def __init__(self, config): + """ + Returns a configuration object that replaces some "plain" attributes (e.g., `environment_class` string) into + a Python object (`soil.environment.Environment` class). 
+ """ + self._config = config + values = dict(config) + values['environment_class'] = self._environment_class() + values['environment_agents'] = self._environment_agents() + values['topology'] = self._topology() + values['network_agents'] = self._network_agents() + values['agent_type'] = serialization.deserialize(self.agent_type, known_modules=['soil.agents']) + + return values + + def _topology(self): + topology = self._config.topology + if topology is None: + topology = serialization.load_network(self._config.network_params, + dir_path=self._config.dir_path) + + elif isinstance(topology, basestring) or isinstance(topology, dict): + topology = json_graph.node_link_graph(topology) + + return nx.Graph(topology) + + def _environment_class(self): + return serialization.deserialize(self._config.environment_class, + known_modules=['soil.environment', ]) or Environment + + def _environment_agents(self): + return agents._convert_agent_types(self._config.environment_agents) + + def _network_agents(self): + distro = agents.calculate_distribution(self._config.network_agents, + self._config.agent_type) + return agents._convert_agent_types(distro) + + def _environment_class(self): + return serialization.deserialize(self._config.environment_class, + known_modules=['soil.environment', ]) # func, name + diff --git a/soil/datacollection.py b/soil/datacollection.py index 075d988..979c7bd 100644 --- a/soil/datacollection.py +++ b/soil/datacollection.py @@ -8,19 +8,17 @@ class SoilDataCollector(MDC): # Populate model and env reporters so they have a key per # So they can be shown in the web interface self.environment = environment - + raise NotImplementedError() @property def model_vars(self): - pass + raise NotImplementedError() @model_vars.setter def model_vars(self, value): - pass + raise NotImplementedError() @property def agent_reporters(self): - self.model._history._ - - pass + raise NotImplementedError() diff --git a/soil/environment.py b/soil/environment.py index 
47e0997..6635849 100644 --- a/soil/environment.py +++ b/soil/environment.py @@ -1,23 +1,20 @@ +from __future__ import annotations import os import sqlite3 -import csv import math import random -import yaml -import tempfile -import logging -import pandas as pd from time import time as current_time from copy import deepcopy from networkx.readwrite import json_graph -import networkx as nx -from tsih import History, NoHistory, Record, Key +import networkx as nx from mesa import Model -from . import serialization, agents, analysis, utils, time +from tsih import Record + +from . import serialization, agents, analysis, utils, time, config # These properties will be copied when pickling/unpickling the environment _CONFIG_PROPS = [ 'name', @@ -49,7 +46,6 @@ class Environment(Model): schedule=None, initial_time=0, environment_params=None, - history=False, dir_path=None, **kwargs): @@ -76,20 +72,11 @@ class Environment(Model): topology = nx.Graph() self.G = nx.Graph(topology) - self.environment_params = environment_params or {} self.environment_params.update(kwargs) self._env_agents = {} self.interval = interval - - if history: - history = History - else: - history = NoHistory - - self._history = history(name=self.name, - backup=True) self['SEED'] = seed if network_agents: @@ -106,6 +93,19 @@ class Environment(Model): self.logger = utils.logger.getChild(self.name) + @staticmethod + def from_config(conf: config.Config, trial_id, **kwargs) -> Environment: + '''Create an environment for a trial of the simulation''' + + conf = config.Config(conf, **kwargs) + conf.seed = '{}_{}'.format(conf.seed, trial_id) + conf.name = '{}_trial_{}'.format(conf.name, trial_id).replace('.', '-') + opts = conf.environment_params.copy() + opts.update(conf) + opts.update(kwargs) + env = serialization.deserialize(conf.environment_class)(**opts) + return env + @property def now(self): if self.schedule: @@ -212,11 +212,14 @@ class Environment(Model): return self.logger.log(level, message, extra=extra) 
def step(self): + ''' + Advance one step in the simulation, and update the data collection and scheduler appropriately + ''' super().step() self.schedule.step() def run(self, until, *args, **kwargs): - self._save_state() + until = until or float('inf') while self.schedule.next_time < until: self.step() @@ -252,14 +255,16 @@ class Environment(Model): def get(self, key, default=None): ''' - Get the value of an environment attribute in a - given point in the simulation (history). - If key is an attribute name, this method returns - the current value. - To get values at other times, use a - :meth: `soil.history.Key` tuple. + Get the value of an environment attribute. + Return `default` if the value is not set. ''' - return self[key] if key in self else default + return self.environment_params.get(key, default) + + def __getitem__(self, key): + return self.environment_params.get(key) + + def __setitem__(self, key, value): + return self.environment_params.__setitem__(key, value) def get_agent(self, agent_id): return self.G.nodes[agent_id]['agent'] @@ -269,112 +274,31 @@ class Environment(Model): return self.agents return (self.G.nodes[i]['agent'] for i in nodes) - def dump_csv(self, f): - with utils.open_or_reuse(f, 'w') as f: - cr = csv.writer(f) - cr.writerow(('agent_id', 't_step', 'key', 'value')) - for i in self.history_to_tuples(): - cr.writerow(i) - - def dump_gexf(self, f): - G = self.history_to_graph() - # Workaround for geometric models - # See soil/soil#4 - for node in G.nodes(): - if 'pos' in G.nodes[node]: - G.nodes[node]['viz'] = {"position": {"x": G.nodes[node]['pos'][0], "y": G.nodes[node]['pos'][1], "z": 0.0}} - del (G.nodes[node]['pos']) - - nx.write_gexf(G, f, version="1.2draft") - - def dump(self, *args, formats=None, **kwargs): - if not formats: - return - functions = { - 'csv': self.dump_csv, - 'gexf': self.dump_gexf - } - for f in formats: - if f in functions: - functions[f](*args, **kwargs) - else: - raise ValueError('Unknown format: {}'.format(f)) 
- - def df(self): - return self._history[None, None, None].df() - - def dump_sqlite(self, f): - return self._history.dump(f) - - def state_to_tuples(self, now=None): + def _agent_to_tuples(self, agent, now=None): if now is None: now = self.now + for k, v in agent.state.items(): + yield Record(dict_id=agent.id, + t_step=now, + key=k, + value=v) + + def state_to_tuples(self, agent_id=None, now=None): + if now is None: + now = self.now + + if agent_id: + agent = self.get_agent(agent_id) + yield from self._agent_to_tuples(agent, now) + return + for k, v in self.environment_params.items(): yield Record(dict_id='env', t_step=now, key=k, value=v) for agent in self.agents: - for k, v in agent.state.items(): - yield Record(dict_id=agent.id, - t_step=now, - key=k, - value=v) - - def history_to_tuples(self, agent_id=None): - if isinstance(self._history, NoHistory): - tuples = self.state_to_tuples() - else: - tuples = self._history.to_tuples() - if agent_id is None: - return tuples - return filter(lambda x: str(x[0]) == str(agent_id), tuples) - - def history_to_graph(self): - G = nx.Graph(self.G) - - for agent in self.network_agents: - - attributes = {'agent': str(agent.__class__)} - lastattributes = {} - spells = [] - lastvisible = False - laststep = None - history = sorted(list(self.history_to_tuples(agent_id=agent.id))) - if not history: - continue - for _, t_step, attribute, value in history: - if attribute == 'visible': - nowvisible = value - if nowvisible and not lastvisible: - laststep = t_step - if not nowvisible and lastvisible: - spells.append((laststep, t_step)) - - lastvisible = nowvisible - continue - key = 'attr_' + attribute - if key not in attributes: - attributes[key] = list() - if key not in lastattributes: - lastattributes[key] = (value, t_step) - elif lastattributes[key][0] != value: - last_value, laststep = lastattributes[key] - commit_value = (last_value, laststep, t_step) - if key not in attributes: - attributes[key] = list() - 
attributes[key].append(commit_value) - lastattributes[key] = (value, t_step) - for k, v in lastattributes.items(): - attributes[k].append((v[0], v[1], None)) - if lastvisible: - spells.append((laststep, None)) - if spells: - G.add_node(agent.id, spells=spells, **attributes) - else: - G.add_node(agent.id, **attributes) - - return G + yield from self._agent_to_tuples(agent, now) def __getstate__(self): state = {} @@ -382,7 +306,6 @@ class Environment(Model): state[prop] = self.__dict__[prop] state['G'] = json_graph.node_link_data(self.G) state['environment_agents'] = self._env_agents - state['history'] = self._history state['schedule'] = self.schedule return state @@ -391,7 +314,6 @@ class Environment(Model): self.__dict__[prop] = state[prop] self._env_agents = state['environment_agents'] self.G = json_graph.node_link_graph(state['G']) - self._history = state['history'] # self._env = None self.schedule = state['schedule'] self._queue = [] diff --git a/soil/exporters.py b/soil/exporters.py index cc4f03c..f2bbe78 100644 --- a/soil/exporters.py +++ b/soil/exporters.py @@ -48,20 +48,24 @@ class Exporter: self.simulation = simulation outdir = outdir or os.path.join(os.getcwd(), 'soil_output') self.outdir = os.path.join(outdir, - simulation.group or '', - simulation.name) + simulation.config.group or '', + simulation.config.name) self.dry_run = dry_run self.copy_to = copy_to - def start(self): + def sim_start(self): '''Method to call when the simulation starts''' pass - def end(self, stats): + def sim_end(self, stats): '''Method to call when the simulation ends''' pass - def trial(self, env, stats): + def trial_start(self, env): + '''Method to call when a trial start''' + pass + + def trial_end(self, env, stats): '''Method to call when a trial ends''' pass @@ -80,21 +84,21 @@ class Exporter: class default(Exporter): '''Default exporter. 
Writes sqlite results, as well as the simulation YAML''' - def start(self): + def sim_start(self): if not self.dry_run: logger.info('Dumping results to %s', self.outdir) self.simulation.dump_yaml(outdir=self.outdir) else: logger.info('NOT dumping results') - def trial(self, env, stats): + def trial_start(self, env, stats): if not self.dry_run: with timer('Dumping simulation {} trial {}'.format(self.simulation.name, env.name)): with self.output('{}.sqlite'.format(env.name), mode='wb') as f: env.dump_sqlite(f) - def end(self, stats): + def sim_end(self, stats): with timer('Dumping simulation {}\'s stats'.format(self.simulation.name)): with self.output('{}.sqlite'.format(self.simulation.name), mode='wb') as f: self.simulation.dump_sqlite(f) @@ -102,15 +106,14 @@ class default(Exporter): class csv(Exporter): + '''Export the state of each environment (and its agents) in a separate CSV file''' - def trial(self, env, stats): + def trial_end(self, env, stats): with timer('[CSV] Dumping simulation {} trial {} @ dir {}'.format(self.simulation.name, env.name, self.outdir)): - with self.output('{}.csv'.format(env.name)) as f: - env.dump_csv(f) - with self.output('{}.stats.csv'.format(env.name)) as f: + with self.output('{}.stats.{}.csv'.format(env.name, stats.name)) as f: statwriter = csvlib.writer(f, delimiter='\t', quotechar='"', quoting=csvlib.QUOTE_ALL) for stat in stats: @@ -118,7 +121,7 @@ class csv(Exporter): class gexf(Exporter): - def trial(self, env, stats): + def trial_end(self, env, stats): if self.dry_run: logger.info('Not dumping GEXF in dry_run mode') return @@ -126,22 +129,32 @@ class gexf(Exporter): with timer('[GEXF] Dumping simulation {} trial {}'.format(self.simulation.name, env.name)): with self.output('{}.gexf'.format(env.name), mode='wb') as f: - env.dump_gexf(f) + self.dump_gexf(env, f) + def dump_gexf(self, env, f): + G = env.history_to_graph() + # Workaround for geometric models + # See soil/soil#4 + for node in G.nodes(): + if 'pos' in G.nodes[node]: 
+ G.nodes[node]['viz'] = {"position": {"x": G.nodes[node]['pos'][0], "y": G.nodes[node]['pos'][1], "z": 0.0}} + del (G.nodes[node]['pos']) + + nx.write_gexf(G, f, version="1.2draft") class dummy(Exporter): - def start(self): + def sim_start(self): with self.output('dummy', 'w') as f: f.write('simulation started @ {}\n'.format(current_time())) - def trial(self, env, stats): + def trial_end(self, env, stats): with self.output('dummy', 'w') as f: - for i in env.history_to_tuples(): + for i in stats: f.write(','.join(map(str, i))) f.write('\n') - def sim(self, stats): + def sim_end(self, stats): with self.output('dummy', 'a') as f: f.write('simulation ended @ {}\n'.format(current_time())) @@ -149,10 +162,57 @@ class dummy(Exporter): class graphdrawing(Exporter): - def trial(self, env, stats): + def trial_end(self, env, stats): # Outside effects f = plt.figure() nx.draw(env.G, node_size=10, width=0.2, pos=nx.spring_layout(env.G, scale=100), ax=f.add_subplot(111)) with open('graph-{}.png'.format(env.name)) as f: f.savefig(f) +''' +Convert an environment into a NetworkX graph +''' +def env_to_graph(env, history=None): + G = nx.Graph(env.G) + + for agent in env.network_agents: + + attributes = {'agent': str(agent.__class__)} + lastattributes = {} + spells = [] + lastvisible = False + laststep = None + if not history: + history = sorted(list(env.state_to_tuples())) + for _, t_step, attribute, value in history: + if attribute == 'visible': + nowvisible = value + if nowvisible and not lastvisible: + laststep = t_step + if not nowvisible and lastvisible: + spells.append((laststep, t_step)) + + lastvisible = nowvisible + continue + key = 'attr_' + attribute + if key not in attributes: + attributes[key] = list() + if key not in lastattributes: + lastattributes[key] = (value, t_step) + elif lastattributes[key][0] != value: + last_value, laststep = lastattributes[key] + commit_value = (last_value, laststep, t_step) + if key not in attributes: + attributes[key] = list() + 
attributes[key].append(commit_value) + lastattributes[key] = (value, t_step) + for k, v in lastattributes.items(): + attributes[k].append((v[0], v[1], None)) + if lastvisible: + spells.append((laststep, None)) + if spells: + G.add_node(agent.id, spells=spells, **attributes) + else: + G.add_node(agent.id, **attributes) + + return G diff --git a/soil/serialization.py b/soil/serialization.py index 76c60fc..dd94108 100644 --- a/soil/serialization.py +++ b/soil/serialization.py @@ -2,6 +2,7 @@ import os import logging import ast import sys +import re import importlib from glob import glob from itertools import product, chain @@ -18,6 +19,9 @@ logger = logging.getLogger('soil') def load_network(network_params, dir_path=None): G = nx.Graph() + if not network_params: + return G + if 'path' in network_params: path = network_params['path'] if dir_path and not os.path.isabs(path): @@ -169,6 +173,9 @@ def serialize(v, known_modules=[]): func = serializer(tname) return func(v), tname + +IS_CLASS = re.compile(r"") + def deserializer(type_, known_modules=[]): if type(type_) != str: # Already deserialized return type_ @@ -179,6 +186,13 @@ def deserializer(type_, known_modules=[]): if hasattr(builtins, type_): # Check if it's a builtin type cls = getattr(builtins, type_) return lambda x=None: ast.literal_eval(x) if x is not None else cls() + match = IS_CLASS.match(type_) + if match: + modname, tname = match.group(1).rsplit(".", 1) + module = importlib.import_module(modname) + cls = getattr(module, tname) + return getattr(cls, 'deserialize', cls) + # Otherwise, see if we can find the module and the class modules = known_modules or [] options = [] @@ -189,7 +203,7 @@ def deserializer(type_, known_modules=[]): if '.' 
in type_: # Fully qualified module module, type_ = type_.rsplit(".", 1) - options.append ((module, type_)) + options.append((module, type_)) errors = [] for modname, tname in options: @@ -213,10 +227,10 @@ def deserialize(type_, value=None, **kwargs): def deserialize_all(names, *args, known_modules=['soil'], **kwargs): - '''Return the set of exporters for a simulation, given the exporter names''' - exporters = [] + '''Return the list of deserialized objects''' + objects = [] for name in names: mod = deserialize(name, known_modules=known_modules) - exporters.append(mod(*args, **kwargs)) - return exporters + objects.append(mod(*args, **kwargs)) + return objects diff --git a/soil/simulation.py b/soil/simulation.py index 0990bc9..9985d3e 100644 --- a/soil/simulation.py +++ b/soil/simulation.py @@ -10,8 +10,6 @@ import networkx as nx from networkx.readwrite import json_graph from multiprocessing import Pool from functools import partial -from tsih import History - import pickle from . import serialization, utils, basestring, agents @@ -20,127 +18,34 @@ from .utils import logger from .exporters import default from .stats import defaultStats +from .config import Config + #TODO: change documentation for simulation - class Simulation: """ - Similar to nsim.NetworkSimulation with three main differences: - 1) agent type can be specified by name or by class. - 2) instead of just one type, a network agents distribution can be used. - The distribution specifies the weight (or probability) of each - agent type in the topology. This is an example distribution: :: - - [ - {'agent_type': 'agent_type_1', - 'weight': 0.2, - 'state': { - 'id': 0 - } - }, - {'agent_type': 'agent_type_2', - 'weight': 0.8, - 'state': { - 'id': 1 - } - } - ] - - In this example, 20% of the nodes will be marked as type - 'agent_type_1'. - 3) if no initial state is given, each node's state will be set - to `{'id': 0}`. 
- Parameters --------- - name : str, optional + config (optional): :class:`config.Config` name of the Simulation - group : str, optional - a group name can be used to link simulations - topology : networkx.Graph instance, optional - network_params : dict - parameters used to create a topology with networkx, if no topology is given - network_agents : dict - definition of agents to populate the topology with - agent_type : NetworkAgent subclass, optional - Default type of NetworkAgent to use for nodes not specified in network_agents - states : list, optional - List of initial states corresponding to the nodes in the topology. Basic form is a list of integers - whose value indicates the state - dir_path: str, optional - Directory path to load simulation assets (files, modules...) - seed : str, optional - Seed to use for the random generator - num_trials : int, optional - Number of independent simulation runs - max_time : int, optional - Time how long the simulation should run - environment_params : dict, optional - Dictionary of globally-shared environmental parameters - environment_agents: dict, optional - Similar to network_agents. Distribution of Agents that control the environment - environment_class: soil.environment.Environment subclass, optional - Class for the environment. It defailts to soil.environment.Environment - load_module : str, module name, deprecated - If specified, soil will load the content of this module under 'soil.agents.custom' - history: tsih.History subclass, optional - Class to use to store the history of the simulation (and environments). It defailts to tsih.History - If set to True, tsih.History will be used. If set to False or None, tsih.NoHistory will be used. + + kwargs: parameters to use to initialize a new configuration, if one has not been provided. 
""" - def __init__(self, name=None, group=None, topology=None, network_params=None, - network_agents=None, agent_type=None, states=None, - default_state=None, interval=1, num_trials=1, - max_time=100, load_module=None, seed=None, - dir_path=None, environment_agents=None, - environment_params=None, environment_class=None, - history=History, **kwargs): + def __init__(self, config=None, + **kwargs): - self.load_module = load_module - self.network_params = network_params - self.name = name or 'Unnamed' - self.seed = str(seed or name) - self._id = '{}_{}'.format(self.name, strftime("%Y-%m-%d_%H.%M.%S")) - self.group = group or '' - self.num_trials = num_trials - self.max_time = max_time - self.default_state = default_state or {} - self.dir_path = dir_path or os.getcwd() - self.interval = interval + if bool(config) == bool(kwargs): + raise ValueError("Specify either a configuration or the parameters to initialize a configuration") - sys.path += list(x for x in [os.getcwd(), self.dir_path] if x not in sys.path) + if kwargs: + config = Config(**kwargs) - if topology is None: - topology = serialization.load_network(network_params, - dir_path=self.dir_path) - elif isinstance(topology, basestring) or isinstance(topology, dict): - topology = json_graph.node_link_graph(topology) - self.topology = nx.Graph(topology) + self.config = config - - self.environment_params = environment_params or {} - self.environment_class = serialization.deserialize(environment_class, - known_modules=['soil.environment', ]) or Environment - - environment_agents = environment_agents or [] - self.environment_agents = agents._convert_agent_types(environment_agents, - known_modules=[self.load_module]) - - distro = agents.calculate_distribution(network_agents, - agent_type) - self.network_agents = agents._convert_agent_types(distro, - known_modules=[self.load_module]) - - self.states = agents._validate_states(states, - self.topology) - - if history == True: - history = History - elif not history: - 
history = NoHistory - - self._history = history(name=self.name, - backup=False) + @property + def name(self) -> str: + return self.config.name def run_simulation(self, *args, **kwargs): return self.run(*args, **kwargs) @@ -153,13 +58,13 @@ class Simulation: if parallel and not os.environ.get('SENPY_DEBUG', None): p = Pool() func = partial(self.run_trial_exceptions, **kwargs) - for i in p.imap_unordered(func, range(self.num_trials)): + for i in p.imap_unordered(func, range(self.config.num_trials)): if isinstance(i, Exception): logger.error('Trial failed:\n\t%s', i.message) continue yield i else: - for i in range(self.num_trials): + for i in range(self.config.num_trials): yield self.run_trial(trial_id=i, **kwargs) @@ -179,50 +84,47 @@ class Simulation: outdir=outdir, **exporter_params) stats = serialization.deserialize_all(simulation=self, - names=stats, - known_modules=['soil.stats',], - **stats_params) + names=stats, + known_modules=['soil.stats',], + **stats_params) - with utils.timer('simulation {}'.format(self.name)): + with utils.timer('simulation {}'.format(self.config.name)): for stat in stats: - stat.start() + stat.sim_start() for exporter in exporters: exporter.start() + for env in self._run_sync_or_async(parallel=parallel, log_level=log_level, **kwargs): - collected = list(stat.trial(env) for stat in stats) + for exporter in exporters: + exporter.trial_start(env) - saved = self.save_stats(collected, t_step=env.now, trial_id=env.name) + collected = list(stat.trial_end(env) for stat in stats) + + saved = self._update_stats(collected, t_step=env.now, trial_id=env.name) for exporter in exporters: - exporter.trial(env, saved) + exporter.trial_end(env, saved) yield env - collected = list(stat.end() for stat in stats) - saved = self.save_stats(collected) + saved = self._update_stats(collected) for exporter in exporters: - exporter.end(saved) + exporter.sim_end(saved) - - def save_stats(self, collection, **kwargs): + def _update_stats(self, collection, **kwargs): 
stats = dict(kwargs) for stat in collection: stats.update(stat) - self._history.save_stats(utils.flatten_dict(stats)) return stats - def get_stats(self, **kwargs): - return self._history.get_stats(**kwargs) - def log_stats(self, stats): logger.info('Stats: \n{}'.format(yaml.dump(stats, default_flow_style=False))) - def get_env(self, trial_id=0, **kwargs): '''Create an environment for a trial of the simulation''' @@ -246,18 +148,20 @@ class Simulation: env = self.environment_class(**opts) return env - def run_trial(self, trial_id=0, until=None, log_level=logging.INFO, **opts): + def run_trial(self, trial_id=None, until=None, log_level=logging.INFO, **opts): """ Run a single trial of the simulation """ + trial_id = trial_id if trial_id is not None else current_time() if log_level: logger.setLevel(log_level) # Set-up trial environment and graph - until = until or self.max_time - env = self.get_env(trial_id=trial_id, **opts) + until = until or self.config.max_time + + env = Environment.from_config(self.config, trial_id=trial_id) # Set up agents on nodes - with utils.timer('Simulation {} trial {}'.format(self.name, trial_id)): + with utils.timer('Simulation {} trial {}'.format(self.config.name, trial_id)): env.run(until) return env @@ -274,64 +178,6 @@ class Simulation: ex.message = ''.join(traceback.format_exception(type(ex), ex, ex.__traceback__)[:]) return ex - def to_dict(self): - return self.__getstate__() - - def to_yaml(self): - return yaml.dump(self.to_dict()) - - - def dump_yaml(self, f=None, outdir=None): - if not f and not outdir: - raise ValueError('specify a file or an output directory') - - if not f: - f = os.path.join(outdir, '{}.dumped.yml'.format(self.name)) - - with utils.open_or_reuse(f, 'w') as f: - f.write(self.to_yaml()) - - def dump_pickle(self, f=None, outdir=None): - if not outdir and not f: - raise ValueError('specify a file or an output directory') - - if not f: - f = os.path.join(outdir, - '{}.simulation.pickle'.format(self.name)) - with 
utils.open_or_reuse(f, 'wb') as f: - pickle.dump(self, f) - - def dump_sqlite(self, f): - return self._history.dump(f) - - def __getstate__(self): - state={} - for k, v in self.__dict__.items(): - if k[0] != '_': - state[k] = v - state['topology'] = json_graph.node_link_data(self.topology) - state['network_agents'] = agents.serialize_definition(self.network_agents, - known_modules = []) - state['environment_agents'] = agents.serialize_definition(self.environment_agents, - known_modules = []) - state['environment_class'] = serialization.serialize(self.environment_class, - known_modules=['soil.environment'])[1] # func, name - if state['load_module'] is None: - del state['load_module'] - return state - - def __setstate__(self, state): - self.__dict__ = state - self.load_module = getattr(self, 'load_module', None) - if self.dir_path not in sys.path: - sys.path += [self.dir_path, os.getcwd()] - self.topology = json_graph.node_link_graph(state['topology']) - self.network_agents = agents.calculate_distribution(agents._convert_agent_types(self.network_agents)) - self.environment_agents = agents._convert_agent_types(self.environment_agents, - known_modules=[self.load_module]) - self.environment_class = serialization.deserialize(self.environment_class, - known_modules=[self.load_module, 'soil.environment', ]) # func, name - def all_from_config(config): configs = list(serialization.load_config(config)) diff --git a/soil/stats.py b/soil/stats.py index 84082bd..5de9a40 100644 --- a/soil/stats.py +++ b/soil/stats.py @@ -8,18 +8,23 @@ class Stats: if you don't plan to implement all the methods. 
''' - def __init__(self, simulation): + def __init__(self, simulation, name=None): + self.name = name or type(self).__name__ self.simulation = simulation - def start(self): + def sim_start(self): '''Method to call when the simulation starts''' pass - def end(self): + def sim_end(self): '''Method to call when the simulation ends''' return {} - def trial(self, env): + def trial_start(self, env): + '''Method to call when a trial starts''' + return {} + + def trial_end(self, env): '''Method to call when a trial ends''' return {} @@ -30,12 +35,12 @@ class distribution(Stats): the mean value, and its deviation. ''' - def start(self): + def sim_start(self): self.means = [] self.counts = [] - def trial(self, env): - df = env.df() + def trial_end(self, env): + df = pd.DataFrame(env.state_to_tuples()) df = df.drop('SEED', axis=1) ix = df.index[-1] attrs = df.columns.get_level_values(0) @@ -60,7 +65,7 @@ class distribution(Stats): return stats - def end(self): + def sim_end(self): dfm = pd.DataFrame(self.means, columns=['metric', 'key', 'value']) dfc = pd.DataFrame(self.counts, columns=['metric', 'key', 'value', 'count']) @@ -87,7 +92,7 @@ class distribution(Stats): class defaultStats(Stats): - def trial(self, env): + def trial_end(self, env): c = Counter() c.update(a.__class__.__name__ for a in env.network_agents)