import os
import sys
from io import BytesIO
from textwrap import dedent, indent
from time import time as current_time

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from sqlalchemy import create_engine

from .serialization import deserialize, serialize
from .utils import try_backup, open_or_reuse, logger, timer
from . import utils, network


class DryRunner(BytesIO):
    """In-memory replacement for an output file, used when dumping is disabled.

    Everything written is kept in the buffer and, if ``copy_to`` is given,
    echoed to it prefixed with the target file name. On close, the buffered
    content is logged instead of being written to ``fname``.
    """

    def __init__(self, fname, *args, copy_to=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.__fname = fname
        self.__copy_to = copy_to

    def write(self, txt):
        """Buffer ``txt`` (``str`` or bytes-like) and return the bytes written."""
        if self.__copy_to:
            self.__copy_to.write("{}:::{}".format(self.__fname, txt))
        try:
            return super().write(txt)
        except TypeError:
            # BytesIO rejects str: encode it and retry.
            return super().write(bytes(txt, "utf-8"))

    def close(self):
        """Log the buffered content and release the buffer.

        Closing an already closed buffer is a no-op, as for any other
        file object.
        """
        if self.closed:
            return
        content = "(binary data not shown)"
        try:
            content = self.getvalue().decode()
        except UnicodeDecodeError:
            pass
        logger.info(
            "**Not** written to {} (no_dump mode):\n\n{}\n\n".format(
                self.__fname, content
            )
        )
        super().close()


class Exporter:
    """
    Interface for all exporters. It is not necessary, but it is useful
    if you don't plan to implement all the methods.
    """

    def __init__(self, simulation, outdir=None, dump=True, copy_to=None):
        """Store the simulation and compute the output directory.

        :param simulation: object exposing ``name``, ``group``, ``id`` and
            ``backup`` (read by the methods of this class).
        :param outdir: base directory; defaults to ``<cwd>/soil_output``.
        :param dump: when false, nothing is written to disk.
        :param copy_to: stream that receives a copy of dry-run output;
            defaults to ``sys.stdout`` when ``dump`` is false.
        """
        self.simulation = simulation
        outdir = outdir or os.path.join(os.getcwd(), "soil_output")
        self.outdir = os.path.join(outdir, simulation.group or "", simulation.name)
        self.dump = dump
        if copy_to is None and not dump:
            copy_to = sys.stdout
        self.copy_to = copy_to

    def sim_start(self):
        """Method to call when the simulation starts"""
        pass

    def sim_end(self):
        """Method to call when the simulation ends"""
        pass

    def iteration_start(self, env):
        """Method to call when an iteration starts"""
        pass

    def iteration_end(self, env, params, params_id):
        """Method to call when an iteration ends"""
        pass

    def output(self, f, mode="w", **kwargs):
        """Return a handle for ``f`` through ``open_or_reuse``.

        In no-dump mode ``f`` is replaced by a :class:`DryRunner`. Otherwise
        a relative path is resolved against ``self.outdir``; values that
        ``os.path.isabs`` rejects with ``TypeError`` (presumably already-open
        file objects) are passed through untouched.
        """
        if not self.dump:
            f = DryRunner(f, copy_to=self.copy_to)
        else:
            try:
                if not os.path.isabs(f):
                    f = os.path.join(self.outdir, f)
            except TypeError:
                pass
        return open_or_reuse(f, mode=mode, backup=self.simulation.backup, **kwargs)

    def get_dfs(self, env, **kwargs):
        """Yield ``(name, dataframe)`` pairs for the data collected in ``env``.

        Every dataframe is tagged with this simulation's id and the
        environment's id, plus any extra ``kwargs``.
        """
        yield from get_dc_dfs(
            env.datacollector,
            simulation_id=self.simulation.id,
            iteration_id=env.id,
            **kwargs,
        )


def get_dc_dfs(dc, **kwargs):
    """Yield ``(name, dataframe)`` pairs extracted from the data collector ``dc``.

    Produces the model variables (``"env"``), the agent variables
    (``"agents"``, skipped if the collector raises ``UserWarning``) and one
    entry per table in ``dc.tables``. Each keyword argument is added as a
    constant column; ``simulation_id`` and ``iteration_id`` must be among
    them, because they are appended to every dataframe's index.
    """
    dfs = {}
    dfe = dc.get_model_vars_dataframe()
    dfe.index.rename("step", inplace=True)
    dfs["env"] = dfe
    try:
        dfa = dc.get_agent_vars_dataframe()
        dfa.index.rename(["step", "agent_id"], inplace=True)
        dfs["agents"] = dfa
    except UserWarning:
        pass
    for table_name in dc.tables:
        dfs[table_name] = dc.get_table_dataframe(table_name)
    for df in dfs.values():
        for (k, v) in kwargs.items():
            df[k] = v
        df.set_index(["simulation_id", "iteration_id"], append=True, inplace=True)
    yield from dfs.items()


class SQLite(Exporter):
    """Writes sqlite results"""

    sim_started = False

    def sim_start(self):
        """Create the database engine and store the simulation configuration."""
        if not self.dump:
            logger.debug("NOT dumping results")
            return
        self.dbpath = os.path.join(self.outdir, f"{self.simulation.name}.sqlite")
        logger.info("Dumping results to %s", self.dbpath)
        if self.simulation.backup:
            try_backup(self.dbpath, remove=True)
        if self.simulation.overwrite and os.path.exists(self.dbpath):
            os.remove(self.dbpath)
        self.engine = create_engine(f"sqlite:///{self.dbpath}", echo=False)
        sim_dict = {
            k: serialize(v)[0] for (k, v) in self.simulation.to_dict().items()
        }
        sim_dict["simulation_id"] = self.simulation.id
        df = pd.DataFrame([sim_dict])
        df.to_sql("configuration", con=self.engine, if_exists="append")

    def iteration_end(self, env, params, params_id, *args, **kwargs):
        """Append the iteration parameters and collected dataframes to the DB."""
        if not self.dump:
            logger.info("Running in NO DUMP mode. Results will NOT be saved to a DB.")
            return
        with timer(
            "Dumping simulation {} iteration {}".format(self.simulation.name, env.id)
        ):
            rows = [
                {
                    "simulation_id": self.simulation.id,
                    "params_id": params_id,
                    "iteration_id": env.id,
                    "key": k,
                    "value": serialize(v)[0],
                }
                for (k, v) in params.items()
            ]
            pd.DataFrame(rows).to_sql(
                "parameters", con=self.engine, if_exists="append"
            )
            for (t, df) in self.get_dfs(env, params_id=params_id):
                df.to_sql(t, con=self.engine, if_exists="append")


class csv(Exporter):
    """Export the state of each environment (and its agents) to a CSV file for the simulation"""

    def sim_start(self):
        super().sim_start()

    def iteration_end(self, env, params, params_id, *args, **kwargs):
        """Append every collected dataframe to ``<env.id>.<name>.csv``."""
        with timer(
            "[CSV] Dumping simulation {} iteration {} @ dir {}".format(
                self.simulation.name, env.id, self.outdir
            )
        ):
            for (df_name, df) in self.get_dfs(env, params_id=params_id):
                with self.output("{}.{}.csv".format(env.id, df_name), mode="a") as f:
                    df.to_csv(f)


# TODO: reimplement GEXF exporting without history
class gexf(Exporter):
    def iteration_end(self, env, *args, **kwargs):
        """Dump the environment graph to ``<env.id>.gexf``."""
        if not self.dump:
            logger.info("Not dumping GEXF (NO_DUMP mode)")
            return
        with timer(
            "[GEXF] Dumping simulation {} iteration {}".format(
                self.simulation.name, env.id
            )
        ):
            with self.output("{}.gexf".format(env.id), mode="wb") as f:
                network.dump_gexf(env.history_to_graph(), f)
                # NOTE(review): dump_gexf is not defined on Exporter or on this
                # class in this file; unless a subclass provides it this call
                # raises AttributeError. Resolve together with the TODO above.
                self.dump_gexf(env, f)


class dummy(Exporter):
    """Write a timestamped line to the ``dummy`` output on every hook."""

    def sim_start(self):
        with self.output("dummy", "w") as f:
            f.write("simulation started @ {}\n".format(current_time()))

    def iteration_start(self, env):
        with self.output("dummy", "w") as f:
            f.write("iteration started@ {}\n".format(current_time()))

    def iteration_end(self, env, *args, **kwargs):
        with self.output("dummy", "w") as f:
            f.write("iteration ended@ {}\n".format(current_time()))

    def sim_end(self):
        with self.output("dummy", "a") as f:
            f.write("simulation ended @ {}\n".format(current_time()))


class graphdrawing(Exporter):
    def iteration_end(self, env, *args, **kwargs):
        """Draw ``env.G`` and save it as ``graph-<env.id>.png`` in the cwd."""
        # Outside effects
        fig = plt.figure()
        try:
            nx.draw(
                env.G,
                node_size=10,
                width=0.2,
                pos=nx.spring_layout(env.G, scale=100),
                ax=fig.add_subplot(111),
            )
            # The file handle gets its own name so it does not shadow the
            # figure, and it is opened for binary writing.
            with open("graph-{}.png".format(env.id), "wb") as out:
                fig.savefig(out, format="png")
        finally:
            # pyplot keeps every figure alive until it is explicitly closed.
            plt.close(fig)


class summary(Exporter):
    """Log a summary of each iteration"""

    def iteration_end(self, env, *args, **kwargs):
        """Log the last row, description and value counts of each dataframe."""
        msg = ""
        for (t, df) in self.get_dfs(env):
            if not len(df):
                continue
            tabs = "\t" * 2
            description = indent(str(df.describe()), tabs)
            last_line = indent(str(df.iloc[-1:]), tabs)
            value_counts = indent(
                str(df.apply(lambda x: x.value_counts()).T.stack()), tabs
            )
            msg += dedent("""
            Dataframe {t}:

                Last line: :
            {last_line}

                Description:
            {description}

                Value counts:
            {value_counts}

            """).format(
                t=t,
                last_line=last_line,
                description=description,
                value_counts=value_counts,
            )
        logger.info(msg)


class YAML(Exporter):
    """Writes the configuration of the simulation to a YAML file"""

    def sim_start(self):
        if not self.dump:
            logger.debug("NOT dumping results")
            return
        with self.output(self.simulation.id + ".dumped.yml") as f:
            logger.info(f"Dumping simulation configuration to {self.outdir}")
            f.write(self.simulation.to_yaml())


class default(Exporter):
    """Default exporter.

    Writes sqlite results, as well as the simulation YAML"""

    def __init__(self, *args, exporter_cls=None, **kwargs):
        """Instantiate every class in ``exporter_cls`` with the given arguments.

        :param exporter_cls: exporter classes to wrap; when empty or ``None``
            it falls back to ``[YAML, SQLite]``.
        """
        exporter_cls = exporter_cls or [YAML, SQLite]
        self.inner = [cls(*args, **kwargs) for cls in exporter_cls]

    def sim_start(self, *args, **kwargs):
        for exporter in self.inner:
            exporter.sim_start(*args, **kwargs)

    def sim_end(self, *args, **kwargs):
        for exporter in self.inner:
            exporter.sim_end(*args, **kwargs)

    def iteration_start(self, *args, **kwargs):
        # Forwarded like the other hooks, so inner exporters are notified too.
        for exporter in self.inner:
            exporter.iteration_start(*args, **kwargs)

    def iteration_end(self, *args, **kwargs):
        for exporter in self.inner:
            exporter.iteration_end(*args, **kwargs)