soil/soil/simulation.py


import os
from time import time as current_time, strftime
import sys
import yaml
import hashlib
import inspect
import logging
import networkx as nx
from tqdm.auto import tqdm
from textwrap import dedent
from dataclasses import dataclass, field, asdict, replace
from typing import Any, Dict, Union, Optional, List
from functools import partial
from contextlib import contextmanager
from itertools import product
import json
from . import serialization, exporters, utils, basestring, agents
from .environment import Environment
from .utils import logger, run_and_return_exceptions
from .debugging import set_trace
_AVOID_RUNNING = False
_QUEUED = []


@contextmanager
def do_not_run():
    global _AVOID_RUNNING
    _AVOID_RUNNING = True
    try:
        logger.debug("NOT RUNNING")
        yield
    finally:
        logger.debug("RUNNING AGAIN")
        _AVOID_RUNNING = False


def _iter_queued():
    while _QUEUED:
        (cls, params) = _QUEUED.pop(0)
        yield replace(cls, parameters=params)
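

# Usage sketch (illustrative): ``do_not_run()`` is what ``iter_from_py`` relies on to
# import a simulation file without executing it. Any ``Simulation.run()`` call made
# inside the block is queued instead of run, and the queue can be drained later:
#
#     with do_not_run():
#         Simulation(model="soil.Environment", parameters={"n": 10}).run()  # queued, returns []
#     queued_sims = list(_iter_queued())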
# TODO: change documentation for simulation
# TODO: rename iterations to iterations
# TODO: make parameters a dict of iterable/any
@dataclass
class Simulation:
"""
A simulation is a collection of agents and a model. It is responsible for running the model and agents, and collecting data from them.
Args:
version: The version of the simulation. This is used to determine how to load the simulation.
name: The name of the simulation.
description: A description of the simulation.
group: The group that the simulation belongs to.
model: The model to use for the simulation. This can be a string or a class.
parameters: The parameters to pass to the model.
matrix: A matrix of values for each parameter.
seed: The seed to use for the simulation.
dir_path: The directory path to use for the simulation.
max_time: The maximum time to run the simulation.
max_steps: The maximum number of steps to run the simulation.
interval: The interval to use for the simulation.
iterations: The number of iterations (times) to run the simulation.
num_processes: The number of processes to use for the simulation. If greater than one, simulations will be performed in parallel. This may make debugging and error handling difficult.
tables: The tables to use in the simulation datacollector
agent_reporters: The agent reporters to use in the datacollector
model_reporters: The model reporters to use in the datacollector
dry_run: Whether or not to run the simulation. If True, the simulation will not be run.
backup: Whether or not to backup the simulation. If True, the simulation files will be backed up to a different directory.
overwrite: Whether or not to replace existing simulation data.
source_file: Python file to use to find additional classes.
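
    Example:
        A minimal, illustrative setup (``MyModel`` stands in for any Environment or
        model class; it is not defined in this module)::

            sim = Simulation(
                name="example",
                model=MyModel,
                parameters={"num_agents": 10},
                iterations=3,       # run the model three times
                max_steps=100,      # stop each run after 100 steps
            )
            envs = sim.run()        # list of resulting Environment objects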
"""

    version: str = "2"
    source_file: Optional[str] = None
    name: Optional[str] = None
    description: Optional[str] = ""
    group: Optional[str] = None
    backup: bool = False
    overwrite: bool = False
    dry_run: bool = False
    dump: bool = False
    model: Union[str, type] = "soil.Environment"
    parameters: dict = field(default_factory=dict)
    matrix: dict = field(default_factory=dict)
    seed: str = "default"
    dir_path: str = field(default_factory=lambda: os.getcwd())
    max_time: Optional[float] = None
    max_steps: Optional[int] = None
    interval: int = 1
    iterations: int = 1
    num_processes: Optional[int] = 1
    exporters: Optional[List[str]] = field(default_factory=lambda: [exporters.default])
    model_reporters: Optional[Dict[str, Any]] = field(default_factory=dict)
    agent_reporters: Optional[Dict[str, Any]] = field(default_factory=dict)
    tables: Optional[Dict[str, Any]] = field(default_factory=dict)
    outdir: str = field(default_factory=lambda: os.path.join(os.getcwd(), "soil_output"))
    exporter_params: Optional[Dict[str, Any]] = field(default_factory=dict)
    level: int = logging.INFO
    skip_test: Optional[bool] = False
    debug: Optional[bool] = False

    def __post_init__(self):
        if self.name is None:
            if isinstance(self.model, str):
                self.name = self.model
            else:
                self.name = self.model.__name__

        self.logger = logger.getChild(self.name)
        self.logger.setLevel(self.level)

        if self.source_file:
            source_file = self.source_file
            if not os.path.isabs(source_file):
                source_file = os.path.abspath(os.path.join(self.dir_path, source_file))
            serialization.add_source_file(source_file)
            self.source_file = source_file

        if isinstance(self.model, str):
            self.model = serialization.deserialize(self.model)

        def deserialize_reporters(reporters):
            for (k, v) in reporters.items():
                if isinstance(v, str) and v.startswith("py:"):
                    reporters[k] = serialization.deserialize(v.split(":", 1)[1])
            return reporters

        self.agent_reporters = deserialize_reporters(self.agent_reporters)
        self.model_reporters = deserialize_reporters(self.model_reporters)
        self.tables = deserialize_reporters(self.tables)

        if self.source_file:
            serialization.remove_source_file(self.source_file)
        self.id = f"{self.name}_{current_time()}"
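
    # Reporter and table values given as strings with a "py:" prefix are resolved via
    # ``serialization.deserialize``; anything else is passed through unchanged. For
    # example (illustrative; ``my_module.avg_opinion`` is a hypothetical importable
    # callable):
    #
    #     Simulation(model_reporters={"avg_opinion": "py:my_module.avg_opinion"})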

    def run(self, **kwargs):
        """Run the simulation and return the list of resulting environments."""
        if kwargs:
            return replace(self, **kwargs).run()

        self.logger.debug(
            dedent(
                """
                Simulation:
                ---
                """
            )
            + self.to_yaml()
        )
        param_combinations = self._collect_params()
        if _AVOID_RUNNING:
            _QUEUED.extend((self, param) for param in param_combinations)
            return []

        self.logger.debug("Using exporters: %s", self.exporters or [])
        exporters = serialization.deserialize_all(
            self.exporters,
            simulation=self,
            known_modules=[
                "soil.exporters",
            ],
            dump=self.dump and not self.dry_run,
            outdir=self.outdir,
            **self.exporter_params,
        )

        results = []
        for exporter in exporters:
            exporter.sim_start()

        for params in tqdm(param_combinations, desc=self.name, unit="configuration"):
            for (k, v) in params.items():
                tqdm.write(f"{k} = {v}")
            sha = hashlib.sha256()
            sha.update(repr(sorted(params.items())).encode())
            params_id = sha.hexdigest()[:7]
            for env in self._run_iters_for_params(params):
                for exporter in exporters:
                    exporter.iteration_end(env, params, params_id)
                results.append(env)

        for exporter in exporters:
            exporter.sim_end()

        return results
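
    # Keyword arguments passed to ``run()`` override the corresponding fields for that
    # call only (via ``dataclasses.replace``), e.g. (illustrative):
    #
    #     envs = sim.run(iterations=5, dump=True)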

    def _collect_params(self):
        parameters = []
        if self.parameters:
            parameters.append(self.parameters)
        if self.matrix:
            assert isinstance(self.matrix, dict)
            for values in product(*(self.matrix.values())):
                parameters.append(dict(zip(self.matrix.keys(), values)))

        if not parameters:
            parameters = [{}]
        if self.dump:
            self.logger.info("Output directory: %s", self.outdir)

        return parameters
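
    # ``matrix`` expands into the cartesian product of its values. For example
    # (illustrative):
    #
    #     matrix={"prob": [0.1, 0.5], "size": [10, 20]}
    #
    # yields four parameter combinations:
    #
    #     {"prob": 0.1, "size": 10}, {"prob": 0.1, "size": 20},
    #     {"prob": 0.5, "size": 10}, {"prob": 0.5, "size": 20}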

    def _run_iters_for_params(self, params):
        """Run the simulation and yield the resulting environments."""
        try:
            if self.source_file:
                serialization.add_source_file(self.source_file)
            with utils.timer(f"running for config {params}"):
                if self.dry_run:
                    def func(*args, **kwargs):
                        return None
                else:
                    func = self._run_model
                for env in tqdm(utils.run_parallel(
                    func=func,
                    iterable=range(self.iterations),
                    **params,
                ), total=self.iterations, leave=False):
                    if env is None and self.dry_run:
                        continue
                    yield env
        finally:
            if self.source_file:
                serialization.remove_source_file(self.source_file)

    def _get_env(self, iteration_id, params):
        """Create an environment for an iteration of the simulation"""
        iteration_id = str(iteration_id)
        # Copy the reporters so that per-iteration overrides do not mutate the
        # simulation-wide defaults
        agent_reporters = dict(self.agent_reporters)
        agent_reporters.update(params.pop("agent_reporters", {}))
        model_reporters = dict(self.model_reporters)
        model_reporters.update(params.pop("model_reporters", {}))

        return self.model(
            id=iteration_id,
            seed=f"{self.seed}_iteration_{iteration_id}",
            dir_path=self.dir_path,
            interval=self.interval,
            logger=self.logger.getChild(iteration_id),
            agent_reporters=agent_reporters,
            model_reporters=model_reporters,
            tables=self.tables,
            **params,
        )

    def _run_model(self, iteration_id, **params):
        """
        Run a single iteration of the simulation
        """
        # Set up the iteration environment and graph
        model = self._get_env(iteration_id, params)
        with utils.timer("Simulation {} iteration {}".format(self.name, iteration_id)):
            max_time = self.max_time
            max_steps = self.max_steps

            if (max_time is not None) and (max_steps is not None):
                is_done = lambda model: (not model.running) or (model.schedule.time >= max_time) or (model.schedule.steps >= max_steps)
            elif max_time is not None:
                is_done = lambda model: (not model.running) or (model.schedule.time >= max_time)
            elif max_steps is not None:
                is_done = lambda model: (not model.running) or (model.schedule.steps >= max_steps)
            else:
                is_done = lambda model: not model.running

            if not model.schedule.agents:
                raise Exception(
                    "No agents in model. This is probably a bug. Make sure that the "
                    "model has agents scheduled after its initialization."
                )

            self.logger.debug(
                dedent(
                    f"""
                    Model stats:
                    Agent count: { model.schedule.get_agent_count() }
                    Topology size: { len(model.G) if hasattr(model, "G") else 0 }
                    """
                )
            )

            if self.debug:
                set_trace()

            while not is_done(model):
                self.logger.debug(
                    f"Simulation time {model.schedule.time}/{max_time}."
                )
                model.step()

        return model
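
    # For example (illustrative): with ``max_time=100`` and ``max_steps=None``, the loop
    # above keeps calling ``model.step()`` until ``model.running`` becomes False or
    # ``model.schedule.time`` reaches 100, whichever happens first.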

    def to_dict(self):
        d = asdict(self)
        return serialization.serialize_dict(d)

    def to_yaml(self):
        return yaml.dump(self.to_dict())

def iter_from_file(*files, **kwargs):
    for f in files:
        try:
            yield from iter_from_py(f, **kwargs)
        except ValueError:
            yield from iter_from_config(f, **kwargs)


def from_file(*args, **kwargs):
    return list(iter_from_file(*args, **kwargs))


def iter_from_config(*cfgs, **kwargs):
    for config in cfgs:
        configs = list(serialization.load_config(config))
        for config, path in configs:
            d = dict(config)
            d.update(kwargs)
            if "dir_path" not in d:
                d["dir_path"] = os.path.dirname(path)
            yield Simulation(**d)

def from_config(conf_or_path):
    lst = list(iter_from_config(conf_or_path))
    if len(lst) > 1:
        raise AttributeError("Provide only one configuration")
    return lst[0]
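
# A minimal YAML configuration (illustrative; the keys map directly to ``Simulation``
# fields, and the values shown here are hypothetical):
#
#     name: my_simulation
#     model: my_module.MyModel
#     parameters:
#       num_agents: 10
#     max_steps: 100
#     iterations: 3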

def iter_from_py(pyfile, module_name='imported_file', **kwargs):
    """Try to load every Simulation instance in a given Python file"""
    import importlib.util

    added = False
    sims = []
    assert not _AVOID_RUNNING
    with do_not_run():
        assert _AVOID_RUNNING
        spec = importlib.util.spec_from_file_location(module_name, pyfile)
        folder = os.path.dirname(pyfile)
        if folder not in sys.path:
            added = True
            sys.path.append(folder)
        if not spec:
            raise ValueError(f"{pyfile} does not seem to be a Python module")
        module = importlib.util.module_from_spec(spec)
        sys.modules[module_name] = module
        spec.loader.exec_module(module)
        for (_name, sim) in inspect.getmembers(module, lambda x: isinstance(x, Simulation)):
            sims.append(sim)
        for sim in _iter_queued():
            sims.append(sim)
        if not sims:
            for (_name, sim) in inspect.getmembers(
                module, lambda x: inspect.isclass(x) and issubclass(x, Simulation)
            ):
                sims.append(sim(**kwargs))
        del sys.modules[module_name]
    assert not _AVOID_RUNNING
    if not sims:
        raise AttributeError(f"No valid configurations found in {pyfile}")
    if added:
        sys.path.remove(folder)
    for sim in sims:
        yield replace(sim, **kwargs)


def from_py(pyfile):
    return next(iter_from_py(pyfile))
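
# Illustrative usage (``my_simulation.py`` is a hypothetical file that defines one or
# more ``Simulation`` instances, or calls ``Simulation(...).run()`` at module level):
#
#     sims = list(iter_from_py("my_simulation.py"))
#     envs = [env for sim in sims for env in sim.run()]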

def run_from_file(*files, **kwargs):
    for sim in iter_from_file(*files):
        logger.info(f"Using config(s): {sim.name}")
        sim.run(**kwargs)


def run(env, iterations=1, num_processes=1, dump=False, name="test", **kwargs):
    return Simulation(model=env, iterations=iterations, name=name, dump=dump, num_processes=num_processes, **kwargs).run()
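
# Convenience shortcut, e.g. (illustrative; ``MyModel`` is a hypothetical model or
# Environment class):
#
#     envs = run(MyModel, iterations=3, max_steps=50, parameters={"num_agents": 10})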