Improved docs

Fixed several bugs. Added convenience methods in soil.analysis.
2025-12-10 07:48:16 +00:00 · 2017-10-18 20:28:42 +02:00
parent 78364d89d5
commit a7c51742f6
69 changed files with 30969 additions and 3300 deletions
--- a/soil/init.py
+++ b/soil/init.py
@@ -4,15 +4,20 @@ import os
 import pdb
 import logging

-__version__ = "0.9.7"
+__version__ = "0.10"

 try:
    basestring
 except NameError:
    basestring = str

-logging.basicConfig()#format=FORMAT)
+logging.basicConfig()
+
+from . import agents
+from . import simulation
+from . import environment
 from . import utils
+from . import analysis


 def main():
--- a/soil/agents/BassModel.py
+++ b/soil/agents/BassModel.py
@@ -1,8 +1,8 @@
 import random
-from . import NetworkAgent
+from . import BaseAgent


-class BassModel(NetworkAgent):
+class BassModel(BaseAgent):
    """
    Settings:
        innovation_prob
--- a/soil/agents/BigMarketModel.py
+++ b/soil/agents/BigMarketModel.py
@@ -1,8 +1,8 @@
 import random
-from . import NetworkAgent
+from . import BaseAgent


-class BigMarketModel(NetworkAgent):
+class BigMarketModel(BaseAgent):
    """
    Settings:
        Names:
--- a/soil/agents/CounterModel.py
+++ b/soil/agents/CounterModel.py
@@ -1,7 +1,7 @@
-from . import NetworkAgent
+from . import BaseAgent


-class CounterModel(NetworkAgent):
+class CounterModel(BaseAgent):
    """
    Dummy behaviour. It counts the number of nodes in the network and neighbors
    in each step and adds it to its state.
@@ -16,7 +16,7 @@ class CounterModel(NetworkAgent):
        self.state['total'] = total


-class AggregatedCounter(NetworkAgent):
+class AggregatedCounter(BaseAgent):
    """
    Dummy behaviour. It counts the number of nodes in the network and neighbors
    in each step and adds it to its state.
@@ -28,4 +28,5 @@ class AggregatedCounter(NetworkAgent):
        neighbors = len(list(self.get_neighboring_agents()))
        self.state['times'] = self.state.get('times', 0) + 1
        self.state['neighbors'] = self.state.get('neighbors', 0) + neighbors
-        self.state['total'] = self.state.get('total', 0) + total
+        self.state['total'] = total = self.state.get('total', 0) + total
+        self.debug('Running for step: {}. Total: {}'.format(self.now, total))
--- a/soil/agents/ModelM2.py
+++ b/soil/agents/ModelM2.py
@@ -1,9 +1,9 @@
 import random
 import numpy as np
-from . import NetworkAgent
+from . import BaseAgent


-class SpreadModelM2(NetworkAgent):
+class SpreadModelM2(BaseAgent):
    """
    Settings:
        prob_neutral_making_denier
@@ -104,7 +104,7 @@ class SpreadModelM2(NetworkAgent):
                neighbor.state['id'] = 2  # Cured


-class ControlModelM2(NetworkAgent):
+class ControlModelM2(BaseAgent):
    """
    Settings:
        prob_neutral_making_denier
--- a/soil/agents/SISaModel.py
+++ b/soil/agents/SISaModel.py
@@ -1,9 +1,9 @@
 import random
 import numpy as np
-from . import FSM, NetworkAgent, state
+from . import FSM, state


-class SISaModel(FSM, NetworkAgent):
+class SISaModel(FSM):
    """
    Settings:
        neutral_discontent_spon_prob
--- a/soil/agents/SentimentCorrelationModel.py
+++ b/soil/agents/SentimentCorrelationModel.py
@@ -1,8 +1,8 @@
 import random
-from . import NetworkAgent
+from . import BaseAgent


-class SentimentCorrelationModel(NetworkAgent):
+class SentimentCorrelationModel(BaseAgent):
    """
    Settings:
        outside_effects_prob
--- a/soil/agents/init.py
+++ b/soil/agents/init.py
@@ -72,9 +72,10 @@ class BaseAgent(nxsim.BaseAgent, metaclass=MetaAgent):
            return None

    def run(self):
+        interval = self.env.interval
        while self.alive:
            res = self.step()
-            yield res or self.env.timeout(self.env.interval)
+            yield res or self.env.timeout(interval)

    def die(self, remove=False):
        self.alive = False
@@ -99,7 +100,10 @@ class BaseAgent(nxsim.BaseAgent, metaclass=MetaAgent):
            count += 1
        return count

-    def get_agents(self, state_id=None, limit_neighbors=False, **kwargs):
+    def count_neighboring_agents(self, state_id=None):
+        '''
+        Return the length of what the parent class' ``get_agents`` gives
+        for ``state_id`` when the search is limited to neighbors.
+        '''
+        neighbors = super().get_agents(state_id, limit_neighbors=True)
+        return len(neighbors)
+
+    def get_agents(self, state_id=None, limit_neighbors=False, iterator=False, **kwargs):
        if limit_neighbors:
            agents = super().get_agents(state_id, limit_neighbors)
        else:
@@ -113,9 +117,13 @@ class BaseAgent(nxsim.BaseAgent, metaclass=MetaAgent):
                    return False
            return True

-        return filter(matches_all, agents)
+        f = filter(matches_all, agents)
+        if iterator:
+            return f
+        return list(f)

-    def log(self, message, level=logging.INFO, **kwargs):
+    def log(self, message, *args, level=logging.INFO, **kwargs):
+        message = message + " ".join(str(i) for i in args)
        message = "\t@{:>5}:\t{}".format(self.now, message)
        for k, v in kwargs:
            message += " {k}={v} ".format(k, v)
@@ -130,11 +138,6 @@ class BaseAgent(nxsim.BaseAgent, metaclass=MetaAgent):
    def info(self, *args, **kwargs):
        return self.log(*args, level=logging.INFO, **kwargs)

-class NetworkAgent(BaseAgent, nxsim.BaseNetworkAgent):
-
-    def count_neighboring_agents(self, state_id=None):
-        return self.count_agents(state_id, limit_neighbors=True)
-

 def state(func):

@@ -150,7 +153,7 @@ def state(func):
            try:
                self.state['id'] = next_state.id
            except AttributeError:
-                raise NotImplemented('State id %s is not valid.' % next_state)
+                raise ValueError('State id %s is not valid.' % next_state)
        return when

    func_wrapper.id = func.__name__
--- a/soil/analysis.py
+++ b/soil/analysis.py
@@ -4,20 +4,175 @@ import glob
 import yaml
 from os.path import join

+from . import utils

-def get_data(pattern, process=True, attributes=None):
+
+def read_data(*args, group=False, **kwargs):
+    '''
+    Read every trial produced by ``_read_data(*args, **kwargs)``.
+
+    With ``group=True`` the trials are passed through ``group_trials``;
+    otherwise they are returned as a plain list.
+    '''
+    trials = _read_data(*args, **kwargs)
+    return group_trials(trials) if group else list(trials)
+
+
+def _read_data(pattern, keys=None, convert_types=False,
+               process=None, from_csv=False, **kwargs):
+    '''
+    Generate one ``(config_file, dataframe, config)`` tuple per trial found
+    in the folders matching ``pattern``.
+
+    The first ``*.yml`` file of each folder is loaded as the configuration.
+    Trials are read from ``*.environment.csv`` files when ``from_csv`` is
+    true, and from ``*.db.sqlite`` files otherwise. ``keys`` is only passed
+    on to ``read_sql``. When ``process`` is given it is called as
+    ``process(df, **kwargs)`` and its result is yielded in place of ``df``.
+
+    Note that the ``process`` argument shadows the module-level ``process``
+    function inside this generator.
+    '''
    for folder in glob.glob(pattern):
        config_file = glob.glob(join(folder, '*.yml'))[0]
+        # NOTE(review): yaml.load() is called without an explicit Loader and
+        # the file handle is never closed -- confirm the configs are trusted.
        config = yaml.load(open(config_file))
-        for trial_data in sorted(glob.glob(join(folder, '*.environment.csv'))):
-            df = pd.read_csv(trial_data)
-            if process:
-                if attributes is not None:
-                    df = df[df['attribute'].isin(attributes)]
-                df = df.pivot_table(values='attribute', index='tstep', columns=['value'], aggfunc='count').fillna(0)
-            yield config_file, df, config
+        df = None
+        if from_csv:
+            for trial_data in sorted(glob.glob(join(folder,
+                                                    '*.environment.csv'))):
+                df = read_csv(trial_data, convert_types=convert_types)
+                if process:
+                    df = process(df, **kwargs)
+                yield config_file, df, config
+        else:
+            for trial_data in sorted(glob.glob(join(folder, '*.db.sqlite'))):
+                df = read_sql(trial_data, convert_types=convert_types,
+                              keys=keys)
+                if process:
+                    df = process(df, **kwargs)
+                yield config_file, df, config
+
+
+def read_csv(filename, keys=None, convert_types=False, **kwargs):
+    '''
+    Read a CSV in canonical form: ::
+
+        <agent_id, t_step, key, value, value_type>
+
+    :param filename: path of the CSV file.
+    :param keys: optional collection; only rows whose ``key`` is in it are kept.
+    :param convert_types: if true, run ``convert_types_slow`` on the rows.
+    :param kwargs: accepted for call compatibility; not used.
+    :return: the resulting dataframe.
+    '''
+    df = pd.read_csv(filename)
+    # Filter first: the conversion works row by row, so there is no point in
+    # converting rows that are about to be discarded.
+    if keys:
+        df = df[df['key'].isin(keys)]
+    if convert_types:
+        df = convert_types_slow(df)
+    return df
+
+
+def read_sql(filename, keys=None, convert_types=False, limit=-1):
+    '''
+    Read the ``history`` table of a sqlite file into a dataframe.
+
+    :param filename: path of the sqlite database file.
+    :param keys: optional iterable; only rows whose ``key`` is in it are read.
+    :param convert_types: if true, run ``convert_types_slow`` on the result.
+    :param limit: value for the SQL ``limit`` clause (sqlite treats a
+        negative limit as "no upper bound").
+    :return: the resulting dataframe.
+    '''
+    condition = ''
+    if keys:
+        # Keys are embedded as SQL string literals: double every single quote
+        # so a key such as ``it's`` cannot break (or inject into) the query.
+        quoted = ("'{}'".format(str(k).replace("'", "''")) for k in keys)
+        condition = 'where key in ({})'.format(','.join(quoted))
+    # int() rejects anything that is not a plain number before it reaches SQL.
+    query = 'select * from history {} limit {}'.format(condition, int(limit))
+    df = pd.read_sql_query(query, 'sqlite:///{}'.format(filename))
+    if convert_types:
+        df = convert_types_slow(df)
+    return df
+
+
+def convert_row(row):
+    '''
+    Replace ``row['value']`` with ``utils.convert(value, value_type)`` and
+    return the same row.
+    '''
+    converted = utils.convert(row['value'], row['value_type'])
+    row['value'] = converted
+    return row
+
+
+def convert_types_slow(df):
+    '''
+    Convert the ``value`` of every row according to its ``value_type``.
+
+    This is a slow operation: ``convert_row`` is applied row by row.
+
+    :param df: dataframe in canonical form (needs ``value`` and
+        ``value_type`` columns).
+    :return: a new dataframe with the converted values.
+    '''
+    # A per-key ``astype`` pass used to run here, but it assigned into
+    # temporary filtered copies and so never changed ``df``: it could only
+    # emit SettingWithCopyWarning or raise on type names numpy rejects.
+    return df.apply(convert_row, axis=1)
+
+def split_df(df):
+    '''
+    Separate the environment history from the agents' history.
+
+    Rows whose ``agent_id`` equals ``'env'`` belong to the environment.
+    Returns an ``(env, agents)`` pair; when every row is on one side the
+    other element is ``None``.
+    '''
+    is_env = df['agent_id'] == 'env'
+    env_rows = is_env.sum()
+    if env_rows == len(df):
+        return df, None
+    if env_rows == 0:
+        return None, df
+    return df[is_env], df[~is_env]
+
+
+def process(df, **kwargs):
+    '''
+    Turn a dataframe in canonical form
+    ``(t_step, agent_id, key, value, value_type)`` into a pair of dataframes
+    with one column per key: first the environment history, then the
+    agents' history. ``kwargs`` are forwarded to ``process_one``.
+    '''
+    env_df, agents_df = split_df(df)
+    processed_env = process_one(env_df, **kwargs)
+    processed_agents = process_one(agents_df, **kwargs)
+    return processed_env, processed_agents
+
+
+def get_types(df):
+    '''
+    Map every ``key`` in ``df`` to the first ``value_type`` recorded for it.
+
+    :param df: dataframe with ``key`` and ``value_type`` columns.
+    :return: dict ``{key: value_type}``.
+    '''
+    dtypes = df.groupby(by=['key'])['value_type'].unique()
+    # Series.iteritems() was removed in pandas 2.0; items() is the
+    # equivalent spelling and also exists in older releases.
+    return {k: v[0] for k, v in dtypes.items()}
+
+
+def process_one(df, *keys, columns=['key'], values='value',
+                index=['t_step', 'agent_id'], aggfunc='first', **kwargs):
+    '''
+    Pivot a dataframe in canonical form
+    ``(t_step, agent_id, key, value, value_type)`` into a dataframe with a
+    column per key.
+
+    ``None`` is passed through untouched. When ``keys`` are given only those
+    keys are kept. Missing cells are filled with 0 and the result is cast
+    using the types reported by ``get_types``.
+    '''
+    if df is None:
+        return None
+    selected = df[df['key'].isin(keys)] if keys else df
+    key_types = get_types(selected)
+    pivoted = selected.pivot_table(values=values, index=index,
+                                   columns=columns, aggfunc=aggfunc,
+                                   **kwargs)
+    return pivoted.fillna(0).astype(key_types)
+
+
+def get_count_processed(df, *keys):
+    '''
+    Unstack ``df`` and count, for every resulting row, how many times each
+    value appears. When ``keys`` are given only those columns are used.
+    '''
+    selected = df[list(keys)] if keys else df
+    unstacked = selected.unstack()
+    return unstacked.apply(pd.Series.value_counts, axis=1)
+
+
+def get_count(df, *keys):
+    '''
+    Count the rows of ``df`` per ``(t_step, key, value)``.
+
+    The result has one row per ``t_step`` and one column per ``(key, value)``
+    pair; missing combinations are 0. When ``keys`` are given only rows with
+    those keys are counted.
+    '''
+    if keys:
+        df = df[df['key'].isin(keys)]
+    sizes = df.groupby(by=['t_step', 'key', 'value']).size()
+    counts = sizes.unstack(level=[1, 2])
+    return counts.fillna(0)
+
+
+def get_value(df, *keys, aggfunc='sum'):
+    '''
+    Pivot ``df`` with ``process_one`` and aggregate the result per
+    ``t_step`` using ``aggfunc``. When ``keys`` are given only rows with
+    those keys are used.
+    '''
+    selected = df[df['key'].isin(keys)] if keys else df
+    wide = process_one(selected, *keys)
+    return wide.groupby(level='t_step').agg(aggfunc)


 def plot_all(*args, **kwargs):
-    for config_file, df, config in sorted(get_data(*args, **kwargs)):
+    '''
+    Read all the trial data and plot the result of applying a function on them.
+
+    All arguments are forwarded to ``do_all``. Returns the list of plotted
+    dataframes, one per item yielded by ``do_all``.
+    '''
+    dfs = do_all(*args, **kwargs)
+    ps = []
+    for line in dfs:
+        f, df, config = line
+        # NOTE(review): do_all() already calls ``.plot(title=...)`` on each
+        # result, so every dataframe is plotted twice -- confirm intended.
        df.plot(title=config['name'])
+        ps.append(df)
+    return ps
+
+def do_all(pattern, func, *keys, include_env=False, **kwargs):
+    '''
+    Apply ``func(df, *keys, **kwargs)`` to every trial read from ``pattern``,
+    plot each result and yield ``(config_file, result, config)``.
+
+    ``include_env`` is accepted but not used.
+    '''
+    for trial in read_data(pattern, keys=keys):
+        config_file, df, config = trial
+        result = func(df, *keys, **kwargs)
+        result.plot(title=config['name'])
+        yield config_file, result, config
+
+
+def group_trials(trials, aggfunc=['mean', 'min', 'max', 'std']):
+    '''
+    Concatenate the dataframes of several trials and aggregate them by their
+    first index level with ``aggfunc``.
+
+    Items of ``trials`` may be dataframes or tuples whose second element is
+    the dataframe. The aggregation name becomes the first column level.
+    '''
+    frames = [t[1] if isinstance(t, tuple) else t for t in trials]
+    stacked = pd.concat(frames)
+    aggregated = stacked.groupby(level=0).agg(aggfunc)
+    return aggregated.reorder_levels([2, 0, 1], axis=1)
+
+
+
--- a/soil/environment.py
+++ b/soil/environment.py
@@ -41,17 +41,20 @@ class SoilEnvironment(nxsim.NetworkEnvironment):
        # executed before network agents
        self['SEED'] = seed or time.time()
        random.seed(self['SEED'])
+        self.process(self.save_state())
        self.environment_agents = environment_agents or []
        self.network_agents = network_agents or []
-        self.process(self.save_state())
        if self.dump:
-            self._db_path = os.path.join(self.get_path(), 'db.sqlite')
+            self._db_path = os.path.join(self.get_path(), '{}.db.sqlite'.format(self.name))
        else:
            self._db_path = ":memory:"
        self.create_db(self._db_path)

    def create_db(self, db_path=None):
        db_path = db_path or self._db_path
+        if os.path.exists(db_path):
+            newname = db_path.replace('db.sqlite', 'backup{}.sqlite'.format(time.time()))
+            os.rename(db_path, newname)
        self._db = sqlite3.connect(db_path)
        with self._db:
            self._db.execute('''CREATE TABLE IF NOT EXISTS history (agent_id text, t_step int, key text, value text, value_type text)''')
@@ -118,24 +121,25 @@ class SoilEnvironment(nxsim.NetworkEnvironment):
        return self.G.add_edge(agent1, agent2)

    def run(self, *args, **kwargs):
-        self._save_state()
        super().run(*args, **kwargs)
-        self._save_state()

    def _save_state(self, now=None):
        # for agent in self.agents:
        #     agent.save_state()
+        utils.logger.debug('Saving state @{}'.format(self.now))
        with self._db:
            self._db.executemany("insert into history(agent_id, t_step, key, value, value_type) values (?, ?, ?, ?, ?)", self.state_to_tuples(now=now))

    def save_state(self):
+        self._save_state()
        while self.peek() != simpy.core.Infinity:
-            utils.logger.info('Step: {}'.format(self.now))
+            delay = max(self.peek() - self.now, self.interval)
+            utils.logger.debug('Step: {}'.format(self.now))
            ev = self.event()
            ev._ok = True
            # Schedule the event with minimum priority so
-            # that it executes after all agents are done
-            self.schedule(ev, -1, self.peek())
+            # that it executes before all agents
+            self.schedule(ev, -999, delay)
            yield ev
            self._save_state()

@@ -215,7 +219,7 @@ class SoilEnvironment(nxsim.NetworkEnvironment):

        with open(csv_name, 'w') as f:
            cr = csv.writer(f)
-            cr.writerow(('agent_id', 'tstep', 'attribute', 'value'))
+            cr.writerow(('agent_id', 't_step', 'key', 'value', 'value_type'))
            for i in self.history_to_tuples():
                cr.writerow(i)

@@ -229,14 +233,16 @@ class SoilEnvironment(nxsim.NetworkEnvironment):
        if now is None:
            now = self.now
        for k, v in self.environment_params.items():
-            yield 'env', now, k, v, type(v).__name__
+            v, v_t = utils.repr(v)
+            yield 'env', now, k, v, v_t
        for agent in self.agents:
            for k, v in agent.state.items():
-                yield agent.id, now, k, v, type(v).__name__
+                v, v_t = utils.repr(v)
+                yield agent.id, now, k, v, v_t

    def history_to_tuples(self):
        with self._db:
-            res = self._db.execute("select agent_id, t_step, key, value from history ").fetchall()
+            res = self._db.execute("select agent_id, t_step, key, value, value_type from history ").fetchall()
        yield from res

    def history_to_graph(self):
--- a/soil/simulation.py
+++ b/soil/simulation.py
@@ -67,7 +67,7 @@ class SoilSimulation(NetworkSimulation):
        self.default_state = default_state or {}
        self.dir_path = dir_path or os.getcwd()
        self.interval = interval
-        self.seed = seed
+        self.seed = str(seed) or str(time.time())
        self.dump = dump
        self.environment_params = environment_params or {}

@@ -168,7 +168,7 @@ class SoilSimulation(NetworkSimulation):
        env_name = '{}_trial_{}'.format(self.name, trial_id)
        env = environment.SoilEnvironment(name=env_name,
                                          topology=self.topology.copy(),
-                                          seed=self.seed,
+                                          seed=self.seed+env_name,
                                          initial_time=0,
                                          dump=self.dump,
                                          interval=self.interval,
--- a/soil/utils.py
+++ b/soil/utils.py
@@ -13,7 +13,6 @@ from contextlib import contextmanager

 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
-logger.addHandler(logging.StreamHandler())


 def load_network(network_params, dir_path=None):
@@ -86,6 +85,12 @@ def agent_from_distribution(distribution, value=-1):
    raise Exception('Distribution for value {} not found in: {}'.format(value, distribution))


+def repr(v):
+    '''
+    Return a ``(value, type_name)`` pair for ``v``.
+
+    Booleans are encoded as ``"true"`` or the empty string; any other value
+    is returned unchanged next to the name of its type.
+    '''
+    if not isinstance(v, bool):
+        return v, type(v).__name__
+    encoded = "true" if v else ""
+    return encoded, bool.__name__
+
 def convert(value, type_):
    import importlib
    try: