'''
Senpy Models.

This implementation should mirror the JSON schema definition.
For compatibility with Py3 and for easier debugging, this new version drops
introspection and adds all arguments to the models.
'''
from __future__ import print_function
from future import standard_library
standard_library.install_aliases()

from future.utils import with_metaclass
from past.builtins import basestring

from jinja2 import Environment, BaseLoader

import time
import copy
import json
import os
import jsonref
from flask import Response as FlaskResponse
from pyld import jsonld

import logging
import jmespath

# The rdflib logger level is set before rdflib itself is imported below.
logging.getLogger('rdflib').setLevel(logging.WARN)
logger = logging.getLogger(__name__)

from rdflib import Graph


from .meta import BaseMeta, CustomDict, alias

DEFINITIONS_FILE = 'definitions.json'
CONTEXT_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            'schemas',
                            'context.jsonld')


def get_schema_path(schema_file, absolute=False):
    '''
    Return the filesystem path of a schema file.

    If ``absolute`` is true, ``schema_file`` is resolved with
    ``os.path.realpath``. Otherwise it is looked up inside the ``schemas``
    folder that lives next to this module.
    '''
    if absolute:
        return os.path.realpath(schema_file)
    else:
        return os.path.join(
            os.path.dirname(os.path.realpath(__file__)), 'schemas',
            schema_file)


def read_schema(schema_file, absolute=False):
    '''
    Load a schema with ``jsonref``, using the schema's own ``file://`` URI as
    the base URI for its references.
    '''
    schema_path = get_schema_path(schema_file, absolute)
    schema_uri = 'file://{}'.format(schema_path)
    with open(schema_path) as f:
        return jsonref.load(f, base_uri=schema_uri)


def dump_schema(schema):
    '''Serialize a schema to a string using ``jsonref.dumps``.'''
    return jsonref.dumps(schema)


def load_context(context):
    '''
    Normalize a JSON-LD context.

    * Falsy values are returned unchanged.
    * Lists are processed element by element (recursively).
    * Dictionaries are shallow-copied.
    * Strings are treated as a path to a JSON file. If the file cannot be
      opened (``IOError``), the string itself is returned.

    Raises ``AttributeError`` for any other type.
    '''
    logging.debug('Loading context: {}'.format(context))
    if not context:
        return context
    elif isinstance(context, list):
        contexts = []
        for c in context:
            contexts.append(load_context(c))
        return contexts
    elif isinstance(context, dict):
        return dict(context)
    elif isinstance(context, basestring):
        try:
            with open(context) as f:
                return dict(json.loads(f.read()))
        except IOError:
            return context
    else:
        raise AttributeError('Please, provide a valid context')


# Loaded once, at import time, from the context file shipped with the package
base_context = load_context(CONTEXT_PATH)


def register(rsubclass, rtype=None):
    '''Thin wrapper around ``BaseMeta.register``.'''
    BaseMeta.register(rsubclass, rtype)


class BaseModel(with_metaclass(BaseMeta, CustomDict)):
    '''
    Entities of the base model are a special kind of dictionary that emulates
    a JSON-LD object. The structure of the dictionary is checked via
    JSON-schema.
    For convenience, the values can also be accessed as attributes
    (a la Javascript). e.g.:

    >>> myobject.key == myobject['key']
    True
    >>> myobject.ns__name == myobject['ns:name']
    True

    Additionally, subclasses of this class can specify default values for
    their instances. These defaults are inherited by subclasses. e.g.:

    >>> class NewModel(BaseModel):
    ...     mydefault = 5
    >>> n1 = NewModel()
    >>> n1['mydefault'] == 5
    True
    >>> n1.mydefault = 3
    >>> n1['mydefault'] == 3
    True
    >>> n2 = NewModel()
    >>> n2.mydefault == 5
    True
    >>> class SubModel(NewModel):
    ...     pass
    >>> subn = SubModel()
    >>> subn.mydefault == 5
    True

    Lastly, every subclass that also specifies a schema will get registered,
    so it is possible to deserialize JSON and get the right type.
    i.e. to recover an instance of the original class from a plain JSON.

    '''

    # schema_file = DEFINITIONS_FILE
    _context = base_context["@context"]

    def __init__(self, *args, **kwargs):
        '''
        Build the model from the given arguments.

        The special keyword ``_auto_id`` (default ``False``) is removed from
        the arguments; when true, an ``@id`` is generated right away.
        '''
        auto_id = kwargs.pop('_auto_id', False)

        super(BaseModel, self).__init__(*args, **kwargs)

        if auto_id:
            # Accessing the property generates and stores the '@id'
            self.id

    @property
    def id(self):
        '''
        The ``@id`` of the entity. If it is missing, one is generated from the
        class name and the current time, and stored in the entity.
        '''
        if '@id' not in self:
            self['@id'] = 'prefix:{}_{}'.format(type(self).__name__,
                                                time.time())
        return self['@id']

    @id.setter
    def id(self, value):
        self['@id'] = value

    def flask(self,
              in_headers=False,
              headers=None,
              outformat='json-ld',
              **kwargs):
        """
        Return the values and error to be used in flask.
        So far, it returns a fixed context. We should store/generate different
        contexts if the plugin adds more aliases.

        When ``in_headers`` is true and the format is ``json-ld``, the context
        is not embedded in the body; a ``Link`` header pointing to
        ``context_uri`` (taken from ``kwargs``) is added instead.
        Remaining keyword arguments are forwarded to ``serialize``.
        """
        headers = headers or {}
        kwargs["with_context"] = not in_headers
        content, mimetype = self.serialize(format=outformat,
                                           with_mime=True,
                                           **kwargs)

        if outformat == 'json-ld' and in_headers:
            headers.update({
                "Link":
                ('<%s>;'
                 'rel="http://www.w3.org/ns/json-ld#context";'
                 ' type="application/ld+json"' % kwargs.get('context_uri'))
            })
        return FlaskResponse(
            response=content,
            status=self.get('status', 200),
            headers=headers,
            mimetype=mimetype)

    def serialize(self, format='json-ld', with_mime=False,
                  template=None, prefix=None, fields=None, **kwargs):
        '''
        Serialize the entity to a string.

        The output is chosen in this order:

        * ``template``: a jinja2 template string rendered with the entity's
          items as variables (mimetype ``text``).
        * ``fields``: a jmespath expression applied to the JSON-LD
          representation, dumped as JSON (mimetype ``text``).
        * ``format == 'json-ld'``: indented JSON with sorted keys.
        * ``format in ('turtle', 'ntriples')``: the JSON-LD is parsed into an
          rdflib graph and serialized in that format.

        Any other format raises ``Error``.

        Returns the content, or a ``(content, mimetype)`` tuple when
        ``with_mime`` is true. Extra keyword arguments are forwarded to
        ``jsonld``.
        '''
        js = self.jsonld(prefix=prefix, **kwargs)
        if template is not None:
            rtemplate = Environment(loader=BaseLoader).from_string(template)
            content = rtemplate.render(**self)
            mimetype = 'text'
        elif fields is not None:
            # Emulate field selection by constructing a template
            content = json.dumps(jmespath.search(fields, js))
            mimetype = 'text'
        elif format == 'json-ld':
            content = json.dumps(js, indent=2, sort_keys=True)
            mimetype = "application/json"
        elif format in ['turtle', 'ntriples']:
            content = json.dumps(js, indent=2, sort_keys=True)
            logger.debug(js)
            context = [self._context, {'prefix': prefix, '@base': prefix}]
            g = Graph().parse(
                data=content,
                format='json-ld',
                prefix=prefix,
                context=context)
            # `prefix` is a named parameter, so it never shows up in kwargs
            logger.debug('Parsing with prefix: {}'.format(prefix))
            content = g.serialize(format=format, prefix=prefix)
            # Older rdflib versions return bytes, newer ones return text
            if isinstance(content, bytes):
                content = content.decode('utf-8')
            mimetype = 'text/{}'.format(format)
        else:
            raise Error('Unknown outformat: {}'.format(format))
        if with_mime:
            return content, mimetype
        else:
            return content

    def jsonld(self,
               with_context=False,
               context_uri=None,
               prefix=None,
               base=None,
               expanded=False,
               **kwargs):
        '''
        Return the JSON-LD representation of the entity.

        The starting point is ``self.serializable(**kwargs)``. If ``expanded``
        is true, the result is expanded with pyld, using the class context
        plus ``prefix`` (mapped to both ``prefix`` and ``endpoint``).

        The ``@context`` key is removed unless ``with_context`` is true, in
        which case it is set to ``context_uri`` if given, or to the class
        context otherwise.

        ``base`` is accepted but not used here.
        '''

        result = self.serializable(**kwargs)

        if expanded:
            result = jsonld.expand(
                result,
                options={
                    'expandContext': [
                        self._context,
                        {
                            'prefix': prefix,
                            'endpoint': prefix
                        }
                    ]
                }
            )[0]
        if not with_context:
            try:
                del result['@context']
            except KeyError:
                pass
        elif context_uri:
            result['@context'] = context_uri
        else:
            result['@context'] = self._context

        return result

    def validate(self, obj=None):
        '''
        Validate ``obj`` (this entity, if omitted) with the class validator.

        Objects that provide a ``jsonld`` method are converted with it before
        validation.
        '''
        # Only fall back to self when nothing was passed: an explicitly given
        # empty object must be validated as is, not replaced by self.
        if obj is None:
            obj = self
        if hasattr(obj, "jsonld"):
            obj = obj.jsonld()
        self._validator.validate(obj)

    def prov(self, another):
        '''Mark this entity as generated by ``another`` (using its id).'''
        self['prov:wasGeneratedBy'] = another.id


def subtypes():
    '''Return the registry of subtypes kept by ``BaseMeta``.'''
    return BaseMeta._subtypes


def from_dict(indict, cls=None, warn=True):
    '''
    Build a model instance from a plain dictionary.

    If ``cls`` is not given, the class is looked up in ``subtypes()`` using
    the ``@type`` of the dictionary, falling back to ``BaseModel``.
    A warning is logged when the result is a plain ``BaseModel`` and ``warn``
    is true.

    Nested dictionaries (directly, or inside lists) are converted
    recursively. The ``@context`` value is not converted.
    '''
    if not cls:
        target = indict.get('@type', None)
        cls = BaseModel
        try:
            cls = subtypes()[target]
        except KeyError:
            pass

    if cls == BaseModel and warn:
        logger.warning('Created an instance of an unknown model')

    outdict = dict()
    for k, v in indict.items():
        if k == '@context':
            # The context is not turned into a model; it is copied below
            pass
        elif isinstance(v, dict):
            v = from_dict(indict[k])
        elif isinstance(v, list):
            # Work on a copy of the list, so the input is not modified
            v = v[:]
            for ix, v2 in enumerate(v):
                if isinstance(v2, dict):
                    v[ix] = from_dict(v2)
        outdict[k] = copy.copy(v)
    return cls(**outdict)


def from_string(string, **kwargs):
    '''Parse a JSON string and convert the result with ``from_dict``.'''
    return from_dict(json.loads(string), **kwargs)


def from_json(injson, **kwargs):
    '''Parse a JSON string and convert the result with ``from_dict``.'''
    indict = json.loads(injson)
    return from_dict(indict, **kwargs)


class Entry(BaseModel):
    schema = 'entry'

    text = alias('nif:isString')
    sentiments = alias('marl:hasOpinion', [])
    emotions = alias('onyx:hasEmotionSet', [])


class Sentiment(BaseModel):
    schema = 'sentiment'

    polarity = alias('marl:hasPolarity')
    polarityValue = alias('marl:polarityValue')


class Error(BaseModel, Exception):
    # A model that can also be raised as an exception
    schema = 'error'

    def __init__(self, message='Generic senpy exception', *args, **kwargs):
        Exception.__init__(self, message)
        super(Error, self).__init__(*args, **kwargs)
        self.message = message

    def __str__(self):
        # The list of errors is only included if the instance has one
        if not hasattr(self, 'errors'):
            return self.message
        return '{}:\n\t{}'.format(self.message, self.errors)

    def __hash__(self):
        # Explicitly reuse the hash of Exception.
        # NOTE(review): presumably needed because the dict-like base is not
        # hashable - confirm against CustomDict.
        return Exception.__hash__(self)


class AggregatedEvaluation(BaseModel):
    schema = 'aggregatedEvaluation'

    evaluations = alias('senpy:evaluations', [])


class Dataset(BaseModel):
    schema = 'dataset'


class Datasets(BaseModel):
    schema = 'datasets'

    datasets = []


class Emotion(BaseModel):
    schema = 'emotion'


class EmotionConversion(BaseModel):
    schema = 'emotionConversion'


class EmotionConversionPlugin(BaseModel):
    schema = 'emotionConversionPlugin'


class EmotionAnalysis(BaseModel):
    schema = 'emotionAnalysis'


class EmotionModel(BaseModel):
    schema = 'emotionModel'
    onyx__hasEmotionCategory = []


class EmotionPlugin(BaseModel):
    schema = 'emotionPlugin'


class EmotionSet(BaseModel):
    schema = 'emotionSet'

    onyx__hasEmotion = []


class Evaluation(BaseModel):
    schema = 'evaluation'

    metrics = alias('senpy:metrics', [])


class Entity(BaseModel):
    schema = 'entity'


class Help(BaseModel):
    schema = 'help'


class Metric(BaseModel):
    schema = 'metric'


class Parameter(BaseModel):
    schema = 'parameter'


class Plugins(BaseModel):
    schema = 'plugins'

    plugins = []


class Response(BaseModel):
    schema = 'response'


class Results(BaseModel):
    schema = 'results'

    _terse_keys = ['entries', ]

    activities = []
    entries = []

    def activity(self, id):
        # Return the first activity with the given id, or None
        for i in self.activities:
            if i.id == id:
                return i
        return None


class SentimentPlugin(BaseModel):
    schema = 'sentimentPlugin'


class Suggestion(BaseModel):
    schema = 'suggestion'


class Topic(BaseModel):
    schema = 'topic'


class Analysis(BaseModel):
    '''
    A prov:Activity that results of executing a Plugin on an entry with a set
    of parameters.
    '''
    schema = 'analysis'

    parameters = alias('prov:used', [])
    algorithm = alias('prov:wasAssociatedWith', [])

    @property
    def params(self):
        '''
        The parameters as a plain ``name -> value`` dictionary, which also
        includes the ``algorithm``.
        '''
        outdict = {}
        outdict['algorithm'] = self.algorithm
        for param in self.parameters:
            outdict[param['name']] = param['value']
        return outdict

    @params.setter
    def params(self, value):
        for k, v in value.items():
            for param in self.parameters:
                if param.name == k:
                    param.value = v
                    break
            else:
                # No existing parameter has that name: add a new one
                self.parameters.append(Parameter(name=k, value=v))  # noqa: F821

    def param(self, key, default=None):
        '''Return the value of the parameter named ``key``, or ``default``.'''
        for param in self.parameters:
            if param['name'] == key:
                return param['value']
        return default

    @property
    def plugin(self):
        return self._plugin

    @plugin.setter
    def plugin(self, value):
        # Keep the plugin object, and store its id in the entity
        self._plugin = value
        self['prov:wasAssociatedWith'] = value.id

    def run(self, request):
        '''Process the request with the plugin linked to this activity.'''
        return self.plugin.process(request, self)


class Plugin(BaseModel):
    schema = 'plugin'
    extra_params = {}

    def activity(self, parameters=None):
        '''Generate an Analysis (prov:Activity) from this plugin and the given parameters'''
        a = Analysis()
        a.plugin = self
        if parameters:
            a.params = parameters
        return a


# More classes could be added programmatically


def _class_from_schema(name, schema=None, schema_file=None, base_classes=None):
    '''
    Create a new model class through ``BaseMeta``.

    The class gets a ``schema`` attribute (``schema`` if given, else ``name``)
    or a ``schema_file`` attribute (if only ``schema_file`` is given).
    ``BaseModel`` is always added as the last base class, and the class name
    is ``name`` with its first letter in upper case.
    '''
    # Work on a copy: the caller's list must not be modified. Class bases
    # are passed as a tuple, which is what type() expects.
    bases = list(base_classes or [])
    bases.append(BaseModel)
    attrs = {}
    if schema:
        attrs['schema'] = schema
    elif schema_file:
        attrs['schema_file'] = schema_file
    else:
        attrs['schema'] = name
    name = "".join((name[0].upper(), name[1:]))
    return BaseMeta(name, tuple(bases), attrs)


def _add_class_from_schema(*args, **kwargs):
    '''
    Create a class with ``_class_from_schema`` and add it to this module's
    globals under its own name.
    '''
    generatedClass = _class_from_schema(*args, **kwargs)
    globals()[generatedClass.__name__] = generatedClass
    del generatedClass