diff --git a/senpy/plugins/chunker/chunker.py b/senpy/plugins/split/split.py similarity index 92% rename from senpy/plugins/chunker/chunker.py rename to senpy/plugins/split/split.py index 375498b..cc8967d 100644 --- a/senpy/plugins/chunker/chunker.py +++ b/senpy/plugins/split/split.py @@ -3,12 +3,12 @@ from senpy.models import Entry from nltk.tokenize.punkt import PunktSentenceTokenizer from nltk.tokenize.simple import LineTokenizer import nltk -class ChunkerPlugin(AnalysisPlugin): +class SplitPlugin(AnalysisPlugin): def activate(self): nltk.download('punkt') def analyse_entry(self, entry, params): - chunker_type = params.get("type", "sentence") + chunker_type = params.get("delimiter", "sentence") original_id = entry.id original_text = entry.get("text", None) if chunker_type == "sentence": diff --git a/senpy/plugins/chunker/chunker.senpy b/senpy/plugins/split/split.senpy similarity index 88% rename from senpy/plugins/chunker/chunker.senpy rename to senpy/plugins/split/split.senpy index b2bac33..4a379a4 100644 --- a/senpy/plugins/chunker/chunker.senpy +++ b/senpy/plugins/split/split.senpy @@ -1,13 +1,13 @@ --- -name: chunker -module: chunker +name: split +module: split description: A sample plugin that chunks input text author: "@militarpancho" version: '0.1' url: "https://github.com/gsi-upm/senpy" requirements: {nltk} extra_params: - type: + delimiter: aliases: - type - t