Rename the chunker plugin to split, according to issue #37

chunker
militarpancho 7 years ago
parent f8ca595bc9
commit 83e2d415a1

@ -3,12 +3,12 @@ from senpy.models import Entry
from nltk.tokenize.punkt import PunktSentenceTokenizer from nltk.tokenize.punkt import PunktSentenceTokenizer
from nltk.tokenize.simple import LineTokenizer from nltk.tokenize.simple import LineTokenizer
import nltk import nltk
class ChunkerPlugin(AnalysisPlugin): class SplitPlugin(AnalysisPlugin):
def activate(self): def activate(self):
nltk.download('punkt') nltk.download('punkt')
def analyse_entry(self, entry, params): def analyse_entry(self, entry, params):
chunker_type = params.get("type", "sentence") chunker_type = params.get("delimiter", "sentence")
original_id = entry.id original_id = entry.id
original_text = entry.get("text", None) original_text = entry.get("text", None)
if chunker_type == "sentence": if chunker_type == "sentence":

@ -1,13 +1,13 @@
--- ---
name: chunker name: split
module: chunker module: split
description: A sample plugin that chunks input text description: A sample plugin that chunks input text
author: "@militarpancho" author: "@militarpancho"
version: '0.1' version: '0.1'
url: "https://github.com/gsi-upm/senpy" url: "https://github.com/gsi-upm/senpy"
requirements: {nltk} requirements: {nltk}
extra_params: extra_params:
type: delimiter:
aliases: aliases:
- type - type
- t - t
Loading…
Cancel
Save