mirror of
https://github.com/gsi-upm/senpy
synced 2024-11-22 16:12:29 +00:00
Change name to split, according to issue #37
This commit is contained in:
parent
f8ca595bc9
commit
83e2d415a1
@@ -3,12 +3,12 @@ from senpy.models import Entry
|
|||||||
from nltk.tokenize.punkt import PunktSentenceTokenizer
|
from nltk.tokenize.punkt import PunktSentenceTokenizer
|
||||||
from nltk.tokenize.simple import LineTokenizer
|
from nltk.tokenize.simple import LineTokenizer
|
||||||
import nltk
|
import nltk
|
||||||
class ChunkerPlugin(AnalysisPlugin):
|
class SplitPlugin(AnalysisPlugin):
|
||||||
def activate(self):
|
def activate(self):
|
||||||
nltk.download('punkt')
|
nltk.download('punkt')
|
||||||
|
|
||||||
def analyse_entry(self, entry, params):
|
def analyse_entry(self, entry, params):
|
||||||
chunker_type = params.get("type", "sentence")
|
chunker_type = params.get("delimiter", "sentence")
|
||||||
original_id = entry.id
|
original_id = entry.id
|
||||||
original_text = entry.get("text", None)
|
original_text = entry.get("text", None)
|
||||||
if chunker_type == "sentence":
|
if chunker_type == "sentence":
|
@@ -1,13 +1,13 @@
|
|||||||
---
|
---
|
||||||
name: chunker
|
name: split
|
||||||
module: chunker
|
module: split
|
||||||
description: A sample plugin that chunks input text
|
description: A sample plugin that chunks input text
|
||||||
author: "@militarpancho"
|
author: "@militarpancho"
|
||||||
version: '0.1'
|
version: '0.1'
|
||||||
url: "https://github.com/gsi-upm/senpy"
|
url: "https://github.com/gsi-upm/senpy"
|
||||||
requirements: {nltk}
|
requirements: {nltk}
|
||||||
extra_params:
|
extra_params:
|
||||||
type:
|
delimiter:
|
||||||
aliases:
|
aliases:
|
||||||
- type
|
- type
|
||||||
- t
|
- t
|
Loading…
Reference in New Issue
Block a user