mirror of
				https://github.com/gsi-upm/senpy
				synced 2025-10-25 04:38:19 +00:00 
			
		
		
		
	Rename the chunker plugin to split (and its `type` parameter to `delimiter`), according to issue #37
This commit is contained in:
		| @@ -3,12 +3,12 @@ from senpy.models import Entry | ||||
| from nltk.tokenize.punkt import PunktSentenceTokenizer | ||||
| from nltk.tokenize.simple import LineTokenizer | ||||
| import nltk | ||||
| class ChunkerPlugin(AnalysisPlugin): | ||||
| class SplitPlugin(AnalysisPlugin): | ||||
|     def activate(self): | ||||
|         nltk.download('punkt') | ||||
|      | ||||
|     def analyse_entry(self, entry, params): | ||||
|         chunker_type = params.get("type", "sentence") | ||||
|         chunker_type = params.get("delimiter", "sentence") | ||||
|         original_id = entry.id | ||||
|         original_text = entry.get("text", None) | ||||
|         if chunker_type == "sentence": | ||||
| @@ -1,13 +1,13 @@ | ||||
| --- | ||||
| name: chunker | ||||
| module: chunker | ||||
| name: split | ||||
| module: split | ||||
| description: A sample plugin that chunks input text | ||||
| author: "@militarpancho" | ||||
| version: '0.1' | ||||
| url: "https://github.com/gsi-upm/senpy" | ||||
| requirements: {nltk} | ||||
| extra_params: | ||||
|   type: | ||||
|   delimiter: | ||||
|     aliases: | ||||
|     - type | ||||
|     - t | ||||
		Reference in New Issue
	
	Block a user