mirror of
https://github.com/gsi-upm/senpy
synced 2025-09-18 12:32:21 +00:00
Compare commits
10 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
83e2d415a1 | ||
|
f8ca595bc9 | ||
|
312e7f7f12 | ||
|
c555b9547e | ||
|
991ade8f4d | ||
|
1104e816cb | ||
|
c19d03b41d | ||
|
42c9068991 | ||
|
96843827bd | ||
|
d76e4618fe |
@@ -72,7 +72,7 @@ deploy_pypi:
|
||||
- make pip_upload
|
||||
- echo "" > ~/.pypirc && rm ~/.pypirc # If the above fails, this won't run.
|
||||
only:
|
||||
- /^v\d+\.\d+\.\d+([abc]\d*)?$/ # PEP-440 compliant version (tags)
|
||||
- /^v?\d+\.\d+\.\d+([abc]\d*)?$/ # PEP-440 compliant version (tags)
|
||||
except:
|
||||
- branches
|
||||
|
||||
@@ -98,7 +98,7 @@ clean :
|
||||
- make -e clean
|
||||
when: manual
|
||||
|
||||
cleanup_pypirc:
|
||||
cleanup_py:
|
||||
stage: clean
|
||||
when: always # this is important; run even if preceding stages failed.
|
||||
script:
|
||||
|
@@ -7,7 +7,6 @@ language: python
|
||||
|
||||
env:
|
||||
- PYV=2.7
|
||||
- PYV=3.4
|
||||
- PYV=3.5
|
||||
# run nosetests - Tests
|
||||
script: make test-$PYV
|
||||
|
7
Makefile
7
Makefile
@@ -76,14 +76,12 @@ test-%:
|
||||
test: test-$(PYMAIN)
|
||||
|
||||
dist/$(TARNAME): version
|
||||
docker run --rm -ti -v $$PWD:/usr/src/app/ -w /usr/src/app/ python:$(PYMAIN) python setup.py sdist;
|
||||
docker run --rm -ti -v $$PWD:/usr/src/app/ -w /usr/src/app/ python:$(PYMAIN) chmod -R a+rwx dist;
|
||||
|
||||
python setup.py sdist;
|
||||
|
||||
sdist: dist/$(TARNAME)
|
||||
|
||||
pip_test-%: sdist
|
||||
docker run --rm -v $$PWD/dist:/dist/ -ti python:$* pip install /dist/$(TARNAME);
|
||||
docker run --rm -v $$PWD/dist:/dist/ python:$* pip install /dist/$(TARNAME);
|
||||
|
||||
pip_test: $(addprefix pip_test-,$(PYVERSIONS))
|
||||
|
||||
@@ -133,6 +131,7 @@ push-github:
|
||||
@echo "$$GITHUB_DEPLOY_KEY" > $(KEY_FILE)
|
||||
@git remote rm github-deploy || true
|
||||
git remote add github-deploy $(GITHUB_REPO)
|
||||
@GIT_SSH_COMMAND="ssh -i $(KEY_FILE)" git fetch github-deploy $(CI_COMMIT_REF_NAME) || true
|
||||
@GIT_SSH_COMMAND="ssh -i $(KEY_FILE)" git push github-deploy $(CI_COMMIT_REF_NAME)
|
||||
rm $(KEY_FILE)
|
||||
|
||||
|
@@ -14,6 +14,7 @@ spec:
|
||||
containers:
|
||||
- name: senpy-latest
|
||||
image: gsiupm/senpy:latest
|
||||
imagePullPolicy: Always
|
||||
args:
|
||||
- "--default-plugins"
|
||||
resources:
|
||||
|
30
senpy/plugins/split/split.py
Normal file
30
senpy/plugins/split/split.py
Normal file
@@ -0,0 +1,30 @@
|
||||
from senpy.plugins import AnalysisPlugin
|
||||
from senpy.models import Entry
|
||||
from nltk.tokenize.punkt import PunktSentenceTokenizer
|
||||
from nltk.tokenize.simple import LineTokenizer
|
||||
import nltk
|
||||
class SplitPlugin(AnalysisPlugin):
    """Analysis plugin that splits the text of an entry into smaller entries.

    The ``delimiter`` parameter selects the chunking strategy:
    ``"sentence"`` uses ``PunktSentenceTokenizer`` and ``"paragraph"`` uses
    ``LineTokenizer``.
    """

    def activate(self):
        """Download the NLTK ``punkt`` resource when the plugin is activated."""
        nltk.download('punkt')

    def analyse_entry(self, entry, params):
        """Yield one new ``Entry`` per chunk found in the text of ``entry``.

        :param entry: entry to split; its ``id`` and ``text`` are read.
        :param params: request parameters; ``delimiter`` picks the chunker
            (``"sentence"`` by default, or ``"paragraph"``).
        :return: generator of ``Entry`` objects whose ``text`` is the chunk
            and whose ``id`` is the original id followed by
            ``#char=<start>,<end>``, the character span of the chunk.

        Nothing is yielded when the entry has no text or when ``delimiter``
        is not one of the two supported values.
        """
        chunker_type = params.get("delimiter", "sentence")
        original_id = entry.id
        original_text = entry.get("text", None)

        # The tokenizers need a string: a missing or empty text has no chunks.
        if not original_text:
            return

        if chunker_type == "sentence":
            tokenizer = PunktSentenceTokenizer()
        elif chunker_type == "paragraph":
            tokenizer = LineTokenizer()
        else:
            # Unknown delimiter: produce no chunks, as the original code did.
            return

        chunks = tokenizer.tokenize(original_text)
        # span_tokenize() may return a one-shot iterator instead of a list,
        # so it must not be indexed. Pairing it with the chunks through zip()
        # consumes it exactly once and also stops cleanly if the two
        # sequences differ in length.
        spans = tokenizer.span_tokenize(original_text)
        for chunk, (start, end) in zip(chunks, spans):
            e = Entry()
            e.text = chunk
            e.id = original_id + "#char={},{}".format(start, end)
            yield e
|
18
senpy/plugins/split/split.senpy
Normal file
18
senpy/plugins/split/split.senpy
Normal file
@@ -0,0 +1,18 @@
|
||||
---
|
||||
name: split
|
||||
module: split
|
||||
description: A sample plugin that chunks input text
|
||||
author: "@militarpancho"
|
||||
version: '0.1'
|
||||
url: "https://github.com/gsi-upm/senpy"
|
||||
requirements: {nltk}
|
||||
extra_params:
|
||||
delimiter:
|
||||
aliases:
|
||||
- type
|
||||
- t
|
||||
required: false
|
||||
default: sentence
|
||||
options:
|
||||
- sentence
|
||||
- paragraph
|
Reference in New Issue
Block a user