You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
senpy/example-plugins/sklearn/mydata.py

34 lines
631 B
Python

'''
Create a dummy dataset.
Messages with a happy emoticon are labelled positive
Messages with a sad emoticon are labelled negative
'''
import random
dataset = []
vocabulary = ['hello', 'world', 'senpy', 'cool', 'goodbye', 'random', 'text']
emojimap = {
1: [':)', ],
-1: [':(', ]
}
for tag, values in emojimap.items():
for i in range(1000):
msg = ''
for j in range(3):
msg += random.choice(vocabulary)
msg += " "
msg += random.choice(values)
dataset.append([msg, tag])
text = []
labels = []
for i in dataset:
text.append(i[0])
labels.append(i[1])