# mirror of https://github.com/gsi-upm/senpy
# synced 2024-11-01 07:41:42 +00:00
# 34 lines, 631 B, Python
'''
Create a dummy dataset.

Messages with a happy emoticon are labelled positive (1).
Messages with a sad emoticon are labelled negative (-1).

Module-level results:

* ``dataset`` -- list of ``[message, label]`` pairs.
* ``text``    -- the messages, in the same order as ``dataset``.
* ``labels``  -- the labels, in the same order as ``dataset``.
'''
import random

dataset = []

# Filler words; they carry no sentiment, only the emoticon does.
vocabulary = ['hello', 'world', 'senpy', 'cool', 'goodbye', 'random', 'text']

# Maps each label to the emoticons that mark a message with that label.
emojimap = {
    1: [':)', ],
    -1: [':(', ]
}

# Generate 1000 messages per label. Each message is three random filler
# words followed by one emoticon for the label, separated by single spaces.
# The words are drawn before the emoticon so that a seeded ``random`` yields
# the same sequence as building the string piece by piece.
for tag, values in emojimap.items():
    for i in range(1000):
        words = [random.choice(vocabulary) for j in range(3)]
        msg = ' '.join(words + [random.choice(values)])
        dataset.append([msg, tag])

# Split the [message, label] pairs into two parallel lists.
text = [msg for msg, tag in dataset]
labels = [tag for msg, tag in dataset]