|
|
@ -76,7 +76,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 33,
|
|
|
|
"execution_count": 1,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -85,7 +85,7 @@
|
|
|
|
"(2034, 2807)"
|
|
|
|
"(2034, 2807)"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"execution_count": 33,
|
|
|
|
"execution_count": 1,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -134,15 +134,41 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 34,
|
|
|
|
"execution_count": 24,
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
|
|
"Requirement already satisfied: gensim in /home/cif/anaconda3/lib/python3.10/site-packages (4.3.1)\n",
|
|
|
|
|
|
|
|
"Requirement already satisfied: scipy>=1.7.0 in /home/cif/anaconda3/lib/python3.10/site-packages (from gensim) (1.10.1)\n",
|
|
|
|
|
|
|
|
"Requirement already satisfied: smart-open>=1.8.1 in /home/cif/anaconda3/lib/python3.10/site-packages (from gensim) (6.3.0)\n",
|
|
|
|
|
|
|
|
"Requirement already satisfied: numpy>=1.18.5 in /home/cif/anaconda3/lib/python3.10/site-packages (from gensim) (1.24.2)\n",
|
|
|
|
|
|
|
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
|
|
|
|
|
|
|
"Requirement already satisfied: python-Levenshtein in /home/cif/anaconda3/lib/python3.10/site-packages (0.21.0)\n",
|
|
|
|
|
|
|
|
"Requirement already satisfied: Levenshtein==0.21.0 in /home/cif/anaconda3/lib/python3.10/site-packages (from python-Levenshtein) (0.21.0)\n",
|
|
|
|
|
|
|
|
"Requirement already satisfied: rapidfuzz<4.0.0,>=2.3.0 in /home/cif/anaconda3/lib/python3.10/site-packages (from Levenshtein==0.21.0->python-Levenshtein) (3.0.0)\n",
|
|
|
|
|
|
|
|
"Note: you may need to restart the kernel to use updated packages.\n"
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
],
|
|
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
|
|
"%pip install gensim\n",
|
|
|
|
|
|
|
|
"%pip install python-Levenshtein"
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
|
|
"execution_count": 23,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
|
"from gensim import matutils\n",
|
|
|
|
"from gensim import matutils\n",
|
|
|
|
"\n",
|
|
|
|
"\n",
|
|
|
|
"vocab = vectorizer.get_feature_names()\n",
|
|
|
|
"vocab = vectorizer.get_feature_names_out()\n",
|
|
|
|
"\n",
|
|
|
|
"\n",
|
|
|
|
"dictionary = dict([(i, s) for i, s in enumerate(vectorizer.get_feature_names())])\n",
|
|
|
|
"dictionary = dict([(i, s) for i, s in enumerate(vectorizer.get_feature_names_out())])\n",
|
|
|
|
"corpus_tfidf = matutils.Sparse2Corpus(vectors_train)"
|
|
|
|
"corpus_tfidf = matutils.Sparse2Corpus(vectors_train)"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
},
|
|
|
|
},
|
|
|
@ -162,7 +188,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 60,
|
|
|
|
"execution_count": 3,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
@ -176,23 +202,23 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 61,
|
|
|
|
"execution_count": 4,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"text/plain": [
|
|
|
|
"[(0,\n",
|
|
|
|
"[(0,\n",
|
|
|
|
" '0.011*\"baptist\" + 0.010*\"koresh\" + 0.009*\"bible\" + 0.006*\"reality\" + 0.006*\"virtual\" + 0.005*\"scarlet\" + 0.005*\"shag\" + 0.004*\"tootsie\" + 0.004*\"kinda\" + 0.004*\"captain\"'),\n",
|
|
|
|
" '0.004*\"central\" + 0.004*\"assumptions\" + 0.004*\"matthew\" + 0.004*\"define\" + 0.004*\"holes\" + 0.003*\"killing\" + 0.003*\"item\" + 0.003*\"curious\" + 0.003*\"going\" + 0.003*\"presentations\"'),\n",
|
|
|
|
" (1,\n",
|
|
|
|
" (1,\n",
|
|
|
|
" '0.010*\"targa\" + 0.008*\"thanks\" + 0.008*\"moon\" + 0.007*\"craig\" + 0.007*\"zoroastrians\" + 0.006*\"yayayay\" + 0.005*\"unfortunately\" + 0.005*\"windows\" + 0.005*\"rayshade\" + 0.004*\"tdb\"'),\n",
|
|
|
|
" '0.002*\"mechanism\" + 0.002*\"led\" + 0.002*\"apple\" + 0.002*\"color\" + 0.002*\"mormons\" + 0.002*\"activity\" + 0.002*\"concepts\" + 0.002*\"frank\" + 0.002*\"platform\" + 0.002*\"fault\"'),\n",
|
|
|
|
" (2,\n",
|
|
|
|
" (2,\n",
|
|
|
|
" '0.009*\"mary\" + 0.007*\"whatever\" + 0.006*\"god\" + 0.005*\"ns\" + 0.005*\"lucky\" + 0.005*\"joseph\" + 0.005*\"ssrt\" + 0.005*\"samaritan\" + 0.005*\"crusades\" + 0.004*\"phobos\"'),\n",
|
|
|
|
" '0.005*\"objects\" + 0.005*\"obtained\" + 0.003*\"manhattan\" + 0.003*\"capability\" + 0.003*\"education\" + 0.003*\"men\" + 0.003*\"photo\" + 0.003*\"decent\" + 0.003*\"environmental\" + 0.003*\"pain\"'),\n",
|
|
|
|
" (3,\n",
|
|
|
|
" (3,\n",
|
|
|
|
" '0.009*\"islam\" + 0.008*\"western\" + 0.008*\"plane\" + 0.008*\"jeff\" + 0.007*\"cheers\" + 0.007*\"kent\" + 0.007*\"joy\" + 0.007*\"khomeini\" + 0.007*\"davidian\" + 0.006*\"basically\"')]"
|
|
|
|
" '0.004*\"car\" + 0.004*\"contain\" + 0.004*\"groups\" + 0.004*\"center\" + 0.004*\"evil\" + 0.004*\"maintain\" + 0.004*\"comets\" + 0.004*\"88\" + 0.004*\"density\" + 0.003*\"company\"')]"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"execution_count": 61,
|
|
|
|
"execution_count": 4,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -211,7 +237,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 62,
|
|
|
|
"execution_count": 5,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
@ -243,14 +269,14 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 63,
|
|
|
|
"execution_count": 6,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"text": [
|
|
|
|
"Dictionary(10913 unique tokens: ['cel', 'ds', 'hi', 'nothing', 'prj']...)\n"
|
|
|
|
"Dictionary<10913 unique tokens: ['cel', 'ds', 'hi', 'nothing', 'prj']...>\n"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
],
|
|
|
@ -263,7 +289,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 64,
|
|
|
|
"execution_count": 7,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
@ -274,7 +300,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 65,
|
|
|
|
"execution_count": 8,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
@ -286,14 +312,14 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 71,
|
|
|
|
"execution_count": 9,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"text": [
|
|
|
|
"Dictionary(10913 unique tokens: ['cel', 'ds', 'hi', 'nothing', 'prj']...)\n"
|
|
|
|
"Dictionary<10913 unique tokens: ['cel', 'ds', 'hi', 'nothing', 'prj']...>\n"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
],
|
|
|
@ -305,7 +331,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 72,
|
|
|
|
"execution_count": 10,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
@ -315,7 +341,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 73,
|
|
|
|
"execution_count": 11,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
@ -328,7 +354,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 74,
|
|
|
|
"execution_count": 12,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -346,7 +372,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 75,
|
|
|
|
"execution_count": 13,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
@ -359,23 +385,23 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 76,
|
|
|
|
"execution_count": 14,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"text/plain": [
|
|
|
|
"[(0,\n",
|
|
|
|
"[(0,\n",
|
|
|
|
" '0.009*\"whatever\" + 0.007*\"plane\" + 0.007*\"ns\" + 0.007*\"joy\" + 0.006*\"happy\" + 0.005*\"bob\" + 0.004*\"phil\" + 0.004*\"nasa\" + 0.003*\"purdue\" + 0.003*\"neie\"'),\n",
|
|
|
|
" '0.011*\"mary\" + 0.007*\"ns\" + 0.006*\"joseph\" + 0.006*\"lucky\" + 0.006*\"ssrt\" + 0.005*\"god\" + 0.005*\"unfortunately\" + 0.004*\"rayshade\" + 0.004*\"phil\" + 0.004*\"nasa\"'),\n",
|
|
|
|
" (1,\n",
|
|
|
|
" (1,\n",
|
|
|
|
" '0.009*\"god\" + 0.008*\"mary\" + 0.008*\"targa\" + 0.007*\"baptist\" + 0.007*\"thanks\" + 0.007*\"koresh\" + 0.006*\"really\" + 0.006*\"bible\" + 0.005*\"lot\" + 0.005*\"lucky\"'),\n",
|
|
|
|
" '0.009*\"thanks\" + 0.009*\"targa\" + 0.008*\"whatever\" + 0.008*\"baptist\" + 0.007*\"islam\" + 0.006*\"cheers\" + 0.006*\"kent\" + 0.006*\"zoroastrians\" + 0.006*\"joy\" + 0.006*\"lot\"'),\n",
|
|
|
|
" (2,\n",
|
|
|
|
" (2,\n",
|
|
|
|
" '0.010*\"moon\" + 0.007*\"phobos\" + 0.006*\"unfortunately\" + 0.006*\"martian\" + 0.006*\"russian\" + 0.005*\"rayshade\" + 0.005*\"anybody\" + 0.005*\"perturbations\" + 0.005*\"thanks\" + 0.004*\"apollo\"'),\n",
|
|
|
|
" '0.008*\"moon\" + 0.008*\"really\" + 0.008*\"western\" + 0.007*\"plane\" + 0.006*\"samaritan\" + 0.006*\"crusades\" + 0.006*\"baltimore\" + 0.005*\"bob\" + 0.005*\"septuagint\" + 0.005*\"virtual\"'),\n",
|
|
|
|
" (3,\n",
|
|
|
|
" (3,\n",
|
|
|
|
" '0.008*\"islam\" + 0.008*\"western\" + 0.007*\"jeff\" + 0.007*\"zoroastrians\" + 0.006*\"davidian\" + 0.006*\"basically\" + 0.005*\"bull\" + 0.005*\"gerald\" + 0.005*\"sorry\" + 0.004*\"kent\"')]"
|
|
|
|
" '0.009*\"koresh\" + 0.008*\"bible\" + 0.008*\"jeff\" + 0.007*\"basically\" + 0.006*\"gerald\" + 0.006*\"bull\" + 0.005*\"pd\" + 0.004*\"also\" + 0.003*\"dam\" + 0.003*\"feiner\"')]"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"execution_count": 76,
|
|
|
|
"execution_count": 14,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -387,14 +413,14 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 77,
|
|
|
|
"execution_count": 15,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"text": [
|
|
|
|
"[(0, 0.7154438), (1, 0.10569019), (2, 0.09522807), (3, 0.08363795)]\n"
|
|
|
|
"[(0, 0.09161347), (1, 0.1133858), (2, 0.103424065), (3, 0.69157666)]\n"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
],
|
|
|
@ -406,7 +432,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 78,
|
|
|
|
"execution_count": 16,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -427,14 +453,14 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 79,
|
|
|
|
"execution_count": 17,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"text": [
|
|
|
|
"[(0, 0.06320839), (1, 0.80878526), (2, 0.06274223), (3, 0.065264106)]\n"
|
|
|
|
"[(0, 0.066217005), (1, 0.8084562), (2, 0.062542014), (3, 0.0627848)]\n"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
],
|
|
|
@ -446,14 +472,14 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 80,
|
|
|
|
"execution_count": 18,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"text": [
|
|
|
|
"0.009*\"god\" + 0.008*\"mary\" + 0.008*\"targa\" + 0.007*\"baptist\" + 0.007*\"thanks\" + 0.007*\"koresh\" + 0.006*\"really\" + 0.006*\"bible\" + 0.005*\"lot\" + 0.005*\"lucky\"\n"
|
|
|
|
"0.009*\"thanks\" + 0.009*\"targa\" + 0.008*\"whatever\" + 0.008*\"baptist\" + 0.007*\"islam\" + 0.006*\"cheers\" + 0.006*\"kent\" + 0.006*\"zoroastrians\" + 0.006*\"joy\" + 0.006*\"lot\"\n"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
],
|
|
|
@ -464,15 +490,15 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 81,
|
|
|
|
"execution_count": 19,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"text": [
|
|
|
|
"[(0, 0.10564032), (1, 0.67894983), (2, 0.104482815), (3, 0.11092702)]\n",
|
|
|
|
"[(0, 0.11006463), (1, 0.6813435), (2, 0.10399808), (3, 0.10459379)]\n",
|
|
|
|
"0.009*\"god\" + 0.008*\"mary\" + 0.008*\"targa\" + 0.007*\"baptist\" + 0.007*\"thanks\" + 0.007*\"koresh\" + 0.006*\"really\" + 0.006*\"bible\" + 0.005*\"lot\" + 0.005*\"lucky\"\n"
|
|
|
|
"0.009*\"thanks\" + 0.009*\"targa\" + 0.008*\"whatever\" + 0.008*\"baptist\" + 0.007*\"islam\" + 0.006*\"cheers\" + 0.006*\"kent\" + 0.006*\"zoroastrians\" + 0.006*\"joy\" + 0.006*\"lot\"\n"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
],
|
|
|
@ -492,7 +518,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 82,
|
|
|
|
"execution_count": 20,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
@ -508,23 +534,23 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 83,
|
|
|
|
"execution_count": 21,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"text/plain": [
|
|
|
|
"[(0,\n",
|
|
|
|
"[(0,\n",
|
|
|
|
" '0.769*\"god\" + 0.346*\"jesus\" + 0.235*\"bible\" + 0.204*\"christian\" + 0.148*\"christians\" + 0.107*\"christ\" + 0.090*\"well\" + 0.085*\"koresh\" + 0.081*\"kent\" + 0.080*\"christianity\"'),\n",
|
|
|
|
" '-0.769*\"god\" + -0.345*\"jesus\" + -0.235*\"bible\" + -0.203*\"christian\" + -0.149*\"christians\" + -0.107*\"christ\" + -0.089*\"well\" + -0.085*\"koresh\" + -0.082*\"kent\" + -0.081*\"christianity\"'),\n",
|
|
|
|
" (1,\n",
|
|
|
|
" (1,\n",
|
|
|
|
" '-0.863*\"thanks\" + -0.255*\"please\" + -0.159*\"hello\" + -0.152*\"hi\" + 0.124*\"god\" + -0.111*\"sorry\" + -0.088*\"could\" + -0.074*\"windows\" + -0.067*\"jpeg\" + -0.063*\"gif\"'),\n",
|
|
|
|
" '-0.863*\"thanks\" + -0.255*\"please\" + -0.159*\"hello\" + -0.152*\"hi\" + 0.123*\"god\" + -0.112*\"sorry\" + -0.088*\"could\" + -0.074*\"windows\" + -0.067*\"jpeg\" + -0.063*\"gif\"'),\n",
|
|
|
|
" (2,\n",
|
|
|
|
" (2,\n",
|
|
|
|
" '-0.780*\"well\" + 0.229*\"god\" + -0.165*\"yes\" + 0.154*\"thanks\" + -0.133*\"ico\" + -0.133*\"tek\" + -0.130*\"queens\" + -0.130*\"bronx\" + -0.130*\"beauchaine\" + -0.130*\"manhattan\"'),\n",
|
|
|
|
" '0.779*\"well\" + -0.229*\"god\" + 0.165*\"yes\" + -0.154*\"thanks\" + 0.135*\"ico\" + 0.134*\"tek\" + 0.131*\"queens\" + 0.131*\"bronx\" + 0.131*\"beauchaine\" + 0.131*\"manhattan\"'),\n",
|
|
|
|
" (3,\n",
|
|
|
|
" (3,\n",
|
|
|
|
" '-0.338*\"well\" + 0.336*\"ico\" + 0.334*\"tek\" + 0.328*\"bronx\" + 0.328*\"beauchaine\" + 0.328*\"queens\" + 0.326*\"manhattan\" + 0.305*\"com\" + 0.305*\"bob\" + 0.072*\"god\"')]"
|
|
|
|
" '-0.342*\"well\" + 0.335*\"ico\" + 0.333*\"tek\" + 0.327*\"bronx\" + 0.327*\"queens\" + 0.327*\"beauchaine\" + 0.325*\"manhattan\" + 0.305*\"bob\" + 0.304*\"com\" + 0.073*\"god\"')]"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"execution_count": 83,
|
|
|
|
"execution_count": 21,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -536,7 +562,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 84,
|
|
|
|
"execution_count": 22,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -595,7 +621,7 @@
|
|
|
|
"window_display": false
|
|
|
|
"window_display": false
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"kernelspec": {
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": "Python 3",
|
|
|
|
"display_name": "Python 3 (ipykernel)",
|
|
|
|
"language": "python",
|
|
|
|
"language": "python",
|
|
|
|
"name": "python3"
|
|
|
|
"name": "python3"
|
|
|
|
},
|
|
|
|
},
|
|
|
@ -609,7 +635,7 @@
|
|
|
|
"name": "python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
"version": "3.8.8"
|
|
|
|
"version": "3.10.10"
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"latex_envs": {
|
|
|
|
"latex_envs": {
|
|
|
|
"LaTeX_envs_menu_present": true,
|
|
|
|
"LaTeX_envs_menu_present": true,
|
|
|
|