1
0
mirror of https://github.com/gsi-upm/sitc synced 2025-08-24 02:22:21 +00:00

Remove outputs and metadata

This commit is contained in:
J. Fernando Sánchez
2019-02-28 15:30:33 +01:00
parent a1be167cc0
commit c1d3ca38ea
25 changed files with 989 additions and 14268 deletions

View File

@@ -77,9 +77,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import fetch_20newsgroups\n",
@@ -123,9 +121,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"from gensim import matutils\n",
@@ -152,10 +148,8 @@
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from gensim.models.ldamodel import LdaModel\n",
@@ -169,9 +163,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"# check the topics\n",
@@ -188,9 +180,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"# import the gensim.corpora module to generate dictionary\n",
@@ -222,9 +212,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"# You can save the dictionary\n",
@@ -236,9 +224,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"# Generate a list of docs, where each doc is a list of words\n",
@@ -249,9 +235,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"# import the gensim.corpora module to generate dictionary\n",
@@ -263,9 +247,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"# You can optionally save the dictionary \n",
@@ -277,9 +259,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"# We can print the dictionary, it is a mappying of id and tokens\n",
@@ -290,9 +270,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"# construct the corpus representing each document as a bag-of-words (bow) vector\n",
@@ -302,9 +280,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"from gensim.models import TfidfModel\n",
@@ -317,9 +293,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"#print tf-idf of first document\n",
@@ -329,9 +303,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"from gensim.models.ldamodel import LdaModel\n",
@@ -344,9 +316,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"# check the topics\n",
@@ -356,9 +326,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"# check the lsa vector for the first document\n",
@@ -369,9 +337,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"#predict topics of a new doc\n",
@@ -384,9 +350,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"#transform into LDA space\n",
@@ -397,9 +361,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"# print the document's single most prominent LDA topic\n",
@@ -409,9 +371,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"lda_vector_tfidf = lda_model[tfidf_model[bow_vector]]\n",
@@ -430,9 +390,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"from gensim.models.lsimodel import LsiModel\n",
@@ -448,9 +406,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"# check the topics\n",
@@ -460,9 +416,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"# check the lsi vector for the first document\n",