mirror of
				https://github.com/gsi-upm/sitc
				synced 2025-10-31 15:38:18 +00:00 
			
		
		
		
	Corrected typo pane
This commit is contained in:
		| @@ -61,7 +61,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 1, | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
| @@ -109,19 +109,11 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 30, | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "[('I', 'PRON'), ('purchased', 'VERB'), ('this', 'DET'), ('Dell', 'NOUN'), ('monitor', 'NOUN'), ('because', 'ADP'), ('of', 'ADP'), ('budgetary', 'ADJ'), ('concerns', 'NOUN'), ('.', '.'), ('This', 'DET'), ('item', 'NOUN'), ('was', 'VERB'), ('the', 'DET'), ('most', 'ADV'), ('inexpensive', 'ADJ'), ('17', 'NUM'), ('inch', 'NOUN'), ('Apple', 'NOUN'), ('monitor', 'NOUN'), ('available', 'ADJ'), ('to', 'PRT'), ('me', 'PRON'), ('at', 'ADP'), ('the', 'DET'), ('time', 'NOUN'), ('I', 'PRON'), ('made', 'VERB'), ('the', 'DET'), ('purchase', 'NOUN'), ('.', '.'), ('My', 'PRON'), ('overall', 'ADJ'), ('experience', 'NOUN'), ('with', 'ADP'), ('this', 'DET'), ('monitor', 'NOUN'), ('was', 'VERB'), ('very', 'ADV'), ('poor', 'ADJ'), ('.', '.'), ('When', 'ADV'), ('the', 'DET'), ('screen', 'NOUN'), ('was', 'VERB'), (\"n't\", 'ADV'), ('contracting', 'VERB'), ('or', 'CONJ'), ('glitching', 'VERB'), ('the', 'DET'), ('overall', 'ADJ'), ('picture', 'NOUN'), ('quality', 'NOUN'), ('was', 'VERB'), ('poor', 'ADJ'), ('to', 'PRT'), ('fair', 'VERB'), ('.', '.'), ('I', 'PRON'), (\"'ve\", 'VERB'), ('viewed', 'VERB'), ('numerous', 'ADJ'), ('different', 'ADJ'), ('monitor', 'NOUN'), ('models', 'NOUN'), ('since', 'ADP'), ('I', 'PRON'), (\"'m\", 'VERB'), ('a', 'DET'), ('college', 'NOUN'), ('student', 'NOUN'), ('at', 'ADP'), ('UPM', 'NOUN'), ('in', 'ADP'), ('Madrid', 'NOUN'), ('and', 'CONJ'), ('this', 'DET'), ('particular', 'ADJ'), ('monitor', 'NOUN'), ('had', 'VERB'), ('as', 'ADP'), ('poor', 'ADJ'), ('of', 'ADP'), ('picture', 'NOUN'), ('quality', 'NOUN'), ('as', 'ADP'), ('any', 'DET'), ('I', 'PRON'), (\"'ve\", 'VERB'), ('seen', 'VERB'), ('.', '.')]\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from nltk import pos_tag, word_tokenize\n", | ||||
|     "print (pos_tag(word_tokenize(review), tagset='universal'))" | ||||
| @@ -136,19 +128,11 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 28, | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "[('I', 'PRP'), ('purchased', 'VBD'), ('this', 'DT'), ('Dell', 'NNP'), ('monitor', 'NN'), ('because', 'IN'), ('of', 'IN'), ('budgetary', 'JJ'), ('concerns', 'NNS'), ('.', '.'), ('This', 'DT'), ('item', 'NN'), ('was', 'VBD'), ('the', 'DT'), ('most', 'RBS'), ('inexpensive', 'JJ'), ('17', 'CD'), ('inch', 'NN'), ('Apple', 'NNP'), ('monitor', 'NN'), ('available', 'JJ'), ('to', 'TO'), ('me', 'PRP'), ('at', 'IN'), ('the', 'DT'), ('time', 'NN'), ('I', 'PRP'), ('made', 'VBD'), ('the', 'DT'), ('purchase', 'NN'), ('.', '.'), ('My', 'PRP$'), ('overall', 'JJ'), ('experience', 'NN'), ('with', 'IN'), ('this', 'DT'), ('monitor', 'NN'), ('was', 'VBD'), ('very', 'RB'), ('poor', 'JJ'), ('.', '.'), ('When', 'WRB'), ('the', 'DT'), ('screen', 'NN'), ('was', 'VBD'), (\"n't\", 'RB'), ('contracting', 'VBG'), ('or', 'CC'), ('glitching', 'VBG'), ('the', 'DT'), ('overall', 'JJ'), ('picture', 'NN'), ('quality', 'NN'), ('was', 'VBD'), ('poor', 'JJ'), ('to', 'TO'), ('fair', 'VB'), ('.', '.'), ('I', 'PRP'), (\"'ve\", 'VBP'), ('viewed', 'VBN'), ('numerous', 'JJ'), ('different', 'JJ'), ('monitor', 'NN'), ('models', 'NNS'), ('since', 'IN'), ('I', 'PRP'), (\"'m\", 'VBP'), ('a', 'DT'), ('college', 'NN'), ('student', 'NN'), ('at', 'IN'), ('UPM', 'NNP'), ('in', 'IN'), ('Madrid', 'NNP'), ('and', 'CC'), ('this', 'DT'), ('particular', 'JJ'), ('monitor', 'NN'), ('had', 'VBD'), ('as', 'IN'), ('poor', 'JJ'), ('of', 'IN'), ('picture', 'NN'), ('quality', 'NN'), ('as', 'IN'), ('any', 'DT'), ('I', 'PRP'), (\"'ve\", 'VBP'), ('seen', 'VBN'), ('.', '.')]\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "print (pos_tag(word_tokenize(review)))" | ||||
|    ] | ||||
| @@ -181,19 +165,11 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 3, | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "['I', 'purchase', 'Dell', 'monitor', 'because', 'of', 'budgetary', 'concern', 'item', 'be', 'most', 'inexpensive', '17', 'inch', 'Apple', 'monitor', 'available', 'me', 'at', 'time', 'I', 'make', 'purchase', 'My', 'overall', 'experience', 'with', 'monitor', 'be', 'very', 'poor', 'When', 'screen', 'be', \"n't\", 'contract', 'or', 'glitching', 'overall', 'picture', 'quality', 'be', 'poor', 'fair', 'I', \"'ve\", 'view', 'numerous', 'different', 'monitor', 'model', 'since', 'I', \"'m\", 'college', 'student', 'at', 'UPM', 'in', 'Madrid', 'and', 'particular', 'monitor', 'have', 'a', 'poor', 'of', 'picture', 'quality', 'a', 'I', \"'ve\", 'see']\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from nltk.stem import WordNetLemmatizer\n", | ||||
|     "\n", | ||||
| @@ -222,110 +198,11 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 4, | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "(S\n", | ||||
|       "  I/PRP\n", | ||||
|       "  purchased/VBD\n", | ||||
|       "  this/DT\n", | ||||
|       "  (ORGANIZATION Dell/NNP)\n", | ||||
|       "  monitor/NN\n", | ||||
|       "  because/IN\n", | ||||
|       "  of/IN\n", | ||||
|       "  budgetary/JJ\n", | ||||
|       "  concerns/NNS\n", | ||||
|       "  ./.\n", | ||||
|       "  This/DT\n", | ||||
|       "  item/NN\n", | ||||
|       "  was/VBD\n", | ||||
|       "  the/DT\n", | ||||
|       "  most/RBS\n", | ||||
|       "  inexpensive/JJ\n", | ||||
|       "  17/CD\n", | ||||
|       "  inch/NN\n", | ||||
|       "  Apple/NNP\n", | ||||
|       "  monitor/NN\n", | ||||
|       "  available/JJ\n", | ||||
|       "  to/TO\n", | ||||
|       "  me/PRP\n", | ||||
|       "  at/IN\n", | ||||
|       "  the/DT\n", | ||||
|       "  time/NN\n", | ||||
|       "  I/PRP\n", | ||||
|       "  made/VBD\n", | ||||
|       "  the/DT\n", | ||||
|       "  purchase/NN\n", | ||||
|       "  ./.\n", | ||||
|       "  My/PRP$\n", | ||||
|       "  overall/JJ\n", | ||||
|       "  experience/NN\n", | ||||
|       "  with/IN\n", | ||||
|       "  this/DT\n", | ||||
|       "  monitor/NN\n", | ||||
|       "  was/VBD\n", | ||||
|       "  very/RB\n", | ||||
|       "  poor/JJ\n", | ||||
|       "  ./.\n", | ||||
|       "  When/WRB\n", | ||||
|       "  the/DT\n", | ||||
|       "  screen/NN\n", | ||||
|       "  was/VBD\n", | ||||
|       "  n't/RB\n", | ||||
|       "  contracting/VBG\n", | ||||
|       "  or/CC\n", | ||||
|       "  glitching/VBG\n", | ||||
|       "  the/DT\n", | ||||
|       "  overall/JJ\n", | ||||
|       "  picture/NN\n", | ||||
|       "  quality/NN\n", | ||||
|       "  was/VBD\n", | ||||
|       "  poor/JJ\n", | ||||
|       "  to/TO\n", | ||||
|       "  fair/VB\n", | ||||
|       "  ./.\n", | ||||
|       "  I/PRP\n", | ||||
|       "  've/VBP\n", | ||||
|       "  viewed/VBN\n", | ||||
|       "  numerous/JJ\n", | ||||
|       "  different/JJ\n", | ||||
|       "  monitor/NN\n", | ||||
|       "  models/NNS\n", | ||||
|       "  since/IN\n", | ||||
|       "  I/PRP\n", | ||||
|       "  'm/VBP\n", | ||||
|       "  a/DT\n", | ||||
|       "  college/NN\n", | ||||
|       "  student/NN\n", | ||||
|       "  at/IN\n", | ||||
|       "  (ORGANIZATION UPM/NNP)\n", | ||||
|       "  in/IN\n", | ||||
|       "  (GPE Madrid/NNP)\n", | ||||
|       "  and/CC\n", | ||||
|       "  this/DT\n", | ||||
|       "  particular/JJ\n", | ||||
|       "  monitor/NN\n", | ||||
|       "  had/VBD\n", | ||||
|       "  as/IN\n", | ||||
|       "  poor/JJ\n", | ||||
|       "  of/IN\n", | ||||
|       "  picture/NN\n", | ||||
|       "  quality/NN\n", | ||||
|       "  as/IN\n", | ||||
|       "  any/DT\n", | ||||
|       "  I/PRP\n", | ||||
|       "  've/VBP\n", | ||||
|       "  seen/VBN\n", | ||||
|       "  ./.)\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from nltk import ne_chunk, pos_tag, word_tokenize\n", | ||||
|     "ne_tagged = ne_chunk(pos_tag(word_tokenize(review)), binary=False)\n", | ||||
| @@ -357,7 +234,7 @@ | ||||
|     "We can use the StanfordParser that is integrated in NLTK, but it requires configuring the CLASSPATH, which can be a bit annoying. Instead, we are going to see some demos to understand how grammars work. In case you are interested, you can consult the [manual](http://www.nltk.org/api/nltk.parse.html) to run it.\n", | ||||
|     "\n", | ||||
|     "In the following example, you will run an interactive context-free parser, called [shift-reduce parser](http://www.nltk.org/book/ch08.html).\n", | ||||
|     "The pane on the left shows the grammar as a list of production rules. The pane on the right contains the stack  and the remaining input.\n", | ||||
|     "The panel on the left shows the grammar as a list of production rules. The panel on the right contains the stack and the remaining input.\n", | ||||
|     "\n", | ||||
|     "You should:\n", | ||||
|     "* Keep pressing 'step' until the sentence is fully analyzed. With each step, the parser either shifts one word onto the stack or reduces two subtrees of the stack into a new subtree.\n", | ||||
| @@ -366,7 +243,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 5, | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
| @@ -389,90 +266,11 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 6, | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "(S\n", | ||||
|       "  I/PRON\n", | ||||
|       "  purchased/VERB\n", | ||||
|       "  (NP this/DET Dell/NOUN monitor/NOUN)\n", | ||||
|       "  because/ADP\n", | ||||
|       "  of/ADP\n", | ||||
|       "  (NP budgetary/ADJ concerns/NOUN)\n", | ||||
|       "  ./.\n", | ||||
|       "  (NP This/DET item/NOUN)\n", | ||||
|       "  was/VERB\n", | ||||
|       "  (NP\n", | ||||
|       "    the/DET\n", | ||||
|       "    most/ADV\n", | ||||
|       "    inexpensive/ADJ\n", | ||||
|       "    17/NUM\n", | ||||
|       "    inch/NOUN\n", | ||||
|       "    Apple/NOUN\n", | ||||
|       "    monitor/NOUN)\n", | ||||
|       "  available/ADJ\n", | ||||
|       "  to/PRT\n", | ||||
|       "  me/PRON\n", | ||||
|       "  at/ADP\n", | ||||
|       "  (NP the/DET time/NOUN)\n", | ||||
|       "  I/PRON\n", | ||||
|       "  made/VERB\n", | ||||
|       "  (NP the/DET purchase/NOUN)\n", | ||||
|       "  ./.\n", | ||||
|       "  (NP My/PRON overall/ADJ experience/NOUN)\n", | ||||
|       "  with/ADP\n", | ||||
|       "  (NP this/DET monitor/NOUN)\n", | ||||
|       "  was/VERB\n", | ||||
|       "  very/ADV\n", | ||||
|       "  poor/ADJ\n", | ||||
|       "  ./.\n", | ||||
|       "  When/ADV\n", | ||||
|       "  (NP the/DET screen/NOUN)\n", | ||||
|       "  was/VERB\n", | ||||
|       "  n't/ADV\n", | ||||
|       "  contracting/VERB\n", | ||||
|       "  or/CONJ\n", | ||||
|       "  glitching/VERB\n", | ||||
|       "  (NP the/DET overall/ADJ picture/NOUN quality/NOUN)\n", | ||||
|       "  was/VERB\n", | ||||
|       "  poor/ADJ\n", | ||||
|       "  to/PRT\n", | ||||
|       "  fair/VERB\n", | ||||
|       "  ./.\n", | ||||
|       "  I/PRON\n", | ||||
|       "  've/VERB\n", | ||||
|       "  viewed/VERB\n", | ||||
|       "  (NP numerous/ADJ different/ADJ monitor/NOUN models/NOUN)\n", | ||||
|       "  since/ADP\n", | ||||
|       "  I/PRON\n", | ||||
|       "  'm/VERB\n", | ||||
|       "  (NP a/DET college/NOUN student/NOUN)\n", | ||||
|       "  at/ADP\n", | ||||
|       "  (NP UPM/NOUN)\n", | ||||
|       "  in/ADP\n", | ||||
|       "  (NP Madrid/NOUN)\n", | ||||
|       "  and/CONJ\n", | ||||
|       "  (NP this/DET particular/ADJ monitor/NOUN)\n", | ||||
|       "  had/VERB\n", | ||||
|       "  as/ADP\n", | ||||
|       "  poor/ADJ\n", | ||||
|       "  of/ADP\n", | ||||
|       "  (NP picture/NOUN quality/NOUN)\n", | ||||
|       "  as/ADP\n", | ||||
|       "  any/DET\n", | ||||
|       "  I/PRON\n", | ||||
|       "  've/VERB\n", | ||||
|       "  seen/VERB\n", | ||||
|       "  ./.)\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from nltk.chunk.regexp import *\n", | ||||
|     "pattern = \"\"\"NP: {<PRON><ADJ><NOUN>+} \n", | ||||
| @@ -496,37 +294,11 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 7, | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "[Tree('NP', [('this', 'DET'), ('Dell', 'NOUN'), ('monitor', 'NOUN')]),\n", | ||||
|        " Tree('NP', [('budgetary', 'ADJ'), ('concerns', 'NOUN')]),\n", | ||||
|        " Tree('NP', [('This', 'DET'), ('item', 'NOUN')]),\n", | ||||
|        " Tree('NP', [('the', 'DET'), ('most', 'ADV'), ('inexpensive', 'ADJ'), ('17', 'NUM'), ('inch', 'NOUN'), ('Apple', 'NOUN'), ('monitor', 'NOUN')]),\n", | ||||
|        " Tree('NP', [('the', 'DET'), ('time', 'NOUN')]),\n", | ||||
|        " Tree('NP', [('the', 'DET'), ('purchase', 'NOUN')]),\n", | ||||
|        " Tree('NP', [('My', 'PRON'), ('overall', 'ADJ'), ('experience', 'NOUN')]),\n", | ||||
|        " Tree('NP', [('this', 'DET'), ('monitor', 'NOUN')]),\n", | ||||
|        " Tree('NP', [('the', 'DET'), ('screen', 'NOUN')]),\n", | ||||
|        " Tree('NP', [('the', 'DET'), ('overall', 'ADJ'), ('picture', 'NOUN'), ('quality', 'NOUN')]),\n", | ||||
|        " Tree('NP', [('numerous', 'ADJ'), ('different', 'ADJ'), ('monitor', 'NOUN'), ('models', 'NOUN')]),\n", | ||||
|        " Tree('NP', [('a', 'DET'), ('college', 'NOUN'), ('student', 'NOUN')]),\n", | ||||
|        " Tree('NP', [('UPM', 'NOUN')]),\n", | ||||
|        " Tree('NP', [('Madrid', 'NOUN')]),\n", | ||||
|        " Tree('NP', [('this', 'DET'), ('particular', 'ADJ'), ('monitor', 'NOUN')]),\n", | ||||
|        " Tree('NP', [('picture', 'NOUN'), ('quality', 'NOUN')])]" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 7, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "def extractTrees(parsed_tree, category='NP'):\n", | ||||
|     "    return list(parsed_tree.subtrees(filter=lambda x: x.label()==category))\n", | ||||
| @@ -536,37 +308,11 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 8, | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "collapsed": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "['this Dell monitor',\n", | ||||
|        " 'budgetary concerns',\n", | ||||
|        " 'This item',\n", | ||||
|        " 'the most inexpensive 17 inch Apple monitor',\n", | ||||
|        " 'the time',\n", | ||||
|        " 'the purchase',\n", | ||||
|        " 'My overall experience',\n", | ||||
|        " 'this monitor',\n", | ||||
|        " 'the screen',\n", | ||||
|        " 'the overall picture quality',\n", | ||||
|        " 'numerous different monitor models',\n", | ||||
|        " 'a college student',\n", | ||||
|        " 'UPM',\n", | ||||
|        " 'Madrid',\n", | ||||
|        " 'this particular monitor',\n", | ||||
|        " 'picture quality']" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 8, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "def extractStrings(parsed_tree, category='NP'):\n", | ||||
|     "    return [\" \".join(word for word, pos in vp.leaves()) for vp in extractTrees(parsed_tree, category)]\n", | ||||
|   | ||||
		Reference in New Issue
	
	Block a user