From 4bd81f0f18723d309bcb8dbbaf48c2ba73b3a461 Mon Sep 17 00:00:00 2001 From: Filip Johnsson <filjo653@student.liu.se> Date: Mon, 25 Nov 2024 14:52:59 +0100 Subject: [PATCH] part 5 --- l3/TM-Lab3.ipynb | 111 ++++++++++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 50 deletions(-) diff --git a/l3/TM-Lab3.ipynb b/l3/TM-Lab3.ipynb index fc9c593..b14e058 100644 --- a/l3/TM-Lab3.ipynb +++ b/l3/TM-Lab3.ipynb @@ -37,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 332, + "execution_count": 2, "metadata": { "deletable": false, "editable": false, @@ -82,7 +82,7 @@ }, { "cell_type": "code", - "execution_count": 333, + "execution_count": 3, "metadata": { "deletable": false, "editable": false, @@ -128,7 +128,7 @@ }, { "cell_type": "code", - "execution_count": 334, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -220,7 +220,7 @@ "4 Brussels " ] }, - "execution_count": 334, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -262,7 +262,7 @@ }, { "cell_type": "code", - "execution_count": 335, + "execution_count": 5, "metadata": { "deletable": false, "nbgrader": { @@ -312,7 +312,7 @@ }, { "cell_type": "code", - "execution_count": 336, + "execution_count": 6, "metadata": { "deletable": false, "editable": false, @@ -350,7 +350,7 @@ }, { "cell_type": "code", - "execution_count": 337, + "execution_count": 7, "metadata": { "deletable": false, "editable": false, @@ -419,7 +419,7 @@ }, { "cell_type": "code", - "execution_count": 338, + "execution_count": 8, "metadata": { "deletable": false, "editable": false, @@ -464,7 +464,7 @@ }, { "cell_type": "code", - "execution_count": 339, + "execution_count": 9, "metadata": { "deletable": false, "nbgrader": { @@ -498,7 +498,7 @@ }, { "cell_type": "code", - "execution_count": 340, + "execution_count": 10, "metadata": { "deletable": false, "nbgrader": { @@ -552,7 +552,7 @@ }, { "cell_type": "code", - "execution_count": 341, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -575,7 +575,7 @@ }, { "cell_type": "code", - "execution_count": 342, + "execution_count": 12, "metadata": { "deletable": false, "editable": false, @@ -630,7 +630,7 @@ }, { "cell_type": "code", - "execution_count": 343, + "execution_count": 13, "metadata": { "deletable": false, "editable": false, @@ -692,7 +692,7 @@ }, { "cell_type": "code", - "execution_count": 344, + "execution_count": 14, "metadata": { "tags": [ "solution" @@ -16062,7 +16062,7 @@ }, { "cell_type": "code", - "execution_count": 345, + "execution_count": 15, "metadata": { "deletable": false, "nbgrader": { @@ -16123,7 +16123,7 @@ }, { "cell_type": "code", - "execution_count": 346, + "execution_count": 16, "metadata": { "tags": [ "solution" @@ -16147,7 +16147,7 @@ }, { "cell_type": "code", - "execution_count": 347, + "execution_count": 17, "metadata": { "deletable": false, "editable": false, @@ -16193,7 +16193,7 @@ }, { "cell_type": "code", - "execution_count": 348, + "execution_count": 18, "metadata": { "deletable": false, "nbgrader": { @@ -16245,7 +16245,7 @@ }, { "cell_type": "code", - "execution_count": 349, + "execution_count": 19, "metadata": { "deletable": false, "editable": false, @@ -16290,6 +16290,7 @@ " <th>sentence</th>\n", " <th>beg</th>\n", " <th>end</th>\n", + " <th>label</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", @@ -16299,6 +16300,7 @@ " <td>LONDON 1996-08-30</td>\n", " <td>0</td>\n", " <td>1</td>\n", + " <td>--NME--</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", @@ -16306,6 +16308,7 @@ " <td>West Indian all-rounder Phil Simmons took four...</td>\n", " <td>0</td>\n", " <td>2</td>\n", + " <td>--NME--</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", @@ -16313,6 +16316,7 @@ " <td>West Indian all-rounder Phil Simmons took four...</td>\n", " <td>3</td>\n", " <td>5</td>\n", + " <td>--NME--</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", @@ -16320,6 +16324,7 @@ " <td>West Indian all-rounder Phil Simmons took four...</td>\n", " <td>12</td>\n", " <td>13</td>\n", + " <td>--NME--</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", @@ -16327,18 +16332,26 @@ " <td>West Indian all-rounder Phil Simmons took four...</td>\n", " <td>14</td>\n", " <td>15</td>\n", + " <td>--NME--</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ - " sentence_id sentence beg end\n", - "0 0946-001 LONDON 1996-08-30 0 1\n", - "1 0946-002 West Indian all-rounder Phil Simmons took four... 0 2\n", - "2 0946-002 West Indian all-rounder Phil Simmons took four... 3 5\n", - "3 0946-002 West Indian all-rounder Phil Simmons took four... 12 13\n", - "4 0946-002 West Indian all-rounder Phil Simmons took four... 14 15" + " sentence_id sentence beg end \\\n", + "0 0946-001 LONDON 1996-08-30 0 1 \n", + "1 0946-002 West Indian all-rounder Phil Simmons took four... 0 2 \n", + "2 0946-002 West Indian all-rounder Phil Simmons took four... 3 5 \n", + "3 0946-002 West Indian all-rounder Phil Simmons took four... 12 13 \n", + "4 0946-002 West Indian all-rounder Phil Simmons took four... 14 15 \n", + "\n", + " label \n", + "0 --NME-- \n", + "1 --NME-- \n", + "2 --NME-- \n", + "3 --NME-- \n", + "4 --NME-- " ] }, "metadata": {}, @@ -16375,7 +16388,7 @@ }, { "cell_type": "code", - "execution_count": 350, + "execution_count": 20, "metadata": { "deletable": false, "nbgrader": { @@ -16417,7 +16430,7 @@ }, { "cell_type": "code", - "execution_count": 351, + "execution_count": 21, "metadata": { "deletable": false, "editable": false, @@ -16482,7 +16495,7 @@ }, { "cell_type": "code", - "execution_count": 352, + "execution_count": 22, "metadata": { "deletable": false, "nbgrader": { @@ -16531,7 +16544,7 @@ }, { "cell_type": "code", - "execution_count": 353, + "execution_count": 23, "metadata": { "deletable": false, "editable": false, @@ -16600,7 +16613,7 @@ }, { "cell_type": "code", - "execution_count": 354, + "execution_count": 24, "metadata": { "deletable": false, "editable": false, @@ -16630,7 +16643,7 @@ }, { "cell_type": "code", - "execution_count": 355, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -16689,7 +16702,7 @@ "17438 Sweden Sweden_men's_national_ice_hockey_team 0.000059" ] }, - "execution_count": 355, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -16714,7 +16727,7 @@ }, { "cell_type": "code", - "execution_count": 356, + "execution_count": 58, "metadata": { "deletable": false, "nbgrader": { @@ -16745,8 +16758,19 @@ " quadruples consisting of the sentence id, start position, end\n", " position and the predicted entity label of each span.\n", " \"\"\"\n", - " # YOUR CODE HERE\n", - " raise NotImplementedError()" + " for row in df.itertuples():\n", + " sentence_id = row[1]\n", + " sentence = row[2]\n", + " start = row[3]\n", + " end = row[4]\n", + "\n", + " words = sentence.split()\n", + " spliced_sentence = ' '.join(words[int(start):int(end)])\n", + " entity = df_kb.loc[df_kb.mention == spliced_sentence].entity\n", + "\n", + " entity_val = entity.values[0] if len(entity.values) > 0 else \"--NME--\"\n", + "\n", + " yield sentence_id, start, end, entity_val\n" ] }, { @@ -16760,7 +16784,7 @@ }, { "cell_type": "code", - "execution_count": 357, + "execution_count": 61, "metadata": { "deletable": false, "editable": false, @@ -16779,20 +16803,7 @@ "solution" ] }, - "outputs": [ - { - "ename": "NotImplementedError", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[357], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m scores \u001b[38;5;241m=\u001b[39m evaluation_scores(dev_gold_mentions, \u001b[38;5;28mset\u001b[39m(\u001b[43mmost_probable_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf_dev_pred\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdf_kb\u001b[49m\u001b[43m)\u001b[49m))\n\u001b[1;32m 2\u001b[0m print_evaluation_scores(scores)\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m scores[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m.64\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPrecision should be above 64\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", - "Cell \u001b[0;32mIn[356], line 14\u001b[0m, in \u001b[0;36mmost_probable_method\u001b[0;34m(df, df_kb)\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"An entity linker that resolves each mention to the most probably entity in a knowledge base.\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \n\u001b[1;32m 4\u001b[0m \u001b[38;5;124;03mArguments:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;124;03m position and the predicted entity label of each span.\u001b[39;00m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# YOUR CODE HERE\u001b[39;00m\n\u001b[0;32m---> 14\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m()\n", - "\u001b[0;31mNotImplementedError\u001b[0m: " - ] - } - ], + "outputs": [], "source": [ "scores = evaluation_scores(dev_gold_mentions, set(most_probable_method(df_dev_pred, df_kb)))\n", "print_evaluation_scores(scores)\n", -- GitLab