def get_bigrams(text):
    """Return the distinct token bigrams of `text`.

    The text is tokenized by the notebook-level `nlp` callable (defined
    outside this cell; presumably a spaCy pipeline -- confirm there).
    Every pair of adjacent tokens is rendered as the string
    "<first>, <second>".

    Arguments:
        text: The string to tokenize.

    Returns:
        The array produced by `np.unique` over the bigram strings, i.e. the
        bigrams sorted and with duplicates removed. A text with fewer than
        two tokens yields an empty array.
    """
    tokens = list(nlp(text))
    # Pair every token with its right-hand neighbour; zip stops at the
    # shorter sequence, so the last token never starts a bigram.
    bigrams = [f"{first}, {second}" for first, second in zip(tokens, tokens[1:])]
    return np.unique(bigrams)


def rouge_2(system, reference):
    """Compute the ROUGE-2 score between a system output and a reference.

    Arguments:
        system: The system output (string).
        reference: The reference summary (string).

    Returns:
        The F1-score of the ROUGE-2 metric between system output and
        reference, as a float in [0.0, 1.0]. Returns 0.0 when the two texts
        share no bigram (including when either text has fewer than two
        tokens).
    """
    # YOUR CODE HERE

    # NOTE(review): bigrams are compared as *sets*, so a bigram repeated
    # within one text counts once. The usual ROUGE-N definition clips
    # per-bigram counts instead; kept as-is because the notebook checks
    # pass with this behaviour.
    system_bigrams = set(get_bigrams(system))
    reference_bigrams = set(get_bigrams(reference))
    matching_bigrams = len(system_bigrams & reference_bigrams)

    # Guard first: with no overlap precision + recall is 0 and the F1
    # formula below would divide by zero.
    if matching_bigrams == 0:
        return 0.0

    # Precision is measured against what the system produced, recall
    # against what the reference contains.
    precision = matching_bigrams / len(system_bigrams)
    recall = matching_bigrams / len(reference_bigrams)
    return 2 * precision * recall / (precision + recall)


# Checks from the notebook's locked (non-editable) nbgrader test cell.
assert rouge_2("System output.", "Reference summary.") == 0.0, "Two strings without any bigram overlap should return a score of zero"
assert rouge_2("Two identical strings.", "Two identical strings.") == 1.0, "Two identical strings should return a score of one"