From 1af473d8bf71de3accd536f582d7401473cf992c Mon Sep 17 00:00:00 2001 From: jackkolm <jack.kolm@outlook.com> Date: Fri, 21 Mar 2025 01:53:02 +0100 Subject: [PATCH] remove old files and add gitignore --- project/.gitignore | 5 + project/games.py | 12 - project/project.ipynb | 745 ------------------------------------------ project/scraping.py | 17 - project/test.py | 8 - 5 files changed, 5 insertions(+), 782 deletions(-) create mode 100644 project/.gitignore delete mode 100644 project/games.py delete mode 100644 project/project.ipynb delete mode 100644 project/scraping.py delete mode 100644 project/test.py diff --git a/project/.gitignore b/project/.gitignore new file mode 100644 index 0000000..0c57c3f --- /dev/null +++ b/project/.gitignore @@ -0,0 +1,5 @@ +Defuncts/ +.venv +text.txt +__pycache__/ +../.venv \ No newline at end of file diff --git a/project/games.py b/project/games.py deleted file mode 100644 index 84dcbb1..0000000 --- a/project/games.py +++ /dev/null @@ -1,12 +0,0 @@ -import pandas as pd - -# Path to your CSV file -file_path = 'all_games.csv' - -# Load the CSV file into a DataFrame -df = pd.read_csv(file_path) - -# Display the first few rows of the DataFrame -#print(df.head()) -print(df["summary"][0]) - diff --git a/project/project.ipynb b/project/project.ipynb deleted file mode 100644 index 7e69dd3..0000000 --- a/project/project.ipynb +++ /dev/null @@ -1,745 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Importing the data\n", - "\n", - "I got the data from: https://www.kaggle.com/datasets/deepcontractor/top-video-games-19952021-metacritic\n", - "\n", - "I simply downloaded it locally as a csv and load it from the directory. " - ] - }, - { - "cell_type": "code", - "execution_count": 224, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " name platform release_date \\\n", - "0 The Legend of Zelda: Ocarina of Time Nintendo 64 November 23, 1998 \n", - "1 Tony Hawk's Pro Skater 2 PlayStation September 20, 2000 \n", - "2 Grand Theft Auto IV PlayStation 3 April 29, 2008 \n", - "3 SoulCalibur Dreamcast September 8, 1999 \n", - "4 Grand Theft Auto IV Xbox 360 April 29, 2008 \n", - "\n", - " summary meta_score user_review \n", - "0 As a young boy, Link is tricked by Ganondorf, ... 99 9.1 \n", - "1 As most major publishers' development efforts ... 98 7.4 \n", - "2 [Metacritic's 2008 PS3 Game of the Year; Also ... 98 7.7 \n", - "3 This is a tale of souls and swords, transcendi... 98 8.4 \n", - "4 [Metacritic's 2008 Xbox 360 Game of the Year; ... 98 7.9 \n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "\n", - "# Path to your CSV file\n", - "file_path = 'all_games.csv'\n", - "\n", - "# Load the CSV file into a DataFrame\n", - "df = pd.read_csv(file_path)\n", - "\n", - "# Display the first few rows of the DataFrame\n", - "print(df.head())\n", - "#print(df[\"summary\"][0])\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next I plot the data for me to visualize the spread of the dataset, so I can get a better idea of what preprocessing might be appropriate before training." - ] - }, - { - "cell_type": "code", - "execution_count": 225, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[99 98 98 ... 76 76 76]\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAGwCAYAAABIC3rIAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAMmdJREFUeJzt3Qd0lFX+//FvQgk9FKnSQUrowQIIrgiCgC4KroA0KbKwICVKc+msgiBVKas0XUCa4l9BSujSe+9NcClh6T1Anv/53nNmfjMkYoCEmcx9v84ZZ+Z5bmbuZWLyyW1PkOM4jgAAAFgs2NcVAAAA8DUCEQAAsB6BCAAAWI9ABAAArEcgAgAA1iMQAQAA6xGIAACA9ZL7ugJJQUxMjJw6dUrSp08vQUFBvq4OAACIB91q8erVq5IrVy4JDn5wHxCBKB40DOXJk8fX1QAAAI/g5MmTkjt37geWIRDFg/YMuf5BM2TI4OvqAACAeLhy5Yrp0HD9Hn8QAlE8uIbJNAwRiAAASFriM92FSdUAAMB6BCIAAGA9AhEAALAegQgAAFiPQAQAAKxHIAIAANYjEAEAAOsRiAAAgPUIRAAAwHoEIgAAYD0CEQAAsB6BCAAAWI9ABAAArEcgAgAA1vNpIOrXr58EBQV53YoVK+Y+f+vWLWnfvr1kyZJF0qVLJ/Xr15ezZ896vcaJEyekTp06kiZNGsmWLZt07dpV7t6961VmxYoVEh4eLiEhIVK4cGGZMmXKE2sjAADwf8l9XYESJUrIkiVL3M+TJ/+/KnXp0kXmz58vs2fPltDQUOnQoYPUq1dP1qxZY87fu3fPhKEcOXLI2rVr5fTp09KsWTNJkSKFfPrpp6bMsWPHTJm2bdvKtGnTZOnSpdK6dWvJmTOn1KxZ0wctBoAE0C9UkqR+l31dA8A/A5EGIA0097t8+bJMnDhRpk+fLq+88oo5NnnyZClevLisX79eKlSoIIsXL5a9e/eaQJU9e3YpW7asDBw4ULp37256n1KmTCnjx4+XAgUKyLBhw8xr6NevXr1aRowYQSACAAD+MYfo0KFDkitXLilYsKA0btzYDIGpLVu2yJ07d6R69erusjqcljdvXlm3bp15rvelSpUyYchFQ86VK1dkz5497jKer+Eq43qNuNy+fdu8hucNAAAELp8GohdeeMHM51m4cKGMGzfODG9VqVJFrl69KmfOnDE9PBkzZvT6Gg0/ek7pvWcYcp13nXtQGQ05N2/ejLNegwYNMkN0rluePHkStN0AAMC/+HTIrFatWu7HpUuXNgEpX758MmvWLEmdOrXP6tWzZ0+JiIhwP9fwRCgCACBw+XwOkSftDSpSpIgcPnxYXn31VYmOjpZLly559RLpKjPXnCO937hxo9druFaheZa5f2WaPs+QIcMfhi5djaY3AEDCyt9jviQ1xwfX8XUVYMMcIk/Xrl2TI0eOmBVg5cuXN6vFdFWYy4EDB8wco4oVK5rner9r1y6Jiopyl4mMjDRhJywszF3G8zVcZVyvAQAA4NNA9NFHH8nKlSvl+PHjZtn8W2+9JcmSJZNGjRqZuTutWrUyQ1fLly83k6xbtGhhgoyuMFM1atQwwadp06ayY8cOWbRokfTq1cvsXeTq4dHl9kePHpVu3brJ/v37ZezYsWZITpf0AwAA+HzI7Pfffzfh5/z585I1a1apXLmyWVKvj5UujQ8ODjYbMurKL10dpoHGRcPTvHnzpF27diYopU2bVpo3by4DBgxwl9El97qXkQagUaNGSe7cuWXChAksuQcAAG5BjuM4//cUcdFJ1dpjpXsj6XAcAPhcEt2YMf+t6ZLUMIfIjt/ffjWHCAAAwBcIRAAAwHoEIgAAYD0CEQAAsB6BCAAAWI9ABAAArEcgAgAA1iMQAQAA6xGIAACA9QhEAADAegQiAABgPQIRAACwHoEIAABYj0AEAACsRyACAADWIxABAADrEYgAAID1CEQAAMB6BCIAAGA9AhEAALAegQgAAFiPQAQAAKxHIAIAANYjEAEAAOsRiAAAgPUIRAAAwHoEIgAAYD0CEQAAsB6BCAAAWI9ABAAArEcgAgAA1iMQAQAA6xGIAACA9QhEAADAegQiAABgPQIRAACwHoEIAABYj0AEAACsRyACAADWIxABAADrEYgAAID1CEQAAMB6BCIAAGA9AhEAALAegQgAAFiPQAQAAKxHIAIAANYjEAEAAOsRiAAAgPUIRAAAwHoEIgAAYD0CEQAAsB6BCAAAWI9ABAAArEcgAgAA1iMQAQAA6xGIAACA9QhEAADAegQiAABgPb8JRIMHD5agoCDp3Lmz+9itW7ekffv2kiVLFkmXLp3Ur19fzp496/V1J06ckDp16kiaNGkkW7Zs0rVrV7l7965XmRUrVkh4eLiEhIRI4cKFZcqUKU+sXQAAwP/5RSDatGmT/Pvf/5bSpUt7He/SpYv8/PPPMnv2bFm5cqWcOnVK6tWr5z5/7949E4aio6Nl7dq18s0335iw06dPH3eZY8eOmTJVq1aV7du3m8DVunVrWbRo0RNtIwAA8F8+D0TXrl2Txo0by9dffy2ZMmVyH798+bJMnDhRhg8fLq+88oqUL19eJk+ebILP+vXrTZnFixfL3r17ZerUqVK2bFmpVauWDBw4UMaMGWNCkho/frwUKFBAhg0bJsWLF5cOHTrI22+/LSNGjPBZmwEAgH/xeSDSITHtwalevbrX8S1btsidO3e8jhcrVkzy5s0r69atM8/1vlSpUpI9e3Z3mZo1a8qVK1dkz5497jL3v7aWcb1GXG7fvm1ew/MGAAACV3JfvvmMGTNk69atZsjsfmfOnJGUKVNKxowZvY5r+NFzrjKeYch13nXuQWU05Ny8eVNSp04d670HDRok/fv3T4AWAgCApMBnPUQnT56UTp06ybRp0yRVqlTiT3r27GmG7Fw3rSsAAAhcPgtEOiQWFRVlVn8lT57c3HTi9OjRo81j7cXReUCXLl3y+jpdZZYjRw7zWO/vX3Xmev5nZTJkyBBn75DS1Wh63vMGAAACl88CUbVq1WTXrl1m5Zfr9uyzz5oJ1q7HKVKkkKVLl7q/5sCBA2aZfcWKFc1zvdfX0GDlEhkZaQJMWFiYu4zna7jKuF4DAADAZ3OI0qdPLyVLlvQ6ljZtWrPnkOt4q1atJCIiQjJnzmxCzgcffGCCTIUKFcz5GjVqmODTtGlTGTJkiJkv1KtXLzNRW3t5VNu2beXLL7+Ubt26ScuWLWXZsmUya9YsmT9/vg9aDQAA/JFPJ1X/GV0aHxwcbDZk1JVfujps7Nix7vPJkiWTefPmSbt27UxQ0kDVvHlzGTBggLuMLrnX8KN7Go0aNUpy584tEyZMMK8FAACgghzHcfineDBdkRYaGmomWDOfCIBf6BcqSVH+W9MlqTk+uI6vq4An8Pvb5/sQAQAA+BqBCAAAWI9ABAAArEcgAgAA1iMQAQAA6xGIAACA9QhEAADAegQiAABgPQIRAACwHoEIAABYj0AEAACsRyACAADWIxABAADrEYgAAID1CEQAAMB6BCIAAGA9AhEAALAegQgAAFiPQAQAAKxHIAIAANYjEAEAAOsRiAAAgPUIRAAAwHoEIgAAYD0CEQAAsB6BCAAAWO+hA9HChQtl9erV7udjxoyRsmXLyrvvvisXL15M6PoBAAD4XyDq2rWrXLlyxTzetWuXfPjhh1K7dm05duyYREREJEYdAQAAElXyh/0CDT5hYWHm8ffffy+vv/66fPrpp7J161YTjAAAAAK+hyhlypRy48YN83jJkiVSo0YN8zhz5szuniMAAICA7iGqXLmyGRp78cUXZePGjTJz5kxz/ODBg5I7d+7EqCMAAIB/9RB9+eWXkjx5cpkzZ46MGzdOnn76aXN8wYIF8tprryVGHQEAAPyrhyhv3rwyb968WMdHjBiRUHUCAADw/32Ijhw5Ir169ZJGjRpJVFSUu4doz549CV0/AAAA/wtEK1eulFKlSsmGDRvkhx9+kGvXrpnjO3bskL59+yZGHQEAAPwrEPXo0UP+9a9/SWRkpFlx5vLKK6/I+vXrE7p+AAAA/heIdDPGt956K9bxbNmyyf/+97+EqhcAAID/BqKMGTPK6dOnYx3ftm2be8UZAABAQAeihg0bSvfu3eXMmTMSFBQkMTExsmbNGvnoo4+kWbNmiVNLAAAAfwpEepmOYsWKSZ48ecyEar2Mx0svvSSVKlUyK88AAAACfh8inUj99ddfS+/evWX37t0mFJUrV06eeeaZxKkhAACAvwUizw0a9QYAAGBdIHIcx1y2Y/ny5WZTRp1D5En3JgIAAAjoQNS5c2f597//LVWrVpXs2bObidUAAABWBaL//Oc/pheodu3aiVMjAAAAf19lFhoaKgULFkyc2gAAACSFQNSvXz/p37+/3Lx5M3FqBAAA4O9DZu+8845899135lId+fPnlxQpUnid37p1a0LWDwASX79QX9cAQFILRM2bN5ctW7ZIkyZNmFQNAADsDETz58+XRYsWSeXKlROnRgAAAP4+h0gv2ZEhQ4bEqQ0AAEBSCETDhg2Tbt26yfHjxxOnRgAAAP4+ZKZzh27cuCGFChWSNGnSxJpUfeHChYSsHwAAgP8FopEjRyZOTQAAAJLSKjMAAIBA8shXu1e3bt2S6Ohor2NMuAYAAAE/qfr69evSoUMHszFj2rRpJVOmTF43AACAgA9EusJs2bJlMm7cOAkJCZEJEyaYS3nkypVLvv3228SpJQAAgD8Nmf38888m+Lz88svSokULqVKlihQuXFjy5csn06ZNk8aNGydOTQEAAPylh0iX1buudq/zhVzL7HXn6lWrVj3Ua2kvU+nSpc3r6K1ixYqyYMECrzlK7du3lyxZski6dOmkfv36cvbsWa/XOHHihNSpU8dsAaDDeF27dpW7d+96lVmxYoWEh4ebHi0Nb1OmTHnYZgMAgAD20IFIw9CxY8fM42LFismsWbPcPUcZM2Z8qNfKnTu3DB482FwbbfPmzfLKK69I3bp1Zc+ePeZ8ly5dzOvOnj1bVq5cKadOnZJ69eq5v/7evXsmDOnE7rVr18o333xjwk6fPn3cZbSuWqZq1aqyfft26dy5s7Ru3dpcfgQAAEAFOY7jPMw/xYgRIyRZsmTSsWNHWbJkibzxxhuiL3Hnzh0ZPny4dOrU6bH+ZTNnzixDhw6Vt99+W7JmzSrTp083j9X+/fulePHism7dOqlQoYLpTXr99ddNUNILzarx48dL9+7d5dy5c5IyZUrzWK+/tnv3bvd7NGzYUC5duiQLFy6Msw63b982N5crV66YS5ZcvnyZVXRAIOJq909M/lvTJak5PriOr6uAR6S/v0NDQ+P1+/uhe4i010bDkKpevboJKRpatm3b9lhhSHt7ZsyYYVax6dCZ9hppyNL3cNEeqbx585pApPS+VKlS7jCkatasaf4BXL1MWsbzNVxlXK8Rl0GDBpl/QNdNwxAAAAhcj7UPkdLJ1Hp7VLt27TIBSOcL6TyhuXPnSlhYmBne0h6e+4fhNPycOXPGPNZ7zzDkOu8696AyGppu3rwpqVOnjlWnnj17SkRERKweIgAAYHkg0vCwdOlSM0TlCg2ew0o6jDZw4EBJlSrVQ1WgaNGiJvxod9acOXPMTtg6X8iXdPK13gAAgB3iHYh0wrLOxXEFoi+//FJKlCjh7mHRoTPdi0iH1B6G9gLpyi9Vvnx52bRpk4waNUoaNGhgJkvrXB/PXiJdZZYjRw7zWO83btzo9XquVWieZe5fmabPdSwxrt4hAABgn3jPIdI9htq0aeN1TOcOLV++3Nx0IrRrxdnjiImJMT1PGo5SpEhheqVcDhw4YJbZ6xCb0nsdcouKinKXiYyMNGFHh91cZTxfw1XG9RoAAADx7iE6fPiwmcDsokNjwcH/l6eef/55s2fQw9Bht1q1apmJ0levXjUBS/cM0iXxOpm5VatWZi6PrjzTkPPBBx+YIKMrzFSNGjVM8GnatKkMGTLEzBfq1auXqYdryKtt27amN0t32G7ZsqXZZVuDm/Z2AQAAPFQg0qErzzlDuqw9rp6dh6E9O82aNZPTp0+bAKSbNGoYevXVV91L/DV06YaM+tq6Omzs2LFe85bmzZsn7dq1M0FJr62mc5AGDBjgLlOgQAETfnQoT4fidO8jvdyIvhYAAMBDBSINErqXj06CjsvOnTtNmYcxceLEB57XXqgxY8aY2x/RFW6//PLLA19HLzOi2wIAAAA81hyi2rVrmx2gdXl8XCvQ9AKvuiM0AABAwPYQffzxx2bujfYQdejQQYoUKeKe6KxzdPT6YVoGAAAgYAORbmao1wvT+To9evQwl+tQQUFBZs6Pzu25fwNEAACAgNupWico6/W/9Ar3uupM6R5CugoMAADAqkt3aADSZfYAAACB4KEv7goAABBoCEQAAMB6BCIAAGC9eAWi8PBwuXjxonmsu0DfuHEjsesFAADgX4Fo3759cv36dfNYN2C8du1aYtcLAADAv1aZlS1bVlq0aCGVK1c2+w99/vnnki5dujjL6m7WAAAAAReIpkyZIn379jUXUtWNGBcsWCDJk8f+Uj1HIAIAAAEZiPRyHTNmzDCP9erzS5culWzZsiV23QAAAPxzY8aYmJjEqQkAAEBS2qn6yJEjMnLkSDPZWoWFhUmnTp2kUKFCCV0/AAAA/9uHaNGiRSYAbdy4UUqXLm1uGzZskBIlSkhkZGTi1BIAAMCfeoj0SvddunSRwYMHxzrevXt3c+V7AACAgO4h0mGyVq1axTresmVL2bt3b0LVCwAAwH8DUdasWWX79u2xjusxVp4BAAArhszef/99adOmjRw9elQqVapkjq1Zs0Y+++wziYiISIw6AgAA+Fcg6t27t6RPn16GDRsmPXv2NMdy5col/fr1k44dOyZGHQEAAPwrEOlu1DqpWm9Xr141xzQgAQAAWLUPkQtBCAAAWDmpGgAAINAQiAAAgPUIRAAAwHoPFYju3Lkj1apVk0OHDiVejQAAAPw5EKVIkUJ27tyZeLUBAABICkNmTZo0kYkTJyZObQAAAJLCsvu7d+/KpEmTZMmSJVK+fHlJmzat1/nhw4cnZP0AAAD8LxDt3r1bwsPDzeODBw/G2rQRAAAg4APR8uXLE6cmAAAASW3Z/eHDh2XRokVy8+ZN89xxnISsFwAAgP8GovPnz5ul90WKFJHatWvL6dOnzfFWrVrJhx9+mBh1BAAA8K9ApBd11eX3J06ckDRp0riPN2jQQBYuXJjQ9QMAAPC/OUSLFy82Q2W5c+f2Ov7MM8/Ib7/9lpB1AwAA8M8eouvXr3v1DLlcuHBBQkJCEqpeAAAA/huIqlSpIt9++63XUvuYmBgZMmSIVK1aNaHrBwAA4H9DZhp8dFL15s2bJTo6Wrp16yZ79uwxPURr1qxJnFoCAAD4Uw9RyZIlzYaMlStXlrp165ohtHr16sm2bdukUKFCiVNLAAAAf+ohUqGhofLPf/4z4WsDAACQVALRxYsXzQVe9+3bZ56HhYVJixYtJHPmzAldPwAAAP8bMlu1apXkz59fRo8ebYKR3vRxgQIFzDkAAICA7yFq37692YRx3LhxkixZMnPs3r178o9//MOc27VrV2LUEwAAwH96iPQaZnqJDlcYUvo4IiLCnAMAAAj4QBQeHu6eO+RJj5UpUyah6gUAAOBfQ2Y7d+50P+7YsaN06tTJ9AZVqFDBHFu/fr2MGTNGBg8enHg1BQAASCRBjuM4f1YoODjY7Ej9Z0W1jM4nCjRXrlwxWw1cvnxZMmTI4OvqAEho/UJ9XQNr5L81XZKa44Pr+LoKeAK/v+PVQ3Ts2LFHrQsAAIDfi1cgypcvX+LXBAAAICltzHjq1ClZvXq1REVFmQu7etI5RgAAAAEdiKZMmSJ///vfJWXKlJIlSxYzb8hFHxOIAABAwAei3r17S58+faRnz55msjUAAEBS99CJ5saNG9KwYUPCEAAACBgPnWpatWols2fPTpzaAAAAJIUhs0GDBsnrr78uCxculFKlSkmKFCm8zg8fPjwh6wcAAOCfgWjRokVStGhR8/z+SdUAAASS/D3mS1LDZpJPIBANGzZMJk2aJO+9994jvB0AAEAAzCEKCQmRF198MXFqAwAAkBQCkV7Y9YsvvkiQN9fht+eee07Sp08v2bJlkzfffFMOHDjgVebWrVvSvn17s+dRunTppH79+nL27FmvMidOnJA6depImjRpzOt07dpV7t6961VmxYoVEh4ebgJd4cKFzX5KAAAAjzRktnHjRlm2bJnMmzdPSpQoEWtS9Q8//BDv11q5cqUJOxqKNMB8/PHHUqNGDdm7d6+kTZvWlOnSpYvMnz/frGzTC7R16NBB6tWrJ2vWrDHn9WKyGoZy5Mgha9euldOnT0uzZs1MvT799FP3tdi0TNu2bWXatGmydOlSad26teTMmVNq1qzJdwIAAJaL19XuPbVo0eKB5ydPnvzIlTl37pzp4dGg9NJLL5mr02bNmlWmT58ub7/9timzf/9+KV68uKxbt04qVKggCxYsMKve9HIi2bNnN2XGjx8v3bt3N6+nO2rrYw1Vu3fvdr+X7qV06dIls1rufrdv3zY3z6vl5smTh6vdA4GKq90/MUnxavdJEZOqE+lq9wkVeP6MVlhlzpzZ3G/ZskXu3Lkj1atXd5cpVqyY5M2b1x2I9F6X/7vCkNJen3bt2smePXukXLlypozna7jKdO7c+Q+H8vr3759IrQQAAP7Gb7ab1ovEakDRCdslS5Y0x86cOWN6eDJmzOhVVsOPnnOV8QxDrvOucw8qo8nx5s2bseqilyXRcOa6nTx5MoFbCwAA/MlD9xAVKFDggfsNHT169JEqonOJdEhr9erV4ms68VpvAADADg8diO4fZtIhrW3btpm5OLq661HoRGmdpL1q1SrJnTu3+7hOlI6OjjZzfTx7iXSVmZ5zldGJ3p5cq9A8y9y/Mk2f63hi6tSpH6nOAADA4kCky+7jMmbMGNm8efNDvZbO5/7ggw9k7ty5Zlm89j55Kl++vFktpqvCdLm90mX5usy+YsWK5rnef/LJJxIVFWUmZKvIyEgTdsLCwtxlfvnlF6/X1jKu1wAAAHZLsDlEtWrVku+///6hh8mmTp1qVpHpXkQ610dvrnk9OjNcLyYbEREhy5cvN5OsdZWbBhmdUK10mb4Gn6ZNm8qOHTvMZUV69eplXts17KXL7XUor1u3bmaV2tixY2XWrFlmST8AAECCBaI5c+a4V4fF17hx48yk5ZdfftnsCeS6zZw5011mxIgRZlm99hDpUnwd/vLc6yhZsmRmuE3vNSg1adLE7EM0YMAAdxntedJl99orVKZMGXP5kQkTJrAHEQAAeLR9iHQZu+ekav1y7dXRPX+056VNmzZi8z4GAJIg9iF6YtiH6MlgH6InsA+RXl7DU3BwsNk8UXt5dI8gAACApOahA1Hfvn0TpyYAAAC2b8wIAADg9z1EOjT2oA0ZlZ6//yrzAAAAAROIdK+gP6LXChs9erS5/AYAAEDABqK6devGOqabJPbo0UN+/vlnady4sddSdwAAgICeQ3Tq1Cl5//33zVXmdYhs+/bt8s0330i+fPkSvoYAAAD+FIh0HX/37t2lcOHCsmfPHnNJDe0dcl2dHgAAIKCHzIYMGSKfffaZ2Sn6u+++i3MIDQAAIKADkc4V0ivDa++QDo/pLS6el9UAAAAIqECk1wf7s2X3AAAAAR2IpkyZkrg1ARAYuC4YgCSInaoBAID1CEQAAMB6BCIAAGA9AhEAALAegQgAAFiPQAQAAKxHIAIAANYjEAEAAOsRiAAAgPUIRAAAwHoEIgAAYD0CEQAAsB6BCAAAWI9ABAAArEcgAgAA1iMQAQAA6xGIAACA9QhEAADAegQiAABgPQIRAACwHoEIAABYj0AEAACsRyACAADWIxABAADrEYgAAID1CEQAAMB6BCIAAGA9AhEAALAegQgAAFiPQAQAAKxHIAIAANYjEAEAAOsRiAAAgPUIRAAAwHoEIgAAYD0CEQAAsB6BCAAAWI9ABAAArEcgAgAA1iMQAQAA6xGIAACA9QhEAADAegQiAABgPQIRAACwHoEIAABYj0AEAACs59NAtGrVKnnjjTckV65cEhQUJD/++KPXecdxpE+fPpIzZ05JnTq1VK9eXQ4dOuRV5sKFC9K4cWPJkCGDZMyYUVq1aiXXrl3zKrNz506pUqWKpEqVSvLkySNDhgx5Iu0DAABJg08D0fXr16VMmTIyZsyYOM9rcBk9erSMHz9eNmzYIGnTppWaNWvKrVu33GU0DO3Zs0ciIyNl3rx5JmS1adPGff7KlStSo0YNyZcvn2zZskWGDh0q/fr1k6+++uqJtBEAAPi/5L5881q1aplbXLR3aOTIkdKrVy+pW7euOfbtt99K9uzZTU9Sw4YNZd++fbJw4ULZtGmTPPvss6bMF198IbVr15bPP//c9DxNmzZNoqOjZdKkSZIyZUopUaKEbN++XYYPH+4VnAAAgL38dg7RsWPH5MyZM2aYzCU0NFReeOEFWbdunXmu9zpM5gpDSssHBwebHiVXmZdeesmEIRftZTpw4IBcvHgxzve+ffu26VnyvAEAgMDlt4FIw5DSHiFP+tx1Tu+zZcvmdT558uSSOXNmrzJxvYbne9xv0KBBJny5bjrvCAAABC6/DUS+1LNnT7l8+bL7dvLkSV9XCQAA2BiIcuTIYe7Pnj3rdVyfu87pfVRUlNf5u3fvmpVnnmXieg3P97hfSEiIWbXmeQMAAIHLbwNRgQIFTGBZunSp+5jO5dG5QRUrVjTP9f7SpUtm9ZjLsmXLJCYmxsw1cpXRlWd37txxl9EVaUWLFpVMmTI90TYBAAD/5NNApPsF6YovvbkmUuvjEydOmH2JOnfuLP/617/kp59+kl27dkmzZs3MyrE333zTlC9evLi89tpr8v7778vGjRtlzZo10qFDB7MCTcupd99910yo1v2JdHn+zJkzZdSoURIREeHLpgMAAD/i02X3mzdvlqpVq7qfu0JK8+bNZcqUKdKtWzezV5Euj9eeoMqVK5tl9rrBoosuq9cQVK1aNbO6rH79+mbvIhedFL148WJp3769lC9fXp566imz2SNL7gEAgEuQoxv+4IF0qE6DlU6wZj4R8Cf6hfq6BvBj+W9N93UVrHB8cB1fVyHJ/f72aQ8RgD9BuAAAuydVAwAAPCkEIgAAYD0CEQAAsB6BCAAAWI9ABAAArEcgAgAA1iMQAQAA6xGIAACA9QhEAADAegQiAABgPQIRAACwHoEIAABYj0AEAACsRyACAADWIxABAADrEYgAAID1CEQAAMB6BCIAAGA9AhEAALAegQgAAFiPQAQAAKxHIAIAANYjEAEAAOsRiAAAgPUIRAAAwHoEIgAAYD0CEQAAsB6BCAAAWI9ABAAArEcgAgAA1iMQAQAA6xGIAACA9QhEAADAesl9XQHgiekX6usaAAD8FD1EAADAegQiAABgPQIRAACwHoEIAABYj0AEAACsRyACAADWIxABAADrEYgAAID1CEQAAMB67FQNAECAyd9jviQ1xwfX8en700MEAACsRyACAADWIxABAADrEYgAAID1CEQAAMB6BCIAAGA9AhEAALAegQgAAFiPjRnxaPqF+roGAAAkGHqIAACA9QhEAADAegQiAABgPQIRAACwHoEIAABYz6pVZmPGjJGhQ4fKmTNnpEyZMvLFF1/I888/7+tqsWILAAAfs6aHaObMmRIRESF9+/aVrVu3mkBUs2ZNiYqK8nXVAACAj1kTiIYPHy7vv/++tGjRQsLCwmT8+PGSJk0amTRpkq+rBgAAfMyKIbPo6GjZsmWL9OzZ030sODhYqlevLuvWrYtV/vbt2+bmcvnyZXN/5cqVxKngbSdxXhcA/EzM7Ru+rgL81JVE+B3rek3H+fPfs1YEov/9739y7949yZ49u9dxfb5///5Y5QcNGiT9+/ePdTxPnjyJWk8ACHzv+LoC8FOhIxPvta9evSqhoQ+er2tFIHpY2pOk841cYmJi5MKFC5IlSxYJCgpK8PSqQevkyZOSIUMGCUS0MTDQxsBAGwODDW1MiHZqz5CGoVy5cv1pWSsC0VNPPSXJkiWTs2fPeh3X5zly5IhVPiQkxNw8ZcyYMVHrqB90IH9TK9oYGGhjYKCNgcGGNj5uO/+sZ8iqSdUpU6aU8uXLy9KlS716ffR5xYoVfVo3AADge1b0ECkdAmvevLk8++yzZu+hkSNHyvXr182qMwAAYDdrAlGDBg3k3Llz0qdPH7MxY9myZWXhwoWxJlo/aTo0p3sj3T9EF0hoY2CgjYGBNgYGG9r4pNsZ5MRnLRoAAEAAs2IOEQAAwIMQiAAAgPUIRAAAwHoEIgAAYD0C0ROglwJ57rnnJH369JItWzZ588035cCBA15lbt26Je3btze7YadLl07q168fayNJfzZu3DgpXbq0e/Ms3d9pwYIFAdO+uAwePNjsXN65c+eAaWe/fv1MmzxvxYoVC5j2ufz3v/+VJk2amHakTp1aSpUqJZs3b3af17UmuiI1Z86c5rxe9/DQoUOSlOTPnz/WZ6k3/fwC5bPUSzL17t1bChQoYD6nQoUKycCBA72uWxUIn6XutKw/Z/Lly2faUKlSJdm0aVOSbeOqVavkjTfeMLtH6/fkjz/+6HU+Pu3Rq0c0btzY/L7RjZNbtWol165de7yK6SozJK6aNWs6kydPdnbv3u1s377dqV27tpM3b17n2rVr7jJt27Z18uTJ4yxdutTZvHmzU6FCBadSpUpOUvHTTz858+fPdw4ePOgcOHDA+fjjj50UKVKYNgdC++63ceNGJ3/+/E7p0qWdTp06uY8n9Xb27dvXKVGihHP69Gn37dy5cwHTPnXhwgUnX758znvvveds2LDBOXr0qLNo0SLn8OHD7jKDBw92QkNDnR9//NHZsWOH89e//tUpUKCAc/PmTSepiIqK8vocIyMjNSU4y5cvD5jP8pNPPnGyZMnizJs3zzl27Jgze/ZsJ126dM6oUaMC6rN85513nLCwMGflypXOoUOHzP+nGTJkcH7//fck2cZffvnF+ec//+n88MMP5nty7ty5Xufj057XXnvNKVOmjLN+/Xrn119/dQoXLuw0atTosepFIPLRDyr9JtBvbnXp0iUTHvR/Zpd9+/aZMuvWrXOSqkyZMjkTJkwIuPZdvXrVeeaZZ8wvmL/85S/uQBQI7dQftPpDJi6B0D7VvXt3p3Llyn94PiYmxsmRI4czdOhQr7aHhIQ43333nZNU6fdpoUKFTPsC5bOsU6eO07JlS69j9erVcxo3bhwwn+WNGzecZMmSmdDnKTw83ISKpN5GuS8Qxac9e/fuNV+3adMmd5kFCxY4QUFBzn//+99HrgtDZj5w+fJlc585c2Zzv2XLFrlz547pFnTRYYq8efPKunXrJKnRbuwZM2aYncB16CzQ2qfDDHXq1PFqjwqUdmrXtHZlFyxY0HRJnzhxIqDa99NPP5kd6//2t7+ZIexy5crJ119/7T5/7Ngxs3mrZzv1WkgvvPBCkmqnp+joaJk6daq0bNnSDFEEymepQ0d6CaaDBw+a5zt27JDVq1dLrVq1AuazvHv3rvmZmipVKq/jOpSkbQ2ENnqKT3v0XofJ9P9jFy0fHBwsGzZskEdlzU7V/kKvoaZjwS+++KKULFnSHNMPX6+3dv8FZHUXbT2XVOzatcsEIJ2boHMS5s6dK2FhYbJ9+/aAaJ/SoLd161av8XuXQPgc9YfOlClTpGjRonL69Gnp37+/VKlSRXbv3h0Q7VNHjx41c970cj4ff/yx+Sw7duxo2qaX93G15f5d7JNaOz3pHI1Lly7Je++9Z54HymfZo0cPczV0DXN6AW8NDp988okJ8ioQPkude6o/V3VuVPHixU3dv/vuOxMKChcuHBBt9BSf9ui9/jHjKXny5KaT4XHaTCDyQe+C/nLRZB9o9Jeohh/tAZszZ4755bJy5UoJFCdPnpROnTpJZGRkrL/WAoXrL2ulk+Q1IOlEzlmzZpm/SAPljxL9y/LTTz81z7WHSP+fHD9+vPmeDUQTJ040n632/AUS/b6cNm2aTJ8+XUqUKGF+/ugfnNrOQPos//Of/5jevaefftoEv/DwcGnUqJHp6UPCYcjsCerQoYPMmzdPli9fLrlz53Yfz5Ejh+nS1r/gPOmKDz2XVOhfnPoXS/ny5c3KujJlysioUaMCpn36wycqKsr8MNK/RvSmgW/06NHmsf4FEwjt9KQ9CEWKFJHDhw8HzOeoK1e059KT/uXtGhp0teX+FVdJrZ0uv/32myxZskRat27tPhYon2XXrl1NL1HDhg3NSsGmTZtKly5dzM+fQPosdfWc/qzRVVT6h9nGjRvNkKcOawdKG13i0x6915/F9w8t6sqzx2kzgegJ0HljGoZ0CGnZsmVmiagnDRApUqQwY+Euuixff0BrV2lS/kv89u3bAdO+atWqmWFB/SvUddOeBu2edz0OhHZ60h/AR44cMSEiUD5HHa6+f9sLnYOiPWFK///UH6qe7dRhGZ2bkJTa6TJ58mQzvKDz3lwC5bO8ceOGmTfiSXtQ9GdPIH6WadOmNf8vXrx4URYtWiR169YNuDYWiEd79F7DvGcPmf5u1c9de7Uf2SNPx0a8tWvXziwhXLFihdcyWF094KJLYHUp/rJly8wS2IoVK5pbUtGjRw+zak6Xvu7cudM81xn/ixcvDoj2/RHPVWaB0M4PP/zQfJ/q57hmzRqnevXqzlNPPWVWRgZC+1xbJiRPntws2dYlzNOmTXPSpEnjTJ061WvZb8aMGZ3/9//+n/l+rlu3rl8vY/4j9+7dM5+Xrqy7XyB8ls2bN3eefvpp97J7Xcat36/dunULqM9y4cKFZhWVbhGhP1N1JegLL7zgREdHJ8k2Xr161dm2bZu5aQwZPny4efzbb7/Fuz267L5cuXJm64zVq1eblb8su08C9AOP66Z7E7noB/2Pf/zDLFXXH85vvfWWCU1JhS591b1dUqZM6WTNmtWpVq2aOwwFQvviG4iSejsbNGjg5MyZ03yO+otGn3vuz5PU2+fy888/OyVLljRLeYsVK+Z89dVXXud16W/v3r2d7NmzmzL6/az7ayU1ur+S/qyJq+6B8FleuXLF/P+nwS5VqlROwYIFzVL027dvB9RnOXPmTNM2/f9Sl6S3b9/eLEVPqm1cvnx5nL8TNeDGtz3nz583AUj3ndI9mVq0aGGC1uMI0v88XgcXAABA0sYcIgAAYD0CEQAAsB6BCAAAWI9ABAAArEcgAgAA1iMQAQAA6xGIAACA9QhEAADAegQiAABgPQIRgCfmvffek6CgIGnbtm2sc+3btzfntEx8rVixwnzN/VdtfxTnzp2Tdu3aSd68eSUkJMRcYLJmzZqyZs2ax35tAP6PQATgicqTJ4/MmDFDbt686T5269YtmT59ugkjvlK/fn3Ztm2bfPPNN3Lw4EH56aef5OWXX5bz588n2ntGR0cn2msDeDgEIgBPVHh4uAlFP/zwg/uYPtYwVK5cOa+yMTExMmjQIClQoICkTp1aypQpI3PmzDHnjh8/LlWrVjWPM2XK5NW7tHDhQqlcubJkzJhRsmTJIq+//rocOXLkD+ukPUy//vqrfPbZZ+Y18+XLJ88//7z07NlT/vrXv3qV+/vf/y7Zs2eXVKlSScmSJWXevHnu899//72UKFHC9DDlz59fhg0b5vU+emzgwIHSrFkzyZAhg7Rp08YcX716tVSpUsW0Uf9tOnbsKNevX3/Mf2kAD4NABOCJa9mypUyePNn9fNKkSdKiRYtY5TQMffvttzJ+/HjZs2ePdOnSRZo0aSIrV640wUEDiDpw4ICcPn1aRo0aZZ5rmIiIiJDNmzfL0qVLJTg4WN566y0TsOKSLl06c/vxxx/l9u3bcZbRr61Vq5YZQps6dars3btXBg8eLMmSJTPnt2zZIu+88440bNhQdu3aJf369ZPevXvLlClTvF7n888/N8FOe6P0vAa11157zfRQ7dy5U2bOnGkCUocOHR7jXxjAQ/u/C98DQOJq3ry5U7duXScqKsoJCQlxjh8/bm6pUqVyzp07Z85pGXXr1i0nTZo0ztq1a71eo1WrVk6jRo3M4+XLlzv6Y+zixYsPfF99bS23a9euPywzZ84cJ1OmTKYulSpVcnr27Ons2LHDfX7RokVOcHCwc+DAgTi//t1333VeffVVr2Ndu3Z1wsLC3M/z5cvnvPnmm7Ha06ZNG69jv/76q3mvmzdvPrBdABIOPUQAnrisWbNKnTp1TO+J9hTp46eeesqrzOHDh+XGjRvy6quvuntw9KY9Rg8a/lKHDh2SRo0aScGCBc3QlA5VqRMnTvzh12gPzalTp8zcIe2x0QnbOrzn6uHZvn275M6dW4oUKRLn1+/bt09efPFFr2P6XOty794997Fnn33Wq8yOHTvMe3i2USdza4/UsWPHHthOAAkneQK+FgA81LCZa1hozJgxsc5fu3bN3M+fP1+efvppr3M6R+dB3njjDTMP6Ouvv5ZcuXKZcKHzff5sErPOC9IApjcdzmrdurX07dvXzE3S+T0JIW3atLHaqfOSdN7Q/Xw5yRywDYEIgE9oL4wGFJ0MrT0i9wsLCzPBR3t1/vKXv8T5GilTpjT3nj0wuipM5xRpGNKJykrn5DwKrYPOK1KlS5eW33//3axAi6uXqHjx4rGW6OtzLeuaZxQX7YXS+UiFCxd+pDoCSBgEIgA+oSFBh5lcj++XPn16+eijj8xEau3h0VVjly9fNiFDh8GaN29ueoE0UOlKr9q1a5teHF1xpivLvvrqK8mZM6cJVD169HhgXTRE/e1vfzO9Vhp89L11QvaQIUOkbt26poyGspdeeskMrQ0fPtwEmP3795v313D34YcfynPPPWdWkTVo0EDWrVsnX375pYwdO/aB7929e3epUKGC6S3THintQdKAFBkZab4ewBOSgPORACBek6r/iOekahUTE+OMHDnSKVq0qJMiRQona9asTs2aNZ2VK1e6ywwYMMDJkSOHExQU5P7ayMhIp3jx4mbidunSpZ0VK1aYSdVz586N8311AnePHj2c8PBwJzQ01Ezm1vfs1auXc+PGDXe58+fPOy1atHCyZMliJl+XLFnSmTdvntfEbJ1ErXXNmzevM3ToUK/30UnVI0aMiPX+GzduNBOy06VL56RNm9bU+ZNPPon3vyuAxxek/3lS4QsAAMAfscoMAABYj0AEAACsRyACAADWIxABAADrEYgAAID1CEQAAMB6BCIAAGA9AhEAALAegQgAAFiPQAQAAKxHIAIAAGK7/w/UCIO3Mz315gAAAABJRU5ErkJggg==", - "text/plain": [ - "<Figure size 640x480 with 1 Axes>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "def plot_games_on_meta_score(data):\n", - " scores = np.array(data[\"meta_score\"])\n", - " scores_big = scores[scores > 75]\n", - " print(scores_big)\n", - " scores_small = scores[scores <= 75]\n", - " two_sets = [scores_big, scores_small]\n", - " plt.hist(two_sets, bins=10, stacked=True)\n", - " plt.xlabel(\"Meta Score\")\n", - " plt.ylabel(\"Number of Games\")\n", - " plt.show()\n", - "plot_games_on_meta_score(df)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Preparing the dataset\n", - "\n", - "Here I make modifications, then I shuffle and split the dataset into separate test and training datasets." - ] - }, - { - "cell_type": "code", - "execution_count": 226, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "18686\n", - "18606\n" - ] - } - ], - "source": [ - "# Making new PandasFrame with new classes, one class for all games with a score less than 70, one between 70 and 89, and one between 90 and 100.\n", - "\n", - "def split_data(data):\n", - " \"\"\"\n", - " Split data into two parts; a training dataset, and a test dataset.\n", - " Returns the two parts as a tuple.c\n", - " \"\"\"\n", - " test_data = pd.DataFrame() # Initialize an empty DataFrame for test data\n", - " drop_indexes = []\n", - " for i in range(1, 101):\n", - " row = data.loc[data['meta_score'] == i]\n", - " # Check if the row is an empty DataFrame\n", - " if row.empty:\n", - " continue\n", - " if row.isnull().values.any():\n", - " print(\"null\")\n", - " drop_indexes.append(row.index[0])\n", - "\n", - " # Add the rows to test_data\n", - " test_data = pd.concat([test_data, row])\n", - " # HERE I want to delete the row that's been added to the test data from the original data\n", - " data.drop(drop_indexes, inplace=True)\n", - "\n", - " # Shuffle the remaining data for training\n", - " data = data.sample(frac=1.0, random_state=200)\n", - " training_data = data\n", - " return training_data, test_data\n", - "\n", - "def make_classes(data):\n", - "\n", - " for index, row in df.iterrows():\n", - " #data.at[index, \"class\"] = row[\"meta_score\"]\n", - "\n", - " if row[\"meta_score\"] < 70:\n", - " data.at[index, \"class\"] = \"bad\"\n", - " elif row[\"meta_score\"] < 80:\n", - " data.at[index, \"class\"] = \"average\"\n", - " else:\n", - " data.at[index, \"class\"] = \"good\"\n", - " # Remove all columns except name, summary and class\n", - " #data = data[[\"name\", \"summary\", \"class\"]]\n", - " # It seems that one or more entries in df[\"summary\"] are NaN (I get document is np.nan error)\n", - " # I will remove these entries\n", - " #np.data[\"class\"]\n", - " #least_amount = data['class'].value_counts()[\"good\"]\n", - "\n", - " bad_data = df.loc[df['class'] == \"bad\"]\n", - " good_data = df.loc[df['class'] == \"good\"]\n", - " average_data = df.loc[df['class'] == \"average\"]\n", - " least_amount = min([len(bad_data), len(good_data), len(average_data)])\n", - "\n", - " bad_data = bad_data.sample(frac=1.0, random_state=200)\n", - " good_data = good_data.sample(frac=1.0, random_state=200)\n", - " average_data = average_data.sample(frac=1.0, random_state=200)\n", - " bad_data = bad_data[:least_amount]\n", - " good_data = good_data[:least_amount]\n", - " average_data = average_data[:least_amount]\n", - " data = pd.concat([bad_data, good_data, average_data])\n", - " randomised_data = data.sample(frac=1.0, random_state=201)\n", - " data = randomised_data\n", - " data = data.dropna(subset=[\"summary\"])\n", - "\n", - " return data\n", - "\n", - " \n", - "\n", - "def make_binary_classes(data):\n", - "\n", - " for index, row in df.iterrows():\n", - " #data.at[index, \"class\"] = row[\"meta_score\"]\n", - "\n", - " if row[\"meta_score\"] < 75:\n", - " data.at[index, \"class\"] = \"bad\"\n", - " else:\n", - " data.at[index, \"class\"] = \"good\"\n", - " # Remove all columns except name, summary and class\n", - " #data = data[[\"name\", \"summary\", \"class\"]]\n", - " # It seems that one or more entries in df[\"summary\"] are NaN (I get document is np.nan error)\n", - " # I will remove these entries\n", - " #np.data[\"class\"]\n", - " #least_amount = data['class'].value_counts()[\"good\"]\n", - "\n", - " bad_data = df.loc[df['class'] == \"bad\"]\n", - " good_data = df.loc[df['class'] == \"good\"]\n", - " least_amount = min([len(bad_data), len(good_data)])\n", - "\n", - " bad_data = bad_data.sample(frac=1.0, random_state=200)\n", - " good_data = good_data.sample(frac=1.0, random_state=200)\n", - " bad_data = bad_data[:least_amount]\n", - " good_data = good_data[:least_amount]\n", - " data = pd.concat([bad_data, good_data])\n", - " randomised_data = data.sample(frac=1.0, random_state=201)\n", - " data = randomised_data\n", - " data = data.dropna(subset=[\"summary\"])\n", - "\n", - " return data\n", - "\n", - "\n", - "\n", - "\n", - "#df = make_classes(df)\n", - "#least_amount = df['class'].value_counts()[\"average\"]\n", - "#print(least_amount)\n", - "df = df.dropna(subset=[\"summary\"])\n", - "print(len(df))\n", - "training, test = split_data(df)\n", - "print(len(training))\n", - "train_X = np.array(training[\"summary\"])\n", - "\n", - "train_Y = np.array(training[\"meta_score\"])\n", - "test_X = np.array(test[\"summary\"])\n", - "test_Y = np.array(test[\"meta_score\"])\n", - "\n", - "#lenghts_of_summaries = [len(x) for x in train_X]\n", - "#average_length = sum(lenghts_of_summaries) / len(lenghts_of_summaries)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 227, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[98 98 98 ... 76 76 76]\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkYAAAGwCAYAAABM/qr1AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAMkdJREFUeJzt3Qd4VFX+//FvQiDUhN4kdCmhB5UiqAiCgC4KroAICEEWFpamNJcSYRVEqUtTkeIi0hRXYCEgVXqRXkIXXOpKCS0EyP093/O/M/8ZgphgwpS8X88zzsy9ZybnMDH55LQbYFmWJQAAAJBAT1cAAADAWxCMAAAAbAQjAAAAG8EIAADARjACAACwEYwAAABsBCMAAABbkOMBfltCQoKcPn1asmXLJgEBAZ6uDgAASALdqvHq1atSsGBBCQxMWl8QwSgJNBSFhYV5uhoAAOAhnDp1SgoVKpSksgSjJNCeIsc/bEhIiKerAwAAkiA2NtZ0bDh+jycFwSgJHMNnGooIRgAA+JbkTINh8jUAAICNYAQAAGAjGAEAANgIRgAAADaCEQAAgI1gBAAAYCMYAQAA2AhGAAAANoIRAACAjWAEAABgIxgBAADYCEYAAAA2ghEAAICNYAQAAGAjGAEAANiCHA8AAEh1UaHic6KueLoGeIToMQIAALARjAAAAGwEIwAAAG8IRlFRURIQEOB2K1OmjPN8XFycdOnSRXLlyiVZs2aVZs2ayblz59ze4+TJk9K4cWPJnDmz5M2bV3r37i137txxK7N69WqJiIiQ4OBgKVmypEyfPv2RtREAAPgOj/cYlStXTs6cOeO8rVu3znmuZ8+esnDhQpk3b56sWbNGTp8+LU2bNnWev3v3rglF8fHxsmHDBpkxY4YJPYMGDXKWOX78uClTp04d2blzp/To0UM6dOgg0dHRj7ytAADAu3l8VVpQUJDkz58/0fErV67IF198IbNmzZLnn3/eHJs2bZqULVtWNm3aJNWrV5dly5bJ/v375YcffpB8+fJJ5cqVZejQodK3b1/TG5UhQwaZPHmyFCtWTEaOHGneQ1+v4Wv06NHSoEGDR95eAADgvTzeY3T48GEpWLCgFC9eXFq1amWGxtT27dvl9u3bUq9ePWdZHWYrXLiwbNy40TzX+woVKphQ5KBhJzY2Vvbt2+cs4/oejjKO97ifW7dumfdwvQEAAP/n0WBUrVo1M/S1dOlSmTRpkhn2ql27tly9elXOnj1renyyZ8/u9hoNQXpO6b1rKHKcd5x7UBkNOzdv3rxvvYYNGyahoaHOW1hYWIq2GwAAeCePDqU1bNjQ+bhixYomKBUpUkTmzp0rmTJl8li9+vfvL7169XI+1xBFOAIAwP95fCjNlfYOlSpVSo4cOWLmHemk6suXL7uV0VVpjjlJen/vKjXH898rExIS8pvhS1ev6XnXGwAA8H9eFYyuXbsmR48elQIFCkjVqlUlffr0smLFCuf5mJgYMwepRo0a5rne79mzR86fP+8ss3z5chNkwsPDnWVc38NRxvEeAAAAXhGM3n33XbMM/8SJE2a5/auvvirp0qWTli1bmrk9kZGRZkhr1apVZjJ2u3btTKDRFWmqfv36JgC1bt1adu3aZZbgDxgwwOx9pL0+qlOnTnLs2DHp06ePHDx4UCZOnGiG6nQrAAAAAK+ZY/TLL7+YEPTrr79Knjx5pFatWmYpvj5WuqQ+MDDQbOyoK8V0NZkGGwcNUYsWLZLOnTubwJQlSxZp27atDBkyxFlGl+ovXrzYBKGxY8dKoUKFZMqUKSzVBwAAiQRYlmUlPgxXOvlae7B0byXmGwHAHxAVKj4n6oqna4BH+Pvbq+YYAQAAeBLBCAAAwEYwAgAAsBGMAAAAbAQjAAAAG8EIAADAG/YxAgCkoWXvgA+gxwgAAMBGMAIAALARjAAAAGwEIwAAABvBCAAAwEYwAgAAsBGMAAAAbAQjAAAAG8EIAADARjACAACwEYwAAABsBCMAAAAbwQgAAMBGMAIAALARjAAAAGwEIwAAABvBCAAAwEYwAgAAsBGMAAAAbAQjAAAAG8EIAADARjACAACwEYwAAABsBCMAAABbkOMBAACprWjcLPE1JzxdATxS9BgBAADYCEYAAAA2ghEAAICNYAQAAGAjGAEAANgIRgAAADaCEQAAgI1gBAAAYCMYAQAA2AhGAAAANoIRAACAjWAEAABgIxgBAADYCEYAAAA2ghEAAICNYAQAAGAjGAEAANgIRgAAADaCEQAAgI1gBAAAYCMYAQAA2AhGAAAANoIRAACAjWAEAABgIxgBAADYCEYAAADeFoyGDx8uAQEB0qNHD+exuLg46dKli+TKlUuyZs0qzZo1k3Pnzrm97uTJk9K4cWPJnDmz5M2bV3r37i137txxK7N69WqJiIiQ4OBgKVmypEyfPv2RtQsAAPgOrwhGW7dulU8//VQqVqzodrxnz56ycOFCmTdvnqxZs0ZOnz4tTZs2dZ6/e/euCUXx8fGyYcMGmTFjhgk9gwYNcpY5fvy4KVOnTh3ZuXOnCV4dOnSQ6OjoR9pGAADg/TwejK5duyatWrWSzz//XHLkyOE8fuXKFfniiy9k1KhR8vzzz0vVqlVl2rRpJgBt2rTJlFm2bJns379fZs6cKZUrV5aGDRvK0KFDZcKECSYsqcmTJ0uxYsVk5MiRUrZsWenatau89tprMnr06N+s061btyQ2NtbtBgAA/J/Hg5EOlWmPTr169dyOb9++XW7fvu12vEyZMlK4cGHZuHGjea73FSpUkHz58jnLNGjQwASZffv2Ocvc+95axvEe9zNs2DAJDQ113sLCwlKsvQAAwHt5NBjNnj1bfvrpJxNE7nX27FnJkCGDZM+e3e24hiA95yjjGooc5x3nHlRGw9PNmzfvW6/+/fubHivH7dSpU3+wpQAAwBcEeeoLa9jo3r27LF++XDJmzCjeRCdp6w0AAKQtHusx0qGy8+fPm9ViQUFB5qYTrMeNG2cea6+OzhO6fPmy2+t0VVr+/PnNY72/d5Wa4/nvlQkJCZFMmTKlcisBAIAv8Vgwqlu3ruzZs8esFHPcnnjiCTMR2/E4ffr0smLFCudrYmJizPL8GjVqmOd6r++hActBe6A09ISHhzvLuL6Ho4zjPQAAADw+lJYtWzYpX76827EsWbKYPYscxyMjI6VXr16SM2dOE3b+9re/mUBTvXp1c75+/fomALVu3VpGjBhh5hMNGDDATOh2DIV16tRJxo8fL3369JH27dvLypUrZe7cubJ48WIPtBoAAHgzjwWjpNAl9YGBgWZjR11Cr6vJJk6c6DyfLl06WbRokXTu3NkEJg1Wbdu2lSFDhjjL6FJ9DUG6J9LYsWOlUKFCMmXKFPNeAAAArgIsy7LcjiARXcGmy/Z1hZr2XAGAx0WFii8qGjdLfM2J4Y09XQU8wt/fHt/HCAAAwFsQjAAAAGwEIwAAABvBCAAAwEYwAgAAsBGMAAAAbAQjAAAAG8EIAADARjACAACwEYwAAABsBCMAAAAbwQgAAMBGMAIAALARjAAAAGwEIwAAABvBCAAAwEYwAgAAsBGMAAAAbAQjAAAAG8EIAADARjACAACwEYwAAAAeNhgtXbpU1q1b53w+YcIEqVy5srzxxhty6dKl5L4dAACA7waj3r17S2xsrHm8Z88eeeedd6RRo0Zy/Phx6dWrV2rUEQAA4JEISu4LNACFh4ebx99884289NJL8uGHH8pPP/1kAhIAAECa6THKkCGD3Lhxwzz+4YcfpH79+uZxzpw5nT1JAAAAaaLHqFatWmbI7Omnn5YtW7bInDlzzPFDhw5JoUKFUqOOAAAA3tljNH78eAkKCpL58+fLpEmT5LHHHjPHlyxZIi+++GJq1BEAAMA7e4wKFy4sixYtSnR89OjRKVUnAAAA39nH6OjRozJgwABp2bKlnD9/3tljtG/fvpSuHwAAgPcGozVr1kiFChVk8+bN8u2338q1a9fM8V27dsngwYNTo44AAADeGYz69esn//jHP2T58uVmhZrD888/L5s2bUrp+gEAAHhvMNJNHV999dVEx/PmzSv/+9//UqpeAAAA3h+MsmfPLmfOnEl0fMeOHc4VagAAAGkiGLVo0UL69u0rZ8+elYCAAElISJD169fLu+++K23atEmdWgIAAHhjMNLLf5QpU0bCwsLMxGu9PMgzzzwjNWvWNCvVAAAA0sw+Rjrh+vPPP5eBAwfK3r17TTiqUqWKPP7446lTQwAAAG8NRq4bPeoNAAAgzQYjy7LM5UBWrVplNnfUOUaudG8jAACANBGMevToIZ9++qnUqVNH8uXLZyZgAwAApMlg9K9//cv0CjVq1Ch1agQAAOArq9JCQ0OlePHiqVMbAAAAXwpGUVFR8v7778vNmzdTp0YAAAC+MpT2+uuvy9dff20uAVK0aFFJnz692/mffvopJesHAADgvcGobdu2sn37dnnzzTeZfA0AANJ2MFq8eLFER0dLrVq1UqdGAAAAvjLHSC8FEhISkjq1AQAA8KVgNHLkSOnTp4+cOHEidWoEAADgK0NpOrfoxo0bUqJECcmcOXOiydcXL15MyfoBAAB4bzAaM2ZM6tQEAADAF1elAQAA+KNkByNXcXFxEh8f73aMidkAACDNTL6+fv26dO3a1WzwmCVLFsmRI4fbDQAAIM0EI12RtnLlSpk0aZIEBwfLlClTzCVCChYsKF9++WXq1BIAAMAbh9IWLlxoAtBzzz0n7dq1k9q1a0vJkiWlSJEi8tVXX0mrVq1Sp6YAAADe1mOky/GLFy/unE/kWJ6vO2GvXbs25WsIAADgrcFIQ9Hx48fN4zJlysjcuXOdPUnZs2dP+RoCAAB4azDS4bNdu3aZx/369ZMJEyZIxowZpWfPntK7d+/UqCMAAIB3BiMNQN26dTOP69WrJwcPHpRZs2bJjh07pHv37sl6L53AXbFiRTMkp7caNWrIkiVL3LYD6NKli+TKlUuyZs0qzZo1k3Pnzrm9x8mTJ6Vx48ZmF25dKafh7M6dO25lVq9eLREREWayuM6Hmj59enKbDQAA0oA/tI+R0knXensYhQoVkuHDh8vjjz8ulmXJjBkzpEmTJiZklStXzoSwxYsXy7x58yQ0NNRsE9C0aVNZv369ef3du3dNKMqfP79s2LBBzpw5I23atDGXKfnwww9NGR320zKdOnUyk8NXrFghHTp0kAIFCkiDBg3+aPMB+IOoUE/XAICXCLA0kSTBzZs3Tah46aWXzPP+/fvLrVu3nOfTpUsnQ4cONcNqf0TOnDnl448/ltdee03y5MljeqP0sdLeqbJly8rGjRulevXqpndJ63P69GnJly+fKTN58mTp27evXLhwQTJkyGAea7jau3ev82u0aNFCLl++LEuXLr1vHbRdrm2LjY2VsLAwuXLlChtYAv6IYPTIFI2bJb7mxPDGnq4CHpL+/taOleT8/k7yUJr25nz66afO5+PHjze9NNq7o7eZM2eaobGHpb0/s2fPNhtI6pDa9u3b5fbt22a4zkEnexcuXNgEI6X3FSpUcIYipb1A+g+xb98+ZxnX93CUcbzH/QwbNsz8QzpuGooAAID/S3Iw0mGojh07uh3T3pxVq1aZm/byOFaoJceePXvM/CGd/6PDXQsWLJDw8HA5e/as6fG5d6WbhiA9p/TeNRQ5zjvOPaiMhiftBbsf7Q3TdOm4nTp1KtntAgAAfjzH6MiRI6Z3xkGHzAID/3+ueuqpp8xE6eQqXbq07Ny50wSQ+fPnm4vUrlmzRjxJQ5reAABA2pLkYKRzclzn3egcHlcJCQlu55NKe4V0pZiqWrWqbN26VcaOHSvNmzc3F6jVr+vaa6Sr0nSytdL7LVu2uL2fY9Waa5l7V7Lpcx1rzJQpU7LrCwAA/FdgclaQuU5gvtfu3btNmT/KEbA0JOnqMp3w7RATE2OW5+scJKX3OhR3/vx5Z5nly5eb0KPDcY4yru/hKON4DwAAgGQHo0aNGsmgQYPM3kL30rk6eiFZXRafHDqXRy8jcuLECRNw9LnuOaTXW9NJz5GRkdKrVy8zh0knY+vmkhpodEWaql+/vglArVu3NptORkdHy4ABA8yQnmMoTOctHTt2zFz8Vle1TZw40cyF0q0AAAAAHmoo7b333jOBQucE6X5CpUqVcvbi6Ao13VRRyySH9vTovkO6/5AGId3sUcPNCy+8YM6PHj3azGPSjR21F0lXk2mwcd0iYNGiRdK5c2cTmLJkyWLmKA0ZMsRZplixYma5vgYhHaLTXq0pU6awhxEAAHj4fYwcmyVqCNGhKMfLAgICTJDRwOK4uKy/eZh9EAD4EPYxemTYxwje/vs7WTtfa++Lbop48eJFs0pN6cRp3ZQRAAAgTV4SRIOQLs8HAABI0xeRBQAA8FcEIwAAABvBCAAAIDnBKCIiQi5dumQe61L4GzduJOVlAAAA/heMDhw4YK56r3Qjx2vXrqV2vQAAALxzVVrlypXNrtO1atUy+xd98sknkjVr1vuW1d2xAQAA/DYYTZ8+XQYPHmx2mdYNHZcsWSJBQYlfqucIRgAAwK+DkV4GZPbs2eaxXqJDL8qaN2/e1K4bAACAd2/wmJCQkDo1AQAA8MWdr48ePSpjxowxk7KVXuG+e/fuUqJEiZSuHwAAgPfuYxQdHW2C0JYtW6RixYrmtnnzZilXrpy5uCwAAECa6THq16+f9OzZU4YPH57oeN++feWFF15IyfoBAAB4b4+RDp9FRkYmOt6+fXvZv39/StULAADA+4NRnjx5ZOfOnYmO6zFWqgEAgDQ1lPb2229Lx44d5dixY1KzZk1zbP369fLRRx9Jr169UqOOAAAA3hmMBg4cKNmyZZORI0dK//79zbGCBQtKVFSUdOvWLTXqCAAA4J3BSHe31snXert69ao5pkEJAAAgTe5j5EAgAgAAaXryNQAAgL8iGAEAANgIRgAAAA8TjG7fvi1169aVw4cPJ+dlAAAA/heM0qdPL7t370692gAAAPjSUNqbb74pX3zxRerUBgAAwJeW69+5c0emTp0qP/zwg1StWlWyZMnidn7UqFEpWT8AAADvDUZ79+6ViIgI8/jQoUOJNn8EAABIM8Fo1apVqVMTAAAAX12uf+TIEYmOjpabN2+a55ZlpWS9AAAAvD8Y/frrr2bJfqlSpaRRo0Zy5swZczwyMlLeeeed1KgjAACAdw6l6cVjddn+yZMnpWzZss7jzZs3l169esnIkSNTuo4AAHhM0X6LxdecGN7Y01VIO8Fo2bJlZgitUKFCbscff/xx+fnnn1OybgAAAN49lHb9+nXJnDlzouMXL16U4ODglKoXAACA9wej2rVry5dffum2RD8hIUFGjBghderUSen6AQAAeO9QmgYgnXy9bds2iY+Plz59+si+fftMj9H69etTp5YAAADe2GNUvnx5s7FjrVq1pEmTJmZorWnTprJjxw4pUaJE6tQSAADAG3uMVGhoqPz9739P+doAAAD4WjC6dOmSuZDsgQMHzPPw8HBp166d5MyZM6XrBwAA4L1DaWvXrpWiRYvKuHHjTEDSmz4uVqyYOQcAAJBmeoy6dOliNnOcNGmSpEuXzhy7e/eu/PWvfzXn9uzZkxr1BAAA8L4eI71Gml76wxGKlD7WXa/1HAAAQJoJRhEREc65Ra70WKVKlVKqXgAAAN45lLZ7927n427dukn37t1N71D16tXNsU2bNsmECRNk+PDhqVdTAACAVBZgWZb1e4UCAwPNDte/V1TL6HwjfxMbG2u2KLhy5YqEhIR4ujoAUlpUqKdrkGYUjZvl6SqkCVxE9uF/fyepx+j48eNJejMAAABflqRgVKRIkdSvCQAAgC9u8Hj69GlZt26dnD9/3lxA1pXOQQIAAEgTwWj69Onyl7/8RTJkyCC5cuUy84oc9DHBCAAApJlgNHDgQBk0aJD079/fTMoGAADwF8lONjdu3JAWLVoQigAAgN9JdrqJjIyUefPmpU5tAAAAfGkobdiwYfLSSy/J0qVLpUKFCpI+fXq386NGjUrJ+gEAAHh3MIqOjpbSpUub5/dOvgYAAEgzwWjkyJEydepUeeutt1KnRgAAAL4yxyg4OFiefvrp1KkNAACALwUjvYDsP//5z9SpDQAAgC8Foy1btsiMGTOkePHi8vLLL0vTpk3dbsmdr/Tkk09KtmzZJG/evPLKK69ITEyMW5m4uDjp0qWL2Uwya9as0qxZMzl37pxbmZMnT0rjxo0lc+bM5n169+4td+7ccSuzevVqiYiIMD1eJUuWNBtVAgAA/KFglD17dhOAnn32WcmdO7e5aq3rLTnWrFljQs+mTZtk+fLlcvv2balfv75cv37dWaZnz56ycOFCs0WAltfLkbgGsLt375pQFB8fLxs2bDChTUOPbkLpehFcLVOnTh3ZuXOn9OjRQzp06GAmkQMAADgEWJZliZe4cOGC6fHRAPTMM8/IlStXJE+ePDJr1ix57bXXTJmDBw9K2bJlZePGjVK9enVZsmSJ2T5AA1O+fPlMmcmTJ0vfvn3N++mlS/Tx4sWLZe/evc6vpZtUXr582Ww78HtiY2NN6NP6hISEpOK/AACPiEreH3V4eEXjZnm6CmnCieGNPV0Fr/Awv7+9avtqrbjKmTOnud++fbvpRapXr56zTJkyZaRw4cImGCm91/2UHKFINWjQwPxj7Nu3z1nG9T0cZRzvca9bt26Z17veAACA/0v2cv1ixYo9cL+iY8eOPVRFEhISzBCXrngrX768OXb27FnT46PDd640BOk5RxnXUOQ47zj3oDIaeG7evCmZMmVKNPfp/ffff6h2AACANBSMNLy40h6dHTt2mCEpnfT8sHSukQ51rVu3TjxNL5Dbq1cv53MNUGFhYR6tEwAA8MJgpMv172fChAmybdu2h6pE165dZdGiRbJ27VopVKiQ83j+/PnNpGqdC+Taa6Sr0vSco4yulHPlWLXmWubelWz6XMcb7+0tUrpyTW8AACBtSbE5Rg0bNpRvvvkmWa/Red8aihYsWCArV640w3Suqlataq7FtmLFCucxXc6vy/Nr1Khhnuv9nj175Pz5884yusJNQ094eLizjOt7OMo43gMAAOCheox+y/z5852TppMzfKYrzv7973+bvYwcc4J0Brn25Oh9ZGSkGdbS99aw87e//c0EGl2RpnR5vwag1q1by4gRI8x7DBgwwLy3o9enU6dOMn78eOnTp4+0b9/ehLC5c+ealWoAAAAPHYyqVKniNvlae300jOjS+IkTJybrvSZNmmTun3vuObfj06ZNc16LbfTo0RIYGGg2dtTVYrqazPXrpEuXzgzDde7c2QSmLFmySNu2bWXIkCHOMtoTpSFI90QaO3asGa6bMmWKeS8AAICH3sfo3tVaGlp0ryENN7qU3h+xjxHg59jH6JFhH6NHg32MHv73d7J7jAYPHpzclwAAAPgEr9rgEQAAwJOS3GOkQ2YP2thR6fl7L94KAADgd8FIl9T/Fr20xrhx48zu1QAAAH4fjJo0aZLomO4p1K9fP1m4cKG0atXKbSUYAABAmphjpFeyf/vtt83FW3XobOfOnTJjxgwpUqRIytcQAADAG4ORLnfr27evlCxZ0ly5XneT1t4ix0VfAQAA0sRQmu4q/dFHH5nrjn399df3HVoDAABIE8FI5xLpZTq0t0iHzfR2P99++21K1g8AAMD7glGbNm1+d7k+AABAmghG06dPT92aAAAAeBg7XwMAANgIRgAAADaCEQAAgI1gBAAAYCMYAQAA2AhGAAAANoIRAACAjWAEAABgIxgBAADYCEYAAAA2ghEAAICNYAQAAGAjGAEAANgIRgAAADaCEQAAgI1gBAAAYCMYAQAA2AhGAAAANoIRAACAjWAEAABgIxgBAADYCEYAAAA2ghEAAICNYAQAAGAjGAEAANiCHA8AIEVEhXq6BgDw0OgxAgAAsBGMAAAAbAQjAAAAG8EIAADARjACAACwEYwAAABsBCMAAAAbwQgAAMBGMAIAALARjAAAAGwEIwAAABvBCAAAwEYwAgAAsBGMAAAAbAQjAAAAG8EIAADARjACAACwEYwAAABsBCMAAAAbwQgAAMBGMAIAAPCGYLR27Vp5+eWXpWDBghIQECDfffed23nLsmTQoEFSoEAByZQpk9SrV08OHz7sVubixYvSqlUrCQkJkezZs0tkZKRcu3bNrczu3buldu3akjFjRgkLC5MRI0Y8kvYBAADf4tFgdP36dalUqZJMmDDhvuc1wIwbN04mT54smzdvlixZskiDBg0kLi7OWUZD0b59+2T58uWyaNEiE7Y6duzoPB8bGyv169eXIkWKyPbt2+Xjjz+WqKgo+eyzzx5JGwEAgO8I8uQXb9iwobndj/YWjRkzRgYMGCBNmjQxx7788kvJly+f6Vlq0aKFHDhwQJYuXSpbt26VJ554wpT55z//KY0aNZJPPvnE9ER99dVXEh8fL1OnTpUMGTJIuXLlZOfOnTJq1Ci3AAUAAOC1c4yOHz8uZ8+eNcNnDqGhoVKtWjXZuHGjea73OnzmCEVKywcGBpoeJkeZZ555xoQiB+11iomJkUuXLt33a9+6dcv0NLneAACA//PaYKShSGkPkSt97jin93nz5nU7HxQUJDlz5nQrc7/3cP0a9xo2bJgJYY6bzksCAAD+z2uDkSf1799frly54rydOnXK01UCAABpORjlz5/f3J87d87tuD53nNP78+fPu52/c+eOWanmWuZ+7+H6Ne4VHBxsVrm53gAAgP/z2mBUrFgxE1xWrFjhPKZzfXTuUI0aNcxzvb98+bJZbeawcuVKSUhIMHORHGV0pdrt27edZXQFW+nSpSVHjhyPtE0AAMC7eTQY6X5DukJMb44J1/r45MmTZl+jHj16yD/+8Q/5/vvvZc+ePdKmTRuz0uyVV14x5cuWLSsvvviivP3227JlyxZZv369dO3a1axY03LqjTfeMBOvdX8jXdY/Z84cGTt2rPTq1cuTTQcAAF7Io8v1t23bJnXq1HE+d4SVtm3byvTp06VPnz5mryNdVq89Q7Vq1TLL83WjRgddjq9hqG7dumY1WrNmzczeRw46eXrZsmXSpUsXqVq1quTOndtsGslSfQAAcK8ASzcMwgPpEJ4GLJ2IzXwj4HdEhXq6BvBiReNmeboKacKJ4Y09XQWf/f3ttXOMAAAA0tRQGoDfQe8LADxS9BgBAADYCEYAAAA2ghEAAICNYAQAAGAjGAEAANgIRgAAADaCEQAAgI1gBAAAYCMYAQAA2AhGAAAANoIRAACAjWAEAABgIxgBAADYCEYAAAA2ghEAAICNYAQAAGAjGAEAANgIRgAAADaCEQAAgI1gBAAAYCMYAQAA2AhGAAAANoIRAACAjWAEAABgIxgBAADYCEYAAAA2ghEAAICNYAQAAGAjGAEAANgIRgAAADaCEQAAgI1gBAAAYAtyPAD8XlSop2sAAPByBCMAAPxM0X6LxdecGN5YvAFDaQAAADaCEQAAgI1gBAAAYCMYAQAA2AhGAAAANoIRAACAjWAEAABgIxgBAADYCEYAAAA2ghEAAICNYAQAAGAjGAEAANgIRgAAADaCEQAAgI1gBAAAYCMYAQAA2AhGAAAAtiDHAyBZokI9XQMAAFIcPUYAAAA2ghEAAICNYAQAAGAjGAEAAKTFYDRhwgQpWrSoZMyYUapVqyZbtmzxdJUAAIAXSTOr0ubMmSO9evWSyZMnm1A0ZswYadCggcTExEjevHk9WzlWeAEA4BXSTI/RqFGj5O2335Z27dpJeHi4CUiZM2eWqVOnerpqAADAS6SJHqP4+HjZvn279O/f33ksMDBQ6tWrJxs3bkxU/tatW+bmcOXKFXMfGxubOhW8ZaXO+wKAl0m4dcPTVYCXik2F37GO97SspP+eTRPB6H//+5/cvXtX8uXL53Zcnx88eDBR+WHDhsn777+f6HhYWFiq1hMA/N/rnq4AvFTomNR776tXr0poaNKmraSJYJRc2rOk85EcEhIS5OLFi5IrVy4JCAhI8TSrgevUqVMSEhIi/op2+o+00EZFO/0L7UybbbQsy4SiggULJvn900Qwyp07t6RLl07OnTvndlyf58+fP1H54OBgc3OVPXv2VK2jfrj++k3sinb6j7TQRkU7/QvtTHttDE1iT1GamnydIUMGqVq1qqxYscKtF0if16hRw6N1AwAA3iNN9BgpHRpr27atPPHEE/LUU0+Z5frXr183q9QAAADSVDBq3ry5XLhwQQYNGiRnz56VypUry9KlSxNNyH7UdMhu8ODBiYbu/A3t9B9poY2KdvoX2uk/glO5jQFWctawAQAA+LE0MccIAAAgKQhGAAAANoIRAACAjWAEAABgIxg9AnqJkSeffFKyZcsmefPmlVdeeUViYmLcysTFxUmXLl3M7tpZs2aVZs2aJdqQ0ttNmjRJKlas6Nx0S/eIWrJkiV+18X6GDx9udkTv0aOHX7U1KirKtMv1VqZMGb9qo/rvf/8rb775pmlHpkyZpEKFCrJt2zbneV2foqtZCxQoYM7rNRYPHz4svqRo0aKJPku96efnT5+lXvpp4MCBUqxYMfNZlShRQoYOHep2nSx/+DyV7uasP3OKFCli2lGzZk3ZunWrT7dz7dq18vLLL5tdqvX787vvvnM7n5Q26VUqWrVqZX4H6cbMkZGRcu3ateRVRFelIXU1aNDAmjZtmrV3715r586dVqNGjazChQtb165dc5bp1KmTFRYWZq1YscLatm2bVb16datmzZqWL/n++++txYsXW4cOHbJiYmKs9957z0qfPr1pt7+08V5btmyxihYtalWsWNHq3r2787g/tHXw4MFWuXLlrDNnzjhvFy5c8Ks2Xrx40SpSpIj11ltvWZs3b7aOHTtmRUdHW0eOHHGWGT58uBUaGmp999131q5du6w//elPVrFixaybN29avuL8+fNun+Py5cs1KVirVq3ym89SffDBB1auXLmsRYsWWcePH7fmzZtnZc2a1Ro7dqxffZ7q9ddft8LDw601a9ZYhw8fNv+/hoSEWL/88ovPtvM///mP9fe//9369ttvzffnggUL3M4npU0vvviiValSJWvTpk3Wjz/+aJUsWdJq2bJlsupBMPLQDyn90PUbWl2+fNkECP2f2OHAgQOmzMaNGy1fliNHDmvKlCl+2carV69ajz/+uPkl8+yzzzqDkb+0VX/Q6g+Y+/GXNvbt29eqVavWb55PSEiw8ufPb3388cdubQ8ODra+/vpry1fp92qJEiVM+/zls1SNGze22rdv73asadOmVqtWrfzq87xx44aVLl06EwBdRUREmGDhD+2Ue4JRUtq0f/9+87qtW7c6yyxZssQKCAiw/vvf/yb5azOU5gFXrlwx9zlz5jT327dvl9u3b5tuQQcdsihcuLBs3LhRfJF2ac+ePdvsLq5Dav7YRh16aNy4sVublD+1VbuptVu7ePHipnv65MmTftXG77//3uyG/+c//9kMc1epUkU+//xz5/njx4+bDWFd26nXXapWrZpPtdNVfHy8zJw5U9q3b2+GK/zls1Q6nKSXejp06JB5vmvXLlm3bp00bNjQrz7PO3fumJ+xGTNmdDuuw0vaXn9pp6uktEnvdfhM/5920PKBgYGyefNmSao0s/O1t9BrtOm48NNPPy3ly5c3x/TD1uu53XuhWt2VW8/5kj179pggpHMWdK7CggULJDw8XHbu3Ok3bVQa+n766Se3MX0Hf/k89QfO9OnTpXTp0nLmzBl5//33pXbt2rJ3716/aeOxY8fM3Di9ZNB7771nPs9u3bqZtuklhBxtuXeHfF9rpyudt3H58mV56623zHN/+SxVv379zJXXNdjphcM1PHzwwQcm1Ct/+Tx1vqr+nNX5U2XLljX1//rrr00wKFmypN+001VS2qT3+geOq6CgINMJkZx2E4w80Mugv1g01fsj/SWqIUh7xebPn29+uaxZs0b8yalTp6R79+6yfPnyRH+x+RPHX9lKJ9VrUNKJnnPnzjV/mfrLHyr61+WHH35onmuPkf7/OXnyZPO964+++OIL89lqT6C/0e/Nr776SmbNmiXlypUzP4v0D1Ftq799nv/6179Mr99jjz1mQmBERIS0bNnS9ADij2Eo7RHq2rWrLFq0SFatWiWFChVyHs+fP7/p3ta/4lzpqhA950v0L0/9i6Vq1apmNV6lSpVk7NixftVG/cFz/vx584NI/xrRm4a/cePGmcf6F4y/tNWV9iiUKlVKjhw54jefp65u0R5NV/oXuGPI0NGWe1do+Vo7HX7++Wf54YcfpEOHDs5j/vJZqt69e5teoxYtWpjVha1bt5aePXuan0X+9nnqijv9uaMrrvSPtS1btpghUR329qd2OiSlTXqvP5vvHXbUlWrJaTfB6BHQeWQainRYaeXKlWYpqSsNEenTpzdj4w66nF9/OGt3qa//RX7r1i2/amPdunXNkKH+Neq4aa+Ddtc7HvtLW13pD+CjR4+aMOEvn6cOad+7dYbOT9GeMaX/r+oPVNd26lCNzlfwpXY6TJs2zQw16Nw4B3/5LNWNGzfMfBJX2puiP4f88fNUWbJkMf9PXrp0SaKjo6VJkyZ+2c5iSWiT3mvAd+0109+5+vlrj3eSpdgUcvymzp07myWGq1evdlsyqysLHHS5rC7hX7lypVkuW6NGDXPzJf369TMr7XSZ7O7du81zXQ2wbNkyv2njb3FdleYvbX3nnXfM96x+nuvXr7fq1atn5c6d26yq9Jc26nYLQUFBZpm3Lnn+6quvrMyZM1szZ850WyKcPXt269///rf5vm7SpInXL3u+n7t375rPS1fi3csfPkvVtm1b67HHHnMu19dl3/o926dPH7/7PJcuXWpWXOkWE/ozVleQVqtWzYqPj/fZdl69etXasWOHuWk8GTVqlHn8888/J7lNuly/SpUqZvuNdevWmZXDLNf3QvoB3++mexs56Af717/+1Sxv1x/Mr776qglPvkSXyeqeMBkyZLDy5Mlj1a1b1xmK/KWNSQ1G/tDW5s2bWwUKFDCfp/6y0eeu+/v4QxvVwoULrfLly5tlv2XKlLE+++wzt/O6THjgwIFWvnz5TBn9vtZ9unyN7s+kP3fuV3d/+SxjY2PN/4ca8jJmzGgVL17cLF+/deuW332ec+bMMe3T/z91GXuXLl3M8nVfbueqVavu+7tSA29S2/Trr7+aIKT7V+m+Tu3atTOBKzkC9D8p2+EFAADgm5hjBAAAYCMYAQAA2AhGAAAANoIRAACAjWAEAABgIxgBAADYCEYAAAA2ghEAAICNYAQAAGAjGAF4ZN566y0JCAiQTp06JTrXpUsXc07LJNXq1avNa+69MvzDuHDhgnTu3FkKFy4swcHB5oKVDRo0kPXr1//h9wbgOwhGAB6psLAwmT17tty8edN5LC4uTmbNmmVCiac0a9ZMduzYITNmzJBDhw7J999/L88995z8+uuvqfY14+PjU+29ATwcghGARyoiIsKEo2+//dZ5TB9rKKpSpYpb2YSEBBk2bJgUK1ZMMmXKJJUqVZL58+ebcydOnJA6deqYxzly5HDrbVq6dKnUqlVLsmfPLrly5ZKXXnpJjh49+pt10h6nH3/8UT766CPznkWKFJGnnnpK+vfvL3/605/cyv3lL3+RfPnyScaMGaV8+fKyaNEi5/lvvvlGypUrZ3qcihYtKiNHjnT7Onps6NCh0qZNGwkJCZGOHTua4+vWrZPatWubNuq/Tbdu3eT69et/8F8awMMgGAF45Nq3by/Tpk1zPp86daq0a9cuUTkNRV9++aVMnjxZ9u3bJz179pQ333xT1qxZYwKEBhEVExMjZ86ckbFjx5rnGip69eol27ZtkxUrVkhgYKC8+uqrJmjdT9asWc3tu+++k1u3bt23jL62YcOGZmht5syZsn//fhk+fLikS5fOnN++fbu8/vrr0qJFC9mzZ49ERUXJwIEDZfr06W7v88knn5iAp71Tel4D24svvmh6rHbv3i1z5swxQalr165/4F8YwEOzAOARadu2rdWkSRPr/PnzVnBwsHXixAlzy5gxo3XhwgVzTsuouLg4K3PmzNaGDRvc3iMyMtJq2bKlebxq1SpLf4xdunTpgV9X31vL7dmz5zfLzJ8/38qRI4epS82aNa3+/ftbu3btcp6Pjo62AgMDrZiYmPu+/o033rBeeOEFt2O9e/e2wsPDnc+LFClivfLKK4na07FjR7djP/74o/laN2/efGC7AKQ8eowAPHJ58uSRxo0bm94U7TnSx7lz53Yrc+TIEblx44a88MILzh4dvWkP0oOGxdThw4elZcuWUrx4cTNkpUNY6uTJk7/5Gu2xOX36tJlbpD04OrFbh/0cPT47d+6UQoUKSalSpe77+gMHDsjTTz/tdkyfa13u3r3rPPbEE0+4ldm1a5f5Gq5t1Enf2kN1/PjxB7YTQMoLSoX3BIAkDac5hosmTJiQ6Py1a9fM/eLFi+Wxxx5zO6dzeB7k5ZdfNvOEPv/8cylYsKAJGTof6PcmO+u8IQ1ietNhrg4dOsjgwYPN3CWd/5MSsmTJkqidOm9J5xXdy5OT0YG0imAEwCO0V0aDik6a1h6Se4WHh5sApL08zz777H3fI0OGDObetUdGV5HpnCMNRTqhWemcnYehddB5R6pixYryyy+/mBVr9+s1Klu2bKKl/fpcyzrmId2P9krpfKWSJUs+VB0BpCyCEQCP0LCgw0+Ox/fKli2bvPvuu2bCtfb46CqzK1eumLChw2Nt27Y1vUIarHRlWKNGjUyvjq5Q05Von332mRQoUMAEq379+j2wLhqm/vznP5teLA1A+rV14vaIESOkSZMmpoyGs2eeecYMuY0aNcoEmYMHD5qvryHvnXfekSeffNKsOmvevLls3LhRxo8fLxMnTnzg1+7bt69Ur17d9J5pD5X2KGlQWr58uXk9gEcsFeYtAcADJ1//FtfJ1yohIcEaM2aMVbp0aSt9+vRWnjx5rAYNGlhr1qxxlhkyZIiVP39+KyAgwPna5cuXW2XLljUTvCtWrGitXr3aTL5esGDBfb+uTvTu16+fFRERYYWGhppJ3/o1BwwYYN24ccNZ7tdff7XatWtn5cqVy0zSLl++vLVo0SK3Cdw62VrrWrhwYevjjz92+zo6+Xr06NGJvv6WLVvMxO2sWbNaWbJkMXX+4IMPkvzvCiDlBOh/HnUYAwAA8EasSgMAALARjAAAAGwEIwAAABvBCAAAwEYwAgAAsBGMAAAAbAQjAAAAG8EIAADARjACAACwEYwAAABsBCMAAAD5f/4Pt8fERs8AMrwAAAAASUVORK5CYII=", - "text/plain": [ - "<Figure size 640x480 with 1 Axes>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "def plot_classified_data(data):\n", - " \"\"\"\n", - " Plot the data returned from and classified in make_classes().\n", - " Amount for each of the three classes (good, bad, average) is displayed, ONLY.\n", - " Three bins, one for the amount of good, one for averge, and one for bad.\n", - " \"\"\"\n", - " good = data['class'].value_counts()[\"good\"]\n", - " average = data['class'].value_counts()[\"average\"]\n", - " bad = data['class'].value_counts()[\"bad\"]\n", - " #print(good, average, bad)\n", - " data = [good, average, bad]\n", - " plt.bar([\"Good\", \"Average\", \"Bad\"], data)\n", - " plt.xlabel(\"Class\")\n", - " plt.ylabel(\"Number of Games\")\n", - " plt.show()\n", - "\n", - "#plot_classified_data(df)\n", - "plot_games_on_meta_score(df)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# A simpler classifier: Multinomial Naive Bayes\n", - "Here I try out a multinomial naive bayes classifier model." - ] - }, - { - "cell_type": "code", - "execution_count": 228, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.dummy import DummyClassifier\n", - "\"\"\"\n", - "Alternative pipelines:\n", - "\n", - "pipeline = Pipeline(\n", - " steps=[\n", - " (\n", - " \"vectorizer\",\n", - " FeatureUnion(\n", - " [\n", - " (\"word\", TfidfVectorizer(ngram_range=(1, 2), max_features=5000)),\n", - " (\"char\", CountVectorizer(ngram_range=(1, 3), max_features=5000)),\n", - " ]\n", - " ),\n", - " ),\n", - " (\"classifier\", MultinomialNB()),\n", - " ]\n", - ")\n", - " pipeline_2 = Pipeline(steps=[\n", - " ('vectorizer', CountVectorizer()),\n", - " ('classifier', MultinomialNB())\n", - " ])\n", - "\n", - "\"\"\"\n", - "\n", - "\n", - "from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB, CategoricalNB\n", - "from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer\n", - "from sklearn.pipeline import Pipeline, FeatureUnion\n", - "from sklearn.metrics import classification_report, precision_score\n", - "from sklearn.feature_extraction.text import TfidfVectorizer\n", - "\n", - "\n", - "def multinomial_naive_bayes_classifier_model(train_X, train_Y):\n", - " \n", - " \n", - " pipeline_3 = Pipeline(steps=[\n", - " ('vectorizer', TfidfVectorizer(ngram_range=(1, 2), max_features=5000)),\n", - " ('classifier', MultinomialNB())\n", - " ])\n", - " model = pipeline_3.fit(train_X, train_Y)\n", - " return model\n", - "\n", - "#model = multinomial_naive_bayes_classifier_model(train_X, train_Y)\n", - "dc_stratified = DummyClassifier(strategy='stratified')\n", - "dc_model = dc_stratified.fit(train_X, train_Y)\n", - "#print(model.score(test_X, test_Y))\n", - "#dc_predicted = dc_model.predict(test_X)\n", - "#print(classification_report(test_Y, dc_predicted))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 229, - "metadata": {}, - "outputs": [], - "source": [ - "def predict_against_test_data(test_data, model):\n", - " test_X = np.array(test_data[\"summary\"])\n", - " test_Y = np.array(test_data[\"class\"])\n", - " predicted = model.predict(test_X)\n", - " score = (precision_score(test_Y, predicted, average='macro'))\n", - " print(f'Macro precision score against test data: {score}')\n", - " print(\"Classification report against test data:\")\n", - " print(classification_report(test_Y, predicted))\n", - "\n", - "#predict_against_test_data(test, model)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Neural network(s)\n", - "Let's gooo" - ] - }, - { - "cell_type": "code", - "execution_count": 230, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/12\n", - "\u001b[1m582/582\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m54s\u001b[0m 85ms/step - loss: 4149.0005 - mean_absolute_error: 62.0885 - val_loss: 1062.4858 - val_mean_absolute_error: 30.4659\n", - "Epoch 2/12\n", - "\u001b[1m582/582\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m50s\u001b[0m 85ms/step - loss: 1824.0758 - mean_absolute_error: 35.6030 - val_loss: 669.7858 - val_mean_absolute_error: 23.5815\n", - "Epoch 3/12\n", - "\u001b[1m582/582\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m50s\u001b[0m 85ms/step - loss: 1812.9156 - mean_absolute_error: 35.7669 - val_loss: 710.8091 - val_mean_absolute_error: 24.3751\n", - "Epoch 4/12\n", - "\u001b[1m 65/582\u001b[0m \u001b[32m━━\u001b[0m\u001b[37m━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[1m37s\u001b[0m 72ms/step - loss: 1776.4143 - mean_absolute_error: 34.8605" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[230], line 92\u001b[0m\n\u001b[1;32m 88\u001b[0m log_dir \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlogs/fit/\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m datetime\u001b[38;5;241m.\u001b[39mdatetime\u001b[38;5;241m.\u001b[39mnow()\u001b[38;5;241m.\u001b[39mstrftime(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mY\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mm\u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m-\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mH\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mM\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mS\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 89\u001b[0m \u001b[38;5;66;03m#tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)\u001b[39;00m\n\u001b[1;32m 90\u001b[0m \n\u001b[1;32m 91\u001b[0m \u001b[38;5;66;03m# Train the model\u001b[39;00m\n\u001b[0;32m---> 92\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 93\u001b[0m \u001b[43m \u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 94\u001b[0m \u001b[43m \u001b[49m\u001b[43my_train\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 95\u001b[0m \u001b[43m \u001b[49m\u001b[43mepochs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m12\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m32\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 97\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalidation_data\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mX_test\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_test\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 98\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m#callbacks=[tensorboard_callback]\u001b[39;49;00m\n\u001b[1;32m 99\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 100\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 101\u001b[0m \u001b[38;5;66;03m# CNN Model\u001b[39;00m\n\u001b[1;32m 102\u001b[0m model\u001b[38;5;241m.\u001b[39madd(Embedding(input_dim\u001b[38;5;241m=\u001b[39mmax_vocab_size, output_dim\u001b[38;5;241m=\u001b[39membedding_dim))\n", - "File \u001b[0;32m~/Documents/TextMining/project/.venv/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py:117\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 116\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 117\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 119\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n", - "File \u001b[0;32m~/Documents/TextMining/project/.venv/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py:371\u001b[0m, in \u001b[0;36mTensorFlowTrainer.fit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq)\u001b[0m\n\u001b[1;32m 369\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m step, iterator \u001b[38;5;129;01min\u001b[39;00m epoch_iterator:\n\u001b[1;32m 370\u001b[0m callbacks\u001b[38;5;241m.\u001b[39mon_train_batch_begin(step)\n\u001b[0;32m--> 371\u001b[0m logs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain_function\u001b[49m\u001b[43m(\u001b[49m\u001b[43miterator\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 372\u001b[0m callbacks\u001b[38;5;241m.\u001b[39mon_train_batch_end(step, logs)\n\u001b[1;32m 373\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstop_training:\n", - "File \u001b[0;32m~/Documents/TextMining/project/.venv/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py:219\u001b[0m, in \u001b[0;36mTensorFlowTrainer._make_function.<locals>.function\u001b[0;34m(iterator)\u001b[0m\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mfunction\u001b[39m(iterator):\n\u001b[1;32m 216\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\n\u001b[1;32m 217\u001b[0m iterator, (tf\u001b[38;5;241m.\u001b[39mdata\u001b[38;5;241m.\u001b[39mIterator, tf\u001b[38;5;241m.\u001b[39mdistribute\u001b[38;5;241m.\u001b[39mDistributedIterator)\n\u001b[1;32m 218\u001b[0m ):\n\u001b[0;32m--> 219\u001b[0m opt_outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmulti_step_on_iterator\u001b[49m\u001b[43m(\u001b[49m\u001b[43miterator\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 220\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m opt_outputs\u001b[38;5;241m.\u001b[39mhas_value():\n\u001b[1;32m 221\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m\n", - "File \u001b[0;32m~/Documents/TextMining/project/.venv/lib/python3.12/site-packages/tensorflow/python/util/traceback_utils.py:150\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 148\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 149\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 150\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 152\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n", - "File \u001b[0;32m~/Documents/TextMining/project/.venv/lib/python3.12/site-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py:833\u001b[0m, in \u001b[0;36mFunction.__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 830\u001b[0m compiler \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mxla\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jit_compile \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnonXla\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 832\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m OptionalXlaContext(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jit_compile):\n\u001b[0;32m--> 833\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 835\u001b[0m new_tracing_count \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexperimental_get_tracing_count()\n\u001b[1;32m 836\u001b[0m without_tracing \u001b[38;5;241m=\u001b[39m (tracing_count \u001b[38;5;241m==\u001b[39m new_tracing_count)\n", - "File \u001b[0;32m~/Documents/TextMining/project/.venv/lib/python3.12/site-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py:878\u001b[0m, in \u001b[0;36mFunction._call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 875\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lock\u001b[38;5;241m.\u001b[39mrelease()\n\u001b[1;32m 876\u001b[0m \u001b[38;5;66;03m# In this case we have not created variables on the first call. So we can\u001b[39;00m\n\u001b[1;32m 877\u001b[0m \u001b[38;5;66;03m# run the first trace but we should fail if variables are created.\u001b[39;00m\n\u001b[0;32m--> 878\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43mtracing_compilation\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcall_function\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 879\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_variable_creation_config\u001b[49m\n\u001b[1;32m 880\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 881\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_created_variables:\n\u001b[1;32m 882\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCreating variables on a non-first call to a function\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 883\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m decorated with tf.function.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/Documents/TextMining/project/.venv/lib/python3.12/site-packages/tensorflow/python/eager/polymorphic_function/tracing_compilation.py:139\u001b[0m, in \u001b[0;36mcall_function\u001b[0;34m(args, kwargs, tracing_options)\u001b[0m\n\u001b[1;32m 137\u001b[0m bound_args \u001b[38;5;241m=\u001b[39m function\u001b[38;5;241m.\u001b[39mfunction_type\u001b[38;5;241m.\u001b[39mbind(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 138\u001b[0m flat_inputs \u001b[38;5;241m=\u001b[39m function\u001b[38;5;241m.\u001b[39mfunction_type\u001b[38;5;241m.\u001b[39munpack_inputs(bound_args)\n\u001b[0;32m--> 139\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunction\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_flat\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# pylint: disable=protected-access\u001b[39;49;00m\n\u001b[1;32m 140\u001b[0m \u001b[43m \u001b[49m\u001b[43mflat_inputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcaptured_inputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunction\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcaptured_inputs\u001b[49m\n\u001b[1;32m 141\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Documents/TextMining/project/.venv/lib/python3.12/site-packages/tensorflow/python/eager/polymorphic_function/concrete_function.py:1322\u001b[0m, in \u001b[0;36mConcreteFunction._call_flat\u001b[0;34m(self, tensor_inputs, captured_inputs)\u001b[0m\n\u001b[1;32m 1318\u001b[0m possible_gradient_type \u001b[38;5;241m=\u001b[39m gradients_util\u001b[38;5;241m.\u001b[39mPossibleTapeGradientTypes(args)\n\u001b[1;32m 1319\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (possible_gradient_type \u001b[38;5;241m==\u001b[39m gradients_util\u001b[38;5;241m.\u001b[39mPOSSIBLE_GRADIENT_TYPES_NONE\n\u001b[1;32m 1320\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m executing_eagerly):\n\u001b[1;32m 1321\u001b[0m \u001b[38;5;66;03m# No tape is watching; skip to running the function.\u001b[39;00m\n\u001b[0;32m-> 1322\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_inference_function\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcall_preflattened\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1323\u001b[0m forward_backward \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_select_forward_and_backward_functions(\n\u001b[1;32m 1324\u001b[0m args,\n\u001b[1;32m 1325\u001b[0m possible_gradient_type,\n\u001b[1;32m 1326\u001b[0m executing_eagerly)\n\u001b[1;32m 1327\u001b[0m forward_function, args_with_tangents \u001b[38;5;241m=\u001b[39m forward_backward\u001b[38;5;241m.\u001b[39mforward()\n", - "File \u001b[0;32m~/Documents/TextMining/project/.venv/lib/python3.12/site-packages/tensorflow/python/eager/polymorphic_function/atomic_function.py:216\u001b[0m, in \u001b[0;36mAtomicFunction.call_preflattened\u001b[0;34m(self, args)\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mcall_preflattened\u001b[39m(\u001b[38;5;28mself\u001b[39m, args: Sequence[core\u001b[38;5;241m.\u001b[39mTensor]) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Any:\n\u001b[1;32m 215\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Calls with flattened tensor inputs and returns the structured output.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 216\u001b[0m flat_outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcall_flat\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 217\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfunction_type\u001b[38;5;241m.\u001b[39mpack_output(flat_outputs)\n", - "File \u001b[0;32m~/Documents/TextMining/project/.venv/lib/python3.12/site-packages/tensorflow/python/eager/polymorphic_function/atomic_function.py:242\u001b[0m, in \u001b[0;36mAtomicFunction.call_flat\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 234\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m!=\u001b[39m expected_len:\n\u001b[1;32m 235\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 236\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSignature specifies \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexpected_len\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m arguments, got: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(args)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 237\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m Expected inputs: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcached_definition\u001b[38;5;241m.\u001b[39msignature\u001b[38;5;241m.\u001b[39minput_arg\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 238\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m Received inputs: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00margs\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 239\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m Function Type: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfunction_type\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 240\u001b[0m )\n\u001b[0;32m--> 242\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43mInterpolateRuntimeError\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m:\n\u001b[1;32m 243\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m ops\u001b[38;5;241m.\u001b[39mcontrol_dependencies(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_options\u001b[38;5;241m.\u001b[39mcontrol_captures):\n\u001b[1;32m 244\u001b[0m \u001b[38;5;66;03m# The caller must use record_operation to record this operation in the\u001b[39;00m\n\u001b[1;32m 245\u001b[0m \u001b[38;5;66;03m# eager case, so we enforce the same requirement for the non-eager\u001b[39;00m\n\u001b[1;32m 246\u001b[0m \u001b[38;5;66;03m# case by explicitly pausing recording. We don't have a gradient\u001b[39;00m\n\u001b[1;32m 247\u001b[0m \u001b[38;5;66;03m# registered for PartitionedCall, so recording this operation confuses\u001b[39;00m\n\u001b[1;32m 248\u001b[0m \u001b[38;5;66;03m# forwardprop code (GradientTape manages to ignore it).\u001b[39;00m\n\u001b[1;32m 249\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m record\u001b[38;5;241m.\u001b[39mstop_recording():\n", - "File \u001b[0;32m~/Documents/TextMining/project/.venv/lib/python3.12/site-packages/tensorflow/python/eager/polymorphic_function/atomic_function.py:641\u001b[0m, in \u001b[0;36mInterpolateRuntimeError.__init__\u001b[0;34m(self, top_level_func)\u001b[0m\n\u001b[1;32m 637\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Context Manager that interpolates exceptions received by AtomicFunction.\"\"\"\u001b[39;00m\n\u001b[1;32m 639\u001b[0m DENY_LIST_PHRASES \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m<embedded\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 641\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, top_level_func):\n\u001b[1;32m 642\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_func \u001b[38;5;241m=\u001b[39m top_level_func\n\u001b[1;32m 644\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21minterpolate\u001b[39m(\u001b[38;5;28mself\u001b[39m, message, node_names, graph_debug_info):\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "\n", - "# Padding sequences to ensure uniform input size\n", - "#X_train = pad_sequences(X_train_sequences, padding='post', truncating='pre', maxlen=max_sequence_length)\n", - "#X_test = pad_sequences(X_test_sequences, padding='post', truncating='pre',maxlen=max_sequence_length)\n", - "\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "from tensorflow.keras.models import Sequential\n", - "from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout, Conv2D, LSTM\n", - "from tensorflow.keras.preprocessing.text import Tokenizer\n", - "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", - "from sklearn.calibration import LabelEncoder\n", - "from tensorflow.keras.utils import to_categorical\n", - "from tensorflow.keras.optimizers import Adam\n", - "from tensorflow.keras.callbacks import TensorBoard\n", - "import datetime\n", - "\n", - "RUN_NETWORK = True\n", - "RNN = True\n", - "\n", - "train_X = np.array(training[\"summary\"])\n", - "test_X = np.array(test[\"summary\"])\n", - "\n", - "train_Y = np.array(training[\"meta_score\"])\n", - "test_Y = np.array(test[\"meta_score\"])\n", - "\n", - "#from sklearn.model_selection import train_test_split\n", - "#from sklearn.metrics import classification_report\n", - "\n", - "# Example data (replace with your dataset)\n", - "#texts = [\"This game is amazing!\", \"Worst game ever.\", \"I love the graphics and story!\", \"Terrible gameplay.\"]\n", - "#labels = [1, 0, 1, 0] # 1 = Positive, 0 = Negative\n", - "\n", - "# Hyperparameters\n", - "max_vocab_size = 6000 # Maximum number of words in the vocabulary\n", - "max_sequence_length = 180 # Maximum length of each text sequence\n", - "embedding_dim = 6000 # Dimension of the embedding layer\n", - "\n", - "# Step 1: Tokenize and preprocess the text\n", - "tokenizer = Tokenizer(num_words=max_vocab_size)\n", - "\n", - "tokenizer.fit_on_texts(train_X)\n", - "tokenizer.fit_on_texts(test_X)\n", - "\n", - "\n", - "X_train_sequences = tokenizer.texts_to_sequences(train_X)\n", - "X_test_sequences = tokenizer.texts_to_sequences(test_X)\n", - "\n", - "\n", - "X_train = pad_sequences(X_train_sequences, maxlen=max_sequence_length, padding='post', truncating='pre')\n", - "X_test = pad_sequences(X_test_sequences, maxlen=max_sequence_length, padding='post', truncating='pre')\n", - "#label_encoder = LabelEncoder()\n", - "#Y_encoded_train = label_encoder.fit_transform(train_Y)\n", - "#Y_encoded_test = label_encoder.transform(test_Y)\n", - "\n", - "#y_train_one_hot = to_categorical(Y_encoded_train, num_classes=len(set(Y_encoded_train)))\n", - "#y_test_one_hot = to_categorical(Y_encoded_test, num_classes=len(set(Y_encoded_test)))\n", - "\n", - "y_train = train_Y\n", - "y_test = test_Y\n", - "\n", - "#print(\"Max sequence length:\", max(len(x) for x in X_train))\n", - "#print(\"Min sequence length:\", min(len(x) for x in X_train))\n", - "\n", - "# Build the model\n", - "model = Sequential()\n", - "\n", - "if RUN_NETWORK:\n", - " if RNN:\n", - " # Embedding layer\n", - " model.add(Embedding(input_dim=max_vocab_size, output_dim=128))\n", - " \n", - " # LSTM layer\n", - " model.add(LSTM(32, return_sequences=False))\n", - " # Dense and Dropout layers\n", - " model.add(Dense(16, activation='relu'))\n", - " model.add(Dropout(0.5))\n", - " # Final layer: Predict a continuous score\n", - " model.add(Dense(1, activation='linear')) # Linear activation for regression output\n", - "\n", - " # Compile the model\n", - " model.compile(\n", - " loss=\"mean_squared_error\", #'mean_squared_error', # Use MSE for regression\n", - " optimizer=Adam(learning_rate=0.001),\n", - " metrics=['mean_absolute_error'] # Optionally track MAE\n", - " )\n", - "\n", - " # Set up TensorBoard callback\n", - " log_dir = \"logs/fit/\" + datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n", - " #tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)\n", - "\n", - " # Train the model\n", - " model.fit(\n", - " X_train,\n", - " y_train,\n", - " epochs=12,\n", - " batch_size=32,\n", - " validation_data=(X_test, y_test)\n", - " #callbacks=[tensorboard_callback]\n", - " )\n", - " else:\n", - " # CNN Model\n", - " model.add(Embedding(input_dim=max_vocab_size, output_dim=embedding_dim))\n", - " model.add(Conv1D(filters=64, kernel_size=5, activation='relu'))\n", - " model.add(GlobalMaxPooling1D())\n", - " model.add(Dense(32, activation='relu'))\n", - " model.add(Dropout(0.5))\n", - " model.add(Dense(1, activation='linear')) # Linear activation for regression output\n", - "\n", - " model.compile(\n", - " loss='mean_squared_error',\n", - " optimizer=Adam(learning_rate=0.001),\n", - " metrics=['mean_absolute_error']\n", - " )\n", - "\n", - " model.fit(X_train, y_train, epochs=12, batch_size=32, validation_data=(X_test, y_test))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Vectorizing the data with TFID" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Enter the Aperture Science Enrichment Center and experience Bridge Constructor Portal – the unique merging of the classic Portal and Bridge Constructor games. As a new employee in the Aperture Science test lab, it's your job to build bridges, ramps, slides, and other constructions in 60 test chambers and get the Bendies safely across the finish line in their vehicles. Make use of the many Portal gadgets, like portals, propulsion gel, repulsion gel, aerial faith plates, cubes, and more to bypass the sentry turrets, acid pools and laser barriers, solve switch puzzles, and make it through the test chambers unscathed. Let Ellen McLain, the original voice of GLaDOS, guide you through the tutorial, and learn all the tips and tricks that make a true Aperture Science employee. [Nintendo]\n" - ] - }, - { - "ename": "IndexError", - "evalue": "index 76 is out of bounds for axis 1 with size 75", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[61], line 8\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(train_X[\u001b[38;5;241m0\u001b[39m])\n\u001b[1;32m 7\u001b[0m y_train_one_hot \u001b[38;5;241m=\u001b[39m to_categorical(Y_encoded_train, num_classes\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mset\u001b[39m(Y_encoded_train)))\n\u001b[0;32m----> 8\u001b[0m y_test_one_hot \u001b[38;5;241m=\u001b[39m \u001b[43mto_categorical\u001b[49m\u001b[43m(\u001b[49m\u001b[43mY_encoded_test\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_classes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mset\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mY_encoded_test\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Documents/TextMining/project/.venv/lib/python3.12/site-packages/keras/src/utils/numerical_utils.py:99\u001b[0m, in \u001b[0;36mto_categorical\u001b[0;34m(x, num_classes)\u001b[0m\n\u001b[1;32m 97\u001b[0m batch_size \u001b[38;5;241m=\u001b[39m x\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 98\u001b[0m categorical \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mzeros((batch_size, num_classes))\n\u001b[0;32m---> 99\u001b[0m \u001b[43mcategorical\u001b[49m\u001b[43m[\u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marange\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 100\u001b[0m output_shape \u001b[38;5;241m=\u001b[39m input_shape \u001b[38;5;241m+\u001b[39m (num_classes,)\n\u001b[1;32m 101\u001b[0m categorical \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mreshape(categorical, output_shape)\n", - "\u001b[0;31mIndexError\u001b[0m: index 76 is out of bounds for axis 1 with size 75" - ] - } - ], - "source": [ - "train_X = np.array(training[\"summary\"])\n", - "test_X = np.array(test[\"summary\"])\n", - "vectorizer = TfidfVectorizer(ngram_range=(2, 3), max_features=5000)\n", - "vectorizer.fit_transform(train_X)\n", - "vectorizer.fit_transform(test_X)\n", - "print(train_X[0])\n", - "#y_train_one_hot = to_categorical(Y_encoded_train, num_classes=len(set(Y_encoded_train)))\n", - "#y_test_one_hot = to_categorical(Y_encoded_test, num_classes=len(set(Y_encoded_test)))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "Data cardinality is ambiguous. Make sure all arrays contain the same number of samples.'x' sizes: 14949\n'y' sizes: 3737\n", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[64], line 8\u001b[0m\n\u001b[1;32m 5\u001b[0m v_model\u001b[38;5;241m.\u001b[39madd(Dense(\u001b[38;5;241m1\u001b[39m, activation\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlinear\u001b[39m\u001b[38;5;124m'\u001b[39m))\n\u001b[1;32m 7\u001b[0m v_model\u001b[38;5;241m.\u001b[39mcompile(loss\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmean_squared_error\u001b[39m\u001b[38;5;124m'\u001b[39m, optimizer\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124madam\u001b[39m\u001b[38;5;124m'\u001b[39m, metrics\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124maccuracy\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[0;32m----> 8\u001b[0m \u001b[43mv_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrain_X\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mT\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_test\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalidation_data\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_test\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_test\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m16\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mepochs\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 9\u001b[0m v_model\u001b[38;5;241m.\u001b[39msummary()\n", - "File \u001b[0;32m~/Documents/TextMining/project/.venv/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py:122\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 119\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[1;32m 120\u001b[0m \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[1;32m 121\u001b[0m \u001b[38;5;66;03m# `keras.config.disable_traceback_filtering()`\u001b[39;00m\n\u001b[0;32m--> 122\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n", - "File \u001b[0;32m~/Documents/TextMining/project/.venv/lib/python3.12/site-packages/keras/src/trainers/data_adapters/data_adapter_utils.py:115\u001b[0m, in \u001b[0;36mcheck_data_cardinality\u001b[0;34m(data)\u001b[0m\n\u001b[1;32m 111\u001b[0m sizes \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28mstr\u001b[39m(i\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m]) \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m tree\u001b[38;5;241m.\u001b[39mflatten(single_data)\n\u001b[1;32m 113\u001b[0m )\n\u001b[1;32m 114\u001b[0m msg \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mlabel\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m sizes: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msizes\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m--> 115\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n", - "\u001b[0;31mValueError\u001b[0m: Data cardinality is ambiguous. Make sure all arrays contain the same number of samples.'x' sizes: 14949\n'y' sizes: 3737\n" - ] - } - ], - "source": [ - "\n", - "\n", - "v_model = Sequential()\n", - "v_model.add(Dense(32, activation='relu'))\n", - "v_model.add(Dropout(0.5))\n", - "v_model.add(Dense(16, activation='relu'))\n", - "v_model.add(Dense(1, activation='linear'))\n", - "\n", - "v_model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_absolute_error'])\n", - "v_model.fit(train_X, y_test, validation_data = (X_test, y_test), batch_size = 16, epochs = 5)\n", - "v_model.summary()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/5\n" - ] - }, - { - "ename": "ValueError", - "evalue": "Arguments `target` and `output` must have the same rank (ndim). Received: target.shape=(None,), output.shape=(None, 3)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[83], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m model\u001b[38;5;241m.\u001b[39mcompile(optimizer\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124madam\u001b[39m\u001b[38;5;124m'\u001b[39m, loss\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbinary_crossentropy\u001b[39m\u001b[38;5;124m'\u001b[39m, metrics\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124maccuracy\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# Step 5: Train the model\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m history \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrain_padded\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mY_encoded_train\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43mepochs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m16\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalidation_split\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.2\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Documents/TextMining/project/.venv/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py:122\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 119\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[1;32m 120\u001b[0m \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[1;32m 121\u001b[0m \u001b[38;5;66;03m# `keras.config.disable_traceback_filtering()`\u001b[39;00m\n\u001b[0;32m--> 122\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n", - "File \u001b[0;32m~/Documents/TextMining/project/.venv/lib/python3.12/site-packages/keras/src/backend/tensorflow/nn.py:767\u001b[0m, in \u001b[0;36mbinary_crossentropy\u001b[0;34m(target, output, from_logits)\u001b[0m\n\u001b[1;32m 764\u001b[0m output \u001b[38;5;241m=\u001b[39m tf\u001b[38;5;241m.\u001b[39mconvert_to_tensor(output)\n\u001b[1;32m 766\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(target\u001b[38;5;241m.\u001b[39mshape) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(output\u001b[38;5;241m.\u001b[39mshape):\n\u001b[0;32m--> 767\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 768\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mArguments `target` and `output` must have the same rank \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 769\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(ndim). Received: \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 770\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtarget.shape=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtarget\u001b[38;5;241m.\u001b[39mshape\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, output.shape=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00moutput\u001b[38;5;241m.\u001b[39mshape\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 771\u001b[0m )\n\u001b[1;32m 772\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m e1, e2 \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(target\u001b[38;5;241m.\u001b[39mshape, output\u001b[38;5;241m.\u001b[39mshape):\n\u001b[1;32m 773\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m e1 \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m e2 \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m e1 \u001b[38;5;241m!=\u001b[39m e2:\n", - "\u001b[0;31mValueError\u001b[0m: Arguments `target` and `output` must have the same rank (ndim). Received: target.shape=(None,), output.shape=(None, 3)" - ] - } - ], - "source": [ - "# Step 4: Compile the model\n", - "model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n", - "\n", - "# Step 5: Train the model\n", - "history = model.fit(\n", - " X_train, np.array(Y_encoded_train),\n", - " epochs=5,\n", - " batch_size=16,\n", - " validation_split=0.2,\n", - " verbose=1\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jackkolm/Documents/TextMining/project/.venv/lib/python3.12/site-packages/keras/src/ops/nn.py:907: UserWarning: You are using a softmax over axis -1 of a tensor of shape (None, 1). This axis has size 1. The softmax operation will always return the value 1, which is likely not what you intended. Did you mean to use a sigmoid instead?\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Test Accuracy: 0.40\n", - "\u001b[1m 38/117\u001b[0m \u001b[32m━━━━━━\u001b[0m\u001b[37m━━━━━━━━━━━━━━\u001b[0m \u001b[1m0s\u001b[0m 4ms/step" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jackkolm/Documents/TextMining/project/.venv/lib/python3.12/site-packages/keras/src/ops/nn.py:907: UserWarning: You are using a softmax over axis -1 of a tensor of shape (32, 1). This axis has size 1. The softmax operation will always return the value 1, which is likely not what you intended. Did you mean to use a sigmoid instead?\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1m117/117\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 6ms/step\n", - " precision recall f1-score support\n", - "\n", - " average 0.50 1.00 0.67 1864\n", - " bad 0.00 0.00 0.00 1489\n", - " good 0.00 0.00 0.00 384\n", - "\n", - " accuracy 0.50 3737\n", - " macro avg 0.17 0.33 0.22 3737\n", - "weighted avg 0.25 0.50 0.33 3737\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jackkolm/Documents/TextMining/project/.venv/lib/python3.12/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n", - "/Users/jackkolm/Documents/TextMining/project/.venv/lib/python3.12/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n", - "/Users/jackkolm/Documents/TextMining/project/.venv/lib/python3.12/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n" - ] - } - ], - "source": [ - "loss, accuracy = model.evaluate(test_padded, np.array(Y_encoded_test), verbose=0)\n", - "print(f\"Test Accuracy: {accuracy:.2f}\")\n", - "\n", - "# Step 7: Classification Report\n", - "y_pred = model.predict(test_padded)\n", - "y_pred_classes = np.argmax(y_pred, axis=1)\n", - "print(classification_report(Y_encoded_test, y_pred_classes, target_names=label_encoder.classes_))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/project/scraping.py b/project/scraping.py deleted file mode 100644 index 5e97fba..0000000 --- a/project/scraping.py +++ /dev/null @@ -1,17 +0,0 @@ -from bs4 import BeautifulSoup -from selenium import webdriver -import time - -PATH = 'C:\Program Files (x86)\chromedriver.exe' - -target_url = "https://x.com/scrapingdog" - -driver = webdriver.Chrome(PATH) - -driver.get(target_url) -time.sleep(5) - -resp = driver.page_source -driver.close() - -print(resp) \ No newline at end of file diff --git a/project/test.py b/project/test.py deleted file mode 100644 index 72fe929..0000000 --- a/project/test.py +++ /dev/null @@ -1,8 +0,0 @@ -import requests - -headers = { - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36', -} - -response = requests.get('https://www.imdb.com/find/?q=back%20in%20action&s=tt&exact=true&ref_=fn_ttl_ex', headers=headers) -print(response.text) -- GitLab