diff --git a/2_Evaluator_Block/configs.py b/2_Evaluator_Block/configs.py index d58a423f80b87f1c73f5344237d18a6c1175abce..c0a0002c921ae0d97bf1206549529f3ea8821f17 100644 --- a/2_Evaluator_Block/configs.py +++ b/2_Evaluator_Block/configs.py @@ -26,7 +26,7 @@ class EvalConfig: full_metrics = ["novelty"] # Split parameters - test_size = 0.3 # -- configure the test_size (from 0 to 1) -- + test_size = 0.25 # -- configure the test_size (from 0 to 1) -- # Loo parameters top_n_value = 10 # -- configure the numer of recommendations (> 1) -- diff --git a/2_Evaluator_Block/constants.py b/2_Evaluator_Block/constants.py index e91304ece888415e93b8ee38f016b797acf8b4fc..6d18220868d9259d25f2b63770a2bac9b78ded36 100644 --- a/2_Evaluator_Block/constants.py +++ b/2_Evaluator_Block/constants.py @@ -6,7 +6,7 @@ class Constant: """Constants for dataset paths and column names.""" - DATA_PATH = Path('../data/small') # -- fill here the dataset size to use + DATA_PATH = Path('../data/tiny') # -- fill here the dataset size to use # Content CONTENT_PATH = DATA_PATH / 'content' # Path to content data diff --git a/2_Evaluator_Block/evaluator.ipynb b/2_Evaluator_Block/evaluator.ipynb index 017f1cae5a4c7eabf3b3b606a6081aca510dd441..f014373a6924ee31f7d63ac4b6fe11f77ea1d09d 100644 --- a/2_Evaluator_Block/evaluator.ipynb +++ b/2_Evaluator_Block/evaluator.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 182, + "execution_count": 58, "id": "6aaf9140", "metadata": {}, "outputs": [ @@ -59,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 183, + "execution_count": 59, "id": "d6d82188", "metadata": {}, "outputs": [], @@ -74,8 +74,6 @@ " algo.fit(trainset)\n", " # Predict ratings for the testset\n", " predictions = algo.test(testset)\n", - "\n", - "\n", " return predictions\n", "\n", "\n", @@ -84,7 +82,7 @@ " # -- implement the function generate_loo_top_n --\n", " # Create a LeaveOneOut split\n", " loo = LeaveOneOut(n_splits=1)\n", - "\n", + " \n", " for trainset, testset in loo.split(ratings_dataset):\n", " algo.fit(trainset) # Train the algorithm on the training set\n", " anti_testset = trainset.build_anti_testset() # Build the anti test-set\n", @@ -117,7 +115,7 @@ " anti_testset_top_n = top_n\n", " return anti_testset_top_n\n", "\n", - "def precompute_information(movie_data):\n", + "def precomputed_information(movie_data):\n", " \"\"\" Returns a dictionary that precomputes relevant information for evaluating in full mode\n", " \n", " Dictionary keys:\n", @@ -139,8 +137,6 @@ " precomputed_dict[\"item_to_rank\"] = item_to_rank\n", " \n", " return precomputed_dict\n", - " \n", - "\n", "\n", "def create_evaluation_report(eval_config, sp_ratings, precomputed_dict, available_metrics):\n", " \"\"\" Create a DataFrame evaluating various models on metrics specified in an evaluation config. \n", @@ -160,7 +156,7 @@ " assert metric in available_metrics['split']\n", " evaluation_function, parameters = available_metrics[\"split\"][metric]\n", " evaluation_dict[model_name][metric] = evaluation_function(predictions, **parameters) \n", - "\n", + " \n", " # Type 2 : loo evaluations\n", " if len(eval_config.loo_metrics) > 0:\n", " print('Training loo predictions')\n", @@ -197,7 +193,7 @@ }, { "cell_type": "code", - "execution_count": 184, + "execution_count": 60, "id": "f1849e55", "metadata": {}, "outputs": [], @@ -219,7 +215,6 @@ "\n", " return hit_rate\n", "\n", - "\n", "def get_novelty(anti_testset_top_n, item_to_rank):\n", " \"\"\"Compute the average novelty of the top-n recommendation over the users (full metric)\n", " \n", @@ -251,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": 185, + "execution_count": 61, "id": "704f4d2a", "metadata": {}, "outputs": [ @@ -264,20 +259,99 @@ "- computing metric mae\n", "- computing metric rmse\n", "Training loo predictions\n", - "Training full predictions\n" + "Training full predictions\n", + "Handling model baseline_2\n", + "Training split predictions\n", + "- computing metric mae\n", + "- computing metric rmse\n", + "Training loo predictions\n", + "Training full predictions\n", + "Handling model baseline_3\n", + "Training split predictions\n", + "- computing metric mae\n", + "- computing metric rmse\n", + "Training loo predictions\n", + "Training full predictions\n", + "Handling model baseline_4\n", + "Training split predictions\n", + "- computing metric mae\n", + "- computing metric rmse\n", + "Training loo predictions\n", + "Training full predictions\n", + "The data has been exported to the evaluation report\n" ] }, { - "ename": "AttributeError", - "evalue": "'dict' object has no attribute 'to_csv'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[185], line 20\u001b[0m\n\u001b[1;32m 18\u001b[0m precomputed_dict \u001b[38;5;241m=\u001b[39m precompute_information(pd\u001b[38;5;241m.\u001b[39mread_csv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m../data/small/evidence/ratings.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[1;32m 19\u001b[0m evaluation_report \u001b[38;5;241m=\u001b[39m create_evaluation_report(EvalConfig, sp_ratings, precomputed_dict, AVAILABLE_METRICS)\n\u001b[0;32m---> 20\u001b[0m export_evaluation_report(evaluation_report)\n", - "File \u001b[0;32m~/vscodeworkspace/recomsys/Analytics_UI/loaders.py:49\u001b[0m, in \u001b[0;36mexport_evaluation_report\u001b[0;34m(report)\u001b[0m\n\u001b[1;32m 47\u001b[0m report_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mevaluation_report_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpd\u001b[38;5;241m.\u001b[39mTimestamp\u001b[38;5;241m.\u001b[39mnow()\u001b[38;5;241m.\u001b[39mstrftime(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mY-\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mm-\u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 48\u001b[0m export_path \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m../data\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msmall\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mevaluations\u001b[39m\u001b[38;5;124m\"\u001b[39m, report_name)\n\u001b[0;32m---> 49\u001b[0m df_items\u001b[38;5;241m.\u001b[39mto_csv(export_path, index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe data has been exported to the evaluation report\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 51\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m df_items\n", - "\u001b[0;31mAttributeError\u001b[0m: 'dict' object has no attribute 'to_csv'" - ] + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>mae</th>\n", + " <th>rmse</th>\n", + " <th>hit_rate</th>\n", + " <th>novelty</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>baseline_1</th>\n", + " <td>1.544940</td>\n", + " <td>1.776982</td>\n", + " <td>0.112150</td>\n", + " <td>99.405607</td>\n", + " </tr>\n", + " <tr>\n", + " <th>baseline_2</th>\n", + " <td>1.491063</td>\n", + " <td>1.844761</td>\n", + " <td>0.009346</td>\n", + " <td>429.942991</td>\n", + " </tr>\n", + " <tr>\n", + " <th>baseline_3</th>\n", + " <td>0.868139</td>\n", + " <td>1.066303</td>\n", + " <td>0.074766</td>\n", + " <td>99.405607</td>\n", + " </tr>\n", + " <tr>\n", + " <th>baseline_4</th>\n", + " <td>0.727803</td>\n", + " <td>0.927636</td>\n", + " <td>0.158879</td>\n", + " <td>57.328037</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " mae rmse hit_rate novelty\n", + "baseline_1 1.544940 1.776982 0.112150 99.405607\n", + "baseline_2 1.491063 1.844761 0.009346 429.942991\n", + "baseline_3 0.868139 1.066303 0.074766 99.405607\n", + "baseline_4 0.727803 0.927636 0.158879 57.328037" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -298,7 +372,7 @@ "}\n", "\n", "sp_ratings = load_ratings(surprise_format=True)\n", - "precomputed_dict = precompute_information(pd.read_csv(\"../data/small/evidence/ratings.csv\"))\n", + "precomputed_dict = precomputed_information(pd.read_csv(\"../data/tiny/evidence/ratings.csv\"))\n", "evaluation_report = create_evaluation_report(EvalConfig, sp_ratings, precomputed_dict, AVAILABLE_METRICS)\n", "export_evaluation_report(evaluation_report)" ] @@ -320,7 +394,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.2" + "version": "3.11.8" } }, "nbformat": 4, diff --git a/2_Evaluator_Block/loaders.py b/2_Evaluator_Block/loaders.py index 7ccf4924644d425454af8e67a197425584dc22cc..a945f83808a759eef10202664fef0d239095dca1 100644 --- a/2_Evaluator_Block/loaders.py +++ b/2_Evaluator_Block/loaders.py @@ -24,7 +24,7 @@ def load_ratings(surprise_format=False): return surprise_data else: return df_ratings - +print(load_ratings()) def load_items(): """Loads items data. @@ -44,12 +44,8 @@ def export_evaluation_report(report): Returns: DataFrame: Merged ratings and items data. """ - df_ratings = load_ratings() - df_items = load_items() - df_merge = pd.merge(df_ratings, df_items, on='movieId') - report_name = f"evaluation_report_{pd.Timestamp.now().strftime('%Y-%m-%d')}.csv" - export_path = os.path.join("../data", "small", "evaluations", report_name) - df_merge.to_csv(export_path, index=False) + export_path = os.path.join("../data", "tiny", "evaluations", report_name) + report.to_csv(export_path, index=False) print("The data has been exported to the evaluation report") - return df_merge \ No newline at end of file + return report \ No newline at end of file