From 7dd59e1bce784fb312ab6e1eb012863da59c8faf Mon Sep 17 00:00:00 2001 From: Adrienucl <adrien.payen@student.uclouvain.be> Date: Tue, 30 Apr 2024 11:32:53 +0200 Subject: [PATCH] update analytics --- user_based.ipynb | 81 ++++++++---------------------------------------- 1 file changed, 13 insertions(+), 68 deletions(-) diff --git a/user_based.ipynb b/user_based.ipynb index fded6f50..a1135883 100644 --- a/user_based.ipynb +++ b/user_based.ipynb @@ -11,19 +11,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "id": "00d1b249", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], + "outputs": [], "source": [ "# reloads modules automatically before entering the execution of code\n", "%load_ext autoreload\n", @@ -56,53 +47,7 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "aafd1712", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Computing the msd similarity matrix...\n", - "Done computing similarity matrix.\n", - "user: 11 item: 364 r_ui = 4.00 est = 3.42 {'was_impossible': True, 'reason': 'User and/or item is unknown.'}\n" - ] - } - ], - "source": [ - "\n", - "# Create Surprise Dataset from the pandas DataFrame and Reader\n", - "surprise_data = load_ratings(surprise_format=True)\n", - "\n", - "trainset = surprise_data.build_full_trainset()\n", - "\n", - "\n", - "testset = trainset.build_anti_testset()\n", - "\n", - "\n", - "sim_options = {\n", - " 'name': 'msd', # Mean Squared Difference (Mean Square Error)\n", - " 'user_based': True, # User-based collaborative filtering\n", - " 'min_support': 3 # Minimum number of common ratings required\n", - "}\n", - "\n", - "\n", - "# Build an algorithm, and train it.\n", - "algo = KNNWithMeans(sim_options=sim_options, k=3, min_k=2)\n", - "algo.fit(trainset)\n", - "algo.test(testset)\n", - "\n", - "\n", - "uid = str(11) # raw user id (as in the ratings file). They are **strings**!\n", - "iid = str(364) \n", - "\n", - "pred = algo.predict(uid, iid, r_ui=4, verbose=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, + "execution_count": 2, "id": "cf3ccdc0", "metadata": {}, "outputs": [], @@ -132,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 3, "id": "e6fb78b7", "metadata": {}, "outputs": [ @@ -169,7 +114,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 4, "id": "ffe89c56", "metadata": {}, "outputs": [ @@ -330,7 +275,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 5, "id": "cc806424", "metadata": {}, "outputs": [ @@ -482,7 +427,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "id": "d03ed9eb", "metadata": {}, "outputs": [ @@ -626,7 +571,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 7, "id": "be53ae27", "metadata": {}, "outputs": [ @@ -683,7 +628,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 8, "id": "c20d8e19", "metadata": {}, "outputs": [ @@ -695,10 +640,10 @@ "Done computing similarity matrix.\n", "Computing the cosine similarity matrix...\n", "Done computing similarity matrix.\n", - "RMSE: 0.9501\n", - "RMSE: 0.9613\n", - "RMSE with MSD similarity: 0.9500902346226462\n", - "RMSE with Jaccard similarity: 0.9612909313186003\n" + "RMSE: 0.9683\n", + "RMSE: 0.9824\n", + "RMSE with MSD similarity: 0.9682664011125741\n", + "RMSE with Jaccard similarity: 0.9824127884570012\n" ] } ], -- GitLab