diff --git a/content_based.ipynb b/content_based.ipynb index fde28dcb0de4b0fb95a32527c4011f4cf2dffdf5..24950d71f8423398557fa69e21b9709eededc86e 100644 --- a/content_based.ipynb +++ b/content_based.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 184, + "execution_count": 756, "id": "277473a3", "metadata": {}, "outputs": [ @@ -53,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": 185, + "execution_count": 757, "id": "e8378976", "metadata": {}, "outputs": [ @@ -143,8 +143,8 @@ "df_items = load_items()\n", "df_ratings = load_ratings()\n", "df_tag = pd.read_csv(C.CONTENT_PATH/C.TAGS_FILENAME)\n", - "df_genome_score = pd.read_csv(\"data/hackathon/content/genome-scores.csv\")\n", - "df_genome_tag = pd.read_csv(\"data/hackathon/content/genome-tags.csv\")\n", + "#df_genome_score = pd.read_csv(\"data/hackathon/content/genome-scores.csv\")\n", + "# df_genome_tag = pd.read_csv(\"data/hackathon/content/genome-tags.csv\")\n", "\n", "\n", "# Example 1 : create title_length features\n", @@ -169,7 +169,7 @@ }, { "cell_type": "code", - "execution_count": 186, + "execution_count": 758, "id": "16b0a602", "metadata": {}, "outputs": [ @@ -177,7 +177,18 @@ "name": "stdout", "output_type": "stream", "text": [ - "{}\n" + "0\n", + "1\n", + "2\n", + "3\n", + "4\n", + "5\n", + "None\n", + "{'n_character_title': array([0.03019692])}\n", + "{'n_character_title': array([0.04098154])}\n", + "{'n_character_title': array([0.02942264])}\n", + "{'n_character_title': array([0.08196307])}\n", + "{'n_character_title': array([0.02798739])}\n" ] } ], @@ -245,18 +256,25 @@ " \n", " # Preallocate user profiles\n", " self.user_profile = {u: None for u in trainset.all_users()}\n", - " \n", + "\n", " self.user_profile_explain = {u: {} for u in trainset.all_users()}\n", "\n", - " for u in self.user_profile :\n", + " for u in self.user_profile_explain :\n", + " print(u)\n", " user_ratings = np.array([rating for _, rating in trainset.ur[u]])\n", + "\n", " feature_values = self.content_features.values\n", "\n", - " weighted_features = feature_values.T.dot(user_ratings)\n", + " fv = feature_values.astype(int)\n", + "\n", + " weighted_features = fv/np.linalg.norm(fv)\n", "\n", " feature_importance = weighted_features / np.sum(user_ratings)\n", "\n", " self.user_profile_explain[u] = dict(zip(self.content_features.columns, feature_importance))\n", + " \n", + "\n", + "\n", "\n", "\n", " if self.regressor_method == 'random_score':\n", @@ -371,6 +389,7 @@ " else:\n", " # Si aucune caractéristique appropriée n'est disponible\n", " continue # Ou gère le cas d'erreur/exception ici\n", + " \n", " y = df_user['user_ratings'].values\n", " svr_regressor = SVR(kernel='rbf', C=10, epsilon=0.2)\n", " svr_regressor.fit(X, y)\n", @@ -477,6 +496,7 @@ " else:\n", " # Si aucune caractéristique appropriée n'est disponible\n", " continue # Ou gère le cas d'erreur/exception ici\n", + "\n", " y = df_user['user_ratings'].values\n", " rf_regressor = RandomForestRegressor(n_estimators=100)\n", " rf_regressor.fit(X, y)\n", @@ -546,23 +566,32 @@ "\n", " return score\n", "\n", - " def explain(self, u) :\n", + " def explain(self, u) : \n", " if u in self.user_profile_explain :\n", " return self.user_profile_explain[u]\n", " else :\n", - " return {}\n", + " return None\n", + "\n", + "\n", + "cb = ContentBased(\"title_length\", \"random_sample\")\n", + "sp_ratings = load_ratings(surprise_format=True)\n", + "train_set = sp_ratings.build_full_trainset()\n", + "print(cb.fit(train_set))\n", + "\n", + "print(cb.explain(0))\n", "\n", + "print(cb.explain(1))\n", "\n", - "cb = ContentBased(\"movie_year\", \"random_sample\")\n", + "print(cb.explain(2))\n", "\n", - "print(cb.explain('11'))\n", + "print(cb.explain(3))\n", "\n", - "print('test')" + "print(cb.explain(4))\n" ] }, { "cell_type": "code", - "execution_count": 187, + "execution_count": 759, "id": "baab88b7", "metadata": {}, "outputs": [ @@ -867,7 +896,7 @@ }, { "cell_type": "code", - "execution_count": 188, + "execution_count": 760, "id": "69d12f7d", "metadata": {}, "outputs": [],