diff --git a/configs.py b/configs.py
index 2fdbd42033f60f409b2029ee4868980916704a28..c0f669fba58487f879da2fa769b4afd7a276846f 100644
--- a/configs.py
+++ b/configs.py
@@ -8,32 +8,80 @@ class EvalConfig:
# List of models to evaluate, each tuple containing model_name, model class, and model parameters (dict)
+
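+    # Each ContentBased entry below pairs a features_method (feature construction) with a regressor_method (per-user model)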
models = [
("baseline_1", ModelBaseline1, {}),
("baseline_2", ModelBaseline2, {}),
("baseline_3", ModelBaseline3, {}),
("baseline_4", ModelBaseline4, {}),
+
+        ("relevance_ContentBased_sample", ContentBased, {"features_method" : "relevance", "regressor_method" : "random_sample"}),
+        ("relevance_ContentBased_score", ContentBased, {"features_method" : "relevance", "regressor_method" : "random_score"}),
+        ("relevance_ContentBased_Lr", ContentBased, {"features_method" : "relevance", "regressor_method" : "linear_regression"}),
+        ("relevance_ContentBased_Svr", ContentBased, {"features_method" : "relevance", "regressor_method" : "svr_regression"}),
+        ("relevance_ContentBased_Gb", ContentBased, {"features_method" : "relevance", "regressor_method" : "gradient_boosting"}),
+        ("relevance_ContentBased_Rf", ContentBased, {"features_method" : "relevance", "regressor_method" : "random_forest"}),
+
+        ("combination_ContentBased_sample", ContentBased, {"features_method" : "combination", "regressor_method" : "random_sample"}),
+        ("combination_ContentBased_score", ContentBased, {"features_method" : "combination", "regressor_method" : "random_score"}),
+        ("combination_ContentBased_Lr", ContentBased, {"features_method" : "combination", "regressor_method" : "linear_regression"}),
+        ("combination_ContentBased_Svr", ContentBased, {"features_method" : "combination", "regressor_method" : "svr_regression"}),
+        ("combination_ContentBased_Gb", ContentBased, {"features_method" : "combination", "regressor_method" : "gradient_boosting"}),
+        ("combination_ContentBased_Rf", ContentBased, {"features_method" : "combination", "regressor_method" : "random_forest"}),
+
("title_length_ContentBased_sample", ContentBased, {"features_method" : "title_length", "regressor_method" : "random_sample"}),
("title_length_ContentBased_score", ContentBased, {"features_method" : "title_length", "regressor_method" : "random_score"}),
("title_length_ContentBased_Lr", ContentBased, {"features_method" : "title_length", "regressor_method" : "linear_regression"}),
+        ("title_length_ContentBased_Svr", ContentBased, {"features_method" : "title_length", "regressor_method" : "svr_regression"}),
+        ("title_length_ContentBased_Gb", ContentBased, {"features_method" : "title_length", "regressor_method" : "gradient_boosting"}),
+        ("title_length_ContentBased_Rf", ContentBased, {"features_method" : "title_length", "regressor_method" : "random_forest"}),
+
("movie_year_ContentBased_sample", ContentBased, {"features_method" : "movie_year", "regressor_method" : "random_sample"}),
("movie_year_ContentBased_score", ContentBased, {"features_method" : "movie_year", "regressor_method" : "random_score"}),
- #("movie_year_ContentBased_Lr", ContentBased, {"features_method" : "movie_year", "regressor_method" : "linear_regression"})
+ #("movie_year_ContentBased_Lr", ContentBased, {"features_method" : "movie_year", "regressor_method" : "linear_regression"}),
+        #("movie_year_ContentBased_Svr", ContentBased, {"features_method" : "movie_year", "regressor_method" : "svr_regression"}),
+        #("movie_year_ContentBased_Gb", ContentBased, {"features_method" : "movie_year", "regressor_method" : "gradient_boosting"}),
+        #("movie_year_ContentBased_Rf", ContentBased, {"features_method" : "movie_year", "regressor_method" : "random_forest"}),
+
+
("genres_ContentBased_sample", ContentBased, {"features_method" : "genres", "regressor_method" : "random_sample"}),
("genres_ContentBased_score", ContentBased, {"features_method" : "genres", "regressor_method" : "random_score"}),
#("genres_ContentBased_Lr", ContentBased, {"features_method" : "genres", "regressor_method" : "linear_regression"}),
+        #("genres_ContentBased_Svr", ContentBased, {"features_method" : "genres", "regressor_method" : "svr_regression"}),
+        #("genres_ContentBased_Gb", ContentBased, {"features_method" : "genres", "regressor_method" : "gradient_boosting"}),
+        #("genres_ContentBased_Rf", ContentBased, {"features_method" : "genres", "regressor_method" : "random_forest"}),
+
("rating_ContentBased_sample", ContentBased, {"features_method" : "rating", "regressor_method" : "random_sample"}),
("rating_ContentBased_score", ContentBased, {"features_method" : "rating", "regressor_method" : "random_score"}),
#("rating_ContentBased_Lr", ContentBased, {"features_method" : "rating", "regressor_method" : "linear_regression"}),
+        #("rating_ContentBased_Svr", ContentBased, {"features_method" : "rating", "regressor_method" : "svr_regression"}),
+        #("rating_ContentBased_Gb", ContentBased, {"features_method" : "rating", "regressor_method" : "gradient_boosting"}),
+        #("rating_ContentBased_Rf", ContentBased, {"features_method" : "rating", "regressor_method" : "random_forest"}),
+
+
("tags_ContentBased_sample", ContentBased, {"features_method" : "tags", "regressor_method" : "random_sample"}),
("tags_ContentBased_score", ContentBased, {"features_method" : "tags", "regressor_method" : "random_score"}),
#("tags_ContentBased_Lr", ContentBased, {"features_method" : "tags", "regressor_method" : "linear_regression"}),
+        #("tags_ContentBased_Svr", ContentBased, {"features_method" : "tags", "regressor_method" : "svr_regression"}),
+        #("tags_ContentBased_Gb", ContentBased, {"features_method" : "tags", "regressor_method" : "gradient_boosting"}),
+        #("tags_ContentBased_Rf", ContentBased, {"features_method" : "tags", "regressor_method" : "random_forest"}),
+
+
+
("tags_length_ContentBased_sample", ContentBased, {"features_method" : "tags_length", "regressor_method" : "random_sample"}),
("tags_length_ContentBased_score", ContentBased, {"features_method" : "tags_length", "regressor_method" : "random_score"}),
#("tags_length_ContentBased_Lr", ContentBased, {"features_method" : "tags_length", "regressor_method" : "linear_regression"}),
+        #("tags_length_ContentBased_Svr", ContentBased, {"features_method" : "tags_length", "regressor_method" : "svr_regression"}),
+        #("tags_length_ContentBased_Gb", ContentBased, {"features_method" : "tags_length", "regressor_method" : "gradient_boosting"}),
+        #("tags_length_ContentBased_Rf", ContentBased, {"features_method" : "tags_length", "regressor_method" : "random_forest"}),
+
+
("timestamp_ContentBased_sample", ContentBased, {"features_method" : "timestamp", "regressor_method" : "random_sample"}),
("timestamp_ContentBased_score", ContentBased, {"features_method" : "timestamp", "regressor_method" : "random_score"}),
#("timestamp_ContentBased_Lr", ContentBased, {"features_method" : "timestamp", "regressor_method" : "linear_regression"})
+    #("timestamp_ContentBased_Svr", ContentBased, {"features_method" : "timestamp", "regressor_method" : "svr_regression"})
+    #("timestamp_ContentBased_Gb", ContentBased, {"features_method" : "timestamp", "regressor_method" : "gradient_boosting"})
+    #("timestamp_ContentBased_Rf", ContentBased, {"features_method" : "timestamp", "regressor_method" : "random_forest"})
# model_name, model class, model parameters (dict)
]
diff --git a/constants.py b/constants.py
index c67ccc27c854da09dac88ddf30eb8913c67c3782..e6125570df096d8edd74e9f50c99041779d73b03 100644
--- a/constants.py
+++ b/constants.py
@@ -6,7 +6,7 @@ class Constant:
"""Constants for dataset paths and column names."""
- DATA_PATH = Path('data/hackathon') # -- fill here the dataset size to use
+ DATA_PATH = Path('data/test') # -- fill here the dataset size to use
# Content
CONTENT_PATH = DATA_PATH / 'content' # Path to content data
diff --git a/content_based.ipynb b/content_based.ipynb
index 7c12361204ffd09164375fb170cd551108919f56..eebd7d8541ff2a49cfc9ad67ca63598390b2bf39 100644
--- a/content_based.ipynb
+++ b/content_based.ipynb
@@ -10,7 +10,7 @@
},
{
"cell_type": "code",
- "execution_count": 86,
+ "execution_count": 34,
"id": "277473a3",
"metadata": {},
"outputs": [
@@ -38,6 +38,8 @@
"from constants import Constant as C\n",
"\n",
"from sklearn.linear_model import LinearRegression\n",
+ "from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor\n",
+ "from sklearn.svm import SVR\n",
"from sklearn.feature_extraction.text import TfidfVectorizer"
]
},
@@ -51,7 +53,7 @@
},
{
"cell_type": "code",
- "execution_count": 82,
+ "execution_count": 35,
"id": "e8378976",
"metadata": {},
"outputs": [
@@ -137,8 +139,13 @@
}
],
"source": [
+    "# Load all the dataframes used in this notebook\n",
"df_items = load_items()\n",
"df_ratings = load_ratings()\n",
+ "df_tag = pd.read_csv(C.CONTENT_PATH/C.TAGS_FILENAME)\n",
+ "df_genome_score = pd.read_csv(\"data/hackathon/content/genome-scores.csv\")\n",
+ "df_genome_tag = pd.read_csv(\"data/hackathon/content/genome-tags.csv\")\n",
+ "\n",
"\n",
"# Example 1 : create title_length features\n",
"df_features = df_items[C.LABEL_COL].apply(lambda x: len(x)).to_frame('n_character_title')\n",
@@ -162,7 +169,7 @@
},
{
"cell_type": "code",
- "execution_count": 83,
+ "execution_count": 36,
"id": "16b0a602",
"metadata": {},
"outputs": [],
@@ -176,31 +183,48 @@
" def create_content_features(self, features_method):\n",
" \"\"\"Content Analyzer\"\"\"\n",
" df_items = load_items()\n",
+ " df_ratings = load_ratings()\n",
+    "        df_tag = pd.read_csv(C.CONTENT_PATH/C.TAGS_FILENAME)\n",
+ " df_genome_score = pd.read_csv(\"data/hackathon/content/genome-scores.csv\")\n",
+ " df_genome_tag = pd.read_csv(\"data/hackathon/content/genome-tags.csv\")\n",
+ "\n",
" if features_method is None:\n",
" df_features = None\n",
+ "\n",
+ " elif features_method == \"relevance\" :\n",
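+    "            # Average genome relevance per movie (one value per movieId)\n",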
+    "            df_features = df_genome_score.groupby('movieId')[\"relevance\"].mean().to_frame('avg_relevance')\n",
+ "\n",
" elif features_method == \"title_length\": # a naive method that creates only 1 feature based on title length\n",
" df_features = df_items[C.LABEL_COL].apply(lambda x: len(x)).to_frame('n_character_title')\n",
- "\n",
+ " \n",
" elif features_method == \"movie_year\" :\n",
- " df_features = df_items['movie_year'] = df_items['title'].str.extract(r'\\((\\d{4})\\)', expand=False)\n",
+    "            df_features = df_items['title'].str.extract(r'\\((\\d{4})\\)', expand=False).to_frame('movie_year')\n",
"\n",
" elif features_method == \"genres\" :\n",
" genres_list = df_items['genres'].str.split('|').explode().unique()\n",
" for genre in genres_list:\n",
- " df_features = df_items['genres'].str.contains(genre).astype(int)\n",
- "\n",
+ " df_features = df_items['genres'].str.contains(genre).astype(int).to_frame('genres')\n",
+ " \n",
+ " elif features_method == \"combination\":\n",
+ " genres_list = df_items['genres'].str.split('|').explode().unique()\n",
+ " df_movie = df_items['title'].str.extract(r'\\((\\d{4})\\)', expand=False).to_frame('movie_year')\n",
+ " for genre in genres_list:\n",
+ " df_genre = df_items['genres'].str.contains(genre).astype(int).to_frame(genre)\n",
+ " \n",
+ " df_tag['tag_length'] = df_tag['tag'].apply(lambda x: sum(len(tag) for tag in x.split(',')) if isinstance(x, str) else 0)\n",
+ " df_features = pd.concat([df_genre, df_tag, df_movie], axis=1)\n",
+ " \n",
" elif features_method == \"rating\" :\n",
" df_features = df_ratings.groupby('movieId')['rating'].transform('mean').to_frame('avg_rating')\n",
+ " \n",
"\n",
" elif features_method == \"tags\" :\n",
- " df_features = df_tag['tag'].apply(lambda x: len(x.split(',')))\n",
+ " df_features = df_tag['tag'].apply(lambda x: len(x.split(',')) if isinstance(x, str) else 0).to_frame('tags')\n",
"\n",
" elif features_method == \"tags_length\" :\n",
- " df_features = df_tag['tag'].apply(lambda x: sum(len(tag) for tag in x.split(',')))\n",
+ " \n",
+    "            df_features = df_tag['tag'].apply(lambda x: sum(len(tag) for tag in x.split(',')) if isinstance(x, str) else 0).to_frame('n_character_tags')\n",
"\n",
- " elif features_method == \"timestamp\" :\n",
- " df_features = df_ratings['timestamp_sin'] = np.sin(2 * np.pi * df_ratings['timestamp'] / 86400)\n",
- " df_features = df_ratings['timestamp_cos'] = np.cos(2 * np.pi * df_ratings['timestamp'] / 86400)\n",
"\n",
" else: # (implement other feature creations here)\n",
" raise NotImplementedError(f'Feature method {features_method} not yet implemented')\n",
@@ -221,6 +245,7 @@
" elif self.regressor_method == 'random_sample':\n",
" for u in self.user_profile:\n",
" self.user_profile[u] = [rating for _, rating in self.trainset.ur[u]]\n",
+ "\n",
" elif self.regressor_method == 'linear_regression' :\n",
" for u in self.user_profile:\n",
"\n",
@@ -232,8 +257,42 @@
" df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n",
"\n",
" df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n",
+ " \n",
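+    "                # Pick the single feature column produced by create_content_features as the regressor input X\n",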
+ " if 'n_character_title' in df_user.columns:\n",
+    "                    # If 'n_character_title' is available as a feature\n",
+ " X = df_user['n_character_title'].values.reshape(-1, 1)\n",
+ "\n",
+ " elif 'avg_relevance' in df_user.columns:\n",
+    "                    # If 'avg_relevance' is available as a feature\n",
+ " X = df_user['avg_relevance'].values.reshape(-1, 1)\n",
+ " \n",
+ " elif 'movie_year' in df_user.columns:\n",
+    "                    # If 'movie_year' is available as a feature\n",
+ " X = df_user['movie_year'].values.reshape(-1, 1)\n",
+ " \n",
+ " elif 'genres' in df_user.columns:\n",
+    "                    # If 'genres' is available as a feature\n",
+ " X = df_user['genres'].values.reshape(-1, 1)\n",
+ " \n",
+ " elif 'combination' in df_user.columns:\n",
+    "                    # If 'combination' is available as a feature\n",
+ " X = df_user['combination'].values.reshape(-1, 1)\n",
+ " \n",
+ " elif 'avg_rating' in df_user.columns:\n",
+    "                    # If 'avg_rating' is available as a feature\n",
+ " X = df_user['avg_rating'].values.reshape(-1, 1)\n",
+ "\n",
+ " elif 'tags' in df_user.columns:\n",
+    "                    # If 'tags' is available as a feature\n",
+ " X = df_user['tags'].values.reshape(-1, 1)\n",
+ "\n",
+ " elif 'n_character_tags' in df_user.columns:\n",
+    "                    # If 'n_character_tags' is available as a feature\n",
+ " X = df_user['n_character_tags'].values.reshape(-1, 1)\n",
"\n",
- " X = df_user['n_character_title'].values.reshape(-1,1)\n",
+ " else:\n",
+    "                    # If no suitable feature column is available\n",
+    "                    continue # Or handle the error/exception here\n",
"\n",
" y = df_user['user_ratings'].values\n",
"\n",
@@ -243,6 +302,165 @@
" \n",
" # Store the computed user profile\n",
" self.user_profile[u] = linear_regressor\n",
+ "\n",
+ " elif self.regressor_method == 'svr_regression':\n",
+ " for u in self.user_profile:\n",
+ "\n",
+ " user_ratings = [rating for _, rating in trainset.ur[u]]\n",
+ " item_ids = [iid for iid, _ in trainset.ur[u]]\n",
+ "\n",
+ " df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings})\n",
+ "\n",
+ " df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n",
+ "\n",
+ " df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n",
+ "\n",
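+    "                # Same single-feature selection as in the linear_regression branch\n",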
+ " if 'n_character_title' in df_user.columns:\n",
+    "                    # If 'n_character_title' is available as a feature\n",
+ " X = df_user['n_character_title'].values.reshape(-1, 1)\n",
+ "\n",
+ " elif 'avg_relevance' in df_user.columns:\n",
+    "                    # If 'avg_relevance' is available as a feature\n",
+ " X = df_user['avg_relevance'].values.reshape(-1, 1)\n",
+ " \n",
+ " elif 'movie_year' in df_user.columns:\n",
+    "                    # If 'movie_year' is available as a feature\n",
+ " X = df_user['movie_year'].values.reshape(-1, 1)\n",
+ " \n",
+ " elif 'genres' in df_user.columns:\n",
+    "                    # If 'genres' is available as a feature\n",
+ " X = df_user['genres'].values.reshape(-1, 1)\n",
+ " \n",
+ " elif 'combination' in df_user.columns:\n",
+    "                    # If 'combination' is available as a feature\n",
+ " X = df_user['combination'].values.reshape(-1, 1)\n",
+ " \n",
+ " elif 'avg_rating' in df_user.columns:\n",
+    "                    # If 'avg_rating' is available as a feature\n",
+ " X = df_user['avg_rating'].values.reshape(-1, 1)\n",
+ "\n",
+ " elif 'tags' in df_user.columns:\n",
+    "                    # If 'tags' is available as a feature\n",
+ " X = df_user['tags'].values.reshape(-1, 1)\n",
+ "\n",
+ " elif 'n_character_tags' in df_user.columns:\n",
+    "                    # If 'n_character_tags' is available as a feature\n",
+ " X = df_user['n_character_tags'].values.reshape(-1, 1)\n",
+ "\n",
+ " else:\n",
+    "                    # If no suitable feature column is available\n",
+    "                    continue # Or handle the error/exception here\n",
+ " y = df_user['user_ratings'].values\n",
+ " svr_regressor = SVR(kernel='rbf', C=10, epsilon=0.2)\n",
+ " svr_regressor.fit(X, y)\n",
+ " self.user_profile[u] = svr_regressor\n",
+ "\n",
+ " elif self.regressor_method == 'gradient_boosting':\n",
+ " for u in self.user_profile:\n",
+ "\n",
+ " user_ratings = [rating for _, rating in trainset.ur[u]]\n",
+ " item_ids = [iid for iid, _ in trainset.ur[u]]\n",
+ "\n",
+ " df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings})\n",
+ "\n",
+ " df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n",
+ "\n",
+ " df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n",
+ "\n",
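+    "                # Same single-feature selection as in the linear_regression branch\n",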
+ " if 'n_character_title' in df_user.columns:\n",
+    "                    # If 'n_character_title' is available as a feature\n",
+ " X = df_user['n_character_title'].values.reshape(-1, 1)\n",
+ "\n",
+ " elif 'avg_relevance' in df_user.columns:\n",
+    "                    # If 'avg_relevance' is available as a feature\n",
+ " X = df_user['avg_relevance'].values.reshape(-1, 1)\n",
+ " \n",
+ " elif 'movie_year' in df_user.columns:\n",
+    "                    # If 'movie_year' is available as a feature\n",
+ " X = df_user['movie_year'].values.reshape(-1, 1)\n",
+ " \n",
+ " elif 'genres' in df_user.columns:\n",
+    "                    # If 'genres' is available as a feature\n",
+ " X = df_user['genres'].values.reshape(-1, 1)\n",
+ " \n",
+ " elif 'combination' in df_user.columns:\n",
+    "                    # If 'combination' is available as a feature\n",
+ " X = df_user['combination'].values.reshape(-1, 1)\n",
+ " \n",
+ " elif 'avg_rating' in df_user.columns:\n",
+    "                    # If 'avg_rating' is available as a feature\n",
+ " X = df_user['avg_rating'].values.reshape(-1, 1)\n",
+ "\n",
+ " elif 'tags' in df_user.columns:\n",
+    "                    # If 'tags' is available as a feature\n",
+ " X = df_user['tags'].values.reshape(-1, 1)\n",
+ "\n",
+ " elif 'n_character_tags' in df_user.columns:\n",
+    "                    # If 'n_character_tags' is available as a feature\n",
+ " X = df_user['n_character_tags'].values.reshape(-1, 1)\n",
+ "\n",
+ " else:\n",
+    "                    # If no suitable feature column is available\n",
+    "                    continue # Or handle the error/exception here\n",
+ " \n",
+ " y = df_user['user_ratings'].values\n",
+ " gb_regressor = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)\n",
+ " gb_regressor.fit(X, y)\n",
+ " self.user_profile[u] = gb_regressor\n",
+ "\n",
+ "\n",
+ " elif self.regressor_method == 'random_forest':\n",
+ " for u in self.user_profile:\n",
+ "\n",
+ " user_ratings = [rating for _, rating in trainset.ur[u]]\n",
+ " item_ids = [iid for iid, _ in trainset.ur[u]]\n",
+ "\n",
+ " df_user = pd.DataFrame({'item_id': item_ids, 'user_ratings': user_ratings})\n",
+ "\n",
+ " df_user[\"item_id\"] = df_user[\"item_id\"].map(trainset.to_raw_iid)\n",
+ "\n",
+ " df_user = df_user.merge(self.content_features, left_on = \"item_id\", right_index = True, how = 'left')\n",
+ "\n",
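+    "                # Same single-feature selection as in the linear_regression branch\n",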
+ " if 'n_character_title' in df_user.columns:\n",
+    "                    # If 'n_character_title' is available as a feature\n",
+ " X = df_user['n_character_title'].values.reshape(-1, 1)\n",
+ "\n",
+ " elif 'avg_relevance' in df_user.columns:\n",
+    "                    # If 'avg_relevance' is available as a feature\n",
+ " X = df_user['avg_relevance'].values.reshape(-1, 1)\n",
+ " \n",
+ " elif 'movie_year' in df_user.columns:\n",
+    "                    # If 'movie_year' is available as a feature\n",
+ " X = df_user['movie_year'].values.reshape(-1, 1)\n",
+ " \n",
+ " elif 'genres' in df_user.columns:\n",
+    "                    # If 'genres' is available as a feature\n",
+ " X = df_user['genres'].values.reshape(-1, 1)\n",
+ " \n",
+ " elif 'combination' in df_user.columns:\n",
+    "                    # If 'combination' is available as a feature\n",
+ " X = df_user['combination'].values.reshape(-1, 1)\n",
+ " \n",
+ " elif 'avg_rating' in df_user.columns:\n",
+    "                    # If 'avg_rating' is available as a feature\n",
+ " X = df_user['avg_rating'].values.reshape(-1, 1)\n",
+ "\n",
+ " elif 'tags' in df_user.columns:\n",
+    "                    # If 'tags' is available as a feature\n",
+ " X = df_user['tags'].values.reshape(-1, 1)\n",
+ "\n",
+ " elif 'n_character_tags' in df_user.columns:\n",
+    "                    # If 'n_character_tags' is available as a feature\n",
+ " X = df_user['n_character_tags'].values.reshape(-1, 1)\n",
+ "\n",
+ " else:\n",
+    "                    # If no suitable feature column is available\n",
+    "                    continue # Or handle the error/exception here\n",
+ " y = df_user['user_ratings'].values\n",
+ " rf_regressor = RandomForestRegressor(n_estimators=100)\n",
+ " rf_regressor.fit(X, y)\n",
+ " self.user_profile[u] = rf_regressor\n",
+ "\n",
" else : \n",
" pass\n",
"\n",
@@ -272,6 +490,34 @@
" linear_regressor = self.user_profile[u]\n",
"\n",
" score= linear_regressor.predict(item_features)[0]\n",
+ " \n",
+ " elif self.regressor_method == 'svr_regression':\n",
+ "\n",
+ " raw_item_id = self.trainset.to_raw_iid(i)\n",
+ "\n",
+ " item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values\n",
+ "\n",
+ " svr_regressor = self.user_profile[u]\n",
+ " score = svr_regressor.predict(item_features)[0]\n",
+ " \n",
+ " elif self.regressor_method == 'gradient_boosting':\n",
+ "\n",
+ " raw_item_id = self.trainset.to_raw_iid(i)\n",
+ "\n",
+ " item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values\n",
+ "\n",
+ " gradient_boosting = self.user_profile[u]\n",
+ " score = gradient_boosting.predict(item_features)[0]\n",
+ " \n",
+ " elif self.regressor_method == 'random_forest':\n",
+ "\n",
+ " raw_item_id = self.trainset.to_raw_iid(i)\n",
+ "\n",
+ " item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values\n",
+ "\n",
+ " randomforest = self.user_profile[u]\n",
+ " score = randomforest.predict(item_features)[0]\n",
+ " \n",
" else : \n",
" score = None\n",
"\n",
@@ -282,7 +528,7 @@
},
{
"cell_type": "code",
- "execution_count": 92,
+ "execution_count": 37,
"id": "baab88b7",
"metadata": {},
"outputs": [
@@ -587,7 +833,7 @@
},
{
"cell_type": "code",
- "execution_count": 84,
+ "execution_count": 38,
"id": "69d12f7d",
"metadata": {},
"outputs": [
@@ -596,39 +842,54 @@
"output_type": "stream",
"text": [
"title_length :\n",
- "user: 11 item: 1214 r_ui = None est = 1.19 {'was_impossible': False}\n",
- "user: 11 item: 1214 r_ui = None est = 4.50 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 3.28 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 1.50 {'was_impossible': False}\n",
"user: 11 item: 1214 r_ui = None est = 1.09 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 0.80 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 1.00 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 1.41 {'was_impossible': False}\n",
"\n",
"\n",
"movie_year : \n",
- "user: 11 item: 1214 r_ui = None est = 1.24 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 1.67 {'was_impossible': False}\n",
"user: 11 item: 1214 r_ui = None est = 4.50 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 3.15 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 1.75 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 1.50 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 1.56 {'was_impossible': False}\n",
+ "\n",
+ "\n",
+ "relevance : \n",
+ "user: 11 item: 1214 r_ui = None est = 3.61 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 5.00 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 3.13 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 1.70 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 2.17 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 2.18 {'was_impossible': False}\n",
"\n",
"\n",
"genres : \n",
- "user: 11 item: 1214 r_ui = None est = 4.94 {'was_impossible': False}\n",
- "user: 11 item: 1214 r_ui = None est = 4.00 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 3.95 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 1.50 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 0.50 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 3.50 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 3.17 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 3.17 {'was_impossible': False}\n",
"\n",
"\n",
"rating : \n",
- "user: 11 item: 1214 r_ui = None est = 1.76 {'was_impossible': False}\n",
- "user: 11 item: 1214 r_ui = None est = 3.00 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 3.46 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 4.50 {'was_impossible': False}\n",
"\n",
"\n",
"tags : \n",
- "user: 11 item: 1214 r_ui = None est = 3.67 {'was_impossible': False}\n",
- "user: 11 item: 1214 r_ui = None est = 4.00 {'was_impossible': False}\n",
- "\n",
- "\n",
- "tags_length : \n",
- "user: 11 item: 1214 r_ui = None est = 1.73 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 3.47 {'was_impossible': False}\n",
"user: 11 item: 1214 r_ui = None est = 1.00 {'was_impossible': False}\n",
"\n",
"\n",
- "timestamp : \n",
- "user: 11 item: 1214 r_ui = None est = 3.65 {'was_impossible': False}\n",
- "user: 11 item: 1214 r_ui = None est = 5.00 {'was_impossible': False}\n"
+ "tags_length : \n",
+ "user: 11 item: 1214 r_ui = None est = 2.29 {'was_impossible': False}\n",
+ "user: 11 item: 1214 r_ui = None est = 4.00 {'was_impossible': False}\n"
]
}
],
@@ -645,41 +906,63 @@
" prediction = content_algo.predict(anti_test_set_first[0], anti_test_set_first[1])\n",
" print(prediction)\n",
"\n",
- "# (call here the test functions with different regressor methods)\n",
+ "\n",
+ "\n",
"print(\"title_length :\")\n",
"test_contentbased_class(feature_method = \"title_length\" , regressor_method = \"random_score\")\n",
"test_contentbased_class(feature_method = \"title_length\" , regressor_method = \"random_sample\")\n",
"test_contentbased_class(feature_method = \"title_length\" , regressor_method = \"linear_regression\")\n",
+ "test_contentbased_class(feature_method= \"title_length\", regressor_method= \"svr_regression\")\n",
+ "test_contentbased_class(feature_method= \"title_length\", regressor_method= \"gradient_boosting\")\n",
+ "test_contentbased_class(feature_method= \"title_length\", regressor_method= \"random_forest\")\n",
"print(\"\\n\")\n",
"print(\"movie_year : \")\n",
- "test_contentbased_class(feature_method=\"movie_year\", regressor_method=\"random_score\")\n",
- "test_contentbased_class(feature_method=\"movie_year\", regressor_method=\"random_sample\")\n",
- "#test_contentbased_class(feature_method=\"movie_year\", regressor_method=\"linear_regression\")\n",
+ "test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"random_score\")\n",
+ "test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"random_sample\")\n",
+ "test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"linear_regression\")\n",
+ "test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"svr_regression\")\n",
+ "test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"gradient_boosting\")\n",
+ "test_contentbased_class(feature_method= \"movie_year\", regressor_method= \"random_forest\")\n",
+ "print(\"\\n\")\n",
+ "print(\"relevance : \") \n",
+ "test_contentbased_class(feature_method= \"relevance\", regressor_method= \"random_score\")\n",
+ "test_contentbased_class(feature_method= \"relevance\", regressor_method= \"random_sample\")\n",
+ "test_contentbased_class(feature_method= \"relevance\", regressor_method= \"linear_regression\")\n",
+ "test_contentbased_class(feature_method= \"relevance\", regressor_method= \"svr_regression\")\n",
+ "test_contentbased_class(feature_method= \"relevance\", regressor_method= \"gradient_boosting\")\n",
+ "test_contentbased_class(feature_method= \"relevance\", regressor_method= \"random_forest\")\n",
"print(\"\\n\")\n",
- "print(\"genres : \")\n",
- "test_contentbased_class(feature_method=\"genres\", regressor_method=\"random_score\")\n",
- "test_contentbased_class(feature_method=\"genres\", regressor_method=\"random_sample\")\n",
- "#test_contentbased_class(feature_method=\"genres\", regressor_method=\"linear_regression\")\n",
+ "print(\"genres : \") \n",
+ "test_contentbased_class(feature_method= \"genres\", regressor_method= \"random_score\")\n",
+ "test_contentbased_class(feature_method= \"genres\", regressor_method= \"random_sample\")\n",
+ "test_contentbased_class(feature_method= \"genres\", regressor_method= \"linear_regression\")\n",
+ "test_contentbased_class(feature_method= \"genres\", regressor_method= \"svr_regression\")\n",
+ "test_contentbased_class(feature_method= \"genres\", regressor_method= \"gradient_boosting\")\n",
+ "test_contentbased_class(feature_method= \"genres\", regressor_method= \"random_forest\")\n",
"print(\"\\n\")\n",
"print(\"rating : \")\n",
- "test_contentbased_class(feature_method=\"rating\", regressor_method=\"random_score\")\n",
- "test_contentbased_class(feature_method=\"rating\", regressor_method=\"random_sample\")\n",
- "#test_contentbased_class(feature_method=\"rating\", regressor_method=\"linear_regression\")\n",
+ "test_contentbased_class(feature_method= \"rating\", regressor_method=\"random_score\")\n",
+ "test_contentbased_class(feature_method= \"rating\", regressor_method=\"random_sample\")\n",
+ "#test_contentbased_class(feature_method= \"rating\", regressor_method=\"linear_regression\")\n",
+ "#test_contentbased_class(feature_method=\"rating\", regressor_method=\"svr_regression\")\n",
+ "#test_contentbased_class(feature_method=\"rating\", regressor_method=\"gradient_boosting\")\n",
+ "#test_contentbased_class(feature_method=\"rating\", regressor_method=\"random_forest\")\n",
"print(\"\\n\")\n",
"print(\"tags : \")\n",
"test_contentbased_class(feature_method=\"tags\", regressor_method=\"random_score\")\n",
"test_contentbased_class(feature_method=\"tags\", regressor_method=\"random_sample\")\n",
"#test_contentbased_class(feature_method=\"tags\", regressor_method=\"linear_regression\")\n",
+ "# test_contentbased_class(feature_method=\"tags\", regressor_method=\"svr_regression\")\n",
+ "# test_contentbased_class(feature_method=\"tags\", regressor_method=\"gradient_boosting\")\n",
+ "# test_contentbased_class(feature_method=\"tags\", regressor_method=\"random_forest\")\n",
"print(\"\\n\")\n",
"print(\"tags_length : \")\n",
"test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"random_score\")\n",
"test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"random_sample\")\n",
- "#test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"linear_regression\")\n",
- "print(\"\\n\")\n",
- "print(\"timestamp : \")\n",
- "test_contentbased_class(feature_method=\"timestamp\", regressor_method=\"random_score\")\n",
- "test_contentbased_class(feature_method=\"timestamp\", regressor_method=\"random_sample\")\n",
- "#test_contentbased_class(feature_method=\"timestamp\", regressor_method=\"linear_regression\")"
+ "# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"linear_regression\")\n",
+ "# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"svr_regression\")\n",
+ "# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"gradient_boosting\")\n",
+ "# test_contentbased_class(feature_method=\"tags_length\", regressor_method=\"random_forest\")"
]
}
],
diff --git a/evaluator.ipynb b/evaluator.ipynb
index efef1b7475bc2ec4598c9b8af197c29d5c358977..fac2fd4aac639618a6584cb667d973ec8415c5a4 100644
--- a/evaluator.ipynb
+++ b/evaluator.ipynb
@@ -13,19 +13,10 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 1,
"id": "6aaf9140",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The autoreload extension is already loaded. To reload it, use:\n",
- " %reload_ext autoreload\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# reloads modules automatically before entering the execution of code\n",
"%load_ext autoreload\n",
@@ -59,7 +50,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 2,
"id": "d6d82188",
"metadata": {},
"outputs": [],
@@ -201,7 +192,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 3,
"id": "f1849e55",
"metadata": {},
"outputs": [],
@@ -257,7 +248,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 4,
"id": "704f4d2a",
"metadata": {},
"outputs": [
@@ -270,309 +261,23 @@
"- computing metric mae\n",
"- computing metric rmse\n",
"Training loo predictions\n",
- "Training full predictions\n",
- "Handling model baseline_2\n",
- "Training split predictions\n",
- "- computing metric mae\n",
- "- computing metric rmse\n",
- "Training loo predictions\n",
- "Training full predictions\n",
- "Handling model baseline_3\n",
- "Training split predictions\n",
- "- computing metric mae\n",
- "- computing metric rmse\n",
- "Training loo predictions\n",
- "Training full predictions\n",
- "Handling model baseline_4\n",
- "Training split predictions\n",
- "- computing metric mae\n",
- "- computing metric rmse\n",
- "Training loo predictions\n",
- "Training full predictions\n",
- "Handling model title_length_ContentBased_sample\n",
- "Training split predictions\n",
- "- computing metric mae\n",
- "- computing metric rmse\n",
- "Training loo predictions\n",
- "Training full predictions\n",
- "Handling model title_length_ContentBased_score\n",
- "Training split predictions\n",
- "- computing metric mae\n",
- "- computing metric rmse\n",
- "Training loo predictions\n",
- "Training full predictions\n",
- "Handling model title_length_ContentBased_Lr\n",
- "Training split predictions\n",
- "- computing metric mae\n",
- "- computing metric rmse\n",
- "Training loo predictions\n",
- "Training full predictions\n",
- "Handling model movie_year_ContentBased_sample\n",
- "Training split predictions\n",
- "- computing metric mae\n",
- "- computing metric rmse\n",
- "Training loo predictions\n",
- "Training full predictions\n",
- "Handling model movie_year_ContentBased_score\n",
- "Training split predictions\n",
- "- computing metric mae\n",
- "- computing metric rmse\n",
- "Training loo predictions\n",
- "Training full predictions\n",
- "Handling model genres_ContentBased_sample\n",
- "Training split predictions\n",
- "- computing metric mae\n",
- "- computing metric rmse\n",
- "Training loo predictions\n",
- "Training full predictions\n",
- "Handling model genres_ContentBased_score\n",
- "Training split predictions\n",
- "- computing metric mae\n",
- "- computing metric rmse\n",
- "Training loo predictions\n",
- "Training full predictions\n",
- "Handling model rating_ContentBased_sample\n",
- "Training split predictions\n",
- "- computing metric mae\n",
- "- computing metric rmse\n",
- "Training loo predictions\n",
- "Training full predictions\n",
- "Handling model rating_ContentBased_score\n",
- "Training split predictions\n",
- "- computing metric mae\n",
- "- computing metric rmse\n",
- "Training loo predictions\n",
- "Training full predictions\n",
- "Handling model tags_ContentBased_sample\n",
- "Training split predictions\n",
- "- computing metric mae\n",
- "- computing metric rmse\n",
- "Training loo predictions\n",
- "Training full predictions\n",
- "Handling model tags_ContentBased_score\n",
- "Training split predictions\n",
- "- computing metric mae\n",
- "- computing metric rmse\n",
- "Training loo predictions\n",
- "Training full predictions\n",
- "Handling model tags_length_ContentBased_sample\n",
- "Training split predictions\n",
- "- computing metric mae\n",
- "- computing metric rmse\n",
- "Training loo predictions\n",
- "Training full predictions\n",
- "Handling model tags_length_ContentBased_score\n",
- "Training split predictions\n",
- "- computing metric mae\n",
- "- computing metric rmse\n",
- "Training loo predictions\n",
- "Training full predictions\n",
- "Handling model timestamp_ContentBased_sample\n",
- "Training split predictions\n",
- "- computing metric mae\n",
- "- computing metric rmse\n",
- "Training loo predictions\n",
- "Training full predictions\n",
- "Handling model timestamp_ContentBased_score\n",
- "Training split predictions\n",
- "- computing metric mae\n",
- "- computing metric rmse\n",
- "Training loo predictions\n",
- "Training full predictions\n",
- "The data has been exported to the evaluation report\n"
+ "Training full predictions\n"
]
},
{
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>mae</th>\n",
- " <th>rmse</th>\n",
- " <th>hit_rate</th>\n",
- " <th>novelty</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>baseline_1</th>\n",
- " <td>1.312500</td>\n",
- " <td>1.667708</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>baseline_2</th>\n",
- " <td>1.315250</td>\n",
- " <td>1.572990</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>baseline_3</th>\n",
- " <td>1.318182</td>\n",
- " <td>1.465689</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>baseline_4</th>\n",
- " <td>1.363953</td>\n",
- " <td>1.523985</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>title_length_ContentBased_sample</th>\n",
- " <td>1.375000</td>\n",
- " <td>1.750000</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>title_length_ContentBased_score</th>\n",
- " <td>1.556280</td>\n",
- " <td>2.063469</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>title_length_ContentBased_Lr</th>\n",
- " <td>1.625729</td>\n",
- " <td>1.773594</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>movie_year_ContentBased_sample</th>\n",
- " <td>2.250000</td>\n",
- " <td>2.610077</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>movie_year_ContentBased_score</th>\n",
- " <td>1.866274</td>\n",
- " <td>2.111422</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>genres_ContentBased_sample</th>\n",
- " <td>1.875000</td>\n",
- " <td>2.271136</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>genres_ContentBased_score</th>\n",
- " <td>1.463388</td>\n",
- " <td>1.793363</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>rating_ContentBased_sample</th>\n",
- " <td>1.289773</td>\n",
- " <td>1.715759</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>rating_ContentBased_score</th>\n",
- " <td>2.482206</td>\n",
- " <td>2.795490</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>tags_ContentBased_sample</th>\n",
- " <td>1.937500</td>\n",
- " <td>2.128673</td>\n",
- " <td>0.5</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>tags_ContentBased_score</th>\n",
- " <td>1.683499</td>\n",
- " <td>1.782805</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>tags_length_ContentBased_sample</th>\n",
- " <td>1.187500</td>\n",
- " <td>1.704773</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>tags_length_ContentBased_score</th>\n",
- " <td>1.564917</td>\n",
- " <td>1.944345</td>\n",
- " <td>0.5</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>timestamp_ContentBased_sample</th>\n",
- " <td>1.875000</td>\n",
- " <td>2.277608</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>timestamp_ContentBased_score</th>\n",
- " <td>1.265317</td>\n",
- " <td>1.512329</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " mae rmse hit_rate novelty\n",
- "baseline_1 1.312500 1.667708 1.0 4.0\n",
- "baseline_2 1.315250 1.572990 1.0 4.0\n",
- "baseline_3 1.318182 1.465689 1.0 4.0\n",
- "baseline_4 1.363953 1.523985 1.0 4.0\n",
- "title_length_ContentBased_sample 1.375000 1.750000 1.0 4.0\n",
- "title_length_ContentBased_score 1.556280 2.063469 1.0 4.0\n",
- "title_length_ContentBased_Lr 1.625729 1.773594 1.0 4.0\n",
- "movie_year_ContentBased_sample 2.250000 2.610077 1.0 4.0\n",
- "movie_year_ContentBased_score 1.866274 2.111422 1.0 4.0\n",
- "genres_ContentBased_sample 1.875000 2.271136 1.0 4.0\n",
- "genres_ContentBased_score 1.463388 1.793363 1.0 4.0\n",
- "rating_ContentBased_sample 1.289773 1.715759 1.0 4.0\n",
- "rating_ContentBased_score 2.482206 2.795490 1.0 4.0\n",
- "tags_ContentBased_sample 1.937500 2.128673 0.5 4.0\n",
- "tags_ContentBased_score 1.683499 1.782805 1.0 4.0\n",
- "tags_length_ContentBased_sample 1.187500 1.704773 1.0 4.0\n",
- "tags_length_ContentBased_score 1.564917 1.944345 0.5 4.0\n",
- "timestamp_ContentBased_sample 1.875000 2.277608 1.0 4.0\n",
- "timestamp_ContentBased_score 1.265317 1.512329 1.0 4.0"
- ]
- },
- "execution_count": 28,
- "metadata": {},
- "output_type": "execute_result"
+ "ename": "KeyboardInterrupt",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[4], line 16\u001b[0m\n\u001b[1;32m 14\u001b[0m sp_ratings \u001b[38;5;241m=\u001b[39m load_ratings(surprise_format\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 15\u001b[0m precomputed_dict \u001b[38;5;241m=\u001b[39m precomputed_information(pd\u001b[38;5;241m.\u001b[39mread_csv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata/tiny/evidence/ratings.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[0;32m---> 16\u001b[0m evaluation_report \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_evaluation_report\u001b[49m\u001b[43m(\u001b[49m\u001b[43mEvalConfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msp_ratings\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprecomputed_dict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mAVAILABLE_METRICS\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 17\u001b[0m export_evaluation_report(evaluation_report)\n",
+ "Cell \u001b[0;32mIn[2], line 114\u001b[0m, in \u001b[0;36mcreate_evaluation_report\u001b[0;34m(eval_config, sp_ratings, precomputed_dict, available_metrics)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(eval_config\u001b[38;5;241m.\u001b[39mfull_metrics) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTraining full predictions\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m--> 114\u001b[0m anti_testset_top_n \u001b[38;5;241m=\u001b[39m \u001b[43mgenerate_full_top_n\u001b[49m\u001b[43m(\u001b[49m\u001b[43malgo\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msp_ratings\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43meval_config\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m metric \u001b[38;5;129;01min\u001b[39;00m eval_config\u001b[38;5;241m.\u001b[39mfull_metrics:\n\u001b[1;32m 116\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m metric \u001b[38;5;129;01min\u001b[39;00m available_metrics[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfull\u001b[39m\u001b[38;5;124m'\u001b[39m]\n",
+ "Cell \u001b[0;32mIn[2], line 44\u001b[0m, in \u001b[0;36mgenerate_full_top_n\u001b[0;34m(algo, ratings_dataset, eval_config)\u001b[0m\n\u001b[1;32m 42\u001b[0m algo\u001b[38;5;241m.\u001b[39mfit(full_trainset) \u001b[38;5;66;03m# Train the algorithm on the full training set\u001b[39;00m\n\u001b[1;32m 43\u001b[0m anti_testset \u001b[38;5;241m=\u001b[39m full_trainset\u001b[38;5;241m.\u001b[39mbuild_anti_testset() \u001b[38;5;66;03m# Build the anti test-set\u001b[39;00m\n\u001b[0;32m---> 44\u001b[0m predictions \u001b[38;5;241m=\u001b[39m \u001b[43malgo\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtest\u001b[49m\u001b[43m(\u001b[49m\u001b[43manti_testset\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# Get predictions on the anti test-set\u001b[39;00m\n\u001b[1;32m 45\u001b[0m top_n \u001b[38;5;241m=\u001b[39m {}\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m uid, iid, _, est, _ \u001b[38;5;129;01min\u001b[39;00m predictions:\n",
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/surprise/prediction_algorithms/algo_base.py:161\u001b[0m, in \u001b[0;36mAlgoBase.test\u001b[0;34m(self, testset, verbose)\u001b[0m\n\u001b[1;32m 142\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Test the algorithm on given testset, i.e. estimate all the ratings\u001b[39;00m\n\u001b[1;32m 143\u001b[0m \u001b[38;5;124;03min the given testset.\u001b[39;00m\n\u001b[1;32m 144\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 156\u001b[0m \u001b[38;5;124;03m that contains all the estimated ratings.\u001b[39;00m\n\u001b[1;32m 157\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;66;03m# The ratings are translated back to their original scale.\u001b[39;00m\n\u001b[1;32m 160\u001b[0m predictions \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m--> 161\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpredict\u001b[49m\u001b[43m(\u001b[49m\u001b[43muid\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43miid\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mr_ui_trans\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverbose\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m (uid, iid, r_ui_trans) \u001b[38;5;129;01min\u001b[39;00m testset\n\u001b[1;32m 163\u001b[0m ]\n\u001b[1;32m 164\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m predictions\n",
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/surprise/prediction_algorithms/algo_base.py:119\u001b[0m, in \u001b[0;36mAlgoBase.predict\u001b[0;34m(self, uid, iid, r_ui, clip, verbose)\u001b[0m\n\u001b[1;32m 117\u001b[0m lower_bound, higher_bound \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtrainset\u001b[38;5;241m.\u001b[39mrating_scale\n\u001b[1;32m 118\u001b[0m est \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mmin\u001b[39m(higher_bound, est)\n\u001b[0;32m--> 119\u001b[0m est \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mmax\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mlower_bound\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mest\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 121\u001b[0m pred \u001b[38;5;241m=\u001b[39m Prediction(uid, iid, r_ui, est, details)\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m verbose:\n",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+ ]
}
],
"source": [
@@ -590,7 +295,7 @@
"}\n",
"\n",
"sp_ratings = load_ratings(surprise_format=True)\n",
- "precomputed_dict = precomputed_information(pd.read_csv(\"data/tiny/evidence/ratings.csv\"))\n",
+ "precomputed_dict = precomputed_information(pd.read_csv(\"data/test/evidence/ratings.csv\"))\n",
"evaluation_report = create_evaluation_report(EvalConfig, sp_ratings, precomputed_dict, AVAILABLE_METRICS)\n",
"export_evaluation_report(evaluation_report)"
]
diff --git a/models.py b/models.py
index 790d8feacef92ecbca42e92f7c233bad6e65d040..13fe7ae5d6835ff4324de063cba7eda470c41333 100644
--- a/models.py
+++ b/models.py
@@ -5,7 +5,7 @@ from collections import defaultdict
import pandas as pd
import numpy as np
import random as rd
-from surprise import AlgoBase, SVD, KNNWithMeans
+from surprise import AlgoBase, SVD
from surprise import PredictionImpossible
# import local
@@ -120,36 +120,32 @@ class ContentBased(AlgoBase):
df_features = df_items[C.LABEL_COL].apply(lambda x: len(x)).to_frame('n_character_title')
elif features_method == "movie_year" :
- df_features = df_items['movie_year'] = df_items['title'].str.extract(r'\((\d{4})\)', expand=False)
+            df_features = df_items['title'].str.extract(r'\((\d{4})\)', expand=False).to_frame('movie_year')
elif features_method == "genres" :
genres_list = df_items['genres'].str.split('|').explode().unique()
for genre in genres_list:
- df_features = df_items['genres'].str.contains(genre).astype(int)
+ df_features = df_items['genres'].str.contains(genre).astype(int).to_frame('genres')
- elif features_method == "combination" :
+ elif features_method == "combination":
genres_list = df_items['genres'].str.split('|').explode().unique()
- df_movie = df_items['movie_year'] = df_items['title'].str.extract(r'\((\d{4})\)', expand=False)
+ df_movie = df_items['title'].str.extract(r'\((\d{4})\)', expand=False).to_frame('movie_year')
for genre in genres_list:
- df_genre = df_items['genres'].str.contains(genre).astype(int)
- df_tag = df_tag['tag'].apply(lambda x: sum(len(tag) for tag in x.split(','))if isinstance(x, str) else 0)
- df_year = df_items['movie_year'] = df_items['title'].str.extract(r'\((\d{4})\)', expand=False)
-
+ df_genre = df_items['genres'].str.contains(genre).astype(int).to_frame(genre)
+
+ df_tag['tag_length'] = df_tag['tag'].apply(lambda x: sum(len(tag) for tag in x.split(',')) if isinstance(x, str) else 0)
df_features = pd.concat([df_genre, df_tag, df_movie], axis=1)
-
+
elif features_method == "rating" :
df_features = df_ratings.groupby('movieId')['rating'].transform('mean').to_frame('avg_rating')
elif features_method == "tags" :
- df_features = df_tag['tag'].apply(lambda x: len(x.split(',')) if isinstance(x, str) else 0)
+ df_features = df_tag['tag'].apply(lambda x: len(x.split(',')) if isinstance(x, str) else 0).to_frame('tags')
elif features_method == "tags_length" :
- df_features = df_tag['tag'].apply(lambda x: sum(len(tag) for tag in x.split(','))if isinstance(x, str) else 0)
+            df_features = df_tag['tag'].apply(lambda x: sum(len(tag) for tag in x.split(',')) if isinstance(x, str) else 0).to_frame('n_character_tags')
- elif features_method == "timestamp" :
- df_features = df_ratings['timestamp_sin'] = np.sin(2 * np.pi * df_ratings['timestamp'] / 86400)
- df_features = df_ratings['timestamp_cos'] = np.cos(2 * np.pi * df_ratings['timestamp'] / 86400)
else: # (implement other feature creations here)
raise NotImplementedError(f'Feature method {features_method} not yet implemented')
@@ -170,6 +166,7 @@ class ContentBased(AlgoBase):
elif self.regressor_method == 'random_sample':
for u in self.user_profile:
self.user_profile[u] = [rating for _, rating in self.trainset.ur[u]]
+
elif self.regressor_method == 'linear_regression' :
for u in self.user_profile:
@@ -181,8 +178,42 @@ class ContentBased(AlgoBase):
df_user["item_id"] = df_user["item_id"].map(trainset.to_raw_iid)
df_user = df_user.merge(self.content_features, left_on = "item_id", right_index = True, how = 'left')
+
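+                # Pick the single feature column produced by create_content_features as the regressor input X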
+ if 'n_character_title' in df_user.columns:
+                    # If 'n_character_title' is available as a feature
+ X = df_user['n_character_title'].values.reshape(-1, 1)
+
+ elif 'avg_relevance' in df_user.columns:
+                    # If 'avg_relevance' is available as a feature
+ X = df_user['avg_relevance'].values.reshape(-1, 1)
+
+ elif 'movie_year' in df_user.columns:
+                    # If 'movie_year' is available as a feature
+ X = df_user['movie_year'].values.reshape(-1, 1)
+
+ elif 'genres' in df_user.columns:
+                    # If 'genres' is available as a feature
+ X = df_user['genres'].values.reshape(-1, 1)
+
+ elif 'combination' in df_user.columns:
+                    # If 'combination' is available as a feature
+ X = df_user['combination'].values.reshape(-1, 1)
+
+ elif 'avg_rating' in df_user.columns:
+                    # If 'avg_rating' is available as a feature
+ X = df_user['avg_rating'].values.reshape(-1, 1)
- X = df_user['n_character_title'].values.reshape(-1,1)
+ elif 'tags' in df_user.columns:
+                    # If 'tags' is available as a feature
+ X = df_user['tags'].values.reshape(-1, 1)
+
+ elif 'n_character_tags' in df_user.columns:
+                    # If 'n_character_tags' is available as a feature
+ X = df_user['n_character_tags'].values.reshape(-1, 1)
+
+ else:
+                    # If no suitable feature column is available
+                    continue # Or handle the error/exception here
y = df_user['user_ratings'].values
@@ -205,9 +236,43 @@ class ContentBased(AlgoBase):
df_user = df_user.merge(self.content_features, left_on = "item_id", right_index = True, how = 'left')
- X = df_user['n_character_title'].values.reshape(-1, 1)
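+                # Same single-feature selection as in the linear_regression branch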
+ if 'n_character_title' in df_user.columns:
+                    # If 'n_character_title' is available as a feature
+ X = df_user['n_character_title'].values.reshape(-1, 1)
+
+ elif 'avg_relevance' in df_user.columns:
+                    # If 'avg_relevance' is available as a feature
+ X = df_user['avg_relevance'].values.reshape(-1, 1)
+
+ elif 'movie_year' in df_user.columns:
+                    # If 'movie_year' is available as a feature
+ X = df_user['movie_year'].values.reshape(-1, 1)
+
+ elif 'genres' in df_user.columns:
+                    # If 'genres' is available as a feature
+ X = df_user['genres'].values.reshape(-1, 1)
+
+ elif 'combination' in df_user.columns:
+                    # If 'combination' is available as a feature
+ X = df_user['combination'].values.reshape(-1, 1)
+
+ elif 'avg_rating' in df_user.columns:
+                    # If 'avg_rating' is available as a feature
+ X = df_user['avg_rating'].values.reshape(-1, 1)
+
+ elif 'tags' in df_user.columns:
+                    # If 'tags' is available as a feature
+ X = df_user['tags'].values.reshape(-1, 1)
+
+ elif 'n_character_tags' in df_user.columns:
+                    # If 'n_character_tags' is available as a feature
+ X = df_user['n_character_tags'].values.reshape(-1, 1)
+
+ else:
+                    # If no suitable feature column is available
+                    continue # Or handle the error/exception here
y = df_user['user_ratings'].values
- svr_regressor = SVR(kernel='rbf', C=0.00000000001, epsilon=0.2)
+ svr_regressor = SVR(kernel='rbf', C=10, epsilon=0.2)
svr_regressor.fit(X, y)
self.user_profile[u] = svr_regressor
@@ -223,7 +288,42 @@ class ContentBased(AlgoBase):
df_user = df_user.merge(self.content_features, left_on = "item_id", right_index = True, how = 'left')
- X = df_user['n_character_title'].values.reshape(-1, 1)
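+                # Same single-feature selection as in the linear_regression branch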
+ if 'n_character_title' in df_user.columns:
+                    # If 'n_character_title' is available as a feature
+ X = df_user['n_character_title'].values.reshape(-1, 1)
+
+ elif 'avg_relevance' in df_user.columns:
+ # Si 'n_character_title' est disponible comme caractéristique
+ X = df_user['avg_relevance'].values.reshape(-1, 1)
+
+ elif 'movie_year' in df_user.columns:
+ # Si 'n_character_title' est disponible comme caractéristique
+ X = df_user['movie_year'].values.reshape(-1, 1)
+
+ elif 'genres' in df_user.columns:
+ # Si 'n_character_title' est disponible comme caractéristique
+ X = df_user['genres'].values.reshape(-1, 1)
+
+ elif 'combination' in df_user.columns:
+ # Si 'n_character_title' est disponible comme caractéristique
+ X = df_user['combination'].values.reshape(-1, 1)
+
+ elif 'avg_rating' in df_user.columns:
+ # Si 'n_character_title' est disponible comme caractéristique
+ X = df_user['avg_rating'].values.reshape(-1, 1)
+
+ elif 'tags' in df_user.columns:
+ # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)
+ X = df_user['tags'].values.reshape(-1, 1)
+
+ elif 'n_character_tags' in df_user.columns:
+ # Si une autre caractéristique est disponible (remplace 'other_feature' par le nom de ta caractéristique)
+ X = df_user['n_character_tags'].values.reshape(-1, 1)
+
+ else:
+ # Si aucune caractéristique appropriée n'est disponible
+ continue # Ou gère le cas d'erreur/exception ici
+
y = df_user['user_ratings'].values
-            gb_regressor = GradientBoostingRegressor(n_estimators=10000000, learning_rate=0.1, max_depth=3)
+            gb_regressor = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)
gb_regressor.fit(X, y)
@@ -242,12 +342,45 @@ class ContentBased(AlgoBase):
df_user = df_user.merge(self.content_features, left_on = "item_id", right_index = True, how = 'left')
- X = df_user['n_character_title'].values.reshape(-1, 1)
+            if 'n_character_title' in df_user.columns:
+                # If 'n_character_title' is available as a feature
+                X = df_user['n_character_title'].values.reshape(-1, 1)
+
+            elif 'avg_relevance' in df_user.columns:
+                # If 'avg_relevance' is available as a feature
+                X = df_user['avg_relevance'].values.reshape(-1, 1)
+
+            elif 'movie_year' in df_user.columns:
+                # If 'movie_year' is available as a feature
+                X = df_user['movie_year'].values.reshape(-1, 1)
+
+            elif 'genres' in df_user.columns:
+                # If 'genres' is available as a feature
+                X = df_user['genres'].values.reshape(-1, 1)
+
+            elif 'combination' in df_user.columns:
+                # If 'combination' is available as a feature
+                X = df_user['combination'].values.reshape(-1, 1)
+
+            elif 'avg_rating' in df_user.columns:
+                # If 'avg_rating' is available as a feature
+                X = df_user['avg_rating'].values.reshape(-1, 1)
+
+            elif 'tags' in df_user.columns:
+                # If 'tags' is available as a feature
+                X = df_user['tags'].values.reshape(-1, 1)
+
+            elif 'n_character_tags' in df_user.columns:
+                # If 'n_character_tags' is available as a feature
+                X = df_user['n_character_tags'].values.reshape(-1, 1)
+
+            else:
+                # No suitable feature column is available for this user
+                continue  # or handle this as an error/exception
y = df_user['user_ratings'].values
rf_regressor = RandomForestRegressor(n_estimators=100)
rf_regressor.fit(X, y)
self.user_profile[u] = rf_regressor
-
else :
pass
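+        # Sketch (assumption, not part of this patch): the feature-column selection
+        # above is repeated verbatim in every regressor branch and could be factored
+        # into a single helper, e.g.:
+        #
+        #   def _get_feature_matrix(self, df_user):
+        #       """Return the first available feature column as an (n, 1) array,
+        #       or None when no usable column is present."""
+        #       for col in ('n_character_title', 'avg_relevance', 'movie_year',
+        #                   'genres', 'combination', 'avg_rating', 'tags',
+        #                   'n_character_tags'):
+        #           if col in df_user.columns:
+        #               return df_user[col].values.reshape(-1, 1)
+        #       return None
+        #
+        # Note (assumption): non-numeric columns such as 'genres' or 'tags' would
+        # still need to be encoded (e.g. one-hot or TF-IDF) before fitting a regressor.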
@@ -326,39 +459,52 @@ def test_contentbased_class(feature_method, regressor_method):
prediction = content_algo.predict(anti_test_set_first[0], anti_test_set_first[1])
print(prediction)
-"""
+
+
print("title_length :")
test_contentbased_class(feature_method = "title_length" , regressor_method = "random_score")
test_contentbased_class(feature_method = "title_length" , regressor_method = "random_sample")
test_contentbased_class(feature_method = "title_length" , regressor_method = "linear_regression")
+test_contentbased_class(feature_method="title_length", regressor_method="svr_regression")
+test_contentbased_class(feature_method="title_length", regressor_method="gradient_boosting")
+test_contentbased_class(feature_method="title_length", regressor_method="random_forest")
print("\n")
print("movie_year : ")
test_contentbased_class(feature_method="movie_year", regressor_method="random_score")
test_contentbased_class(feature_method="movie_year", regressor_method="random_sample")
-#test_contentbased_class(feature_method="movie_year", regressor_method="linear_regression")
+test_contentbased_class(feature_method="movie_year", regressor_method="linear_regression")
+test_contentbased_class(feature_method="movie_year", regressor_method="svr_regression")
+test_contentbased_class(feature_method="movie_year", regressor_method="gradient_boosting")
+test_contentbased_class(feature_method="movie_year", regressor_method="random_forest")
print("\n")
print("genres : ")
test_contentbased_class(feature_method="genres", regressor_method="random_score")
test_contentbased_class(feature_method="genres", regressor_method="random_sample")
-#test_contentbased_class(feature_method="genres", regressor_method="linear_regression")
+test_contentbased_class(feature_method="genres", regressor_method="linear_regression")
+test_contentbased_class(feature_method="genres", regressor_method="svr_regression")
+test_contentbased_class(feature_method="genres", regressor_method="gradient_boosting")
+test_contentbased_class(feature_method="genres", regressor_method="random_forest")
print("\n")
print("rating : ")
test_contentbased_class(feature_method="rating", regressor_method="random_score")
test_contentbased_class(feature_method="rating", regressor_method="random_sample")
-#test_contentbased_class(feature_method="rating", regressor_method="linear_regression")
+test_contentbased_class(feature_method="rating", regressor_method="linear_regression")
+# test_contentbased_class(feature_method="rating", regressor_method="svr_regression")
+# test_contentbased_class(feature_method="rating", regressor_method="gradient_boosting")
+# test_contentbased_class(feature_method="rating", regressor_method="random_forest")
print("\n")
print("tags : ")
test_contentbased_class(feature_method="tags", regressor_method="random_score")
test_contentbased_class(feature_method="tags", regressor_method="random_sample")
#test_contentbased_class(feature_method="tags", regressor_method="linear_regression")
+# test_contentbased_class(feature_method="tags", regressor_method="svr_regression")
+# test_contentbased_class(feature_method="tags", regressor_method="gradient_boosting")
+# test_contentbased_class(feature_method="tags", regressor_method="random_forest")
print("\n")
print("tags_length : ")
test_contentbased_class(feature_method="tags_length", regressor_method="random_score")
test_contentbased_class(feature_method="tags_length", regressor_method="random_sample")
-#test_contentbased_class(feature_method="tags_length", regressor_method="linear_regression")
-print("\n")
-print("timestamp : ")
-test_contentbased_class(feature_method="timestamp", regressor_method="random_score")
-test_contentbased_class(feature_method="timestamp", regressor_method="random_sample")
-#test_contentbased_class(feature_method="timestamp", regressor_method="linear_regression")
-"""
\ No newline at end of file
+# test_contentbased_class(feature_method="tags_length", regressor_method="linear_regression")
+# test_contentbased_class(feature_method="tags_length", regressor_method="svr_regression")
+# test_contentbased_class(feature_method="tags_length", regressor_method="gradient_boosting")
+# test_contentbased_class(feature_method="tags_length", regressor_method="random_forest")
\ No newline at end of file
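+
+# Sketch (assumption, not part of this patch): the explicit test calls above could
+# be generated from two lists instead of being written out by hand; combinations
+# known to be slow or unsupported can simply be filtered out of the lists.
+features = ["title_length", "movie_year", "genres", "rating", "tags", "tags_length"]
+regressors = ["random_score", "random_sample", "linear_regression",
+              "svr_regression", "gradient_boosting", "random_forest"]
+for feature in features:
+    print(f"{feature} :")
+    for regressor in regressors:
+        test_contentbased_class(feature_method=feature, regressor_method=regressor)
+    print("\n")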