From fdfdc3a7120ef8798f4115e71bbbb60a7b6f3f76 Mon Sep 17 00:00:00 2001
From: Adrien <adrien.payen@student.uclouvain.be>
Date: Fri, 24 May 2024 22:26:16 +0200
Subject: [PATCH] update

---
 Home.py        |  2 +-
 README.md      | 58 +++++++++++++++++++++++++++++++++++++++-----------
 recommender.py | 30 +++++++++++++-------------
 3 files changed, 61 insertions(+), 29 deletions(-)

diff --git a/Home.py b/Home.py
index 955aa963..97916b1c 100644
--- a/Home.py
+++ b/Home.py
@@ -200,7 +200,7 @@ def display_content_based_recommendations(user_name, user_id=-1, n=15):
     cols_html = ""
 
     # Get top N recommendations using content-based filtering
-    top_n_recommendations = test_contentbased_class(["title_length", "movie_year", "genre", "avg_rating"], "ridge_regression", user_id=-1, n=15)
+    top_n_recommendations = test_contentbased_class(["title_length", "movie_year", "genre", "avg_rating"], "random_forest", user_id=-1, n=15)
     
     if top_n_recommendations:
         st.subheader(f"Discover Great Content")  # Display section title
diff --git a/README.md b/README.md
index d7ed9045..5a08275e 100644
--- a/README.md
+++ b/README.md
@@ -21,11 +21,10 @@ pip install streamlit
 pip install requests
 ```
 
-
 ## Project Structure
 The project is organized into the following key components:
 
-### Configuration and Constants
+### Configuration
 1. ***configs.py***
  - Defines an `EvalConfig` class for storing configurations for evaluating multiple recommendation models. 
     
@@ -38,7 +37,6 @@ The project is organized into the following key components:
 2. ***constants.py***
 - This code defines a Constant class that stores paths to datasets and column names for content and evidence data. Paths to content, evidence, and evaluation directories are defined based on the data directory path. File names and column names for article and rating data are specified, along with the rating scale.
 
-### Data Loaders
 3. ***loaders.py***
 - Loads rating and item data from specified CSV files in the Constant class of the constants module.
     
@@ -50,7 +48,6 @@ The project is organized into the following key components:
     
 - The data is loaded into pandas DataFrames, with an option available to load rating data in the format expected by the Surprise library if needed.
 
-### Recommender Models
 4. ***models.py***
 - Defines several basic recommendation algorithms for the Surprise library.
     
@@ -58,13 +55,18 @@ The project is organized into the following key components:
 
 - Recommendation algorithms are defined as classes inheriting from Surprise's `AlgoBase` class, each implementing an `estimate`  method to predict user ratings for items.
 
-### Analytics and Evaluation 
-5. ***analytics_ui.ipynb***
+
+### Backend folder
+This folder contains all the data, Jupyter notebooks, and Python scripts used to improve the user experience.
+
+#### Analytics
+***analytics_small.ipynb***
 - Performs data analysis to understand the datasets and their properties.
 
 - Analyzes the number of ratings, unique users, unique items, and distribution of ratings.
 
-6. ***evaluator.ipynb***
+#### Evaluation 
+***evaluator.ipynb***
 
  - Evaluates different recommendation models using various cross-validation techniques.
 
@@ -79,17 +81,47 @@ The project is organized into the following key components:
  - Exports the evaluation report to a CSV file.
 
 
-7. ***analytics_tiny.ipynb***
+#### Content Based
+***hackathon_make_predictions.ipynb***
+
+Defines a function `make_hackathon_prediction` that takes `feature_method` and `regressor_method` as input.
+
+Inside this function:
+
+- Loads the training data and converts it into the format suitable for Surprise.
+- Trains a Content-Based model (ContentBased) on the training set using the specified feature and regressor methods.
+- Makes predictions on the test set by loading the test data from a CSV file and converting it into records.
+
+Converts the predictions into a DataFrame and saves them as a CSV file.
+
+It then calls this function with specific parameters and prints the generated predictions.
+
+***content_based.ipynb***
+1. ***Feature Extraction Methods***
+The system supports the following feature extraction methods:
+- `genre`: Extracts genres of the movies using TF-IDF vectorization.
+- `movie_year`: Extracts the release year of the movies.
+- `avg_rating`: Computes the average rating for each movie.
+- `title_length`: Computes the length of the movie title.
 
- - Analyzes a smaller version of the dataset for debugging purposes.
+2. ***Regression Models***
+The system supports the following regression models for predicting user ratings:
 
- - Similar analyses to `analytics_ui.ipynb`, but on a smaller scale to speed up computation time.
+- `linear_regression`
+- `random_forest`
+- `lasso_regression`
+- `gradient_boosting`
+- `ridge_regression`
+- `svr_regression`
+- `elastic_net`
+- `knn_regression`
+- `decision_tree` 
+- `adaboost`
+- `xgboost`
+- `lightgbm`
 
- 8. ***analytics_test.ipynb***
 
- - Analyzes a test dataset to understand algorithm behaviors during development.
 
- - Similar analyses to `analytics_ui.ipynb`, but on a smaller test dataset to better understand how algorithms work.
 
 
 ### Datasets
diff --git a/recommender.py b/recommender.py
index 1b7ff0c4..5c447df5 100644
--- a/recommender.py
+++ b/recommender.py
@@ -620,14 +620,14 @@ def compare_similarity_measures(trainset,testset):
     results['KNN_MSD_MAE'] = mae_msd
 
     # Train and evaluate KNN model with Pearson correlation similarity
-    sim_options_pearson = {'name': 'pearson', 'user_based': True}
-    knn_pearson = KNNWithMeans(sim_options=sim_options_pearson)
-    knn_pearson.fit(trainset)
-    predictions_pearson = knn_pearson.test(testset)
-    rmse_pearson = accuracy.rmse(predictions_pearson)
-    mae_pearson = accuracy.mae(predictions_pearson)
-    results['KNN_Pearson_RMSE'] = rmse_pearson
-    results['KNN_Pearson_MAE'] = mae_pearson
+    sim_options_cosine = {'name': 'cosine', 'user_based': True}
+    knn_cosine = KNNWithMeans(sim_options=sim_options_cosine)
+    knn_cosine.fit(trainset)
+    predictions_cosine = knn_cosine.test(testset)
+    rmse_cosine = accuracy.rmse(predictions_cosine)
+    mae_cosine = accuracy.mae(predictions_cosine)
+    results['KNN_cosine_RMSE'] = rmse_cosine
+    results['KNN_cosine_MAE'] = mae_cosine
 
 
     # Train and evaluate UserBased model with MSD similarity
@@ -640,13 +640,13 @@ def compare_similarity_measures(trainset,testset):
     results['UserBased_MSD_MAE'] = mae_user_based_msd
 
     # Train and evaluate UserBased model with Pearson correlation similarity
-    user_based_pearson = UserBased(sim_options={'name': 'pearson'})
-    user_based_pearson.fit(trainset)
-    predictions_user_based_pearson = user_based_pearson.test(testset)
-    rmse_user_based_pearson = accuracy.rmse(predictions_user_based_pearson)
-    mae_user_based_pearson = accuracy.mae(predictions_user_based_pearson)
-    results['UserBased_Pearson_RMSE'] = rmse_user_based_pearson
-    results['UserBased_Pearson_MAE'] = mae_user_based_pearson
+    user_based_cosine = UserBased(sim_options={'name': 'cosine'})
+    user_based_cosine.fit(trainset)
+    predictions_user_based_cosine = user_based_cosine.test(testset)
+    rmse_user_based_cosine = accuracy.rmse(predictions_user_based_cosine)
+    mae_user_based_cosine = accuracy.mae(predictions_user_based_cosine)
+    results['UserBased_cosine_RMSE'] = rmse_user_based_cosine
+    results['UserBased_cosine_MAE'] = mae_user_based_cosine
 
 
     # Train and evaluate OtherUserBased models
-- 
GitLab