Newer
Older
from itertools import combinations
# Feature extraction methods that may be combined with one another
features_methods = ['genre', 'movie_year', 'avg_rating', 'title_length']

# Regression methods that are paired with each feature subset
regressor_methods = [
    'linear_regression',
    'random_forest',
    'lasso_regression',
    'gradient_boosting',
    'ridge_regression',
    'svr_regression',
]

# Every non-empty subset of features_methods, stored as tuples and ordered
# by subset size (single features first, the full set last)
feature_combinations = [
    subset
    for size in range(1, len(features_methods) + 1)
    for subset in combinations(features_methods, size)
]
# Pair every feature subset with every regression method. Each entry is a
# (model_name, model_class, arguments) tuple: the name is built from the
# regressor and the joined feature names, and the arguments dict carries the
# feature subset (as a list) and the regressor name.
model_combinations = [
    (
        f"combination_{regressor}_{'_'.join(feature_set)}",
        ContentBased,
        {"features_method": list(feature_set), "regressor_method": regressor},
    )
    for feature_set in feature_combinations
    for regressor in regressor_methods
]
class EvalConfig:
    """Configuration settings for evaluation.

    Holds the list of models to evaluate and the metric names computed for
    each evaluation type (split, leave-one-out, full dataset).
    """

    # List of models to evaluate, each tuple containing model_name, model class, and model parameters (dict).
    # Model names are kept unique so that entries can be told apart.
    # NOTE(review): "features_method" is given as a list for title_length but as a plain
    # string for the other features, and the names "genres"/"rating" used here differ from
    # "genre"/"avg_rating" in the module-level features_methods list — confirm which forms
    # ContentBased accepts.
    models = [
        ("baseline_1", ModelBaseline1, {}),
        ("baseline_2", ModelBaseline2, {}),
        ("baseline_3", ModelBaseline3, {}),
        # The parameters must be a dict of keyword arguments like every other entry (this was a set literal before).
        ("title_length_ContentBased_sample", ContentBased, {"features_method" : ["title_length"], "regressor_method" : "random_sample"}),
        ("title_length_ContentBased_score", ContentBased, {"features_method" : ["title_length"], "regressor_method" : "random_score"}),
        ("title_length_ContentBased_Lr", ContentBased, {"features_method" : ["title_length"], "regressor_method" : "linear_regression"}),
        ("title_length_ContentBased_Svr", ContentBased, {"features_method" : ["title_length"], "regressor_method" : "svr_regression"}),
        ("title_length_ContentBased_Gb", ContentBased, {"features_method" : ["title_length"], "regressor_method" : "gradient_boosting"}),
        ("title_length_ContentBased_Rf", ContentBased, {"features_method" : ["title_length"], "regressor_method" : "random_forest"}),
        ("movie_year_ContentBased_sample", ContentBased, {"features_method" : "movie_year", "regressor_method" : "random_sample"}),
        ("movie_year_ContentBased_score", ContentBased, {"features_method" : "movie_year", "regressor_method" : "random_score"}),
        #("movie_year_ContentBased_Lr", ContentBased, {"features_method" : "movie_year", "regressor_method" : "linear_regression"}),
        #("movie_year_ContentBased_Svr", ContentBased, {"features_method" : "movie_year", "regressor_method" : "svr_regression"}),
        #("movie_year_ContentBased_Gb", ContentBased, {"features_method" : "movie_year", "regressor_method" : "gradient_boosting"}),
        #("movie_year_ContentBased_Rf", ContentBased, {"features_method" : "movie_year", "regressor_method" : "random_forest"}),
        ("genres_ContentBased_sample", ContentBased, {"features_method" : "genres", "regressor_method" : "random_sample"}),
        ("genres_ContentBased_score", ContentBased, {"features_method" : "genres", "regressor_method" : "random_score"}),
        #("genres_ContentBased_Lr", ContentBased, {"features_method" : "genres", "regressor_method" : "linear_regression"}),
        #("genres_ContentBased_Svr", ContentBased, {"features_method" : "genres", "regressor_method" : "svr_regression"}),
        #("genres_ContentBased_Gb", ContentBased, {"features_method" : "genres", "regressor_method" : "gradient_boosting"}),
        #("genres_ContentBased_Rf", ContentBased, {"features_method" : "genres", "regressor_method" : "random_forest"}),
        ("rating_ContentBased_sample", ContentBased, {"features_method" : "rating", "regressor_method" : "random_sample"}),
        ("rating_ContentBased_score", ContentBased, {"features_method" : "rating", "regressor_method" : "random_score"}),
        #("rating_ContentBased_Lr", ContentBased, {"features_method" : "rating", "regressor_method" : "linear_regression"}),
        #("rating_ContentBased_Svr", ContentBased, {"features_method" : "rating", "regressor_method" : "svr_regression"}),
        #("rating_ContentBased_Gb", ContentBased, {"features_method" : "rating", "regressor_method" : "gradient_boosting"}),
        #("rating_ContentBased_Rf", ContentBased, {"features_method" : "rating", "regressor_method" : "random_forest"}),
        ("tags_ContentBased_sample", ContentBased, {"features_method" : "tags", "regressor_method" : "random_sample"}),
        ("tags_ContentBased_score", ContentBased, {"features_method" : "tags", "regressor_method" : "random_score"}),
        #("tags_ContentBased_Lr", ContentBased, {"features_method" : "tags", "regressor_method" : "linear_regression"}),
        #("tags_ContentBased_Svr", ContentBased, {"features_method" : "tags", "regressor_method" : "svr_regression"}),
        #("tags_ContentBased_Gb", ContentBased, {"features_method" : "tags", "regressor_method" : "gradient_boosting"}),
        #("tags_ContentBased_Rf", ContentBased, {"features_method" : "tags", "regressor_method" : "random_forest"}),
        ("tags_length_ContentBased_sample", ContentBased, {"features_method" : "tags_length", "regressor_method" : "random_sample"}),
        ("tags_length_ContentBased_score", ContentBased, {"features_method" : "tags_length", "regressor_method" : "random_score"}),
        #("tags_length_ContentBased_Lr", ContentBased, {"features_method" : "tags_length", "regressor_method" : "linear_regression"}),
        #("tags_length_ContentBased_Svr", ContentBased, {"features_method" : "tags_length", "regressor_method" : "svr_regression"}),
        #("tags_length_ContentBased_Gb", ContentBased, {"features_method" : "tags_length", "regressor_method" : "gradient_boosting"}),
        #("tags_length_ContentBased_Rf", ContentBased, {"features_method" : "tags_length", "regressor_method" : "random_forest"}),
        ("timestamp_ContentBased_sample", ContentBased, {"features_method" : "timestamp", "regressor_method" : "random_sample"}),
        ("timestamp_ContentBased_score", ContentBased, {"features_method" : "timestamp", "regressor_method" : "random_score"}),
        #("timestamp_ContentBased_Lr", ContentBased, {"features_method" : "timestamp", "regressor_method" : "linear_regression"}),
        #("timestamp_ContentBased_Svr", ContentBased, {"features_method" : "timestamp", "regressor_method" : "svr_regression"}),
        #("timestamp_ContentBased_Gb", ContentBased, {"features_method" : "timestamp", "regressor_method" : "gradient_boosting"}),
        #("timestamp_ContentBased_Rf", ContentBased, {"features_method" : "timestamp", "regressor_method" : "random_forest"}),
    ]

    # Add the combinations of ContentBased models to the list of models
    models.extend(model_combinations)

    # Print the models for verification (runs once, when the class body is executed).
    # NOTE: the loop variable stays bound afterwards as the class attribute `model`.
    for model in models:
        print(model)

    # Metrics to compute for split evaluation
    split_metrics = ["mae", "rmse"]

    # Metrics to compute for Leave-One-Out (LOO) evaluation
    loo_metrics = ["hit_rate"]

    # Metrics to compute for full dataset evaluation
    full_metrics = ["novelty"]

    # Split parameters (none are defined here)

    # Loo parameters
    top_n_value = 10  # -- configure the number of recommendations (> 1) --