Skip to content
GitLab
Explorer
Connexion
S'inscrire
Navigation principale
Rechercher ou aller à…
Projet
R
recomsys
Gestion
Activité
Membres
Labels
Programmation
Tickets
Tableaux des tickets
Jalons
Wiki
Code
Requêtes de fusion
Dépôt
Branches
Validations
Étiquettes
Graphe du dépôt
Comparer les révisions
Extraits de code
Compilation
Pipelines
Jobs
Planifications de pipeline
Artéfacts
Déploiement
Releases
Registre de paquets
Registre de conteneur
Registre de modèles
Opération
Environnements
Modules Terraform
Surveillance
Incidents
Analyse
Données d'analyse des chaînes de valeur
Analyse des contributeurs
Données d'analyse CI/CD
Données d'analyse du dépôt
Expériences du modèle
Aide
Aide
Support
Documentation de GitLab
Comparer les forfaits GitLab
Forum de la communauté
Contribuer à GitLab
Donner votre avis
Conditions générales et politique de confidentialité
Raccourcis clavier
?
Extraits de code
Groupes
Projets
Afficher davantage de fils d'Ariane
recommender_system
recomsys
Validations
4c44fb82
Valider
4c44fb82
rédigé
1 year ago
par
Adrien Payen
Parcourir les fichiers
Options
Téléchargements
Correctifs
Plain Diff
update
parent
a7471cdc
Branches
master
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
Modifications
1
Masquer les modifications d'espaces
En ligne
Côte à côte
Affichage de
1 fichier modifié
content_based.ipynb
+2
-1
2 ajouts, 1 suppression
content_based.ipynb
avec
2 ajouts
et
1 suppression
content_based.ipynb
+
2
−
1
Voir le fichier @
4c44fb82
...
@@ -556,7 +556,8 @@
...
@@ -556,7 +556,8 @@
"cb = ContentBased(\"movie_year\", \"random_sample\")\n",
"cb = ContentBased(\"movie_year\", \"random_sample\")\n",
"\n",
"\n",
"print(cb.explain('11'))\n",
"print(cb.explain('11'))\n",
"\n"
"\n",
"print('test')"
]
]
},
},
{
{
...
...
%% Cell type:markdown id:82d5ca82 tags:
%% Cell type:markdown id:82d5ca82 tags:
# Packages
# Packages
%% Cell type:code id:277473a3 tags:
%% Cell type:code id:277473a3 tags:
```
python
```
python
%
load_ext
autoreload
%
load_ext
autoreload
%
autoreload
2
%
autoreload
2
import
numpy
as
np
import
numpy
as
np
import
pandas
as
pd
import
pandas
as
pd
import
random
as
rd
import
random
as
rd
from
surprise
import
AlgoBase
from
surprise
import
AlgoBase
from
surprise.prediction_algorithms.predictions
import
PredictionImpossible
from
surprise.prediction_algorithms.predictions
import
PredictionImpossible
from
loaders
import
load_ratings
from
loaders
import
load_ratings
from
loaders
import
load_items
from
loaders
import
load_items
from
constants
import
Constant
as
C
from
constants
import
Constant
as
C
from
sklearn.linear_model
import
LinearRegression
from
sklearn.linear_model
import
LinearRegression
from
sklearn.ensemble
import
GradientBoostingRegressor
,
RandomForestRegressor
from
sklearn.ensemble
import
GradientBoostingRegressor
,
RandomForestRegressor
from
sklearn.svm
import
SVR
from
sklearn.svm
import
SVR
from
sklearn.feature_extraction.text
import
TfidfVectorizer
from
sklearn.feature_extraction.text
import
TfidfVectorizer
```
```
%% Output
%% Output
The autoreload extension is already loaded. To reload it, use:
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload
%reload_ext autoreload
%% Cell type:markdown id:a42c16bf tags:
%% Cell type:markdown id:a42c16bf tags:
# Explore and select content features
# Explore and select content features
%% Cell type:code id:e8378976 tags:
%% Cell type:code id:e8378976 tags:
```
python
```
python
# All the dataframes
# Each source table is loaded exactly once; the tags file used to be re-read
# a second time further down, which only repeated the disk I/O.
df_items = load_items()
df_ratings = load_ratings()
df_tag = pd.read_csv(C.CONTENT_PATH / C.TAGS_FILENAME)
df_genome_score = pd.read_csv("data/hackathon/content/genome-scores.csv")
df_genome_tag = pd.read_csv("data/hackathon/content/genome-tags.csv")

# Example 1 : create title_length features
# One column holding the number of characters of each item label.
df_features = df_items[C.LABEL_COL].apply(len).to_frame('n_character_title')
display(df_features.head())

# Example 2 : look at the raw tag column (df_tag is already loaded above).
# Note that df_features is a Series here, not a DataFrame.
df_features = df_tag[C.TAG]
display(df_features.head())

# (explore here other features)
```
```
%% Output
%% Output
%% Cell type:markdown id:a2c9a2b6 tags:
%% Cell type:markdown id:a2c9a2b6 tags:
# Build a content-based model
# Build a content-based model
When ready, move the following class into the
*models.py*
script
When ready, move the following class into the
*models.py*
script
%% Cell type:code id:16b0a602 tags:
%% Cell type:code id:16b0a602 tags:
```
python
```
python
class
ContentBased
(
AlgoBase
):
class
ContentBased
(
AlgoBase
):
def __init__(self, features_method, regressor_method):
    """Set up the recommender.

    Args:
        features_method: name of the content feature set; passed unchanged
            to ``create_content_features``.
        regressor_method: name of the per-user model; stored and compared
            against known names later on.
    """
    AlgoBase.__init__(self)

    # Explanation storage starts empty.
    self.user_profile_explain = {}

    self.regressor_method = regressor_method

    # The item feature table is computed once, at construction time.
    item_features = self.create_content_features(features_method)
    self.content_features = item_features
def create_content_features(self, features_method):
    """Content Analyzer: build the item feature table for ``features_method``.

    Only the data source needed by the requested method is loaded, so asking
    for e.g. ``"title_length"`` no longer reads the genome score file.

    Args:
        features_method: one of ``None``, ``"relevance"``, ``"title_length"``,
            ``"movie_year"``, ``"genres"``, ``"combination"``, ``"rating"``,
            ``"tags"`` or ``"tags_length"``.

    Returns:
        ``None`` when ``features_method`` is ``None``, otherwise a DataFrame
        of features. Column names are unchanged from the previous
        implementation (``avg_relevance``, ``n_character_title``,
        ``movie_year``, ``genres``, ``avg_rating``, ``tags``,
        ``n_character_tags``).

    Raises:
        NotImplementedError: if ``features_method`` is not a known name.
        ValueError: if a genre feature is requested but no genre is found.
    """
    if features_method is None:
        return None

    def title_length(df_items):
        # A naive feature: number of characters of the item label.
        return df_items[C.LABEL_COL].apply(len).to_frame('n_character_title')

    def movie_year(df_items):
        # Four digits between parentheses in the title, e.g. "(1995)".
        # Converted to numbers (NaN when absent) so the column can be fed to
        # numeric code instead of being left as strings.
        year = df_items['title'].str.extract(r'\((\d{4})\)', expand=False)
        return pd.to_numeric(year, errors='coerce').to_frame('movie_year')

    def last_genre_flag(df_items):
        # NOTE(review): the previous loop rebuilt this column for every genre
        # and kept only the final one, i.e. a 0/1 flag for the LAST genre
        # found. That result is reproduced here without the wasted
        # iterations. It is kept as a single column named 'genres' for
        # compatibility with code that selects the feature by that name;
        # one column per genre is probably what was intended -- confirm.
        genres_list = df_items['genres'].str.split('|').explode().unique()
        if len(genres_list) == 0:
            raise ValueError('No genre found in the items table.')
        # regex=False: genre labels are literal text, not patterns.
        flag = df_items['genres'].str.contains(genres_list[-1], regex=False)
        return flag.astype(int).to_frame('genres')

    if features_method == "relevance":
        df_genome_score = pd.read_csv("data/hackathon/content/genome-scores.csv")
        # Aggregate (not transform) so the result has one row per movieId and
        # is indexed by movieId rather than by the score file's row number.
        return (
            df_genome_score.groupby('movieId')["relevance"]
            .mean()
            .to_frame('avg_relevance')
        )

    if features_method == "rating":
        df_ratings = load_ratings()
        # Same reasoning as for "relevance": one averaged row per movieId.
        return df_ratings.groupby('movieId')['rating'].mean().to_frame('avg_rating')

    if features_method in ("tags", "tags_length"):
        df_tag = pd.read_csv(C.CONTENT_PATH / C.TAGS_FILENAME)
        # NOTE(review): these features keep the tag file's own row index,
        # as before -- confirm this lines up with the item ids used elsewhere.
        if features_method == "tags":
            # Number of comma-separated entries in the tag (0 if not a string).
            return df_tag['tag'].apply(
                lambda x: len(x.split(',')) if isinstance(x, str) else 0
            ).to_frame('tags')
        # Total number of characters over the comma-separated entries.
        return df_tag['tag'].apply(
            lambda x: sum(len(tag) for tag in x.split(',')) if isinstance(x, str) else 0
        ).to_frame('n_character_tags')

    if features_method in ("title_length", "movie_year", "genres", "combination"):
        df_items = load_items()
        if features_method == "title_length":
            return title_length(df_items)
        if features_method == "movie_year":
            return movie_year(df_items)
        if features_method == "genres":
            return last_genre_flag(df_items)
        # "combination": same column order as before.
        return pd.concat(
            [last_genre_flag(df_items), title_length(df_items), movie_year(df_items)],
            axis=1,
        )

    # (implement other feature creations here)
    raise NotImplementedError(f'Feature method {features_method} not yet implemented')
def fit(self, trainset):
    """Profile Learner: build one profile per user of ``trainset``.

    For every user this fills:

    * ``self.user_profile_explain[u]``: a dict mapping each content feature
      column to the rating-weighted average of that feature over the items
      the user rated (left as ``{}`` when it cannot be computed);
    * ``self.user_profile[u]``: depending on ``self.regressor_method``, a
      random score (``'random_score'``), the list of the user's ratings
      (``'random_sample'``), or a regressor fitted on the user's rated
      items (``'linear_regression'``, ``'svr_regression'``,
      ``'gradient_boosting'``, ``'random_forest'``). It stays ``None`` for
      an unknown method or when the user has no usable training row.

    Regressors are trained on ALL content feature columns, which is what
    ``estimate`` feeds to ``predict``.

    Args:
        trainset: the training set handed over by the framework; it is
            queried through ``all_users()``, ``ur`` and ``to_raw_iid``.

    Returns:
        ``self``.

    Raises:
        ValueError: if a regressor is requested but no content features
            were built (``self.content_features`` is ``None``).
    """
    AlgoBase.fit(self, trainset)

    users = list(trainset.all_users())

    # Preallocate user profiles
    self.user_profile = {u: None for u in users}
    self.user_profile_explain = {u: {} for u in users}

    method = self.regressor_method
    has_features = self.content_features is not None

    for u in users:
        features = ratings = None
        if has_features:
            # Features of the items THIS user rated, aligned with the ratings.
            features, ratings = self._rated_item_features(trainset, u)
            self.user_profile_explain[u] = self._feature_importance(features, ratings)

        if method == 'random_score':
            self.user_profile[u] = rd.uniform(0.5, 5)
        elif method == 'random_sample':
            self.user_profile[u] = [rating for _, rating in trainset.ur[u]]
        else:
            regressor = self._new_regressor()
            if regressor is None:
                # Unknown method: the profile stays None, as before.
                # (implement here the regressor fitting)
                continue
            if not has_features:
                raise ValueError(
                    f'Regressor {method} needs content features, but none were built.'
                )
            if len(ratings) == 0:
                # No rated item with complete features: nothing to learn from.
                continue
            regressor.fit(features.to_numpy(dtype=float), ratings)
            # Store the computed user profile
            self.user_profile[u] = regressor

    return self


def _rated_item_features(self, trainset, u):
    """Return ``(features, ratings)`` for the items rated by inner user ``u``.

    ``features`` is a DataFrame with the columns of ``self.content_features``
    and ``ratings`` a float array with one entry per row of ``features``.
    Items whose features are missing or not numeric are dropped from both.
    """
    rated = trainset.ur[u]
    frame = pd.DataFrame({
        'item_id': [trainset.to_raw_iid(iid) for iid, _ in rated],
        'user_ratings': [rating for _, rating in rated],
    })
    # Raw item ids are matched against the index of the feature table.
    frame = frame.merge(
        self.content_features, left_on="item_id", right_index=True, how='left'
    )
    columns = list(self.content_features.columns)
    features = frame[columns].apply(pd.to_numeric, errors='coerce')
    complete = features.notna().all(axis=1)
    ratings = frame.loc[complete, 'user_ratings'].to_numpy(dtype=float)
    return features[complete], ratings


def _feature_importance(self, features, ratings):
    """Return the rating-weighted average of each feature column as a dict."""
    total = ratings.sum()
    if len(ratings) == 0 or total == 0:
        return {}
    weighted = features.to_numpy(dtype=float).T.dot(ratings) / total
    return dict(zip(features.columns, weighted))


def _new_regressor(self):
    """Return a fresh, unfitted model for ``self.regressor_method`` (or None)."""
    method = self.regressor_method
    if method == 'linear_regression':
        return LinearRegression(fit_intercept=False)
    if method == 'svr_regression':
        return SVR(kernel='rbf', C=10, epsilon=0.2)
    if method == 'gradient_boosting':
        return GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)
    if method == 'random_forest':
        return RandomForestRegressor(n_estimators=100)
    return None
def estimate(self, u, i):
    """Scoring component used for item filtering.

    Args:
        u: inner user id.
        i: inner item id.

    Returns:
        The score for the pair: a random value for ``'random_score'``, one of
        the stored ratings for ``'random_sample'``, the model prediction for
        the regressor methods, and ``None`` for any other method name.

    Raises:
        PredictionImpossible: if the user or item is unknown to the trainset,
            if no model is stored for the user, or if the item has no usable
            row in the content feature table.
    """
    # First, handle cases for unknown users and items
    if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
        raise PredictionImpossible('User and/or item is unkown.')

    method = self.regressor_method

    if method == 'random_score':
        rd.seed()  # kept from the original: reseeds on every call
        return rd.uniform(0.5, 5)

    if method == 'random_sample':
        rd.seed()  # kept from the original: reseeds on every call
        return rd.choice(self.user_profile[u])

    if method not in ('linear_regression', 'svr_regression',
                      'gradient_boosting', 'random_forest'):
        # (implement here the regressor prediction)
        return None

    # All regressor methods share the same prediction path.
    regressor = self.user_profile[u]
    if regressor is None:
        raise PredictionImpossible('No fitted model for this user.')

    raw_item_id = self.trainset.to_raw_iid(i)
    try:
        item_features = self.content_features.loc[raw_item_id:raw_item_id, :].values
    except KeyError:
        raise PredictionImpossible('No content features for this item.') from None
    if len(item_features) == 0 or pd.isna(item_features).any():
        raise PredictionImpossible('No content features for this item.')

    return regressor.predict(item_features)[0]
def explain(self, u):
    """Return the explanation stored for user ``u``.

    Looks ``u`` up in ``self.user_profile_explain`` and returns the
    associated entry, or an empty dict when there is no entry for ``u``.
    """
    explanations = self.user_profile_explain
    return explanations[u] if u in explanations else {}
# Instantiate the recommender with the "movie_year" feature method and the
# "random_sample" regressor method (feature method first, regressor second).
cb = ContentBased("movie_year", "random_sample")

# Print the explanation stored for user '11'; explain() returns an empty dict
# when it has no entry for that user.
print(cb.explain('11'))

# NOTE(review): looks like a leftover debugging marker — confirm before
# removing.
print('test')
```
```
%% Output
%% Output
{}
{}
%% Cell type:code id:baab88b7 tags:
%% Cell type:code id:baab88b7 tags:
```
python
```
python
from
pprint
import
pprint
from
pprint
import
pprint
# Create a TfidfVectorizer instance for the genres
tfidf_vectorizer = TfidfVectorizer()

# Fit and transform to compute the TF-IDF matrix of the genres
tfidf_matrix = tfidf_vectorizer.fit_transform(df_items['genres'])

# Get the genre names (features)
genre_names = tfidf_vectorizer.get_feature_names_out()

# Build a DataFrame from the TF-IDF matrix of the genres
# NOTE(review): no index is passed, so the rows get a default positional
# index rather than the index of df_items — confirm this is intended.
df_tfidf = pd.DataFrame(tfidf_matrix.toarray(), columns=genre_names)

print("Matrice TF-IDF des genres :")
display(df_tfidf)
```
```
%% Output
%% Output
Matrice TF-IDF des genres :
Matrice TF-IDF des genres :
%% Cell type:markdown id:ffd75b7e tags:
%% Cell type:markdown id:ffd75b7e tags:
The following script tests the ContentBased class
The following script tests the ContentBased class
%% Cell type:code id:69d12f7d tags:
%% Cell type:code id:69d12f7d tags:
```
python
```
python
def test_contentbased_class(feature_method, regressor_method):
    """Smoke-test the ContentBased class.

    Fits a ContentBased model, built from the given feature and regressor
    methods, on the full trainset, then predicts and prints the result for
    the first (user, item) tuple of the anti-testset.
    """
    ratings = load_ratings(surprise_format=True)
    full_trainset = ratings.build_full_trainset()

    algo = ContentBased(feature_method, regressor_method)
    algo.fit(full_trainset)

    # Only the first anti-testset entry is used: its user id and item id.
    first_entry = full_trainset.build_anti_testset()[0]
    user_id = first_entry[0]
    item_id = first_entry[1]

    print(algo.predict(user_id, item_id))
# print("title_length :")
# print("title_length :")
# test_contentbased_class(feature_method = "title_length" , regressor_method = "random_score")
# test_contentbased_class(feature_method = "title_length" , regressor_method = "random_score")
# test_contentbased_class(feature_method = "title_length" , regressor_method = "random_sample")
# test_contentbased_class(feature_method = "title_length" , regressor_method = "random_sample")
# test_contentbased_class(feature_method = "title_length" , regressor_method = "linear_regression")
# test_contentbased_class(feature_method = "title_length" , regressor_method = "linear_regression")
# test_contentbased_class(feature_method= "title_length", regressor_method= "svr_regression")
# test_contentbased_class(feature_method= "title_length", regressor_method= "svr_regression")
# test_contentbased_class(feature_method= "title_length", regressor_method= "gradient_boosting")
# test_contentbased_class(feature_method= "title_length", regressor_method= "gradient_boosting")
# test_contentbased_class(feature_method= "title_length", regressor_method= "random_forest")
# test_contentbased_class(feature_method= "title_length", regressor_method= "random_forest")
# print("\n")
# print("\n")
# print("movie_year : ")
# print("movie_year : ")
# test_contentbased_class(feature_method= "movie_year", regressor_method= "random_score")
# test_contentbased_class(feature_method= "movie_year", regressor_method= "random_score")
# test_contentbased_class(feature_method= "movie_year", regressor_method= "random_sample")
# test_contentbased_class(feature_method= "movie_year", regressor_method= "random_sample")
# test_contentbased_class(feature_method= "movie_year", regressor_method= "linear_regression")
# test_contentbased_class(feature_method= "movie_year", regressor_method= "linear_regression")
# test_contentbased_class(feature_method= "movie_year", regressor_method= "svr_regression")
# test_contentbased_class(feature_method= "movie_year", regressor_method= "svr_regression")
# test_contentbased_class(feature_method= "movie_year", regressor_method= "gradient_boosting")
# test_contentbased_class(feature_method= "movie_year", regressor_method= "gradient_boosting")
# test_contentbased_class(feature_method= "movie_year", regressor_method= "random_forest")
# test_contentbased_class(feature_method= "movie_year", regressor_method= "random_forest")
# print("\n")
# print("\n")
# print("relevance : ")
# print("relevance : ")
# test_contentbased_class(feature_method= "relevance", regressor_method= "random_score")
# test_contentbased_class(feature_method= "relevance", regressor_method= "random_score")
# test_contentbased_class(feature_method= "relevance", regressor_method= "random_sample")
# test_contentbased_class(feature_method= "relevance", regressor_method= "random_sample")
# test_contentbased_class(feature_method= "relevance", regressor_method= "linear_regression")
# test_contentbased_class(feature_method= "relevance", regressor_method= "linear_regression")
# test_contentbased_class(feature_method= "relevance", regressor_method= "svr_regression")
# test_contentbased_class(feature_method= "relevance", regressor_method= "svr_regression")
# test_contentbased_class(feature_method= "relevance", regressor_method= "gradient_boosting")
# test_contentbased_class(feature_method= "relevance", regressor_method= "gradient_boosting")
# test_contentbased_class(feature_method= "relevance", regressor_method= "random_forest")
# test_contentbased_class(feature_method= "relevance", regressor_method= "random_forest")
# print("\n")
# print("\n")
# print("genres : ")
# print("genres : ")
# test_contentbased_class(feature_method= "genres", regressor_method= "random_score")
# test_contentbased_class(feature_method= "genres", regressor_method= "random_score")
# test_contentbased_class(feature_method= "genres", regressor_method= "random_sample")
# test_contentbased_class(feature_method= "genres", regressor_method= "random_sample")
# test_contentbased_class(feature_method= "genres", regressor_method= "linear_regression")
# test_contentbased_class(feature_method= "genres", regressor_method= "linear_regression")
# test_contentbased_class(feature_method= "genres", regressor_method= "svr_regression")
# test_contentbased_class(feature_method= "genres", regressor_method= "svr_regression")
# test_contentbased_class(feature_method= "genres", regressor_method= "gradient_boosting")
# test_contentbased_class(feature_method= "genres", regressor_method= "gradient_boosting")
# test_contentbased_class(feature_method= "genres", regressor_method= "random_forest")
# test_contentbased_class(feature_method= "genres", regressor_method= "random_forest")
# print("\n")
# print("\n")
# print("rating : ")
# print("rating : ")
# test_contentbased_class(feature_method= "rating", regressor_method="random_score")
# test_contentbased_class(feature_method= "rating", regressor_method="random_score")
# test_contentbased_class(feature_method= "rating", regressor_method="random_sample")
# test_contentbased_class(feature_method= "rating", regressor_method="random_sample")
# # test_contentbased_class(feature_method= "rating", regressor_method="linear_regression")
# # test_contentbased_class(feature_method= "rating", regressor_method="linear_regression")
# #test_contentbased_class(feature_method="rating", regressor_method="svr_regression")
# #test_contentbased_class(feature_method="rating", regressor_method="svr_regression")
# #test_contentbased_class(feature_method="rating", regressor_method="gradient_boosting")
# #test_contentbased_class(feature_method="rating", regressor_method="gradient_boosting")
# #test_contentbased_class(feature_method="rating", regressor_method="random_forest")
# #test_contentbased_class(feature_method="rating", regressor_method="random_forest")
# print("\n")
# print("\n")
# print("tags : ")
# print("tags : ")
# test_contentbased_class(feature_method="tags", regressor_method="random_score")
# test_contentbased_class(feature_method="tags", regressor_method="random_score")
# test_contentbased_class(feature_method="tags", regressor_method="random_sample")
# test_contentbased_class(feature_method="tags", regressor_method="random_sample")
# #test_contentbased_class(feature_method="tags", regressor_method="linear_regression")
# #test_contentbased_class(feature_method="tags", regressor_method="linear_regression")
# # test_contentbased_class(feature_method="tags", regressor_method="svr_regression")
# # test_contentbased_class(feature_method="tags", regressor_method="svr_regression")
# # test_contentbased_class(feature_method="tags", regressor_method="gradient_boosting")
# # test_contentbased_class(feature_method="tags", regressor_method="gradient_boosting")
# # test_contentbased_class(feature_method="tags", regressor_method="random_forest")
# # test_contentbased_class(feature_method="tags", regressor_method="random_forest")
# print("\n")
# print("\n")
# print("tags_length : ")
# print("tags_length : ")
# test_contentbased_class(feature_method="tags_length", regressor_method="random_score")
# test_contentbased_class(feature_method="tags_length", regressor_method="random_score")
# test_contentbased_class(feature_method="tags_length", regressor_method="random_sample")
# test_contentbased_class(feature_method="tags_length", regressor_method="random_sample")
# test_contentbased_class(feature_method="tags_length", regressor_method="linear_regression")
# test_contentbased_class(feature_method="tags_length", regressor_method="linear_regression")
# test_contentbased_class(feature_method="tags_length", regressor_method="svr_regression")
# test_contentbased_class(feature_method="tags_length", regressor_method="svr_regression")
# test_contentbased_class(feature_method="tags_length", regressor_method="gradient_boosting")
# test_contentbased_class(feature_method="tags_length", regressor_method="gradient_boosting")
# test_contentbased_class(feature_method="tags_length", regressor_method="random_forest")
# test_contentbased_class(feature_method="tags_length", regressor_method="random_forest")
# print("\n")
# print("\n")
# print("combination : ")
# print("combination : ")
# test_contentbased_class(feature_method="combination", regressor_method="random_score")
# test_contentbased_class(feature_method="combination", regressor_method="random_score")
# test_contentbased_class(feature_method="combination", regressor_method="random_sample")
# test_contentbased_class(feature_method="combination", regressor_method="random_sample")
# test_contentbased_class(feature_method="combination", regressor_method="linear_regression")
# test_contentbased_class(feature_method="combination", regressor_method="linear_regression")
# test_contentbased_class(feature_method="combination", regressor_method="svr_regression")
# test_contentbased_class(feature_method="combination", regressor_method="svr_regression")
# test_contentbased_class(feature_method="combination", regressor_method="gradient_boosting")
# test_contentbased_class(feature_method="combination", regressor_method="gradient_boosting")
# test_contentbased_class(feature_method="combination", regressor_method="random_forest")
# test_contentbased_class(feature_method="combination", regressor_method="random_forest")
```
```
...
...
Ce diff est replié.
Cliquez pour l'agrandir.
Aperçu
0%
Chargement en cours
Veuillez réessayer
ou
joindre un nouveau fichier
.
Annuler
You are about to add
0
people
to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Enregistrer le commentaire
Annuler
Veuillez vous
inscrire
ou vous
se connecter
pour commenter