Initial commit

fdd701b2 · Christophe Desagre · fdd701b2 · fdd701b2 · fdd701b2 · fdd701b2
--- a/.RData
+++ b/.RData
--- a/.Rbuildignore
+++ b/.Rbuildignore
+^.*\.Rproj$
+^\.Rproj\.user$
--- a/.Rhistory
+++ b/.Rhistory
+iris$Sepal.Length
+devtools::load_all()
+?advise.good.movie
+?mean
+meen(c(0, 1, 3))
+devtools::document()
+devtools::document()
+?advise.good.movie
+x <- sample(1000)
+usethis::use_data(x)
+y <- sample(1000)
+usethis::use_data(y, internal=TRUE)
+y
+devtools::document()
+devtools::document()
+usethis::use_vignette("my_vignette")
+library(moviesdemo)
+?moviesdemo
+?advise.good.movie
+devtools::buid()
+devtools::build()
+library(testthat)
+expect_equal(10,10) ## OK
+expect_equal(10, 10 + 1E-7) ## OK
+expect_equal(10, 11) ## ERROR
+# Match for character vectors
+string <- "Testing is fun"
+expect_match(string, "Testing is fun")
+expect_match(string, "testing is fun") # case sensitive
+# Inspect printed output
+expect_output(str(list(1:10, letters)), "List of 2") ## OK
+expect_output(str(list(1:10, letters)), "List of 4") ## ERROR
+expect_message(message("Hello"), "Hello")
+# You may have to change the text to "Production de NaN"
+# if your R version is in French
+expect_warning(log(-1), "NaNs produced")
+# You may have to change the text to "argument non numérique
+# pour un opérateur binaire" if your R version is in French
+expect_error(1 / "a", "non-numeric argument")
+devtools::test()
+usethis::use_testthat()
+devtools::test()
+devtools::test()
+devtools::check()
+test_dir()
--- a/.gitignore
+++ b/.gitignore
+inst/doc
+.Rproj.user
--- a/DESCRIPTION
+++ b/DESCRIPTION
+Package: moviesdemo
+Type: Package
+Title: What the Package Does (Title Case)
+Version: 0.1.0
+Author: Who wrote it
+Maintainer: The package maintainer <yourself@somewhere.net>
+Description: More about what it does (maybe more than one line)
+    Use four spaces when indenting paragraphs within the Description.
+License: What license is it under?
+Encoding: UTF-8
+LazyData: true
+RoxygenNote: 7.2.2
+Depends: 
+    R (>= 2.10)
+Imports: graphics
+Suggests: 
+    knitr,
+    rmarkdown,
+    testthat (>= 3.0.0)
+VignetteBuilder: knitr
+Config/testthat/edition: 3
--- a/NAMESPACE
+++ b/NAMESPACE
+# Generated by roxygen2: do not edit by hand
+
+S3method(print,advice)
+export(advise.good.movie)
+import(graphics)
--- a/R/advise.R
+++ b/R/advise.R
+#' Advise movies based on another movie
+#'
+#' \code{advise.good.movie} takes as input a movie from the movie database and gives as output a number
+#' of movies that are similar.
+#'
+#' @param similar_to character, movie title from the database.
+#' @param how_many integer, how many movies to advise (at least 1). Values
+#'   larger than the number of other movies in the database are capped.
+#' @param draw_scores if \code{TRUE}, draws a barplot with the similarity scores.
+#' @param ... additional arguments. Only \code{weights} is used: a numeric
+#'   vector of length 4 giving the importance of \code{"genre"},
+#'   \code{"popularity"}, \code{"rating"} and \code{"production company"}.
+#'   If it carries exactly these four names it is matched by name, otherwise
+#'   by position. When \code{weights} is omitted, or one of its values is
+#'   negative, the missing value(s) are asked for interactively.
+#'
+#' @return A list with the following elements:
+#' \describe{
+#'   \item{\code{selected}}{movie title used to advise other movies}
+#'   \item{\code{to_watch}}{advised movie title(s)}
+#'   \item{\code{movie_ids}}{line number in the \code{movies} database of the advised movie(s)}
+#'   \item{\code{scores}}{similarity scores of the advised movie(s)}
+#' }
+#'
+#' @examples
+#' suggestions <- advise.good.movie(similar_to = "Interstellar", how_many = 3,
+#' draw_scores = TRUE, weights = c("genre"=1, "popularity"=1, "rating"=1,
+#' "production company"=1))
+#'
+#' @import graphics
+#'
+#' @export
+
+advise.good.movie <- function(similar_to, how_many, draw_scores = FALSE, ...){
+  criteria <- c("genre", "popularity", "rating", "production company")
+  dots <- list(...)
+
+  # A negative weight is the sentinel for "not provided yet, ask the user".
+  weights <- if(is.null(dots[["weights"]])) rep(-1, length(criteria)) else dots[["weights"]]
+  if(!is.numeric(weights) || length(weights) != length(criteria) || anyNA(weights)) {
+    stop("Argument 'weights' should be a numeric vector of length 4 without missing values...")
+  }
+  # Honour user-supplied names: a correctly named vector given in another
+  # order must not be silently read positionally.
+  if(!is.null(names(weights)) && setequal(names(weights), criteria)) {
+    weights <- weights[criteria]
+  }
+  names(weights) <- criteria
+
+  movies <- moviesdemo::movies
+  movie <- match.arg(similar_to, movies$title)
+
+  if(!(is.numeric(how_many) && length(how_many) == 1 && !is.na(how_many))) {
+    stop("Argument 'how_many' should be a number...")
+  }
+  if(how_many < 1) {
+    stop("Argument 'how_many' should be at least 1...")
+  }
+
+  # readline() returns "" immediately in non-interactive sessions, which
+  # would make the prompt loop below spin forever.
+  if(any(weights < 0) && !interactive()) {
+    stop("Argument 'weights' must be supplied with non-negative values in a non-interactive session...")
+  }
+
+  for(x in criteria){
+    while(weights[[x]] < 0){
+      answer <- readline(paste0("From 0 to 5, how important is the ", x, "?"))
+      # Non-numeric answers become NA (without a coercion warning) and are rejected.
+      value <- suppressWarnings(as.numeric(answer))
+      if(isTRUE(value %in% 0:5)){
+        weights[[x]] <- value
+      } else {
+        message("Try again, please provide an integer between 0 and 5...")
+      }
+    }
+  }
+
+  # Candidate rows: every movie except the selected one.
+  indices <- setdiff(seq_len(nrow(movies)), which(movies$title == movie))
+  # Never ask for more movies than there are candidates.
+  how_many <- as.integer(min(how_many, length(indices)))
+
+  genre_sim <- vapply(indices, function(i) sim.genres(movie, movies$title[i]), numeric(1))
+  producer_sim <- vapply(indices, function(i) sim.producers(movie, movies$title[i]), numeric(1))
+
+  # Each criterion is standardised; 0.01 is added before log() to avoid log(0).
+  genre <- c(scale(log(genre_sim + 0.01)))
+  pop <- c(scale(log(movies$popularity[indices] + 0.01)))
+  rating <- c(scale(movies$vote[indices]))
+  producer <- c(scale(log(producer_sim + 0.01)))
+
+  w <- unname(weights)
+  total_scores <- w[1] * genre + (w[2] / 4) * pop +
+                  (w[3] / 4) * rating + (w[4] / 2) * producer
+
+  # Use one ranking for both the row ids and the scores so they stay aligned
+  # even when some scores are missing.
+  ranking <- order(total_scores, decreasing = TRUE)[seq_len(how_many)]
+  top_indices <- indices[ranking]
+  scores <- total_scores[ranking]
+  to_watch <- movies$title[top_indices]
+
+  if (draw_scores && length(scores) > 0) {
+    names(scores) <- to_watch
+    # Standardised scores can be negative; make the axis cover them as well.
+    score_range <- range(0, floor(scores), ceiling(scores), na.rm = TRUE)
+    m <- barplot(scores, ylim = score_range, col = "blue", main = "Movie scores")
+    text(m, scores * 0.9, labels = round(scores, 2), col = "white")
+  }
+
+  return.list <- list(selected = movie, to_watch = to_watch, movie_ids = top_indices, scores = scores)
+
+  class(return.list) <- "advice"
+
+  return.list
+
+}
--- a/R/data_movies.R
+++ b/R/data_movies.R
+#' TMDb 4800 movie dataset
+#'
+#' Metadata on 4800 movies from The Movie Database (TMDb) from the Kaggle website.
+#'
+#' The variables are as follows:
+#'    \describe{
+#'      \item{title}{character, title of the film.}
+#'      \item{genres}{string of characters, genres of the film.}
+#'      \item{popularity}{numeric, popularity of the film in terms of views.}
+#'      \item{vote}{numeric, voted rating of the film between 0 and 10.}
+#'      \item{language}{factor, original language.}
+#'      \item{producers}{string of characters, production companies.}
+#'      \item{release}{date, release date of the film.}
+#'      \item{runtime}{numeric, runtime in minutes.}
+#'      \item{plot}{character, plot summary of the film.}
+#'   }
+#'
+#' @format A data frame with 4800 rows and 9 variables.
+#'
+#' @source \url{https://www.kaggle.com/tmdb/tmdb-movie-metadata}
+"movies"
--- a/R/hello.R
+++ b/R/hello.R
+# Hello, world!
+#
+# This is an example function named 'hello' 
+# which prints 'Hello, world!'.
+#
+# You can learn more about package authoring with RStudio at:
+#
+#   http://r-pkgs.had.co.nz/
+#
+# Some useful keyboard shortcuts for package authoring:
+#
+#   Install Package:           'Ctrl + Shift + B'
+#   Check Package:             'Ctrl + Shift + E'
+#   Test Package:              'Ctrl + Shift + T'
+
+hello <- function() {
+  # Print the fixed greeting; print() hands the string back invisibly.
+  greeting <- "Hello, world!"
+  print(greeting)
+}
--- a/R/moviesdemo.R
+++ b/R/moviesdemo.R
+#' moviesdemo: A demo R package.
+#'
+#' A demo package for the SMCS course on creating and publishing R packages,
+#' based on metadata of 4800 movies from The Movie Database.
+#'
+#' @section moviesdemo functions:
+#' \describe{
+#'   \item{\code{advise.good.movie}}{Advise movies based on another movie}
+#'   \item{\code{sim.genres}}{Movie similarity based on genre}
+#'   \item{\code{sim.producers}}{Movie similarity based on production companies}
+#' }
+#' @section Database:
+#' The database used is the TMDb 4800 movies Database (see \code{\link{movies}})
+#'
+#'
+#' @docType package
+#' @name moviesdemo
+NULL
--- a/R/print_advise.R
+++ b/R/print_advise.R
+#' Printing method for the advise.good.movie function
+#'
+#' \code{print.advice} prints the selected movie and the advised movies.
+#'
+#' @param advice Output of a call to \code{\link{advise.good.movie}}.
+#' @param ... further arguments, accepted for compatibility with the
+#'   \code{print} generic and ignored.
+#'
+#' @return \code{advice}, invisibly.
+#'
+#' @method print advice
+#' @export
+
+print.advice <- function(advice, ...){
+
+  cat("Based on the movie",
+      advice$selected,
+      "we advise to watch",
+      paste(advice$to_watch,collapse=" and "))
+  # End the line so the console prompt does not end up glued to the output.
+  cat("\n")
+
+  # Print methods conventionally hand their argument back invisibly.
+  invisible(advice)
+
+}
--- a/R/sim.genres.R
+++ b/R/sim.genres.R
+# Genre-based similarity between two movies
+#
+# Scores how much the genres of movies A and B overlap: the number of
+# genres they share divided by the number of distinct genres across both.
+#
+# arguments
+################
+# movie_A character, title of the first movie; checked against
+#         movies$title with match.arg()
+# movie_B character, title of the second movie; checked the same way
+#
+# value
+################
+# A single number : shared genres / distinct genres of the two movies
+#
+sim.genres <- function(movie_A, movie_B){
+
+  movies <- moviesdemo::movies
+
+  # Validate movie_A first, then movie_B, against the known titles.
+  movie_A <- match.arg(movie_A, movies$title)
+  movie_B <- match.arg(movie_B, movies$title)
+
+  # Fetch the genres entry stored for each title.
+  first <- movies$genres[[which(movies$title == movie_A)]]
+  second <- movies$genres[[which(movies$title == movie_B)]]
+
+  shared <- intersect(first, second)
+  combined <- union(first, second)
+
+  length(shared) / length(combined)
+}
--- a/R/sim.producers.R
+++ b/R/sim.producers.R
+# Production-company-based similarity between two movies
+#
+# Scores how much the production companies of movies A and B overlap: the
+# number of companies they share divided by the number of distinct companies
+# across both.
+#
+# arguments
+################
+# movie_A character, title of the first movie; checked against
+#         movies$title with match.arg()
+# movie_B character, title of the second movie; checked the same way
+#
+# value
+################
+# A single number : shared companies / distinct companies of the two movies
+#
+sim.producers <- function(movie_A, movie_B){
+
+  movies <- moviesdemo::movies
+
+  # Validate movie_A first, then movie_B, against the known titles.
+  movie_A <- match.arg(movie_A, movies$title)
+  movie_B <- match.arg(movie_B, movies$title)
+
+  # Fetch the producers entry stored for each title.
+  first <- movies$producers[[which(movies$title == movie_A)]]
+  second <- movies$producers[[which(movies$title == movie_B)]]
+
+  shared <- intersect(first, second)
+  combined <- union(first, second)
+
+  length(shared) / length(combined)
+}
--- a/R/sysdata.rda
+++ b/R/sysdata.rda
--- a/data/movies.rda
+++ b/data/movies.rda
--- a/data/x.rda
+++ b/data/x.rda
--- a/man/advise.good.movie.Rd
+++ b/man/advise.good.movie.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/advise.R
+\name{advise.good.movie}
+\alias{advise.good.movie}
+\title{Advise movies based on another movie}
+\usage{
+advise.good.movie(similar_to, how_many, draw_scores = FALSE, ...)
+}
+\arguments{
+\item{similar_to}{character, movie title from the database.}
+
+\item{how_many}{integer, how many movies to advise.}
+
+\item{draw_scores}{if \code{TRUE}, draws a barplot with the similarity scores.}
+
+\item{...}{additional arguments.}
+}
+\value{
+A list with the following elements:
+\describe{
+  \item{\code{selected}}{movie title used to advise other movies}
+  \item{\code{to_watch}}{advised movie title(s)}
+  \item{\code{movie_ids}}{line number in the \code{movies} database of the advised movie(s)}
+  \item{\code{scores}}{similarity scores of the advised movie(s)}
+}
+}
+\description{
+\code{advise.good.movie} takes as input a movie from the movie database and gives as output a number
+of movies that are similar.
+}
+\examples{
+suggestions <- advise.good.movie(similar_to = "Interstellar", how_many = 3,
+draw_scores = TRUE, weights = c("genre"=1, "popularity"=1, "rating"=1,
+"production company"=1))
+
+}
--- a/man/hello.Rd
+++ b/man/hello.Rd
+\name{hello}
+\alias{hello}
+\title{Hello, World!}
+\usage{
+hello()
+}
+\description{
+Prints 'Hello, world!'.
+}
+\examples{
+hello()
+}
--- a/man/movies.Rd
+++ b/man/movies.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data_movies.R
+\docType{data}
+\name{movies}
+\alias{movies}
+\title{TMDb 4800 movie dataset}
+\format{
+A data frame with 4800 rows and 9 variables.
+}
+\source{
+\url{https://www.kaggle.com/tmdb/tmdb-movie-metadata}
+}
+\usage{
+movies
+}
+\description{
+Metadata on 4800 movies from The Movie Database (TMDb) from the Kaggle website.
+}
+\details{
+The variables are as follows:
+   \describe{
+     \item{title}{character, title of the film.}
+     \item{genres}{string of characters, genres of the film.}
+     \item{popularity}{numeric, popularity of the film in terms of views.}
+     \item{vote}{numeric, voted rating of the film between 0 and 10.}
+     \item{language}{factor, original language.}
+     \item{producers}{string of characters, production companies.}
+     \item{release}{date, release date of the film.}
+     \item{runtime}{numeric, runtime in minutes.}
+     \item{plot}{character, plot summary of the film.}
+  }
+}
+\keyword{datasets}
--- a/man/moviesdemo.Rd
+++ b/man/moviesdemo.Rd
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/moviesdemo.R
+\docType{package}
+\name{moviesdemo}
+\alias{moviesdemo}
+\title{moviesdemo: A demo R package.}
+\description{
+A demo package for the SMCS course on creating and publishing R packages,
+based on metadata of 4800 movies from The Movie Database.
+}
+\section{moviesdemo functions}{
+
+\describe{
+  \item{\code{advise.good.movie}}{Advise movies based on another movie}
+  \item{\code{sim.genres}}{Movie similarity based on genre}
+  \item{\code{sim.producers}}{Movie similarity based on production companies}
+}
+}
+
+\section{Database}{
+
+The database used is the TMDb 4800 movies Database (see \code{\link{movies}})
+}
+