diff --git a/a-data-collection/docextraction.ipynb b/a-data-collection/exercise2.ipynb similarity index 97% rename from a-data-collection/docextraction.ipynb rename to a-data-collection/exercise2.ipynb index 8d19a88292bb8f28cb765d2d0b412570b0b7c349..4e3d76fcaff710702969ca20b1758a2a281a5bb7 100644 --- a/a-data-collection/docextraction.ipynb +++ b/a-data-collection/exercise2.ipynb @@ -193,9 +193,10 @@ " def extract_summary(self, movie, soup) -> None:\n", " \"\"\"\n", " This function extract the summary from a movie/tv-show\n", - " It use the find_all method of BeautifulSoup to find the \"overview\" class\n", + " Loop over the divs of BeautifulSoup to find the \"overview\" class\n", " \"\"\"\n", - " divs = soup.find_all(\"div\")\n", + " ## @COMPLETE : find all the divs\n", + " # divs = ...\n", " for div in divs:\n", " div_class = div.get(\"class\")\n", " if div_class is not None:\n", @@ -211,8 +212,10 @@ " It use the select method of BeautifulSoup to extract actors displayed on the page.\n", " Actor are defined in people scroller cards\n", " \"\"\"\n", - "\n", - " soup_results = soup.select(\"ol[class='people scroller'] li[class='card'] p a\")\n", + " \n", + " ## @COMPLETE : find the selector string\n", + " # selector = \"\"\n", + " soup_results = soup.select(selector)\n", " actors = [soup_result.text for soup_result in soup_results]\n", " print(actors)\n", "\n",