From 74bf75f37f2fe39e84c985e35dd6577550a2efcc Mon Sep 17 00:00:00 2001
From: R-icntay <63848664+R-icntay@users.noreply.github.com>
Date: Mon, 23 Aug 2021 19:06:30 +0300
Subject: [PATCH] Explain what tibble is

---
 .../solution/R/lesson_10-R.ipynb              | 90 ++++++++++---------
 .../1-Introduction/solution/R/lesson_10.Rmd   |  8 +-
 2 files changed, 51 insertions(+), 47 deletions(-)
diff --git a/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb b/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb
index 251bbe08..87c33b5e 100644
--- a/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb
+++ b/4-Classification/1-Introduction/solution/R/lesson_10-R.ipynb
@@ -163,12 +163,12 @@
       "cell_type": "code",
       "execution_count": null,
       "source": [
-        "# Basic information about the data\n",
-        "df %>%\n",
-        "  introduce()\n",
-        "\n",
-        "# Visualize basic information above\n",
-        "df %>% \n",
+        "# Basic information about the data\r\n",
+        "df %>%\r\n",
+        "  introduce()\r\n",
+        "\r\n",
+        "# Visualize basic information above\r\n",
+        "df %>% \r\n",
         "  plot_intro(ggtheme = theme_light())"
       ],
       "outputs": [],
@@ -193,17 +193,17 @@
       "cell_type": "code",
       "execution_count": null,
       "source": [
-        "# Count observations per cuisine\n",
-        "df %>% \n",
-        "  count(cuisine) %>% \n",
-        "  arrange(n)\n",
-        "\n",
-        "# Plot the distribution\n",
-        "theme_set(theme_light())\n",
-        "df %>% \n",
-        "  count(cuisine) %>% \n",
-        "  ggplot(mapping = aes(x = n, y = reorder(cuisine, -n))) +\n",
-        "  geom_col(fill = \"midnightblue\", alpha = 0.7) +\n",
+        "# Count observations per cuisine\r\n",
+        "df %>% \r\n",
+        "  count(cuisine) %>% \r\n",
+        "  arrange(n)\r\n",
+        "\r\n",
+        "# Plot the distribution\r\n",
+        "theme_set(theme_light())\r\n",
+        "df %>% \r\n",
+        "  count(cuisine) %>% \r\n",
+        "  ggplot(mapping = aes(x = n, y = reorder(cuisine, -n))) +\r\n",
+        "  geom_col(fill = \"midnightblue\", alpha = 0.7) +\r\n",
         "  ylab(\"cuisine\")"
       ],
       "outputs": [],
@@ -214,15 +214,17 @@
     {
       "cell_type": "markdown",
       "source": [
-        "There are a finite number of cuisines, but the distribution of data is uneven. You can fix that! Before doing so, explore a little more.\n",
-        "\n",
-        "Next, let's assign each cuisine into its individual table and find out how much data is available (rows, columns) per cuisine.\n",
-        "\n",
-        "<p >\n",
-        "   <img src=\"../../images/dplyr_filter.jpg\"\n",
-        "   width=\"600\"/>\n",
-        "   <figcaption>Artwork by @allison_horst</figcaption>\n",
-        "\n"
+        "There are a finite number of cuisines, but the distribution of data is uneven. You can fix that! Before doing so, explore a little more.\r\n",
+        "\r\n",
+        "Next, let's assign each cuisine into its individual tibble and find out how much data is available (rows, columns) per cuisine.\r\n",
+        "\r\n",
+        "> A tibble is a modern reimagining of the data frame, keeping what time has proven to be effective, and throwing out what is not.\r\n",
+        "\r\n",
+        "<p >\r\n",
+        "   <img src=\"../../images/dplyr_filter.jpg\"\r\n",
+        "   width=\"600\"/>\r\n",
+        "   <figcaption>Artwork by @allison_horst</figcaption>\r\n",
+        "\r\n"
       ],
       "metadata": {
         "id": "vVvyDb1kG2in"
@@ -232,24 +234,24 @@
       "cell_type": "code",
       "execution_count": null,
       "source": [
-        "# Create individual tables for the cuisines\n",
-        "thai_df <- df %>% \n",
-        "  filter(cuisine == \"thai\")\n",
-        "japanese_df <- df %>% \n",
-        "  filter(cuisine == \"japanese\")\n",
-        "chinese_df <- df %>% \n",
-        "  filter(cuisine == \"chinese\")\n",
-        "indian_df <- df %>% \n",
-        "  filter(cuisine == \"indian\")\n",
-        "korean_df <- df %>% \n",
-        "  filter(cuisine == \"korean\")\n",
-        "\n",
-        "\n",
-        "# Find out how much data is avilable per cuisine\n",
-        "cat(\" thai df:\", dim(thai_df), \"\\n\",\n",
-        "    \"japanese df:\", dim(japanese_df), \"\\n\",\n",
-        "    \"chinese_df:\", dim(chinese_df), \"\\n\",\n",
-        "    \"indian_df:\", dim(indian_df), \"\\n\",\n",
+        "# Create individual tibbles for the cuisines\r\n",
+        "thai_df <- df %>% \r\n",
+        "  filter(cuisine == \"thai\")\r\n",
+        "japanese_df <- df %>% \r\n",
+        "  filter(cuisine == \"japanese\")\r\n",
+        "chinese_df <- df %>% \r\n",
+        "  filter(cuisine == \"chinese\")\r\n",
+        "indian_df <- df %>% \r\n",
+        "  filter(cuisine == \"indian\")\r\n",
+        "korean_df <- df %>% \r\n",
+        "  filter(cuisine == \"korean\")\r\n",
+        "\r\n",
+        "\r\n",
+        "# Find out how much data is available per cuisine\r\n",
+        "cat(\" thai df:\", dim(thai_df), \"\\n\",\r\n",
+        "    \"japanese df:\", dim(japanese_df), \"\\n\",\r\n",
+        "    \"chinese_df:\", dim(chinese_df), \"\\n\",\r\n",
+        "    \"indian_df:\", dim(indian_df), \"\\n\",\r\n",
         "    \"korean_df:\", dim(korean_df))"
       ],
       "outputs": [],
diff --git a/4-Classification/1-Introduction/solution/R/lesson_10.Rmd b/4-Classification/1-Introduction/solution/R/lesson_10.Rmd
index 783543d1..b979d56f 100644
--- a/4-Classification/1-Introduction/solution/R/lesson_10.Rmd
+++ b/4-Classification/1-Introduction/solution/R/lesson_10.Rmd
@@ -34,7 +34,7 @@ Classification is one of the fundamental activities of the machine learning rese
 
 To state the process in a more scientific way, your classification method creates a predictive model that enables you to map the relationship between input variables to output variables.
 
-![Binary vs. multiclass problems for classification algorithms to handle. Infographic by Jen Looper](../images/binary-multiclass.png)
+![Binary vs. multiclass problems for classification algorithms to handle. Infographic by Jen Looper](../../images/binary-multiclass.png){width="500"}
 
 Before starting the process of cleaning our data, visualizing it, and prepping it for our ML tasks, let's learn a bit about the various ways machine learning can be leveraged to classify data.
 
@@ -127,7 +127,9 @@ There are a finite number of cuisines, but the distribution of data is uneven. Y
 
 2.  Next, let's assign each cuisine into it's individual tibble and find out how much data is available (rows, columns) per cuisine.
 
-![Artwork by \@allison_horst](../images/dplyr_filter.jpg)
+> A tibble, or tbl_df, is a modern reimagining of the data.frame, keeping what time has proven to be effective, and throwing out what is not.
+
+![Artwork by \@allison_horst](../../images/dplyr_filter.jpg)
 
 ```{r cuisine_df}
 # Create individual tibbles for the cuisines
@@ -297,7 +299,7 @@ df_select %>%
 
 ## Preprocessing data using recipes 👩‍🍳👨‍🍳 - Dealing with imbalanced data ⚖️
 
-![Artwork by \@allison_horst](../images/recipes.png)
+![Artwork by \@allison_horst](../../images/recipes.png)
 
 Given that this lesson is about cuisines, we have to put `recipes` into context .