From 1fdfd7fb5017b14c20547419b1d071beca6bd63d Mon Sep 17 00:00:00 2001 From: Jasmine Date: Mon, 27 Sep 2021 00:36:36 -0400 Subject: [PATCH] data prep lesson and assignment --- .../08-data-preparation/README.md | 5 +- .../08-data-preparation/assignment.ipynb | 142 ++++++++++++++++++ .../08-data-preparation/assignment.md | 8 +- .../08-data-preparation/index.html | 23 +++ 4-Data-Science-Lifecycle/README.md | 7 +- data/form.csv | 11 ++ 6 files changed, 189 insertions(+), 7 deletions(-) create mode 100644 2-Working-With-Data/08-data-preparation/assignment.ipynb create mode 100644 2-Working-With-Data/08-data-preparation/index.html create mode 100644 data/form.csv diff --git a/2-Working-With-Data/08-data-preparation/README.md b/2-Working-With-Data/08-data-preparation/README.md index 230cf02..45e8c48 100644 --- a/2-Working-With-Data/08-data-preparation/README.md +++ b/2-Working-With-Data/08-data-preparation/README.md @@ -39,14 +39,13 @@ Give the exercises in the [notebook](4-Data-Science-Lifecycle\15-analyzing\noteb ## Review & Self Study -There are many ways to discover and approach preparing your data for analysis and modeling and cleaning the data is an important step that is a "hands on" experience. Try these challenges from Kaggle to try some of techniques that this lesson didn't cover. +There are many ways to discover and approach preparing your data for analysis and modeling, and cleaning the data is an important step that is a "hands on" experience. Try these challenges from Kaggle to explore techniques that this lesson didn't cover. 
- [Data Cleaning Challenge: Parsing Dates](https://www.kaggle.com/rtatman/data-cleaning-challenge-parsing-dates/) - [Data Cleaning Challenge: Scale and Normalize Data](https://www.kaggle.com/rtatman/data-cleaning-challenge-scale-and-normalize-data) - ## Assignment -[Assignment Title](assignment.md) +[Evaluating Data from a Form](assignment.md) diff --git a/2-Working-With-Data/08-data-preparation/assignment.ipynb b/2-Working-With-Data/08-data-preparation/assignment.ipynb new file mode 100644 index 0000000..35978e9 --- /dev/null +++ b/2-Working-With-Data/08-data-preparation/assignment.ipynb @@ -0,0 +1,142 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "# Assignment: Evaluating Data from a Form\r\n", + "\r\n", + "A client has been testing a [small form](index.html) to gather some basic data about their client-base. They have brought their findings to you to validate the data they have gathered. You can open the `index.html` page in a browser to take a look at the form.\r\n", + "\r\n", + "You have been provided a [dataset of CSV records](../../data/form.csv) that contain entries from the form as well as some basic visualizations. The client pointed out that some of the visualizations look incorrect but they're unsure about how to resolve them. You can explore it in the [assignment notebook](assignment.ipynb).\r\n", + "\r\n", + "## Instructions\r\n", + "\r\n", + "Use the techniques in this lesson to make recommendations about the form so it captures accurate and consistent information. 
" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "!pip install pandas\r\n", + "!pip install matplotlib" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + "import pandas as pd\r\n", + "import matplotlib.pyplot as plt\r\n", + "\r\n", + "#Loading the dataset\r\n", + "path = '../../data/form.csv'\r\n", + "form_df = pd.read_csv(path)\r\n", + "print(form_df)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " birth_month state pet\n", + "0 January NaN Cats\n", + "1 JAN CA Cats\n", + "2 Sept Hawaii Dog\n", + "3 january AK Dog\n", + "4 July RI Cats\n", + "5 September California Cats\n", + "6 April CA Dog\n", + "7 January California Cats\n", + "8 November FL Dog\n", + "9 December Florida Cats\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "source": [ + "form_df['state'].value_counts().plot(kind='bar');\r\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "source": [ + "form_df['birth_month'].value_counts().plot(kind='bar');\r\n", + "plt.show()" + ], + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [], + "metadata": {} + } + ], + "metadata": { + "orig_nbformat": 4, + "language_info": { + "name": "python", + "version": "3.9.7", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.9.7 64-bit ('venv': venv)" + }, + "interpreter": { + "hash": "6b9b57232c4b57163d057191678da2030059e733b8becc68f245de5a75abe84e" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/2-Working-With-Data/08-data-preparation/assignment.md b/2-Working-With-Data/08-data-preparation/assignment.md index b7af641..527a435 100644 --- a/2-Working-With-Data/08-data-preparation/assignment.md +++ b/2-Working-With-Data/08-data-preparation/assignment.md @@ -1,7 +1,13 @@ -# Title +# Evaluating Data from a Form + +A client has been testing a [small form](index.html) to gather some basic data about their client-base. They have brought their findings to you to validate the data they have gathered. You can open the `index.html` page in the browser to take a look at the form. + +You have been provided a [dataset of CSV records](../../data/form.csv) that contain entries from the form as well as some basic visualizations. The client pointed out that some of the visualizations look incorrect but they're unsure about how to resolve them. You can explore it in the [assignment notebook](assignment.ipynb). ## Instructions +Use the techniques in this lesson to make recommendations about the form so it captures accurate and consistent information. 
+ ## Rubric Exemplary | Adequate | Needs Improvement diff --git a/2-Working-With-Data/08-data-preparation/index.html b/2-Working-With-Data/08-data-preparation/index.html new file mode 100644 index 0000000..1594034 --- /dev/null +++ b/2-Working-With-Data/08-data-preparation/index.html @@ -0,0 +1,23 @@ + + + + + + + + Entry Form + + + +

Please Fill out the Form (* required)

+ *
+
+ +
+ + + + \ No newline at end of file diff --git a/4-Data-Science-Lifecycle/README.md b/4-Data-Science-Lifecycle/README.md index c17bc4e..683d187 100644 --- a/4-Data-Science-Lifecycle/README.md +++ b/4-Data-Science-Lifecycle/README.md @@ -3,13 +3,14 @@ ![communication](images/communication.jpg) > Photo by Headway on Unsplash -In these lessons, you'll explore some of the aspects of the Data Science lifeycle, including analysis and communication around data. +In these lessons, you'll explore some of the aspects of the Data Science lifecycle, including analysis and communication around data. + ### Topics 1. [Introduction](14-Introduction/README.md) 2. [Analyzing](15-Analyzing/README.md) -3. [Communication](https://github.com/microsoft/Data-Science-For-Beginners/tree/main/4-Data-Science-Lifecycle/16-communication) +3. [Communication](16-communication/README.md) ### Credits -These lessons were written with ❤️ by [Jalen McGee](https://twitter.com/JalenMCG) +These lessons were written with ❤️ by [Jalen McGee](https://twitter.com/JalenMCG) and [Jasmine Greenaway](https://twitter.com/paladique) diff --git a/data/form.csv b/data/form.csv new file mode 100644 index 0000000..f8df357 --- /dev/null +++ b/data/form.csv @@ -0,0 +1,11 @@ +birth_month,state,pet +January,,Cats +JAN,CA,Cats +Sept,Hawaii,Dog +january,AK,Dog +July,RI,Cats +September,California,Cats +April,CA,Dog +January,California,Cats +November,FL,Dog +December,Florida,Cats \ No newline at end of file