From 6392d8037da8d9162650200655618121a1cfd57a Mon Sep 17 00:00:00 2001 From: Keshav Sharma <61562452+keshav340@users.noreply.github.com> Date: Sat, 9 Oct 2021 05:53:58 -0700 Subject: [PATCH] Bonus lesson added --- 2-Working-With-Data/07-python/bonus-lesson.ipynb | 1 + 1 file changed, 1 insertion(+) create mode 100644 2-Working-With-Data/07-python/bonus-lesson.ipynb diff --git a/2-Working-With-Data/07-python/bonus-lesson.ipynb b/2-Working-With-Data/07-python/bonus-lesson.ipynb new file mode 100644 index 00000000..44dcbb39 --- /dev/null +++ b/2-Working-With-Data/07-python/bonus-lesson.ipynb @@ -0,0 +1 @@ +{"metadata":{"kernelspec":{"name":"ir","display_name":"R","language":"R"},"language_info":{"name":"R","codemirror_mode":"r","pygments_lexer":"r","mimetype":"text/x-r-source","file_extension":".r","version":"4.0.5"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# This R environment comes with many helpful analytics packages installed\n# It is defined by the kaggle/rstats Docker image: https://github.com/kaggle/docker-rstats\n# For example, here's a helpful package to load\n\nlibrary(tidyverse) # metapackage of all tidyverse packages\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nlist.files(path = \"../input\")\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current 
session","metadata":{"_uuid":"051d70d956493feee0c6d64651c6a088724dca2a","_execution_state":"idle","execution":{"iopub.status.busy":"2021-10-08T18:11:24.952103Z","iopub.execute_input":"2021-10-08T18:11:24.954562Z","iopub.status.idle":"2021-10-08T18:11:26.309552Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"library(zoo)\nlibrary(dplyr)\nlibrary(ggplot2)\nlibrary(ggfortify)\nlibrary(dplyr)","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:15:22.966831Z","iopub.execute_input":"2021-10-09T12:15:22.968941Z","iopub.status.idle":"2021-10-09T12:15:23.546202Z"},"trusted":true},"execution_count":1,"outputs":[]},{"cell_type":"markdown","source":"## Series","metadata":{}},{"cell_type":"markdown","source":"### A series is like a list or 1D array, but with an index. It is generally used to represent data associated with time.","metadata":{}},{"cell_type":"code","source":"stock_prices = c(799,1174.8,865.1,1334.6,635.4,918.5,685.5,998.6,784.2,985,882.8,1071)\nstock_prices_glennmark = ts(stock_prices,start = c(2020,1,1),frequency = 12)\nstock_prices_glennmark\nplot(stock_prices_glennmark)\n","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:15:54.612622Z","iopub.execute_input":"2021-10-09T12:15:54.735899Z","iopub.status.idle":"2021-10-09T12:15:55.008804Z"},"trusted":true},"execution_count":3,"outputs":[]},{"cell_type":"code","source":"# CREATING A TIME SERIES\nrandomData<- rnorm(100)\nmonth <- ts(randomData,start=c(2020,1),frequency=12)\nplot(month)\n## With 100 monthly observations the series spans a little over 8 years (100/12), so this plot covers a much longer period than the 12-point series above","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:16:03.558553Z","iopub.execute_input":"2021-10-09T12:16:03.560254Z","iopub.status.idle":"2021-10-09T12:16:03.679285Z"},"trusted":true},"execution_count":4,"outputs":[]},{"cell_type":"markdown","source":"**DataFrame**\n#### It is a two-dimensional data structure in which data is stored in tabular form.","metadata":{}},{"cell_type":"code","source":"STOCK_PRICE <- 
data.frame(\nUSD=c(1.02,1.085,2.05,3.45,4.05,5.32,3.45,4.65,6.65,9.23,10.25,12.69),\nEURO = c(0.85,1.02,1.85,3.02,3.35,4.35,2.65,3.95,5.55,8.90,9.80,11.50))\nSTOCK_PRICES <- ts(STOCK_PRICE,start = c(2020,1,1),frequency = 12)","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:24:50.750390Z","iopub.execute_input":"2021-10-09T12:24:50.752297Z","iopub.status.idle":"2021-10-09T12:24:50.767609Z"},"trusted":true},"execution_count":14,"outputs":[]},{"cell_type":"code","source":"STOCK_PRICES","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:24:53.233062Z","iopub.execute_input":"2021-10-09T12:24:53.234839Z","iopub.status.idle":"2021-10-09T12:24:53.260478Z"},"trusted":true},"execution_count":15,"outputs":[]},{"cell_type":"markdown","source":"## Reading Data into R","metadata":{}},{"cell_type":"markdown","source":"### 1. Read a CSV File into R","metadata":{}},{"cell_type":"markdown","source":"The `file` argument in the next cell is left blank as a placeholder; replace it with the path to your own CSV file before running the cell.","metadata":{}},{"cell_type":"code","source":"dataset <- read.table(file=\" \",header = TRUE, sep = \",\")","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"### Data Manipulation","metadata":{}},{"cell_type":"code","source":"dim(STOCK_PRICES )","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:16:21.200787Z","iopub.execute_input":"2021-10-09T12:16:21.202859Z","iopub.status.idle":"2021-10-09T12:16:21.224888Z"},"trusted":true},"execution_count":8,"outputs":[]},{"cell_type":"code","source":"head(STOCK_PRICES )","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:16:26.089604Z","iopub.execute_input":"2021-10-09T12:16:26.091545Z","iopub.status.idle":"2021-10-09T12:16:26.114797Z"},"trusted":true},"execution_count":9,"outputs":[]},{"cell_type":"code","source":"summary(STOCK_PRICES 
)","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:16:28.433758Z","iopub.execute_input":"2021-10-09T12:16:28.435883Z","iopub.status.idle":"2021-10-09T12:16:28.457663Z"},"trusted":true},"execution_count":10,"outputs":[]},{"cell_type":"markdown","source":"#### We are slicing to extract subsets of the dataset by row position (for example row 5, or rows 3 to 8)","metadata":{}},{"cell_type":"code","source":"STOCK_PRICES [5,]\n# series","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:16:35.848118Z","iopub.execute_input":"2021-10-09T12:16:35.850327Z","iopub.status.idle":"2021-10-09T12:16:35.871805Z"},"trusted":true},"execution_count":11,"outputs":[]},{"cell_type":"code","source":"STOCK_PRICES [3:8,]","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:16:50.004019Z","iopub.execute_input":"2021-10-09T12:16:50.006116Z","iopub.status.idle":"2021-10-09T12:16:50.034007Z"},"trusted":true},"execution_count":12,"outputs":[]},{"cell_type":"code","source":"slice(STOCK_PRICE ,3:8)","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:25:09.777288Z","iopub.execute_input":"2021-10-09T12:25:09.778989Z","iopub.status.idle":"2021-10-09T12:25:09.806709Z"},"trusted":true},"execution_count":16,"outputs":[]},{"cell_type":"markdown","source":"### Pipe operator","metadata":{}},{"cell_type":"code","source":"STOCK_PRICE %>% slice(3:5)","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:25:25.256136Z","iopub.execute_input":"2021-10-09T12:25:25.257863Z","iopub.status.idle":"2021-10-09T12:25:25.278594Z"},"trusted":true},"execution_count":18,"outputs":[]},{"cell_type":"markdown","source":"### slice_head subsets the first n rows of a dataframe and slice_tail subsets the last n rows of a 
dataframe","metadata":{}},{"cell_type":"code","source":"slice_head(STOCK_PRICE,n=2)","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:25:55.079932Z","iopub.execute_input":"2021-10-09T12:25:55.081670Z","iopub.status.idle":"2021-10-09T12:25:55.103342Z"},"trusted":true},"execution_count":19,"outputs":[]},{"cell_type":"code","source":"slice_tail(STOCK_PRICE,n=2)","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:26:04.997678Z","iopub.execute_input":"2021-10-09T12:26:04.999435Z","iopub.status.idle":"2021-10-09T12:26:05.021799Z"},"trusted":true},"execution_count":20,"outputs":[]},{"cell_type":"markdown","source":"### slice_max subsets the n rows of a dataset with the n largest values of a given variable","metadata":{}},{"cell_type":"code","source":"slice_max(STOCK_PRICE,EURO, n=3)","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:26:29.905938Z","iopub.execute_input":"2021-10-09T12:26:29.907674Z","iopub.status.idle":"2021-10-09T12:26:29.934518Z"},"trusted":true},"execution_count":21,"outputs":[]},{"cell_type":"code","source":"slice_sample(STOCK_PRICE,n=5)","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:26:42.047503Z","iopub.execute_input":"2021-10-09T12:26:42.049113Z","iopub.status.idle":"2021-10-09T12:26:42.071494Z"},"trusted":true},"execution_count":22,"outputs":[]},{"cell_type":"markdown","source":"## Filtering","metadata":{}},{"cell_type":"markdown","source":"### Filtering drops the rows of a dataframe that do not meet a condition","metadata":{}},{"cell_type":"code","source":"STOCK_PRICE[STOCK_PRICE$EURO>3.35,]","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:29:42.107479Z","iopub.execute_input":"2021-10-09T12:29:42.109116Z","iopub.status.idle":"2021-10-09T12:29:42.131224Z"},"trusted":true},"execution_count":30,"outputs":[]},{"cell_type":"markdown","source":"","metadata":{}},{"cell_type":"markdown","source":"## Grouping ","metadata":{}},{"cell_type":"markdown","source":"### The group_by function groups the rows of a data 
frame with respect to one or more variables","metadata":{}},{"cell_type":"code","source":"library(dplyr)","metadata":{"execution":{"iopub.status.busy":"2021-10-08T13:13:57.808888Z","iopub.execute_input":"2021-10-08T13:13:57.810559Z","iopub.status.idle":"2021-10-08T13:13:57.843966Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df1 <- group_by(STOCK_PRICE,EURO)","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:36:27.270590Z","iopub.execute_input":"2021-10-09T12:36:27.272456Z","iopub.status.idle":"2021-10-09T12:36:27.298281Z"},"trusted":true},"execution_count":40,"outputs":[]},{"cell_type":"code","source":"summarise(df1,mymean =mean(EURO))","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:36:38.887295Z","iopub.execute_input":"2021-10-09T12:36:38.889081Z","iopub.status.idle":"2021-10-09T12:36:38.919223Z"},"trusted":true},"execution_count":41,"outputs":[]},{"cell_type":"code","source":"df2<-group_by(df1,USD>3.2)","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:36:53.022367Z","iopub.execute_input":"2021-10-09T12:36:53.023993Z","iopub.status.idle":"2021-10-09T12:36:53.051015Z"},"trusted":true},"execution_count":42,"outputs":[]},{"cell_type":"code","source":"summarise(df2,mymean = mean(USD))","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:37:05.348959Z","iopub.execute_input":"2021-10-09T12:37:05.350547Z","iopub.status.idle":"2021-10-09T12:37:05.374010Z"},"trusted":true},"execution_count":44,"outputs":[]},{"cell_type":"markdown","source":"## SELECT","metadata":{}},{"cell_type":"code","source":"df<-select(STOCK_PRICE,EURO,USD)\ndf","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:37:45.562234Z","iopub.execute_input":"2021-10-09T12:37:45.563814Z","iopub.status.idle":"2021-10-09T12:37:45.591553Z"},"trusted":true},"execution_count":47,"outputs":[]},{"cell_type":"code","source":"df<- 
select(df,-EURO)","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:37:55.423706Z","iopub.execute_input":"2021-10-09T12:37:55.425323Z","iopub.status.idle":"2021-10-09T12:37:55.440506Z"},"trusted":true},"execution_count":48,"outputs":[]},{"cell_type":"markdown","source":"","metadata":{}},{"cell_type":"code","source":"df","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:37:58.655523Z","iopub.execute_input":"2021-10-09T12:37:58.657351Z","iopub.status.idle":"2021-10-09T12:37:58.687523Z"},"trusted":true},"execution_count":49,"outputs":[]},{"cell_type":"markdown","source":"## Arrange","metadata":{}},{"cell_type":"code","source":"df <- arrange(STOCK_PRICE, desc(USD))","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:38:19.525371Z","iopub.execute_input":"2021-10-09T12:38:19.526944Z","iopub.status.idle":"2021-10-09T12:38:19.545423Z"},"trusted":true},"execution_count":51,"outputs":[]},{"cell_type":"code","source":"df","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:38:22.301574Z","iopub.execute_input":"2021-10-09T12:38:22.303359Z","iopub.status.idle":"2021-10-09T12:38:22.326992Z"},"trusted":true},"execution_count":52,"outputs":[]},{"cell_type":"code","source":"df <- arrange(STOCK_PRICE, desc(USD),EURO)","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:38:45.428273Z","iopub.execute_input":"2021-10-09T12:38:45.429970Z","iopub.status.idle":"2021-10-09T12:38:45.446438Z"},"trusted":true},"execution_count":54,"outputs":[]},{"cell_type":"code","source":"df","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:38:48.362531Z","iopub.execute_input":"2021-10-09T12:38:48.364239Z","iopub.status.idle":"2021-10-09T12:38:48.390254Z"},"trusted":true},"execution_count":55,"outputs":[]},{"cell_type":"code","source":"rename(STOCK_PRICE, DOLLAR = 
USD)","metadata":{"execution":{"iopub.status.busy":"2021-10-09T12:39:28.495960Z","iopub.execute_input":"2021-10-09T12:39:28.497625Z","iopub.status.idle":"2021-10-09T12:39:28.525934Z"},"trusted":true},"execution_count":57,"outputs":[]},{"cell_type":"markdown","source":"","metadata":{}},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]} \ No newline at end of file