From 3d767613cec16a0148f9ee21be354081853a3b26 Mon Sep 17 00:00:00 2001
From: Ayush Singh <81796368+ayush714@users.noreply.github.com>
Date: Sun, 22 Aug 2021 20:13:45 +0530
Subject: [PATCH] Notebook_Julia_Editing_Linear_Regression.ipynb
Notebook_Julia_Editing_Linear_Regression.ipynb
---
...book_Julia_Editing_Linear_Regression.ipynb | 2500 +++++++++++++++++
1 file changed, 2500 insertions(+)
create mode 100644 2-Regression/3-Linear/solution/Notebook_Julia_Editing_Linear_Regression.ipynb
diff --git a/2-Regression/3-Linear/solution/Notebook_Julia_Editing_Linear_Regression.ipynb b/2-Regression/3-Linear/solution/Notebook_Julia_Editing_Linear_Regression.ipynb
new file mode 100644
index 00000000..36473d38
--- /dev/null
+++ b/2-Regression/3-Linear/solution/Notebook_Julia_Editing_Linear_Regression.ipynb
@@ -0,0 +1,2500 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Linear Regression using Julia \n",
+ "\n",
+ "This notebook presents a Julia version of the solution. It applies linear regression to the Boston dataset. The polynomial regression part is not included because it is still in progress."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# import Pkg\n",
+ "# Pkg.add(\"Plots\")\n",
+ "# Pkg.add(\"Lathe\")\n",
+ "# Pkg.add(\"GLM\")\n",
+ "# Pkg.add(\"StatsPlots\")\n",
+ "# Pkg.add(\"MLBase\")\n",
+ "# Pkg.add(\"Metrics\") \n",
+ "\n",
+ "# Uncomment the lines above to install any libraries you are missing (the next cell also needs DataFrames, CSV and Polynomials)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# packages we will use \n",
+ "using Plots\n",
+ "using Lathe\n",
+ "using GLM\n",
+ "using Statistics\n",
+ "using StatsPlots\n",
+ "using MLBase \n",
+ "using Metrics \n",
+ "using DataFrames, CSV\n",
+ "using Polynomials"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
5 rows × 14 columns (omitted printing of 5 columns)
| CRIM | ZN | INDUS | CHAS | NOX | RM | AGE | DIS | RAD |
---|
| Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 |
---|
1 | 0.00632 | 18.0 | 2.31 | 0.0 | 0.538 | 6.575 | 65.2 | 4.09 | 1.0 |
---|
2 | 0.02731 | 0.0 | 7.07 | 0.0 | 0.469 | 6.421 | 78.9 | 4.9671 | 2.0 |
---|
3 | 0.02729 | 0.0 | 7.07 | 0.0 | 0.469 | 7.185 | 61.1 | 4.9671 | 2.0 |
---|
4 | 0.03237 | 0.0 | 2.18 | 0.0 | 0.458 | 6.998 | 45.8 | 6.0622 | 3.0 |
---|
5 | 0.06905 | 0.0 | 2.18 | 0.0 | 0.458 | 7.147 | 54.2 | 6.0622 | 3.0 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cccccccccc}\n",
+ "\t& CRIM & ZN & INDUS & CHAS & NOX & RM & AGE & DIS & RAD & \\\\\n",
+ "\t\\hline\n",
+ "\t& Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & \\\\\n",
+ "\t\\hline\n",
+ "\t1 & 0.00632 & 18.0 & 2.31 & 0.0 & 0.538 & 6.575 & 65.2 & 4.09 & 1.0 & $\\dots$ \\\\\n",
+ "\t2 & 0.02731 & 0.0 & 7.07 & 0.0 & 0.469 & 6.421 & 78.9 & 4.9671 & 2.0 & $\\dots$ \\\\\n",
+ "\t3 & 0.02729 & 0.0 & 7.07 & 0.0 & 0.469 & 7.185 & 61.1 & 4.9671 & 2.0 & $\\dots$ \\\\\n",
+ "\t4 & 0.03237 & 0.0 & 2.18 & 0.0 & 0.458 & 6.998 & 45.8 & 6.0622 & 3.0 & $\\dots$ \\\\\n",
+ "\t5 & 0.06905 & 0.0 & 2.18 & 0.0 & 0.458 & 7.147 & 54.2 & 6.0622 & 3.0 & $\\dots$ \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "\u001b[1m5×14 DataFrame\u001b[0m\n",
+ "\u001b[1m Row \u001b[0m│\u001b[1m CRIM \u001b[0m\u001b[1m ZN \u001b[0m\u001b[1m INDUS \u001b[0m\u001b[1m CHAS \u001b[0m\u001b[1m NOX \u001b[0m\u001b[1m RM \u001b[0m\u001b[1m AGE \u001b[0m\u001b[1m DIS \u001b[0m\u001b[1m\u001b[0m ⋯\n",
+ "\u001b[1m \u001b[0m│\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m\u001b[0m ⋯\n",
+ "─────┼──────────────────────────────────────────────────────────────────────────\n",
+ " 1 │ 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.09 ⋯\n",
+ " 2 │ 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671\n",
+ " 3 │ 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671\n",
+ " 4 │ 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622\n",
+ " 5 │ 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 ⋯\n",
+ "\u001b[36m 6 columns omitted\u001b[0m"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# loading the data \n",
+ "\n",
+ "df = CSV.read(\"data\\\\out.csv\", DataFrame)\n",
+ "first(df,5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(506, 14)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# printing the size of the data \n",
+ "\n",
+ "println(size(df))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "14 rows × 7 columns
| variable | mean | min | median | max | nmissing | eltype |
---|
| Symbol | Float64 | Float64 | Float64 | Float64 | Int64 | DataType |
---|
1 | CRIM | 3.61352 | 0.00632 | 0.25651 | 88.9762 | 0 | Float64 |
---|
2 | ZN | 11.3636 | 0.0 | 0.0 | 100.0 | 0 | Float64 |
---|
3 | INDUS | 11.1368 | 0.46 | 9.69 | 27.74 | 0 | Float64 |
---|
4 | CHAS | 0.06917 | 0.0 | 0.0 | 1.0 | 0 | Float64 |
---|
5 | NOX | 0.554695 | 0.385 | 0.538 | 0.871 | 0 | Float64 |
---|
6 | RM | 6.28463 | 3.561 | 6.2085 | 8.78 | 0 | Float64 |
---|
7 | AGE | 68.5749 | 2.9 | 77.5 | 100.0 | 0 | Float64 |
---|
8 | DIS | 3.79504 | 1.1296 | 3.20745 | 12.1265 | 0 | Float64 |
---|
9 | RAD | 9.54941 | 1.0 | 5.0 | 24.0 | 0 | Float64 |
---|
10 | TAX | 408.237 | 187.0 | 330.0 | 711.0 | 0 | Float64 |
---|
11 | PTRATIO | 18.4555 | 12.6 | 19.05 | 22.0 | 0 | Float64 |
---|
12 | B | 356.674 | 0.32 | 391.44 | 396.9 | 0 | Float64 |
---|
13 | LSTAT | 12.6531 | 1.73 | 11.36 | 37.97 | 0 | Float64 |
---|
14 | SalePrice | 22.5328 | 5.0 | 21.2 | 50.0 | 0 | Float64 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|ccccccc}\n",
+ "\t& variable & mean & min & median & max & nmissing & eltype\\\\\n",
+ "\t\\hline\n",
+ "\t& Symbol & Float64 & Float64 & Float64 & Float64 & Int64 & DataType\\\\\n",
+ "\t\\hline\n",
+ "\t1 & CRIM & 3.61352 & 0.00632 & 0.25651 & 88.9762 & 0 & Float64 \\\\\n",
+ "\t2 & ZN & 11.3636 & 0.0 & 0.0 & 100.0 & 0 & Float64 \\\\\n",
+ "\t3 & INDUS & 11.1368 & 0.46 & 9.69 & 27.74 & 0 & Float64 \\\\\n",
+ "\t4 & CHAS & 0.06917 & 0.0 & 0.0 & 1.0 & 0 & Float64 \\\\\n",
+ "\t5 & NOX & 0.554695 & 0.385 & 0.538 & 0.871 & 0 & Float64 \\\\\n",
+ "\t6 & RM & 6.28463 & 3.561 & 6.2085 & 8.78 & 0 & Float64 \\\\\n",
+ "\t7 & AGE & 68.5749 & 2.9 & 77.5 & 100.0 & 0 & Float64 \\\\\n",
+ "\t8 & DIS & 3.79504 & 1.1296 & 3.20745 & 12.1265 & 0 & Float64 \\\\\n",
+ "\t9 & RAD & 9.54941 & 1.0 & 5.0 & 24.0 & 0 & Float64 \\\\\n",
+ "\t10 & TAX & 408.237 & 187.0 & 330.0 & 711.0 & 0 & Float64 \\\\\n",
+ "\t11 & PTRATIO & 18.4555 & 12.6 & 19.05 & 22.0 & 0 & Float64 \\\\\n",
+ "\t12 & B & 356.674 & 0.32 & 391.44 & 396.9 & 0 & Float64 \\\\\n",
+ "\t13 & LSTAT & 12.6531 & 1.73 & 11.36 & 37.97 & 0 & Float64 \\\\\n",
+ "\t14 & SalePrice & 22.5328 & 5.0 & 21.2 & 50.0 & 0 & Float64 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "\u001b[1m14×7 DataFrame\u001b[0m\n",
+ "\u001b[1m Row \u001b[0m│\u001b[1m variable \u001b[0m\u001b[1m mean \u001b[0m\u001b[1m min \u001b[0m\u001b[1m median \u001b[0m\u001b[1m max \u001b[0m\u001b[1m nmissing \u001b[0m\u001b[1m eltype\u001b[0m ⋯\n",
+ "\u001b[1m \u001b[0m│\u001b[90m Symbol \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m DataTy\u001b[0m ⋯\n",
+ "─────┼──────────────────────────────────────────────────────────────────────────\n",
+ " 1 │ CRIM 3.61352 0.00632 0.25651 88.9762 0 Float6 ⋯\n",
+ " 2 │ ZN 11.3636 0.0 0.0 100.0 0 Float6\n",
+ " 3 │ INDUS 11.1368 0.46 9.69 27.74 0 Float6\n",
+ " 4 │ CHAS 0.06917 0.0 0.0 1.0 0 Float6\n",
+ " 5 │ NOX 0.554695 0.385 0.538 0.871 0 Float6 ⋯\n",
+ " 6 │ RM 6.28463 3.561 6.2085 8.78 0 Float6\n",
+ " 7 │ AGE 68.5749 2.9 77.5 100.0 0 Float6\n",
+ " 8 │ DIS 3.79504 1.1296 3.20745 12.1265 0 Float6\n",
+ " 9 │ RAD 9.54941 1.0 5.0 24.0 0 Float6 ⋯\n",
+ " 10 │ TAX 408.237 187.0 330.0 711.0 0 Float6\n",
+ " 11 │ PTRATIO 18.4555 12.6 19.05 22.0 0 Float6\n",
+ " 12 │ B 356.674 0.32 391.44 396.9 0 Float6\n",
+ " 13 │ LSTAT 12.6531 1.73 11.36 37.97 0 Float6 ⋯\n",
+ " 14 │ SalePrice 22.5328 5.0 21.2 50.0 0 Float6\n",
+ "\u001b[36m 1 column omitted\u001b[0m"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# describing the data \n",
+ "\n",
+ "describe(df)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "14-element Vector{String}:\n",
+ " \"CRIM\"\n",
+ " \"ZN\"\n",
+ " \"INDUS\"\n",
+ " \"CHAS\"\n",
+ " \"NOX\"\n",
+ " \"RM\"\n",
+ " \"AGE\"\n",
+ " \"DIS\"\n",
+ " \"RAD\"\n",
+ " \"TAX\"\n",
+ " \"PTRATIO\"\n",
+ " \"B\"\n",
+ " \"LSTAT\"\n",
+ " \"SalePrice\""
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# printing the names of the dataframe \n",
+ "\n",
+ "names(df)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# boxplot for our target variable \n",
+ "\n",
+ "boxplot(df.SalePrice, title = \"Box Plot - SalePrice\", ylabel = \"CRIM\", legend = false)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": [
+ "\n",
+ "