pull/717/head
jnopareboateng 2 years ago
parent 78fdf4a8fc
commit f806bf4262

3
.gitignore vendored

@ -360,4 +360,5 @@ MigrationBackup/
.Rdata
.Rhistory
ML-For-Beginners.Rproj
.venv
.obsidian

File diff suppressed because one or more lines are too long

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@ -744,12 +744,80 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"from sklearn import datasets,"
"linnerud = datasets.load_linnerud()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Features: ['Chins', 'Situps', 'Jumps']\n"
]
}
],
"source": [
"features = print(\"Features: \", linnerud.feature_names)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Labels: ['Weight', 'Waist', 'Pulse']\n"
]
}
],
"source": [
"target = print(\"Labels: \", linnerud.target_names)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "Found input variables with inconsistent numbers of samples: [14, 6]",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32md:\\DEV WORK\\Data Science Library\\ML-For-Beginners\\2-Regression\\1-Tools\\notebook.ipynb Cell 15\u001b[0m line \u001b[0;36m2\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/DEV%20WORK/Data%20Science%20Library/ML-For-Beginners/2-Regression/1-Tools/notebook.ipynb#X40sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m model \u001b[39m=\u001b[39m linear_model\u001b[39m.\u001b[39mLinearRegression()\n\u001b[1;32m----> <a href='vscode-notebook-cell:/d%3A/DEV%20WORK/Data%20Science%20Library/ML-For-Beginners/2-Regression/1-Tools/notebook.ipynb#X40sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m model\u001b[39m.\u001b[39;49mfit(X_train,y_train)\n",
"File \u001b[1;32md:\\DEV WORK\\Data Science Library\\ML-For-Beginners\\.venv\\lib\\site-packages\\sklearn\\base.py:1151\u001b[0m, in \u001b[0;36m_fit_context.<locals>.decorator.<locals>.wrapper\u001b[1;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1144\u001b[0m estimator\u001b[39m.\u001b[39m_validate_params()\n\u001b[0;32m 1146\u001b[0m \u001b[39mwith\u001b[39;00m config_context(\n\u001b[0;32m 1147\u001b[0m skip_parameter_validation\u001b[39m=\u001b[39m(\n\u001b[0;32m 1148\u001b[0m prefer_skip_nested_validation \u001b[39mor\u001b[39;00m global_skip_validation\n\u001b[0;32m 1149\u001b[0m )\n\u001b[0;32m 1150\u001b[0m ):\n\u001b[1;32m-> 1151\u001b[0m \u001b[39mreturn\u001b[39;00m fit_method(estimator, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
"File \u001b[1;32md:\\DEV WORK\\Data Science Library\\ML-For-Beginners\\.venv\\lib\\site-packages\\sklearn\\linear_model\\_base.py:678\u001b[0m, in \u001b[0;36mLinearRegression.fit\u001b[1;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[0;32m 674\u001b[0m n_jobs_ \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_jobs\n\u001b[0;32m 676\u001b[0m accept_sparse \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mpositive \u001b[39melse\u001b[39;00m [\u001b[39m\"\u001b[39m\u001b[39mcsr\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mcsc\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mcoo\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[1;32m--> 678\u001b[0m X, y \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_validate_data(\n\u001b[0;32m 679\u001b[0m X, y, accept_sparse\u001b[39m=\u001b[39;49maccept_sparse, y_numeric\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, multi_output\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m\n\u001b[0;32m 680\u001b[0m )\n\u001b[0;32m 682\u001b[0m has_sw \u001b[39m=\u001b[39m sample_weight \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m 683\u001b[0m \u001b[39mif\u001b[39;00m has_sw:\n",
"File \u001b[1;32md:\\DEV WORK\\Data Science Library\\ML-For-Beginners\\.venv\\lib\\site-packages\\sklearn\\base.py:621\u001b[0m, in \u001b[0;36mBaseEstimator._validate_data\u001b[1;34m(self, X, y, reset, validate_separately, cast_to_ndarray, **check_params)\u001b[0m\n\u001b[0;32m 619\u001b[0m y \u001b[39m=\u001b[39m check_array(y, input_name\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39my\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mcheck_y_params)\n\u001b[0;32m 620\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m--> 621\u001b[0m X, y \u001b[39m=\u001b[39m check_X_y(X, y, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mcheck_params)\n\u001b[0;32m 622\u001b[0m out \u001b[39m=\u001b[39m X, y\n\u001b[0;32m 624\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m no_val_X \u001b[39mand\u001b[39;00m check_params\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mensure_2d\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mTrue\u001b[39;00m):\n",
"File \u001b[1;32md:\\DEV WORK\\Data Science Library\\ML-For-Beginners\\.venv\\lib\\site-packages\\sklearn\\utils\\validation.py:1165\u001b[0m, in \u001b[0;36mcheck_X_y\u001b[1;34m(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)\u001b[0m\n\u001b[0;32m 1147\u001b[0m X \u001b[39m=\u001b[39m check_array(\n\u001b[0;32m 1148\u001b[0m X,\n\u001b[0;32m 1149\u001b[0m accept_sparse\u001b[39m=\u001b[39maccept_sparse,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1160\u001b[0m input_name\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mX\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m 1161\u001b[0m )\n\u001b[0;32m 1163\u001b[0m y \u001b[39m=\u001b[39m _check_y(y, multi_output\u001b[39m=\u001b[39mmulti_output, y_numeric\u001b[39m=\u001b[39my_numeric, estimator\u001b[39m=\u001b[39mestimator)\n\u001b[1;32m-> 1165\u001b[0m check_consistent_length(X, y)\n\u001b[0;32m 1167\u001b[0m \u001b[39mreturn\u001b[39;00m X, y\n",
"File \u001b[1;32md:\\DEV WORK\\Data Science Library\\ML-For-Beginners\\.venv\\lib\\site-packages\\sklearn\\utils\\validation.py:409\u001b[0m, in \u001b[0;36mcheck_consistent_length\u001b[1;34m(*arrays)\u001b[0m\n\u001b[0;32m 407\u001b[0m uniques \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39munique(lengths)\n\u001b[0;32m 408\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(uniques) \u001b[39m>\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[1;32m--> 409\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 410\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mFound input variables with inconsistent numbers of samples: \u001b[39m\u001b[39m%r\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m 411\u001b[0m \u001b[39m%\u001b[39m [\u001b[39mint\u001b[39m(l) \u001b[39mfor\u001b[39;00m l \u001b[39min\u001b[39;00m lengths]\n\u001b[0;32m 412\u001b[0m )\n",
"\u001b[1;31mValueError\u001b[0m: Found input variables with inconsistent numbers of samples: [14, 6]"
]
}
],
"source": [
"model = linear_model.LinearRegression()\n",
"model.fit(X_train,y_train)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@ -1,5 +1,407 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>datetime</th>\n",
" <th>city</th>\n",
" <th>state</th>\n",
" <th>country</th>\n",
" <th>shape</th>\n",
" <th>duration (seconds)</th>\n",
" <th>duration (hours/min)</th>\n",
" <th>comments</th>\n",
" <th>date posted</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10/10/1949 20:30</td>\n",
" <td>san marcos</td>\n",
" <td>tx</td>\n",
" <td>us</td>\n",
" <td>cylinder</td>\n",
" <td>2700.0</td>\n",
" <td>45 minutes</td>\n",
" <td>This event took place in early fall around 194...</td>\n",
" <td>4/27/2004</td>\n",
" <td>29.883056</td>\n",
" <td>-97.941111</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10/10/1949 21:00</td>\n",
" <td>lackland afb</td>\n",
" <td>tx</td>\n",
" <td>NaN</td>\n",
" <td>light</td>\n",
" <td>7200.0</td>\n",
" <td>1-2 hrs</td>\n",
" <td>1949 Lackland AFB&amp;#44 TX. Lights racing acros...</td>\n",
" <td>12/16/2005</td>\n",
" <td>29.384210</td>\n",
" <td>-98.581082</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10/10/1955 17:00</td>\n",
" <td>chester (uk/england)</td>\n",
" <td>NaN</td>\n",
" <td>gb</td>\n",
" <td>circle</td>\n",
" <td>20.0</td>\n",
" <td>20 seconds</td>\n",
" <td>Green/Orange circular disc over Chester&amp;#44 En...</td>\n",
" <td>1/21/2008</td>\n",
" <td>53.200000</td>\n",
" <td>-2.916667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10/10/1956 21:00</td>\n",
" <td>edna</td>\n",
" <td>tx</td>\n",
" <td>us</td>\n",
" <td>circle</td>\n",
" <td>20.0</td>\n",
" <td>1/2 hour</td>\n",
" <td>My older brother and twin sister were leaving ...</td>\n",
" <td>1/17/2004</td>\n",
" <td>28.978333</td>\n",
" <td>-96.645833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10/10/1960 20:00</td>\n",
" <td>kaneohe</td>\n",
" <td>hi</td>\n",
" <td>us</td>\n",
" <td>light</td>\n",
" <td>900.0</td>\n",
" <td>15 minutes</td>\n",
" <td>AS a Marine 1st Lt. flying an FJ4B fighter/att...</td>\n",
" <td>1/22/2004</td>\n",
" <td>21.418056</td>\n",
" <td>-157.803611</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" datetime city state country shape \\\n",
"0 10/10/1949 20:30 san marcos tx us cylinder \n",
"1 10/10/1949 21:00 lackland afb tx NaN light \n",
"2 10/10/1955 17:00 chester (uk/england) NaN gb circle \n",
"3 10/10/1956 21:00 edna tx us circle \n",
"4 10/10/1960 20:00 kaneohe hi us light \n",
"\n",
" duration (seconds) duration (hours/min) \\\n",
"0 2700.0 45 minutes \n",
"1 7200.0 1-2 hrs \n",
"2 20.0 20 seconds \n",
"3 20.0 1/2 hour \n",
"4 900.0 15 minutes \n",
"\n",
" comments date posted latitude \\\n",
"0 This event took place in early fall around 194... 4/27/2004 29.883056 \n",
"1 1949 Lackland AFB&#44 TX. Lights racing acros... 12/16/2005 29.384210 \n",
"2 Green/Orange circular disc over Chester&#44 En... 1/21/2008 53.200000 \n",
"3 My older brother and twin sister were leaving ... 1/17/2004 28.978333 \n",
"4 AS a Marine 1st Lt. flying an FJ4B fighter/att... 1/22/2004 21.418056 \n",
"\n",
" longitude \n",
"0 -97.941111 \n",
"1 -98.581082 \n",
"2 -2.916667 \n",
"3 -96.645833 \n",
"4 -157.803611 "
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"ufos = pd.read_csv('./data/ufos.csv')\n",
"ufos.head()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n",
"\n",
"ufos.Country.unique()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 25863 entries, 2 to 80330\n",
"Data columns (total 4 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Seconds 25863 non-null float64\n",
" 1 Country 25863 non-null object \n",
" 2 Latitude 25863 non-null float64\n",
" 3 Longitude 25863 non-null float64\n",
"dtypes: float64(3), object(1)\n",
"memory usage: 1010.3+ KB\n"
]
}
],
"source": [
"ufos.dropna(inplace=True)\n",
"\n",
"ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]\n",
"\n",
"ufos.info()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Seconds</th>\n",
" <th>Country</th>\n",
" <th>Latitude</th>\n",
" <th>Longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>20.0</td>\n",
" <td>3</td>\n",
" <td>53.200000</td>\n",
" <td>-2.916667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>20.0</td>\n",
" <td>4</td>\n",
" <td>28.978333</td>\n",
" <td>-96.645833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>30.0</td>\n",
" <td>4</td>\n",
" <td>35.823889</td>\n",
" <td>-80.253611</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>60.0</td>\n",
" <td>4</td>\n",
" <td>45.582778</td>\n",
" <td>-122.352222</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>3.0</td>\n",
" <td>3</td>\n",
" <td>51.783333</td>\n",
" <td>-0.783333</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Seconds Country Latitude Longitude\n",
"2 20.0 3 53.200000 -2.916667\n",
"3 20.0 4 28.978333 -96.645833\n",
"14 30.0 4 35.823889 -80.253611\n",
"23 60.0 4 45.582778 -122.352222\n",
"24 3.0 3 51.783333 -0.783333"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])\n",
"\n",
"ufos.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"Selected_features = ['Seconds','Latitude','Longitude']\n",
"\n",
"X = ufos[Selected_features]\n",
"y = ufos['Country']\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 1.00 1.00 1.00 41\n",
" 1 0.83 0.24 0.37 250\n",
" 2 1.00 1.00 1.00 8\n",
" 3 1.00 1.00 1.00 131\n",
" 4 0.96 1.00 0.98 4743\n",
"\n",
" accuracy 0.96 5173\n",
" macro avg 0.96 0.85 0.87 5173\n",
"weighted avg 0.96 0.96 0.95 5173\n",
"\n",
"Predicted labels: [4 4 4 ... 3 4 4]\n",
"Accuracy: 0.9609510922095496\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\DEV WORK\\Data Science Library\\ML-For-Beginners\\.venv\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
"\n",
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
"Please also refer to the documentation for alternative solver options:\n",
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
" n_iter_i = _check_optimize_result(\n"
]
}
],
"source": [
"from sklearn.metrics import accuracy_score, classification_report\n",
"from sklearn.linear_model import LogisticRegression\n",
"model = LogisticRegression()\n",
"model.fit(X_train, y_train)\n",
"predictions = model.predict(X_test)\n",
"\n",
"print(classification_report(y_test, predictions))\n",
"print('Predicted labels: ', predictions)\n",
"print('Accuracy: ', accuracy_score(y_test, predictions))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\DEV WORK\\Data Science Library\\ML-For-Beginners\\.venv\\lib\\site-packages\\sklearn\\base.py:464: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
" warnings.warn(\n"
]
}
],
"source": [
"import pickle\n",
"model_filename = 'ufo-model.pkl'\n",
"pickle.dump(model, open(model_filename,'wb'))\n",
"\n",
"model = pickle.load(open('ufo-model.pkl','rb'))\n",
"print(model.predict([[50,44,-12]]))"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -14,6 +416,18 @@
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
},
"orig_nbformat": 4
},
"nbformat": 4,

Binary file not shown.
Loading…
Cancel
Save