Merge pull request #11 from PaskalSunari/ML-Patch-2

change
pull/872/head
Paskal Sunari 3 months ago committed by GitHub
commit 3a89bf10ff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 28,
"metadata": {},
"outputs": [
{
@ -151,7 +151,7 @@
"4 -157.803611 "
]
},
"execution_count": 11,
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
@ -165,7 +165,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 29,
"metadata": {},
"outputs": [
{
@ -174,7 +174,7 @@
"array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)"
]
},
"execution_count": 12,
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
@ -191,7 +191,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 30,
"metadata": {},
"outputs": [
{
@ -220,7 +220,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 31,
"metadata": {},
"outputs": [
{
@ -299,7 +299,7 @@
"24 3.0 3 51.783333 -0.783333"
]
},
"execution_count": 15,
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
@ -312,7 +312,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
@ -337,7 +337,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 35,
"metadata": {},
"outputs": [
{
@ -397,8 +397,30 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model loaded from disk [3]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\AI\\MachineLearning\\ML-For-Beginners\\.venv\\Lib\\site-packages\\sklearn\\utils\\validation.py:2739: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
" warnings.warn(\n"
]
}
],
"source": [
"import pickle\n",
"\n",
"# Persist the trained model, then reload it to confirm the round trip works.\n",
"# The `with` blocks close each file handle deterministically, so the pickle\n",
"# bytes are flushed to disk before the file is reopened for reading.\n",
"model_filename = 'ufos_model.pkl'\n",
"with open(model_filename, 'wb') as model_file:\n",
"    pickle.dump(model, model_file)\n",
"\n",
"# NOTE: only unpickle files you created yourself; pickle.load can run arbitrary code.\n",
"with open(model_filename, 'rb') as model_file:\n",
"    model = pickle.load(model_file)\n",
"print('Model loaded from disk', model.predict([[50, 44, -12]]))"
]
}
],
"metadata": {

@ -1,46 +1,126 @@
{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7",
"display_name": "Python 3.7.0 64-bit ('3.7')"
},
"metadata": {
"interpreter": {
"hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
}
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Build a Web App using a Regression model to learn about UFO sightings"
],
"cell_type": "markdown",
"metadata": {}
]
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>datetime</th>\n",
" <th>city</th>\n",
" <th>state</th>\n",
" <th>country</th>\n",
" <th>shape</th>\n",
" <th>duration (seconds)</th>\n",
" <th>duration (hours/min)</th>\n",
" <th>comments</th>\n",
" <th>date posted</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10/10/1949 20:30</td>\n",
" <td>san marcos</td>\n",
" <td>tx</td>\n",
" <td>us</td>\n",
" <td>cylinder</td>\n",
" <td>2700.0</td>\n",
" <td>45 minutes</td>\n",
" <td>This event took place in early fall around 194...</td>\n",
" <td>4/27/2004</td>\n",
" <td>29.883056</td>\n",
" <td>-97.941111</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10/10/1949 21:00</td>\n",
" <td>lackland afb</td>\n",
" <td>tx</td>\n",
" <td>NaN</td>\n",
" <td>light</td>\n",
" <td>7200.0</td>\n",
" <td>1-2 hrs</td>\n",
" <td>1949 Lackland AFB&amp;#44 TX. Lights racing acros...</td>\n",
" <td>12/16/2005</td>\n",
" <td>29.384210</td>\n",
" <td>-98.581082</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10/10/1955 17:00</td>\n",
" <td>chester (uk/england)</td>\n",
" <td>NaN</td>\n",
" <td>gb</td>\n",
" <td>circle</td>\n",
" <td>20.0</td>\n",
" <td>20 seconds</td>\n",
" <td>Green/Orange circular disc over Chester&amp;#44 En...</td>\n",
" <td>1/21/2008</td>\n",
" <td>53.200000</td>\n",
" <td>-2.916667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10/10/1956 21:00</td>\n",
" <td>edna</td>\n",
" <td>tx</td>\n",
" <td>us</td>\n",
" <td>circle</td>\n",
" <td>20.0</td>\n",
" <td>1/2 hour</td>\n",
" <td>My older brother and twin sister were leaving ...</td>\n",
" <td>1/17/2004</td>\n",
" <td>28.978333</td>\n",
" <td>-96.645833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10/10/1960 20:00</td>\n",
" <td>kaneohe</td>\n",
" <td>hi</td>\n",
" <td>us</td>\n",
" <td>light</td>\n",
" <td>900.0</td>\n",
" <td>15 minutes</td>\n",
" <td>AS a Marine 1st Lt. flying an FJ4B fighter/att...</td>\n",
" <td>1/22/2004</td>\n",
" <td>21.418056</td>\n",
" <td>-157.803611</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" datetime city state country shape \\\n",
"0 10/10/1949 20:30 san marcos tx us cylinder \n",
@ -69,11 +149,11 @@
"2 -2.916667 \n",
"3 -96.645833 \n",
"4 -157.803611 "
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>datetime</th>\n <th>city</th>\n <th>state</th>\n <th>country</th>\n <th>shape</th>\n <th>duration (seconds)</th>\n <th>duration (hours/min)</th>\n <th>comments</th>\n <th>date posted</th>\n <th>latitude</th>\n <th>longitude</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10/10/1949 20:30</td>\n <td>san marcos</td>\n <td>tx</td>\n <td>us</td>\n <td>cylinder</td>\n <td>2700.0</td>\n <td>45 minutes</td>\n <td>This event took place in early fall around 194...</td>\n <td>4/27/2004</td>\n <td>29.883056</td>\n <td>-97.941111</td>\n </tr>\n <tr>\n <th>1</th>\n <td>10/10/1949 21:00</td>\n <td>lackland afb</td>\n <td>tx</td>\n <td>NaN</td>\n <td>light</td>\n <td>7200.0</td>\n <td>1-2 hrs</td>\n <td>1949 Lackland AFB&amp;#44 TX. Lights racing acros...</td>\n <td>12/16/2005</td>\n <td>29.384210</td>\n <td>-98.581082</td>\n </tr>\n <tr>\n <th>2</th>\n <td>10/10/1955 17:00</td>\n <td>chester (uk/england)</td>\n <td>NaN</td>\n <td>gb</td>\n <td>circle</td>\n <td>20.0</td>\n <td>20 seconds</td>\n <td>Green/Orange circular disc over Chester&amp;#44 En...</td>\n <td>1/21/2008</td>\n <td>53.200000</td>\n <td>-2.916667</td>\n </tr>\n <tr>\n <th>3</th>\n <td>10/10/1956 21:00</td>\n <td>edna</td>\n <td>tx</td>\n <td>us</td>\n <td>circle</td>\n <td>20.0</td>\n <td>1/2 hour</td>\n <td>My older brother and twin sister were leaving ...</td>\n <td>1/17/2004</td>\n <td>28.978333</td>\n <td>-96.645833</td>\n </tr>\n <tr>\n <th>4</th>\n <td>10/10/1960 20:00</td>\n <td>kaneohe</td>\n <td>hi</td>\n <td>us</td>\n <td>light</td>\n <td>900.0</td>\n <td>15 minutes</td>\n <td>AS a Marine 1st Lt. 
flying an FJ4B fighter/att...</td>\n <td>1/22/2004</td>\n <td>21.418056</td>\n <td>-157.803611</td>\n </tr>\n </tbody>\n</table>\n</div>"
]
},
"execution_count": 2,
"metadata": {},
"execution_count": 23
"output_type": "execute_result"
}
],
"source": [
@ -86,18 +166,18 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)"
]
},
"execution_count": 3,
"metadata": {},
"execution_count": 24
"output_type": "execute_result"
}
],
"source": [
@ -111,14 +191,24 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\nInt64Index: 25863 entries, 2 to 80330\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 25863 non-null float64\n 1 Country 25863 non-null object \n 2 Latitude 25863 non-null float64\n 3 Longitude 25863 non-null float64\ndtypes: float64(3), object(1)\nmemory usage: 1010.3+ KB\n"
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 25863 entries, 2 to 80330\n",
"Data columns (total 4 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Seconds 25863 non-null float64\n",
" 1 Country 25863 non-null object \n",
" 2 Latitude 25863 non-null float64\n",
" 3 Longitude 25863 non-null float64\n",
"dtypes: float64(3), object(1)\n",
"memory usage: 1010.3+ KB\n"
]
}
],
@ -132,12 +222,76 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Seconds</th>\n",
" <th>Country</th>\n",
" <th>Latitude</th>\n",
" <th>Longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>20.0</td>\n",
" <td>3</td>\n",
" <td>53.200000</td>\n",
" <td>-2.916667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>20.0</td>\n",
" <td>4</td>\n",
" <td>28.978333</td>\n",
" <td>-96.645833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>30.0</td>\n",
" <td>4</td>\n",
" <td>35.823889</td>\n",
" <td>-80.253611</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>60.0</td>\n",
" <td>4</td>\n",
" <td>45.582778</td>\n",
" <td>-122.352222</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>3.0</td>\n",
" <td>3</td>\n",
" <td>51.783333</td>\n",
" <td>-0.783333</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Seconds Country Latitude Longitude\n",
"2 20.0 3 53.200000 -2.916667\n",
@ -145,11 +299,11 @@
"14 30.0 4 35.823889 -80.253611\n",
"23 60.0 4 45.582778 -122.352222\n",
"24 3.0 3 51.783333 -0.783333"
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Seconds</th>\n <th>Country</th>\n <th>Latitude</th>\n <th>Longitude</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2</th>\n <td>20.0</td>\n <td>3</td>\n <td>53.200000</td>\n <td>-2.916667</td>\n </tr>\n <tr>\n <th>3</th>\n <td>20.0</td>\n <td>4</td>\n <td>28.978333</td>\n <td>-96.645833</td>\n </tr>\n <tr>\n <th>14</th>\n <td>30.0</td>\n <td>4</td>\n <td>35.823889</td>\n <td>-80.253611</td>\n </tr>\n <tr>\n <th>23</th>\n <td>60.0</td>\n <td>4</td>\n <td>45.582778</td>\n <td>-122.352222</td>\n </tr>\n <tr>\n <th>24</th>\n <td>3.0</td>\n <td>3</td>\n <td>51.783333</td>\n <td>-0.783333</td>\n </tr>\n </tbody>\n</table>\n</div>"
]
},
"execution_count": 5,
"metadata": {},
"execution_count": 26
"output_type": "execute_result"
}
],
"source": [
@ -162,7 +316,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@ -179,37 +333,55 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"name": "stderr",
"text": [
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
" precision recall f1-score support\n",
"\n",
" 0 1.00 1.00 1.00 41\n",
" 1 1.00 0.02 0.05 250\n",
" 2 0.00 0.00 0.00 8\n",
" 3 0.94 1.00 0.97 131\n",
" 4 0.95 1.00 0.97 4743\n",
" 1 0.82 0.22 0.35 250\n",
" 2 1.00 1.00 1.00 8\n",
" 3 1.00 1.00 1.00 131\n",
" 4 0.96 1.00 0.98 4743\n",
"\n",
" accuracy 0.95 5173\n",
" macro avg 0.78 0.60 0.60 5173\n",
"weighted avg 0.95 0.95 0.93 5173\n",
" accuracy 0.96 5173\n",
" macro avg 0.96 0.84 0.87 5173\n",
"weighted avg 0.96 0.96 0.95 5173\n",
"\n",
"Predicted labels: [4 4 4 ... 3 4 4]\n",
"Accuracy: 0.9512855209742895\n",
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
" 'precision', 'predicted', average, warn_for)\n"
"Accuracy: 0.9601778465107288\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\AI\\MachineLearning\\ML-For-Beginners\\.venv\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:465: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
"STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.\n",
"\n",
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
"Please also refer to the documentation for alternative solver options:\n",
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
" n_iter_i = _check_optimize_result(\n"
]
}
],
"source": [
"# from sklearn.model_selection import train_test_split\n",
"# from sklearn.metrics import accuracy_score, classification_report \n",
"# from sklearn.linear_model import LogisticRegression\n",
"# model = LogisticRegression()\n",
"# model.fit(X_train, y_train)\n",
"# predictions = model.predict(X_test)\n",
"\n",
"# print(classification_report(y_test, predictions))\n",
"# print('Predicted labels: ', predictions)\n",
"# print('Accuracy: ', accuracy_score(y_test, predictions))\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score, classification_report \n",
"from sklearn.linear_model import LogisticRegression\n",
@ -225,20 +397,28 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": null,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"[3]\n"
"[1]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\AI\\MachineLearning\\ML-For-Beginners\\.venv\\Lib\\site-packages\\sklearn\\utils\\validation.py:2739: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
" warnings.warn(\n"
]
}
],
"source": [
"import pickle\n",
"model_filename = 'ufo-model.pkl'\n",
"model_filename = 'ufo-model.pkl'\n",
"pickle.dump(model, open(model_filename,'wb'))\n",
"\n",
"model = pickle.load(open('ufo-model.pkl','rb'))\n",
@ -252,5 +432,32 @@
"outputs": [],
"source": []
}
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.3"
},
"metadata": {
"interpreter": {
"hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
}
},
"orig_nbformat": 2
},
"nbformat": 4,
"nbformat_minor": 2
}

Loading…
Cancel
Save