{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Build a Web App using a Regression model to learn about UFO sighting"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" datetime | \n",
" city | \n",
" state | \n",
" country | \n",
" shape | \n",
" duration (seconds) | \n",
" duration (hours/min) | \n",
" comments | \n",
" date posted | \n",
" latitude | \n",
" longitude | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 10/10/1949 20:30 | \n",
" san marcos | \n",
" tx | \n",
" us | \n",
" cylinder | \n",
" 2700.0 | \n",
" 45 minutes | \n",
" This event took place in early fall around 194... | \n",
" 4/27/2004 | \n",
" 29.883056 | \n",
" -97.941111 | \n",
"
\n",
" \n",
" | 1 | \n",
" 10/10/1949 21:00 | \n",
" lackland afb | \n",
" tx | \n",
" NaN | \n",
" light | \n",
" 7200.0 | \n",
" 1-2 hrs | \n",
" 1949 Lackland AFB, TX. Lights racing acros... | \n",
" 12/16/2005 | \n",
" 29.384210 | \n",
" -98.581082 | \n",
"
\n",
" \n",
" | 2 | \n",
" 10/10/1955 17:00 | \n",
" chester (uk/england) | \n",
" NaN | \n",
" gb | \n",
" circle | \n",
" 20.0 | \n",
" 20 seconds | \n",
" Green/Orange circular disc over Chester, En... | \n",
" 1/21/2008 | \n",
" 53.200000 | \n",
" -2.916667 | \n",
"
\n",
" \n",
" | 3 | \n",
" 10/10/1956 21:00 | \n",
" edna | \n",
" tx | \n",
" us | \n",
" circle | \n",
" 20.0 | \n",
" 1/2 hour | \n",
" My older brother and twin sister were leaving ... | \n",
" 1/17/2004 | \n",
" 28.978333 | \n",
" -96.645833 | \n",
"
\n",
" \n",
" | 4 | \n",
" 10/10/1960 20:00 | \n",
" kaneohe | \n",
" hi | \n",
" us | \n",
" light | \n",
" 900.0 | \n",
" 15 minutes | \n",
" AS a Marine 1st Lt. flying an FJ4B fighter/att... | \n",
" 1/22/2004 | \n",
" 21.418056 | \n",
" -157.803611 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" datetime city state country shape \\\n",
"0 10/10/1949 20:30 san marcos tx us cylinder \n",
"1 10/10/1949 21:00 lackland afb tx NaN light \n",
"2 10/10/1955 17:00 chester (uk/england) NaN gb circle \n",
"3 10/10/1956 21:00 edna tx us circle \n",
"4 10/10/1960 20:00 kaneohe hi us light \n",
"\n",
" duration (seconds) duration (hours/min) \\\n",
"0 2700.0 45 minutes \n",
"1 7200.0 1-2 hrs \n",
"2 20.0 20 seconds \n",
"3 20.0 1/2 hour \n",
"4 900.0 15 minutes \n",
"\n",
" comments date posted latitude \\\n",
"0 This event took place in early fall around 194... 4/27/2004 29.883056 \n",
"1 1949 Lackland AFB, TX. Lights racing acros... 12/16/2005 29.384210 \n",
"2 Green/Orange circular disc over Chester, En... 1/21/2008 53.200000 \n",
"3 My older brother and twin sister were leaving ... 1/17/2004 28.978333 \n",
"4 AS a Marine 1st Lt. flying an FJ4B fighter/att... 1/22/2004 21.418056 \n",
"\n",
" longitude \n",
"0 -97.941111 \n",
"1 -98.581082 \n",
"2 -2.916667 \n",
"3 -96.645833 \n",
"4 -157.803611 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"ufos = pd.read_csv('../data/ufos.csv')\n",
"ufos.head()\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n",
"\n",
"ufos.Country.unique()\n",
"\n",
"# 0 au, 1 ca, 2 de, 3 gb, 4 us"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Index: 25863 entries, 2 to 80330\n",
"Data columns (total 4 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Seconds 25863 non-null float64\n",
" 1 Country 25863 non-null object \n",
" 2 Latitude 25863 non-null float64\n",
" 3 Longitude 25863 non-null float64\n",
"dtypes: float64(3), object(1)\n",
"memory usage: 1010.3+ KB\n"
]
}
],
"source": [
"ufos.dropna(inplace=True)\n",
"\n",
"ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]\n",
"\n",
"ufos.info()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Seconds | \n",
" Country | \n",
" Latitude | \n",
" Longitude | \n",
"
\n",
" \n",
" \n",
" \n",
" | 2 | \n",
" 20.0 | \n",
" 3 | \n",
" 53.200000 | \n",
" -2.916667 | \n",
"
\n",
" \n",
" | 3 | \n",
" 20.0 | \n",
" 4 | \n",
" 28.978333 | \n",
" -96.645833 | \n",
"
\n",
" \n",
" | 14 | \n",
" 30.0 | \n",
" 4 | \n",
" 35.823889 | \n",
" -80.253611 | \n",
"
\n",
" \n",
" | 23 | \n",
" 60.0 | \n",
" 4 | \n",
" 45.582778 | \n",
" -122.352222 | \n",
"
\n",
" \n",
" | 24 | \n",
" 3.0 | \n",
" 3 | \n",
" 51.783333 | \n",
" -0.783333 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Seconds Country Latitude Longitude\n",
"2 20.0 3 53.200000 -2.916667\n",
"3 20.0 4 28.978333 -96.645833\n",
"14 30.0 4 35.823889 -80.253611\n",
"23 60.0 4 45.582778 -122.352222\n",
"24 3.0 3 51.783333 -0.783333"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])\n",
"\n",
"ufos.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"Selected_features = ['Seconds','Latitude','Longitude']\n",
"\n",
"X = ufos[Selected_features]\n",
"y = ufos['Country']\n",
"\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 1.00 1.00 1.00 41\n",
" 1 0.82 0.22 0.35 250\n",
" 2 1.00 1.00 1.00 8\n",
" 3 1.00 1.00 1.00 131\n",
" 4 0.96 1.00 0.98 4743\n",
"\n",
" accuracy 0.96 5173\n",
" macro avg 0.96 0.84 0.87 5173\n",
"weighted avg 0.96 0.96 0.95 5173\n",
"\n",
"Predicted labels: [4 4 4 ... 3 4 4]\n",
"Accuracy: 0.9601778465107288\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\AI\\MachineLearning\\ML-For-Beginners\\.venv\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:465: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
"STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.\n",
"\n",
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
"Please also refer to the documentation for alternative solver options:\n",
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
" n_iter_i = _check_optimize_result(\n"
]
}
],
"source": [
"# from sklearn.model_selection import train_test_split\n",
"# from sklearn.metrics import accuracy_score, classification_report \n",
"# from sklearn.linear_model import LogisticRegression\n",
"# model = LogisticRegression()\n",
"# model.fit(X_train, y_train)\n",
"# predictions = model.predict(X_test)z\n",
"\n",
"# print(classification_report(y_test, predictions))\n",
"# print('Predicted labels: ', predictions)\n",
"# print('Accuracy: ', accuracy_score(y_test, predictions))\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score, classification_report \n",
"from sklearn.linear_model import LogisticRegression\n",
"model = LogisticRegression()\n",
"model.fit(X_train, y_train)\n",
"predictions = model.predict(X_test)\n",
"\n",
"print(classification_report(y_test, predictions))\n",
"print('Predicted labels: ', predictions)\n",
"print('Accuracy: ', accuracy_score(y_test, predictions))\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\AI\\MachineLearning\\ML-For-Beginners\\.venv\\Lib\\site-packages\\sklearn\\utils\\validation.py:2739: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
" warnings.warn(\n"
]
}
],
"source": [
"import pickle\n",
"model_filename = 'ufo-model.pkl'\n",
"pickle.dump(model, open(model_filename,'wb'))\n",
"\n",
"model = pickle.load(open('ufo-model.pkl','rb'))\n",
"print(model.predict([[50,44,-12]]))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.3"
},
"metadata": {
"interpreter": {
"hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
}
},
"orig_nbformat": 2
},
"nbformat": 4,
"nbformat_minor": 2
}