{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Build a Web App using a Regression model to learn about UFO sightings" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datetimecitystatecountryshapeduration (seconds)duration (hours/min)commentsdate postedlatitudelongitude
010/10/1949 20:30san marcostxuscylinder2700.045 minutesThis event took place in early fall around 194...4/27/200429.883056-97.941111
110/10/1949 21:00lackland afbtxNaNlight7200.01-2 hrs1949 Lackland AFB&#44 TX. Lights racing acros...12/16/200529.384210-98.581082
210/10/1955 17:00chester (uk/england)NaNgbcircle20.020 secondsGreen/Orange circular disc over Chester&#44 En...1/21/200853.200000-2.916667
310/10/1956 21:00ednatxuscircle20.01/2 hourMy older brother and twin sister were leaving ...1/17/200428.978333-96.645833
410/10/1960 20:00kaneohehiuslight900.015 minutesAS a Marine 1st Lt. flying an FJ4B fighter/att...1/22/200421.418056-157.803611
\n", "
" ], "text/plain": [ " datetime city state country shape \\\n", "0 10/10/1949 20:30 san marcos tx us cylinder \n", "1 10/10/1949 21:00 lackland afb tx NaN light \n", "2 10/10/1955 17:00 chester (uk/england) NaN gb circle \n", "3 10/10/1956 21:00 edna tx us circle \n", "4 10/10/1960 20:00 kaneohe hi us light \n", "\n", " duration (seconds) duration (hours/min) \\\n", "0 2700.0 45 minutes \n", "1 7200.0 1-2 hrs \n", "2 20.0 20 seconds \n", "3 20.0 1/2 hour \n", "4 900.0 15 minutes \n", "\n", " comments date posted latitude \\\n", "0 This event took place in early fall around 194... 4/27/2004 29.883056 \n", "1 1949 Lackland AFB, TX. Lights racing acros... 12/16/2005 29.384210 \n", "2 Green/Orange circular disc over Chester, En... 1/21/2008 53.200000 \n", "3 My older brother and twin sister were leaving ... 1/17/2004 28.978333 \n", "4 AS a Marine 1st Lt. flying an FJ4B fighter/att... 1/22/2004 21.418056 \n", "\n", " longitude \n", "0 -97.941111 \n", "1 -98.581082 \n", "2 -2.916667 \n", "3 -96.645833 \n", "4 -157.803611 " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "\n", "# Keep the CSV contents under their own name so the cells below can be re-run in any order.\n", "ufos_raw = pd.read_csv('../data/ufos.csv')\n", "ufos_raw.head()\n" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "# Keep only the four columns used for modelling, under shorter names.\n", "# Built from ufos_raw (not from ufos itself) so re-running this cell does not raise KeyError.\n", "ufos = ufos_raw[['duration (seconds)', 'country', 'latitude', 'longitude']].rename(\n", "    columns={'duration (seconds)': 'Seconds', 'country': 'Country', 'latitude': 'Latitude', 'longitude': 'Longitude'}\n", ")\n", "\n", "ufos.Country.unique()\n", "\n", "# Codes a LabelEncoder assigns to these values in a later cell: 0 au, 1 ca, 2 de, 3 gb, 4 us" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 25863 entries, 2 to 80330\n", "Data columns (total 4 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Seconds 25863 non-null float64\n", " 1 Country 25863 non-null object \n", " 2 Latitude 25863 non-null float64\n", " 3 Longitude 25863 non-null float64\n", "dtypes: float64(3), object(1)\n", "memory usage: 1010.3+ KB\n" ] } ], "source": [ "# Drop rows with any missing value; rebinding (instead of inplace=True) keeps the cell re-runnable.\n", "ufos = ufos.dropna()\n", "\n", "# Keep sightings that lasted between 1 and 60 seconds, both bounds included.\n", "ufos = ufos[ufos['Seconds'].between(1, 60)]\n", "\n", "ufos.info()" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SecondsCountryLatitudeLongitude
220.0353.200000-2.916667
320.0428.978333-96.645833
1430.0435.823889-80.253611
2360.0445.582778-122.352222
243.0351.783333-0.783333
\n", "
" ], "text/plain": [ " Seconds Country Latitude Longitude\n", "2 20.0 3 53.200000 -2.916667\n", "3 20.0 4 28.978333 -96.645833\n", "14 30.0 4 35.823889 -80.253611\n", "23 60.0 4 45.582778 -122.352222\n", "24 3.0 3 51.783333 -0.783333" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.preprocessing import LabelEncoder\n", "\n", "# Replace the country strings with integer codes (LabelEncoder numbers the sorted labels).\n", "# assign() returns a new frame rather than writing into the filtered slice from the previous cell.\n", "country_encoder = LabelEncoder()\n", "ufos = ufos.assign(Country=country_encoder.fit_transform(ufos['Country']))\n", "\n", "ufos.head()" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", "# Features: sighting duration and location. Target: the encoded country.\n", "Selected_features = ['Seconds','Latitude','Longitude']\n", "\n", "X = ufos[Selected_features]\n", "y = ufos['Country']\n", "\n", "# Hold out 20% of the rows for evaluation; the fixed random_state makes the split repeatable.\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import accuracy_score, classification_report \n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.pipeline import make_pipeline\n", "from sklearn.preprocessing import StandardScaler\n", "\n", "# A bare LogisticRegression() on the unscaled features stopped at lbfgs's iteration limit\n", "# (ConvergenceWarning). Standardise the features and raise max_iter, as that warning advises.\n", "# The pipeline exposes the same fit/predict interface as the bare estimator.\n", "model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))\n", "model.fit(X_train, y_train)\n", "predictions = model.predict(X_test)\n", "\n", "print(classification_report(y_test, predictions))\n", "print('Predicted labels: ', predictions)\n", "print('Accuracy: ', accuracy_score(y_test, predictions))\n" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pickle\n", "model_filename = 'ufo-model.pkl'\n", "\n", "# Context managers close the file handles even if pickling or unpickling fails.\n", "with open(model_filename, 'wb') as model_file:\n", "    pickle.dump(model, model_file)\n", "\n", "# Only unpickle files you created yourself: pickle.load can execute arbitrary code.\n", "with open(model_filename, 'rb') as model_file:\n", "    model = pickle.load(model_file)\n", "\n", "# Pass the sample with the training column names (Seconds, Latitude, Longitude) so that\n", "# scikit-learn does not warn about missing feature names.\n", "sample = pd.DataFrame([[50, 44, -12]], columns=Selected_features)\n", "print(model.predict(sample))\n" ] }
], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.3" }, "metadata": { "interpreter": { "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" } }, "orig_nbformat": 2 }, "nbformat": 4, "nbformat_minor": 2 }