Before the web app

pull/872/head
Paskal 4 months ago
parent b00f991676
commit e3c43721e1

@ -1,8 +1,423 @@
{
"cells": [],
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Building the Web Application"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>datetime</th>\n",
" <th>city</th>\n",
" <th>state</th>\n",
" <th>country</th>\n",
" <th>shape</th>\n",
" <th>duration (seconds)</th>\n",
" <th>duration (hours/min)</th>\n",
" <th>comments</th>\n",
" <th>date posted</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10/10/1949 20:30</td>\n",
" <td>san marcos</td>\n",
" <td>tx</td>\n",
" <td>us</td>\n",
" <td>cylinder</td>\n",
" <td>2700.0</td>\n",
" <td>45 minutes</td>\n",
" <td>This event took place in early fall around 194...</td>\n",
" <td>4/27/2004</td>\n",
" <td>29.883056</td>\n",
" <td>-97.941111</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10/10/1949 21:00</td>\n",
" <td>lackland afb</td>\n",
" <td>tx</td>\n",
" <td>NaN</td>\n",
" <td>light</td>\n",
" <td>7200.0</td>\n",
" <td>1-2 hrs</td>\n",
" <td>1949 Lackland AFB&amp;#44 TX. Lights racing acros...</td>\n",
" <td>12/16/2005</td>\n",
" <td>29.384210</td>\n",
" <td>-98.581082</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10/10/1955 17:00</td>\n",
" <td>chester (uk/england)</td>\n",
" <td>NaN</td>\n",
" <td>gb</td>\n",
" <td>circle</td>\n",
" <td>20.0</td>\n",
" <td>20 seconds</td>\n",
" <td>Green/Orange circular disc over Chester&amp;#44 En...</td>\n",
" <td>1/21/2008</td>\n",
" <td>53.200000</td>\n",
" <td>-2.916667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10/10/1956 21:00</td>\n",
" <td>edna</td>\n",
" <td>tx</td>\n",
" <td>us</td>\n",
" <td>circle</td>\n",
" <td>20.0</td>\n",
" <td>1/2 hour</td>\n",
" <td>My older brother and twin sister were leaving ...</td>\n",
" <td>1/17/2004</td>\n",
" <td>28.978333</td>\n",
" <td>-96.645833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10/10/1960 20:00</td>\n",
" <td>kaneohe</td>\n",
" <td>hi</td>\n",
" <td>us</td>\n",
" <td>light</td>\n",
" <td>900.0</td>\n",
" <td>15 minutes</td>\n",
" <td>AS a Marine 1st Lt. flying an FJ4B fighter/att...</td>\n",
" <td>1/22/2004</td>\n",
" <td>21.418056</td>\n",
" <td>-157.803611</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" datetime city state country shape \\\n",
"0 10/10/1949 20:30 san marcos tx us cylinder \n",
"1 10/10/1949 21:00 lackland afb tx NaN light \n",
"2 10/10/1955 17:00 chester (uk/england) NaN gb circle \n",
"3 10/10/1956 21:00 edna tx us circle \n",
"4 10/10/1960 20:00 kaneohe hi us light \n",
"\n",
" duration (seconds) duration (hours/min) \\\n",
"0 2700.0 45 minutes \n",
"1 7200.0 1-2 hrs \n",
"2 20.0 20 seconds \n",
"3 20.0 1/2 hour \n",
"4 900.0 15 minutes \n",
"\n",
" comments date posted latitude \\\n",
"0 This event took place in early fall around 194... 4/27/2004 29.883056 \n",
"1 1949 Lackland AFB&#44 TX. Lights racing acros... 12/16/2005 29.384210 \n",
"2 Green/Orange circular disc over Chester&#44 En... 1/21/2008 53.200000 \n",
"3 My older brother and twin sister were leaving ... 1/17/2004 28.978333 \n",
"4 AS a Marine 1st Lt. flying an FJ4B fighter/att... 1/22/2004 21.418056 \n",
"\n",
" longitude \n",
"0 -97.941111 \n",
"1 -98.581082 \n",
"2 -2.916667 \n",
"3 -96.645833 \n",
"4 -157.803611 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the raw UFO sightings CSV and preview its first rows.\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"ufos = pd.read_csv('./data/ufos.csv')\n",
"ufos.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Narrow the frame to the four modelling columns and give them short names.\n",
"ufos = ufos[['duration (seconds)', 'country', 'latitude', 'longitude']].rename(\n",
"    columns={\n",
"        'duration (seconds)': 'Seconds',\n",
"        'country': 'Country',\n",
"        'latitude': 'Latitude',\n",
"        'longitude': 'Longitude',\n",
"    }\n",
")\n",
"ufos['Country'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 25863 entries, 2 to 80330\n",
"Data columns (total 4 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Seconds 25863 non-null float64\n",
" 1 Country 25863 non-null object \n",
" 2 Latitude 25863 non-null float64\n",
" 3 Longitude 25863 non-null float64\n",
"dtypes: float64(3), object(1)\n",
"memory usage: 1010.3+ KB\n"
]
}
],
"source": [
"# Discard rows with any missing value, then keep sightings lasting 1 to 60 seconds\n",
"# (both bounds inclusive).\n",
"ufos = ufos.dropna()\n",
"ufos = ufos[ufos['Seconds'].between(1, 60)]\n",
"ufos.info()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Seconds</th>\n",
" <th>Country</th>\n",
" <th>Latitude</th>\n",
" <th>Longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>20.0</td>\n",
" <td>3</td>\n",
" <td>53.200000</td>\n",
" <td>-2.916667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>20.0</td>\n",
" <td>4</td>\n",
" <td>28.978333</td>\n",
" <td>-96.645833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>30.0</td>\n",
" <td>4</td>\n",
" <td>35.823889</td>\n",
" <td>-80.253611</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>60.0</td>\n",
" <td>4</td>\n",
" <td>45.582778</td>\n",
" <td>-122.352222</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>3.0</td>\n",
" <td>3</td>\n",
" <td>51.783333</td>\n",
" <td>-0.783333</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Seconds Country Latitude Longitude\n",
"2 20.0 3 53.200000 -2.916667\n",
"3 20.0 4 28.978333 -96.645833\n",
"14 30.0 4 35.823889 -80.253611\n",
"23 60.0 4 45.582778 -122.352222\n",
"24 3.0 3 51.783333 -0.783333"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"# Keep the fitted encoder instead of discarding it, so the integer codes can be\n",
"# mapped back to country names later through country_encoder.classes_ or\n",
"# country_encoder.inverse_transform(...).\n",
"country_encoder = LabelEncoder()\n",
"ufos['Country'] = country_encoder.fit_transform(ufos['Country'])\n",
"ufos.head()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Features are the sighting duration and coordinates; the target is the encoded country.\n",
"Selected_features = ['Seconds', 'Latitude', 'Longitude']\n",
"x, y = ufos[Selected_features], ufos['Country']\n",
"\n",
"# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.\n",
"Xtrain, Xtest, ytrain, ytest = train_test_split(\n",
"    x,\n",
"    y,\n",
"    test_size=0.2,\n",
"    random_state=42,\n",
")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
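"## Confusion matrix results\n",
"\n",
"In the confusion matrix printed by the next cell, each row is the true country label and each column is the predicted label. `LabelEncoder` assigns the codes in alphabetical order: 0 = au, 1 = ca, 2 = de, 3 = gb, 4 = us.\n",
"\n",
"- `[0,0] = 41`: 41 samples of country 0 were correctly predicted as 0.\n",
"- `[1,1] = 50`: 50 samples of country 1 were correctly predicted as 1.\n",
"- `[1,4] = 238`: 238 samples of country 1 were incorrectly predicted as country 4.\n",
"- `[4,4] = 4686`: 4686 samples of country 4 were correctly predicted as 4.\n",
"- `[4,1] = 14`: 14 samples of country 4 were incorrectly predicted as country 1."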
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 1.00 1.00 1.00 41\n",
" 1 0.78 0.17 0.28 288\n",
" 2 1.00 0.90 0.95 10\n",
" 3 0.99 1.00 1.00 134\n",
" 4 0.95 1.00 0.97 4700\n",
"\n",
" accuracy 0.95 5173\n",
" macro avg 0.95 0.81 0.84 5173\n",
"weighted avg 0.94 0.95 0.94 5173\n",
"\n",
"Predicted labels: [4 4 4 ... 4 4 1]\n",
"Accuracy [4 4 4 ... 4 4 1]\n",
"Confusion matrix:\n",
" [[ 41 0 0 0 0]\n",
" [ 0 50 0 0 238]\n",
" [ 0 0 9 1 0]\n",
" [ 0 0 0 134 0]\n",
" [ 0 14 0 0 4686]]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\AI\\MachineLearning\\ML-For-Beginners\\.venv\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:465: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
"STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.\n",
"\n",
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
"Please also refer to the documentation for alternative solver options:\n",
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
" n_iter_i = _check_optimize_result(\n"
]
}
],
"source": [
"from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n",
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"# Fit a logistic-regression classifier on the training split and predict the held-out split.\n",
"# NOTE(review): the saved output of this cell contains an lbfgs ConvergenceWarning;\n",
"# scaling the features or raising max_iter should be considered, but either change\n",
"# would alter the reported metrics, so the model settings are left untouched here.\n",
"model = LogisticRegression()\n",
"model.fit(Xtrain, ytrain)\n",
"prediction = model.predict(Xtest)\n",
"\n",
"print(classification_report(ytest, prediction))\n",
"print('Predicted labels:', prediction)\n",
"# Print the accuracy score itself; this line previously re-printed the predictions.\n",
"print('Accuracy', accuracy_score(ytest, prediction))\n",
"print('Confusion matrix:\\n', confusion_matrix(ytest, prediction))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.3"
}
},
"nbformat": 4,

Loading…
Cancel
Save