|
|
|
@ -38,7 +38,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 77,
|
|
|
|
|
"execution_count": 53,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
@ -76,7 +76,7 @@
|
|
|
|
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>datetime</th>\n <th>city</th>\n <th>state</th>\n <th>country</th>\n <th>shape</th>\n <th>duration (seconds)</th>\n <th>duration (hours/min)</th>\n <th>comments</th>\n <th>date posted</th>\n <th>latitude</th>\n <th>longitude</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10/10/1949 20:30</td>\n <td>san marcos</td>\n <td>tx</td>\n <td>us</td>\n <td>cylinder</td>\n <td>2700.0</td>\n <td>45 minutes</td>\n <td>This event took place in early fall around 194...</td>\n <td>4/27/2004</td>\n <td>29.883056</td>\n <td>-97.941111</td>\n </tr>\n <tr>\n <th>1</th>\n <td>10/10/1949 21:00</td>\n <td>lackland afb</td>\n <td>tx</td>\n <td>NaN</td>\n <td>light</td>\n <td>7200.0</td>\n <td>1-2 hrs</td>\n <td>1949 Lackland AFB&#44 TX. Lights racing acros...</td>\n <td>12/16/2005</td>\n <td>29.384210</td>\n <td>-98.581082</td>\n </tr>\n <tr>\n <th>2</th>\n <td>10/10/1955 17:00</td>\n <td>chester (uk/england)</td>\n <td>NaN</td>\n <td>gb</td>\n <td>circle</td>\n <td>20.0</td>\n <td>20 seconds</td>\n <td>Green/Orange circular disc over Chester&#44 En...</td>\n <td>1/21/2008</td>\n <td>53.200000</td>\n <td>-2.916667</td>\n </tr>\n <tr>\n <th>3</th>\n <td>10/10/1956 21:00</td>\n <td>edna</td>\n <td>tx</td>\n <td>us</td>\n <td>circle</td>\n <td>20.0</td>\n <td>1/2 hour</td>\n <td>My older brother and twin sister were leaving ...</td>\n <td>1/17/2004</td>\n <td>28.978333</td>\n <td>-96.645833</td>\n </tr>\n <tr>\n <th>4</th>\n <td>10/10/1960 20:00</td>\n <td>kaneohe</td>\n <td>hi</td>\n <td>us</td>\n <td>light</td>\n <td>900.0</td>\n <td>15 minutes</td>\n <td>AS a Marine 1st Lt. flying an FJ4B fighter/att...</td>\n <td>1/22/2004</td>\n <td>21.418056</td>\n <td>-157.803611</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"execution_count": 77
|
|
|
|
|
"execution_count": 53
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
@ -90,15 +90,18 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 78,
|
|
|
|
|
"execution_count": 54,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"text": [
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\nInt64Index: 25863 entries, 2 to 80330\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 25863 non-null float64\n 1 Country 25863 non-null int64 \n 2 Latitude 25863 non-null float64\n 3 Longitude 25863 non-null float64\ndtypes: float64(3), int64(1)\nmemory usage: 1010.3 KB\n"
|
|
|
|
|
"output_type": "execute_result",
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"execution_count": 54
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
@ -106,6 +109,25 @@
|
|
|
|
|
"\n",
|
|
|
|
|
"ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"ufos.Country.unique()\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# 0 au, 1 ca, 2 de, 3 gb, 4 us"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 55,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"text": [
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\nInt64Index: 25863 entries, 2 to 80330\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 25863 non-null float64\n 1 Country 25863 non-null int64 \n 2 Latitude 25863 non-null float64\n 3 Longitude 25863 non-null float64\ndtypes: float64(3), int64(1)\nmemory usage: 1010.3 KB\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"ufos.dropna(inplace=True)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])\n",
|
|
|
|
@ -114,14 +136,12 @@
|
|
|
|
|
"\n",
|
|
|
|
|
"ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"ufos.info()\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n"
|
|
|
|
|
"ufos.info()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 79,
|
|
|
|
|
"execution_count": 56,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
@ -138,7 +158,7 @@
|
|
|
|
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Seconds</th>\n <th>Country</th>\n <th>Latitude</th>\n <th>Longitude</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2</th>\n <td>20.0</td>\n <td>3</td>\n <td>53.200000</td>\n <td>-2.916667</td>\n </tr>\n <tr>\n <th>3</th>\n <td>20.0</td>\n <td>4</td>\n <td>28.978333</td>\n <td>-96.645833</td>\n </tr>\n <tr>\n <th>14</th>\n <td>30.0</td>\n <td>4</td>\n <td>35.823889</td>\n <td>-80.253611</td>\n </tr>\n <tr>\n <th>23</th>\n <td>60.0</td>\n <td>4</td>\n <td>45.582778</td>\n <td>-122.352222</td>\n </tr>\n <tr>\n <th>24</th>\n <td>3.0</td>\n <td>3</td>\n <td>51.783333</td>\n <td>-0.783333</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"execution_count": 79
|
|
|
|
|
"execution_count": 56
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
@ -152,12 +172,14 @@
|
|
|
|
|
"\n",
|
|
|
|
|
"new_ufos['Country'] = LabelEncoder().fit_transform(new_ufos['Country'])\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"new_ufos.head()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 88,
|
|
|
|
|
"execution_count": 57,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
@ -169,12 +191,12 @@
|
|
|
|
|
"y = new_ufos['Country']\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)"
|
|
|
|
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 89,
|
|
|
|
|
"execution_count": 58,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
@ -214,12 +236,13 @@
|
|
|
|
|
"\n",
|
|
|
|
|
"print(classification_report(y_test, predictions))\n",
|
|
|
|
|
"print('Predicted labels: ', predictions)\n",
|
|
|
|
|
"print('Accuracy: ', accuracy_score(y_test, predictions))\n"
|
|
|
|
|
"print('Accuracy: ', accuracy_score(y_test, predictions))\n",
|
|
|
|
|
"\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 97,
|
|
|
|
|
"execution_count": 59,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|