## Building the web Application

In [28]:
import pandas as pd 
import numpy as np
ufos = pd.read_csv('./data/ufos.csv')
ufos.head()

Unnamed: 0,datetime,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,4/27/2004,29.883056,-97.941111
1,10/10/1949 21:00,lackland afb,tx,,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
2,10/10/1955 17:00,chester (uk/england),,gb,circle,20.0,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
3,10/10/1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.978333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.418056,-157.803611


In [29]:
ufos = pd.DataFrame({
    'Seconds': ufos['duration (seconds)'],
    'Country': ufos['country'],
    'Latitude': ufos['latitude'],
    'Longitude': ufos['longitude']
})
ufos['Country'].unique()

array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)

In [30]:
ufos.dropna(inplace=True)
ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]
ufos.info()

<class 'pandas.core.frame.DataFrame'>
Index: 25863 entries, 2 to 80330
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Seconds    25863 non-null  float64
 1   Country    25863 non-null  object 
 2   Latitude   25863 non-null  float64
 3   Longitude  25863 non-null  float64
dtypes: float64(3), object(1)
memory usage: 1010.3+ KB


In [31]:
from sklearn.preprocessing import LabelEncoder
ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])
ufos.head()

Unnamed: 0,Seconds,Country,Latitude,Longitude
2,20.0,3,53.2,-2.916667
3,20.0,4,28.978333,-96.645833
14,30.0,4,35.823889,-80.253611
23,60.0,4,45.582778,-122.352222
24,3.0,3,51.783333,-0.783333


In [32]:
from sklearn.model_selection import train_test_split
Selected_features = ['Seconds','Latitude','Longitude']
x = ufos[Selected_features]
y = ufos['Country']
Xtrain, Xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=42)


## Confusion matrix result
##### [0,0] = 41: 41 samples of country 0 were correctly predicted as 0.
##### [1,1] = 50: 50 samples of country 1 were correctly predicted as 1.
##### [1,4] = 238: 238 samples of country 1 were incorrectly predicted as country 4.
##### [4,4] = 4686: 4686 samples of country 4 were correctly predicted as 4.
##### [4,1] = 14: 14 samples of country 4 were incorrectly predicted as 1.

In [35]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(Xtrain, ytrain)
prediction = model.predict(Xtest)
print(classification_report(ytest, prediction))
print('Predicted labels:', prediction)
print('Accuracy', accuracy_score(ytest, prediction))
print('Confusion matrix:\n', confusion_matrix(ytest, prediction))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        41
           1       0.78      0.17      0.28       288
           2       1.00      0.90      0.95        10
           3       0.99      1.00      1.00       134
           4       0.95      1.00      0.97      4700

    accuracy                           0.95      5173
   macro avg       0.95      0.81      0.84      5173
weighted avg       0.94      0.95      0.94      5173

Predicted labels: [4 4 4 ... 4 4 1]
Accuracy [4 4 4 ... 4 4 1]
Confusion matrix:
 [[  41    0    0    0    0]
 [   0   50    0    0  238]
 [   0    0    9    1    0]
 [   0    0    0  134    0]
 [   0   14    0    0 4686]]


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
import pickle
model_filename = 'ufos_model.pkl'
pickle.dump(model, open(model_filename, 'wb'))
model = pickle.load(open(model_filename, 'rb'))
print('Model loaded from disk', model.predict([[50, 44, -12]]))

Model loaded from disk [3]


