From 822a37999a3013d2c55f33f1a4ebcc2d4d8a31df Mon Sep 17 00:00:00 2001 From: Richa Date: Tue, 8 Mar 2022 16:08:15 +0530 Subject: [PATCH] Solution to issue 543 --- OpenForce.ipynb | 464 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 464 insertions(+) create mode 100644 OpenForce.ipynb diff --git a/OpenForce.ipynb b/OpenForce.ipynb new file mode 100644 index 00000000..1998ca59 --- /dev/null +++ b/OpenForce.ipynb @@ -0,0 +1,464 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "bfd08331", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "dc96a636", + "metadata": {}, + "outputs": [], + "source": [ + "pumpkins = pd.read_csv('C:/Users/admin/Downloads/baltimore_9-24-2016_9-30-2017.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a5e6e008", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Commodity NameCity NameTypePackageVarietySub VarietyGradeDateLow PriceHigh Price...ColorEnvironmentUnit of SaleQualityConditionAppearanceStorageCropRepackTrans Mode
0PUMPKINSBALTIMORENaN24 inch binsNaNNaNNaN04/29/2017270280.0...NaNNaNNaNNaNNaNNaNNaNNaNENaN
1PUMPKINSBALTIMORENaN24 inch binsNaNNaNNaN05/06/2017270280.0...NaNNaNNaNNaNNaNNaNNaNNaNENaN
2PUMPKINSBALTIMORENaN24 inch binsHOWDEN TYPENaNNaN09/24/2016160160.0...NaNNaNNaNNaNNaNNaNNaNNaNNNaN
3PUMPKINSBALTIMORENaN24 inch binsHOWDEN TYPENaNNaN09/24/2016160160.0...NaNNaNNaNNaNNaNNaNNaNNaNNNaN
4PUMPKINSBALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/05/201690100.0...NaNNaNNaNNaNNaNNaNNaNNaNNNaN
\n", + "

5 rows × 25 columns

\n", + "
" + ], + "text/plain": [ + " Commodity Name City Name Type Package Variety Sub Variety \\\n", + "0 PUMPKINS BALTIMORE NaN 24 inch bins NaN NaN \n", + "1 PUMPKINS BALTIMORE NaN 24 inch bins NaN NaN \n", + "2 PUMPKINS BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN \n", + "3 PUMPKINS BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN \n", + "4 PUMPKINS BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN \n", + "\n", + " Grade Date Low Price High Price ... Color Environment \\\n", + "0 NaN 04/29/2017 270 280.0 ... NaN NaN \n", + "1 NaN 05/06/2017 270 280.0 ... NaN NaN \n", + "2 NaN 09/24/2016 160 160.0 ... NaN NaN \n", + "3 NaN 09/24/2016 160 160.0 ... NaN NaN \n", + "4 NaN 11/05/2016 90 100.0 ... NaN NaN \n", + "\n", + " Unit of Sale Quality Condition Appearance Storage Crop Repack Trans Mode \n", + "0 NaN NaN NaN NaN NaN NaN E NaN \n", + "1 NaN NaN NaN NaN NaN NaN E NaN \n", + "2 NaN NaN NaN NaN NaN NaN N NaN \n", + "3 NaN NaN NaN NaN NaN NaN N NaN \n", + "4 NaN NaN NaN NaN NaN NaN N NaN \n", + "\n", + "[5 rows x 25 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7d5eb162", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MonthVarietyCityPackageLow PriceHigh PricePrice
709PIE TYPEBALTIMORE1 1/9 bushel cartons1515.013.636364
719PIE TYPEBALTIMORE1 1/9 bushel cartons1818.016.363636
7210PIE TYPEBALTIMORE1 1/9 bushel cartons1818.016.363636
7310PIE TYPEBALTIMORE1 1/9 bushel cartons1717.015.454545
7410PIE TYPEBALTIMORE1 1/9 bushel cartons1515.013.636364
\n", + "
" + ], + "text/plain": [ + " Month Variety City Package Low Price High Price \\\n", + "70 9 PIE TYPE BALTIMORE 1 1/9 bushel cartons 15 15.0 \n", + "71 9 PIE TYPE BALTIMORE 1 1/9 bushel cartons 18 18.0 \n", + "72 10 PIE TYPE BALTIMORE 1 1/9 bushel cartons 18 18.0 \n", + "73 10 PIE TYPE BALTIMORE 1 1/9 bushel cartons 17 17.0 \n", + "74 10 PIE TYPE BALTIMORE 1 1/9 bushel cartons 15 15.0 \n", + "\n", + " Price \n", + "70 13.636364 \n", + "71 16.363636 \n", + "72 16.363636 \n", + "73 15.454545 \n", + "74 13.636364 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", + "\n", + "new_columns = ['Package', 'Variety', 'City Name', 'Month', 'Low Price', 'High Price', 'Date', 'City Num', 'Variety Num']\n", + "\n", + "\n", + "pumpkins = pumpkins.drop([c for c in pumpkins.columns if c not in new_columns], axis=1)\n", + "\n", + "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", + "\n", + "month = pd.DatetimeIndex(pumpkins['Date']).month\n", + "\n", + "\n", + "new_pumpkins = pd.DataFrame({'Month': month, 'Variety': pumpkins['Variety'], 'City': pumpkins['City Name'], 'Package': pumpkins['Package'], 'Low Price': pumpkins['Low Price'],'High Price': pumpkins['High Price'], 'Price': price})\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/1.1\n", + "\n", + "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n", + "\n", + "new_pumpkins.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "bde5818a", + "metadata": {}, + "outputs": [], + "source": [ + "X = new_pumpkins.copy()\n", + "y = X.pop('Price')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "5ce08713", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size = 0.25, random_state = 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "efad6351", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OrdinalEncoder\n", + "ordinal_encoder = OrdinalEncoder()\n", + "s = (xtrain.dtypes == 'object')\n", + "object_cols = list(s[s].index)\n", + "label_x_train = xtrain.copy()\n", + "label_x_test = xtest.copy()\n", + "label_x_train[object_cols] = ordinal_encoder.fit_transform(xtrain[object_cols])\n", + "label_x_test[object_cols] = ordinal_encoder.transform(xtest[object_cols])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7f8943bc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9791305564379404\n" + ] + } + ], + "source": [ + "print(label_x_train['Package'].corr(ytrain))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1c5c2b3c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9759780821029631\n" + ] + } + ], + "source": [ + "print(label_x_test['Package'].corr(ytest))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}