NLP Hotel Reviews Data Analysis PD

2 years ago · 74ab6ac0d7
parent 8979d09fef
commit 74ab6ac0d7
1 changed files with 586 additions and 0 deletions
--- a/6-NLP/4-Hotel-Reviews-1/notebook.ipynb
+++ b/6-NLP/4-Hotel-Reviews-1/notebook.ipynb
@ -0,0 +1,586 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading data file now, this could take a while depending on file size\n",
      "Loading took 1.97 seconds\n"
     ]
    }
   ],
   "source": [
    "print(\"Loading data file now, this could take a while depending on file size\")\n",
    "start = time.time()\n",
    "# df is 'DataFrame' - make sure you downloaded the file to the data folder\n",
    "df = pd.read_csv('/Users/ray/Desktop/ML-For-Beginners/6-NLP/4-Hotel-Reviews-1/Hotel_Reviews.csv')\n",
    "end = time.time()\n",
    "print(\"Loading took \" + str(round(end - start, 2)) + \" seconds\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The shape of the data (rows, cols) is (515738, 17)\n"
     ]
    }
   ],
   "source": [
    "print(\"The shape of the data (rows, cols) is \" + str(df.shape))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "There are 227 different nationalities\n"
     ]
    }
   ],
   "source": [
    "nationality_freq = df[\"Reviewer_Nationality\"].value_counts()\n",
    "print(\"There are \" + str(nationality_freq.size) + \" different nationalities\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reviewer_Nationality\n",
      " United Kingdom               245246\n",
      " United States of America      35437\n",
      " Australia                     21686\n",
      " Ireland                       14827\n",
      " United Arab Emirates          10235\n",
      "                               ...  \n",
      " Cape Verde                        1\n",
      " Northern Mariana Islands          1\n",
      " Tuvalu                            1\n",
      " Guinea                            1\n",
      " Palau                             1\n",
      "Name: count, Length: 227, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "print(nationality_freq) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The highest frequency reviewer nationality is United Kingdom with 245246 reviews.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/3m/kq3blxtj7gj8n_v2dv5lns700000gp/T/ipykernel_23341/1920668807.py:1: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
      "  print(\"The highest frequency reviewer nationality is \" + str(nationality_freq.index[0]).strip() + \" with \" + str(nationality_freq[0]) + \" reviews.\")\n"
     ]
    }
   ],
   "source": [
    "print(\"The highest frequency reviewer nationality is \" + str(nationality_freq.index[0]).strip() + \" with \" + str(nationality_freq[0]) + \" reviews.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The next 10 highest frequency reviewer nationalities are:\n",
      "Reviewer_Nationality\n",
      " United States of America     35437\n",
      " Australia                    21686\n",
      " Ireland                      14827\n",
      " United Arab Emirates         10235\n",
      " Saudi Arabia                  8951\n",
      " Netherlands                   8772\n",
      " Switzerland                   8678\n",
      " Germany                       7941\n",
      " Canada                        7894\n",
      " France                        7296\n"
     ]
    }
   ],
   "source": [
    "print(\"The next 10 highest frequency reviewer nationalities are:\")\n",
    "print(nationality_freq[1:11].to_string())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The most reviewed hotel for United Kingdom was Britannia International Hotel Canary Wharf with 3833 reviews.\n",
      "The most reviewed hotel for United States of America was Hotel Esther a with 423 reviews.\n",
      "The most reviewed hotel for Australia was Park Plaza Westminster Bridge London with 167 reviews.\n",
      "The most reviewed hotel for Ireland was Copthorne Tara Hotel London Kensington with 239 reviews.\n",
      "The most reviewed hotel for United Arab Emirates was Millennium Hotel London Knightsbridge with 129 reviews.\n",
      "The most reviewed hotel for Saudi Arabia was The Cumberland A Guoman Hotel with 142 reviews.\n",
      "The most reviewed hotel for Netherlands was Jaz Amsterdam with 97 reviews.\n",
      "The most reviewed hotel for Switzerland was Hotel Da Vinci with 97 reviews.\n",
      "The most reviewed hotel for Germany was Hotel Da Vinci with 86 reviews.\n",
      "The most reviewed hotel for Canada was St James Court A Taj Hotel London with 61 reviews.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/3m/kq3blxtj7gj8n_v2dv5lns700000gp/T/ipykernel_23341/3364414134.py:6: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
      "  print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\")\n",
      "/var/folders/3m/kq3blxtj7gj8n_v2dv5lns700000gp/T/ipykernel_23341/3364414134.py:6: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
      "  print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\")\n",
      "/var/folders/3m/kq3blxtj7gj8n_v2dv5lns700000gp/T/ipykernel_23341/3364414134.py:6: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
      "  print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\")\n",
      "/var/folders/3m/kq3blxtj7gj8n_v2dv5lns700000gp/T/ipykernel_23341/3364414134.py:6: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
      "  print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\")\n",
      "/var/folders/3m/kq3blxtj7gj8n_v2dv5lns700000gp/T/ipykernel_23341/3364414134.py:6: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
      "  print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\")\n",
      "/var/folders/3m/kq3blxtj7gj8n_v2dv5lns700000gp/T/ipykernel_23341/3364414134.py:6: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
      "  print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\")\n",
      "/var/folders/3m/kq3blxtj7gj8n_v2dv5lns700000gp/T/ipykernel_23341/3364414134.py:6: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
      "  print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\")\n",
      "/var/folders/3m/kq3blxtj7gj8n_v2dv5lns700000gp/T/ipykernel_23341/3364414134.py:6: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
      "  print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\")\n",
      "/var/folders/3m/kq3blxtj7gj8n_v2dv5lns700000gp/T/ipykernel_23341/3364414134.py:6: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
      "  print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\")\n",
      "/var/folders/3m/kq3blxtj7gj8n_v2dv5lns700000gp/T/ipykernel_23341/3364414134.py:6: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
      "  print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\")\n"
     ]
    }
   ],
   "source": [
    "for nat in nationality_freq[:10].index:\n",
    "   # First, extract all the rows that match the criteria into a new dataframe\n",
    "   nat_df = df[df[\"Reviewer_Nationality\"] == nat]   \n",
    "   # Now get the hotel freq\n",
    "   freq = nat_df[\"Hotel_Name\"].value_counts()\n",
    "   print(\"The most reviewed hotel for \" + str(nat).strip() + \" was \" + str(freq.index[0]) + \" with \" + str(freq[0]) + \" reviews.\") \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "hotel_freq_df = df.drop([\"Hotel_Address\", \"Additional_Number_of_Scoring\", \"Review_Date\", \"Average_Score\", \"Reviewer_Nationality\", \"Negative_Review\", \"Review_Total_Negative_Word_Counts\", \"Positive_Review\", \"Review_Total_Positive_Word_Counts\", \"Total_Number_of_Reviews_Reviewer_Has_Given\", \"Reviewer_Score\", \"Tags\", \"days_since_review\", \"lat\", \"lng\"], axis = 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Hotel_Name</th>\n",
       "      <th>Total_Number_of_Reviews</th>\n",
       "      <th>Total_Reviews_Found</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Hotel Arena</td>\n",
       "      <td>1403</td>\n",
       "      <td>405</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>405</th>\n",
       "      <td>K K Hotel George</td>\n",
       "      <td>1831</td>\n",
       "      <td>566</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>971</th>\n",
       "      <td>Apex Temple Court Hotel</td>\n",
       "      <td>2619</td>\n",
       "      <td>1037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2008</th>\n",
       "      <td>The Park Grand London Paddington</td>\n",
       "      <td>4380</td>\n",
       "      <td>1770</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3778</th>\n",
       "      <td>Monhotel Lounge SPA</td>\n",
       "      <td>171</td>\n",
       "      <td>35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>511962</th>\n",
       "      <td>Suite Hotel 900 m zur Oper</td>\n",
       "      <td>3461</td>\n",
       "      <td>439</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>512401</th>\n",
       "      <td>Hotel Amadeus</td>\n",
       "      <td>717</td>\n",
       "      <td>144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>512545</th>\n",
       "      <td>The Berkeley</td>\n",
       "      <td>232</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>512645</th>\n",
       "      <td>Holiday Inn London Kensington</td>\n",
       "      <td>5945</td>\n",
       "      <td>2768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>515413</th>\n",
       "      <td>Atlantis Hotel Vienna</td>\n",
       "      <td>2823</td>\n",
       "      <td>325</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1492 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                              Hotel_Name  Total_Number_of_Reviews  \\\n",
       "0                            Hotel Arena                     1403   \n",
       "405                     K K Hotel George                     1831   \n",
       "971              Apex Temple Court Hotel                     2619   \n",
       "2008    The Park Grand London Paddington                     4380   \n",
       "3778                 Monhotel Lounge SPA                      171   \n",
       "...                                  ...                      ...   \n",
       "511962        Suite Hotel 900 m zur Oper                     3461   \n",
       "512401                     Hotel Amadeus                      717   \n",
       "512545                      The Berkeley                      232   \n",
       "512645     Holiday Inn London Kensington                     5945   \n",
       "515413             Atlantis Hotel Vienna                     2823   \n",
       "\n",
       "        Total_Reviews_Found  \n",
       "0                       405  \n",
       "405                     566  \n",
       "971                    1037  \n",
       "2008                   1770  \n",
       "3778                     35  \n",
       "...                     ...  \n",
       "511962                  439  \n",
       "512401                  144  \n",
       "512545                  100  \n",
       "512645                 2768  \n",
       "515413                  325  \n",
       "\n",
       "[1492 rows x 3 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "hotel_freq_df['Total_Reviews_Found'] = hotel_freq_df.groupby('Hotel_Name').transform('count')\n",
    "   \n",
    "   # Get rid of all the duplicated rows\n",
    "hotel_freq_df = hotel_freq_df.drop_duplicates(subset = [\"Hotel_Name\"])\n",
    "display(hotel_freq_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Average_Score_Difference</th>\n",
       "      <th>Average_Score</th>\n",
       "      <th>Calc_Average_Score</th>\n",
       "      <th>Hotel_Name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>495945</th>\n",
       "      <td>-0.8</td>\n",
       "      <td>7.7</td>\n",
       "      <td>8.5</td>\n",
       "      <td>Best Western Hotel Astoria</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>111027</th>\n",
       "      <td>-0.7</td>\n",
       "      <td>8.8</td>\n",
       "      <td>9.5</td>\n",
       "      <td>Hotel Stendhal Place Vend me Paris MGallery by...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43688</th>\n",
       "      <td>-0.7</td>\n",
       "      <td>7.5</td>\n",
       "      <td>8.2</td>\n",
       "      <td>Mercure Paris Porte d Orleans</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178253</th>\n",
       "      <td>-0.7</td>\n",
       "      <td>7.9</td>\n",
       "      <td>8.6</td>\n",
       "      <td>Renaissance Paris Vendome Hotel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>218258</th>\n",
       "      <td>-0.5</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.5</td>\n",
       "      <td>Hotel Royal Elys es</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>151416</th>\n",
       "      <td>0.7</td>\n",
       "      <td>7.8</td>\n",
       "      <td>7.1</td>\n",
       "      <td>Best Western Allegro Nation</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22189</th>\n",
       "      <td>0.8</td>\n",
       "      <td>7.1</td>\n",
       "      <td>6.3</td>\n",
       "      <td>Holiday Inn Paris Montparnasse Pasteur</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>250308</th>\n",
       "      <td>0.9</td>\n",
       "      <td>8.6</td>\n",
       "      <td>7.7</td>\n",
       "      <td>MARQUIS Faubourg St Honor Relais Ch teaux</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>68936</th>\n",
       "      <td>0.9</td>\n",
       "      <td>6.8</td>\n",
       "      <td>5.9</td>\n",
       "      <td>Villa Eugenie</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3813</th>\n",
       "      <td>1.3</td>\n",
       "      <td>7.2</td>\n",
       "      <td>5.9</td>\n",
       "      <td>Kube Hotel Ice Bar</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1492 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        Average_Score_Difference  Average_Score  Calc_Average_Score  \\\n",
       "495945                      -0.8            7.7                 8.5   \n",
       "111027                      -0.7            8.8                 9.5   \n",
       "43688                       -0.7            7.5                 8.2   \n",
       "178253                      -0.7            7.9                 8.6   \n",
       "218258                      -0.5            7.0                 7.5   \n",
       "...                          ...            ...                 ...   \n",
       "151416                       0.7            7.8                 7.1   \n",
       "22189                        0.8            7.1                 6.3   \n",
       "250308                       0.9            8.6                 7.7   \n",
       "68936                        0.9            6.8                 5.9   \n",
       "3813                         1.3            7.2                 5.9   \n",
       "\n",
       "                                               Hotel_Name  \n",
       "495945                         Best Western Hotel Astoria  \n",
       "111027  Hotel Stendhal Place Vend me Paris MGallery by...  \n",
       "43688                       Mercure Paris Porte d Orleans  \n",
       "178253                    Renaissance Paris Vendome Hotel  \n",
       "218258                                Hotel Royal Elys es  \n",
       "...                                                   ...  \n",
       "151416                        Best Western Allegro Nation  \n",
       "22189              Holiday Inn Paris Montparnasse Pasteur  \n",
       "250308          MARQUIS Faubourg St Honor Relais Ch teaux  \n",
       "68936                                       Villa Eugenie  \n",
       "3813                                   Kube Hotel Ice Bar  \n",
       "\n",
       "[1492 rows x 4 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "def get_difference_review_avg(row):\n",
    "     return row[\"Average_Score\"] - row[\"Calc_Average_Score\"]\n",
    "   \n",
    "   # 'mean' is mathematical word for 'average'\n",
    "df['Calc_Average_Score'] = round(df.groupby('Hotel_Name').Reviewer_Score.transform('mean'), 1)\n",
    "\n",
    "# Add a new column with the difference between the two average scores\n",
    "df[\"Average_Score_Difference\"] = df.apply(get_difference_review_avg, axis = 1)\n",
    "\n",
    "# Create a df without all the duplicates of Hotel_Name (so only 1 row per hotel)\n",
    "review_scores_df = df.drop_duplicates(subset = [\"Hotel_Name\"])\n",
    "\n",
    "# Sort the dataframe to find the lowest and highest average score difference\n",
    "review_scores_df = review_scores_df.sort_values(by=[\"Average_Score_Difference\"])\n",
    "\n",
    "display(review_scores_df[[\"Average_Score_Difference\", \"Average_Score\", \"Calc_Average_Score\", \"Hotel_Name\"]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of No Negative reviews: 127890\n",
      "Number of No Positive reviews: 35946\n",
      "Number of both No Negative and No Positive reviews: 127\n",
      "Sum took 0.17 seconds\n"
     ]
    }
   ],
   "source": [
    "start = time.time()\n",
    "no_negative_reviews = sum(df.Negative_Review == \"No Negative\")\n",
    "print(\"Number of No Negative reviews: \" + str(no_negative_reviews))\n",
    "\n",
    "no_positive_reviews = sum(df[\"Positive_Review\"] == \"No Positive\")\n",
    "print(\"Number of No Positive reviews: \" + str(no_positive_reviews))\n",
    "\n",
    "both_no_reviews = sum((df.Negative_Review == \"No Negative\") & (df.Positive_Review == \"No Positive\"))\n",
    "print(\"Number of both No Negative and No Positive reviews: \" + str(both_no_reviews))\n",
    "\n",
    "end = time.time()\n",
    "print(\"Sum took \" + str(round(end - start, 2)) + \" seconds\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }