elena1234

pandas project (Hotel booking data) in Python

Apr 23rd, 2022 (edited)
import pandas as pd
import numpy as np

hotels = pd.read_csv("C:/Users/eli/Desktop/hotel_booking_data.csv")

# How many rows are there?
# len() gives the row count directly; hotels.count() would return per-column non-null counts instead
number_rows = len(hotels)
print(number_rows)

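# Or another solution: the DataFrame's shape gives (rows, columns) in one call
print(hotels.shape[0])
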
# Is there any missing data? If so, which column has the most missing data?
# children - 4; country - 488; agent - 16340; company - 112593
checker = hotels.isna().sum()
print(checker)


# 'company' has the most missing data
print(hotels.count().idxmin())

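# Or another solution (a small sketch): sort the NaN counts so the most
# incomplete column comes first, together with how many values it is missing
print(hotels.isna().sum().sort_values(ascending=False).head())
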
# Drop the "company" column from the dataset
hotels = hotels.drop('company', axis=1)


# What are the top 5 most common country codes in the dataset?
n = 5
print(hotels['country'].value_counts()[:n])

# What is the name of the person who paid the highest ADR (average daily rate)? How much was their ADR?
# or another solution: hotels.sort_values('adr', ascending=False)[['adr', 'name']].iloc[0]
max_adr = hotels['adr'].max()  # 5400
rows_with_max_adr = hotels.loc[hotels['adr'] == max_adr]  # get the row(s) with the maximum ADR
print(rows_with_max_adr[['name', 'adr']])

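# Or another solution: idxmax() returns the index label of the highest ADR,
# so the row can be looked up directly (picks a single row if the maximum is tied)
print(hotels.loc[[hotels['adr'].idxmax()], ['name', 'adr']])
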
# What is the mean ADR (average daily rate) across all the hotel stays in the dataset?
mean_adr = hotels['adr'].mean()
print(mean_adr)


# What is the average number of nights for a stay across the entire dataset?
sum_columns = hotels['stays_in_week_nights'] + \
    hotels['stays_in_weekend_nights']
print(round(sum_columns.mean(), 2))

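# A possible refinement ('total_nights' is a new, assumed column name): store the
# combined nights as a column so the next question can reuse it for total cost
hotels['total_nights'] = hotels['stays_in_week_nights'] + hotels['stays_in_weekend_nights']
print(round(hotels['total_nights'].mean(), 2))
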
# What is the average total cost for a stay in the dataset?
avg_total_cost = ((hotels['stays_in_week_nights'] +
                   hotels['stays_in_weekend_nights']) * hotels['adr']).mean()
print(round(avg_total_cost, 2))

# What are the names and emails of people who made exactly 5 "Special Requests"?
number_of_requests = 5

rows_with_exactly_five_spec_requests = hotels.loc[
    hotels['total_of_special_requests'] == number_of_requests]  # get all rows with exactly 5 special requests
print(rows_with_exactly_five_spec_requests[['name', 'email']])

# What percentage of hotel stays were classified as "repeat guests"?
repeat_guests = sum(hotels['is_repeated_guest'] == 1) / len(hotels) * 100
print(repeat_guests)

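# Or another solution: since is_repeated_guest is a 0/1 flag, its mean is
# already the fraction of repeat guests
print(round(hotels['is_repeated_guest'].mean() * 100, 2))
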
# What are the names of the people who booked the most children and babies for their stay?
hotels['total_kids'] = (hotels['children'] + hotels['babies'])

the_most_kids_index = hotels['total_kids'].nlargest(3).index
# select rows by index label with .loc (the labels happen to match positions here,
# but .loc is the safe choice)
print(hotels.loc[the_most_kids_index][['name', 'adults',
      'total_kids', 'babies', 'children']])

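# Or another solution: sort by total_kids and take the top three rows
print(hotels.sort_values('total_kids', ascending=False)
      [['name', 'adults', 'total_kids', 'babies', 'children']].head(3))
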
# What are the top 5 most common last names in the dataset?
n = 5
# take the last token of the name so middle names do not shift the result
print(hotels['name'].str.split().str[-1].value_counts()[:n])


# What are the top 3 most common area codes in the phone numbers?
n = 3
print(hotels['phone-number'].str.split('-').str[0].value_counts()[:n])

# How many arrivals took place between the 1st and the 15th of the month (inclusive of 1 and 15)?
print(sum(hotels['arrival_date_day_of_month'].isin(range(1, 16))))

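# Or another solution: Series.between() is inclusive on both ends by default
print(hotels['arrival_date_day_of_month'].between(1, 15).sum())
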
# Create a table of counts for each day of the week that people arrived.
def convert(day, month, year):
    return f"{day}-{month}-{year}"

hotels['date'] = np.vectorize(convert)(
    hotels['arrival_date_day_of_month'], hotels['arrival_date_month'], hotels['arrival_date_year'])

# the month column holds full month names, so give to_datetime an explicit format
hotels['date'] = pd.to_datetime(hotels['date'], format='%d-%B-%Y')

print(hotels['date'].dt.day_name().value_counts())

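# Optional follow-up ('day_counts' is just a local name used here): value_counts()
# orders by frequency, so reindex if calendar order Monday-Sunday is preferred
day_counts = hotels['date'].dt.day_name().value_counts()
print(day_counts.reindex(['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                          'Friday', 'Saturday', 'Sunday']))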