Advertisement
jules0707

shopping.py

Feb 28th, 2024 (edited)
9
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.83 KB | None | 0 0
  1. import csv
  2. import sys
  3.  
  4. from sklearn.model_selection import train_test_split
  5. from sklearn.neighbors import KNeighborsClassifier
  6.  
# Fraction of the loaded rows held out as the test set; passed as
# `test_size` to train_test_split in main().
TEST_SIZE = 0.4
  8.  
  9.  
  10. def main():
  11.  
  12.     # Check command-line arguments
  13.     if len(sys.argv) != 2:
  14.         sys.exit("Usage: python shopping.py data")
  15.  
  16.     # Load data from spreadsheet and split into train and test sets
  17.     evidence, labels = load_data(sys.argv[1])
  18.     X_train, X_test, y_train, y_test = train_test_split(
  19.         evidence, labels, test_size=TEST_SIZE
  20.     )
  21.  
  22.     # Train model and make predictions
  23.     model = train_model(X_train, y_train)
  24.     predictions = model.predict(X_test)
  25.     sensitivity, specificity = evaluate(y_test, predictions)
  26.  
  27.     # Print results
  28.     print(f"Correct: {(y_test == predictions).sum()}")
  29.     print(f"Incorrect: {(y_test != predictions).sum()}")
  30.     print(f"True Positive Rate: {100 * sensitivity:.2f}%")
  31.     print(f"True Negative Rate: {100 * specificity:.2f}%")
  32.  
  33.  
  34. def load_data(filename):
  35.     """
  36.    Load shopping data from a CSV file `filename` and convert into a list of
  37.    evidence lists and a list of labels. Return a tuple (evidence, labels).
  38.    """
  39.    
  40.     evidence = []
  41.     labels = []
  42.     month_to_index = {
  43.         "Jan": 0, "Feb": 1, "Mar": 2, "Apr": 3,
  44.         "May": 4, "June": 5, "Jul": 6, "Aug": 7,
  45.         "Sep": 8, "Oct": 9, "Nov": 10, "Dec": 11
  46.     }
  47.  
  48.     with open(filename, mode='r', encoding='utf-8') as file:
  49.         reader = csv.DictReader(file)
  50.         for row in reader:
  51.             row_evidence = [
  52.                 int(row["Administrative"]),
  53.                 float(row["Administrative_Duration"]),
  54.                 int(row["Informational"]),
  55.                 float(row["Informational_Duration"]),
  56.                 int(row["ProductRelated"]),
  57.                 float(row["ProductRelated_Duration"]),
  58.                 float(row["BounceRates"]),
  59.                 float(row["ExitRates"]),
  60.                 float(row["PageValues"]),
  61.                 float(row["SpecialDay"]),
  62.                 month_to_index[row["Month"]],
  63.                 int(row["OperatingSystems"]),
  64.                 int(row["Browser"]),
  65.                 int(row["Region"]),
  66.                 int(row["TrafficType"]),
  67.                 1 if row["VisitorType"] == "Returning_Visitor" else 0,
  68.                 1 if row["Weekend"] == "TRUE" else 0
  69.             ]
  70.             evidence.append(row_evidence)
  71.             labels.append(1 if row["Revenue"] == "TRUE" else 0)
  72.  
  73.     return evidence, labels
  74.  
  75.  
  76. def train_model(evidence, labels):
  77.     """
  78.    Given a list of evidence lists and a list of labels, return a
  79.    fitted k-nearest neighbor model (k=1) trained on the data.
  80.    """
  81.     # Initialize the KNN model with k=1
  82.     model = KNeighborsClassifier(n_neighbors=1)
  83.    
  84.     # Train the model using the provided evidence and labels
  85.     model.fit(evidence, labels)
  86.    
  87.     # Return the trained model
  88.     return model
  89.    
  90.  
  91. def evaluate(labels, predictions):
  92.     """
  93.    Given a list of actual labels and a list of predicted labels,
  94.    return a tuple (sensitivity, specificity).
  95.    """
  96.     # Initialize counters for true positives, false negatives, true negatives, and false positives
  97.     tp = 0
  98.     fn = 0
  99.     tn = 0
  100.     fp = 0
  101.    
  102.     # Loop through all labels and predictions to count tp, fn, tn, fp
  103.     for actual, predicted in zip(labels, predictions):
  104.         if actual == 1 and predicted == 1:
  105.             tp += 1
  106.         elif actual == 1 and predicted == 0:
  107.             fn += 1
  108.         elif actual == 0 and predicted == 1:
  109.             fp += 1
  110.         elif actual == 0 and predicted == 0:
  111.             tn += 1
  112.    
  113.     # Calculate sensitivity and specificity
  114.     sensitivity = tp / (tp + fn) if (tp + fn) != 0 else 0
  115.     specificity = tn / (tn + fp) if (tn + fp) != 0 else 0
  116.    
  117.     return sensitivity, specificity
  118.  
  119.  
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
  122.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement