Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import csv
- import sys
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
# Fraction of the loaded rows held out for testing; passed to
# train_test_split as `test_size` in main().
TEST_SIZE = 0.4
def main():
    """
    Command-line entry point.

    Expects exactly one argument (the path to the data file), loads it,
    splits it into training and testing portions, fits the classifier on
    the training portion, and prints how the predictions on the testing
    portion compare with the actual labels.
    """
    # Anything other than "script + one path" is a usage error.
    if len(sys.argv) != 2:
        sys.exit("Usage: python shopping.py data")
    data_path = sys.argv[1]

    # Read the spreadsheet, then hold out TEST_SIZE of it for testing.
    evidence, labels = load_data(data_path)
    split = train_test_split(evidence, labels, test_size=TEST_SIZE)
    X_train, X_test, y_train, y_test = split

    # Fit on the training portion and score against the held-out portion.
    classifier = train_model(X_train, y_train)
    predictions = classifier.predict(X_test)
    sensitivity, specificity = evaluate(y_test, predictions)

    # Report the raw hit/miss counts followed by the two rates as percentages.
    hits = (y_test == predictions).sum()
    print(f"Correct: {hits}")
    misses = (y_test != predictions).sum()
    print(f"Incorrect: {misses}")
    print(f"True Positive Rate: {100 * sensitivity:.2f}%")
    print(f"True Negative Rate: {100 * specificity:.2f}%")
def load_data(filename):
    """
    Load shopping data from the CSV file `filename`.

    The file must have a header row. Every following row is converted into
    one evidence list of 17 numbers, in this order:

        Administrative (int), Administrative_Duration (float),
        Informational (int), Informational_Duration (float),
        ProductRelated (int), ProductRelated_Duration (float),
        BounceRates (float), ExitRates (float), PageValues (float),
        SpecialDay (float), Month (int index, 0 for Jan through 11 for Dec),
        OperatingSystems (int), Browser (int), Region (int),
        TrafficType (int),
        VisitorType (1 if "Returning_Visitor", otherwise 0),
        Weekend (1 if "TRUE", otherwise 0)

    The matching label is 1 when the Revenue column is "TRUE", otherwise 0.

    Returns a tuple (evidence, labels) of two equally long lists.

    Raises KeyError if a required column is missing or the Month value is
    not recognised, and ValueError if a numeric field cannot be parsed.
    """
    # The table uses three-letter abbreviations, but June is also accepted
    # spelled out in full, so both "Jun" and "June" map to the same index.
    month_to_index = {
        "Jan": 0, "Feb": 1, "Mar": 2, "Apr": 3,
        "May": 4, "Jun": 5, "June": 5, "Jul": 6,
        "Aug": 7, "Sep": 8, "Oct": 9, "Nov": 10,
        "Dec": 11,
    }

    evidence = []
    labels = []

    # newline="" hands newline handling to the csv module, as its
    # documentation requires, so quoted fields containing line breaks are
    # parsed correctly.
    with open(filename, mode="r", encoding="utf-8", newline="") as file:
        for row in csv.DictReader(file):
            evidence.append([
                int(row["Administrative"]),
                float(row["Administrative_Duration"]),
                int(row["Informational"]),
                float(row["Informational_Duration"]),
                int(row["ProductRelated"]),
                float(row["ProductRelated_Duration"]),
                float(row["BounceRates"]),
                float(row["ExitRates"]),
                float(row["PageValues"]),
                float(row["SpecialDay"]),
                month_to_index[row["Month"]],
                int(row["OperatingSystems"]),
                int(row["Browser"]),
                int(row["Region"]),
                int(row["TrafficType"]),
                1 if row["VisitorType"] == "Returning_Visitor" else 0,
                1 if row["Weekend"] == "TRUE" else 0,
            ])
            labels.append(1 if row["Revenue"] == "TRUE" else 0)

    return evidence, labels
def train_model(evidence, labels):
    """
    Fit a k-nearest-neighbour classifier with k=1 on `evidence` (a list of
    evidence lists) and `labels` (the matching list of labels), and return
    the fitted model.
    """
    # fit() hands back the estimator itself, so construction and training
    # can be chained into a single expression.
    return KNeighborsClassifier(n_neighbors=1).fit(evidence, labels)
def evaluate(labels, predictions):
    """
    Compare actual `labels` with `predictions`, position by position, and
    return a tuple (sensitivity, specificity).

    Sensitivity is the share of actual 1-labels that were predicted as 1;
    specificity is the share of actual 0-labels that were predicted as 0.
    A rate whose denominator would be zero is reported as 0.
    """
    # Materialise the pairs once so they can be scanned for each outcome.
    outcomes = list(zip(labels, predictions))

    def tally(want_actual, want_predicted):
        """Count the pairs matching the given (actual, predicted) values."""
        return sum(
            1
            for actual, predicted in outcomes
            if actual == want_actual and predicted == want_predicted
        )

    true_pos = tally(1, 1)
    false_neg = tally(1, 0)
    false_pos = tally(0, 1)
    true_neg = tally(0, 0)

    # Guard both divisions: with no actual positives (or negatives) the
    # corresponding rate falls back to 0.
    positives = true_pos + false_neg
    negatives = true_neg + false_pos
    sensitivity = true_pos / positives if positives else 0
    specificity = true_neg / negatives if negatives else 0

    return sensitivity, specificity
# Run the pipeline only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement