Advertisement
mayankjoin3

latency model size compute

Apr 2nd, 2025
338
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.97 KB | None | 0 0
  1. import pandas as pd
  2. import time
  3. import psutil
  4. import os
  5. from sklearn.model_selection import StratifiedKFold
  6. from sklearn.ensemble import ExtraTreesClassifier
  7. from sklearn.metrics import classification_report
  8. import joblib
  9. from sklearn.model_selection import train_test_split
  10. from sklearn.preprocessing import LabelEncoder
  11. import numpy as np
  12.  
  13. # Step 1: Load the data
  14. file_path = "m5.csv"
  15. df = pd.read_csv(file_path)
  16.  
  17. # Preprocessing: If categorical columns exist, encode them
  18. label_encoder = LabelEncoder()
  19.  
  20. # Assuming 'category' column is the target and there may be categorical features
  21. # Encode categorical features if needed, e.g., if there are string columns
  22. # Here we will assume 'category' is the target and all other columns are features
  23. X = df.drop(columns=['label'])
  24. y = df['label']
  25.  
  26. # Encoding categorical features (if they exist)
  27. X = X.apply(lambda col: label_encoder.fit_transform(col.astype(str)), axis=0, result_type='expand')
  28.  
  29. # Step 2: Initialize the ExtraTreesClassifier
  30. clf = ExtraTreesClassifier(random_state=42, n_jobs=-1)  # n_jobs=-1 allows for parallel processing
  31.  
  32. # Step 3: Set up 2-Fold Cross-Validation
  33. skf = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)
  34.  
  35. # Step 4: Evaluate Model Using Cross-Validation
  36. fold = 1
  37. for train_index, test_index in skf.split(X, y):
  38.     print(f"Fold {fold}...")
  39.     X_train, X_test = X.iloc[train_index], X.iloc[test_index]
  40.     y_train, y_test = y.iloc[train_index], y.iloc[test_index]
  41.  
  42.     # Step 5: Train the model
  43.     start_time = time.time()
  44.     clf.fit(X_train, y_train)
  45.     training_time = time.time() - start_time
  46.     print(f"Training time for fold {fold}: {training_time:.4f} seconds")
  47.  
  48.     # Step 6: Test the model and measure latency
  49.     start_time = time.time()
  50.     predictions = clf.predict(X_test)
  51.     prediction_time = time.time() - start_time
  52.     print(f"Prediction time for fold {fold}: {prediction_time:.4f} seconds per instance")
  53.  
  54.     # Step 7: Memory and CPU Usage during prediction
  55.     process = psutil.Process(os.getpid())
  56.     memory_before = process.memory_info().rss / (1024 * 1024)  # in MB
  57.     cpu_before = psutil.cpu_percent(interval=1)
  58.  
  59.     # Perform prediction to measure CPU and Memory usage
  60.     clf.predict(X_test)
  61.  
  62.     memory_after = process.memory_info().rss / (1024 * 1024)  # in MB
  63.     cpu_after = psutil.cpu_percent(interval=1)
  64.  
  65.     memory_usage = memory_after - memory_before
  66.     cpu_usage = cpu_after - cpu_before
  67.  
  68.     print(f"Memory usage during prediction: {memory_usage:.4f} MB")
  69.     print(f"CPU usage during prediction: {cpu_usage:.4f}%")
  70.  
  71.     # Step 8: Output classification report
  72.     # print("Classification Report:")
  73.     # print(classification_report(y_test, predictions))
  74.  
  75.     # Step 9: Model size on disk
  76.     model_filename = f'extratrees_model_fold{fold}.pkl'
  77.     joblib.dump(clf, model_filename)
  78.     model_size = os.path.getsize(model_filename) / (1024 * 1024)  # in MB
  79.     print(f"Model size for fold {fold}: {model_size:.4f} MB")
  80.  
  81.     fold += 1
  82.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement