# 360composerloadtestgraph.py
# Load-test measurement analysis and plotting script.
# (Originally shared on Pastebin by user Sander447, Feb 7th, 2025.)
  1. import os
  2. import csv
  3. import matplotlib.pyplot as plt
  4. from mpl_toolkits.mplot3d import Axes3D
  5. from statistics import mean, stdev
  6. from collections import Counter
  7. import seaborn as sns
  8. import numpy as np
  9.  
  10. server_name = 'chimay'
  11. aws_jobs =    [[23117, 23118, 23119, 23120, 23121, 23122, 23123, 23124, 23125, 23126, 23185, 23186, 23187, 23188, 23189, 23190, 23191, 23192, 23193, 23194],
  12.                [23127, 23128, 23129, 23130, 23131, 23132, 23137, 23138, 23139, 23140, 23195, 23196, 23197, 23198, 23199, 23200, 23201, 23202, 23203, 23204],
  13.                [23155, 23156, 23157, 23158, 23159, 23160, 23161, 23162, 23163, 23164, 23215, 23216, 23217, 23218, 23219, 23220, 23221, 23222, 23223, 23224],
  14.                [23165, 23166, 23167, 23168, 23169, 23170, 23171, 23172, 23173, 23174, 23225, 23226, 23227, 23228, 23229, 23230, 23231, 23232, 23233, 23234],
  15.                [23175, 23176, 23177, 23178, 23179, 23180, 23181, 23182, 23183, 23184, 23235, 23236, 23237, 23238, 23239, 23240, 23241, 23242, 23243, 23244]]
  16. azure_jobs =  [[22613, 22614, 22615, 22616, 22617, 22618, 22619, 22620, 22621, 22622, 22872, 22873, 22874, 22875, 22876, 22877, 22878, 22879, 22880, 22881],
  17.                [22627, 22628, 22629, 22630, 22631, 22632, 22633, 22634, 22635, 22636, 22882, 22883, 22884, 22885, 22886, 22887, 22889, 22890, 22891, 22892],
  18.                [22647, 22648, 22649, 22650, 22651, 22652, 22653, 22654, 22656, 22657, 22903, 22904, 22905, 22906, 22907, 22908, 22909, 22910, 22911, 22912],
  19.                [22659, 22660, 22661, 22662, 22663, 22664, 22665, 22666, 22667, 22668, 22913, 22914, 22915, 22916, 22917, 22918, 22919, 22920, 22921, 22922],
  20.                [22669, 22670, 22671, 22672, 22673, 22674, 22675, 22676, 22677, 22678, 22923, 22924, 22925, 22926, 22927, 22928, 22929, 22930, 22931, 22932]]
  21. gcp_jobs =    [[22738, 22739, 22740, 22741, 22742, 22743, 22744, 22745, 22746, 22747, 22995, 22996, 22997, 22998, 22999, 23000, 23001, 23002, 23003, 23004],
  22.                [22748, 22749, 22750, 22751, 22752, 22753, 22754, 22755, 22756, 22757, 23005, 23006, 23007, 23008, 23009, 23010, 23011, 23012, 23013, 23014],
  23.                [22768, 22769, 22770, 22771, 22772, 22773, 22774, 22775, 22776, 22777, 23025, 23026, 23027, 23028, 23029, 23030, 23031, 23032, 23033, 23034],
  24.                [22778, 22779, 22780, 22781, 22782, 22783, 22784, 22785, 22786, 22787, 23035, 23036, 23037, 23038, 23039, 23040, 23041, 23042, 23043, 23044],
  25.                [22788, 22789, 22790, 22791, 22792, 22793, 22794, 22795, 22796, 22797, 23045, 23046, 23047, 23048, 23049, 23050, 23051, 23052, 23053, 23054]]
  26. chimay_jobs = [[22801, 22802, 22803, 22804, 22805, 22806, 22807, 22809, 22816, 22817, 23055, 23056, 23057, 23058, 23059, 23060, 23061, 23062, 23063, 23064],
  27.                [22818, 22819, 22820, 22821, 22822, 22823, 22825, 22826, 22827, 22828, 23065, 23066, 23067, 23068, 23069, 23070, 23071, 23072, 23073, 23074],
  28.                [22842, 22843, 22844, 22845, 22846, 22847, 22848, 22849, 22850, 22851, 23085, 23086, 23087, 23088, 23089, 23090, 23091, 23092, 23093, 23094],
  29.                [22852, 22853, 22854, 22855, 22856, 22857, 22858, 22859, 22860, 22861, 23095, 23096, 23097, 23098, 23099, 23100, 23101, 23102, 23103, 23104],
  30.                [22862, 22863, 22864, 22865, 22866, 22867, 22868, 22869, 22870, 22871, 23105, 23106, 23107, 23108, 23109, 23110, 23111, 23112, 23113, 23114]]
  31. jobs = [aws_jobs, azure_jobs, gcp_jobs, chimay_jobs]
  32.  
  33. server_name_to_full = {'aws': 'AWS', 'azure': 'Azure', 'gcp': 'GCP', 'chimay': 'Chimay'}
  34. server_names = ['aws', 'azure', 'gcp', 'chimay']
  35. server_full_names = ['AWS', 'Azure', 'GCP', 'Chimay']
  36.  
  37. plot_x_values = [1, 5, 15, 20, 25]
  38.  
  39. # Function to read the CSV file for a given task_id.
  40. def read_csv_for_task(server, run_id, job_id, node_id, task_id):
  41.     file_path = f"m2_data_{server}/measurement_data_{jobs[server_names.index(server)][run_id][job_id]}_{node_id}_{task_id}.csv"
  42.     if os.path.exists(file_path):
  43.         time_values = []
  44.         variant_values = []
  45.         latency_values = []
  46.         filesize_values = []
  47.  
  48.         with open(file_path, mode="r") as file:
  49.             reader = csv.DictReader(file)
  50.             for row in reader:
  51.                 time_values.append(float(row["Time"]))
  52.                 variant_values.append(int(row["Variant"]))
  53.                 latency_values.append(float(row["Latency"]))
  54.                 filesize_values.append(float(row["File_size"]) / float(row["Latency"]))
  55.  
  56.         if not time_values:
  57.             return None, None, None, None
  58.         # Normalize time values by subtracting the minimum time value.
  59.         min_time = min(time_values)
  60.         time_values = [time_value - min_time for time_value in time_values]
  61.  
  62.         return time_values, variant_values, latency_values, filesize_values
  63.     else:
  64.         return None, None, None, None
  65.    
  66. def read_jobs(server):
  67.     all_time_values = []
  68.     all_variant_values = []
  69.     all_latency_values = []
  70.     all_filesize_values = []
  71.  
  72.     for run_i in range(len(jobs[server_names.index(server)])):
  73.         run_time_values = []
  74.         run_variant_values = []
  75.         run_latency_values = []
  76.         run_filesize_values = []
  77.        
  78.         for job_i in range(len(jobs[server_names.index(server)][run_i])):
  79.             job_time_values = []
  80.             job_variant_values = []
  81.             job_latency_values = []
  82.             job_filesize_values = []
  83.  
  84.             for node_i in range(3):
  85.                 for task_i in range(job_i + 1):
  86.                     time_values, variant_values, latency_values, filesize_values = read_csv_for_task(server, run_i, job_i, node_i, task_i)
  87.                     if time_values is not None:
  88.                         job_time_values.append(time_values)
  89.                         job_variant_values.append(variant_values)
  90.                         job_latency_values.append(latency_values)
  91.                         job_filesize_values.append(filesize_values)
  92.  
  93.             run_time_values.append(job_time_values)
  94.             run_variant_values.append(job_variant_values)
  95.             run_latency_values.append(job_latency_values)
  96.             run_filesize_values.append(job_filesize_values)
  97.  
  98.         all_time_values.append(run_time_values)
  99.         all_variant_values.append(run_variant_values)
  100.         all_latency_values.append(run_latency_values)
  101.         all_filesize_values.append(run_filesize_values)
  102.  
  103.     return all_time_values, all_variant_values, all_latency_values, all_filesize_values
  104.  
  105. def plot_latency(all_latency_values):
  106.     # Compute the mean and standard deviation for each job across runs.
  107.     all_latency_avg = [
  108.         mean([mean(job_latency_values) for run_latency_values in run for job_latency_values in run_latency_values])
  109.         for run in all_latency_values
  110.     ]
  111.     all_latency_std = [
  112.         stdev([mean(job_latency_values) for run_latency_values in run for job_latency_values in run_latency_values])
  113.         for run in all_latency_values
  114.     ]
  115.  
  116.     # Plot the mean and standard deviation.
  117.     plt.figure(figsize=(10, 6))
  118.     plt.errorbar(plot_x_values, all_latency_avg, yerr=all_latency_std, fmt='-o', capsize=5, label='Mean ± Std Dev')
  119.  
  120.     plt.xlim(0, 26)
  121.     plt.xticks([0, 1, 5, 10, 15, 20, 25])
  122.     plt.ylim(0, 2)
  123.     plt.xlabel('Concurrent clients')
  124.     plt.ylabel('Latency (s)')
  125.     plt.title(f"Average latency in seconds over concurrent clients ({server_name_to_full[server_name]})")
  126.     plt.legend()
  127.     plt.grid(True, linestyle='--', alpha=0.6)
  128.     plt.savefig(f"latency_avg_{server_name}_plot.pdf")
  129.     plt.show()
  130.  
  131. def plot_all_latency(all_latency_values):
  132.     plt.figure(figsize=(10, 6))
  133.  
  134.     for client_index, client in enumerate(all_latency_values):
  135.         # Compute the mean and standard deviation for each job across runs.
  136.         all_latency_avg = []
  137.         all_latency_std = []
  138.         valid_x_values = []
  139.  
  140.         for run_index, run in enumerate(client):
  141.             # Filter out runs without valid data.
  142.             valid_job_latency_values = [
  143.                 mean(job_latency_values)
  144.                 for run_latency_values in run
  145.                 for job_latency_values in run_latency_values
  146.                 if job_latency_values
  147.             ]
  148.  
  149.             if valid_job_latency_values:
  150.                 all_latency_avg.append(mean(valid_job_latency_values))
  151.                 all_latency_std.append(stdev(valid_job_latency_values))
  152.                 valid_x_values.append(plot_x_values[run_index])
  153.  
  154.         if valid_x_values:
  155.             plt.errorbar(
  156.                 valid_x_values,
  157.                 all_latency_avg,
  158.                 all_latency_std,
  159.                 fmt='-o',
  160.                 capsize=5,
  161.                 label=f"Mean ± Std Dev (Server {server_full_names[client_index]})",
  162.             )
  163.  
  164.     plt.xlim(0, 26)
  165.     plt.xticks([0, 1, 5, 10, 15, 20, 25])
  166.     plt.ylim(0, 2)
  167.     plt.xlabel('Concurrent clients')
  168.     plt.ylabel('Latency (s)')
  169.     plt.title(f"Average latency in seconds over concurrent clients (all servers)")
  170.     plt.legend()
  171.     plt.grid(True, linestyle='--', alpha=0.6)
  172.     plt.savefig(f"latency_avg_all_servers_plot.pdf")
  173.     plt.show()
  174.  
  175. def plot_latency_max(all_latency_values):
  176.     # Compute the mean and standard deviation of the maximum latencies for each job across runs.
  177.     all_latency_avg = [
  178.         mean([max(job_latency_values) for run_latency_values in run for job_latency_values in run_latency_values])
  179.         for run in all_latency_values
  180.     ]
  181.     all_latency_std = [
  182.         stdev([max(job_latency_values) for run_latency_values in run for job_latency_values in run_latency_values])
  183.         for run in all_latency_values
  184.     ]
  185.  
  186.     # Plot the mean and standard deviation.
  187.     plt.figure(figsize=(10, 6))
  188.     plt.errorbar(plot_x_values, all_latency_avg, yerr=all_latency_std, fmt='-o', capsize=5, label='Max Latency ± Std Dev')
  189.  
  190.     plt.xlabel('Concurrent clients')
  191.     plt.ylabel('Maximum latency (s)')
  192.     plt.title(f"Maximum latency in seconds over concurrent clients ({server_name_to_full[server_name]})")
  193.     plt.legend()
  194.     plt.grid(True, linestyle='--', alpha=0.6)
  195.     plt.savefig(f"latency_max_{server_name}_plot.pdf")
  196.     plt.show()
  197.  
  198. def plot_download_speed(all_filesize_values):
  199.     # Compute the mean and standard deviation for each job across runs.
  200.     all_filesize_avg = [
  201.         mean([mean(job_filesize_values) / 1000 for run_filesize_values in run for job_filesize_values in run_filesize_values])
  202.         for run in all_filesize_values
  203.     ]
  204.     all_filesize_std = [
  205.         stdev([mean(job_filesize_values) / 1000 for run_filesize_values in run for job_filesize_values in run_filesize_values])
  206.         for run in all_filesize_values
  207.     ]
  208.  
  209.     # Plot the mean and standard deviation.
  210.     plt.figure(figsize=(10, 6))
  211.     plt.errorbar(plot_x_values, all_filesize_avg, yerr=all_filesize_std, fmt='-o', capsize=5, label='Mean ± Std Dev')
  212.  
  213.     plt.xlim(0, 26)
  214.     plt.xticks([0, 1, 5, 10, 15, 20, 25])
  215.     plt.ylim(0, 15)
  216.     plt.xlabel('Concurrent clients')
  217.     plt.ylabel('Download speed (MB/s)')
  218.     plt.title(f"Average download speed in MB/s over concurrent clients ({server_name_to_full[server_name]})")
  219.     plt.legend()
  220.     plt.grid(True, linestyle='--', alpha=0.6)
  221.     plt.savefig(f"downloadspeed_avg_{server_name}_plot.pdf")
  222.     plt.show()
  223.  
  224. def plot_all_download_speed(all_latency_values):
  225.     plt.figure(figsize=(10, 6))
  226.  
  227.     for client_index, client in enumerate(all_latency_values):
  228.         # Compute the mean and standard deviation for each job across runs.
  229.         all_latency_avg = []
  230.         all_latency_std = []
  231.         valid_x_values = []
  232.  
  233.         for run_index, run in enumerate(client):
  234.             # Filter out runs without valid data.
  235.             valid_job_latency_values = [
  236.                 mean(job_latency_values) / 1000
  237.                 for run_latency_values in run
  238.                 for job_latency_values in run_latency_values
  239.                 if job_latency_values
  240.             ]
  241.  
  242.             if valid_job_latency_values:
  243.                 all_latency_avg.append(mean(valid_job_latency_values))
  244.                 all_latency_std.append(stdev(valid_job_latency_values))
  245.                 valid_x_values.append(plot_x_values[run_index])
  246.  
  247.         if valid_x_values:
  248.             plt.errorbar(
  249.                 valid_x_values,
  250.                 all_latency_avg,
  251.                 all_latency_std,
  252.                 fmt='-o',
  253.                 capsize=5,
  254.                 label=f"Mean ± Std Dev (Server {server_full_names[client_index]})",
  255.             )
  256.  
  257.     plt.xlim(0, 26)
  258.     plt.xticks([0, 1, 5, 10, 15, 20, 25])
  259.     plt.ylim(0, 15)
  260.     plt.xlabel('Concurrent clients')
  261.     plt.ylabel('Download speed (MB/s)')
  262.     plt.title(f"Average download speed in MB/s over concurrent clients (all servers)")
  263.     plt.legend()
  264.     plt.grid(True, linestyle='--', alpha=0.6)
  265.     plt.savefig(f"downloadspeed_avg_all_servers_plot.pdf")
  266.     plt.show()
  267.  
  268. def plot_variant(all_variant_values):
  269.     """
  270.    Plots a heatmap showing the normalized distribution of variants for each instance amount across multiple runs.
  271.    """
  272.     total_clients = len(all_variant_values[0])
  273.     all_variants = set(
  274.         value
  275.         for run_variant_values in all_variant_values
  276.         for job_variant_values in run_variant_values
  277.         for variant_group in job_variant_values
  278.         for value in variant_group
  279.     )
  280.     all_variants = sorted(all_variants)
  281.  
  282.     # Initialize data for the heatmap.
  283.     heatmap_data_avg = []
  284.     heatmap_data_std = []
  285.  
  286.     for job_index in range(total_clients):
  287.         job_distributions = {variant: [] for variant in all_variants}
  288.  
  289.         for run_variant_values in all_variant_values:
  290.             job_variant_values = run_variant_values[job_index]
  291.             flattened_variants = [value for variant_group in job_variant_values for value in variant_group]
  292.             total_instances = len(flattened_variants)
  293.             counts = Counter(flattened_variants)
  294.  
  295.             for variant in all_variants:
  296.                 proportion = counts[variant] / total_instances if total_instances > 0 else 0
  297.                 job_distributions[variant].append(proportion)
  298.  
  299.         # Compute average and standard deviation for each variant.
  300.         heatmap_data_avg.append([np.mean(job_distributions[variant]) for variant in all_variants])
  301.         heatmap_data_std.append([np.std(job_distributions[variant]) for variant in all_variants])
  302.  
  303.     # Transpose the heatmap data for correct formatting.
  304.     heatmap_data_avg = np.array(heatmap_data_avg).T
  305.     heatmap_data_std = np.array(heatmap_data_std).T
  306.  
  307.     # Create heatmap for average values.
  308.     plt.figure(figsize=(10, 6))
  309.     sns.heatmap(
  310.         heatmap_data_avg,
  311.         annot=True,
  312.         cmap="YlGnBu",
  313.         xticklabels=range(1, total_clients + 1),
  314.         yticklabels=all_variants,
  315.         cbar_kws={'label': 'Normalized Frequency (Average)'}
  316.     )
  317.     plt.xlabel('Concurrent Clients')
  318.     plt.ylabel('Variant Quality')
  319.     plt.title(f'Heatmap of Variant Distribution Across Concurrent Clients (Average) ({server_name_to_full[server_name]})')
  320.     plt.savefig(f"variant_heatmap_avg_{server_name}_plot.pdf")
  321.     plt.show()
  322.  
  323.     plt.figure(figsize=(10, 6))
  324.     sns.heatmap(
  325.         heatmap_data_std,
  326.         annot=True,
  327.         cmap="OrRd",
  328.         xticklabels=range(1, total_clients + 1),
  329.         yticklabels=all_variants,
  330.         cbar_kws={'label': 'Normalized Frequency (Std Dev)'}
  331.     )
  332.     plt.xlabel('Concurrent Clients')
  333.     plt.ylabel('Variant Quality')
  334.     plt.title(f'Heatmap of Variant Distribution Across Concurrent Clients (Std Dev) ({server_name_to_full[server_name]})')
  335.     plt.savefig(f"variant_heatmap_std_{server_name}_plot.pdf")
  336.     plt.show()
  337.  
  338. def plot_variant_distribution_stacked_bar(all_variant_values):
  339.     """
  340.    Plots a stacked bar chart showing the distribution of variants for each instance amount.
  341.    """
  342.     all_variants = set(
  343.         value
  344.         for run_variant_values in all_variant_values
  345.         for job_variant_values in run_variant_values
  346.         for variant_group in job_variant_values
  347.         for value in variant_group
  348.     )
  349.     all_variants = sorted(all_variants)
  350.     variant_distribution = {variant: [] for variant in all_variants}
  351.  
  352.     for run_variant_values in all_variant_values:
  353.         # Flatten all variants to get the amount of instances.
  354.         flattened_variants = [value for job_variant_values in run_variant_values for variant_group in job_variant_values for value in variant_group]
  355.         total_instances = len(flattened_variants)
  356.         counts = Counter(flattened_variants)
  357.  
  358.         # Fill variant_distribution with variant data.
  359.         for variant in all_variants:
  360.             variant_distribution[variant].append(counts[variant] / total_instances if total_instances > 0 else 0)
  361.  
  362.     # Plot stacked bar chart.
  363.     x = np.arange(len([1, 5, 15, 20, 25]))
  364.     bottom = np.zeros(len(x))
  365.     bar_width = 0.8
  366.     plt.figure(figsize=(10, 6))
  367.  
  368.     for variant, proportions in variant_distribution.items():
  369.         plt.bar(x, proportions, bottom=bottom, width=bar_width)
  370.         bottom += proportions
  371.  
  372.     plt.xlabel('Concurrent Clients')
  373.     plt.ylabel('Normalized Distribution')
  374.     plt.ylim(0, 1)
  375.     plt.title(f'Variant Distribution Across Concurrent Clients ({server_name_to_full[server_name]})')
  376.     plt.xticks(x, labels=[1, 5, 15, 20, 25])
  377.     plt.legend(["Highest", "High", "Low", "Lowest"], title="Variants")
  378.     plt.grid(axis='y', linestyle='--', alpha=0.7)
  379.     plt.savefig(f"variant_stackedbar_{server_name}_plot.pdf")
  380.     plt.show()
  381.  
  382. def plot_all_variant_distribution_stacked_bar(all_variant_values):
  383.     """
  384.    Plots a stacked bar chart showing the distribution of variants for each instance amount.
  385.    """
  386.     variant_distributions = []
  387.     num_clients = len(all_variant_values)
  388.  
  389.     for client in all_variant_values:
  390.         all_variants = set(
  391.             value
  392.             for run_variant_values in client
  393.             for job_variant_values in run_variant_values
  394.             for variant_group in job_variant_values
  395.             for value in variant_group
  396.         )
  397.         all_variants = sorted(all_variants)
  398.         variant_distribution = {variant: [] for variant in all_variants}
  399.  
  400.         for run_variant_values in client:
  401.             # Flatten all variants to get the amount of instances.
  402.             flattened_variants = [value for job_variant_values in run_variant_values for variant_group in job_variant_values for value in variant_group]
  403.             total_instances = len(flattened_variants)
  404.             counts = Counter(flattened_variants)
  405.  
  406.             # Fill variant_distribution with variant data.
  407.             for variant in all_variants:
  408.                 variant_distribution[variant].append(counts[variant] / total_instances if total_instances > 0 else 0)
  409.  
  410.         variant_distributions.append(variant_distribution)
  411.  
  412.     x = np.arange(len([1, 5, 15, 20, 25]))
  413.     bar_width = 0.7 / num_clients
  414.     plt.figure(figsize=(10, 6))
  415.     colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
  416.  
  417.     for client_index in range(4):
  418.         bottom = np.zeros(len(x))
  419.         for variant, proportions in variant_distributions[client_index].items():
  420.             plt.bar(x + client_index * (bar_width + 0.02), proportions, bottom=bottom, width=bar_width, color=colors[variant % len(colors)])
  421.             bottom += proportions
  422.  
  423.     plt.xlabel('Concurrent Clients (in order of AWS, Azure, GCP, Chimay)')
  424.     plt.ylabel('Normalized Distribution')
  425.     plt.ylim(0, 1)
  426.     plt.title(f'Variant Distribution Across Concurrent Clients (in order of AWS, Azure, GCP, Chimay)')
  427.     plt.xticks(x + (num_clients - 1) * bar_width / 2, labels=[1, 5, 15, 20, 25])
  428.     plt.legend(["Highest", "High", "Low", "Lowest"], title="Variants")
  429.     plt.grid(axis='y', linestyle='--', alpha=0.7)
  430.     plt.savefig(f"variant_stackedbar_all_plot.pdf")
  431.     plt.show()
  432.  
  433.  
  434. def main():
  435.     aws_time_values, aws_variant_values, aws_latency_values, aws_filesize_values = read_jobs('aws')
  436.     azure_time_values, azure_variant_values, azure_latency_values, azure_filesize_values = read_jobs('azure')
  437.     gcp_time_values, gcp_variant_values, gcp_latency_values, gcp_filesize_values = read_jobs('gcp')
  438.     chimay_time_values, chimay_variant_values, chimay_latency_values, chimay_filesize_values = read_jobs('chimay')
  439.  
  440.     # plot_latency(chimay_latency_values)
  441.     # plot_download_speed(chimay_filesize_values)
  442.     # plot_latency_max(gcp_latency_values)
  443.     # plot_variant(aws_variant_values)
  444.     # plot_variant_distribution_stacked_bar(chimay_variant_values)
  445.  
  446.     plot_all_latency([aws_latency_values, azure_latency_values, gcp_latency_values, chimay_latency_values])
  447.     plot_all_download_speed([aws_filesize_values, azure_filesize_values, gcp_filesize_values, chimay_filesize_values])
  448.     plot_all_variant_distribution_stacked_bar([aws_variant_values, azure_variant_values, gcp_variant_values, chimay_variant_values])
  449.  
  450. main()