import os
import csv
import matplotlib.pyplot as plt
from statistics import mean, stdev
from collections import Counter
import seaborn as sns
import numpy as np

# Server analyzed by the per-server plot functions.
server_name = 'chimay'

# Job IDs per server: each inner list holds the 20 job IDs of one run.
aws_jobs = [[23117, 23118, 23119, 23120, 23121, 23122, 23123, 23124, 23125, 23126, 23185, 23186, 23187, 23188, 23189, 23190, 23191, 23192, 23193, 23194],
            [23127, 23128, 23129, 23130, 23131, 23132, 23137, 23138, 23139, 23140, 23195, 23196, 23197, 23198, 23199, 23200, 23201, 23202, 23203, 23204],
            [23155, 23156, 23157, 23158, 23159, 23160, 23161, 23162, 23163, 23164, 23215, 23216, 23217, 23218, 23219, 23220, 23221, 23222, 23223, 23224],
            [23165, 23166, 23167, 23168, 23169, 23170, 23171, 23172, 23173, 23174, 23225, 23226, 23227, 23228, 23229, 23230, 23231, 23232, 23233, 23234],
            [23175, 23176, 23177, 23178, 23179, 23180, 23181, 23182, 23183, 23184, 23235, 23236, 23237, 23238, 23239, 23240, 23241, 23242, 23243, 23244]]
azure_jobs = [[22613, 22614, 22615, 22616, 22617, 22618, 22619, 22620, 22621, 22622, 22872, 22873, 22874, 22875, 22876, 22877, 22878, 22879, 22880, 22881],
              [22627, 22628, 22629, 22630, 22631, 22632, 22633, 22634, 22635, 22636, 22882, 22883, 22884, 22885, 22886, 22887, 22889, 22890, 22891, 22892],
              [22647, 22648, 22649, 22650, 22651, 22652, 22653, 22654, 22656, 22657, 22903, 22904, 22905, 22906, 22907, 22908, 22909, 22910, 22911, 22912],
              [22659, 22660, 22661, 22662, 22663, 22664, 22665, 22666, 22667, 22668, 22913, 22914, 22915, 22916, 22917, 22918, 22919, 22920, 22921, 22922],
              [22669, 22670, 22671, 22672, 22673, 22674, 22675, 22676, 22677, 22678, 22923, 22924, 22925, 22926, 22927, 22928, 22929, 22930, 22931, 22932]]
gcp_jobs = [[22738, 22739, 22740, 22741, 22742, 22743, 22744, 22745, 22746, 22747, 22995, 22996, 22997, 22998, 22999, 23000, 23001, 23002, 23003, 23004],
            [22748, 22749, 22750, 22751, 22752, 22753, 22754, 22755, 22756, 22757, 23005, 23006, 23007, 23008, 23009, 23010, 23011, 23012, 23013, 23014],
            [22768, 22769, 22770, 22771, 22772, 22773, 22774, 22775, 22776, 22777, 23025, 23026, 23027, 23028, 23029, 23030, 23031, 23032, 23033, 23034],
            [22778, 22779, 22780, 22781, 22782, 22783, 22784, 22785, 22786, 22787, 23035, 23036, 23037, 23038, 23039, 23040, 23041, 23042, 23043, 23044],
            [22788, 22789, 22790, 22791, 22792, 22793, 22794, 22795, 22796, 22797, 23045, 23046, 23047, 23048, 23049, 23050, 23051, 23052, 23053, 23054]]
chimay_jobs = [[22801, 22802, 22803, 22804, 22805, 22806, 22807, 22809, 22816, 22817, 23055, 23056, 23057, 23058, 23059, 23060, 23061, 23062, 23063, 23064],
               [22818, 22819, 22820, 22821, 22822, 22823, 22825, 22826, 22827, 22828, 23065, 23066, 23067, 23068, 23069, 23070, 23071, 23072, 23073, 23074],
               [22842, 22843, 22844, 22845, 22846, 22847, 22848, 22849, 22850, 22851, 23085, 23086, 23087, 23088, 23089, 23090, 23091, 23092, 23093, 23094],
               [22852, 22853, 22854, 22855, 22856, 22857, 22858, 22859, 22860, 22861, 23095, 23096, 23097, 23098, 23099, 23100, 23101, 23102, 23103, 23104],
               [22862, 22863, 22864, 22865, 22866, 22867, 22868, 22869, 22870, 22871, 23105, 23106, 23107, 23108, 23109, 23110, 23111, 23112, 23113, 23114]]
jobs = [aws_jobs, azure_jobs, gcp_jobs, chimay_jobs]
server_name_to_full = {'aws': 'AWS', 'azure': 'Azure', 'gcp': 'GCP', 'chimay': 'Chimay'}
server_names = ['aws', 'azure', 'gcp', 'chimay']
server_full_names = ['AWS', 'Azure', 'GCP', 'Chimay']
# x-axis values: the number of concurrent clients corresponding to each run.
plot_x_values = [1, 5, 15, 20, 25]

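# The on-disk schema is not documented in this script, so the sketch below is
# a minimal test fixture showing the layout that read_csv_for_task() expects:
# one CSV per (job, node, task) with the columns Time, Variant, Latency, and
# File_size. The function is never called by this script; the path default and
# all sample values are hypothetical.
def write_sample_csv(path="m2_data_chimay/measurement_data_22801_0_0.csv"):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, mode="w", newline="") as file:
        writer = csv.DictWriter(file, fieldnames=["Time", "Variant", "Latency", "File_size"])
        writer.writeheader()
        # Hypothetical sample row: one download that took 0.42 s.
        writer.writerow({"Time": 1700000000.0, "Variant": 0, "Latency": 0.42, "File_size": 2048.0})
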
# Read the measurement CSV for a given (server, run, job, node, task) and
# return parallel lists of times, variants, latencies, and throughputs.
def read_csv_for_task(server, run_id, job_id, node_id, task_id):
    file_path = f"m2_data_{server}/measurement_data_{jobs[server_names.index(server)][run_id][job_id]}_{node_id}_{task_id}.csv"
    if not os.path.exists(file_path):
        return None, None, None, None
    time_values = []
    variant_values = []
    latency_values = []
    filesize_values = []
    with open(file_path, mode="r") as file:
        reader = csv.DictReader(file)
        for row in reader:
            time_values.append(float(row["Time"]))
            variant_values.append(int(row["Variant"]))
            latency_values.append(float(row["Latency"]))
            # Throughput proxy: file size divided by latency.
            filesize_values.append(float(row["File_size"]) / float(row["Latency"]))
    if not time_values:
        return None, None, None, None
    # Normalize time values so each series starts at zero.
    min_time = min(time_values)
    time_values = [time_value - min_time for time_value in time_values]
    return time_values, variant_values, latency_values, filesize_values

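# Example usage (assumes the corresponding file exists; otherwise all four
# returned values are None):
#   times, variants, latencies, speeds = read_csv_for_task('chimay', 0, 0, 0, 0)
#   if times is not None:
#       print(times[0])  # 0.0 -- time values are normalized to start at zero
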
def read_jobs(server):
    """Collect all measurement data for one server, nested as [run][job][task]."""
    all_time_values = []
    all_variant_values = []
    all_latency_values = []
    all_filesize_values = []
    server_jobs = jobs[server_names.index(server)]
    for run_i in range(len(server_jobs)):
        run_time_values = []
        run_variant_values = []
        run_latency_values = []
        run_filesize_values = []
        for job_i in range(len(server_jobs[run_i])):
            job_time_values = []
            job_variant_values = []
            job_latency_values = []
            job_filesize_values = []
            # Three nodes per job; the number of tasks grows with the job index.
            for node_i in range(3):
                for task_i in range(job_i + 1):
                    time_values, variant_values, latency_values, filesize_values = read_csv_for_task(server, run_i, job_i, node_i, task_i)
                    if time_values is not None:
                        job_time_values.append(time_values)
                        job_variant_values.append(variant_values)
                        job_latency_values.append(latency_values)
                        job_filesize_values.append(filesize_values)
            run_time_values.append(job_time_values)
            run_variant_values.append(job_variant_values)
            run_latency_values.append(job_latency_values)
            run_filesize_values.append(job_filesize_values)
        all_time_values.append(run_time_values)
        all_variant_values.append(run_variant_values)
        all_latency_values.append(run_latency_values)
        all_filesize_values.append(run_filesize_values)
    return all_time_values, all_variant_values, all_latency_values, all_filesize_values

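# For reference, the returned nesting is:
#   all_<metric>_values[run][job][task][sample]
# with 5 runs per server, 20 jobs per run, and up to 3 nodes x (job index + 1)
# task series per job; missing or empty CSV files are silently skipped.
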
def plot_latency(all_latency_values):
    # Mean and standard deviation of the per-task average latency for each run.
    all_latency_avg = [
        mean([mean(task_values) for job_values in run for task_values in job_values])
        for run in all_latency_values
    ]
    all_latency_std = [
        stdev([mean(task_values) for job_values in run for task_values in job_values])
        for run in all_latency_values
    ]
    # Plot the mean with the standard deviation as error bars.
    plt.figure(figsize=(10, 6))
    plt.errorbar(plot_x_values, all_latency_avg, yerr=all_latency_std, fmt='-o', capsize=5, label='Mean ± Std Dev')
    plt.xlim(0, 26)
    plt.xticks([0, 1, 5, 10, 15, 20, 25])
    plt.ylim(0, 2)
    plt.xlabel('Concurrent clients')
    plt.ylabel('Latency (s)')
    plt.title(f"Average latency in seconds over concurrent clients ({server_name_to_full[server_name]})")
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.savefig(f"latency_avg_{server_name}_plot.pdf")
    plt.show()

def plot_all_latency(all_latency_values):
    plt.figure(figsize=(10, 6))
    for server_index, server_runs in enumerate(all_latency_values):
        # Mean and standard deviation of the per-task average latency for each run.
        all_latency_avg = []
        all_latency_std = []
        valid_x_values = []
        for run_index, run in enumerate(server_runs):
            # Skip task series without valid data.
            valid_task_means = [
                mean(task_values)
                for job_values in run
                for task_values in job_values
                if task_values
            ]
            if valid_task_means:
                all_latency_avg.append(mean(valid_task_means))
                # stdev needs at least two samples; fall back to 0 otherwise.
                all_latency_std.append(stdev(valid_task_means) if len(valid_task_means) > 1 else 0.0)
                valid_x_values.append(plot_x_values[run_index])
        if valid_x_values:
            plt.errorbar(
                valid_x_values,
                all_latency_avg,
                all_latency_std,
                fmt='-o',
                capsize=5,
                label=f"Mean ± Std Dev (Server {server_full_names[server_index]})",
            )
    plt.xlim(0, 26)
    plt.xticks([0, 1, 5, 10, 15, 20, 25])
    plt.ylim(0, 2)
    plt.xlabel('Concurrent clients')
    plt.ylabel('Latency (s)')
    plt.title("Average latency in seconds over concurrent clients (all servers)")
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.savefig("latency_avg_all_servers_plot.pdf")
    plt.show()

def plot_latency_max(all_latency_values):
    # Mean and standard deviation of the per-task maximum latency for each run.
    all_latency_avg = [
        mean([max(task_values) for job_values in run for task_values in job_values])
        for run in all_latency_values
    ]
    all_latency_std = [
        stdev([max(task_values) for job_values in run for task_values in job_values])
        for run in all_latency_values
    ]
    # Plot the mean with the standard deviation as error bars.
    plt.figure(figsize=(10, 6))
    plt.errorbar(plot_x_values, all_latency_avg, yerr=all_latency_std, fmt='-o', capsize=5, label='Max Latency ± Std Dev')
    plt.xlabel('Concurrent clients')
    plt.ylabel('Maximum latency (s)')
    plt.title(f"Maximum latency in seconds over concurrent clients ({server_name_to_full[server_name]})")
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.savefig(f"latency_max_{server_name}_plot.pdf")
    plt.show()

def plot_download_speed(all_filesize_values):
    # Mean and standard deviation of the per-task average download speed for
    # each run, scaled by 1/1000 to match the MB/s axis.
    all_filesize_avg = [
        mean([mean(task_values) / 1000 for job_values in run for task_values in job_values])
        for run in all_filesize_values
    ]
    all_filesize_std = [
        stdev([mean(task_values) / 1000 for job_values in run for task_values in job_values])
        for run in all_filesize_values
    ]
    # Plot the mean with the standard deviation as error bars.
    plt.figure(figsize=(10, 6))
    plt.errorbar(plot_x_values, all_filesize_avg, yerr=all_filesize_std, fmt='-o', capsize=5, label='Mean ± Std Dev')
    plt.xlim(0, 26)
    plt.xticks([0, 1, 5, 10, 15, 20, 25])
    plt.ylim(0, 15)
    plt.xlabel('Concurrent clients')
    plt.ylabel('Download speed (MB/s)')
    plt.title(f"Average download speed in MB/s over concurrent clients ({server_name_to_full[server_name]})")
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.savefig(f"downloadspeed_avg_{server_name}_plot.pdf")
    plt.show()

def plot_all_download_speed(all_filesize_values):
    plt.figure(figsize=(10, 6))
    for server_index, server_runs in enumerate(all_filesize_values):
        # Mean and standard deviation of the per-task average download speed
        # for each run, scaled by 1/1000 to match the MB/s axis.
        all_speed_avg = []
        all_speed_std = []
        valid_x_values = []
        for run_index, run in enumerate(server_runs):
            # Skip task series without valid data.
            valid_task_means = [
                mean(task_values) / 1000
                for job_values in run
                for task_values in job_values
                if task_values
            ]
            if valid_task_means:
                all_speed_avg.append(mean(valid_task_means))
                # stdev needs at least two samples; fall back to 0 otherwise.
                all_speed_std.append(stdev(valid_task_means) if len(valid_task_means) > 1 else 0.0)
                valid_x_values.append(plot_x_values[run_index])
        if valid_x_values:
            plt.errorbar(
                valid_x_values,
                all_speed_avg,
                all_speed_std,
                fmt='-o',
                capsize=5,
                label=f"Mean ± Std Dev (Server {server_full_names[server_index]})",
            )
    plt.xlim(0, 26)
    plt.xticks([0, 1, 5, 10, 15, 20, 25])
    plt.ylim(0, 15)
    plt.xlabel('Concurrent clients')
    plt.ylabel('Download speed (MB/s)')
    plt.title("Average download speed in MB/s over concurrent clients (all servers)")
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.savefig("downloadspeed_avg_all_servers_plot.pdf")
    plt.show()

def plot_variant(all_variant_values):
    """
    Plots a heatmap showing the normalized distribution of variants for each instance amount across multiple runs.
    """
    total_clients = len(all_variant_values[0])
    all_variants = sorted(set(
        value
        for run_variant_values in all_variant_values
        for job_variant_values in run_variant_values
        for variant_group in job_variant_values
        for value in variant_group
    ))
    # Initialize data for the heatmap.
    heatmap_data_avg = []
    heatmap_data_std = []
    for job_index in range(total_clients):
        job_distributions = {variant: [] for variant in all_variants}
        for run_variant_values in all_variant_values:
            job_variant_values = run_variant_values[job_index]
            flattened_variants = [value for variant_group in job_variant_values for value in variant_group]
            total_instances = len(flattened_variants)
            counts = Counter(flattened_variants)
            for variant in all_variants:
                proportion = counts[variant] / total_instances if total_instances > 0 else 0
                job_distributions[variant].append(proportion)
        # Compute average and standard deviation for each variant.
        heatmap_data_avg.append([np.mean(job_distributions[variant]) for variant in all_variants])
        heatmap_data_std.append([np.std(job_distributions[variant]) for variant in all_variants])
    # Transpose so rows are variants and columns are client counts.
    heatmap_data_avg = np.array(heatmap_data_avg).T
    heatmap_data_std = np.array(heatmap_data_std).T
    # Heatmap of the average normalized frequencies.
    plt.figure(figsize=(10, 6))
    sns.heatmap(
        heatmap_data_avg,
        annot=True,
        cmap="YlGnBu",
        xticklabels=range(1, total_clients + 1),
        yticklabels=all_variants,
        cbar_kws={'label': 'Normalized Frequency (Average)'}
    )
    plt.xlabel('Concurrent Clients')
    plt.ylabel('Variant Quality')
    plt.title(f'Heatmap of Variant Distribution Across Concurrent Clients (Average) ({server_name_to_full[server_name]})')
    plt.savefig(f"variant_heatmap_avg_{server_name}_plot.pdf")
    plt.show()
    # Heatmap of the standard deviations across runs.
    plt.figure(figsize=(10, 6))
    sns.heatmap(
        heatmap_data_std,
        annot=True,
        cmap="OrRd",
        xticklabels=range(1, total_clients + 1),
        yticklabels=all_variants,
        cbar_kws={'label': 'Normalized Frequency (Std Dev)'}
    )
    plt.xlabel('Concurrent Clients')
    plt.ylabel('Variant Quality')
    plt.title(f'Heatmap of Variant Distribution Across Concurrent Clients (Std Dev) ({server_name_to_full[server_name]})')
    plt.savefig(f"variant_heatmap_std_{server_name}_plot.pdf")
    plt.show()

def plot_variant_distribution_stacked_bar(all_variant_values):
    """
    Plots a stacked bar chart showing the distribution of variants for each instance amount.
    """
    all_variants = sorted(set(
        value
        for run_variant_values in all_variant_values
        for job_variant_values in run_variant_values
        for variant_group in job_variant_values
        for value in variant_group
    ))
    variant_distribution = {variant: [] for variant in all_variants}
    for run_variant_values in all_variant_values:
        # Flatten all variants of the run to count instances.
        flattened_variants = [value for job_variant_values in run_variant_values for variant_group in job_variant_values for value in variant_group]
        total_instances = len(flattened_variants)
        counts = Counter(flattened_variants)
        # Record the normalized frequency of each variant.
        for variant in all_variants:
            variant_distribution[variant].append(counts[variant] / total_instances if total_instances > 0 else 0)
    # Plot the stacked bar chart, one bar per client count.
    x = np.arange(len(plot_x_values))
    bottom = np.zeros(len(x))
    bar_width = 0.8
    plt.figure(figsize=(10, 6))
    for variant, proportions in variant_distribution.items():
        plt.bar(x, proportions, bottom=bottom, width=bar_width)
        bottom += proportions
    plt.xlabel('Concurrent Clients')
    plt.ylabel('Normalized Distribution')
    plt.ylim(0, 1)
    plt.title(f'Variant Distribution Across Concurrent Clients ({server_name_to_full[server_name]})')
    plt.xticks(x, labels=plot_x_values)
    # Assumes four variant quality levels, ordered best to worst.
    plt.legend(["Highest", "High", "Low", "Lowest"], title="Variants")
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(f"variant_stackedbar_{server_name}_plot.pdf")
    plt.show()

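# A quick, self-contained sanity check of the normalization used above, with
# hypothetical variant codes. Not called by main().
def _check_variant_normalization():
    flattened_variants = [0, 0, 1, 3]          # hypothetical flattened run
    counts = Counter(flattened_variants)        # {0: 2, 1: 1, 3: 1}
    total = len(flattened_variants)
    proportions = [counts[v] / total for v in sorted(set(flattened_variants))]
    assert abs(sum(proportions) - 1.0) < 1e-9   # each stacked bar sums to 1
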
def plot_all_variant_distribution_stacked_bar(all_variant_values):
    """
    Plots grouped stacked bars showing the distribution of variants for each
    instance amount, with one bar per server in each group.
    """
    variant_distributions = []
    num_servers = len(all_variant_values)
    for server_variant_values in all_variant_values:
        all_variants = sorted(set(
            value
            for run_variant_values in server_variant_values
            for job_variant_values in run_variant_values
            for variant_group in job_variant_values
            for value in variant_group
        ))
        variant_distribution = {variant: [] for variant in all_variants}
        for run_variant_values in server_variant_values:
            # Flatten all variants of the run to count instances.
            flattened_variants = [value for job_variant_values in run_variant_values for variant_group in job_variant_values for value in variant_group]
            total_instances = len(flattened_variants)
            counts = Counter(flattened_variants)
            # Record the normalized frequency of each variant.
            for variant in all_variants:
                variant_distribution[variant].append(counts[variant] / total_instances if total_instances > 0 else 0)
        variant_distributions.append(variant_distribution)
    x = np.arange(len(plot_x_values))
    bar_width = 0.7 / num_servers
    plt.figure(figsize=(10, 6))
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
    for server_index in range(num_servers):
        bottom = np.zeros(len(x))
        for variant, proportions in variant_distributions[server_index].items():
            plt.bar(x + server_index * (bar_width + 0.02), proportions, bottom=bottom, width=bar_width, color=colors[variant % len(colors)])
            bottom += proportions
    plt.xlabel('Concurrent Clients (in order of AWS, Azure, GCP, Chimay)')
    plt.ylabel('Normalized Distribution')
    plt.ylim(0, 1)
    plt.title('Variant Distribution Across Concurrent Clients (in order of AWS, Azure, GCP, Chimay)')
    plt.xticks(x + (num_servers - 1) * bar_width / 2, labels=plot_x_values)
    # Assumes four variant quality levels, ordered best to worst.
    plt.legend(["Highest", "High", "Low", "Lowest"], title="Variants")
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig("variant_stackedbar_all_plot.pdf")
    plt.show()

def main():
    aws_time_values, aws_variant_values, aws_latency_values, aws_filesize_values = read_jobs('aws')
    azure_time_values, azure_variant_values, azure_latency_values, azure_filesize_values = read_jobs('azure')
    gcp_time_values, gcp_variant_values, gcp_latency_values, gcp_filesize_values = read_jobs('gcp')
    chimay_time_values, chimay_variant_values, chimay_latency_values, chimay_filesize_values = read_jobs('chimay')
    # Per-server plots (enable as needed; they use the global server_name).
    # plot_latency(chimay_latency_values)
    # plot_download_speed(chimay_filesize_values)
    # plot_latency_max(gcp_latency_values)
    # plot_variant(aws_variant_values)
    # plot_variant_distribution_stacked_bar(chimay_variant_values)
    plot_all_latency([aws_latency_values, azure_latency_values, gcp_latency_values, chimay_latency_values])
    plot_all_download_speed([aws_filesize_values, azure_filesize_values, gcp_filesize_values, chimay_filesize_values])
    plot_all_variant_distribution_stacked_bar([aws_variant_values, azure_variant_values, gcp_variant_values, chimay_variant_values])


if __name__ == "__main__":
    main()