Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # load the respective cached dictionary for the definition of "kmeansDic" throughout this project
def load_cache_kmeans(silhouette_setting = True, sample_size = 300000, k_lowest = 2, k_highest = 3, kmeans_input = 1, data_processing = 1, normalize_samples = False):
    """Open the cached k-means result file ("kmeansDic") that matches the given settings.

    Side effect: the working directory of the process is changed to the
    project scratch directory before the file is opened.

    All parameters are only used to assemble the name of the cache file:

    silhouette_setting -- written into the file name; when truthy, the
                          sample size is appended to the name as well
    sample_size        -- only part of the file name if silhouette_setting is truthy
    k_lowest, k_highest, kmeans_input, data_processing, normalize_samples
                       -- inserted verbatim into the file name

    Returns the h5py.File opened in read-only mode. The caller is
    responsible for closing it.
    """
    os.chdir("/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan")

    # The sample size is only encoded in the name of silhouette-based caches.
    sample_suffix = f"_{sample_size}-samplesize" if silhouette_setting else ""

    cache_dir = "/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan/Cache/"
    file_name = (
        f"kmeansDic({k_lowest},{k_highest})"
        f"_{kmeans_input}-kmeans-input"
        f"_{normalize_samples!s}-normalize-samples"
        f"_{data_processing}-data-processing"
        f"_{silhouette_setting!s}-silhouette{sample_suffix}.h5"
    )
    return h5py.File(cache_dir + file_name, "r")
def violin(kmeans_input=7,currents=False, dpi=None, savepng=False):
    """Draw violin plots of the cached current arrays of scans 185, 183, 181, 182 and 186.

    kmeans_input -- forwarded to load_cache_kmeans() to select the k-means
                    cache; not used when currents is True
    currents     -- True: one plain violin per scan.
                    False: one split violin per scan, the two halves being
                    the groups read from ["2"]["cluster_numbers"] of the
                    k-means cache (the same group array is used for every
                    scan, so it must have as many entries as each scan)
    dpi          -- resolution of the saved PNG; None uses the figure's dpi
    savepng      -- if True, save the figure as
                    "savefig/violin_<dd-mm-YYYY_HH_MM_SS>.png" relative to
                    the current working directory

    The figure is shown with plt.show() and the elapsed time is printed.
    Returns None.
    """
    start = time.time()
    # Single source of truth for the scans and their left-to-right order.
    scan_order = [185, 183, 181, 182, 186]
    figsize = (30*0.75,15*0.75)
    fontsize = figsize[0]
    fig, ax = plt.subplots(figsize=figsize)

    # Load the current cache once instead of once per scan and flatten every
    # scan into an (n, 1) column.
    # NOTE(review): load_cache_currents() is defined outside this block; its
    # return value is not closed here because its type is not visible.
    current_cache = load_cache_currents()
    columns = {
        scan_index: np.array(current_cache["current_array"][str(scan_index)]).reshape(-1, 1)
        for scan_index in scan_order
    }

    if currents:
        # Wide format: one column per scan (all scans need the same length).
        numpy_data = np.concatenate([columns[scan_index] for scan_index in scan_order], axis=1)
        df = pd.DataFrame(data=numpy_data, columns=[f"Scan {scan_index}" for scan_index in scan_order])
        ax = sn.violinplot(data=df, scale="count", inner="box", ax=ax)
    else:
        # The group labels do not depend on the scan: read them once and
        # close the HDF5 file right away instead of leaking one open handle
        # per scan.
        with load_cache_kmeans(kmeans_input = kmeans_input) as kmeans_cache:
            group = np.array(kmeans_cache["2"]["cluster_numbers"]).reshape(-1, 1)

        # Long format: one (XBIC, Scan, Group) row per sample.
        per_scan = []
        for scan_index in scan_order:
            data = columns[scan_index]
            scan = np.full((len(data), 1), scan_index)
            per_scan.append(np.concatenate((data, scan, group), axis=1))
        df = pd.DataFrame(data=np.concatenate(per_scan), columns=["XBIC","Scan","Group"])
        ax = sn.violinplot(x = "Scan", y = "XBIC", hue = "Group", data = df, order=scan_order, palette = "viridis", split=True, linewidth = 0, scale = "count", inner = "box", ax = ax)

    ax.set_xlabel("Scan", labelpad = 10, fontsize=9/5*fontsize, fontweight="bold")
    ax.set_ylabel("XBIC", labelpad = 10, fontsize=9/5*fontsize, fontweight="bold")
    ax.tick_params(labelsize=fontsize, length=4/5*fontsize)
    # add minor ticks for XBIC
    ax.yaxis.set_minor_locator(MultipleLocator(10))
    ax.tick_params(which='minor', length=2/5*fontsize)
    plt.title("Violin plot", pad = 15, fontsize=round(9/5*fontsize), fontweight="bold")
    plt.grid(axis="y", which="minor",lw=0.25)
    # add vertical lines to the right of every violin
    for x in range(len(scan_order)):
        plt.axvline(x=x+0.5, color="grey", lw=1)
    ax.xaxis.set_ticks(list(range(len(scan_order))))
    ax.xaxis.set_ticklabels(scan_order, fontsize=round(9/5*fontsize))
    plt.grid(axis="y")
    # NOTE(review): the legend is also requested when currents is True,
    # although that plot has no "Group" hue - confirm that this is intended.
    ax.legend(labels=["Group 1", "Group 2"], prop={'size': fontsize})
    plt.show()
    end = time.time()
    print(f"Plotting of the violin plots took {round(end-start,2)} seconds.")

    if savepng:
        dt_string = datetime.now().strftime("%d-%m-%Y_%H_%M_%S")
        # Fall back to the figure's own resolution if none was requested.
        fig.savefig("savefig/violin_" + dt_string + ".png", dpi=fig.dpi if dpi is None else dpi, bbox_inches="tight")
# Script entry: draw the violin plot with the default arguments of violin().
- violin()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement