#!/usr/bin/env python
# coding: utf-8

# In[1]:

# import external modules
import h5py
import time
import math
import cv2
import os
import os.path
import shutil
import pylatex
import random
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
import scipy.constants as constants
import seaborn as sn
import pandas as pd
from IPython.core.display import display, HTML
from shutil import copyfile
from os import path
from datetime import datetime # for saving figures and files with the current date/time in the file name
from scipy.interpolate import griddata
from mpl_toolkits.axes_grid1.inset_locator import (inset_axes, InsetPosition, mark_inset) # create a small inset plot inside another plot; axes_grid1 replaces the deprecated axes_grid
from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.axes_grid1.colorbar import colorbar
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from matplotlib.patches import Patch
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter, AutoMinorLocator, AutoLocator)
from sklearn.preprocessing import MinMaxScaler, minmax_scale
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, calinski_harabasz_score
from scipy.optimize import curve_fit
from silx.io.dictdump import dicttoh5 # save computationally expensive dictionaries as h5 files
from PIL import Image, ImageOps

get_ipython().run_line_magic('matplotlib', 'inline')
# In[2]:

# change the width of the Jupyter notebook
def displaywidth(width):
    display(HTML("<style>.container { width:" + str(width) + "% !important; }</style>"))

displaywidth(100)
# In[2]:

def load_h5(scan, datatype, show_time=False):
    '''This function loads only a few selected h5 files, but the definition should suffice for all the files I might need throughout the work on my bachelor's thesis.'''
    try:
        if datatype == "positions":
            fpath = "/asap3/petra3/gpfs/p06/2018/data/11005475/processed/0016_EMPA_MSH_003c/scan_00" + str(scan) + "/positions.h5"
        else:
            fpath = "/asap3/petra3/gpfs/p06/2018/data/11005475/processed/0016_EMPA_MSH_003c/scan_00" + str(scan) + "/data/" + datatype + ".h5"
        h5_file = h5py.File(fpath, "r")
        return h5_file
    except Exception:
        print("You entered '" + str(scan) + "' for the argument 'scan'. The function only takes an integer as its first argument. Its value must lie between 178 and 192, except for 184, because that scan was aborted.\nThe second argument must be a string naming the datatype, e.g. 'positions' or 'fluo_roi'.")
# In[3]:

def counts2amps(scan, nA=True, solar_cell_channel_only=False, select_channel=None):
    '''This function takes the integer scan number as an argument, looks for the counter data inside of the h5 file,
    converts all the counts into XBIC in (nA) and returns the values as an ndarray.'''
    # load counts and turn them into a 2d array
    if scan in [179,187,188,189,190]: # XBIC measurements without lock-in amplification
        loaded_counts = load_h5(scan, "counter")["/data/solar_cell_0-10V"]
        counts_as_array = np.array(loaded_counts)
        print("This is the direct signal, because the chopper wasn't used for this scan!")
    else:
        loaded_counts = load_h5(scan, "counter")["/data/lock_in_R_0-10V"]
        counts_as_array = np.array(loaded_counts).astype(float) # this integer array must be converted to float, otherwise np.reciprocal would return only zeros
    if solar_cell_channel_only:
        if scan in [178,179,180,181,183,185,188,191,192]:
            loaded_counts = load_h5(scan, "counter")["/data/solar_cell_0-10V"]
            counts_as_array = np.array(loaded_counts)
        if scan in [182,186]:
            loaded_counts = load_h5(scan, "counter")["/data/solar_cell_-10-0V"]
            counts_as_array = np.array(loaded_counts)*(-1)
        if scan in [187,189,190]:
            counts_as_array = np.zeros(40200)
    ##### for plotting all channels ####
    if select_channel == "negative" and solar_cell_channel_only:
        loaded_counts = load_h5(scan, "counter")["/data/solar_cell_-10-0V"]
        counts_as_array = np.array(loaded_counts)*(-1)
    if select_channel == "positive" and solar_cell_channel_only:
        loaded_counts = load_h5(scan, "counter")["/data/solar_cell_0-10V"]
        counts_as_array = np.array(loaded_counts)
    if select_channel == "negative" and not solar_cell_channel_only:
        loaded_counts = load_h5(scan, "counter")["/data/lock-in_-10-0V"]
        counts_as_array = np.array(loaded_counts)
    if select_channel == "positive" and not solar_cell_channel_only:
        loaded_counts = load_h5(scan, "counter")["/data/lock_in_0-10V"]
        counts_as_array = np.array(loaded_counts)
    if select_channel == "R" and not solar_cell_channel_only:
        pass # the lock-in R channel is already loaded above
    ####################################
    # load photon counts
    loaded_photon_counts = load_h5(scan, "counter")["/data/qbpm_micro_sum"]
    photon_counts_as_array = np.array(loaded_photon_counts).astype(float) # this integer array must be converted to float, otherwise np.reciprocal would return only zeros
    # load exposure times and turn them into a 2d array
    loaded_exposure_times = load_h5(scan, "counter")["/data/exposure_time"]
    exposure_times_as_array = np.array(loaded_exposure_times)
    # normalize counts and photons to one second
    c_rate = counts_as_array/exposure_times_as_array
    ph_rate = photon_counts_as_array/exposure_times_as_array
    # normalize counts to the average photon rate and one second
    f_DAQ = c_rate * np.mean(ph_rate)/ph_rate # multiply each LIA count by the ratio of the average photon count to the respective photon count (e.g. twice as many photons as the average should be normalized to half of the XBIC current)
    # calculate the conversion factor
    if scan in [178,179,180,181,182,183,185,186,191,192]: # these scans have a sensitivity of 1 μA/V
        A_PA = 10**6 # "A_PA" = 1/sensitivity: preamplifier amplification factor in V/A; values taken from LogbuchStrahlzeitPetraIII31.10.-6.11.2018.pptx, slide 54f.
    if scan in [187,188,189,190]: # these scans have a sensitivity of 100 mA/V
        A_PA = 10
    A_LIA = 1 # "A_LIA" = 1/scaling = lock-in amplifier amplification factor in V/V; values also taken from the pptx logbook
    V2f = 10**6
    Wff = math.sqrt(2) # the output signal (here a sine signal) of the LIA is the RMS amplitude, but we are interested in the peak-to-peak value (see the jove paper, p. 4)
    H_conv = 2*Wff/(V2f*A_LIA*A_PA)
    # finally multiply the conversion factor with the normalized count of each pixel
    if not nA:
        S_XBIC = f_DAQ * H_conv         # in A
    else:
        S_XBIC = f_DAQ * H_conv * 10**9 # in nA (previously both assignments ran, so the A branch was always overwritten)
    return S_XBIC
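# Worked check (hedged) of the conversion factor for the 1 μA/V scans, using the values above:
# H_conv = 2*Wff/(V2f*A_LIA*A_PA) = 2*sqrt(2)/(1e6 * 1 * 1e6) ≈ 2.83e-12,
# so a photon-normalized rate of 1e6 counts/s maps to ≈ 2.83e-6 A ≈ 2.83e3 nA of XBIC.
print(2*math.sqrt(2)/(10**6 * 1 * 10**6))  # ≈ 2.83e-12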
# In[462]:

def concat_images(image_paths, size, shape=None, return_only=False, savepng=False):
    # Open images and resize them
    width, height = size
    images = map(Image.open, image_paths)
    images = [ImageOps.fit(image, size, Image.ANTIALIAS)
              for image in images]
    # Create a canvas for the final image with the total size
    shape = shape if shape else (1, len(images))
    image_size = (width * shape[1], height * shape[0])
    image = Image.new('RGB', image_size)
    # Paste the images into the final image
    for row in range(shape[0]):
        for col in range(shape[1]):
            offset = width * col, height * row
            idx = row * shape[1] + col
            image.paste(images[idx], offset)
    if return_only:
        return image
    if savepng:
        # save the already assembled image grid with a timestamped file name
        # (previously this branch rebuilt the grid via a recursive call and discarded it)
        now = datetime.now()
        dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
        image.save(f"image_{dt_string}.png", "PNG")

# Get the list of image paths
folder = "/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan/savefig/plotXBIC_singlecell_new"
image_paths = [os.path.join(folder, f)
               for f in sorted(os.listdir(folder)) if f.endswith('.png')]
image_array = image_paths
concat_images(image_array, (400, 400), shape=(5,9), savepng=True)
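# Note (assumption made explicit): the tiles are pasted row-major, so image_paths must hold at
# least shape[0]*shape[1] = 45 files here; fewer would raise an IndexError at images[idx].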
# In[4]:

def supergrid(yvalues, zvalues, currents, gridres):
    """This function creates a rectangular grid where both axes have the given length. The given datapoints, whose
    locations are characterized by tuples of y- and z-values, are interpolated such that every point in the grid gets
    assigned a value according to the nearest-neighbor method.
    ARGUMENTS:
    yvalues (1D-array): Corresponding y-values for every measurement
    zvalues (1D-array): Corresponding z-values for every measurement
    currents (1D-array): All currents measured at the respective y- and z-coordinates
    gridres (float): Resolution of the grid insofar as it is the length of both axes of the grid in pixels
    RETURNS:
    (yvalues_new, zvalues_new, currents_supergrid) (Tuple of three 2D-Arrays): three grids with the corresponding
    y-values, z-values and currents at every single point in the grid.
    """
    if np.size(yvalues) != np.size(zvalues):
        raise Exception("Arrays 'yvalues' and 'zvalues' must have the same size.")
    if np.size(yvalues) - np.size(currents) == -1:   # This paragraph catches an error that is raised when
        currents = currents[:np.size(currents) - 1]  # dealing with the XRF-scan XBIC data. It appears that
    if np.size(yvalues) - np.size(currents) == 1:    # the y- and z-arrays or the currents are too long by one
        yvalues = yvalues[:np.size(yvalues) - 1]     # element where no measurement was done. This element is
        zvalues = zvalues[:np.size(zvalues) - 1]     # removed from the respective array to align points and data.
    if np.size(yvalues) - np.size(currents) != 0:
        raise Exception("Positions (y, z) and corresponding data points can't be aligned due to their different sizes.")
    yvalues_shift = yvalues - yvalues.min() # shift the y-values to obtain the relative position
    zvalues_shift = zvalues - zvalues.min() # shift the z-values to obtain the relative position
    # create a grid of y- and z-values with respect to the selected grid resolution (gridres)
    yvalues_new, zvalues_new = np.mgrid[0:yvalues_shift.max():gridres * 1j, 0:zvalues_shift.max():gridres * 1j]
    # interpolate the given currents such that they fit the grid of y- and z-values
    currents_supergrid = griddata((yvalues_shift, zvalues_shift), currents, (yvalues_new, zvalues_new), method="nearest")
    return yvalues_new, zvalues_new, currents_supergrid.T
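# Hedged, self-contained sketch of supergrid(): regrid 100 random scattered points
# (hypothetical positions/currents, not beamline data) onto a 200x200 nearest-neighbor grid.
_y, _z = np.random.rand(100), np.random.rand(100)
_I = np.random.rand(100)
_Y, _Z, _C = supergrid(_y, _z, _I, 200)
print(_C.shape)  # (200, 200)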
# In[5]:

def plotXBIC_loop(scan, fig, fontsize, voltage_dic, loop_count, compareB=None, compareB_counter=None, compare=False):
    #fig.suptitle("Reduced LIA count normalized", fontsize=2*fontsize, x=0.7, y=0.92) # optional title for the figure (with this x and y it is centered and has a good distance)
    # load the positions and define y, z and f(y,z) as arrays that can be interpreted by the imshow() function
    positions = load_h5(scan, "positions")
    y_axis_h5 = positions["/data/encoder_fast/data"]
    z_axis_h5 = positions["/data/encoder_slow/data"]
    y = np.array(y_axis_h5)
    z = np.array(z_axis_h5)
    current = counts2amps(scan)
    if compare:
        counts2amps_A = current
        counts2amps_B = counts2amps(compareB[compareB_counter])
        # append NaN: scan 180 has the shape (40199,) and thus cannot be broadcast together with the other scans, which have shapes of (40200,)
        if scan == 180:
            counts2amps_A = np.append(current, np.nan)
        if compareB[compareB_counter] == 180:
            counts2amps_B = np.append(counts2amps(compareB[compareB_counter]), np.nan)
        current = counts2amps_A - counts2amps_B
    y_grid, z_grid, current_array = supergrid(y, z, current, 1000)
    # axes for the new subplot
    ax = fig.add_subplot(4, 4, loop_count)
    # spacing to the other subplots
    plt.subplots_adjust(right=1.3, hspace=0.5, wspace=0.5)
    # y and z labels
    ax.set_xlabel("Position Y [μm]", labelpad=0, fontsize=fontsize)
    ax.set_ylabel("Position Z [μm]", labelpad=0, fontsize=fontsize)
    # title
    bias_voltage = voltage_dic[scan]
    if compare:
        plt.title("Scan #" + str(scan) + " minus Scan #" + str(compareB[compareB_counter]), pad=15, fontsize=round(9/8*fontsize))
    else:
        plt.title("Scan #" + str(scan) + bias_voltage, pad=15, fontsize=round(9/8*fontsize))
    # individual ticks: it's a plot interpolated from 201x201 to 1000x1000 pixels, but this corresponds to 10x10 micrometers on the solar cell
    ax.xaxis.set_ticklabels([0,5,10], fontsize=round(4/5*fontsize))
    ax.xaxis.set_ticks([0,500,999]) # with 1000 instead of 999 the last label won't display
    ax.yaxis.set_ticklabels([0,5,10], fontsize=round(4/5*fontsize))
    ax.yaxis.set_ticks([0,500,999])
    ax.xaxis.set_minor_locator(MultipleLocator(50)) # minor ticks are set to multiples of 50
    ax.yaxis.set_minor_locator(MultipleLocator(50))
    current_array = cv2.GaussianBlur(current_array, (33, 33), 0) # this accounts for the beam profile with a FWHM of 105 x 108 nm
    # plot the data
    im = ax.imshow(current_array, cmap=cm.afmhot, origin='lower')
    # add a colorbar as legend
    ax_divider = make_axes_locatable(ax) # introduce a divider next to the axis already in place
    cax = ax_divider.append_axes("right", size="15%", pad="5%") # append a new axis object "cax" to the divider; width ("size") and distance to the 2D plot ("pad") can be given in % of the width of the x axis
    legend = colorbar(im, cax=cax) # introduce a legend (colorbar) where the new axis has been placed
    cax.set_ylabel(r"$\frac{XBIC}{pixel \cdot photons_{avg}}$" + " (nA)", rotation=-90, labelpad=50, fontsize=5/4*fontsize) # place this label next to the colorbar; labelpad sets the distance from the colorbar
    cax.yaxis.set_major_locator(AutoLocator())
    cax.yaxis.set_minor_locator(AutoMinorLocator())
# In[6]:

def plotXBIC(plot_range=None, singleplots=None, compareA=None, compareB=None, figsize=(30,30), savepng=False, dpi=None):
    '''
    This function plots the XBIC, using data from the previously defined counts2amps() function. Look at the examples to see how to use it:
    plotXBIC(plot_range=14) will do the following:
        It will plot 14 datasets, starting with #178 and ending with #192 (and omitting 184, which is still counted).
    plotXBIC(singleplots=[178,185,192]) will do the following:
        It will plot #178, #185 and #192.
    plotXBIC(compareA=[178,178,185], compareB=[187,189,189]) will do the following:
        It will plot
        Scan 178 minus Scan 187
        Scan 178 minus Scan 189
        Scan 185 minus Scan 189.
    '''
    start = time.time() # measure the time for each plot
    # the best-looking fontsize is about half the width of the figsize
    fontsize = figsize[0]//2
    # individualize the titles for each scan if a bias voltage was applied (information from LogbuchStrahlzeitPetraIII31.10.-6.11.2018.pptx, slide 54f., corresponding to the handwritten logbook)
    voltage_dic = {
        178: "",
        179: "",
        180: "",
        181: "",
        182: " (+ 100 mV)",
        183: " (- 100 mV)",
        185: " (- 200 mV)",
        186: " (+ 200 mV)",
        187: " (+ 500 mV)",
        188: " (- 500 mV)",
        189: " (- 1 V)",
        190: " (+ 600 mV)",
        191: "",
        192: ""
    }
    # create a figure for all plots
    fig = plt.figure(figsize=figsize)
    loop_count = 1 # the subplot number should be independent of omitted scans (e.g. scan #184); otherwise there would be an empty space at that position
    if type(plot_range) is int:
        if plot_range > 6:
            plot_range += 1 # scan #184 is omitted, but still counted by the for-loop
        for scan in range(178, 178 + plot_range):
            if scan == 184:
                continue # scan 184 was aborted
            plotXBIC_loop(scan=scan, fig=fig, fontsize=fontsize, voltage_dic=voltage_dic, loop_count=loop_count)
            loop_count += 1
        plt.show()
    if type(singleplots) is list:
        for scan in singleplots:
            if scan == 184:
                continue # scan 184 was aborted
            plotXBIC_loop(scan=scan, fig=fig, fontsize=fontsize, voltage_dic=voltage_dic, loop_count=loop_count)
            loop_count += 1
        plt.show()
    if type(compareA) is list:
        compareB_counter = 0
        for scan in compareA:
            plotXBIC_loop(scan=scan, fig=fig, fontsize=fontsize, voltage_dic=voltage_dic, loop_count=loop_count, compare=True, compareB=compareB, compareB_counter=compareB_counter)
            loop_count += 1
            compareB_counter += 1
        plt.show()
    end = time.time()
    if type(compareA) is list:
        print("Scan " + str(scan) + " minus Scan " + str(compareB[-1]) + " took " + str(round(end-start,2)) + " seconds to plot.") # compareB[-1]: the counter has already been incremented past the last index
    else:
        print("Scan " + str(scan) + " took " + str(round(end-start,2)) + " seconds to plot.")
    if savepng:
        now = datetime.now()
        dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
        if dpi is None:
            fig.savefig("savefig/plotXBIC_" + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
        else:
            fig.savefig("savefig/plotXBIC_" + dt_string + ".png", dpi=dpi, bbox_inches="tight")
# In[7]:

def ImageRegistration(scan1, scan2, cell=None, printdetails=False):
    positions = load_h5(scan1, "positions")
    y_axis_h5 = positions["/data/encoder_fast/data"]
    z_axis_h5 = positions["/data/encoder_slow/data"]
    y = np.array(y_axis_h5)
    z = np.array(z_axis_h5)
    current1 = counts2amps(scan1)
    current2 = counts2amps(scan2)
    y_grid1, z_grid1, current1_array_original = supergrid(y, z, current1, 1000)
    y_grid2, z_grid2, current2_array_original = supergrid(y, z, current2, 1000)
    # convert the current arrays to a grayscale image format (values between 0 and 255):
    # divide by the maximum, rescale by 1/(1 - min) and shift so the values span [0, 1], then map to 8 bit
    current1_array_8bitgrayscale = current1_array_original/current1_array_original.max()
    current1_array_8bitgrayscale *= 1/(1 - current1_array_8bitgrayscale.min()) # parentheses matter: 1/1 - min would rescale wrongly
    current1_array_8bitgrayscale += 1 - current1_array_8bitgrayscale.max()
    current1_array_8bitgrayscale[current1_array_8bitgrayscale < 0] = 0
    current1_array_8bitgrayscale = np.array(current1_array_8bitgrayscale*255, dtype=np.uint8)
    current2_array_8bitgrayscale = current2_array_original/current2_array_original.max()
    current2_array_8bitgrayscale *= 1/(1 - current2_array_8bitgrayscale.min())
    current2_array_8bitgrayscale += 1 - current2_array_8bitgrayscale.max()
    current2_array_8bitgrayscale[current2_array_8bitgrayscale < 0] = 0
    current2_array_8bitgrayscale = np.array(current2_array_8bitgrayscale*255, dtype=np.uint8)
    shape = np.shape(current1_array_8bitgrayscale)
    warp_mode = cv2.MOTION_TRANSLATION # we only need translation in the y and z directions (no rotation etc.)
    warp_matrix = np.eye(2, 3, dtype=np.float32) # the matrix that the results of the ECC algorithm are stored in
    number_of_iterations = 5000 # specify the number of iterations
    termination_eps = 1e-10 # specify the threshold of the increment in the correlation coefficient between two iterations
    # specify the overall criteria
    criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)
    (cc, warp_matrix) = cv2.findTransformECC(current1_array_8bitgrayscale, current2_array_8bitgrayscale, warp_matrix, warp_mode, criteria, None, gaussFiltSize=33)
    if printdetails:
        scan2_aligned = cv2.warpAffine(current2_array_8bitgrayscale, warp_matrix, (shape[1], shape[0]), flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP, borderValue=float("NaN")).astype(np.float32)
        scan2_aligned[scan2_aligned == 0] = float("NaN")
        # show where the values of the registered image lie (XBIC as a function of the 40200 datapoints)
        scan2_aligned_ravel = scan2_aligned.ravel()
        x = np.arange(np.shape(scan2_aligned_ravel)[0])
        plt.figure(figsize=(50,5))
        plt.scatter(x, scan2_aligned_ravel)
        plt.show()
        fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 15), constrained_layout=True)
        im1 = ax1.imshow(current1_array_8bitgrayscale, origin="lower", cmap=cm.afmhot)
        ax1.set_title("Scan #" + str(scan1))
        ax1_divider = make_axes_locatable(ax1) # introduce a divider next to the axis already in place
        cax1 = ax1_divider.append_axes("right", size="15%", pad="5%")
        legend = colorbar(im1, cax=cax1)
        cax1.yaxis.set_major_locator(AutoLocator())
        cax1.yaxis.set_minor_locator(AutoMinorLocator())
        im2 = ax2.imshow(current2_array_8bitgrayscale, origin="lower", cmap=cm.afmhot)
        ax2.set_title("Scan #" + str(scan2))
        ax2_divider = make_axes_locatable(ax2)
        cax2 = ax2_divider.append_axes("right", size="15%", pad="5%")
        legend = colorbar(im2, cax=cax2)
        cax2.yaxis.set_major_locator(AutoLocator())
        cax2.yaxis.set_minor_locator(AutoMinorLocator())
        im3 = ax3.imshow(scan2_aligned, origin="lower", cmap="inferno")
        ax3.set_title("Scan #" + str(scan2) + " aligned")
        ax3.set_ylim(bottom=0)
        ax3_divider = make_axes_locatable(ax3)
        cax3 = ax3_divider.append_axes("right", size="15%", pad="5%")
        legend = colorbar(im3, cax=cax3)
        cax3.yaxis.set_major_locator(AutoLocator())
        cax3.yaxis.set_minor_locator(AutoMinorLocator())
        plt.tight_layout(w_pad=7)
    return warp_matrix
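# Hedged sketch of how the returned translation-only warp matrix is applied elsewhere:
# tx, ty are the pixel shifts found by ECC (hypothetical values here), and
# WARP_INVERSE_MAP applies the inverse mapping, as in the plotting functions below.
# M = np.array([[1, 0, tx], [0, 1, ty]], dtype=np.float32)
# aligned = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]),
#                          flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP, borderValue=float("NaN"))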
# In[8]:

# plotXBIC_loop2 differs from plotXBIC_loop in that it translates the current array according to the image registration (axis ticks correspondingly, and AutoMinorLocator instead of MultipleLocator)
def plotXBIC_loop2(scan, fig, fontsize, voltage_dic, loop_count, grid, singleplot, figsize, crop=True, image_registration=True, gaussian_blur=True, select_channel=None, solar_cell_channel_only=False, plot_any=None, compareB=None, compareB_counter=None, compare=False):
    # fig.suptitle("Reduced LIA count normalized", fontsize=2*fontsize, x=0.7, y=0.92) # optional title for the figure (with this x and y it is centered and has a good distance)
    # load the positions and define y, z and f(y,z) as arrays that can be interpreted by the imshow() function
    positions = load_h5(scan, "positions")
    y_axis_h5 = positions["/data/encoder_fast/data"]
    z_axis_h5 = positions["/data/encoder_slow/data"]
    y = np.array(y_axis_h5)
    z = np.array(z_axis_h5)
    scan_order = [185,183,181,182,186]
    if crop and scan in scan_order and image_registration and gaussian_blur:
        if not solar_cell_channel_only:
            current_array = load_cache_currents()["pixelplotting_array"][scan_order.index(scan)]
        else:
            current_array = load_cache_currents()["pixelplotting_array_direct"][scan_order.index(scan)]
    elif scan not in scan_order and crop:
        print("The scan is not amongst the voltage-series scans, so it cannot be displayed with a crop.")
        return # without cached data there is nothing to plot for this scan
    else:
        if solar_cell_channel_only:
            current = counts2amps(scan, solar_cell_channel_only=True, select_channel=select_channel)
        else:
            current = counts2amps(scan, solar_cell_channel_only=False, select_channel=select_channel)
        if compare:
            counts2amps_A = current
            counts2amps_B = counts2amps(compareB[compareB_counter])
            # append NaN: scan 180 has the shape (40199,) and thus cannot be broadcast together with the other scans, which have shapes of (40200,)
            if scan == 180:
                counts2amps_A = np.append(current, np.nan)
            if compareB[compareB_counter] == 180:
                counts2amps_B = np.append(counts2amps(compareB[compareB_counter]), np.nan)
            current = counts2amps_A - counts2amps_B
        if plot_any is not None:
            current_array = plot_any
        if plot_any is None:
            y_grid, z_grid, current_array = supergrid(y, z, current, 1000)
            # set the 1 px border to NaN
            current_array[0,:] = np.nan
            current_array[999,:] = np.nan
            current_array[:,0] = np.nan
            current_array[:,999] = np.nan
            if gaussian_blur:
                current_array = cv2.GaussianBlur(current_array, (33, 33), 0) # this accounts for the beam profile with a FWHM of 105 x 108 nm
            if scan not in [178,181,187,188,189,190] and image_registration: # scans 178 and 181 are the reference scans for the warp matrices, so they don't need to be transformed, and the image registration would fail for scans #187 to #190
                warp_matrix = np.array(ImageRegistrationDic[str(scan)])
                #current_array = np.pad(current_array, pad_width=200, mode='constant', constant_values=np.nan) # for a NaN border... (not working as of now...)
                current_array = cv2.warpAffine(current_array, warp_matrix, (1000, 1000), flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP, borderValue=float("NaN")).astype(np.float32)
            #current_array = np.random.rand(1000,1000)
    if not singleplot:
        # axes for the new subplot
        ax = fig.add_subplot(4, 4, loop_count)
    else:
        fig, ax = plt.subplots(figsize=figsize)
        fontsize *= 1.4
    # spacing to the other subplots
    plt.subplots_adjust(right=1.3, hspace=0.5, wspace=0.5)
    # y and z labels
    ax.set_xlabel("Position Y (μm)", labelpad=0, fontsize=fontsize, fontweight="bold")
    ax.set_ylabel("Position Z (μm)", labelpad=0, fontsize=fontsize, fontweight="bold")
    ax.tick_params(length=2/5*fontsize)
    # title
    bias_voltage = voltage_dic[scan]
    if compare:
        plt.title("Scan #" + str(scan) + " minus Scan #" + str(compareB[compareB_counter]), pad=15, fontsize=round(9/8*fontsize), fontweight="bold")
    elif plot_any is None:
        plt.title("Scan #" + str(scan) + bias_voltage, pad=15, fontsize=round(9/8*fontsize), fontweight="bold")
    # ticks and tick labels would need to be shifted according to the image registration
    if scan in [179,180,182,183,184,185,186,191,192]:
        IRS_y = ImageRegistrationDic[str(scan)][0][2]
        IRS_z = ImageRegistrationDic[str(scan)][1][2]
    else:
        IRS_y = 0
        IRS_z = 0
    # individual ticks: it's a plot interpolated from 201x201 to 1000x1000 pixels, but this corresponds to 10x10 micrometers on the solar cell
    ax.xaxis.set_ticklabels([0,5,10], fontsize=round(4/5*fontsize))
    #ax.xaxis.set_ticks([0-IRS_y,500-IRS_y,999-IRS_y]) # with 1000 instead of 999 the last label won't display
    ax.xaxis.set_ticks([0,500,999])
    ax.yaxis.set_ticklabels([0,5,10], fontsize=round(4/5*fontsize))
    #ax.yaxis.set_ticks([0-IRS_z,500-IRS_z,999-IRS_z])
    ax.yaxis.set_ticks([0,500,999])
    ax.xaxis.set_minor_locator(AutoMinorLocator(5)) # 1 minor tick for each micrometer
    ax.yaxis.set_minor_locator(AutoMinorLocator(5))
    # plot the data
    im = ax.imshow(current_array, cmap=cm.afmhot, origin='lower')
    # add a colorbar as legend
    ax_divider = make_axes_locatable(ax) # introduce a divider next to the axis already in place
    cax = ax_divider.append_axes("right", size="5%", pad="5%") # width ("size") and distance to the 2D plot ("pad") in % of the width of the x axis
    legend = colorbar(im, cax=cax) # introduce a legend (colorbar) where the new axis has been placed
    legend.ax.tick_params(length=fontsize, labelsize=7/5*fontsize) # manipulate the major tick length + label size
    legend.ax.tick_params(which="minor", length=3/5*fontsize) # manipulate the minor tick length
    #cax.set_ylabel(r"$\frac{XBIC}{pixel \cdot photons_{avg}}$" + " in (nA)", rotation=-90, labelpad=50, fontsize=5/4*fontsize)
    if plot_any is None:
        if not solar_cell_channel_only:
            cax.set_ylabel("XBIC (nA)", rotation=-90, labelpad=50, fontsize=5/4*fontsize, fontweight="bold") # place this label next to the colorbar; labelpad sets the distance from the colorbar
        else:
            cax.set_ylabel("Current (nA)", rotation=-90, labelpad=50, fontsize=5/4*fontsize, fontweight="bold")
    cax.yaxis.set_major_locator(AutoLocator())
    cax.yaxis.set_minor_locator(AutoMinorLocator())
    if grid:
        ax.grid(which="both")
# In[9]:

def plotXBIC2(plot_range=None, singleplots=None, compareA=None, compareB=None, grid=True, crop=True, solar_cell_channel_only=False, select_channel=None, image_registration=True, gaussian_blur=True, plot_any=None, figsize=(30,30), savepng=False, dpi=None):
    '''
    This function plots the XBIC, using data from the previously defined counts2amps() function. Look at the examples to see how to use it:
    plotXBIC2(plot_range=14) will do the following:
        It will plot 14 datasets, starting with #178 and ending with #192 (and omitting 184, which is still counted).
    plotXBIC2(singleplots=[178,185,192]) will do the following:
        It will plot #178, #185 and #192.
    plotXBIC2(compareA=[178,178,185], compareB=[187,189,189]) will do the following:
        It will plot
        Scan 178 minus Scan 187
        Scan 178 minus Scan 189
        Scan 185 minus Scan 189.
    '''
    start = time.time() # measure the time for each plot
    # the best-looking fontsize is about half the width of the figsize
    fontsize = figsize[0]//2
    # individualize the titles for each scan if a bias voltage was applied (information from LogbuchStrahlzeitPetraIII31.10.-6.11.2018.pptx, slide 54f., corresponding to the handwritten logbook)
    voltage_dic = {
        178: "",
        179: "",
        180: "",
        181: " (no bias)",
        182: " (+ 100 mV)",
        183: " (- 100 mV)",
        185: " (- 200 mV)",
        186: " (+ 200 mV)",
        187: " (+ 500 mV)",
        188: " (- 500 mV)",
        189: " (- 1 V)",
        190: " (+ 600 mV)",
        191: "",
        192: ""
    }
    # create a figure for all plots
    fig = plt.figure(figsize=figsize)
    loop_count = 1 # the subplot number should be independent of omitted scans (e.g. scan #184); otherwise there would be an empty space at that position
    if type(plot_range) is int:
        singleplot = False
        if plot_range > 6:
            plot_range += 1 # scan #184 is omitted, but still counted by the for-loop
        for scan in range(178, 178 + plot_range):
            if scan == 184:
                continue # scan 184 was aborted
            plotXBIC_loop2(scan=scan, fig=fig, grid=grid, crop=crop, solar_cell_channel_only=solar_cell_channel_only, select_channel=select_channel, image_registration=image_registration, gaussian_blur=gaussian_blur, fontsize=fontsize, voltage_dic=voltage_dic, loop_count=loop_count, singleplot=singleplot, figsize=figsize)
            loop_count += 1
        if not savepng:
            plt.show()
        end = time.time()
        print("Plotting scan 178 to scan " + str(178 + plot_range) + " took " + str(round(end-start,2)) + " seconds.")
    if type(singleplots) is list:
        singleplot = True
        figsize = (10,10)
        for scan in singleplots:
            if scan == 184:
                continue # scan 184 was aborted
            plotXBIC_loop2(scan=scan, fig=fig, grid=grid, crop=crop, solar_cell_channel_only=solar_cell_channel_only, select_channel=select_channel, image_registration=image_registration, gaussian_blur=gaussian_blur, fontsize=fontsize, voltage_dic=voltage_dic, loop_count=loop_count, singleplot=singleplot, figsize=figsize)
            loop_count += 1
            if not savepng:
                plt.show()
            end = time.time()
            print("Scan " + str(scan) + " took " + str(round(end-start,2)) + " seconds to plot.")
    if type(compareA) is list:
        singleplot = False
        compareB_counter = 0
        for scan in compareA:
            plotXBIC_loop2(scan=scan, fig=fig, crop=crop, solar_cell_channel_only=solar_cell_channel_only, select_channel=select_channel, image_registration=image_registration, gaussian_blur=gaussian_blur, grid=grid, fontsize=fontsize, voltage_dic=voltage_dic, loop_count=loop_count, compare=True, compareB=compareB, compareB_counter=compareB_counter, singleplot=singleplot, figsize=figsize)
            loop_count += 1
            compareB_counter += 1
            if not savepng:
                plt.show()
            end = time.time()
            print("Scan " + str(scan) + " minus Scan " + str(compareB[compareB_counter-1]) + " took " + str(round(end-start,2)) + " seconds to plot.") # -1: the counter has already been incremented
    if plot_any is not None:
        singleplot = True
        figsize = (10,10)
        plotXBIC_loop2(scan=178, fig=fig, grid=grid, crop=False, solar_cell_channel_only=solar_cell_channel_only, select_channel=select_channel, image_registration=image_registration, gaussian_blur=gaussian_blur, plot_any=plot_any, fontsize=fontsize, voltage_dic=voltage_dic, loop_count=loop_count, singleplot=singleplot, figsize=figsize)
    if savepng:
        fig = plt.gcf()
        now = datetime.now()
        dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
        if dpi is None:
            fig.savefig("savefig/plotXBIC_" + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
        else:
            fig.savefig("savefig/plotXBIC_" + dt_string + ".png", dpi=dpi, bbox_inches="tight")
# In[10]:

def plotShockley(I_0=0.0058*1e-9, I_Ph=215.0494*1e-9, I_02=None, I_Ph2=None, compare_with_data=True, discrete=True, mean_curve=False, example=False, normalize_curves=False, dpi=None, savepng=False):
    figsize = (15,15)
    fontsize = figsize[0]//2
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(111)
    ax.set_xlabel("Bias voltage (mV)", labelpad=10, fontsize=21/5*fontsize, fontweight="bold")
    ax.set_ylabel("XBIC (nA)", labelpad=10, fontsize=21/5*fontsize, fontweight="bold")
    ax.tick_params(labelsize=15/5*fontsize, length=7/5*fontsize)
    ax.xaxis.set_ticks([-0.2,-0.1,0,0.1,0.2])
    ax.xaxis.set_ticklabels([-200,-100,0,100,200])
    ax.yaxis.set_minor_locator(MultipleLocator(5))
    ax.tick_params(which='minor', length=4/5*fontsize)
    ax.tick_params(labelsize=3*fontsize, length=1.5*fontsize, right=True)
    plt.grid(axis="y", which="minor", lw=0.25)
    plt.grid(axis="y")
    lw = 3
    ms = 12
    if not discrete:
        V = np.arange(-0.2, 0.2, 0.00001)
        style = "-"
    else:
        V = np.array([-0.2,-0.1,0,0.1,0.2])
        style = "-o"
    I1 = shockley_function(V, I_0=I_0, I_Ph=I_Ph)*1e9
    if normalize_curves:
        I1 = I1/abs(shockley_function(V=0, I_0=I_0, I_Ph=I_Ph)*1e9)
    ax.plot(V, -I1, style, linewidth=1.3*lw, markersize=ms, label="Good diode (calculated)", color="red")
    if I_02 is not None and I_Ph2 is not None: # check I_Ph2 here, not I_Ph, otherwise I_Ph2=None would crash below
        I2 = shockley_function(V, I_0=I_02, I_Ph=I_Ph2)*1e9
        if normalize_curves:
            I2 = I2/abs(shockley_function(V=0, I_0=I_02, I_Ph=I_Ph2)*1e9)
        ax.plot(V, -I2, style, linewidth=1.3*lw, markersize=ms, label="Bad diode (calculated)", color="blue")
    ax.legend(fontsize=6*fontsize)
    if compare_with_data:
        pixelplotting_array = np.array(load_cache_currents()["pixelplotting_array"])
        V = [-0.2,-0.1,0,0.1,0.2]
        if not example:
            mean_arrays = []
            kmeansDic = load_cache_kmeans(kmeans_input=7)
            for comparing_curve in [1,2]:
                k_curves_values = pixelplotting_array[:, np.array(kmeansDic[str(2)]["cluster_numbers"]) == comparing_curve].T # instead of transposing, one could also specify another axis for averaging!
                mean_array = np.empty(0)
                for scan_index in range(5):
                    mean_array = np.append(mean_array, np.nanmean(k_curves_values[:,scan_index]))
                mean_arrays.append(mean_array)
            if normalize_curves:
                mean_arrays[0] = mean_arrays[0]/mean_arrays[0][2]
                mean_arrays[1] = mean_arrays[1]/mean_arrays[1][2]
            ax.plot(V, mean_arrays[0], "-o", linewidth=lw, markersize=ms, label="Good diode (measured mean)", color="pink")
            ax.plot(V, mean_arrays[1], "-o", linewidth=lw, markersize=ms, label="Bad diode (measured mean)", color="cornflowerblue")
        if example:
            y_good = pixelplotting_array[:,600,650]
            y_bad = pixelplotting_array[:,400,400]
            if normalize_curves:
                y_good = y_good/y_good[2]
                y_bad = y_bad/y_bad[2]
            ax.plot(V, y_good, "-o", linewidth=lw, markersize=ms, label="Good diode (measured)", color="pink")
            ax.plot(V, y_bad, "-o", linewidth=lw, markersize=ms, label="Bad diode (measured)", color="cornflowerblue")
        ax.legend(fontsize=4.4*fontsize)
    if mean_curve:
        I_mean = np.array([217.43692, 217.10754, 214.23949449431512, 210.2738, 201.62088])
        if normalize_curves:
            I_mean = I_mean/I_mean[2]
        ax.plot(V, I_mean, "-o", linewidth=lw, markersize=ms, label="Mean IV curve", color="k")
        ax.legend(fontsize=4*fontsize)
    if example:
        ax.legend(fontsize=3.5*fontsize)
    if savepng:
        now = datetime.now()
        dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
        if dpi is None:
            fig.savefig("savefig/ShockleyCurve_" + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
        else:
            fig.savefig("savefig/ShockleyCurve_" + dt_string + ".png", dpi=dpi, bbox_inches="tight")

popt = plotIV(return_fitting=True)[0]
I_0 = popt[0]
I_Ph = popt[1]
plotShockley(I_0=I_0, I_Ph=I_Ph, I_02=2*I_0, I_Ph2=I_Ph, discrete=False, mean_curve=False, example=False, normalize_curves=True, savepng=True)
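# Note: plotIV(return_fitting=True) returns (popt, pcov) from curve_fit with popt = [I_0, I_Ph] in ampere,
# so the "bad diode" above is sketched as the same photocurrent with twice the saturation current.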
# In[358]:

plotClusterCurves(kmeans_input=7)
plotClusterCurves(kmeans_input=7, normalize_curves=True)
# In[11]:

def plotIV(coordinate=None, sample_size=100, set_region=(0,999,0,999), showPosition=False, inset=False, title=True, figsize=(30,30), savepng=False, dpi=None, savepdf=False, annotations=False, data_processing=1, return_only=False, return_fitting=False):
    if inset:
        showPosition = True
    start = time.time()
    # voltages
    voltages = [-1000, -500, -200, -100, 0, 100, 200, 500, 600]
    # scans (ordered by voltages)
    scans_ordered = [189, 188, 185, 183, 181, 182, 186, 187, 190]
    # convert the lists to arrays
    voltages, scans_ordered = np.array(voltages), np.array(scans_ordered)
    current_array_list = []
    current_mean = []
    # plot either with or without negative values [EDIT: deleted from the counts2amps function!] and select whether the solar cell channel should be used for the current
    options_dic = {
        1: (False, False, "lock-in R, without negative values"),
        2: (False, True, "lock-in R, with negative values"),
        3: (True, False, "solar cell channel, without negative values"),
        4: (True, True, "solar cell channel, with negative values")
    }
    if not return_only and not return_fitting:
        print(options_dic[data_processing][2])
    A = 2  # plot only from scans_ordered[A] to ...
    B = -2 # ... scans_ordered[B]
    counter = 0
    for scan in scans_ordered[A:B]:
        positions = load_h5(scan, "positions")
        y_axis_h5 = positions["/data/encoder_fast/data"]
        z_axis_h5 = positions["/data/encoder_slow/data"]
        y_axis = np.array(y_axis_h5)
        z_axis = np.array(z_axis_h5)
        current = counts2amps(scan, solar_cell_channel_only=options_dic[data_processing][0])
        y_grid, z_grid, current_array = supergrid(y_axis, z_axis, current, 1000)
        # set the 1 px border to NaN
        current_array[0,:] = np.nan
        current_array[999,:] = np.nan
        current_array[:,0] = np.nan
        current_array[:,999] = np.nan
        current_array = cv2.GaussianBlur(current_array, (33, 33), 0) # this accounts for the beam profile with a FWHM of 105 x 108 nm
        if scan != 181:
            warp_matrix = np.array(ImageRegistrationDic[str(scan)])
            current_array = cv2.warpAffine(current_array, warp_matrix, (1000, 1000), flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP, borderValue=float("NaN")).astype(np.float32)
        current_array_list.append(current_array)
        counter += 1
    pixelplotting_array = np.stack(current_array_list)
    # have the NaN values at the same position for all scans
    counter = 0
    for grid in current_array_list:
        grid[np.isnan(pixelplotting_array).any(axis=0)] = np.nan
        current_mean.append(np.nanmean(grid))
        if not return_only and not return_fitting:
            print(f"Mean current for scan {scans_ordered[A:B][counter]} is {round(current_mean[counter],4)} nA. Its min/max current is {round(np.nanmin(grid),4)}/{round(np.nanmax(grid),4)} nA.") # the values are only shown to a few decimal places
        counter += 1
    pixelplotting_array = np.stack(current_array_list)
    if return_only:
        return pixelplotting_array, current_mean
    # fitting
    popt, pcov = curve_fit(lambda V, I_0, I_Ph: shockley_function(V, I_0, I_Ph)*(-1e9), voltages[A:B]/1000, current_mean, p0=[5e-12, current_mean[2]*1e-9], bounds=([1e-13, (current_mean[2]-3)*1e-9], [1e-11, (current_mean[2]+3)*1e-9]))
    if return_fitting:
        return popt, pcov
    fontsize = 1.5*figsize[0]//2
    fig = plt.figure(figsize=figsize)
    ax1 = fig.add_subplot(221)
    # x and y labels
    ax1.set_xlabel("Bias voltage (mV)", labelpad=10, fontsize=7/5*fontsize, fontweight="bold")
    ax1.set_ylabel("XBIC (nA)", labelpad=10, fontsize=7/5*fontsize, fontweight="bold")
    if title:
        plt.title("IV curve(s) for single pixel(s)", pad=15, fontsize=round(7/5*fontsize), fontweight="bold")
    ax1.tick_params(labelsize=7/5*fontsize, length=fontsize, right=True)
    ax1.set_xticks(voltages[A:B])
    if data_processing == 3: # otherwise the ticks would exceed Locator.MAXTICKS
        major_interval = 1000
    else:
        major_interval = 10
    minor_interval = major_interval/5
    ax1.yaxis.set_major_locator(MultipleLocator(major_interval))
    ax1.yaxis.set_minor_locator(MultipleLocator(minor_interval))
    ax1.tick_params(which='minor', length=4/5*fontsize, right=True)
    plt.grid(axis="y", which="minor", lw=0.25)
    plt.grid(axis="y")
    # allows plotting only a specific location on the y-z current array (else plot a sample of IV curves):
    if coordinate is not None:
        ax1.plot(voltages[A:B], pixelplotting_array[:,coordinate[0],coordinate[1]], "-o", markersize=8, linewidth=1.2, color="blue", label="IV curve at given (Y,Z)")
        slope, intercept = kmeans_clustering(kmeans_input=8, return_only=True)
        slope = np.reshape(slope, (1000,1000))
        intercept = np.reshape(intercept, (1000,1000))
        ax1.plot(voltages[A:B], slope[coordinate[0],coordinate[1]]*voltages[A:B]+intercept[coordinate[0],coordinate[1]], label="Fitted line")
        ax1.legend(fontsize=7/5*fontsize)
        if showPosition:
            if inset:
                # create a set of inset axes: these should fill the bounding box allocated to them
                ax1_2 = plt.axes([0,0,1,1])
                # manually set the position and relative size of the inset axes within ax1
                ip = InsetPosition(ax1, [0.2,0.2,0.3,0.3])
                ax1_2.set_axes_locator(ip)
            else:
                # create a new subplot
                ax1_2 = fig.add_subplot(223)
            # ticks and tick labels would need to be shifted according to the image registration
            scan = scans_ordered[B-1] # show the scan of the last IV-curve point as an example
            IRS_y = ImageRegistrationDic[str(scan)][0][2] # IRS = Image Registration Shift (in y or z)
            IRS_z = ImageRegistrationDic[str(scan)][1][2]
            # individual ticks: it's a plot interpolated from 201x201 to 1000x1000 pixels, but this corresponds to 10x10 micrometers on the solar cell
            ax1_2.xaxis.set_ticklabels([0,5,10], fontsize=round(9/5*fontsize))
            #ax1_2.xaxis.set_ticks([0-IRS_y,500-IRS_y,999-IRS_y]) # with 1000 instead of 999 the last label won't display
            ax1_2.xaxis.set_ticks([0,500,999])
            ax1_2.yaxis.set_ticklabels([0,5,10], fontsize=round(9/5*fontsize))
            ax1_2.yaxis.set_ticks([0-IRS_z,500-IRS_z,999-IRS_z])
            ax1_2.xaxis.set_minor_locator(AutoMinorLocator(5)) # 1 minor tick for each micrometer
            ax1_2.yaxis.set_minor_locator(AutoMinorLocator(5))
            ax1_2.imshow(current_array, cmap=cm.afmhot, origin='lower')
            ax1_2.plot(coordinate[0], coordinate[1], "bo", markersize=8)
            # y and z labels
            ax1_2.set_xlabel("Position Y [μm]", labelpad=10, fontsize=fontsize)
            ax1_2.set_ylabel("Position Z [μm]", labelpad=10, fontsize=fontsize)
        print("Number of NaN values on shown IV curve:", str(len(np.where(np.isnan(pixelplotting_array[:,coordinate[0],coordinate[1]]))[0])))
    if coordinate is None:
        countnumberlines = 0
        countNaN = 0
        # define the colormap for the curves
        cmap = cm.gist_rainbow
        norm = colors.Normalize(vmin=1, vmax=sample_size)
        for z in np.linspace(set_region[0], set_region[1], int(sample_size**0.5), dtype=int):
            for y in np.linspace(set_region[2], set_region[3], int(sample_size**0.5), dtype=int):
                #ax1.plot(voltages[A:B], pixelplotting_array[:,y,z], "-o", markersize=8, linewidth=1.2, color=cmap(norm(countnumberlines)))
                ax1.plot(voltages[A:B], pixelplotting_array[:,y,z], "-", linewidth=0.5, color="blue")
                if not np.isnan(np.sum(pixelplotting_array[:,y,z])):
                    countnumberlines += 1
                countNaN += len(np.where(np.isnan(pixelplotting_array[:,y,z]))[0])
        ax1.plot(voltages[A:B], current_mean, "-o", markersize=12, linewidth=4, color="blue")
        if showPosition:
            if inset:
                # create a set of inset axes: these should fill the bounding box allocated to them
                ax1_2 = plt.axes([0,0,1,1])
                # manually set the position and relative size of the inset axes within ax1
                ip = InsetPosition(ax1, [0.2,0.05,figsize[0]/120,figsize[0]/120])
                ax1_2.set_axes_locator(ip)
            else:
                # create a new subplot
                ax1_2 = fig.add_subplot(223)
            # ticks and tick labels would need to be shifted according to the image registration
            scan = scans_ordered[B-1] # show the scan of the last IV-curve point as an example
            IRS_y = ImageRegistrationDic[str(scan)][0][2] # IRS = Image Registration Shift (in y or z)
            IRS_z = ImageRegistrationDic[str(scan)][1][2]
            # individual ticks: it's a plot interpolated from 201x201 to 1000x1000 pixels, but this corresponds to 10x10 micrometers on the solar cell
            ax1_2.xaxis.set_ticklabels([0,5,10], fontsize=round(fontsize))
            #ax1_2.xaxis.set_ticks([0-IRS_y,500-IRS_y,999-IRS_y]) # with 1000 instead of 999 the last label won't display
            ax1_2.xaxis.set_ticks([0,500,999])
            ax1_2.yaxis.set_ticklabels([0,5,10], fontsize=round(fontsize))
            #ax1_2.yaxis.set_ticks([0-IRS_z,500-IRS_z,999-IRS_z])
            ax1_2.yaxis.set_ticks([0,500,999])
            ax1_2.xaxis.set_minor_locator(AutoMinorLocator(5)) # 1 minor tick for each micrometer
            ax1_2.yaxis.set_minor_locator(AutoMinorLocator(5))
            ax1_2.imshow(current_array, cmap=cm.afmhot, origin='lower')
            #ax1_2.plot(y,z, "bo", markersize=8, color=cmap(norm(countnumberlines)))
            ax1_2.plot(y, z, "bo", markersize=8, color="k") # mark the last sampled position
            # y and z labels
            ax1_2.set_xlabel("Position Y [μm]", labelpad=10, fontsize=fontsize)
            ax1_2.set_ylabel("Position Z [μm]", labelpad=10, fontsize=fontsize)
        print("Number of IV curves shown in plot:", str(countnumberlines))
        print("Number of NaN values on shown IV curves:", str(countNaN))
    ax2 = fig.add_subplot(222)
    ax2.plot(voltages[A:B], current_mean, "-o", markersize=8, linewidth=1.2, color="blue", label="Mean XBIC for all (y,z) positions (nA)")
    ax2.plot(voltages[A:B], shockley_function(voltages[A:B]/1000, popt[0], popt[1])*(-1e9), "-o", markersize=6, linewidth=0.6, color="red", label="Fitted IV curve")
    ax2.legend(fontsize=7/5*fontsize)
    print("-----\nActually measured:\nShort-circuit current:", str(round(current_mean[2],4)), "nA\n-----\nResult of fitting:\nSaturation current I_0:", str(round(popt[0]*1e9,4)), "nA\nPhotocurrent I_Ph:", str(round(popt[1]*1e9,4)),
          "nA\nOpen-circuit voltage V_OC:", str(round(shockley_function_VOC(popt[0],popt[1])*1e3,4)), "mV\n-----")
    if ax1.get_ylim()[0] < 202 and ax1.get_ylim()[1] > 218: # this condition makes sure that the min and max mean current values are still shown
        ax2.set_ylim(ax1.get_ylim()) # make sure that the current interval is the same so that the shapes of the curves can be compared
    ax2.set_xlabel("Bias voltage (mV)", labelpad=10, fontsize=7/5*fontsize, fontweight="bold")
    ax2.set_ylabel("XBIC (nA)", labelpad=10, fontsize=7/5*fontsize, fontweight="bold")
    plt.title("Mean IV curve", pad=15, fontsize=round(7/5*fontsize), fontweight="bold")
    ax2.tick_params(labelsize=7/5*fontsize, length=fontsize, right=True)
    ax2.set_xticks(voltages[A:B])
    ax2.yaxis.set_major_locator(MultipleLocator(major_interval))
    ax2.yaxis.set_minor_locator(MultipleLocator(minor_interval))
    ax2.tick_params(which='minor', length=4/5*fontsize, right=True)
    plt.grid(axis="y", which="minor", lw=0.25)
    plt.grid(axis="y")
    if annotations:
        for i in range(len(current_mean)):
            ax2.annotate("Scan " + str(scans_ordered[i+A]), xy=(voltages[i+A], current_mean[i]), xytext=(voltages[i+A], current_mean[i]+1.3), arrowprops=dict(facecolor='black', shrink=0.12, width=2, headwidth=8))
    '''# distance of the tick labels from the ticks
    ax1.tick_params(axis='both', which='major', pad=25)
    ax1_2.tick_params(axis='both', which='major', pad=25)
    ax2.tick_params(axis='both', which='major', pad=25)'''
    plt.subplots_adjust(wspace=0.35) # adjust the width between the subplots
    plt.show()
    end = time.time()
    print("Plotting took {} seconds.".format(str(round(end-start,2))))
    if savepng or savepdf:
        now = datetime.now()
        dt_string = now.strftime("%d-%m-%Y_%H_%M_%S") # needed by both the png and the pdf branch
        if coordinate is not None:
            coord_str = f"{coordinate[0]}-{coordinate[1]}_"
        else:
            coord_str = ""
    if savepng:
        if dpi is None:
            fig.savefig("savefig/IV_curve_" + coord_str + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
        else:
            fig.savefig("savefig/IV_curve_" + coord_str + dt_string + ".png", dpi=dpi, bbox_inches="tight")
    if savepdf:
        fig.savefig("savefig/IV_curve_" + dt_string + ".pdf", format="pdf")
# In[12]:

def shockley_function(V, I_0, I_Ph): # arguments and output in SI units
    return I_0*(np.exp(constants.e*V/(constants.k*300))-1) - I_Ph

# In[13]:

def shockley_function_VOC(I_0, I_Ph): # arguments and output in SI units
    return constants.k*300*np.log(I_Ph/I_0 + 1)/constants.e
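# Quick plausibility check (sketch) with the default diode parameters of plotShockley():
# V_OC = (kT/e) * ln(I_Ph/I_0 + 1) ≈ 0.0259 V * ln(215.0494/0.0058 + 1) ≈ 0.27 V at T = 300 K.
print(shockley_function_VOC(0.0058e-9, 215.0494e-9))  # ≈ 0.27 (V)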
# In[445]:

plotClusterCurves(kmeans_input=8, normalize_curves=True, title="")
# In[14]:

def plotClusterCurves(k=2, k_curves=1, kmeans_input=1, NaN=False, sample_size=100, data_processing=1, compare=True, normalize_curves=False, unnecessary_reference=False, suptitle="", title=True, dpi=None, savepng=False, savepng_str=None):
    start = time.time()
    #pixelplotting_array = plotIV(data_processing=data_processing, return_only=True)[1]
    pixelplotting_array = np.array(load_cache_currents()["pixelplotting_array"])
    kmeansDic = load_cache_kmeans(kmeans_input=kmeans_input, data_processing=data_processing)
    #kmeansDic = {"2": {"cluster_numbers": plotMedianCorrelation(return_only=True)[0]}}
    scan_order = [185,183,181,182,186]
    voltages = [-1000, -500, -200, -100, 0, 100, 200, 500, 600]
    A = 2  # plot only from scans_ordered[A] to ...
    B = -2 # ... scans_ordered[B]
    if unnecessary_reference:
        figsize = (20,10)
    else:
        figsize = (10*1.5, 10*1.5)
    fontsize = figsize[0]//2
    fig = plt.figure(figsize=figsize)
    if unnecessary_reference:
        ax1 = fig.add_subplot(121)
    else:
        ax1 = fig.add_subplot(111)
    if unnecessary_reference:
        # use variables from the plotIV function
        #current_array = plotIV(data_processing=data_processing, return_only=True)[0][-1] # select the scan of the last IV-curve point
        current_array = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(186)])
        '''
        # ticks and tick labels would need to be shifted according to the image registration
        scan = 186 # show the scan of the last IV-curve point as an example <-- here it is scan 186, at B = -2 in the plotIV() function
        IRS_y = ImageRegistrationDic[str(scan)][0][2] # IRS = Image Registration Shift (in y or z)
        IRS_z = ImageRegistrationDic[str(scan)][1][2]
        # individual ticks: it's a plot interpolated from 201x201 to 1000x1000 pixels, but this corresponds to 10x10 micrometers on the solar cell
        ax1.xaxis.set_ticklabels([0,5,10], fontsize=round(4/5*fontsize))
        ax1.xaxis.set_ticks([0-IRS_y,500-IRS_y,999-IRS_y]) # with 1000 instead of 999 the last label won't display
        ax1.yaxis.set_ticklabels([0,5,10], fontsize=round(4/5*fontsize))
        ax1.yaxis.set_ticks([0-IRS_z,500-IRS_z,999-IRS_z])
        ax1.xaxis.set_minor_locator(AutoMinorLocator(5)) # 1 minor tick for each micrometer
        ax1.yaxis.set_minor_locator(AutoMinorLocator(5))
        '''
        ax1.imshow(current_array, cmap=cm.afmhot, origin='lower')
        # y and z labels
        ax1.set_xlabel("Position Y [μm]", labelpad=0, fontsize=fontsize)
        ax1.set_ylabel("Position Z [μm]", labelpad=0, fontsize=fontsize)
    if NaN: # this option was only used because something was wrong with the plotClusterCurves "if NaN" condition itself, adding weird NaN values to the current grid
        NaN_where = np.where(np.isnan(pixelplotting_array))
        NaN_where = np.flip(NaN_where, 0) # np.flip is necessary, because np.isnan reads from bottom to top
        ax1.scatter(NaN_where[0], NaN_where[1])
        ax1.set_title("Position of NaN values (in blue)", pad=15, fontsize=round(9/8*fontsize))
    if k is not None and unnecessary_reference:
        for z in np.linspace(0, 999, int(sample_size**0.5), dtype=int):
            for y in np.linspace(0, 999, int(sample_size**0.5), dtype=int):
                if np.array(kmeansDic[str(k)]["cluster_numbers"])[y][z] == k_curves:
                    ax1.scatter(y, z, color="red")
                else:
                    ax1.scatter(y, z, color="black")
        ax1.set_title("Position of the displayed curve", pad=15, fontsize=round(9/8*fontsize))
    if k_curves is not None: # plot the curves belonging to the k-th cluster
        if unnecessary_reference:
            ax2 = fig.add_subplot(122)
        else:
            ax2 = fig.add_subplot(111)
        ax2.set_xlabel("Bias voltage (mV)", labelpad=10, fontsize=21/5*fontsize, fontweight="bold")
        ax2.set_ylabel("XBIC (nA)", labelpad=10, fontsize=21/5*fontsize, fontweight="bold")
        ax2.tick_params(labelsize=15/5*fontsize, length=7/5*fontsize)
        ax2.xaxis.set_ticks([-200,-100,0,100,200])
        ax2.yaxis.set_minor_locator(MultipleLocator(5))
        ax2.tick_params(which='minor', length=4/5*fontsize)
        plt.grid(axis="y", which="minor", lw=0.25)
        plt.grid(axis="y")
        k_curves_values = pixelplotting_array[:, np.array(kmeansDic[str(k)]["cluster_numbers"]) == k_curves].T # instead of transposing, one could also specify another axis for averaging!
        mean_array = np.empty(0)
        for scan_index in range(5):
            mean_array = np.append(mean_array, np.nanmean(k_curves_values[:,scan_index]))
        print(f"Mean currents for curve #{k_curves} (with k = {k}):", str(np.around(mean_array,2)))
        countnumberlines_list = []
        countnumberlines = 0
        for z in np.linspace(0, 999, int(sample_size**0.5), dtype=int):
            for y in np.linspace(0, 999, int(sample_size**0.5), dtype=int):
                if not math.isnan(np.array(kmeansDic[str(k)]["cluster_numbers"])[y][z]):
                    if int(np.array(kmeansDic[str(k)]["cluster_numbers"])[y][z]) == k_curves:
                        if normalize_curves:
                            ax2.plot(voltages[A:B], pixelplotting_array[:,y,z]/pixelplotting_array[2,y,z], linewidth=0.5, color="pink")
                        else:
                            ax2.plot(voltages[A:B], pixelplotting_array[:,y,z], linewidth=0.5, color="pink")
                        countnumberlines += 1
        countnumberlines_list.append(countnumberlines)
        if normalize_curves:
            ax2.plot(voltages[A:B], mean_array/mean_array[2], "-o", markeredgewidth=3, linewidth=2.0, color="red", label=f"Group {k_curves}")
        else:
            ax2.plot(voltages[A:B], mean_array, "-o", markeredgewidth=3, linewidth=2.0, color="red", label=f"Group {k_curves}")
        plt.legend()
        if compare:
            colors_list = ["blue", "green", "brown", "yellow"] # renamed from 'colors' to avoid shadowing matplotlib.colors
            color_counter = 0
            for comparing_curve in range(1, k+1):
                if comparing_curve == k_curves: # don't plot the red curve again for the k_curve that is already selected
                    continue
                countnumberlines = 0
                for z in np.linspace(0, 999, int(sample_size**0.5), dtype=int):
                    for y in np.linspace(0, 999, int(sample_size**0.5), dtype=int):
                        if not math.isnan(np.array(kmeansDic[str(k)]["cluster_numbers"])[y][z]):
                            if int(np.array(kmeansDic[str(k)]["cluster_numbers"])[y][z]) == comparing_curve:
                                if normalize_curves:
                                    ax2.plot(voltages[A:B], pixelplotting_array[:,y,z]/pixelplotting_array[2,y,z], linewidth=0.5, color="cornflowerblue")
                                else:
                                    ax2.plot(voltages[A:B], pixelplotting_array[:,y,z], linewidth=0.5, color="cornflowerblue")
                                countnumberlines += 1
                countnumberlines_list.append(countnumberlines)
                k_curves_values = pixelplotting_array[:, np.array(kmeansDic[str(k)]["cluster_numbers"]) == comparing_curve].T # instead of transposing, one could also specify another axis for averaging!
                mean_array = np.empty(0)
                for scan_index in range(5):
                    mean_array = np.append(mean_array, np.nanmean(k_curves_values[:,scan_index]))
                if normalize_curves:
                    ax2.plot(voltages[A:B], mean_array/mean_array[2], "-o", markeredgewidth=3, linewidth=2.0, color=colors_list[color_counter], label=f"Group {comparing_curve}")
                else:
                    ax2.plot(voltages[A:B], mean_array, "-o", markeredgewidth=3, linewidth=2.0, color=colors_list[color_counter], label=f"Group {comparing_curve}")
                color_counter += 1
                ax2.legend(fontsize=4*fontsize, prop={'size': 4*fontsize})
                print("For comparison, the mean curve for group", str(comparing_curve), "has been plotted:", str(np.around(mean_array,2)))
        if k == 2:
            s_word = "group"
        else:
            s_word = "groups"
        if unnecessary_reference:
            title_fontsize = round(9/8*fontsize)
        else:
            title_fontsize = round(18/8*fontsize)
        if title:
            ax2.set_title(f"IV curves for group {k_curves} (with k = {k}), compared to the IV curves of the other {s_word}", pad=15, fontsize=title_fontsize, fontweight="bold")
        countnumberlines_str = f"Number of IV curves shown in plot: {countnumberlines_list[0]} (group {k_curves})"
        if compare:
            comparing_curve_counter = 1
            for comparing_curve in range(1, k+1):
                if comparing_curve == k_curves:
                    comparing_curve_counter += 1
                    continue
                countnumberlines_str += f", {countnumberlines_list[comparing_curve_counter-1]} (group {comparing_curve_counter})"
                comparing_curve_counter += 1 # advance the counter (the bare statement here previously had no effect)
        print(countnumberlines_str)
    plt.suptitle(suptitle)
    plt.show()
    end = time.time()
    print("Plotting took {} seconds.".format(str(round(end-start,2))))
    if savepng:
        now = datetime.now()
        dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
        if dpi is None:
            if savepng_str is None:
                fig.savefig("savefig/different_inputs_for_clustering/ClusterCurves_" + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
            else:
                fig.savefig("savefig/different_inputs_for_clustering/ClusterCurves_" + savepng_str + ".png", dpi=fig.dpi, bbox_inches="tight")
        else:
            if savepng_str is None:
                fig.savefig("savefig/different_inputs_for_clustering/ClusterCurves_" + dt_string + ".png", dpi=dpi, bbox_inches="tight")
            else:
                fig.savefig("savefig/different_inputs_for_clustering/ClusterCurves_" + savepng_str + ".png", dpi=dpi, bbox_inches="tight")
- # In[15]:
- def plotClusterPosition(k=2, kmeans_input=1, data_processing=1, title="", dpi=None, suptitle="", no_suptitle=False, grid=True, plot_any=None, savepng=False, savepng_str=None):
- start = time.time()
- kmeansDic = load_cache_kmeans(kmeans_input = kmeans_input, data_processing=data_processing)
- figsize = (15,15)
- fontsize = figsize[0]
- #fig = plt.figure(figsize=figsize)
- #ax = fig.add_subplot(111)
- fig, ax = plt.subplots(figsize=figsize)
- ax.set_aspect("equal")
- cmap = colors.ListedColormap(["yellow", "black"])
- bounds=[0.5,1.5,2.5]
- norm = colors.BoundaryNorm(bounds, cmap.N)
- if plot_any is not None:
- cmap = colors.ListedColormap(["white", "black"])
- groups = plot_any
- else:
- groups = np.array(kmeansDic[str(k)]["cluster_numbers"])
- im = plt.pcolormesh(groups, cmap=cmap, norm=norm) # pass the BoundaryNorm so the mesh colours match the colorbar
- divider = make_axes_locatable(ax)
- cax = divider.append_axes("right", size="5%", pad=0.25)
- cbar = plt.colorbar(im, cax, cmap=cmap, norm=norm, boundaries=bounds, ticks=[1, 2])
- cbar.ax.tick_params(length=7/5*fontsize, labelsize=2*fontsize)
- cax.set_ylabel("Cluster Groups", rotation = -90, labelpad = 50 , fontsize=3*fontsize, fontweight="bold")
- #im = ax.imshow(np.array(kmeansDic[str(k)]["cluster_numbers"]), interpolation='nearest', cmap=cm.afmhot, origin='lower')
- ax.set_xlabel("Position Y (μm)", labelpad = 10, fontsize=15/5*fontsize, fontweight="bold")
- ax.set_ylabel("Position Z (μm)", labelpad = 10, fontsize=15/5*fontsize, fontweight="bold")
- ax.set_title(title, pad = 15, fontsize=15/5*fontsize, fontweight="bold")
- ax.tick_params(length=7/5*fontsize)
- ax.tick_params(which='minor', length=4/5*fontsize)
- # ticks and ticklabels need to be shifted according to Image Registration
- IRS_y_list = []
- IRS_z_list = []
- for scan in [185,183,182,186]:
- IRS_y_list.append(ImageRegistrationDic[str(scan)][0][2])
- IRS_z_list.append(ImageRegistrationDic[str(scan)][1][2])
- IRS_y = max(IRS_y_list) # IRS = Image Registration Shift (in y or z)
- IRS_z = min(IRS_z_list)
- # individual ticks: the plot is interpolated from 201x201 to 1000x1000 pixels, which corresponds to 10x10 micrometers on the solar cell
- ax.xaxis.set_ticklabels([0,5,10],fontsize=round(12/5*fontsize))
- #ax.xaxis.set_ticks([0-IRS_y,500-IRS_y,999-IRS_y]) # if 1000 instead of 999 it won't display
- ax.xaxis.set_ticks([0,500,999])
- ax.yaxis.set_ticklabels([0,5,10],fontsize=round(12/5*fontsize))
- #ax.yaxis.set_ticks([0-IRS_z,500-IRS_z,999-IRS_z])
- ax.yaxis.set_ticks([0,500,999])
- ax.xaxis.set_minor_locator(AutoMinorLocator(5)) # 1 minor tick for each micrometer
- ax.yaxis.set_minor_locator(AutoMinorLocator(5))
- if not no_suptitle:
- MinMeanMax_str = ""
- for group_index in range(1,k+1):
- Min = int(round(kmeansDic[str(k)]["threshold"][group_index-1][0]))
- Mean = int(round(kmeansDic[str(k)]["threshold"][group_index-1][1]))
- Max = int(round(kmeansDic[str(k)]["threshold"][group_index-1][2]))
- MinMeanMax_str += f"\nMin/Mean/Max current for group {group_index} is {Min}/{Mean}/{Max} nA."
- plt.suptitle(suptitle+MinMeanMax_str)
- '''
- # add a colorbar as legend
- ax_divider = make_axes_locatable(ax) # Introduce a divider next to the axis already in place
- cax = ax_divider.append_axes("right", size = "15%", pad = "5%") # Append a new axis object with name "cax" to the divider. Width ("size") and distance to the 2D-Plot ("pad") can be given in % of the width of the x axis.
- legend = colorbar(im, cax = cax) # Introduce a legend (colorbar) where the new axis has been placed
- cax.set_ylabel("Number of cluster", rotation = -90, labelpad = 50 , style = "italic", fontsize=5/4*fontsize) # Place this label next to the colorbar; labelpad denotes the distance of the colorbar
- cax.yaxis.set_major_locator(AutoLocator())
- cax.yaxis.set_minor_locator(AutoMinorLocator())
- '''
- if grid:
- ax.grid(which="both")
- plt.show()
- end = time.time()
- print("Plotting took {} seconds.".format(str(round(end-start,2))))
- if savepng:
- now = datetime.now()
- dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
- fname_part = dt_string if savepng_str is None else savepng_str
- fig.savefig("savefig/different_inputs_for_clustering/ClusterPosition_" + fname_part + ".png", dpi=(fig.dpi if dpi is None else dpi), bbox_inches="tight")
- # In[385]:
- plotXBIC2(singleplots=[181])
- # In[380]:
- arr = kmeans_clustering(kmeans_input=8, return_only=True)[0]
- arr = np.reshape(arr, (1000,1000))
- plotXBIC2(plot_any=arr)
- # In[16]:
- def kmeans_clustering(data_processing=1, k=2, random_state=22, kmeans_input=1, normalize_samples=False, sample_size=50000, silhouette=True, return_only=False):
- '''This function applies k-means clustering to varying input features. The basis is the pixelplotting_array, which contains the five 1000x1000 current grids recorded at five different bias voltages. The argument data_processing selects the signal path; the default is the R value of the lock-in amplifier.
- The output is a 1000x1000 grid of cluster numbers (running from 1 to k), the different scores for finding the optimal k, and the threshold array holding the min/mean/max current of each group.'''
- #pixelplotting_array = plotIV(data_processing=data_processing, return_only=True)[1]
- if data_processing == 1:
- pixelplotting_array = np.array(load_cache_currents()["pixelplotting_array"])
- if data_processing == 3:
- pixelplotting_array = np.array(load_cache_currents()["pixelplotting_array_direct"])
- pixelvectors_original = pixelplotting_array.reshape(5, 1000000).T # each 5D vector represents 1 pixel with its 5 currents at bias voltages from -200 to +200 mV
- # 5 absolute values
- if kmeans_input == 1:
- pixelvectors = pixelvectors_original
- # 4 absolute values (from -200 to +100)
- if kmeans_input == 1.1:
- pixelvectors = pixelvectors_original.T[:4].T
- # 4 slopes
- if kmeans_input == 2:
- pixelvectors = pixelvectors_original
- pixelvectors = np.array([
- pixelvectors.T[0]-pixelvectors.T[1],
- pixelvectors.T[1]-pixelvectors.T[2],
- pixelvectors.T[2]-pixelvectors.T[3],
- pixelvectors.T[3]-pixelvectors.T[4]
- ]).T
- # 4 slopes + 1 absolute value (average)
- if kmeans_input == 3:
- pixelvectors = pixelvectors_original
- pixelvectors = np.array([
- pixelvectors.T[0]-pixelvectors.T[1],
- pixelvectors.T[1]-pixelvectors.T[2],
- pixelvectors.T[2]-pixelvectors.T[3],
- pixelvectors.T[3]-pixelvectors.T[4],
- np.nanmean(pixelvectors,axis=1)
- ]).T
- # 4 slopes + 1 absolute value (-200 mV)
- if kmeans_input == 3.1:
- pixelvectors = pixelvectors_original
- pixelvectors = np.array([
- pixelvectors.T[0]-pixelvectors.T[1],
- pixelvectors.T[1]-pixelvectors.T[2],
- pixelvectors.T[2]-pixelvectors.T[3],
- pixelvectors.T[3]-pixelvectors.T[4],
- pixelvectors.T[0]
- ]).T
- # 4 slopes + 1 absolute value (-100 mV)
- if kmeans_input == 3.2:
- pixelvectors = pixelvectors_original
- pixelvectors = np.array([
- pixelvectors.T[0]-pixelvectors.T[1],
- pixelvectors.T[1]-pixelvectors.T[2],
- pixelvectors.T[2]-pixelvectors.T[3],
- pixelvectors.T[3]-pixelvectors.T[4],
- pixelvectors.T[1]
- ]).T
- # 4 slopes + 1 absolute value (0 mV)
- if kmeans_input == 3.3:
- pixelvectors = pixelvectors_original
- pixelvectors = np.array([
- pixelvectors.T[0]-pixelvectors.T[1],
- pixelvectors.T[1]-pixelvectors.T[2],
- pixelvectors.T[2]-pixelvectors.T[3],
- pixelvectors.T[3]-pixelvectors.T[4],
- pixelvectors.T[2]
- ]).T
- # 4 slopes + 1 absolute value (+100 mV)
- if kmeans_input == 3.4:
- pixelvectors = pixelvectors_original
- pixelvectors = np.array([
- pixelvectors.T[0]-pixelvectors.T[1],
- pixelvectors.T[1]-pixelvectors.T[2],
- pixelvectors.T[2]-pixelvectors.T[3],
- pixelvectors.T[3]-pixelvectors.T[4],
- pixelvectors.T[3]
- ]).T
- # 4 slopes + 1 absolute value (+200 mV)
- if kmeans_input == 3.5:
- pixelvectors = pixelvectors_original
- pixelvectors = np.array([
- pixelvectors.T[0]-pixelvectors.T[1],
- pixelvectors.T[1]-pixelvectors.T[2],
- pixelvectors.T[2]-pixelvectors.T[3],
- pixelvectors.T[3]-pixelvectors.T[4],
- pixelvectors.T[4]
- ]).T
- # 2 slopes (-200 to +200, +100 to +200)
- if kmeans_input == 4:
- pixelvectors = pixelvectors_original
- pixelvectors = np.array([
- pixelvectors.T[0]-pixelvectors.T[4],
- pixelvectors.T[3]-pixelvectors.T[4]
- ]).T
- # only I(V=0)
- if kmeans_input == 5:
- pixelvectors = np.empty((1000000,5))
- pixelvectors[np.isnan(pixelvectors_original).any(axis=1)] = np.nan
- pixelvectors[~np.isnan(pixelvectors_original).any(axis=1)] = pixelvectors_original[np.newaxis,:].T[2][~np.isnan(pixelvectors_original).any(axis=1)]
- pixelvectors = np.array([
- pixelvectors.T[2]
- ]).T
- # only 1 slope (-200 to +200)
- if kmeans_input == 6:
- pixelvectors = pixelvectors_original
- pixelvectors = np.array([
- pixelvectors.T[0]-pixelvectors.T[4]
- ]).T
- # only 1 slope (+100 to +200)
- if kmeans_input == 7:
- pixelvectors = pixelvectors_original
- pixelvectors = np.array([
- pixelvectors.T[3]-pixelvectors.T[4]
- ]).T
- # only 1 slope (-200 to -100)
- if kmeans_input == 7.1:
- pixelvectors = pixelvectors_original
- pixelvectors = np.array([
- pixelvectors.T[0]-pixelvectors.T[1]
- ]).T
- # only 1 slope (-100 to 0)
- if kmeans_input == 7.2:
- pixelvectors = pixelvectors_original
- pixelvectors = np.array([
- pixelvectors.T[1]-pixelvectors.T[2]
- ]).T
- # only 1 slope (0 to +100)
- if kmeans_input == 7.3:
- pixelvectors = pixelvectors_original
- pixelvectors = np.array([
- pixelvectors.T[2]-pixelvectors.T[3]
- ]).T
- # linear regression for all five points
- if kmeans_input == 8:
- pixelvectors = pixelvectors_original
- slope, intercept = np.polyfit([-200,-100,0,100,200], pixelvectors[~np.isnan(pixelvectors).any(axis=1)].T,deg=1)
- pixelvectors_new = np.empty((1000000,1))
- pixelvectors_new[~np.isnan(pixelvectors).any(axis=1)] = slope[np.newaxis,:].T
- pixelvectors_new[np.isnan(pixelvectors).any(axis=1)] = np.nan
- pixelvectors = pixelvectors_new
- if return_only: # also return the intercept
- intercept_new = np.empty((1000000,1))
- intercept_new[~np.isnan(pixelvectors).any(axis=1)] = intercept[np.newaxis,:].T
- intercept_new[np.isnan(pixelvectors).any(axis=1)] = np.nan
- intercept = intercept_new
- # linear regression for all five points + intercept
- if kmeans_input == 8.1:
- pixelvectors = pixelvectors_original
- slope, intercept = np.polyfit([-200,-100,0,100,200], pixelvectors[~np.isnan(pixelvectors).any(axis=1)].T,deg=1)
- slope = slope[np.newaxis,:] # add 1 dimension
- intercept = intercept[np.newaxis,:] # add 1 dimension
- pixelvectors_new = np.empty((1000000,2))
- pixelvectors_new[~np.isnan(pixelvectors).any(axis=1)] = np.concatenate((slope,intercept),axis=0).T
- pixelvectors_new[np.isnan(pixelvectors).any(axis=1)] = np.nan
- pixelvectors = pixelvectors_new
- # linear regression for -100 mV to +100 mV
- if kmeans_input == 9:
- pixelvectors = pixelvectors_original
- slope, intercept = np.polyfit([-100,0,100], pixelvectors[~np.isnan(pixelvectors).any(axis=1)][:, 1:4].T,deg=1)
- pixelvectors_new = np.empty((1000000,1))
- pixelvectors_new[~np.isnan(pixelvectors).any(axis=1)] = slope[np.newaxis,:].T
- pixelvectors_new[np.isnan(pixelvectors).any(axis=1)] = np.nan
- pixelvectors = pixelvectors_new
- if return_only: # also return the intercept
- intercept_new = np.empty((1000000,1))
- intercept_new[~np.isnan(pixelvectors).any(axis=1)] = intercept[np.newaxis,:].T
- intercept_new[np.isnan(pixelvectors).any(axis=1)] = np.nan
- intercept = intercept_new
- # linear regression for -200 mV to +100 mV
- if kmeans_input == 10:
- pixelvectors = pixelvectors_original
- slope, intercept = np.polyfit([-200,-100,0,100], pixelvectors[~np.isnan(pixelvectors).any(axis=1)][:, 0:4].T,deg=1)
- pixelvectors_new = np.empty((1000000,1))
- pixelvectors_new[~np.isnan(pixelvectors).any(axis=1)] = slope[np.newaxis,:].T
- pixelvectors_new[np.isnan(pixelvectors).any(axis=1)] = np.nan
- pixelvectors = pixelvectors_new
- if return_only: # also return the intercept
- intercept_new = np.empty((1000000,1))
- intercept_new[~np.isnan(pixelvectors).any(axis=1)] = intercept[np.newaxis,:].T
- intercept_new[np.isnan(pixelvectors).any(axis=1)] = np.nan
- intercept = intercept_new
- if return_only:
- if kmeans_input in [8,8.1,9,10]:
- return pixelvectors, intercept
- else:
- return pixelvectors
- data = pixelvectors[~np.isnan(pixelvectors).any(axis=1)] # delete all vectors which contain NaN values
- if not normalize_samples: # maximum of ALL values is 1 and minimum of ALL values is 0
- scaler = MinMaxScaler()
- scaler.fit(data)
- data_transformed = scaler.transform(data)
- else: # maximum of each curve is 1 and minimum of each curve is 0
- if kmeans_input in [5,6,7]:
- raise ValueError("For kmeans_input = 5, 6 or 7 the samples cannot be normalized, as they will all be set to 0.01, leading k-means to put all samples in group 1.")
- data_transformed = data/np.linalg.norm(data, ord=2, axis=1, keepdims=True)
- km = KMeans(n_clusters = k)
- groups = km.fit_predict(data_transformed) + 1 # Adding 1 calls the first group "group 1" instead of "group 0". Also, it avoids confusion with the zeros_array
- number_sqdist = km.inertia_
- if silhouette: # the silhouette score slows down the function considerably, so it is optional
- silhouette = silhouette_score(data_transformed, groups, sample_size=sample_size)
- else:
- silhouette = float("NaN")
- calinski_harabasz = calinski_harabasz_score(data_transformed, groups)
- no_data = np.isnan(pixelvectors).any(axis=1) # returns a 1D array with True if a vector contains a NaN value, else False
- no_data_count = no_data.sum() # counts all the vectors with NaNs
- data_count = 1000000-no_data_count
- zeros_array = np.zeros(1000000)
- zeros_array[~no_data] = groups # fills in all the group numbers where there are no NaNs in a vector
- zeros_array[no_data] = np.nan # where there are NaNs in a vector: assign NaN as "group number"
- cluster_array = zeros_array.reshape(1000,1000)
- threshold = np.zeros(3*k).reshape(k,3)
- for k_index in range(1,k+1):
- if kmeans_input == 5:
- values = pixelplotting_array[2, cluster_array == k_index]
- else:
- values = pixelplotting_array[:, cluster_array == k_index]
- threshold_min = np.nanmin(values.T)
- threshold_avg = np.nanmean(values.T)
- threshold_max = np.nanmax(values.T)
- threshold[k_index-1] = np.array([threshold_min, threshold_avg, threshold_max])
- return cluster_array, number_sqdist, silhouette, calinski_harabasz, threshold
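- # In[ ]:
- # Hedged sanity check (toy vectors, not the cached data): the 4-slope feature built for
- # kmeans_input == 2 is just the negative first difference along the voltage axis, so the
- # stacked subtractions above can be verified against np.diff.
- toy = np.array([[5.0, 4.0, 3.0, 2.0, 1.0],
-                 [1.0, 2.0, 4.0, 8.0, 16.0]])
- stacked = np.array([toy.T[0]-toy.T[1], toy.T[1]-toy.T[2],
-                     toy.T[2]-toy.T[3], toy.T[3]-toy.T[4]]).T
- assert np.allclose(stacked, -np.diff(toy, axis=1))
- print(stacked)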
- # In[17]:
- def find_k(method="all", dpi=None, savepng=False):
- figsize=(12,12)
- fontsize = figsize[0]
- fig = plt.figure(figsize=figsize)
- ax1 = fig.add_subplot(111)
- x = []
- y = []
- if method == "elbow": # elbow method
- for k in kmeansDic.keys():
- x.append(int(k))
- y.append(np.array(kmeansDic[str(k)]["ssqd"]))
- plt.scatter(x,y)
- string1 = "Elbow score"
- string2 = "Sum of squared distances to respective centroid"
- string3 = "elbow" # only for printing PNGs
- if method == "silhouette": # silhouette score := (b-a) divided by max(a,b) ; where a is the intra-cluster distance and b is the nearest-cluster distance
- for k in kmeansDic.keys():
- x.append(int(k))
- y.append(np.array(kmeansDic[str(k)]["silhouette"]))
- plt.scatter(x,y)
- string1 = "Silhouette score"
- string2 = "(b-a)/max(a,b)"
- string3 = "silhouette" # only for printing PNGs
- if method == "calinski_harabasz": # Calinski-Harabasz score := within-cluster disperson divided by between-cluster dispersion
- for k in kmeansDic.keys():
- x.append(int(k))
- y.append(np.array(kmeansDic[str(k)]["calinski_harabasz"]))
- plt.scatter(x,y)
- string1 = "Calinski-Harabasz score"
- string2 = "ratio between within-cluster and between-cluster disperson"
- string3 = "calinski_harabasz" # only for printing PNGs
- if method == "all": # compare all scores next to each other
- try:
- for k in kmeansDic.keys():
- x.append(int(k))
- y.append(np.array(kmeansDic[str(k)]["ssqd"]))
- y_normalized = []
- for entry in y:
- y_normalized.append(entry/np.max(y))
- print("elbow:", str([round(i,2) for i in y_normalized]))
- print("elbow (original):", str([round(float(i),2) for i in y]))
- elbow_scatter = ax1.scatter(x,y_normalized, c="red")
- except:
- print("No value for elbow in this kmeans dictionary.") # not actually needed, because there are NaN values from the kmeans_clustering() function
- try:
- x.clear() # don't take over
- y.clear() # values from ssqd!
- for k in kmeansDic.keys():
- x.append(int(k))
- y.append(np.array(kmeansDic[str(k)]["silhouette"]))
- y_normalized = []
- for entry in y:
- y_normalized.append(entry/np.max(y))
- print("silhouette:", str([round(i,2) for i in y_normalized]))
- print("silhouette (original):", str([round(float(i),2) for i in y]))
- silhouette_scatter = ax1.scatter(x,y_normalized, c="green")
- except:
- print("No value for silhouette in this kmeans dictionary.")
- try:
- x.clear() # don't take over
- y.clear() # values from silhouette!
- for k in kmeansDic.keys():
- x.append(int(k))
- y.append(np.array(kmeansDic[str(k)]["calinski_harabasz"]))
- y_normalized = []
- for entry in y:
- y_normalized.append(entry/np.max(y))
- print("calinski_harabasz:", str([round(i,2) for i in y_normalized]))
- print("calinski_harabasz (original):", str([round(float(i),2) for i in y]))
- calinski_harabasz_scatter = ax1.scatter(x,y_normalized, c="blue")
- except:
- print("No value for calinski-harabasz in this kmeans dictionary.")
- string1 = "All scores in comparison"
- string2 = "Normalized scores"
- plt.legend((elbow_scatter, silhouette_scatter, calinski_harabasz_scatter), ("Elbow Score", "Silhouette score", "Calinski-Harabasz score"), scatterpoints=1, loc='lower left', ncol=3, fontsize=fontsize)
- ax1.set_xlabel("Chosen k for clustering", labelpad = 10, fontsize=9/5*fontsize, fontweight="bold")
- ax1.set_ylabel(string2, labelpad = 10, fontsize=9/5*fontsize, fontweight="bold")
- plt.title(string1, pad = 15, fontsize=round(9/5*fontsize), fontweight="bold")
- ax1.tick_params(labelsize=9/5*fontsize, length=fontsize)
- ax1.set_xticks(x)
- plt.show()
- if savepng:
- now = datetime.now()
- dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
- fig.savefig("savefig/find_k_" + string3 + dt_string + ".png", dpi=(fig.dpi if dpi is None else dpi), bbox_inches="tight")
- # In[18]:
- def bar_chart(input1=1, input2=2,input2_random=False):
- figsize = (20,10)
- fontsize = figsize[0]
- fig, ax = plt.subplots(figsize=figsize)
- groups1 = np.array(load_cache_kmeans(kmeans_input = input1)["2"]["cluster_numbers"])
- if input2_random:
- groups2 = np.random.randint(2,size=(1000,1000))+1
- else:
- groups2 = np.array(load_cache_kmeans(kmeans_input = input2)["2"]["cluster_numbers"])
- OneOne = 0
- OneTwo = 0
- TwoOne = 0
- TwoTwo = 0
- for z in np.arange(0,1000,1):
- for y in np.arange(0,1000,1):
- if groups1[y][z] == 1 and groups2[y][z] == 1:
- OneOne += 1
- if groups1[y][z] == 1 and groups2[y][z] == 2:
- OneTwo += 1
- if groups1[y][z] == 2 and groups2[y][z] == 1:
- TwoOne += 1
- if groups1[y][z] == 2 and groups2[y][z] == 2:
- TwoTwo += 1
- labels = ["1 & 1", "2 & 2", "1 & 2", "2 & 1"]
- comparisons = (OneOne,TwoTwo,OneTwo,TwoOne)
- index = np.arange(4)
- bar_width = 0.9
- plt.bar(index, comparisons, bar_width, color="green")
- plt.xticks(index, labels) # labels get centered
- for p in ax.patches: # add height label above bins
- width = p.get_width()
- height = p.get_height()
- x, y = p.get_xy()
- ax.annotate(height, (x + width/2, y + height*1.02), ha='center', fontsize=fontsize)
- ax.set_xlabel("Compared groups", labelpad = 10, fontsize=9/5*fontsize, fontweight="bold")
- ax.set_ylabel("Counts", labelpad = 10, fontsize=9/5*fontsize, fontweight="bold")
- ax.tick_params(labelsize=fontsize, length=4/5*fontsize)
- plt.title("Bar Chart", pad = 15, fontsize=round(9/5*fontsize), fontweight="bold")
- plt.show()
- #bar_chart(input1=1, input2=4)
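- # In[ ]:
- # Hedged sketch: the four pairwise counts in bar_chart can also be obtained without the
- # million-iteration Python loop, using vectorised boolean masks (same semantics, assuming
- # both group arrays contain only the values 1 and 2; random stand-ins are used here).
- g1_toy = np.random.randint(1, 3, size=(1000, 1000))
- g2_toy = np.random.randint(1, 3, size=(1000, 1000))
- for a in (1, 2):
-     for b in (1, 2):
-         print(f"{a} & {b}:", np.sum((g1_toy == a) & (g2_toy == b)))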
- # In[47]:
- corr_coefficient(kmeans_inputs=[1,2])
- # In[453]:
- #arr1 = np.array(load_cache_currents()["pixelplotting_array"][0])
- arr1 = np.array(load_cache_kmeans(kmeans_input = 7, data_processing=1)["2"]["cluster_numbers"])
- arr2 = np.array(load_cache_kmeans(kmeans_input = 1, data_processing=1)["2"]["cluster_numbers"])
- #arr2 = plotMedianCorrelation(return_only=True)[1]
- corr_coefficient(compare_any=(arr1,arr2))
- # In[446]:
- #arr1 = np.array(load_cache_currents()["pixelplotting_array"][0])
- arr1 = np.array(load_cache_kmeans(kmeans_input = 1, data_processing=1)["2"]["cluster_numbers"])
- arr2 = plotMedianCorrelation(return_only=True)[1]
- corr_coefficient(compare_any=(arr1,arr2))
- # In[19]:
- def corr_coefficient(currents=None, kmeans_inputs=None, compare_any=None, random=False):
- """Given two grids of same shape, this function determines the correlation coefficient between both grids.
- ARGUMENTS:
- grid1 (2D-Array): Grid of data, e. g. of shape 1000 x 1000
- grid2 (2D-Array): Grid of data, e. g. of shape 1000 x 1000
- RETURNS:
- correlation (float): correlation coefficient. 1 means positively correlated, -1 means negatively correlated
- """
- if isinstance(currents, (list, tuple)):
- scan_order = [185,183,181,182,186]
- grid1 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(currents[0])])
- if random:
- grid2 = np.random.uniform(np.nanmin(grid1),np.nanmax(grid1),size=(np.shape(grid1)))
- else:
- grid2 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(currents[1])])
- if isinstance(kmeans_inputs, (list, tuple)):
- grid1 = np.array(load_cache_kmeans(kmeans_input = kmeans_inputs[0])["2"]["cluster_numbers"])
- if random:
- grid2 = np.random.randint(2,size=(np.shape(grid1)))+1
- else:
- grid2 = np.array(load_cache_kmeans(kmeans_input = kmeans_inputs[1])["2"]["cluster_numbers"])
- if isinstance(compare_any, (list, tuple)):
- grid1 = compare_any[0]
- grid2 = compare_any[1]
- grid1 = grid1.flatten() # flatten both arrays to simplify calculations
- grid2 = grid2.flatten()
- no_nan_ind1 = ~ np.isnan(grid1) # delete all NaN-values from both grids
- grid1 = grid1[no_nan_ind1]
- grid2 = grid2[no_nan_ind1]
- no_nan_ind2 = ~ np.isnan(grid2)
- grid1 = grid1[no_nan_ind2]
- grid2 = grid2[no_nan_ind2]
- if np.size(grid1) != np.size(grid2):
- raise Exception("Both given arrays must have the same size and shape.")
- mean1 = np.mean(grid1) # compute the mean values
- mean2 = np.mean(grid2)
- stdev1 = np.std(grid1) # compute the (population) standard deviations
- stdev2 = np.std(grid2)
- # Pearson r = mean of the products of the standardized values; the 1/N factor matches the ddof=0 standard deviations above
- correlation = np.mean(((grid1 - mean1) / stdev1) * ((grid2 - mean2) / stdev2))
- return correlation
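- # In[ ]:
- # Hedged consistency check on random data: after the NaN filtering, corr_coefficient should
- # agree with NumPy's own Pearson estimate (np.corrcoef) up to floating-point error.
- rng = np.random.default_rng(22)
- a_toy = rng.normal(size=(100, 100))
- b_toy = 0.5*a_toy + rng.normal(size=(100, 100))
- print(corr_coefficient(compare_any=(a_toy, b_toy)))
- print(np.corrcoef(a_toy.ravel(), b_toy.ravel())[0, 1])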
- # In[608]:
- scan_order = [185,183,181,182,186]
- current_array1 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(185)]).ravel()[np.newaxis,:].T
- current_array1 = current_array1 - np.nanmin(current_array1)
- current_array2 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(183)]).ravel()[np.newaxis,:].T
- current_array2 = current_array2 - np.nanmin(current_array2)
- current_array3 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(181)]).ravel()[np.newaxis,:].T
- current_array3 = current_array3 - np.nanmin(current_array3)
- current_array4 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(182)]).ravel()[np.newaxis,:].T
- current_array4 = current_array4 - np.nanmin(current_array4)
- current_array5 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(186)]).ravel()[np.newaxis,:].T
- current_array5 = current_array5 - np.nanmin(current_array5)
- numpy_data = np.concatenate((current_array1,current_array2,current_array3,current_array4,current_array5),axis=1)
- print(np.shape(current_array1))
- print(current_array5[~np.isnan(current_array1)])
- for i in range(1,20):
- print(numpy_data[15032*i])
- # In[40]:
- pixelplotting_array = np.array(load_cache_currents()["pixelplotting_array"])[4].ravel()
- print(len(pixelplotting_array[~np.isnan(pixelplotting_array)]))
- # In[53]:
- violin(currents=True, data_processing=3, savepng=True)
- # In[61]:
- plotXBIC2(singleplots=[181])
- # In[96]:
- scan_order = [185,183,181,182,186]
- current_array1 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(185)])
- current_array2 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(183)])
- current_array3 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(181)])
- current_array4 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(182)])
- current_array5 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(186)])
- x = [-200,-100,0,100,200]
- y = [np.nanstd(current_array1),np.nanstd(current_array2),np.nanstd(current_array3),np.nanstd(current_array4),np.nanstd(current_array5)]
- plt.plot(x,y)
- # In[97]:
- current_array1 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(185)])
- current_array2 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(183)])
- current_array3 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(181)])
- current_array4 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(182)])
- current_array5 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(186)])
- x = [-200,-100,0,100,200]
- y = [np.nanstd(current_array1),np.nanstd(current_array2),np.nanstd(current_array3),np.nanstd(current_array4),np.nanstd(current_array5)]
- plt.plot(x,y)
- # In[98]:
- violin(currents=True, data_processing=3, title=False, savepng=True)
- # In[20]:
- def violin(kmeans_input=7,currents=False, title=True, data_processing=1, dpi=None, savepng=False):
- start = time.time()
- figsize = (30*0.75,15*0.75)
- fontsize = figsize[0]
- fig, ax = plt.subplots(figsize=figsize)
- scan_order = [185,183,181,182,186]
- xlabels = ["-200","-100","0","+100","+200"]
- if currents:
- if data_processing == 1:
- current_array1 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(185)]).ravel()[np.newaxis,:].T
- current_array2 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(183)]).ravel()[np.newaxis,:].T
- current_array3 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(181)]).ravel()[np.newaxis,:].T
- current_array4 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(182)]).ravel()[np.newaxis,:].T
- current_array5 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(186)]).ravel()[np.newaxis,:].T
- if data_processing == 3:
- current_array1 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(185)]).ravel()[np.newaxis,:].T
- current_array1 = current_array1 - np.nanmin(current_array1)
- current_array2 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(183)]).ravel()[np.newaxis,:].T
- current_array2 = current_array2 - np.nanmin(current_array2)
- current_array3 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(181)]).ravel()[np.newaxis,:].T
- current_array3 = current_array3 - np.nanmin(current_array3)
- current_array4 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(182)]).ravel()[np.newaxis,:].T
- current_array4 = current_array4 - np.nanmin(current_array4)
- current_array5 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(186)]).ravel()[np.newaxis,:].T
- current_array5 = current_array5 - np.nanmin(current_array5)
- numpy_data = np.concatenate((current_array1,current_array2,current_array3,current_array4,current_array5),axis=1)
- df = pd.DataFrame(data=numpy_data, columns=xlabels)
- ax = sn.violinplot(data=df, scale="count",inner="box")
- else:
- numpy_dataDic = {}
- for scan_index in scan_order:
- current_array_scan = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(scan_index)])
- data = current_array_scan.ravel()[np.newaxis,:].T
- scan = np.repeat(scan_index,len(data))[np.newaxis,:].T
- group = np.array(load_cache_kmeans(kmeans_input = kmeans_input)["2"]["cluster_numbers"]).ravel()[np.newaxis,:].T
- numpy_dataDic[scan_index] = np.concatenate((data,scan,group),axis=1)
- #plt.axvline(x=scan_index-181.5, color="grey", lw=1)
- numpy_data = np.concatenate((numpy_dataDic[185],numpy_dataDic[183],numpy_dataDic[181],numpy_dataDic[182],numpy_dataDic[186]))
- df = pd.DataFrame(data=numpy_data, columns=["XBIC","Bias voltage (mV)","Group"])
- df = df.replace(1.0,"Group 1")
- df = df.replace(2.0,"Group 2")
- ax = sn.violinplot(x = "Bias voltage (mV)", y = "XBIC", hue = "Group", data = df, order=[185,183,181,182,186], palette = "viridis", split=True, linewidth = 0, scale = "count", inner = "box", ax = ax)
- ax.set_xlabel("Bias voltage (mV)", labelpad = 10, fontsize=9/5*fontsize, fontweight="bold")
- if data_processing == 1:
- ax.set_ylabel("XBIC", labelpad = 10, fontsize=9/5*fontsize, fontweight="bold")
- if data_processing == 3:
- ax.set_ylabel("Current - Current$\mathregular{_{min}}$", labelpad = 10, fontsize=9/5*fontsize, fontweight="bold")
- ax.tick_params(labelsize=fontsize, length=4/5*fontsize)
- ax.xaxis.set_ticks([0,1,2,3,4])
- ax.xaxis.set_ticklabels(xlabels, fontsize=fontsize)
- # add minor ticks for XBIC
- if data_processing == 1:
- ax.yaxis.set_minor_locator(MultipleLocator(10))
- if data_processing == 3:
- ax.yaxis.set_minor_locator(MultipleLocator(500))
- ax.tick_params(which='minor', length=2/5*fontsize)
- plt.grid(axis="y", which="minor",lw=0.25)
- plt.grid(axis="y")
- if title:
- plt.title("Current distribution", pad = 15, fontsize=round(9/5*fontsize), fontweight="bold")
- # add vertical lines
- for x in range(5):
- plt.axvline(x=x+0.5, color="grey", lw=1)
- # meta = {
- # "colour": {"a": "red", "b": "blue"},
- # "label": {"a": "this a", "b": "this b"},
- # }
- # handles = [
- # Patch(
- # facecolor=meta["colour"][x],
- # label=meta["label"][x],
- # )
- # for x in meta["colour"].keys()
- # ]
- # fig.legend(
- # handles=handles,
- # ncol=2,
- # loc="upper right",
- # bbox_to_anchor=(0.5, 0.5),
- # fontsize=10,
- # handlelength=1,
- # handleheight=1,
- # frameon=False,
- # )
- ax.legend(prop={'size': fontsize})
- #ax.get_legend().remove()
- plt.show()
- end = time.time()
- print(f"Plotting of the violin plots took {str(round(end-start,2))} seconds.")
- if savepng:
- now = datetime.now()
- dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
- fig.savefig("savefig/violin_" + dt_string + ".png", dpi=(fig.dpi if dpi is None else dpi), bbox_inches="tight")
- # In[434]:
- plotMedianCorrelation()
- # In[133]:
- for i in [2,3,4]:
- currents = np.asarray(load_cache_currents()["pixelplotting_array"]) # Shape: (5, 1000, 1000)
- medians = np.nanmedian(currents, axis=(1, 2), keepdims=True) # Shape: (5, 1, 1)
- nanmaxs = np.nanmax(currents, axis=(1, 2), keepdims=True)
- nanmins = np.nanmin(currents, axis=(1, 2), keepdims=True)
- results = np.full_like(currents, np.nan) # Shape: (5, 1000, 1000)
- results[currents > medians] = 1.0
- results[currents < medians] = 2.0
- #results[currents > medians and currents < (nanmaxs - 10)] = 1.0
- #results[currents < medians and currents < (nanmins + 10)] = 2.0
- counter_good_keeper = 0
- counter_bad_keeper = 0
- counter_not_keeper = 0
- mask = np.empty((1000,1000),dtype=bool)
- counter = 0
- not_NaN_counter = 0
- for z in np.linspace(0,999,1000,dtype=int):
- for y in np.linspace(0,999,1000,dtype=int):
- yz_list = []
- for scan in range(5): # do not reuse the outer loop variable i, which selects the scan plotted below
- yz_list.append(results[scan][y,z])
- if all(x==yz_list[0] for x in yz_list) and yz_list[0] == 1.0:
- counter_good_keeper += 1
- mask[y,z] = True
- if all(x==yz_list[0] for x in yz_list) and yz_list[0] == 2.0:
- counter_bad_keeper += 1
- mask[y,z] = True
- if not all(x==yz_list[0] for x in yz_list):
- counter_not_keeper += 1
- mask[y,z] = False
- if not np.isnan(np.sum(yz_list)):
- not_NaN_counter += 1
- counter += 1
- if counter in [10000,100000,400000,900000,999999]:
- print(counter)
- print("------")
- print(counter)
- print(not_NaN_counter)
- print(counter_not_keeper)
- print(counter_good_keeper)
- print(counter_bad_keeper)
- not_keepers = np.empty((1000,1000))
- not_keepers[mask] = 1.0
- not_keepers[~mask] = 2.0
- pixelplotting_array = np.array(load_cache_currents()["pixelplotting_array"])
- not_keepers[np.isnan(pixelplotting_array).any(axis=0)] = np.nan
- plotClusterPosition(plot_any=results[i], title="" ,no_suptitle=True, savepng=True)
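- # In[ ]:
- # Hedged vectorised sketch of the keeper logic above: a pixel is a "keeper" if it falls on
- # the same side of the median in all five scans, which np.all can test along the scan axis
- # in a single call (toy array here instead of the cached currents).
- toy_results = np.random.choice([1.0, 2.0], size=(5, 10, 10))
- keeper_mask = np.all(toy_results == toy_results[0], axis=0) # True where all scans agree
- print(keeper_mask.sum(), "of", keeper_mask.size, "pixels keep their group in every scan")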
- # In[161]:
- def max_interval(precision=0.1, interval_size=20, scan_no=0, return_only=False):
- currents = np.array(load_cache_currents()["pixelplotting_array"])[scan_no].ravel()
- c_min = np.nanmin(currents)
- c_max = np.nanmax(currents)
- counts = np.empty((1,2))
- counter = 0
- for A in np.arange(c_min,c_max,precision):
- count = np.size(np.where(np.logical_and(currents>A,currents<A+interval_size)))
- if counter == 0:
- counts[0,:] = np.array([[A, count]])
- else:
- counts = np.concatenate((counts, np.array([[A, count]])), axis=0)
- counter +=1
- max_counts = counts[counts[:,1] == np.max(counts[:,1])] # rows whose count equals the maximum count
- if return_only:
- return max_counts
- print(max_counts)
- # In[165]:
- max_interval()
- # In[164]:
- max_size = 100
- x = np.arange(1,max_size)
- y = []
- y2= []
- y3= []
- y4= []
- y5= []
- for i in range(1,max_size):
- max_counts = max_interval(precision=1,interval_size=i, scan_no=0, return_only=True)[0,0]
- y.append(max_counts)
- for i in range(1,max_size):
- max_counts = max_interval(precision=1,interval_size=i, scan_no=1, return_only=True)[0,0]
- y2.append(max_counts)
- for i in range(1,max_size):
- max_counts = max_interval(precision=1,interval_size=i, scan_no=2, return_only=True)[0,0]
- y3.append(max_counts)
- for i in range(1,max_size):
- max_counts = max_interval(precision=1,interval_size=i, scan_no=3, return_only=True)[0,0]
- y4.append(max_counts)
- for i in range(1,max_size):
- max_counts = max_interval(precision=1,interval_size=i, scan_no=4, return_only=True)[0,0]
- y5.append(max_counts)
- plt.plot(x,y)
- plt.plot(x,y2)
- plt.plot(x,y3)
- plt.plot(x,y4)
- plt.plot(x,y5)
- # In[21]:
- def plotMedianCorrelation(return_only=False, dpi=None, savepng=False):
- currents = np.asarray(load_cache_currents()["pixelplotting_array"]) # Shape: (5, 1000, 1000)
- medians = np.nanmedian(currents, axis=(1, 2), keepdims=True) # Shape: (5, 1, 1)
- results = np.full_like(currents, np.nan) # Shape: (5, 1000, 1000)
- results[currents > medians] = 1.0
- results[currents < medians] = 2.0
- results[currents == medians] = np.nan
- if return_only:
- return results
- figsize=(15,15)
- fontsize = figsize[0]//2
- fig = plt.figure(figsize=figsize)
- ax1 = fig.add_subplot(111)
- x = [1,2,3,4]
- correlations = []
- for i in range(4):
- arr1 = results[i]
- arr2 = results[i+1]
- correlation = (corr_coefficient(compare_any=[arr1,arr2]))
- correlations.append(correlation)
- print(correlations)
- ax1.plot(x,correlations,"-o",markersize=15, linewidth=1.2)
- # y and z labels
- ax1.set_xlabel("Compared scans", labelpad = 15, fontsize=3*fontsize, fontweight="bold")
- ax1.set_ylabel("Correlation coefficient", labelpad = 15, fontsize=3*fontsize, fontweight="bold")
- ax1.set_title("Median group correlations", pad = 15, fontsize=round(3*fontsize), fontweight="bold")
- ax1.tick_params(labelsize=15/5*fontsize, length=7/5*fontsize)
- ax1.xaxis.set_ticks([1,2,3,4])
- ax1.xaxis.set_ticklabels((["-200 to -100mV","-100 to 0mV","0 to +100mV","+100 to +200mV"]))
- ax1.yaxis.set_minor_locator(MultipleLocator(5))
- ax1.tick_params(which='minor', length=4/5*fontsize)
- plt.grid(axis="y", which="minor",lw=0.25)
- plt.grid(axis="y")
- if savepng:
- now = datetime.now()
- dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
- fig.savefig("savefig/MedianCorrelation_" + dt_string + ".png", dpi=(fig.dpi if dpi is None else dpi), bbox_inches="tight")
- # In[21]:
- for i in [7,7.1,7.2,7.3]:
- plotClusterCurves(kmeans_input=i)
- # In[496]:
- for i in [7,7.1,7.2,7.3]:
- plotClusterPosition(kmeans_input=i)
- # In[431]:
- for i in range(1,10):
- if i in [8,8.1,9]:
- kmeansDic = load_cache_kmeans(kmeans_input=i, k_highest=10, silhouette_setting=False)
- else:
- kmeansDic = load_cache_kmeans(kmeans_input=i, k_highest=10, silhouette_setting=True)
- find_k()
- # In[614]:
- for i in [1,2,3.1,4,5,6,7,8,9]:
- plotClusterPosition(data_processing=3, kmeans_input=i)
- #plotClusterPosition(kmeans_input=i)
- # In[82]:
- SAVE = False
- # code for saving all pictures:
- # save all figures for different kmeans inputs
- for k_index in [2]:
- #for kmeans_input_index in [1,2,3.1,4,5,6,7]:
- for kmeans_input_index in [1,2,3.1,4,5,6,7,8,9]:
- for normalize_samples_index in [False]:
- for normalize_curves_index in [False]:
- # load the respective cached dictionary for the definition of "kmeansDic" throughout this project
- silhouette_setting = False # True should be selected for complete data. If False is selected, silhouette is just assigned NaN and omitted, saving a lot of computation time.
- sample_size = 300000 # this number refers to the sample size in the silhouette_score() function which is computationally very expensive
- k_lowest = 2
- k_highest = 3
- kmeans_input = kmeans_input_index
- data_processing = 1
- normalize_samples = normalize_samples_index
- try:
- if silhouette_setting:
- silhouette_str = f"_{sample_size}-samplesize"
- else:
- silhouette_str = ""
- except:
- continue
- if not normalize_samples:
- normalize_samples_str = "feature normalized"
- else:
- normalize_samples_str = "feature & sample normalized"
- suptitle = f"k = {k_index}, kmeans-input = {kmeans_input}, " + normalize_samples_str
- if normalize_curves_index:
- savepng_str = f"{k_index}-k_{kmeans_input_index}-kmeans-input_{normalize_samples_index}-normalize-samples_True-normalize-curves"
- else:
- savepng_str = f"{k_index}-k_{kmeans_input_index}-kmeans-input_{normalize_samples_index}-normalize-samples"
- plotClusterPosition(k=k_index, kmeans_input=kmeans_input_index, title="", no_suptitle=True, suptitle="", dpi=200, savepng=SAVE, savepng_str=savepng_str)
- #plotClusterCurves(k=k_index,compare=True,k_curves=1, kmeans_input=kmeans_input_index,normalize_curves=normalize_curves_index, suptitle="", title=False, dpi=200, savepng=SAVE, savepng_str=savepng_str)
- # <font size="3"><b>Run the cells at the bottom to save variables in working memory to speed up calculations that would otherwise include computationally expensive lines of code
- # In[195]:
- # To speed up all calculations remarkably: create a dictionary with all the current arrays.
- # Run each time you start working in this notebook to keep the values in the global namespace.
- start = time.time()
- IV_data = plotIV(return_only=True) # call plotIV once instead of once per dictionary entry
- CurrentsDic = {
- "pixelplotting_array": IV_data[0],
- "currents_mean": IV_data[1],
- "pixelplotting_array_direct": plotIV(data_processing=3, return_only=True)[0]
- }
- end = time.time()
- print("Loading all current arrays took", round((end-start), 2), "seconds.")
- ### SAVE CACHE ###
- # with these compression settings (copied from http://www.silx.org/doc/silx/0.2.0/modules/io/dictdump.html), the file size is much smaller, but loading takes much longer, so it is better not to use them!
- SAVE = False
- if SAVE:
- compression = False
- os.chdir("/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan") # just make sure that we are talking about the correct location in the file system
- if compression:
- create_ds_args = {'compression': "gzip",
- 'shuffle': True,
- 'fletcher32': True}
- else:
- create_ds_args = None
- saveh5 = input("Do you want to save the dictionary as a h5 file that other functions will refer to ? (y/n)\n")
- if saveh5.lower() in ["y", "yes"]:
- if not compression:
- fname = "CurrentsDic.h5"
- else:
- fname = "CurrentsDic_COMPRESSED.h5"
- dicttoh5(CurrentsDic, "Cache/"+ fname, create_dataset_args=create_ds_args)
- print("The nested dictionary was saved as \"" + fname + "\" in the Cache folder.")
- if saveh5.lower() in ["n", "no"]:
- now = datetime.now()
- dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
- fname = "CurrentsDic_no-save_" + dt_string + ".h5"
- dicttoh5(CurrentsDic, "Cache/" + fname)
- print("The nested dictionary was saved as \"" + fname + "\" in the Cache folder.")
- # In[24]:
- # To speed up all calculations remarkably: Create a dictionary with all the warp matrices:
- # Run each time you start working in this notebook to keep the values in the global namespace.
- start = time.time()
- ImageRegistrationDic = {
- "179": ImageRegistration(178,179),
- "180": ImageRegistration(178,180),
- "182": ImageRegistration(181,182),
- "183": ImageRegistration(181,183),
- "185": ImageRegistration(181,185),
- "186": ImageRegistration(181,186),
- "191": ImageRegistration(181,191),
- "192": ImageRegistration(181,192)
- }
- end = time.time()
- print("Calculating all warp matrices took", round((end-start), 2), "seconds.")
- ### SAVE CACHE ###
- # with these compression settings (copied from http://www.silx.org/doc/silx/0.2.0/modules/io/dictdump.html), the file size is much smaller, but loading takes much longer, so it is better not to use them!
- SAVE = False
- if SAVE:
- compression = False
- os.chdir("/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan") # just make sure that we are talking about the correct location in the file system
- if compression:
- create_ds_args = {'compression': "gzip",
- 'shuffle': True,
- 'fletcher32': True}
- else:
- create_ds_args = None
- saveh5 = input("Do you want to save the dictionary as a h5 file that other functions will refer to ? (y/n)\n")
- if saveh5.lower() in ["y", "yes"]:
- if not compression:
- fname = "ImageRegistrationDic.h5"
- else:
- fname = "ImageRegistrationDic_COMPRESSED.h5"
- dicttoh5(ImageRegistrationDic, "Cache/"+ fname, create_dataset_args=create_ds_args)
- print("The nested dictionary was saved as \"" + fname + "\" in the Cache folder.")
- if saveh5.lower() in ["n", "no"]:
- now = datetime.now()
- dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
- fname = "ImageRegistrationDic_no-save_" + dt_string + ".h5"
- dicttoh5(ImageRegistrationDic, "Cache/" + fname)
- print("The nested dictionary was saved as \"" + fname + "\" in the Cache folder.")
- # In[588]:
- for i in [1,2,3.1,4,5,6,7,8,9]:
- # Create a nested dictionary for all cluster arrays, sum of squared distances, silhouette and calinski_harabasz scores
- start = time.time()
- kmeansDic = {}
- silhouette_setting = False # True should be selected for complete data. If False is selected, silhouette is just assigned NaN and omitted, saving a lot of computation time.
- sample_size = 300000 # this number refers to the sample size in the silhouette_score() function which is computationally very expensive
- k_lowest = 2
- k_highest = 3
- kmeans_input = i
- data_processing = 3
- normalize_samples = False
- for k in range(k_lowest, k_highest+1):
- start_k = time.time()
- cluster_numbers, ssqd, silhouette, calinski_harabasz, threshold = kmeans_clustering(data_processing=data_processing, kmeans_input=kmeans_input, normalize_samples=normalize_samples, k=k, sample_size=sample_size, silhouette=silhouette_setting)
- kmeansDic[str(k)] = {"cluster_numbers": cluster_numbers, "ssqd": ssqd, "silhouette": silhouette, "calinski_harabasz": calinski_harabasz, "threshold": threshold}
- end = time.time()
- print("Finished entry in nested dictionary for k = " + str(k) + ". That took " + str(round((end-start_k), 2)) + " seconds.")
- end = time.time()
- print("Calculating all cluster arrays took", round((end-start), 2), "seconds.")
- ### SAVE CACHE ###
- # with these compression settings (copied from http://www.silx.org/doc/silx/0.2.0/modules/io/dictdump.html), the file size is much smaller, but loading takes much longer, so it is better not to use them!
- SAVE = False
- if SAVE:
- compression = False
- os.chdir("/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan") # just make sure that we are talking about the correct location in the file system
- if compression:
- create_ds_args = {'compression': "gzip",
- 'shuffle': True,
- 'fletcher32': True}
- else:
- create_ds_args = None
- saveh5 = "y"
- if saveh5.lower() in ["y", "yes"]:
- if silhouette_setting:
- silhouette_str = f"_{sample_size}-samplesize"
- else:
- silhouette_str = ""
- if not compression:
- fname = f"kmeansDic({k_lowest},{k_highest})_{kmeans_input}-kmeans-input_{normalize_samples!s}-normalize-samples_{data_processing}-data-processing_{silhouette_setting!s}-silhouette{silhouette_str}.h5"
- else:
- fname = f"kmeansDic({k_lowest},{k_highest})_{kmeans_input}-kmeans-input_{normalize_samples!s}-normalize-samples_{data_processing}-data-processing_{silhouette_setting!s}-silhouette{silhouette_str}_COMPRESSED.h5"
- dicttoh5(kmeansDic, "Cache/"+ fname, create_dataset_args=create_ds_args)
- print("The nested dictionary was saved as \"" + fname + "\" in the Cache folder.")
- if saveh5.lower() in ["n", "no"]:
- now = datetime.now()
- dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
- fname = "kmeansDic_no-save_" + dt_string + ".h5"
- dicttoh5(kmeansDic, "Cache/" + fname)
- print("The nested dictionary was saved as \"" + fname + "\" in the Cache folder.")
- # In[435]:
- # Create a nested dictionary for all cluster arrays, sum of squared distances, silhouette and calinski_harabasz scores
- start = time.time()
- kmeansDic = {}
- silhouette_setting = False # True should be selected for complete data. If False is selected, silhouette is just assigned NaN and omitted, saving a lot of computation time.
- sample_size = 300000 # this number refers to the sample size in the silhouette_score() function which is computationally very expensive
- k_lowest = 2
- k_highest = 3
- kmeans_input = 1.1
- data_processing = 1
- normalize_samples = False
- for k in range(k_lowest, k_highest+1):
- start_k = time.time()
- cluster_numbers, ssqd, silhouette, calinski_harabasz, threshold = kmeans_clustering(data_processing=data_processing, kmeans_input=kmeans_input, normalize_samples=normalize_samples, k=k, sample_size=sample_size, silhouette=silhouette_setting)
- kmeansDic[str(k)] = {"cluster_numbers": cluster_numbers, "ssqd": ssqd, "silhouette": silhouette, "calinski_harabasz": calinski_harabasz, "threshold": threshold}
- end = time.time()
- print("Finished entry in nested dictionary for k = " + str(k) + ". That took " + str(round((end-start_k), 2)) + " seconds.")
- end = time.time()
- print("Calculating all cluster arrays took", round((end-start), 2), "seconds.")
- ### SAVE CACHE ###
- # with these compression settings (copied from http://www.silx.org/doc/silx/0.2.0/modules/io/dictdump.html), the file size is much smaller, but loading takes much longer, so it is better not to use them!
- SAVE = False
- if SAVE:
- compression = False
- os.chdir("/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan") # just make sure that we are talking about the correct location in the file system
- if compression:
- create_ds_args = {'compression': "gzip",
- 'shuffle': True,
- 'fletcher32': True}
- else:
- create_ds_args = None
- saveh5 = input("Do you want to save the dictionary as a h5 file that other functions will refer to ? (y/n)\n")
- if saveh5.lower() in ["y", "yes"]:
- if silhouette_setting:
- silhouette_str = f"_{sample_size}-samplesize"
- else:
- silhouette_str = ""
- if not compression:
- fname = f"kmeansDic({k_lowest},{k_highest})_{kmeans_input}-kmeans-input_{normalize_samples!s}-normalize-samples_{data_processing}-data-processing_{silhouette_setting!s}-silhouette{silhouette_str}.h5"
- else:
- fname = f"kmeansDic({k_lowest},{k_highest})_{kmeans_input}-kmeans-input_{normalize_samples!s}-normalize-samples_{data_processing}-data-processing_{silhouette_setting!s}-silhouette{silhouette_str}_COMPRESSED.h5"
- dicttoh5(kmeansDic, "Cache/"+ fname, create_dataset_args=create_ds_args)
- print("The nested dictionary was saved as \"" + fname + "\" in the Cache folder.")
- if saveh5.lower() in ["n", "no"]:
- now = datetime.now()
- dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
- fname = "kmeansDic_no-save_" + dt_string + ".h5"
- dicttoh5(kmeansDic, "Cache/" + fname)
- print("The nested dictionary was saved as \"" + fname + "\" in the Cache folder.")
- # In[22]:
- def load_cache_currents():
- os.chdir("/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan")
- fpath = "/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan/Cache/CurrentsDic.h5"
- CurrentsDic = h5py.File(fpath, "r")
- return CurrentsDic
- CurrentsDic = load_cache_currents()
- # In[23]:
- # load the cached dictionary for the definition of "ImageRegistrationDic" throughout this project
- def load_cache_IR():
- os.chdir("/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan")
- fpath = "/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan/Cache/ImageRegistrationDic.h5"
- ImageRegistrationDic = h5py.File(fpath, "r")
- return ImageRegistrationDic
- ImageRegistrationDic = load_cache_IR()
- # In[24]:
- # load the respective cached dictionary for the definition of "kmeansDic" throughout this project
- def load_cache_kmeans(silhouette_setting = False, sample_size = 300000, k_lowest = 2, k_highest = 3, kmeans_input = 1, data_processing = 1, normalize_samples = False):
- os.chdir("/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan")
- if silhouette_setting:
- silhouette_str = f"_{sample_size}-samplesize"
- else:
- silhouette_str = ""
- fpath = "/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan/Cache/"+ f"kmeansDic({k_lowest},{k_highest})_{kmeans_input}-kmeans-input_{normalize_samples!s}-normalize-samples_{data_processing}-data-processing_{silhouette_setting!s}-silhouette{silhouette_str}.h5"
- kmeansDic = h5py.File(fpath, "r")
- return kmeansDic
- kmeansDic = load_cache_kmeans()
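- # In[ ]:
- # Hedged usage note: the h5py.File handles returned by the three loaders above are lazy and
- # dict-like; a dataset only becomes a numpy array once wrapped in np.array(), which is the
- # pattern used throughout this notebook.
- print(list(kmeansDic.keys())) # e.g. ['2', '3'] for k_lowest = 2, k_highest = 3
- cluster_map = np.array(kmeansDic["2"]["cluster_numbers"]) # materialise one dataset
- print(cluster_map.shape)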