Advertisement
OreganoHauch

All my functions (backup 2021-01-03)

Jan 3rd, 2021
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 119.66 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # coding: utf-8
  3.  
  4. # In[1]:
  5.  
  6.  
  7. # import external modules
  8. import h5py
  9. import time
  10. import math
  11. import cv2
  12. import os
  13. import os.path
  14. import shutil
  15. import pylatex
  16. import random
  17.  
  18. import numpy as np
  19. import matplotlib as mpl
  20. import matplotlib.pyplot as plt
  21. import matplotlib.cm as cm
  22. import matplotlib.colors as colors
  23. import scipy.constants as constants
  24. import seaborn as sn
  25. import pandas as pd
  26.  
  27. from IPython.core.display import display, HTML
  28. from shutil import copyfile
  29. from os import path
  30. from datetime import datetime # for saving figures and files with current data/time in file name
  31. from scipy.interpolate import griddata
  32. from mpl_toolkits.axes_grid.inset_locator import (inset_axes, InsetPosition, mark_inset) # create tiny plot inside of another plot
  33. from mpl_toolkits.axes_grid1 import make_axes_locatable
  34. from mpl_toolkits.axes_grid1.colorbar import colorbar
  35. from matplotlib.colors import ListedColormap, LinearSegmentedColormap
  36. from matplotlib.patches import Patch
  37. from matplotlib.ticker import (MultipleLocator, FormatStrFormatter, AutoMinorLocator, AutoLocator)
  38. from sklearn.preprocessing import MinMaxScaler, minmax_scale
  39. from sklearn.cluster import KMeans
  40. from sklearn.metrics import silhouette_score, calinski_harabasz_score
  41. from scipy.optimize import curve_fit
  42. from silx.io.dictdump import dicttoh5 # save computationally expensive dictionaries as h5 files
  43. from PIL import Image, ImageOps
  44.  
  45. get_ipython().run_line_magic('matplotlib', 'inline')
  46.  
  47.  
  48. # In[2]:
  49.  
  50.  
  51. # change width of Jupyter notebook
  52. def displaywidth(width):
  53. display(HTML("<style>.container { width:" + str(width) + "% !important; }</style>"))
  54. displaywidth(100)
  55.  
  56.  
  57. # In[2]:
  58.  
  59.  
  60. def load_h5(scan, datatype, show_time=False):
  61. '''This function loads only a few selected h5 files, but the definition should suffice for all the files I might need throughout the work on my bachelor's thesis.'''
  62.  
  63. try:
  64. if datatype == "positions":
  65. fpath = "/asap3/petra3/gpfs/p06/2018/data/11005475/processed/0016_EMPA_MSH_003c/scan_00" + str(scan) + "/positions.h5"
  66. h5_file = h5py.File(fpath, "r")
  67. else:
  68. fpath = "/asap3/petra3/gpfs/p06/2018/data/11005475/processed/0016_EMPA_MSH_003c/scan_00" + str(scan) + "/data/" + datatype + ".h5"
  69. h5_file = h5py.File(fpath, "r")
  70. return h5_file
  71. except:
  72. print("You entered '" + str(scan) + "' for the argument 'scan'. The function only takes an integer as first argument. Its value must lie between 178 and 192, except for 184, because this scan was aborted.\nThe second argument must be a string for the datatype, e.g. 'positions' or 'fluo_roi'.")
  73.  
  74.  
  75. # In[3]:
  76.  
  77.  
  78. def counts2amps(scan, nA=True, solar_cell_channel_only=False, select_channel=None):
  79. '''This function takes the integer scan number as an argument, looks for the counter data inside of the h5 file,
  80. converts all the counts into XBIC in (nA) and returns the values as an ndarray.'''
  81.  
  82. # load counts and turn it into 2d array
  83. if scan in [179,187,188,189,190]: # XBIC measurements without lock-in amplification
  84. loaded_counts = load_h5(scan, "counter")["/data/solar_cell_0-10V"]
  85. counts_as_array = np.array(loaded_counts)
  86. print("This is the direct signal, because the chopper wasn't used for this scan!")
  87. else:
  88. loaded_counts = load_h5(scan, "counter")["/data/lock_in_R_0-10V"]
  89. counts_as_array = np.array(loaded_counts).astype(float) # this integer array must be converted to float, otherwise np.reciprocal will return only zeros
  90.  
  91. if solar_cell_channel_only:
  92. if scan in [178,179,180,181,183,185,188,191,192]:
  93. loaded_counts = load_h5(scan, "counter")["/data/solar_cell_0-10V"]
  94. counts_as_array = np.array(loaded_counts)
  95. if scan in [182,186]:
  96. loaded_counts = load_h5(scan, "counter")["/data/solar_cell_-10-0V"]
  97. counts_as_array = np.array(loaded_counts)*(-1)
  98. if scan in [187,189,190]:
  99. counts_as_array = np.zeros(40200)
  100.  
  101. ##### for plotting all channels ####
  102. if select_channel == "negative" and solar_cell_channel_only:
  103. loaded_counts = load_h5(scan, "counter")["/data/solar_cell_-10-0V"]
  104. counts_as_array = np.array(loaded_counts)*(-1)
  105. if select_channel == "positive" and solar_cell_channel_only:
  106. loaded_counts = load_h5(scan, "counter")["/data/solar_cell_0-10V"]
  107. counts_as_array = np.array(loaded_counts)
  108. if select_channel == "negative" and not solar_cell_channel_only:
  109. loaded_counts = load_h5(scan, "counter")["/data/lock-in_-10-0V"]
  110. counts_as_array = np.array(loaded_counts)
  111. if select_channel == "positive" and not solar_cell_channel_only:
  112. loaded_counts = load_h5(scan, "counter")["/data/lock_in_0-10V"]
  113. counts_as_array = np.array(loaded_counts)
  114. if select_channel == "R" and not solar_cell_channel_only:
  115. pass
  116. ####################################
  117.  
  118. # load photon counts
  119. loaded_photon_counts = load_h5(scan, "counter")["/data/qbpm_micro_sum"]
  120. photon_counts_as_array = np.array(loaded_photon_counts).astype(float) # this integer array must be converted to float, otherwise np.reciprocal will return only zeros
  121.  
  122. # load exposure times and turn it into 2d array
  123. loaded_exposure_times = load_h5(scan, "counter")["/data/exposure_time"]
  124. exposure_times_as_array = np.array(loaded_exposure_times)
  125.  
  126. # normalize counts and photons to one second
  127. c_rate = (counts_as_array/exposure_times_as_array)
  128. ph_rate = photon_counts_as_array/exposure_times_as_array
  129.  
  130. # normalize counts to the average photon rate and one second
  131. f_DAQ = c_rate * np.mean(ph_rate)/ph_rate # multiply each LIA count by the ratio of the average photon count to the respective photon count (e.g. twice as much photons as the average should be normalized to half of the XBIC curent)
  132.  
  133. # calculate the conversion factor
  134. if scan in [178,179,180,181,182,183,185,186,191,192]: # these scans have a sensitivity of 1 μA/V
  135. A_PA = 10**6 # "P_A" = 1/sensitivity: Preamplifier amplification factor in V/A, values taken from LogbuchStrahlzeitPetraIII31.10.-6.11.2018.pptx, slide 54f.
  136. if scan in [187,188,189,190]: # these scans have a sensitivity of 100 mA/V
  137. A_PA = 10
  138. A_LIA = 1 # "A_LIA" = 1/scaling = Lock-in amplifier amplification factor in V/V, values also taken from the pptx logbook
  139. V2f = 10**6
  140. Wff = math.sqrt(2) # the output signal (in this case a sine signal) of the LIA is the RMS amplitude, but we are interested in the peak-to-peak value (see jove paper, p.4)
  141. H_conv = 2*Wff/(V2f*A_LIA*A_PA)
  142.  
  143. # finally multiply the conversion factor with the normalized count of each pixel
  144. if not nA:
  145. S_XBIC = f_DAQ * H_conv
  146. S_XBIC = f_DAQ * H_conv * 10**9
  147.  
  148. # return S_XBIC_raw_nA
  149. return S_XBIC
  150.  
  151.  
  152. # In[462]:
  153.  
  154.  
  155. def concat_images(image_paths, size, shape=None, return_only=False, savepng=False):
  156. # Open images and resize them
  157. width, height = size
  158. images = map(Image.open, image_paths)
  159. images = [ImageOps.fit(image, size, Image.ANTIALIAS)
  160. for image in images]
  161.  
  162. # Create canvas for the final image with total size
  163. shape = shape if shape else (1, len(images))
  164. image_size = (width * shape[1], height * shape[0])
  165. image = Image.new('RGB', image_size)
  166. # Paste images into final image
  167. for row in range(shape[0]):
  168. for col in range(shape[1]):
  169. offset = width * col, height * row
  170. idx = row * shape[1] + col
  171. image.paste(images[idx], offset)
  172. print(1,type(image))
  173.  
  174. if return_only:
  175. return image
  176. print(2,type(image))
  177. if savepng:
  178. # Create and save image grid
  179. now = datetime.now()
  180. dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
  181. print(3,type(image))
  182. image = concat_images(image_array, (400, 400), shape=(5,9))
  183. print(4,type(image))
  184. image.save(f"image_{dt_string}.png", "PNG")
  185.  
# Get list of image paths: every PNG in the single-cell plot folder, sorted by filename
folder = "/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan/savefig/plotXBIC_singlecell_new"
image_paths = [os.path.join(folder, f)
               for f in sorted(os.listdir(folder)) if f.endswith('.png')]
# NOTE: the module-level name `image_array` is also referenced inside
# concat_images()'s savepng branch -- keep the name if you rename anything here.
image_array = image_paths
# Assemble all plots into one 5x9 grid of 400x400 px tiles and save it as PNG.
concat_images(image_array, (400, 400), shape=(5,9),savepng=True)
  192.  
  193.  
  194. # In[4]:
  195.  
  196.  
  197. def supergrid(yvalues, zvalues, currents, gridres):
  198. """This function creates a rectangular grid where both axes have the given length. The given datapoints, which
  199. location is characterized by a tuple of y- and z-values, are interpolated such that every point in the grid gets
  200. assigned a value according to the nearest neighbor method.
  201.  
  202. ARGUMENTS:
  203. yvalues (1D-array): Corresponding y-values for every measurement
  204. zvalues (1D-array): Corresponding x-values for every measurement
  205. currents (1D-array): All currents measured on the respective y- and z-coordinates
  206. gridres (float): Resolution of the grid insofar as it is the length of both axes of the grid in pixels
  207.  
  208. RETURNS:
  209. (yvalues_new, zvalues_new, currents_supergrid) (Tuple of three 2D-Arrays): three grids with corresponding
  210. y-values, z-values and currents at every single point in the grid.
  211. """
  212.  
  213. if np.size(yvalues) != np.size(zvalues):
  214. raise Exception("Arrays 'yvalues' and 'zvalues' must have same size.")
  215. else:
  216. if np.size(yvalues) - np.size(currents) == -1: # This paragraph is to catch an error that is raised when
  217. currents = currents[:np.size(currents) - 1] # dealing with the XRF scan XBIC data. It appears that
  218. if np.size(yvalues) - np.size(currents) == 1: # the y- and z-arrays or the currents are too long by one
  219. yvalues = yvalues[:np.size(yvalues) - 1] # element where no measurement was done. This element is
  220. zvalues = zvalues[:np.size(zvalues) - 1] # removed from the respective array to align points and data.
  221. if np.size(yvalues) - np.size(currents) == 0:
  222. pass
  223. else:
  224. raise Exception("Positions (y, z) and corresponding data points can't be aligned due to their different size.")
  225.  
  226. yvalues_shift = yvalues - yvalues.min() # shift y-values to obtain the relative position
  227. zvalues_shift = zvalues - zvalues.min() # shift y-values to obtain the relative position
  228.  
  229. # Create grid of y- and z-values with respect to the selected grid resolution (gridres)
  230. yvalues_new, zvalues_new = np.mgrid[0:yvalues_shift.max():gridres * 1j, 0:zvalues_shift.max():gridres * 1j]
  231.  
  232. # Interpolate given currents such that they fit the grid of x- and y-values
  233. currents_supergrid = griddata((yvalues_shift, zvalues_shift), currents, (yvalues_new, zvalues_new), method = "nearest")
  234.  
  235. return yvalues_new, zvalues_new, currents_supergrid.T
  236.  
  237.  
  238. # In[5]:
  239.  
  240.  
def plotXBIC_loop(scan, fig, fontsize, voltage_dic, loop_count, compareB=None, compareB_counter=None, compare=False):
    '''Render one XBIC map (or a difference of two scans) as subplot number
    *loop_count* into *fig*.

    Arguments:
        scan (int): scan number to plot (A-side when compare=True).
        fig: matplotlib figure the 4x4 subplot grid lives in.
        fontsize (int): base font size; titles/labels are scaled from it.
        voltage_dic (dict): scan number -> bias-voltage suffix for the title.
        loop_count (int): 1-based subplot index within the 4x4 grid.
        compareB (list or None): B-side scan numbers when plotting differences.
        compareB_counter (int or None): index into compareB for this call.
        compare (bool): if True, plot scan minus compareB[compareB_counter].
    '''

    #fig.suptitle("Reduced LIA count normalized", fontsize=2*fontsize, x=0.7, y=0.92) # optional title for the figure (with this x and y it is centered and has a good distance)

    # load position and define y, z and f(y,z) as arrays that can be interpreted by the imshow() function
    positions = load_h5(scan, "positions")
    y_axis_h5 = positions["/data/encoder_fast/data"]
    z_axis_h5 = positions["/data/encoder_slow/data"]
    y = np.array(y_axis_h5)
    z = np.array(z_axis_h5)
    current = counts2amps(scan)
    if compare:
        counts2amps_A = current
        counts2amps_B = counts2amps(compareB[compareB_counter])

        # append NaN: scan 180 has the shape (40199,) and thus cannot be broadcast together with the other scans which have shapes of (40200,)
        if scan == 180:
            counts2amps_A = np.append(current,np.nan)
        if compareB[compareB_counter] == 180:
            counts2amps_B = np.append(counts2amps(compareB[compareB_counter]),np.nan)
        current = counts2amps_A-counts2amps_B

    y_grid, z_grid, current_array = supergrid(y,z,current,1000)

    # axes for new subplot
    ax = fig.add_subplot(4, 4, loop_count)

    # spacing to other subplots
    plt.subplots_adjust(right=1.3, hspace=0.5, wspace=0.5)

    # y and z labels
    ax.set_xlabel("Position Y [μm]", labelpad = 0, fontsize=fontsize)
    ax.set_ylabel("Position Z [μm]", labelpad = 0, fontsize=fontsize)

    # title
    bias_voltage = voltage_dic[scan]
    if compare:
        plt.title("Scan #" + str(scan) + " minus Scan #" + str(compareB[compareB_counter]), pad = 15, fontsize=round(9/8*fontsize))
    else:
        plt.title("Scan #" + str(scan) + bias_voltage, pad = 15, fontsize=round(9/8*fontsize))

    # individual ticks: it's an extrapolated plot from 201x201 to 1000x1000 pixels, but this corresponds to 10x10 micrometers on the solar cell
    ax.xaxis.set_ticklabels([0,5,10],fontsize=round(4/5*fontsize))
    ax.xaxis.set_ticks([0,500,999]) # if 1000 instead of 999 it won't display
    ax.yaxis.set_ticklabels([0,5,10],fontsize=round(4/5*fontsize))
    ax.yaxis.set_ticks([0,500,999])
    ax.xaxis.set_minor_locator(MultipleLocator(50)) # Minor ticks are set to multiples of 50
    ax.yaxis.set_minor_locator(MultipleLocator(50))

    current_array = cv2.GaussianBlur(current_array, (33, 33), 0) # This accounts for the beam profile with FWHM of 105 x 108 nm.

    # plot the data
    im = ax.imshow(current_array, cmap=cm.afmhot, origin='lower')

    # add a colorbar as legend
    ax_divider = make_axes_locatable(ax) # Introduce a divider next to the axis already in place
    cax = ax_divider.append_axes("right", size = "15%", pad = "5%") # Append a new axis object with name "cax" to the divider. Width ("size") and distance to the 2D-Plot ("pad") can be given in % of the width of the x axis.
    legend = colorbar(im, cax = cax) # Introduce a legend (colorbar) where the new axis has been placed
    cax.set_ylabel(r"$\frac{XBIC}{pixel \cdot photons_{avg}}$" + " (nA)", rotation = -90, labelpad = 50 , fontsize=5/4*fontsize) # Place this label next to the colorbar; labelpad denotes the distance of the colorbar
    cax.yaxis.set_major_locator(AutoLocator())
    cax.yaxis.set_minor_locator(AutoMinorLocator())
  302.  
  303.  
  304. # In[6]:
  305.  
  306.  
  307. def plotXBIC(plot_range=None, singleplots=None, compareA=None, compareB=None, figsize=(30,30), savepng=False, dpi=None):
  308. '''
  309. This function plots the XBIC, using data from the previously defined counts2amps() function. Look at the examples to see how to use it:
  310.  
  311. plotXBIC(plot_range=14) will do the following:
  312. It will plot 14 datasets, starting with #178 and ending with #192 (and omitting 184 which is still counted).
  313.  
  314. plotXBIC(singleplots=[178,185,192]) will do the following:
  315. It will plot #178, #185 and #192.
  316.  
  317. plotXBIC(compareA=[178,178,185], compareB=[187,189,189]) will do the following:
  318. It will plot
  319. Scan 178 minus Scan 187
  320. Scan 178 minus Scan 189
  321. Scan 185 minus Scan 189.
  322. '''
  323.  
  324. start = time.time() # measure time for each plot
  325.  
  326. # the best looking fontsize is about half of the width of the figsize
  327. fontsize = figsize[0]//2
  328.  
  329. # individualize titles for each scan if there is a bias voltage applied (information from LogbuchStrahlzeitPetraIII31.10.-6.11.2018.pptx, slide 54f., corresponding to handwritten logbook)
  330. voltage_dic = {
  331. 178: "",
  332. 179: "",
  333. 180: "",
  334. 181: "",
  335. 182: " (+ 100 mV)",
  336. 183: " (- 100 mV)",
  337. 185: " (- 200 mV)",
  338. 186: " (+ 200 mV)",
  339. 187: " (+ 500 mV)",
  340. 188: "(- 500 mV)",
  341. 189: " (- 1 V)",
  342. 190: " (+ 600 mv)",
  343. 191: "",
  344. 192: ""
  345. }
  346.  
  347. # create a figure for all plots
  348. fig = plt.figure(figsize=figsize)
  349.  
  350. loop_count = 1 # the subplot number should be independent of omitted scans (e.g. scan #184); otherwise you would see an empty space at that position
  351.  
  352. if type(plot_range) is int:
  353. if plot_range > 6:
  354. plot_range += 1 # scan #184 is omitted, but still counted by the for-loop
  355. for scan in range(178, 178 + plot_range):
  356.  
  357. if scan == 184:
  358. continue # scan 184 was aborted
  359.  
  360. plotXBIC_loop(scan=scan,fig=fig,fontsize=fontsize,voltage_dic=voltage_dic,loop_count=loop_count)
  361.  
  362. loop_count += 1
  363.  
  364. plt.show()
  365.  
  366. if type(singleplots) is list:
  367. for scan in singleplots:
  368.  
  369. if scan == 184:
  370. continue # scan 184 was aborted
  371.  
  372. plotXBIC_loop(scan=scan,fig=fig,fontsize=fontsize,voltage_dic=voltage_dic,loop_count=loop_count)
  373.  
  374. loop_count += 1
  375.  
  376. plt.show()
  377.  
  378. if type(compareA) is list:
  379. compareB_counter=0
  380. for scan in compareA:
  381.  
  382. plotXBIC_loop(scan=scan,fig=fig,fontsize=fontsize,voltage_dic=voltage_dic,loop_count=loop_count,compare=True,compareB=compareB,compareB_counter=compareB_counter)
  383.  
  384. loop_count += 1
  385. compareB_counter += 1
  386.  
  387. plt.show()
  388.  
  389. end = time.time()
  390. if type(compareA) is list:
  391. print("Scan " + str(scan) + " minus Scan " + str(compareB[compareB_counter]) + " took " + str(round(end-start,2)) + " seconds to plot.")
  392. else:
  393. print("Scan " + str(scan) + " took " + str(round(end-start,2)) + " seconds to plot.")
  394.  
  395. if savepng:
  396. now = datetime.now()
  397. dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
  398. if dpi is None:
  399. fig.savefig("savefig/plotXBIC_" + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
  400. else:
  401. fig.savefig("savefig/plotXBIC_" + dt_string + ".png", dpi=dpi, bbox_inches="tight")
  402.  
  403.  
  404. # In[7]:
  405.  
  406.  
def ImageRegistration(scan1,scan2,cell=None,printdetails=False):
    '''Estimate the translation that aligns scan2's XBIC map onto scan1's using
    OpenCV's ECC algorithm and return the resulting 2x3 warp matrix.

    Arguments:
        scan1 (int): reference scan number.
        scan2 (int): scan number to be aligned onto scan1.
        cell: unused in this function body -- presumably reserved; TODO confirm.
        printdetails (bool): if True, also plot both grayscale maps, the
            aligned scan2 and a scatter of the aligned values.

    Returns:
        ndarray (2x3, float32): translation-only affine warp matrix.
    '''

    # positions are read from scan1 only and reused for both scans
    # (assumes both scans share the same scan grid -- TODO confirm)
    positions = load_h5(scan1, "positions")
    y_axis_h5 = positions["/data/encoder_fast/data"]
    z_axis_h5 = positions["/data/encoder_slow/data"]
    y = np.array(y_axis_h5)
    z = np.array(z_axis_h5)
    current1 = counts2amps(scan1)
    current2 = counts2amps(scan2)

    y_grid1, z_grid1, current1_array_original = supergrid(y,z,current1,1000)
    y_grid2, z_grid2, current2_array_original = supergrid(y,z,current2,1000)

    # convert current1_array to a grayscale image format (values between 0 and 255)
    current1_array_8bitgrayscale = current1_array_original/current1_array_original.max()
    # NOTE(review): "(1/1-min)" evaluates as (1.0 - min); a full-range contrast
    # stretch would need 1/(1-min) -- confirm the intended normalization
    current1_array_8bitgrayscale *= (1/1-current1_array_8bitgrayscale.min())
    current1_array_8bitgrayscale += 1 - current1_array_8bitgrayscale.max()
    current1_array_8bitgrayscale[current1_array_8bitgrayscale < 0] = 0
    current1_array_8bitgrayscale = np.array(current1_array_8bitgrayscale*255, dtype = np.uint8)

    # same grayscale conversion for scan2
    current2_array_8bitgrayscale = current2_array_original/current2_array_original.max()
    current2_array_8bitgrayscale *= (1/1-current2_array_8bitgrayscale.min())
    current2_array_8bitgrayscale += 1 - current2_array_8bitgrayscale.max()
    current2_array_8bitgrayscale[current2_array_8bitgrayscale < 0] = 0
    current2_array_8bitgrayscale = np.array(current2_array_8bitgrayscale*255, dtype = np.uint8)

    shape = np.shape(current1_array_8bitgrayscale)

    warp_mode = cv2.MOTION_TRANSLATION # we only need translation in direction of y and z (no rotation etc.)

    warp_matrix = np.eye(2, 3, dtype=np.float32) # this is the matrix that the results of the ECC algorithm are stored in

    number_of_iterations = 5000 # Specify the number of iterations
    termination_eps = 1e-10 # Specify the threshold of the increment in the correlation coefficient between two iterations

    # Specify the overall criteria
    criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)

    # ECC: maximize the correlation between the two grayscale maps;
    # gaussFiltSize smooths both images before matching
    (cc, warp_matrix) = cv2.findTransformECC(current1_array_8bitgrayscale, current2_array_8bitgrayscale, warp_matrix, warp_mode, criteria, None, gaussFiltSize=33)

    if printdetails == True:
        # apply the inverse warp to scan2 so it overlays scan1; out-of-frame
        # border pixels become NaN (zeros are also mapped to NaN below)
        scan2_aligned = cv2.warpAffine(current2_array_8bitgrayscale, warp_matrix, (shape[1], shape[0]), flags = cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP, borderValue = float("NaN")).astype(np.float32)
        scan2_aligned[scan2_aligned == 0] = float("NaN")

        # show where the values of the registered image lie (XBIC as a function of the 40200 datapoints)
        scan2_aligned_ravel = scan2_aligned.ravel()
        x = np.arange(np.shape(scan2_aligned_ravel)[0])
        plt.figure(figsize=(50,5))
        plt.scatter(x, scan2_aligned_ravel)
        plt.show()

        fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize = (15, 15), constrained_layout = True)

        im1 = ax1.imshow(current1_array_8bitgrayscale, origin = "lower", cmap = cm.afmhot)
        ax1.set_title("Scan #" + str(scan1))

        ax1_divider = make_axes_locatable(ax1) # Introduce a divider next to the axis already in place
        cax1 = ax1_divider.append_axes("right", size = "15%", pad = "5%") # Append a new axis object with name "cax" to the divider. Width ("size") and distance to the 2D-Plot ("pad") can be given in % of the width of the x axis.
        legend = colorbar(im1, cax = cax1) # Introduce a legend (colorbar) where the new axis has been placed
        cax1.yaxis.set_major_locator(AutoLocator())
        cax1.yaxis.set_minor_locator(AutoMinorLocator())

        im2 = ax2.imshow(current2_array_8bitgrayscale, origin = "lower", cmap = cm.afmhot)
        ax2.set_title("Scan #" + str(scan2))

        ax2_divider = make_axes_locatable(ax2) # Introduce a divider next to the axis already in place
        cax2 = ax2_divider.append_axes("right", size = "15%", pad = "5%") # Append a new axis object with name "cax" to the divider. Width ("size") and distance to the 2D-Plot ("pad") can be given in % of the width of the x axis.
        legend = colorbar(im2, cax = cax2) # Introduce a legend (colorbar) where the new axis has been placed
        cax2.yaxis.set_major_locator(AutoLocator())
        cax2.yaxis.set_minor_locator(AutoMinorLocator())

        im3 = ax3.imshow(scan2_aligned, origin = "lower", cmap = "inferno")
        ax3.set_title("Scan #" + str(scan2) + " aligned")
        ax3.set_ylim(bottom = 0)

        ax3_divider = make_axes_locatable(ax3) # Introduce a divider next to the axis already in place
        cax3 = ax3_divider.append_axes("right", size = "15%", pad = "5%") # Append a new axis object with name "cax" to the divider. Width ("size") and distance to the 2D-Plot ("pad") can be given in % of the width of the x axis.
        legend = colorbar(im3, cax = cax3) # Introduce a legend (colorbar) where the new axis has been placed
        cax3.yaxis.set_major_locator(AutoLocator())
        cax3.yaxis.set_minor_locator(AutoMinorLocator())

        plt.tight_layout(w_pad=7)

    return warp_matrix
  493.  
  494.  
  495. # In[8]:
  496.  
  497.  
  498. # plotXBIC_loop2 differs from plotXBIC_loop in that it translates the current array according to Image Registration (axes ticks correspondingly and AutoMinorLocator instead of MultipleLocator)
  499.  
  500. def plotXBIC_loop2(scan, fig, fontsize, voltage_dic, loop_count, grid, singleplot, figsize, crop=True, image_registration=True, gaussian_blur=True, select_channel=None, solar_cell_channel_only=False, plot_any=None, compareB=None, compareB_counter=None, compare=False):
  501.  
  502.  
  503. # fig.suptitle("Reduced LIA count normalized", fontsize=2*fontsize, x=0.7, y=0.92) # optional title for the figure (with this x and y it is centered and has a good distance)
  504.  
  505. # load position and define y, z and f(y,z) as arrays that can be interpreted by the imshow() function
  506. positions = load_h5(scan, "positions")
  507. y_axis_h5 = positions["/data/encoder_fast/data"]
  508. z_axis_h5 = positions["/data/encoder_slow/data"]
  509. y = np.array(y_axis_h5)
  510. z = np.array(z_axis_h5)
  511.  
  512. scan_order = [185,183,181,182,186]
  513. if crop and scan in scan_order and image_registration and gaussian_blur:
  514. if not solar_cell_channel_only:
  515. current_array = load_cache_currents()["pixelplotting_array"][scan_order.index(scan)]
  516. else:
  517. current_array = load_cache_currents()["pixelplotting_array_direct"][scan_order.index(scan)]
  518. elif scan not in scan_order and crop:
  519. print("The scan is not amongst the voltage series scans, so it cannot be displayed with a crop.")
  520.  
  521. else:
  522. if solar_cell_channel_only:
  523. current = counts2amps(scan, solar_cell_channel_only=True, select_channel=select_channel)
  524. else:
  525. current = counts2amps(scan, solar_cell_channel_only=False, select_channel=select_channel)
  526. if compare:
  527. counts2amps_A = current
  528. counts2amps_B = counts2amps(compareB[compareB_counter])
  529.  
  530. # append NaN: scan 180 has the shape (40199,) and thus cannot be broadcast together with the other scans which have shapes of (40200,)
  531. if scan == 180:
  532. counts2amps_A = np.append(current,np.nan)
  533. if compareB[compareB_counter] == 180:
  534. counts2amps_B = np.append(counts2amps(compareB[compareB_counter]),np.nan)
  535. current = counts2amps_A-counts2amps_B
  536.  
  537. if plot_any is not None:
  538. current_array = plot_any
  539.  
  540. if plot_any is None:
  541. y_grid, z_grid, current_array = supergrid(y,z,current,1000)
  542.  
  543. # set 1 px border to NaN
  544. current_array[0,:] = np.float("NaN")
  545. current_array[999,:] = np.float("NaN")
  546. current_array[:,0] = np.float("NaN")
  547. current_array[:,999] = np.float("NaN")
  548.  
  549. if gaussian_blur:
  550. current_array = cv2.GaussianBlur(current_array, (33, 33), 0) # This accounts for the beam profile with FWHM of 105 x 108 nm.
  551.  
  552. if scan not in [178,181,187,188,189,190] and image_registration: # scan 178 and 181 are reference scans for the warp matrices, so they don't need to be transformed, and Image Registration would fail for scan #187 to #190
  553. warp_matrix = np.array(ImageRegistrationDic[str(scan)])
  554. #current_array = np.pad(current_array, pad_width=200, mode='constant', constant_values=np.nan) # for NaN border... (not working as of now...)
  555. current_array = cv2.warpAffine(current_array, warp_matrix, (1000, 1000), flags = cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP, borderValue = float("NaN")).astype(np.float32)
  556.  
  557. #current_array = np.random.rand(1000,1000)
  558.  
  559. if not singleplot:
  560. # axes for new subplot
  561. ax = fig.add_subplot(4, 4, loop_count)
  562. else:
  563. fig, ax = plt.subplots(figsize=figsize)
  564. fontsize *= 1.4
  565.  
  566. # spacing to other subplots
  567. plt.subplots_adjust(right=1.3, hspace=0.5, wspace=0.5)
  568.  
  569. # y and z labels
  570. ax.set_xlabel("Position Y (μm)", labelpad = 0, fontsize=fontsize, fontweight="bold")
  571. ax.set_ylabel("Position Z (μm)", labelpad = 0, fontsize=fontsize, fontweight="bold")
  572.  
  573. ax.tick_params(length=2/5*fontsize)
  574.  
  575. # title
  576. bias_voltage = voltage_dic[scan]
  577. if compare:
  578. plt.title("Scan #" + str(scan) + " minus Scan #" + str(compareB[compareB_counter]), pad = 15, fontsize=round(9/8*fontsize), fontweight="bold")
  579. elif plot_any is None:
  580. plt.title("Scan #" + str(scan) + bias_voltage, pad = 15, fontsize=round(9/8*fontsize), fontweight="bold")
  581.  
  582. # ticks and ticklabels need to be shifted according to Image Registration
  583. if scan in [179,180,182,183,184,185,186,191,192]:
  584. IRS_y = ImageRegistrationDic[str(scan)][0][2]
  585. IRS_z = ImageRegistrationDic[str(scan)][1][2]
  586. else:
  587. IRS_y = 0
  588. IRS_z = 0
  589.  
  590. # individual ticks: it's an extrapolated plot from 201x201 to 1000x1000 pixels, but this corresponds to 10x10 micrometers on the solar cell
  591. ax.xaxis.set_ticklabels([0,5,10],fontsize=round(4/5*fontsize))
  592. #ax.xaxis.set_ticks([0-IRS_y,500-IRS_y,999-IRS_y]) # if 1000 instead of 999 it won't display
  593. ax.xaxis.set_ticks([0,500,999])
  594. ax.yaxis.set_ticklabels([0,5,10],fontsize=round(4/5*fontsize))
  595. #ax.yaxis.set_ticks([0-IRS_z,500-IRS_z,999-IRS_z])
  596. ax.yaxis.set_ticks([0,500,999])
  597. ax.xaxis.set_minor_locator(AutoMinorLocator(5)) # 1 minor tick for each micrometer
  598. ax.yaxis.set_minor_locator(AutoMinorLocator(5))
  599.  
  600. # plot the data
  601. im = ax.imshow(current_array, cmap=cm.afmhot, origin='lower')
  602.  
  603. # add a colorbar as legend
  604. ax_divider = make_axes_locatable(ax) # Introduce a divider next to the axis already in place
  605. cax = ax_divider.append_axes("right", size = "5%", pad = "5%") # Append a new axis object with name "cax" to the divider. Width ("size") and distance to the 2D-Plot ("pad") can be given in % of the width of the x axis.
  606. legend = colorbar(im, cax = cax) # Introduce a legend (colorbar) where the new axis has been placed
  607. legend.ax.tick_params(length=fontsize, labelsize=7/5*fontsize) # manipulate major tick length + label size
  608. legend.ax.tick_params(which="minor", length=3/5*fontsize) # manipulate minor tick length
  609. #cax.set_ylabel(r"$\frac{XBIC}{pixel \cdot photons_{avg}}$" + " in (nA)", rotation = -90, labelpad = 50 , fontsize=5/4*fontsize) # Place this label next to the colorbar; labelpad denotes the distance of the colorbar
  610. if plot_any is None:
  611. if not solar_cell_channel_only:
  612. cax.set_ylabel("XBIC (nA)", rotation = -90, labelpad = 50 , fontsize=5/4*fontsize, fontweight="bold") # Place this label next to the colorbar; labelpad denotes the distance of the colorbar
  613. else:
  614. cax.set_ylabel("Current (nA)", rotation = -90, labelpad = 50 , fontsize=5/4*fontsize, fontweight="bold")
  615. cax.yaxis.set_major_locator(AutoLocator())
  616. cax.yaxis.set_minor_locator(AutoMinorLocator())
  617.  
  618. if grid:
  619. ax.grid(which="both")
  620.  
  621.  
  622. # In[9]:
  623.  
  624.  
  625. def plotXBIC2(plot_range=None, singleplots=None, compareA=None, compareB=None, grid=True, crop=True, solar_cell_channel_only=False, select_channel=None, image_registration=True, gaussian_blur=True, plot_any=None, figsize=(30,30), savepng=False, dpi=None):
  626. '''
  627. This function plots the XBIC, using data from the previously defined counts2amps() function. Look at the examples to see how to use it:
  628.  
  629. plotXBIC(plot_range=14) will do the following:
  630. It will plot 14 datasets, starting with #178 and ending with #192 (and omitting 184 which is still counted).
  631.  
  632. plotXBIC(singleplots=[178,185,192]) will do the following:
  633. It will plot #178, #185 and #192.
  634.  
  635. plotXBIC(compareA=[178,178,185], compareB=[187,189,189]) will do the following:
  636. It will plot
  637. Scan 178 minus Scan 187
  638. Scan 178 minus Scan 189
  639. Scan 185 minus Scan 189.
  640. '''
  641.  
  642. start = time.time() # measure time for each plot
  643.  
  644. # the best looking fontsize is about half of the width of the figsize
  645. fontsize = figsize[0]//2
  646.  
  647. # individualize titles for each scan if there is a bias voltage applied (information from LogbuchStrahlzeitPetraIII31.10.-6.11.2018.pptx, slide 54f., corresponding to handwritten logbook)
  648. voltage_dic = {
  649. 178: "",
  650. 179: "",
  651. 180: "",
  652. 181: " (no bias)",
  653. 182: " (+ 100 mV)",
  654. 183: " (- 100 mV)",
  655. 185: " (- 200 mV)",
  656. 186: " (+ 200 mV)",
  657. 187: " (+ 500 mV)",
  658. 188: "(- 500 mV)",
  659. 189: " (- 1 V)",
  660. 190: " (+ 600 mv)",
  661. 191: "",
  662. 192: ""
  663. }
  664.  
  665. # create a figure for all plots
  666. fig = plt.figure(figsize=figsize)
  667.  
  668. loop_count = 1 # the subplot number should be independent of omitted scans (e.g. scan #184); otherwise you would see an empty space at that position
  669.  
  670. if type(plot_range) is int:
  671. singleplot=False
  672. if plot_range > 6:
  673. plot_range += 1 # scan #184 is omitted, but still counted by the for-loop
  674. for scan in range(178, 178 + plot_range):
  675.  
  676. if scan == 184:
  677. continue # scan 184 was aborted
  678.  
  679. plotXBIC_loop2(scan=scan,fig=fig,grid=grid,crop=crop,solar_cell_channel_only=solar_cell_channel_only,select_channel=select_channel,image_registration=image_registration,gaussian_blur=gaussian_blur,fontsize=fontsize,voltage_dic=voltage_dic,loop_count=loop_count,singleplot=singleplot,figsize=figsize)
  680.  
  681. loop_count += 1
  682. if not savepng:
  683. plt.show()
  684. end = time.time()
  685. print("Plotting scan 178 to scan " + str(178+plot_range) + " took " + str(round(end-start,2)) + " seconds.")
  686.  
  687. if type(singleplots) is list:
  688. singleplot=True
  689. figsize=(10,10)
  690. for scan in singleplots:
  691.  
  692. if scan == 184:
  693. continue # scan 184 was aborted
  694.  
  695. plotXBIC_loop2(scan=scan,fig=fig,grid=grid,crop=crop,solar_cell_channel_only=solar_cell_channel_only,select_channel=select_channel,image_registration=image_registration,gaussian_blur=gaussian_blur,fontsize=fontsize,voltage_dic=voltage_dic,loop_count=loop_count,singleplot=singleplot,figsize=figsize)
  696.  
  697. loop_count += 1
  698. if not savepng:
  699. plt.show()
  700. end = time.time()
  701. print("Scan " + str(scan) + " took " + str(round(end-start,2)) + " seconds to plot.")
  702.  
  703. if type(compareA) is list:
  704. singleplot=False
  705. compareB_counter=0
  706. for scan in compareA:
  707.  
  708. plotXBIC_loop2(scan=scan,fig=fig,crop=crop,solar_cell_channel_only=solar_cell_channel_only,select_channel=select_channel,image_registration=image_registration,gaussian_blur=gaussian_blur,grid=grid,fontsize=fontsize,voltage_dic=voltage_dic,loop_count=loop_count,compare=True,compareB=compareB,compareB_counter=compareB_counter,singleplot=singleplot,figsize=figsize)
  709.  
  710. loop_count += 1
  711. compareB_counter += 1
  712. if not savepng:
  713. plt.show()
  714. end = time.time()
  715. print("Scan " + str(scan) + " minus Scan " + str(compareB[compareB_counter]) + " took " + str(round(end-start,2)) + " seconds to plot.")
  716.  
  717. if plot_any is not None:
  718. singleplot=True
  719. figsize=(10,10)
  720. plotXBIC_loop2(scan=178,fig=fig,grid=grid,crop=False,solar_cell_channel_only=solar_cell_channel_only,select_channel=select_channel,image_registration=image_registration,gaussian_blur=gaussian_blur,plot_any=plot_any,fontsize=fontsize,voltage_dic=voltage_dic,loop_count=loop_count,singleplot=singleplot,figsize=figsize)
  721.  
  722. if savepng:
  723. fig = plt.gcf()
  724. now = datetime.now()
  725. dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
  726. if dpi is None:
  727. fig.savefig("savefig/plotXBIC_" + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
  728. else:
  729. fig.savefig("savefig/plotXBIC_" + dt_string + ".png", dpi=dpi, bbox_inches="tight")
  730.  
  731.  
  732. # In[10]:
  733.  
  734.  
  735. def plotShockley(I_0=0.0058*1e-9, I_Ph=215.0494*1e-9, I_02=None, I_Ph2=None, compare_with_data=True, discrete=True, mean_curve=False, example=False, normalize_curves=False, dpi=None,savepng=False):
  736. figsize=(15,15)
  737. fontsize = figsize[0]//2
  738. fig = plt.figure(figsize=figsize)
  739. ax = fig.add_subplot(111)
  740.  
  741. ax.set_xlabel("Bias voltage (mV)", labelpad = 10, fontsize=21/5*fontsize, fontweight="bold")
  742. ax.set_ylabel("XBIC (nA)", labelpad = 10, fontsize=21/5*fontsize, fontweight="bold")
  743. ax.tick_params(labelsize=15/5*fontsize, length=7/5*fontsize)
  744.  
  745. ax.xaxis.set_ticks([-0.2,-0.1,0,0.1,0.2])
  746. ax.xaxis.set_ticklabels([-200,-100,0,100,200])
  747.  
  748. ax.yaxis.set_minor_locator(MultipleLocator(5))
  749. ax.tick_params(which='minor', length=4/5*fontsize)
  750. ax.tick_params(labelsize=3*fontsize, length=1.5*fontsize, right=True)
  751. plt.grid(axis="y", which="minor",lw=0.25)
  752. plt.grid(axis="y")
  753.  
  754. lw = 3
  755. ms = 12
  756.  
  757. if not discrete:
  758. V = np.arange(-0.2,0.2,0.00001)
  759. style = "-"
  760. else:
  761. V = np.array([-0.2,-0.1,0,0.1,0.2])
  762. style = "-o"
  763.  
  764. I1 = shockley_function(V, I_0 = I_0, I_Ph = I_Ph)*1e9
  765. if normalize_curves:
  766. I1 = I1/(abs(shockley_function(V=0, I_0 = I_0, I_Ph = I_Ph)*1e9))
  767.  
  768. ax.plot(V,-I1, style, linewidth=1.3*lw, markersize=ms, label="Good diode (calculated)", color="red")
  769.  
  770. if I_02 != None and I_Ph != None:
  771.  
  772. I2 = shockley_function(V, I_0 = I_02, I_Ph = I_Ph2)*1e9
  773. if normalize_curves:
  774. I2 = I2/(abs(shockley_function(V=0, I_0 = I_02, I_Ph = I_Ph2)*1e9))
  775. ax.plot(V,-I2, style, linewidth=1.3*lw, markersize=ms, label="Bad diode (calculated)", color="blue")
  776. ax.legend(fontsize=6*fontsize)
  777.  
  778. if compare_with_data:
  779. pixelplotting_array = np.array(load_cache_currents()["pixelplotting_array"])
  780. V = [-0.2,-0.1,0,0.1,0.2]
  781. if not example:
  782. mean_arrays = []
  783. kmeansDic = load_cache_kmeans(kmeans_input=7)
  784. for comparing_curve in [1,2]:
  785. k_curves_values = pixelplotting_array[:, np.array(kmeansDic[str(2)]["cluster_numbers"]) == comparing_curve].T # instead of transposing, one could also specify another axis for averaging!
  786. mean_array = np.empty(0)
  787. for scan_index in range(5):
  788. mean_array = np.append(mean_array, np.nanmean(k_curves_values[:,scan_index]))
  789. mean_arrays.append(mean_array)
  790. if normalize_curves:
  791. mean_arrays[0] = mean_arrays[0]/mean_arrays[0][2]
  792. mean_arrays[1] = mean_arrays[1]/mean_arrays[1][2]
  793. ax.plot(V,mean_arrays[0], "-o", linewidth=lw, markersize=ms, label="Good diode (measured mean)", color="pink")
  794. ax.plot(V,mean_arrays[1], "-o", linewidth=lw, markersize=ms, label="Bad diode (measured mean)", color="cornflowerblue")
  795. if example:
  796. y_good = pixelplotting_array[:,600, 650]
  797. y_bad = pixelplotting_array[:,400, 400]
  798. if normalize_curves:
  799. y_good = y_good/y_good[2]
  800. y_bad = y_bad/y_bad[2]
  801. ax.plot(V,y_good, "-o", linewidth=lw, markersize=ms, label="Good diode (measured)", color="pink")
  802. ax.plot(V,y_bad, "-o", linewidth=lw, markersize=ms, label="Bad diode (measured)", color="cornflowerblue")
  803. ax.legend(fontsize=4.4*fontsize)
  804.  
  805. if mean_curve:
  806. I_mean = np.array([217.43692, 217.10754, 214.23949449431512, 210.2738, 201.62088])
  807. if normalize_curves:
  808. I_mean = I_mean/I_mean[2]
  809. ax.plot(V, I_mean, "-o", linewidth=lw, markersize=ms, label="Mean IV curve", color="k")
  810. ax.legend(fontsize=4*fontsize)
  811. if example:
  812. ax.legend(fontsize=3.5*fontsize)
  813.  
  814. if savepng:
  815. now = datetime.now()
  816. dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
  817. if dpi is None:
  818. fig.savefig("savefig/ShockleyCurve_" + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
  819. else:
  820. fig.savefig("savefig/ShockleyCurve_" + dt_string + ".png", dpi=dpi, bbox_inches="tight")
  821.  
  822. popt = plotIV(return_fitting=True)[0]
  823. I_0 = popt[0]
  824. I_Ph = popt[1]
  825.  
  826. plotShockley(I_0=I_0, I_Ph=I_Ph, I_02 =2*I_0, I_Ph2=I_Ph, discrete=False, mean_curve=False, example=False, normalize_curves=True, savepng=True)
  827.  
  828.  
  829. # In[358]:
  830.  
  831.  
  832. plotClusterCurves(kmeans_input=7)
  833. plotClusterCurves(kmeans_input=7, normalize_curves=True)
  834.  
  835.  
  836. # In[11]:
  837.  
  838.  
  839. def plotIV(coordinate=None, sample_size=100, set_region=(0,999,0,999), showPosition=False, inset=False, title=True, figsize=(30,30), savepng=False, dpi=None, savepdf=False, annotations=False, data_processing=1, return_only=False, return_fitting=False):
  840.  
  841. if inset:
  842. showPosition = True
  843.  
  844. start = time.time()
  845. # voltages
  846. voltages = [-1000, -500, -200, -100, 0, 100, 200, 500, 600]
  847. # scans (ordered by voltages)
  848. scans_ordered = [ 189, 188, 185, 183,181,182, 186, 187, 190]
  849.  
  850. # convert list to array
  851. voltages, scans_ordered = np.array(voltages), np.array(scans_ordered)
  852.  
  853. current_array_list = []
  854. current_mean = []
  855.  
  856. # plot either with or without negative values [EDIT: deleted from the counts2amps function!] and select if the solar cell channel should be selected for the current
  857. options_dic = {
  858. 1: (False, False, "Lock in R , ohne Negativwerte"),
  859. 2: (False, True, "Lock in R, mit Negativwerten"),
  860. 3: (True, False, "solar cell channel, ohne Negativwerte"),
  861. 4: (True, True, "solar cell channel, mit Negativwerten")
  862. }
  863. if not return_only and not return_fitting:
  864. print(options_dic[data_processing][2])
  865. A = 2 # plot only from scan_ordered[A] to ...
  866. B = -2 # ... scan_ordered[B]
  867. counter = 0
  868. for scan in scans_ordered[A:B]:
  869. positions = load_h5(scan, "positions")
  870. y_axis_h5 = positions["/data/encoder_fast/data"]
  871. z_axis_h5 = positions["/data/encoder_slow/data"]
  872. y_axis = np.array(y_axis_h5)
  873. z_axis = np.array(z_axis_h5)
  874. current = counts2amps(scan,solar_cell_channel_only=options_dic[data_processing][0])
  875.  
  876. y_grid, z_grid, current_array = supergrid(y_axis,z_axis,current,1000)
  877.  
  878. # set 1 px border to NaN
  879. current_array[0,:] = np.float("NaN")
  880. current_array[999,:] = np.float("NaN")
  881. current_array[:,0] = np.float("NaN")
  882. current_array[:,999] = np.float("NaN")
  883.  
  884. current_array = cv2.GaussianBlur(current_array, (33, 33), 0) # This accounts for the beam profile with FWHM of 105 x 108 nm.
  885.  
  886. if scan != 181:
  887. warp_matrix = np.array(ImageRegistrationDic[str(scan)])
  888. current_array = cv2.warpAffine(current_array, warp_matrix, (1000, 1000), flags = cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP, borderValue = float("NaN")).astype(np.float32)
  889.  
  890. current_array_list.append(current_array)
  891.  
  892. counter += 1
  893.  
  894. pixelplotting_array = np.stack(current_array_list)
  895.  
  896.  
  897.  
  898. # have NaN values at same position for all scans
  899. counter = 0
  900. for grid in current_array_list:
  901. grid[np.isnan(pixelplotting_array).any(axis=0)] = np.float("NaN")
  902.  
  903. current_mean.append(np.nanmean(grid))
  904.  
  905. if not return_only and not return_fitting:
  906. print(f"Mean current for scan {scan} is {str(round(current_mean[counter],4))} nA. Its min/max current is {str(round(np.nanmin(current_array),4))}/{str(round(np.nanmax(current_array),4))} nA.") # the values are only shown to a few decimal places if not using e.g. {round(current_mean[counter],4):.20f}
  907. counter += 1
  908. pixelplotting_array = np.stack(current_array_list)
  909.  
  910. if return_only:
  911. return pixelplotting_array, current_mean
  912.  
  913. #fitting
  914. popt, pcov = curve_fit(lambda V, I_0, I_Ph: shockley_function(V, I_0, I_Ph)*(-1e9), voltages[A:B]/1000, current_mean, p0=[5e-12,current_mean[2]*1e-9], bounds=([1e-13,(current_mean[2]-3)*1e-9],[1e-11,(current_mean[2]+3)*1e-9]))
  915. if return_fitting:
  916. return popt, pcov
  917.  
  918. fontsize = 1.5*figsize[0]//2
  919. fig = plt.figure(figsize=figsize)
  920. ax1 = fig.add_subplot(221)
  921.  
  922. # y and z labels
  923. ax1.set_xlabel("Bias voltage (mV)", labelpad = 10, fontsize=7/5*fontsize, fontweight="bold")
  924. ax1.set_ylabel("XBIC (nA)", labelpad = 10, fontsize=7/5*fontsize, fontweight="bold")
  925. if title:
  926. plt.title("IV curve(s) for single pixel(s)", pad = 15, fontsize=round(7/5*fontsize), fontweight="bold")
  927. ax1.tick_params(labelsize=7/5*fontsize, length=fontsize, right=True)
  928. ax1.set_xticks(voltages[A:B])
  929.  
  930. if data_processing == 3: # else the ticks exceed Locator.MAXTICKS
  931. major_interval = 1000
  932. else:
  933. major_interval = 10
  934. minor_interval = major_interval/5
  935. ax1.yaxis.set_major_locator(MultipleLocator(major_interval))
  936. ax1.yaxis.set_minor_locator(MultipleLocator(minor_interval))
  937. ax1.tick_params(which='minor', length=4/5*fontsize, right=True)
  938. plt.grid(axis="y", which="minor",lw=0.25)
  939. plt.grid(axis="y")
  940.  
  941. # allows to plot only a specific location on the y-z current array (else plot each 'sample_size'-th IV curve):
  942. if coordinate != None:
  943. ax1.plot(voltages[A:B], pixelplotting_array[:,coordinate[0],coordinate[1]], "-o", markersize=8, linewidth=1.2, color="blue", label="IV curve at given (Y,Z)")
  944.  
  945. slope, intercept = kmeans_clustering(kmeans_input=8, return_only=True)
  946. slope = np.reshape(slope, (1000,1000))
  947. intercept = np.reshape(intercept, (1000,1000))
  948. ax1.plot(voltages[A:B], slope[coordinate[0],coordinate[1]]*voltages[A:B]+intercept[coordinate[0],coordinate[1]], label="Fitted line")
  949. ax1.legend(fontsize=7/5*fontsize)
  950.  
  951. if showPosition:
  952. if inset:
  953. # Create a set of inset Axes: these should fill the bounding box allocated to them.
  954. ax1_2 = plt.axes([0,0,1,1])
  955. # Manually set the position and relative size of the inset axes within ax1
  956. ip = InsetPosition(ax1, [0.2,0.2,0.3,0.3])
  957. ax1_2.set_axes_locator(ip)
  958. else:
  959. # Create a new subplot
  960. ax1_2 = fig.add_subplot(223)
  961. # ticks and ticklabels need to be shifted according to Image Registration
  962. scan = scans_ordered[B-1] # show the scan of the last IV curve point as an example
  963. IRS_y = ImageRegistrationDic[str(scan)][0][2] # IRS = Image Registration Shift (in y or z)
  964. IRS_z = ImageRegistrationDic[str(scan)][1][2]
  965. # individual ticks: it's an extrapolated plot from 201x201 to 1000x1000 pixels, but this corresponds to 10x10 micrometers on the solar cell
  966. ax1_2.xaxis.set_ticklabels([0,5,10],fontsize=round(9/5*fontsize))
  967. #ax1_2.xaxis.set_ticks([0-IRS_y,500-IRS_y,999-IRS_y]) # if 1000 instead of 999 it won't display
  968. ax1_2.xaxis.set_ticks([0,500,999])
  969. ax1_2.yaxis.set_ticklabels([0,5,10],fontsize=round(9/5*fontsize))
  970. ax1_2.yaxis.set_ticks([0-IRS_z,500-IRS_z,999-IRS_z])
  971. ax1_2.xaxis.set_minor_locator(AutoMinorLocator(5)) # 1 minor tick for each micrometer
  972. ax1_2.yaxis.set_minor_locator(AutoMinorLocator(5))
  973.  
  974. if showPosition:
  975. ax1_2.imshow(current_array, cmap=cm.afmhot, origin='lower')
  976. ax1_2.plot([coordinate[0]],coordinate[1], "bo", markersize=8)
  977. # y and z labels
  978. ax1_2.set_xlabel("Position Y [μm]", labelpad = 10, fontsize=fontsize)
  979. ax1_2.set_ylabel("Position Z [μm]", labelpad = 10, fontsize=fontsize)
  980.  
  981. print("Number of NaN values on shown IV curve:", str(len(np.where(np.isnan(pixelplotting_array[:,coordinate[0],coordinate[1]]))[0])))
  982.  
  983. if coordinate == None:
  984. countnumberlines = 0
  985. countNaN = 0
  986.  
  987. # define the colormap for the curves
  988. cmap = cm.gist_rainbow
  989. norm = colors.Normalize(vmin=1, vmax=sample_size)
  990.  
  991. for z in np.linspace(set_region[0],set_region[1],sample_size**0.5,dtype=int):
  992. for y in np.linspace(set_region[2],set_region[3],sample_size**0.5,dtype=int):
  993. #ax1.plot(voltages[A:B], pixelplotting_array[:,y,z], "-o", markersize=8, linewidth=1.2, color=cmap(norm(countnumberlines)))
  994. ax1.plot(voltages[A:B], pixelplotting_array[:,y,z], "-", linewidth=0.5, color="blue")
  995. ax1.plot(voltages[A:B], current_mean, "-o", markersize=12, linewidth=4, color="blue")
  996.  
  997. if showPosition:
  998. if inset:
  999. # Create a set of inset Axes: these should fill the bounding box allocated to them.
  1000. ax1_2 = plt.axes([0,0,1,1])
  1001. # Manually set the position and relative size of the inset axes within ax1
  1002. ip = InsetPosition(ax1, [0.2,0.05,figsize[0]/120,figsize[0]/120])
  1003. ax1_2.set_axes_locator(ip)
  1004. else:
  1005. # Create a new subplot
  1006. ax1_2 = fig.add_subplot(223)
  1007. # ticks and ticklabels need to be shifted according to Image Registration
  1008. scan = scans_ordered[B-1] # show the scan of the last IV curve point as an example
  1009. IRS_y = ImageRegistrationDic[str(scan)][0][2] # IRS = Image Registration Shift (in y or z)
  1010. IRS_z = ImageRegistrationDic[str(scan)][1][2]
  1011. # individual ticks: it's an extrapolated plot from 201x201 to 1000x1000 pixels, but this corresponds to 10x10 micrometers on the solar cell
  1012. ax1_2.xaxis.set_ticklabels([0,5,10],fontsize=round(fontsize))
  1013. #ax1_2.xaxis.set_ticks([0-IRS_y,500-IRS_y,999-IRS_y]) # if 1000 instead of 999 it won't display
  1014. ax1_2.xaxis.set_ticks([0,500,999])
  1015. ax1_2.yaxis.set_ticklabels([0,5,10],fontsize=round(fontsize))
  1016. #ax1_2.yaxis.set_ticks([0-IRS_z,500-IRS_z,999-IRS_z])
  1017. ax1_2.yaxis.set_ticks([0,500,999])
  1018. ax1_2.xaxis.set_minor_locator(AutoMinorLocator(5)) # 1 minor tick for each micrometer
  1019. ax1_2.yaxis.set_minor_locator(AutoMinorLocator(5))
  1020.  
  1021. ax1_2.imshow(current_array, cmap=cm.afmhot, origin='lower')
  1022. #ax1_2.plot(y,z, "bo", markersize=8, color=cmap(norm(countnumberlines)))
  1023. ax1_2.plot(y,z, "bo", markersize=8, color="k")
  1024.  
  1025. # y and z labels
  1026. ax1_2.set_xlabel("Position Y [μm]", labelpad = 10, fontsize=fontsize)
  1027. ax1_2.set_ylabel("Position Z [μm]", labelpad = 10, fontsize=fontsize)
  1028. if not np.isnan(np.sum(pixelplotting_array[:,y,z])):
  1029. countnumberlines += 1
  1030. countNaN += len(np.where(np.isnan(pixelplotting_array[:,y,z]))[0])
  1031. print("Number of IV curves shown in plot:", str(countnumberlines))
  1032. print("Number of NaN values on shown IV curves:", str(countNaN))
  1033.  
  1034. ax2 = fig.add_subplot(222)
  1035. ax2.plot(voltages[A:B], current_mean, "-o", markersize=8, linewidth=1.2, color="blue",label="Mean XBIC for all (y,z) positions (nA)")
  1036. ax2.legend(fontsize=7/5*fontsize)
  1037.  
  1038. ax2.plot(voltages[A:B], shockley_function(voltages[A:B]/1000,popt[0],popt[1])*(-1e9), "-o", markersize=6, linewidth=0.6, color="red", label="Fitted IV curve")
  1039. ax2.legend(fontsize=7/5*fontsize)
  1040. print("-----\nActually measured:\nShort-circuit current:", str(round(current_mean[2],4)), "nA\n-----\nResult of fitting:\nSaturation current I_0:", str(round(popt[0]*1e9,4)), "nA\nPhotocurrent I_Ph:", str(round(popt[1]*1e9,4)),
  1041. "nA\nOpen Circuit Voltage V_OC:", str(round(shockley_function_VOC(popt[0],popt[1])*1e3,4)), "mV\n-----")
  1042.  
  1043. if ax1.get_ylim()[0] < 202 and ax1.get_ylim()[1] > 218: # this condition makes sure that the min and max mean current values are still shown
  1044. ax2.set_ylim(ax1.get_ylim()) # make sure that the current interval is the same so that the shape of the curves can be compared
  1045. ax2.set_xlabel("Bias voltage (mV)", labelpad = 10, fontsize=7/5*fontsize, fontweight="bold")
  1046. ax2.set_ylabel("XBIC (nA)", labelpad = 10, fontsize=7/5*fontsize, fontweight="bold")
  1047. plt.title("Mean IV curve", pad = 15, fontsize=round(7/5*fontsize), fontweight="bold")
  1048. ax2.tick_params(labelsize=7/5*fontsize, length=fontsize, right=True)
  1049. ax2.set_xticks(voltages[A:B])
  1050.  
  1051. ax2.yaxis.set_major_locator(MultipleLocator(major_interval))
  1052. ax2.yaxis.set_minor_locator(MultipleLocator(minor_interval))
  1053. ax2.tick_params(which='minor', length=4/5*fontsize, right=True)
  1054. plt.grid(axis="y", which="minor",lw=0.25)
  1055. plt.grid(axis="y")
  1056.  
  1057. if annotations:
  1058. for i in range(len(current_mean)):
  1059. ax2.annotate("Scan " + str(scans_ordered[i+A]), xy=(voltages[i+A], current_mean[i]), xytext=(voltages[i+A], current_mean[i]+1.3), arrowprops=dict(facecolor='black', shrink=0.12, width=2, headwidth=8))
  1060.  
  1061. '''#distance of ticklabels from ticks
  1062. ax1.tick_params(axis='both', which='major', pad=25)
  1063. ax1_2.tick_params(axis='both', which='major', pad=25)
  1064. ax2.tick_params(axis='both', which='major', pad=25)'''
  1065.  
  1066. plt.subplots_adjust(wspace=0.35) # adjust the width between the subplots
  1067.  
  1068. plt.show()
  1069.  
  1070. end = time.time()
  1071. print("Plotting took {} seconds.".format(str(round(end-start,2))))
  1072.  
  1073.  
  1074. if savepng:
  1075. now = datetime.now()
  1076. dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
  1077. if coordinate != None:
  1078. coord_str = f"{coordinate[0]}-{coordinate[1]}_"
  1079. else:
  1080. coord_str = ""
  1081. if dpi is None:
  1082. fig.savefig("savefig/IV_curve_" + coord_str + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
  1083. else:
  1084. fig.savefig("savefig/IV_curve_" + coord_str + dt_string + ".png", dpi=dpi, bbox_inches="tight")
  1085. if savepdf:
  1086. fig.savefig("savefig/IV_curve_" + dt_string + ".pdf", format="pdf")
  1087.  
  1088.  
  1089. # In[12]:
  1090.  
  1091.  
  1092. def shockley_function(V,I_0,I_Ph): # arguments and output in SI units
  1093. return I_0*(np.exp(constants.e*V/(constants.k*300))-1)-I_Ph
  1094.  
  1095.  
  1096. # In[13]:
  1097.  
  1098.  
  1099. def shockley_function_VOC(I_0,I_Ph): # arguments and output in SI units
  1100. return constants.k*300*np.log(I_Ph/I_0 + 1)/constants.e
  1101.  
  1102.  
  1103. # In[445]:
  1104.  
  1105.  
  1106. plotClusterCurves(kmeans_input=8, normalize_curves=True, title="")
  1107.  
  1108.  
  1109. # In[14]:
  1110.  
  1111.  
  1112. def plotClusterCurves(k=2, k_curves=1, kmeans_input=1, NaN=False, sample_size=100, data_processing=1, compare=True, normalize_curves=False, unnecessary_reference=False, suptitle="", title=True, dpi=None, savepng=False, savepng_str=None):
  1113. start = time.time()
  1114.  
  1115. #pixelplotting_array = plotIV(data_processing=data_processing, return_only=True)[1]
  1116. pixelplotting_array = np.array(load_cache_currents()["pixelplotting_array"])
  1117. kmeansDic = load_cache_kmeans(kmeans_input=kmeans_input, data_processing=data_processing)
  1118. #kmeansDic = {"2": {"cluster_numbers": plotMedianCorrelation(return_only=True)[0]}}
  1119. scan_order = [185,183,181,182,186]
  1120. voltages = [-1000, -500, -200, -100, 0, 100, 200, 500, 600]
  1121. A = 2 # plot only from scan_ordered[A] to ...
  1122. B = -2 # ... scan_ordered[B]
  1123.  
  1124. if unnecessary_reference:
  1125. figsize=(20,10)
  1126. else:
  1127. figsize=(10*1.5,10*1.5)
  1128. fontsize = figsize[0]//2
  1129. fig = plt.figure(figsize=figsize)
  1130. if unnecessary_reference:
  1131. ax1 = fig.add_subplot(121)
  1132. else:
  1133. ax1 = fig.add_subplot(111)
  1134.  
  1135. if unnecessary_reference:
  1136. # use variables from plotIV function
  1137. #current_array = plotIV(data_processing=data_processing, return_only=True)[0][-1] # select scan of last IV curve point
  1138. current_array = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(186)])
  1139. '''
  1140. # ticks and ticklabels need to be shifted according to Image Registration
  1141. scan = 186 # show the scan of the last IV curve point as an example <-- here it is scan 186, at B = -2 in the plotIV() function
  1142. IRS_y = ImageRegistrationDic[str(scan)][0][2] # IRS = Image Registration Shift (in y or z)
  1143. IRS_z = ImageRegistrationDic[str(scan)][1][2]
  1144. # individual ticks: it's an extrapolated plot from 201x201 to 1000x1000 pixels, but this corresponds to 10x10 micrometers on the solar cell
  1145. ax1.xaxis.set_ticklabels([0,5,10],fontsize=round(4/5*fontsize))
  1146. ax1.xaxis.set_ticks([0-IRS_y,500-IRS_y,999-IRS_y]) # if 1000 instead of 999 it won't display
  1147. ax1.yaxis.set_ticklabels([0,5,10],fontsize=round(4/5*fontsize))
  1148. ax1.yaxis.set_ticks([0-IRS_z,500-IRS_z,999-IRS_z])
  1149. ax1.xaxis.set_minor_locator(AutoMinorLocator(5)) # 1 minor tick for each micrometer
  1150. ax1.yaxis.set_minor_locator(AutoMinorLocator(5))
  1151. '''
  1152. ax1.imshow(current_array, cmap=cm.afmhot, origin='lower')
  1153. # y and z labels
  1154. ax1.set_xlabel("Position Y [μm]", labelpad = 0, fontsize=fontsize)
  1155. ax1.set_ylabel("Position Z [μm]", labelpad = 0, fontsize=fontsize)
  1156.  
  1157. if NaN: # this option was only used, because there was something wrong with the plotClusterCurve "if NaN" condition itself, adding weird NaN values to the current grid
  1158. NaN_where = np.where(np.isnan(pixelplotting_array))
  1159. NaN_where = np.flip(NaN_where,0) # np.flip is necessary, because np.isnan reads from bottom to top
  1160. ax1.scatter(NaN_where[0],NaN_where[1])
  1161. ax1.set_title("Position of NaN values (in blue)", pad = 15, fontsize=round(9/8*fontsize))
  1162.  
  1163. if k != None and unnecessary_reference:
  1164. for z in np.linspace(0,999,sample_size**0.5,dtype=int):
  1165. for y in np.linspace(0,999,sample_size**0.5,dtype=int):
  1166. if np.array(kmeansDic[str(k)]["cluster_numbers"])[y][z] == k_curves:
  1167. ax1.scatter(y,z,color="red")
  1168. else:
  1169. ax1.scatter(y,z,color="black")
  1170. ax1.set_title("Position of the displayed curve", pad = 15, fontsize=round(9/8*fontsize))
  1171.  
  1172.  
  1173. if k_curves != None: # plot curves belonging to the k_th cluster
  1174. if unnecessary_reference:
  1175. ax2 = fig.add_subplot(122)
  1176. else:
  1177. ax2 = fig.add_subplot(111)
  1178.  
  1179. ax2.set_xlabel("Bias voltage (mV)", labelpad = 10, fontsize=21/5*fontsize, fontweight="bold")
  1180. ax2.set_ylabel("XBIC (nA)", labelpad = 10, fontsize=21/5*fontsize, fontweight="bold")
  1181. ax2.tick_params(labelsize=15/5*fontsize, length=7/5*fontsize)
  1182.  
  1183. ax2.xaxis.set_ticks([-200,-100,0,100,200])
  1184.  
  1185. ax2.yaxis.set_minor_locator(MultipleLocator(5))
  1186. ax2.tick_params(which='minor', length=4/5*fontsize)
  1187. plt.grid(axis="y", which="minor",lw=0.25)
  1188. plt.grid(axis="y")
  1189.  
  1190. k_curves_values = pixelplotting_array[:, np.array(kmeansDic[str(k)]["cluster_numbers"]) == k_curves].T # instead of transposing, one could also specify another axis for averaging!
  1191. mean_array = np.empty(0)
  1192. for scan_index in range(5):
  1193. mean_array = np.append(mean_array, np.nanmean(k_curves_values[:,scan_index]))
  1194. print(f"Mean currents for curve #{k_curves} (with k = {k}):", str(np.around(mean_array,2)))
  1195. countnumberlines_list = []
  1196. countnumberlines = 0
  1197. for z in np.linspace(0,999,sample_size**0.5,dtype=int):
  1198. for y in np.linspace(0,999,sample_size**0.5,dtype=int):
  1199. if not math.isnan(np.array(kmeansDic[str(k)]["cluster_numbers"])[y][z]):
  1200. if int(np.array(kmeansDic[str(k)]["cluster_numbers"])[y][z]) == k_curves:
  1201. if normalize_curves:
  1202. ax2.plot(voltages[A:B], pixelplotting_array[:,y,z]/pixelplotting_array[2,y,z], linewidth = 0.5, color="pink")
  1203. else:
  1204. ax2.plot(voltages[A:B], pixelplotting_array[:,y,z], linewidth = 0.5, color="pink")
  1205. countnumberlines += 1
  1206. countnumberlines_list.append(countnumberlines)
  1207. if normalize_curves:
  1208. ax2.plot(voltages[A:B], mean_array/mean_array[2], "-o", markeredgewidth=3, linewidth = 2.0, color="red", label=f"Group {k_curves}")
  1209. else:
  1210. ax2.plot(voltages[A:B], mean_array, "-o", markeredgewidth=3, linewidth = 2.0, color="red", label=f"Group {k_curves}")
  1211. plt.legend()
  1212.  
  1213. if compare:
  1214. colors = ["blue", "green", "brown", "yellow"]
  1215. color_counter = 0
  1216. for comparing_curve in range(1,k+1):
  1217. if comparing_curve == k_curves: # don't plot again the red curve for the k_curve that is already selected
  1218. continue
  1219. countnumberlines = 0
  1220. for z in np.linspace(0,999,sample_size**0.5,dtype=int):
  1221. for y in np.linspace(0,999,sample_size**0.5,dtype=int):
  1222. if not math.isnan(np.array(kmeansDic[str(k)]["cluster_numbers"])[y][z]):
  1223. if int(np.array(kmeansDic[str(k)]["cluster_numbers"])[y][z]) == comparing_curve:
  1224. if normalize_curves:
  1225. ax2.plot(voltages[A:B], pixelplotting_array[:,y,z]/pixelplotting_array[2,y,z], linewidth = 0.5, color="cornflowerblue")
  1226. else:
  1227. ax2.plot(voltages[A:B], pixelplotting_array[:,y,z], linewidth = 0.5, color="cornflowerblue")
  1228. countnumberlines += 1
  1229. countnumberlines_list.append(countnumberlines)
  1230.  
  1231. k_curves_values = pixelplotting_array[:, np.array(kmeansDic[str(k)]["cluster_numbers"]) == comparing_curve].T # instead of transposing, one could also specify another axis for averaging!
  1232. mean_array = np.empty(0)
  1233. for scan_index in range(5):
  1234. mean_array = np.append(mean_array, np.nanmean(k_curves_values[:,scan_index]))
  1235. if normalize_curves:
  1236. ax2.plot(voltages[A:B], mean_array/mean_array[2], "-o", markeredgewidth=3, linewidth = 2.0, color=colors[color_counter], label=f"Group {comparing_curve}")
  1237. else:
  1238. ax2.plot(voltages[A:B], mean_array, "-o", markeredgewidth=3, linewidth = 2.0, color=colors[color_counter], label=f"Group {comparing_curve}")
  1239. color_counter += 1
  1240. ax2.legend(fontsize=4*fontsize, prop={'size': 4*fontsize})
  1241. print("For comparison, the mean curve for group", str(comparing_curve), "has been plotted:", str(np.around(mean_array,2)))
  1242. if k == 2:
  1243. s_word = "group"
  1244. else:
  1245. s_word = "groups"
  1246.  
  1247. if unnecessary_reference:
  1248. title_fontsize = round(9/8*fontsize)
  1249. else:
  1250. title_fontsize = round(18/8*fontsize)
  1251. if title:
  1252. ax2.set_title(f"IV curves for group {k_curves} (with k = {k}), compared to the IV curves of the other {s_word}", pad = 15, fontsize=title_fontsize, fontweight="bold")
  1253.  
  1254. countnumberlines_str = f"Number of IV curves shown in plot: {countnumberlines_list[0]} (group {k_curves})"
  1255. if compare:
  1256. comparing_curve_counter = 1
  1257. for comparing_curve in range(1,k+1):
  1258. if comparing_curve == k_curves:
  1259. comparing_curve_counter += 1
  1260. continue
  1261. countnumberlines_str += f", {countnumberlines_list[comparing_curve_counter-1]} (group {comparing_curve_counter})"
  1262. comparing_curve_counter
  1263. print(countnumberlines_str)
  1264.  
  1265. plt.suptitle(suptitle)
  1266. plt.show()
  1267. end = time.time()
  1268. print("Plotting took {} seconds.".format(str(round(end-start,2))))
  1269.  
  1270. if savepng:
  1271. now = datetime.now()
  1272. dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
  1273. if dpi is None:
  1274. if savepng_str is None:
  1275. fig.savefig("savefig/different_inputs_for_clustering/ClusterCurves_" + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
  1276. else:
  1277. fig.savefig("savefig/different_inputs_for_clustering/ClusterCurves_" + savepng_str + ".png", dpi=fig.dpi, bbox_inches="tight")
  1278. else:
  1279. if savepng_str is None:
  1280. fig.savefig("savefig/different_inputs_for_clustering/ClusterCurves_" + dt_string + ".png", dpi=dpi, bbox_inches="tight")
  1281. else:
  1282. fig.savefig("savefig/different_inputs_for_clustering/ClusterCurves_" + savepng_str + ".png", dpi=dpi, bbox_inches="tight")
  1283.  
  1284.  
  1285. # In[15]:
  1286.  
  1287.  
def plotClusterPosition(k=2, kmeans_input=1, data_processing=1, title="", dpi=None, suptitle="", no_suptitle=False, grid=True, plot_any=None, savepng=False, savepng_str=None):
    """Plot the spatial map of the k-means cluster groups on the solar cell.

    Loads the cached clustering result for (kmeans_input, data_processing),
    draws the grid of cluster numbers with a discrete two-colour map and a
    colorbar, and optionally saves the figure as a PNG.

    Parameters:
        k (int): number of clusters; key into the cached kmeans dictionary.
        kmeans_input (int/float): which clustering input variant to load.
        data_processing (int): signal-path variant of the current data.
        title (str): axis title.
        dpi (int/None): PNG resolution; None -> figure dpi.
        suptitle (str): figure super-title (per-group min/mean/max currents
            are appended unless no_suptitle is set).
        no_suptitle (bool): suppress the super-title entirely.
        grid (bool): draw major and minor grid lines.
        plot_any (2D array/None): plot this array (white/black) instead of
            the cached cluster numbers.
        savepng (bool): save under savefig/different_inputs_for_clustering/.
        savepng_str (str/None): file-name suffix; None -> timestamp.
    """
    start = time.time()

    kmeansDic = load_cache_kmeans(kmeans_input = kmeans_input, data_processing=data_processing)

    figsize = (15,15)
    fontsize = figsize[0]  # font sizes below are scaled from the figure width
    #fig = plt.figure(figsize=figsize)
    #ax = fig.add_subplot(111)
    fig, ax = plt.subplots(figsize=figsize)
    ax.set_aspect("equal")

    # two discrete colours for the two cluster groups; bounds centred on the
    # group numbers 1 and 2 so the colorbar ticks sit mid-colour
    cmap = colors.ListedColormap(["yellow", "black"])
    bounds=[0.5,1.5,2.5]
    norm = colors.BoundaryNorm(bounds, cmap.N)

    if plot_any is not None:
        cmap = colors.ListedColormap(["white", "black"])
        groups = plot_any
    else:
        groups = np.array(kmeansDic[str(k)]["cluster_numbers"])

    im = plt.pcolormesh(groups, cmap=cmap)

    # colorbar in its own axis to the right of the map
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.25)
    cbar = plt.colorbar(im, cax, cmap=cmap, norm=norm, boundaries=bounds, ticks=[1, 2])
    cbar.ax.tick_params(length=7/5*fontsize, labelsize=2*fontsize)
    cax.set_ylabel("Cluster Groups", rotation = -90, labelpad = 50 , fontsize=3*fontsize, fontweight="bold")

    #im = ax.imshow(np.array(kmeansDic[str(k)]["cluster_numbers"]), interpolation='nearest', cmap=cm.afmhot, origin='lower')

    ax.set_xlabel("Position Y (μm)", labelpad = 10, fontsize=15/5*fontsize, fontweight="bold")
    ax.set_ylabel("Position Z (μm)", labelpad = 10, fontsize=15/5*fontsize, fontweight="bold")
    ax.set_title(title, pad = 15, fontsize=15/5*fontsize, fontweight="bold")
    ax.tick_params(length=7/5*fontsize)
    ax.tick_params(which='minor', length=4/5*fontsize)

    # ticks and ticklabels need to be shifted according to Image Registration
    # NOTE(review): the shifts computed below are currently unused -- the
    # shifted set_ticks calls are commented out; confirm whether the shift
    # should actually be applied.
    IRS_y_list = []
    IRS_z_list = []
    for scan in [185,183,182,186]:
        IRS_y_list.append(ImageRegistrationDic[str(scan)][0][2])
        IRS_z_list.append(ImageRegistrationDic[str(scan)][1][2])
    IRS_y = max(IRS_y_list) # IRS = Image Registration Shift (in y or z)
    IRS_z = min(IRS_z_list)
    # individual ticks: it's an extrapolated plot from 201x201 to 1000x1000 pixels, but this corresponds to 10x10 micrometers on the solar cell
    ax.xaxis.set_ticklabels([0,5,10],fontsize=round(12/5*fontsize))
    #ax.xaxis.set_ticks([0-IRS_y,500-IRS_y,999-IRS_y]) # if 1000 instead of 999 it won't display
    ax.xaxis.set_ticks([0,500,999])
    ax.yaxis.set_ticklabels([0,5,10],fontsize=round(12/5*fontsize))
    #ax.yaxis.set_ticks([0-IRS_z,500-IRS_z,999-IRS_z])
    ax.yaxis.set_ticks([0,500,999])
    ax.xaxis.set_minor_locator(AutoMinorLocator(5)) # 1 minor tick for each micrometer
    ax.yaxis.set_minor_locator(AutoMinorLocator(5))

    if not no_suptitle:
        # append the cached per-group current statistics to the super-title
        MinMeanMax_str = ""
        for group_index in range(1,k+1):
            Min = int(round(kmeansDic[str(k)]["threshold"][group_index-1][0]))
            Mean = int(round(kmeansDic[str(k)]["threshold"][group_index-1][1]))
            Max = int(round(kmeansDic[str(k)]["threshold"][group_index-1][2]))
            MinMeanMax_str += f"\nMin/Mean/Max current for group {group_index} is {Min}/{Mean}/{Max} nA."
        plt.suptitle(suptitle+MinMeanMax_str)

    '''
    # add a colorbar as legend
    ax_divider = make_axes_locatable(ax) # Introduce a divider next to the axis already in place
    cax = ax_divider.append_axes("right", size = "15%", pad = "5%") # Append a new axis object with name "cax" to the divider. Width ("size") and distance to the 2D-Plot ("pad") can be given in % of the width of the x axis.
    legend = colorbar(im, cax = cax) # Introduce a legend (colorbar) where the new axis has been placed

    cax.set_ylabel("Number of cluster", rotation = -90, labelpad = 50 , style = "italic", fontsize=5/4*fontsize) # Place this label next to the colorbar; labelpad denotes the distance of the colorbar
    cax.yaxis.set_major_locator(AutoLocator())
    cax.yaxis.set_minor_locator(AutoMinorLocator())
    '''
    if grid:
        ax.grid(which="both")
    plt.show()
    end = time.time()
    print("Plotting took {} seconds.".format(str(round(end-start,2))))

    if savepng:
        now = datetime.now()
        dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
        # dpi=None falls back to the figure dpi; savepng_str=None falls back to a timestamp
        if dpi is None:
            if savepng_str is None:
                fig.savefig("savefig/different_inputs_for_clustering/ClusterPosition_" + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
            else:
                fig.savefig("savefig/different_inputs_for_clustering/ClusterPosition_" + savepng_str + ".png", dpi=fig.dpi, bbox_inches="tight")
        else:
            if savepng_str is None:
                fig.savefig("savefig/different_inputs_for_clustering/ClusterPosition_" + dt_string + ".png", dpi=dpi, bbox_inches="tight")
            else:
                fig.savefig("savefig/different_inputs_for_clustering/ClusterPosition_" + savepng_str + ".png", dpi=dpi, bbox_inches="tight")
  1382.  
  1383.  
  1384. # In[385]:
  1385.  
  1386.  
# Plot the XBIC map of scan 181 on its own.
plotXBIC2(singleplots=[181])
  1388.  
  1389.  
  1390. # In[380]:
  1391.  
  1392.  
  1393. arr = kmeans_clustering(kmeans_input=8, return_only=True)[0]
  1394. arr = np.reshape(arr, (1000,1000))
  1395. plotXBIC2(plot_any=arr)
  1396.  
  1397.  
  1398. # In[16]:
  1399.  
  1400.  
  1401. def kmeans_clustering(data_processing=1, k=2, random_state=22, kmeans_input=1, normalize_samples=False, sample_size=50000, silhouette=True, return_only=False):
  1402. '''This function applies k-means clustering under varying data input. Basis is the pixelplotting_array which contains the five 1000x1000 current grids under five different bias voltages. The argument data_processing refers to what signal path we seek; default is the R value of the lock-in amplifier.
  1403. As an output we get a 1000x1000 grid with all the cluster numbers, starting from 1 and going to k, the different scores for finding the optimal k, and the threshold which refers to the min/max values of each group.'''
  1404. #pixelplotting_array = plotIV(data_processing=data_processing, return_only=True)[1]
  1405. if data_processing == 1:
  1406. pixelplotting_array = np.array(load_cache_currents()["pixelplotting_array"])
  1407. if data_processing == 3:
  1408. pixelplotting_array = np.array(load_cache_currents()["pixelplotting_array_direct"])
  1409. pixelvectors_original = pixelplotting_array.reshape(5, 1000000).T # each 5D vector represents 1 pixel with 5 different bias voltages from - to + 200 mV
  1410.  
  1411. # 5 absolute values
  1412. if kmeans_input == 1:
  1413. pixelvectors = pixelvectors_original
  1414.  
  1415. # 4 absolute values (from -200 to +100)
  1416. if kmeans_input == 1.1:
  1417. pixelvectors = pixelvectors_original.T[:4].T
  1418.  
  1419. # 4 slopes
  1420. if kmeans_input == 2:
  1421. pixelvectors = pixelvectors_original
  1422. pixelvectors = np.array([
  1423. pixelvectors.T[0]-pixelvectors.T[1],
  1424. pixelvectors.T[1]-pixelvectors.T[2],
  1425. pixelvectors.T[2]-pixelvectors.T[3],
  1426. pixelvectors.T[3]-pixelvectors.T[4]
  1427. ]).T
  1428.  
  1429. # 4 slopes + 1 absolute value (average)
  1430. if kmeans_input == 3:
  1431. pixelvectors = pixelvectors_original
  1432. pixelvectors = np.array([
  1433. pixelvectors.T[0]-pixelvectors.T[1],
  1434. pixelvectors.T[1]-pixelvectors.T[2],
  1435. pixelvectors.T[2]-pixelvectors.T[3],
  1436. pixelvectors.T[3]-pixelvectors.T[4],
  1437. np.nanmean(pixelvectors,axis=1)
  1438. ]).T
  1439.  
  1440. # 4 slopes + 1 absolute value (-200 mV)
  1441. if kmeans_input == 3.1:
  1442. pixelvectors = pixelvectors_original
  1443. pixelvectors = np.array([
  1444. pixelvectors.T[0]-pixelvectors.T[1],
  1445. pixelvectors.T[1]-pixelvectors.T[2],
  1446. pixelvectors.T[2]-pixelvectors.T[3],
  1447. pixelvectors.T[3]-pixelvectors.T[4],
  1448. pixelvectors.T[0]
  1449. ]).T
  1450.  
  1451. # 4 slopes + 1 absolute value (-100 mV)
  1452. if kmeans_input == 3.2:
  1453. pixelvectors = pixelvectors_original
  1454. pixelvectors = np.array([
  1455. pixelvectors.T[0]-pixelvectors.T[1],
  1456. pixelvectors.T[1]-pixelvectors.T[2],
  1457. pixelvectors.T[2]-pixelvectors.T[3],
  1458. pixelvectors.T[3]-pixelvectors.T[4],
  1459. pixelvectors.T[1]
  1460. ]).T
  1461.  
  1462. # 4 slopes + 1 absolute value (0 mV)
  1463. if kmeans_input == 3.3:
  1464. pixelvectors = pixelvectors_original
  1465. pixelvectors = np.array([
  1466. pixelvectors.T[0]-pixelvectors.T[1],
  1467. pixelvectors.T[1]-pixelvectors.T[2],
  1468. pixelvectors.T[2]-pixelvectors.T[3],
  1469. pixelvectors.T[3]-pixelvectors.T[4],
  1470. pixelvectors.T[2]
  1471. ]).T
  1472.  
  1473. # 4 slopes + 1 absolute value (+100 mV)
  1474. if kmeans_input == 3.4:
  1475. pixelvectors = pixelvectors_original
  1476. pixelvectors = np.array([
  1477. pixelvectors.T[0]-pixelvectors.T[1],
  1478. pixelvectors.T[1]-pixelvectors.T[2],
  1479. pixelvectors.T[2]-pixelvectors.T[3],
  1480. pixelvectors.T[3]-pixelvectors.T[4],
  1481. pixelvectors.T[3]
  1482. ]).T
  1483.  
  1484. # 4 slopes + 1 absolute value (+200 mV)
  1485. if kmeans_input == 3.5:
  1486. pixelvectors = pixelvectors_original
  1487. pixelvectors = np.array([
  1488. pixelvectors.T[0]-pixelvectors.T[1],
  1489. pixelvectors.T[1]-pixelvectors.T[2],
  1490. pixelvectors.T[2]-pixelvectors.T[3],
  1491. pixelvectors.T[3]-pixelvectors.T[4],
  1492. pixelvectors.T[4]
  1493. ]).T
  1494.  
  1495. # 2 slopes (-200 to +200, +100 to +200)
  1496. if kmeans_input == 4:
  1497. pixelvectors = pixelvectors_original
  1498. pixelvectors = np.array([
  1499. pixelvectors.T[0]-pixelvectors.T[4],
  1500. pixelvectors.T[3]-pixelvectors.T[4]
  1501. ]).T
  1502.  
  1503. # only I(V=0)
  1504. if kmeans_input == 5:
  1505. pixelvectors = np.empty((1000000,5))
  1506. pixelvectors[np.isnan(pixelvectors_original).any(axis=1)] = np.float("NaN")
  1507. pixelvectors[~np.isnan(pixelvectors_original).any(axis=1)] = pixelvectors_original[np.newaxis,:].T[2][~np.isnan(pixelvectors_original).any(axis=1)]
  1508.  
  1509. pixelvectors = np.array([
  1510. pixelvectors.T[2]
  1511. ]).T
  1512.  
  1513. # only 1 slope (-200 to +200)
  1514. if kmeans_input == 6:
  1515. pixelvectors = pixelvectors_original
  1516. pixelvectors = np.array([
  1517. pixelvectors.T[0]-pixelvectors.T[4]
  1518. ]).T
  1519.  
  1520. # only 1 slope (+100 to +200)
  1521. if kmeans_input == 7:
  1522. pixelvectors = pixelvectors_original
  1523. pixelvectors = np.array([
  1524. pixelvectors.T[3]-pixelvectors.T[4]
  1525. ]).T
  1526.  
  1527. # only 1 slope (-200 to -100)
  1528. if kmeans_input == 7.1:
  1529. pixelvectors = pixelvectors_original
  1530. pixelvectors = np.array([
  1531. pixelvectors.T[0]-pixelvectors.T[1]
  1532. ]).T
  1533.  
  1534. # only 1 slope (-100 to 0)
  1535. if kmeans_input == 7.2:
  1536. pixelvectors = pixelvectors_original
  1537. pixelvectors = np.array([
  1538. pixelvectors.T[1]-pixelvectors.T[2]
  1539. ]).T
  1540.  
  1541. # only 1 slope (0 to +100)
  1542. if kmeans_input == 7.3:
  1543. pixelvectors = pixelvectors_original
  1544. pixelvectors = np.array([
  1545. pixelvectors.T[2]-pixelvectors.T[3]
  1546. ]).T
  1547.  
  1548. # linear regression for all five points
  1549. if kmeans_input == 8:
  1550. pixelvectors = pixelvectors_original
  1551. slope, intercept = np.polyfit([-200,-100,0,100,200], pixelvectors[~np.isnan(pixelvectors).any(axis=1)].T,deg=1)
  1552. pixelvectors_new = np.empty((1000000,1))
  1553. pixelvectors_new[~np.isnan(pixelvectors).any(axis=1)] = slope[np.newaxis,:].T
  1554. pixelvectors_new[np.isnan(pixelvectors).any(axis=1)] = np.float("NaN")
  1555. pixelvectors = pixelvectors_new
  1556.  
  1557. if return_only: # also return the intercept
  1558. intercept_new = np.empty((1000000,1))
  1559. intercept_new[~np.isnan(pixelvectors).any(axis=1)] = intercept[np.newaxis,:].T
  1560. intercept_new[np.isnan(pixelvectors).any(axis=1)] = np.float("NaN")
  1561. intercept = intercept_new
  1562.  
  1563. # linear regression forall five points + intercept
  1564. if kmeans_input == 8.1:
  1565. pixelvectors = pixelvectors_original
  1566. slope, intercept = np.polyfit([-200,-100,0,100,200], pixelvectors[~np.isnan(pixelvectors).any(axis=1)].T,deg=1)
  1567. slope = slope[np.newaxis,:] # add 1 dimension
  1568. intercept = intercept[np.newaxis,:] # add 1 dimension
  1569. pixelvectors_new = np.empty((1000000,2))
  1570. pixelvectors_new[~np.isnan(pixelvectors).any(axis=1)] = np.concatenate((slope,intercept),axis=0).T
  1571. pixelvectors_new[np.isnan(pixelvectors).any(axis=1)] = np.float("NaN")
  1572. pixelvectors = pixelvectors_new
  1573.  
  1574. # linear regression for -100 mV to +100 mV
  1575. if kmeans_input == 9:
  1576. pixelvectors = pixelvectors_original
  1577. slope, intercept = np.polyfit([-100,0,100], pixelvectors[~np.isnan(pixelvectors).any(axis=1)][:, 1:4].T,deg=1)
  1578. pixelvectors_new = np.empty((1000000,1))
  1579. pixelvectors_new[~np.isnan(pixelvectors).any(axis=1)] = slope[np.newaxis,:].T
  1580. pixelvectors_new[np.isnan(pixelvectors).any(axis=1)] = np.float("NaN")
  1581. pixelvectors = pixelvectors_new
  1582.  
  1583. if return_only: # also return the intercept
  1584. intercept_new = np.empty((1000000,1))
  1585. intercept_new[~np.isnan(pixelvectors).any(axis=1)] = intercept[np.newaxis,:].T
  1586. intercept_new[np.isnan(pixelvectors).any(axis=1)] = np.float("NaN")
  1587. intercept = intercept_new
  1588.  
  1589. # linear regression for -200 mV to +100 mV
  1590. if kmeans_input == 10:
  1591. pixelvectors = pixelvectors_original
  1592. slope, intercept = np.polyfit([-200,-100,0,100], pixelvectors[~np.isnan(pixelvectors).any(axis=1)][:, 0:4].T,deg=1)
  1593. pixelvectors_new = np.empty((1000000,1))
  1594. pixelvectors_new[~np.isnan(pixelvectors).any(axis=1)] = slope[np.newaxis,:].T
  1595. pixelvectors_new[np.isnan(pixelvectors).any(axis=1)] = np.float("NaN")
  1596. pixelvectors = pixelvectors_new
  1597.  
  1598. if return_only: # also return the intercept
  1599. intercept_new = np.empty((1000000,1))
  1600. intercept_new[~np.isnan(pixelvectors).any(axis=1)] = intercept[np.newaxis,:].T
  1601. intercept_new[np.isnan(pixelvectors).any(axis=1)] = np.float("NaN")
  1602. intercept = intercept_new
  1603.  
  1604. if return_only:
  1605. if kmeans_input in [8,8.1,9,10]:
  1606. return pixelvectors, intercept
  1607. else:
  1608. return pixelvectors
  1609.  
  1610. data = pixelvectors[~np.isnan(pixelvectors).any(axis=1)] # delete all vectors which contain NaN values
  1611.  
  1612. if not normalize_samples: # maximum of ALL values is 1 and minimum of ALL values is 0
  1613. scaler = MinMaxScaler()
  1614. scaler.fit(data)
  1615. data_transformed = scaler.transform(data)
  1616. else: # maximum of each curve is 1 and minimum of each curve is 0
  1617. if kmeans_input in [5,6,7]:
  1618. return "Error! For kmeans_input = 5,6,7 you cannot normalize the samples, as they will all be set to 0.01, leading kmeans to put all samples in group 1."
  1619. data_transformed = data/np.linalg.norm(data, ord=2, axis=1, keepdims=True)
  1620. km = KMeans(n_clusters = k)
  1621. groups = km.fit_predict(data_transformed) + 1 # Adding 1 calls the first group "group 1" instead of "group 0". Also, it avoids confusion with the zeros_array
  1622. number_sqdist = km.inertia_
  1623. if silhouette: # silhouette slows down the function extremely, so it is given as optional
  1624. silhouette = silhouette_score(data_transformed, groups, sample_size=sample_size)
  1625. else:
  1626. silhouette = float("NaN")
  1627. calinski_harabasz = calinski_harabasz_score(data_transformed, groups)
  1628.  
  1629. no_data = np.isnan(pixelvectors).any(axis=1) # returns a 1D array with True if a vector contains a NaN value, else False
  1630. no_data_count = no_data.sum() # counts all the vectors with NaNs
  1631. data_count = 1000000-no_data_count
  1632.  
  1633. zeros_array = np.zeros(1000000)
  1634. zeros_array[~no_data] = groups # fills in all the group numbers where there are no NaNs in a vector
  1635. zeros_array[no_data] = np.nan # where there are NaNs in a vector: assign NaN as "group number"
  1636. cluster_array = zeros_array.reshape(1000,1000)
  1637.  
  1638. threshold = np.zeros(3*k).reshape(k,3)
  1639.  
  1640. for k_index in range(1,k+1):
  1641. if kmeans_input == 5:
  1642. values = pixelplotting_array[2, cluster_array == k_index]
  1643. else:
  1644. values = pixelplotting_array[:, cluster_array == k_index]
  1645. threshold_min = np.nanmin(values.T)
  1646. threshold_avg = np.nanmean(values.T)
  1647. threshold_max = np.nanmax(values.T)
  1648. threshold[k_index-1] = np.array([threshold_min, threshold_avg, threshold_max])
  1649.  
  1650. return cluster_array, number_sqdist, silhouette, calinski_harabasz, threshold
  1651.  
  1652.  
  1653. # In[17]:
  1654.  
  1655.  
  1656. def find_k(method="all", dpi=None, savepng=False):
  1657.  
  1658. figsize=(12,12)
  1659. fontsize = figsize[0]
  1660.  
  1661. fig = plt.figure(figsize=figsize)
  1662. ax1 = fig.add_subplot(111)
  1663.  
  1664. x = []
  1665. y = []
  1666.  
  1667. if method == "elbow": # elbow method
  1668. for k in kmeansDic.keys():
  1669. x.append(int(k))
  1670. y.append(np.array(kmeansDic[str(k)]["ssqd"]))
  1671. plt.scatter(x,y)
  1672. string1 = "Elbow score"
  1673. string2 = "Sum of squared distances to respective centroid"
  1674. string3 = "elbow" # only for printing PNGs
  1675.  
  1676. if method == "silhouette": # silhouette score := (b-a) divided by max(a,b) ; where a is the intra-cluster distance and b is the nearest-cluster distance
  1677. for k in kmeansDic.keys():
  1678. x.append(int(k))
  1679. y.append(np.array(kmeansDic[str(k)]["silhouette"]))
  1680. plt.scatter(x,y)
  1681. string1 = "Silhouette score"
  1682. string2 = "(b-a)/max(a,b)"
  1683. string3 = "silhouette" # only for printing PNGs
  1684.  
  1685. if method == "calinski_harabasz": # Calinski-Harabasz score := within-cluster disperson divided by between-cluster dispersion
  1686. for k in kmeansDic.keys():
  1687. x.append(int(k))
  1688. y.append(np.array(kmeansDic[str(k)]["calinski_harabasz"]))
  1689. plt.scatter(x,y)
  1690. string1 = "Calinski-Harabasz score"
  1691. string2 = "ratio between within-cluster and between-cluster disperson"
  1692. string3 = "calinski_harabasz" # only for printing PNGs
  1693.  
  1694. if method == "all": # compare all scores next to each other
  1695.  
  1696. try:
  1697. for k in kmeansDic.keys():
  1698. x.append(int(k))
  1699. y.append(np.array(kmeansDic[str(k)]["ssqd"]))
  1700. y_normalized = []
  1701. for entry in y:
  1702. y_normalized.append(entry/np.max(y))
  1703. print("elbow:", str([round(i,2) for i in y_normalized]))
  1704. print("elbow (original):", str([round(float(i),2) for i in y]))
  1705. elbow_scatter = ax1.scatter(x,y_normalized, c="red")
  1706. except:
  1707. print("No value for elbow in this kmeans dictionary.") # not actually needed, because there are NaN values from the kmeans_clustering() function
  1708.  
  1709. try:
  1710. x.clear() # don't take over
  1711. y.clear() # values from ssqd!
  1712. for k in kmeansDic.keys():
  1713. x.append(int(k))
  1714. y.append(np.array(kmeansDic[str(k)]["silhouette"]))
  1715. y_normalized = []
  1716. for entry in y:
  1717. y_normalized.append(entry/np.max(y))
  1718. print("silhouette:", str([round(i,2) for i in y_normalized]))
  1719. print("silhouette (original):", str([round(float(i),2) for i in y]))
  1720. silhouette_scatter = ax1.scatter(x,y_normalized, c="green")
  1721. except:
  1722. print("No value for silhouette in this kmeans dictionary.")
  1723.  
  1724. try:
  1725. x.clear() # don't take over
  1726. y.clear() # values from silhouette!
  1727. for k in kmeansDic.keys():
  1728. x.append(int(k))
  1729. y.append(np.array(kmeansDic[str(k)]["calinski_harabasz"]))
  1730. y_normalized = []
  1731. for entry in y:
  1732. y_normalized.append(entry/np.max(y))
  1733. print("calinski_harabasz:", str([round(i,2) for i in y_normalized]))
  1734. print("calinski_harabasz (original):", str([round(float(i),2) for i in y]))
  1735. calinski_harabasz_scatter = ax1.scatter(x,y_normalized, c="blue")
  1736. except:
  1737. print("No value for calinski-harabasz in this kmeans dictionary.")
  1738.  
  1739. string1 = "All scores in comparison"
  1740. string2 = "Normalized scores"
  1741.  
  1742. plt.legend((elbow_scatter, silhouette_scatter, calinski_harabasz_scatter), ("Elbow Score", "Silhouette score", "Calinski-Harabasz score"), scatterpoints=1, loc='lower left', ncol=3, fontsize=fontsize)
  1743.  
  1744. ax1.set_xlabel("Chosen k for clustering", labelpad = 10, fontsize=9/5*fontsize, fontweight="bold")
  1745. ax1.set_ylabel(string2, labelpad = 10, fontsize=9/5*fontsize, fontweight="bold")
  1746. plt.title(string1, pad = 15, fontsize=round(9/5*fontsize), fontweight="bold")
  1747. ax1.tick_params(labelsize=9/5*fontsize, length=fontsize)
  1748. ax1.set_xticks(x)
  1749.  
  1750. plt.show()
  1751.  
  1752. if savepng:
  1753. now = datetime.now()
  1754. dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
  1755. if dpi is None:
  1756. fig.savefig("savefig/find_k_" + string3 + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
  1757. else:
  1758. fig.savefig("savefig/find_k_" + string3 + dt_string + ".png", dpi=dpi, bbox_inches="tight")
  1759.  
  1760.  
  1761. # In[18]:
  1762.  
  1763.  
  1764. def bar_chart(input1=1, input2=2,input2_random=False):
  1765.  
  1766. figsize = (20,10)
  1767. fontsize = figsize[0]
  1768. fig, ax = plt.subplots(figsize=figsize)
  1769.  
  1770. groups1 = np.array(load_cache_kmeans(kmeans_input = input1)["2"]["cluster_numbers"])
  1771. if input2_random:
  1772. groups2 = np.random.randint(2,size=(1000,1000))+1
  1773. else:
  1774. groups2 = np.array(load_cache_kmeans(kmeans_input = input2)["2"]["cluster_numbers"])
  1775. OneOne = 0
  1776. OneTwo = 0
  1777. TwoOne = 0
  1778. TwoTwo = 0
  1779. for z in np.arange(0,1000,1):
  1780. for y in np.arange(0,1000,1):
  1781. if groups1[y][z] == 1 and groups2[y][z] == 1:
  1782. OneOne += 1
  1783. if groups1[y][z] == 1 and groups2[y][z] == 2:
  1784. OneTwo += 1
  1785. if groups1[y][z] == 2 and groups2[y][z] == 1:
  1786. TwoOne += 1
  1787. if groups1[y][z] == 2 and groups2[y][z] == 2:
  1788. TwoTwo += 1
  1789. labels = ["1 & 1", "2 & 2", "1 & 2", "2 & 1"]
  1790. comparisons = (OneOne,TwoTwo,OneTwo,TwoOne)
  1791. index = np.arange(4)
  1792. bar_width = 0.9
  1793. plt.bar(index, comparisons, bar_width, color="green")
  1794. plt.xticks(index, labels) # labels get centered
  1795. for p in ax.patches: # add height label above bins
  1796. width = p.get_width()
  1797. height = p.get_height()
  1798. x, y = p.get_xy()
  1799. ax.annotate(height, (x + width/2, y + height*1.02), ha='center', fontsize=fontsize)
  1800.  
  1801. ax.set_xlabel("Compared groups", labelpad = 10, fontsize=9/5*fontsize, fontweight="bold")
  1802. ax.set_ylabel("Counts", labelpad = 10, fontsize=9/5*fontsize, fontweight="bold")
  1803. ax.tick_params(labelsize=fontsize, length=4/5*fontsize)
  1804. plt.title("Bar Chart", pad = 15, fontsize=round(9/5*fontsize), fontweight="bold")
  1805.  
  1806. plt.show()
  1807. #bar_chart(input1=1, input2=4)
  1808.  
  1809.  
  1810. # In[47]:
  1811.  
  1812.  
# Correlation between the cluster maps of kmeans inputs 1 (5 absolute values)
# and 2 (4 slopes).
corr_coefficient(kmeans_inputs=[1,2])
  1814.  
  1815.  
  1816. # In[453]:
  1817.  
  1818.  
#arr1 = np.array(load_cache_currents()["pixelplotting_array"][0])
# Compare the k=2 cluster maps from kmeans_input 7 (single slope, +100..+200 mV)
# and kmeans_input 1 (5 absolute values).
arr1 = np.array(load_cache_kmeans(kmeans_input = 7, data_processing=1)["2"]["cluster_numbers"])
arr2 = np.array(load_cache_kmeans(kmeans_input = 1, data_processing=1)["2"]["cluster_numbers"])
#arr2 = plotMedianCorrelation(return_only=True)[1]

corr_coefficient(compare_any=(arr1,arr2))
  1825.  
  1826.  
  1827. # In[446]:
  1828.  
  1829.  
#arr1 = np.array(load_cache_currents()["pixelplotting_array"][0])
# Correlate the kmeans_input 1 cluster map with the median-correlation map.
arr1 = np.array(load_cache_kmeans(kmeans_input = 1, data_processing=1)["2"]["cluster_numbers"])
arr2 = plotMedianCorrelation(return_only=True)[1]

corr_coefficient(compare_any=(arr1,arr2))
  1835.  
  1836.  
  1837. # In[19]:
  1838.  
  1839.  
  1840. def corr_coefficient(currents=None, kmeans_inputs=None, compare_any=None, random=False):
  1841. """Given two grids of same shape, this function determines the correlation coefficient between both grids.
  1842.  
  1843. ARGUMENTS:
  1844. grid1 (2D-Array): Grid of data, e. g. of shape 1000 x 1000
  1845. grid2 (2D-Array): Grid of data, e. g. of shape 1000 x 1000
  1846.  
  1847. RETURNS:
  1848. correlation (float): correlation coefficient. 1 means positively correlated, -1 means negatively correlated
  1849. """
  1850.  
  1851. if isinstance(currents, (list, tuple)):
  1852. scan_order = [185,183,181,182,186]
  1853. grid1 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(currents[0])])
  1854. if random:
  1855. grid2 = np.random.uniform(np.nanmin(grid1),np.nanmax(grid1),size=(np.shape(grid1)))
  1856. else:
  1857. grid2 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(currents[1])])
  1858.  
  1859. if isinstance(kmeans_inputs, (list, tuple)):
  1860. grid1 = np.array(load_cache_kmeans(kmeans_input = kmeans_inputs[0])["2"]["cluster_numbers"])
  1861. if random:
  1862. grid2 = np.random.randint(2,size=(np.shape(grid1)))+1
  1863. else:
  1864. grid2 = np.array(load_cache_kmeans(kmeans_input = kmeans_inputs[1])["2"]["cluster_numbers"])
  1865.  
  1866. if isinstance(compare_any, (list, tuple)):
  1867. grid1 = compare_any[0]
  1868. grid2 = compare_any[1]
  1869.  
  1870. grid1 = grid1.flatten() # flatten both arrays to simplify calculations
  1871. grid2 = grid2.flatten()
  1872.  
  1873. no_nan_ind1 = ~ np.isnan(grid1) # delete all NaN-values from both grids
  1874. grid1 = grid1[no_nan_ind1]
  1875. grid2 = grid2[no_nan_ind1]
  1876.  
  1877. no_nan_ind2 = ~ np.isnan(grid2)
  1878. grid1 = grid1[no_nan_ind2]
  1879. grid2 = grid2[no_nan_ind2]
  1880.  
  1881. if np.size(grid1) == np.size(grid2):
  1882. pass
  1883. else:
  1884. raise Exception("Both given arrays must have same size and shape.")
  1885.  
  1886. mean1 = np.mean(grid1) # compute the mean value
  1887. mean2 = np.mean(grid2)
  1888. stdev1 = np.std(grid1) # compute the standard deviation of the values
  1889. stdev2 = np.std(grid2)
  1890.  
  1891. N = np.size(grid1)
  1892.  
  1893. term1 = 0
  1894. for i in range(N):
  1895. term1 += (((grid1[i] - mean1) / stdev1) * ((grid2[i] - mean2) / stdev2))
  1896.  
  1897. correlation = 1 / (N - 1) * term1
  1898. return correlation
  1899.  
  1900.  
  1901. # In[608]:
  1902.  
  1903.  
  1904. scan_order = [185,183,181,182,186]
  1905. current_array1 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(185)]).ravel()[np.newaxis,:].T
  1906. current_array1 = current_array1 - np.nanmin(current_array1)
  1907. current_array2 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(183)]).ravel()[np.newaxis,:].T
  1908. current_array2 = current_array2 - np.nanmin(current_array2)
  1909. current_array3 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(181)]).ravel()[np.newaxis,:].T
  1910. current_array3 = current_array3 - np.nanmin(current_array3)
  1911. current_array4 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(182)]).ravel()[np.newaxis,:].T
  1912. current_array4 = current_array4 - np.nanmin(current_array4)
  1913. current_array5 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(186)]).ravel()[np.newaxis,:].T
  1914. current_array5 = current_array5 - np.nanmin(current_array5)
  1915. numpy_data = np.concatenate((current_array1,current_array2,current_array3,current_array4,current_array5),axis=1)
  1916. print(np.shape(current_array1))
  1917. print(current_array5[~np.isnan(current_array1)])
  1918. for i in range(1,20):
  1919. print(numpy_data[15032*i])
  1920.  
  1921.  
  1922. # In[40]:
  1923.  
  1924.  
# Count valid (non-NaN) pixels in the last (+200 mV) lock-in current grid.
pixelplotting_array = np.array(load_cache_currents()["pixelplotting_array"])[4].ravel()
print(len(pixelplotting_array[~np.isnan(pixelplotting_array)]))
  1927.  
  1928.  
  1929. # In[53]:
  1930.  
  1931.  
# Violin plot of the direct-signal current distributions, exported to PNG.
violin(currents=True, data_processing=3, savepng=True)
  1933.  
  1934.  
  1935. # In[61]:
  1936.  
  1937.  
# Plot the XBIC map of scan 181 on its own.
plotXBIC2(singleplots=[181])
  1939.  
  1940.  
  1941. # In[96]:
  1942.  
  1943.  
  1944. scan_order = [185,183,181,182,186]
  1945.  
  1946.  
  1947. current_array1 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(185)])
  1948. current_array2 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(183)])
  1949. current_array3 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(181)])
  1950. current_array4 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(182)])
  1951. current_array5 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(186)])
  1952.  
  1953. x = [-200,-100,0,100,200]
  1954. y = [np.nanstd(current_array1),np.nanstd(current_array2),np.nanstd(current_array3),np.nanstd(current_array4),np.nanstd(current_array5)]
  1955. plt.plot(x,y)
  1956.  
  1957.  
  1958. # In[97]:
  1959.  
  1960.  
  1961. current_array1 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(185)])
  1962. current_array2 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(183)])
  1963. current_array3 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(181)])
  1964. current_array4 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(182)])
  1965. current_array5 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(186)])
  1966.  
  1967. x = [-200,-100,0,100,200]
  1968. y = [np.nanstd(current_array1),np.nanstd(current_array2),np.nanstd(current_array3),np.nanstd(current_array4),np.nanstd(current_array5)]
  1969. plt.plot(x,y)
  1970.  
  1971.  
  1972. # In[98]:
  1973.  
  1974.  
# Violin plot of the direct-signal currents without a title, exported to PNG.
violin(currents=True, data_processing=3, title=False, savepng=True)
  1976.  
  1977.  
  1978. # In[20]:
  1979.  
  1980.  
  1981. def violin(kmeans_input=7,currents=False, title=True, data_processing=1, dpi=None, savepng=False):
  1982. start = time.time()
  1983.  
  1984. figsize = (30*0.75,15*0.75)
  1985. fontsize = figsize[0]
  1986. fig, ax = plt.subplots(figsize=figsize)
  1987.  
  1988. scan_order = [185,183,181,182,186]
  1989.  
  1990. xlabels = ["-200","-100","0","+100","+200"]
  1991.  
  1992. if currents:
  1993. if data_processing == 1:
  1994. current_array1 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(185)]).ravel()[np.newaxis,:].T
  1995. current_array2 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(183)]).ravel()[np.newaxis,:].T
  1996. current_array3 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(181)]).ravel()[np.newaxis,:].T
  1997. current_array4 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(182)]).ravel()[np.newaxis,:].T
  1998. current_array5 = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(186)]).ravel()[np.newaxis,:].T
  1999. if data_processing == 3:
  2000. current_array1 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(185)]).ravel()[np.newaxis,:].T
  2001. current_array1 = current_array1 - np.nanmin(current_array1)
  2002. current_array2 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(183)]).ravel()[np.newaxis,:].T
  2003. current_array2 = current_array2 - np.nanmin(current_array2)
  2004. current_array3 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(181)]).ravel()[np.newaxis,:].T
  2005. current_array3 = current_array3 - np.nanmin(current_array3)
  2006. current_array4 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(182)]).ravel()[np.newaxis,:].T
  2007. current_array4 = current_array4 - np.nanmin(current_array4)
  2008. current_array5 = np.array(load_cache_currents()["pixelplotting_array_direct"][scan_order.index(186)]).ravel()[np.newaxis,:].T
  2009. current_array5 = current_array5 - np.nanmin(current_array5)
  2010. numpy_data = np.concatenate((current_array1,current_array2,current_array3,current_array4,current_array5),axis=1)
  2011. df = pd.DataFrame(data=numpy_data, columns=xlabels)
  2012. ax = sn.violinplot(data=df, scale="count",inner="box")
  2013.  
  2014. else:
  2015. numpy_dataDic = {}
  2016. for scan_index in scan_order:
  2017. current_array_scan = np.array(load_cache_currents()["pixelplotting_array"][scan_order.index(scan_index)])
  2018. data = current_array_scan.ravel()[np.newaxis,:].T
  2019. scan = np.repeat(scan_index,len(data))[np.newaxis,:].T
  2020. group = np.array(load_cache_kmeans(kmeans_input = kmeans_input)["2"]["cluster_numbers"]).ravel()[np.newaxis,:].T
  2021. numpy_dataDic[scan_index] = np.concatenate((data,scan,group),axis=1)
  2022. #plt.axvline(x=scan_index-181.5, color="grey", lw=1)
  2023. numpy_data = np.concatenate((numpy_dataDic[185],numpy_dataDic[183],numpy_dataDic[181],numpy_dataDic[182],numpy_dataDic[186]))
  2024. df = pd.DataFrame(data=numpy_data, columns=["XBIC","Bias voltage (mV)","Group"])
  2025. df = df.replace(1.0,"Group 1")
  2026. df = df.replace(2.0,"Group 2")
  2027. ax = sn.violinplot(x = "Bias voltage (mV)", y = "XBIC", hue = "Group", data = df, order=[185,183,181,182,186], palette = "viridis", split=True, linewidth = 0, scale = "count", inner = "box", ax = ax)
  2028.  
  2029. ax.set_xlabel("Bias voltage (mV)", labelpad = 10, fontsize=9/5*fontsize, fontweight="bold")
  2030. if data_processing == 1:
  2031. ax.set_ylabel("XBIC", labelpad = 10, fontsize=9/5*fontsize, fontweight="bold")
  2032. if data_processing == 3:
  2033. ax.set_ylabel("Current - Current$\mathregular{_{min}}$", labelpad = 10, fontsize=9/5*fontsize, fontweight="bold")
  2034. ax.tick_params(labelsize=fontsize, length=4/5*fontsize)
  2035.  
  2036. ax.xaxis.set_ticks([0,1,2,3,4])
  2037. ax.xaxis.set_ticklabels(xlabels, fontsize=fontsize)
  2038.  
  2039. # add minor ticks for XBIC
  2040. if data_processing == 1:
  2041. ax.yaxis.set_minor_locator(MultipleLocator(10))
  2042. if data_processing == 3:
  2043. ax.yaxis.set_minor_locator(MultipleLocator(500))
  2044. ax.tick_params(which='minor', length=2/5*fontsize)
  2045. plt.grid(axis="y", which="minor",lw=0.25)
  2046.  
  2047. plt.grid(axis="y")
  2048. if title:
  2049. plt.title("Current distribution", pad = 15, fontsize=round(9/5*fontsize), fontweight="bold")
  2050.  
  2051. # add vertical lines
  2052. for x in range(5):
  2053. plt.axvline(x=x+0.5, color="grey", lw=1)
  2054.  
  2055. # meta = {
  2056. # "colour": {"a": "red", "b": "blue"},
  2057. # "label": {"a": "this a", "b": "this b"},
  2058. # }
  2059.  
  2060. # handles = [
  2061. # Patch(
  2062. # facecolor=meta["colour"][x],
  2063. # label=meta["label"][x],
  2064. # )
  2065. # for x in meta["colour"].keys()
  2066. # ]
  2067.  
  2068. # fig.legend(
  2069. # handles=handles,
  2070. # ncol=2,
  2071. # loc="upper right",
  2072. # bbox_to_anchor=(0.5, 0.5),
  2073. # fontsize=10,
  2074. # handlelength=1,
  2075. # handleheight=1,
  2076. # frameon=False,
  2077. # )
  2078.  
  2079. ax.legend(prop={'size': fontsize})
  2080. #ax.get_legend().remove()
  2081.  
  2082. plt.show()
  2083.  
  2084. end = time.time()
  2085. print(f"Plotting of the violin plots took {str(round(end-start,2))} seconds.")
  2086.  
  2087. if savepng:
  2088. now = datetime.now()
  2089. dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
  2090. if dpi is None:
  2091. fig.savefig("savefig/violin_" + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
  2092. else:
  2093. fig.savefig("savefig/violin_" + dt_string + ".png", dpi=dpi, bbox_inches="tight")
  2094.  
  2095.  
  2096. # In[434]:
  2097.  
  2098.  
# Plot the correlation of above/below-median group maps between consecutive scans.
plotMedianCorrelation()
  2100.  
  2101.  
  2102. # In[133]:
  2103.  
  2104.  
  2105. for i in [2,3,4]:
  2106. currents = np.asarray(load_cache_currents()["pixelplotting_array"]) # Shape: (5, 1000, 1000)
  2107. medians = np.nanmedian(currents, axis=(1, 2), keepdims=True) # Shape: (5,)
  2108. nanmaxs = np.nanmax(currents, axis=(1, 2), keepdims=True)
  2109. nanmins = np.nanmin(currents, axis=(1, 2), keepdims=True)
  2110.  
  2111. results = np.full_like(currents, np.nan) # Shape: (5, 1000, 1000)
  2112.  
  2113. results[currents > medians] = 1.0
  2114. results[currents < medians] = 2.0
  2115.  
  2116. #results[currents > medians and currents < (nanmaxs - 10)] = 1.0
  2117. #results[currents < medians and currents < (nanmins + 10)] = 2.0
  2118.  
  2119. counter_good_keeper = 0
  2120. counter_bad_keeper = 0
  2121. counter_not_keeper = 0
  2122.  
  2123. mask = np.empty((1000,1000),dtype=bool)
  2124.  
  2125. counter = 0
  2126. not_NaN_counter = 0
  2127.  
  2128. for z in np.linspace(0,999,1000,dtype=int):
  2129. for y in np.linspace(0,999,1000,dtype=int):
  2130. yz_list = []
  2131. for i in range(5):
  2132. yz_list.append(results[i][y,z])
  2133. if all(x==yz_list[0] for x in yz_list) and yz_list[0] == 1.0:
  2134. counter_good_keeper += 1
  2135. mask[y,z] = True
  2136. if all(x==yz_list[0] for x in yz_list) and yz_list[0] == 2.0:
  2137. counter_bad_keeper += 1
  2138. mask[y,z] = True
  2139. if not all(x==yz_list[0] for x in yz_list):
  2140. counter_not_keeper += 1
  2141. mask[y,z] = False
  2142.  
  2143. if not np.isnan(np.sum(yz_list)):
  2144. not_NaN_counter += 1
  2145.  
  2146.  
  2147. counter += 1
  2148. if counter in [10000,100000,400000,900000,999999]:
  2149. print(counter)
  2150.  
  2151. print("------")
  2152. print(counter)
  2153. print(not_NaN_counter)
  2154. print(counter_not_keeper)
  2155. print(counter_good_keeper)
  2156. print(counter_bad_keeper)
  2157.  
  2158. not_keepers = np.empty((1000,1000))
  2159.  
  2160. not_keepers[mask] = 1.0
  2161. not_keepers[~mask] = 2.0
  2162. pixelplotting_array = np.array(load_cache_currents()["pixelplotting_array"])
  2163. not_keepers[np.isnan(pixelplotting_array).any(axis=0)] = np.float("NaN")
  2164.  
  2165. plotClusterPosition(plot_any=results[i], title="" ,no_suptitle=True, savepng=True)
  2166.  
  2167.  
  2168. # In[161]:
  2169.  
  2170.  
  2171. def max_interval(precision=0.1, interval_size=20, scan_no=0, return_only=False):
  2172.  
  2173. currents = np.array(load_cache_currents()["pixelplotting_array"])[scan_no].ravel()
  2174. c_min = np.nanmin(currents)
  2175. c_max = np.nanmax(currents)
  2176.  
  2177. counts = np.empty((1,2))
  2178. counter = 0
  2179. for A in np.arange(c_min,c_max,precision):
  2180. count = np.size(np.where(np.logical_and(currents>A,currents<A+interval_size)))
  2181. if counter == 0:
  2182. counts[0,:] = np.array([[A, count]])
  2183. else:
  2184. counts = np.concatenate((counts, np.array([[A, count]])), axis=0)
  2185. counter +=1
  2186.  
  2187. max_counts = counts[np.where(counts == np.max(counts[:,1]))[0]]
  2188. if return_only:
  2189. return max_counts
  2190.  
  2191. print(max_interval)
  2192.  
  2193.  
  2194. # In[165]:
  2195.  
  2196.  
# Print the most-populated current interval(s) of scan 0 (default window width 20).
max_interval()
  2198.  
  2199.  
  2200. # In[164]:
  2201.  
  2202.  
  2203. max_size = 100
  2204.  
  2205. x = np.arange(1,max_size)
  2206. y = []
  2207. y2= []
  2208. y3= []
  2209. y4= []
  2210. y5= []
  2211.  
  2212. for i in range(1,max_size):
  2213. max_counts = max_interval(precision=1,interval_size=i, scan_no=0, return_only=True)[0,0]
  2214. y.append(max_counts)
  2215.  
  2216. for i in range(1,max_size):
  2217. max_counts = max_interval(precision=1,interval_size=i, scan_no=1, return_only=True)[0,0]
  2218. y2.append(max_counts)
  2219.  
  2220. for i in range(1,max_size):
  2221. max_counts = max_interval(precision=1,interval_size=i, scan_no=2, return_only=True)[0,0]
  2222. y3.append(max_counts)
  2223.  
  2224. for i in range(1,max_size):
  2225. max_counts = max_interval(precision=1,interval_size=i, scan_no=3, return_only=True)[0,0]
  2226. y4.append(max_counts)
  2227.  
  2228. for i in range(1,max_size):
  2229. max_counts = max_interval(precision=1,interval_size=i, scan_no=4, return_only=True)[0,0]
  2230. y5.append(max_counts)
  2231.  
  2232. plt.plot(x,y)
  2233. plt.plot(x,y2)
  2234. plt.plot(x,y3)
  2235. plt.plot(x,y4)
  2236. plt.plot(x,y5)
  2237.  
  2238.  
  2239. # In[21]:
  2240.  
  2241.  
  2242. def plotMedianCorrelation(return_only=False, dpi=None, savepng=False):
  2243.  
  2244. currents = np.asarray(load_cache_currents()["pixelplotting_array"]) # Shape: (5, 1000, 1000)
  2245. medians = np.nanmedian(currents, axis=(1, 2), keepdims=True) # Shape: (5,)
  2246.  
  2247. results = np.full_like(currents, np.nan) # Shape: (5, 1000, 1000)
  2248.  
  2249. results[currents > medians] = 1.0
  2250. results[currents < medians] = 2.0
  2251. results[currents == medians] = np.float("NaN")
  2252.  
  2253. if return_only:
  2254. return results
  2255.  
  2256. figsize=(15,15)
  2257. fontsize = figsize[0]//2
  2258. fig = plt.figure(figsize=figsize)
  2259. ax1 = fig.add_subplot(111)
  2260.  
  2261. x = [1,2,3,4]
  2262. correlations = []
  2263.  
  2264. for i in range(4):
  2265. arr1 = results[i]
  2266. arr2 = results[i+1]
  2267. correlation = (corr_coefficient(compare_any=[arr1,arr2]))
  2268. correlations.append(correlation)
  2269. print(correlations)
  2270.  
  2271. ax1.plot(x,correlations,"-o",markersize=15, linewidth=1.2)
  2272.  
  2273. # y and z labels
  2274. ax1.set_xlabel("Compared scans", labelpad = 15, fontsize=3*fontsize, fontweight="bold")
  2275. ax1.set_ylabel("Correlation coefficient", labelpad = 15, fontsize=3*fontsize, fontweight="bold")
  2276. ax1.set_title("Median group correlations", pad = 15, fontsize=round(3*fontsize), fontweight="bold")
  2277. ax1.tick_params(labelsize=15/5*fontsize, length=7/5*fontsize)
  2278.  
  2279. ax1.xaxis.set_ticks([1,2,3,4])
  2280. ax1.xaxis.set_ticklabels((["-200 to -100mV","-100 to 0mV","0 to +100mV","+100 to +200mV"]))
  2281.  
  2282. ax1.yaxis.set_minor_locator(MultipleLocator(5))
  2283. ax1.tick_params(which='minor', length=4/5*fontsize)
  2284. plt.grid(axis="y", which="minor",lw=0.25)
  2285. plt.grid(axis="y")
  2286.  
  2287.  
  2288. if savepng:
  2289. now = datetime.now()
  2290. dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
  2291. if dpi is None:
  2292. fig.savefig("savefig/MedianCorrelation_" + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
  2293. else:
  2294. fig.savefig("savefig/MedianCorrelation_" + dt_string + ".png", dpi=dpi, bbox_inches="tight")
  2295.  
  2296.  
  2297. # In[21]:
  2298.  
  2299.  
# Plot mean cluster curves for each 7.x k-means input variant.
for i in [7,7.1,7.2,7.3]:
    plotClusterCurves(kmeans_input=i)
  2302.  
  2303.  
  2304. # In[496]:
  2305.  
  2306.  
# Plot cluster position maps for each 7.x k-means input variant.
for i in [7,7.1,7.2,7.3]:
    plotClusterPosition(kmeans_input=i)
  2309.  
  2310.  
  2311. # In[431]:
  2312.  
  2313.  
# Load the cached k-means dictionary for every k-means input variant and run
# the k-selection inspection.  NOTE(review): ``i`` comes from range(1, 10) and
# is always an int, so the 8.1 in the membership test can never match — confirm
# whether a float variant was intended here.
for i in range(1,10):

    if i in [8,8.1,9]:
        kmeansDic = load_cache_kmeans(kmeans_input=i, k_highest=10, silhouette_setting=False)
    else:
        kmeansDic = load_cache_kmeans(kmeans_input=i, k_highest=10, silhouette_setting=True)
    # find_k() presumably reads the global kmeansDic set above — TODO confirm.
    find_k()
  2321.  
  2322.  
  2323. # In[614]:
  2324.  
  2325.  
# Plot cluster position maps (data_processing=3) for every k-means input variant.
for i in [1,2,3.1,4,5,6,7,8,9]:

    plotClusterPosition(data_processing=3, kmeans_input=i)
    #plotClusterPosition(kmeans_input=i)
  2330.  
  2331.  
  2332. # In[82]:
  2333.  
  2334.  
# Batch-generate (and optionally save, when SAVE is True) cluster-position
# figures for every combination of k, k-means input, and normalization flags.
# NOTE(review): indentation reconstructed from a flattened paste — the nesting
# below (everything inside the four loops) is the most plausible reading; verify
# against the original notebook.
SAVE = False

# code for saving all pictures:

# save all figures for different kmeans inputs

for k_index in [2]:
    #for kmeans_input_index in [1,2,3.1,4,5,6,7]:
    for kmeans_input_index in [1,2,3.1,4,5,6,7,8,9]:
        for normalize_samples_index in [False]:
            for normalize_curves_index in [False]:
                # load the respective cached dictionary for the definition of "kmeansDic" throughout this project

                silhouette_setting = False # True should be selected for complete data. If False is selected, then silhouette will just be assigned to 0 and omitted, saving a lot of computation time.
                sample_size = 300000 # this number refers to the sample size in the silhouette_score() function which is computationally very expensive
                k_lowest = 2
                k_highest = 3
                kmeans_input = kmeans_input_index
                data_processing = 1
                normalize_samples = normalize_samples_index

                # NOTE(review): this try/except guards plain assignments that cannot
                # raise — presumably a leftover from a larger guarded section.
                try:

                    if silhouette_setting:
                        silhouette_str = f"_{sample_size}-samplesize"
                    else:
                        silhouette_str = ""

                except:
                    continue

                if not normalize_samples:
                    normalize_samples_str = "feature normalized"
                else:
                    normalize_samples_str = "feature & sample normalized"
                suptitle = f"k = {k_index}, kmeans-input = {kmeans_input}, " + normalize_samples_str
                if normalize_curves_index:
                    savepng_str = f"{k_index}-k_{kmeans_input_index}-kmeans-input_{normalize_samples_index}-normalize-samples_True-normalize-curves"
                else:
                    savepng_str = f"{k_index}-k_{kmeans_input_index}-kmeans-input_{normalize_samples_index}-normalize-samples"

                plotClusterPosition(k=k_index, kmeans_input=kmeans_input_index, title="", no_suptitle=True, suptitle="", dpi=200, savepng=SAVE, savepng_str=savepng_str)
                #plotClusterCurves(k=k_index,compare=True,k_curves=1, kmeans_input=kmeans_input_index,normalize_curves=normalize_curves_index, suptitle="", title=False, dpi=200, savepng=SAVE, savepng_str=savepng_str)
  2378.  
  2379.  
# <font size="3"><b>Run the cells at the bottom to save variables in working memory, speeding up calculations that would otherwise repeat computationally expensive code.</b></font>
  2381.  
  2382. # In[195]:
  2383.  
  2384.  
  2385. # To speed up all calculations remarkably: Create a dictionary with all the warp matrices:
  2386. # Run each time you start working this notebook to save the values in the global namespace.
  2387. start = time.time()
  2388. CurrentsDic = {
  2389. "pixelplotting_array": plotIV(return_only=True)[0],
  2390. "currents_mean": plotIV(return_only=True)[1],
  2391. "pixelplotting_array_direct": plotIV(data_processing=3, return_only=True)[0]
  2392. }
  2393.  
  2394. end = time.time()
  2395. print("Loading all current arrays took", round((end-start), 2), "seconds.")
  2396.  
  2397. ### SAVE CACHE ###
  2398. # with these settings (copied from http://www.silx.org/doc/silx/0.2.0/modules/io/dictdump.html), the file size is much smaller, but loading takes much longer, so better not use it!
  2399. SAVE = False
  2400.  
  2401. if SAVE:
  2402.  
  2403. compression = False
  2404.  
  2405. os.chdir("/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan") # just make sure that we are talking about the correct location in the file system
  2406.  
  2407. if compression:
  2408. create_ds_args = {'compression': "gzip",
  2409. 'shuffle': True,
  2410. 'fletcher32': True}
  2411. else:
  2412. create_ds_args = None
  2413.  
  2414. saveh5 = input("Do you want to save the dictionary as a h5 file that other functions will refer to ? (y/n)\n")
  2415. if saveh5.lower() in ["y", "yes"]:
  2416. if not compression:
  2417. fname = "CurrentsDic.h5"
  2418. else:
  2419. fname = "CurrentsDic_COMPRESSED.h5"
  2420. dicttoh5(CurrentsDic, "Cache/"+ fname, create_dataset_args=create_ds_args)
  2421. print("The nested dictionary was saved as \"" + fname + "\" in the Cache folder.")
  2422. if saveh5.lower() in ["n", "no"]:
  2423. now = datetime.now()
  2424. dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
  2425. fname = "CurrentsDic_no-save_" + dt_string + ".h5"
  2426. dicttoh5(CurrentsDic, "Cache/" + fname)
  2427. print("The nested dictionary was saved as \"" + fname + "\" in the Cache folder.")
  2428.  
  2429.  
  2430. # In[24]:
  2431.  
  2432.  
# To speed up all calculations remarkably: Create a dictionary with all the warp matrices:
# Run each time you start working this notebook to save the values in the global namespace.
# NOTE(review): keys are target scan numbers; ImageRegistration(ref, target)
# presumably returns the warp aligning `target` onto reference scan `ref`
# (178 for 179/180, 181 for the rest) — confirm against its definition.
start = time.time()
ImageRegistrationDic = {
    "179": ImageRegistration(178,179),
    "180": ImageRegistration(178,180),
    "182": ImageRegistration(181,182),
    "183": ImageRegistration(181,183),
    "185": ImageRegistration(181,185),
    "186": ImageRegistration(181,186),
    "191": ImageRegistration(181,191),
    "192": ImageRegistration(181,192)
}
end = time.time()
print("Calculating all warp matrices took", round((end-start), 2), "seconds.")

### SAVE CACHE ###
# with these settings (copied from http://www.silx.org/doc/silx/0.2.0/modules/io/dictdump.html), the file size is much smaller, but loading takes much longer, so better not use it!
SAVE = False

if SAVE:

    compression = False

    os.chdir("/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan") # just make sure that we are talking about the correct location in the file system

    if compression:
        create_ds_args = {'compression': "gzip",
                          'shuffle': True,
                          'fletcher32': True}
    else:
        create_ds_args = None

    saveh5 = input("Do you want to save the dictionary as a h5 file that other functions will refer to ? (y/n)\n")
    if saveh5.lower() in ["y", "yes"]:
        if not compression:
            fname = "ImageRegistrationDic.h5"
        else:
            fname = "ImageRegistrationDic_COMPRESSED.h5"
        dicttoh5(ImageRegistrationDic, "Cache/"+ fname, create_dataset_args=create_ds_args)
        print("The nested dictionary was saved as \"" + fname + "\" in the Cache folder.")
    if saveh5.lower() in ["n", "no"]:
        now = datetime.now()
        dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
        fname = "ImageRegistrationDic_no-save_" + dt_string + ".h5"
        dicttoh5(ImageRegistrationDic, "Cache/" + fname)
        print("The nested dictionary was saved as \"" + fname + "\" in the Cache folder.")
  2480.  
  2481.  
  2482. # In[588]:
  2483.  
  2484.  
# Build (and optionally save) the k-means cache for every k-means input variant.
# NOTE(review): indentation reconstructed from a flattened paste — everything
# below is read as being inside the outer for-loop; verify against the notebook.
for i in [1,2,3.1,4,5,6,7,8,9]:
    # Create a nested dictionary for all cluster arrays, sum of squared distances, silhouette and calinski_harabasz scores

    start = time.time()

    kmeansDic = {}

    silhouette_setting = False # True should be selected for complete data. If False is selected, then silhouette will just be assigned to 0 and omitted, saving a lot of computation time.
    sample_size = 300000 # this number refers to the sample size in the silhouette_score() function which is computationally very expensive
    k_lowest = 2
    k_highest = 3
    kmeans_input = i
    data_processing = 3
    normalize_samples = False

    for k in range(k_lowest, k_highest+1):
        start_k = time.time()
        cluster_numbers, ssqd, silhouette, calinski_harabasz, threshold = kmeans_clustering(data_processing=data_processing, kmeans_input=kmeans_input, normalize_samples=normalize_samples, k=k, sample_size=sample_size, silhouette=silhouette_setting)
        kmeansDic[str(k)] = {"cluster_numbers": cluster_numbers, "ssqd": ssqd, "silhouette": silhouette, "calinski_harabasz": calinski_harabasz, "threshold": threshold}
        end = time.time()
        print("Finished entry in nested dictionary for k = " + str(k) + ". That took " + str(round((end-start_k), 2)) + " seconds.")

    end = time.time()
    print("Calculating all cluster arrays took", round((end-start), 2), "seconds.")

    ### SAVE CACHE ###
    # with these settings (copied from http://www.silx.org/doc/silx/0.2.0/modules/io/dictdump.html), the file size is much smaller, but loading takes much longer, so better not use it!

    SAVE = False

    if SAVE:

        compression = False

        os.chdir("/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan") # just make sure that we are talking about the correct location in the file system

        if compression:
            create_ds_args = {'compression': "gzip",
                              'shuffle': True,
                              'fletcher32': True}
        else:
            create_ds_args = None

        # NOTE(review): unlike the sibling cells, this one hard-codes "y"
        # instead of prompting, so saving always takes the "yes" branch.
        saveh5 = "y"
        if saveh5.lower() in ["y", "yes"]:
            if silhouette_setting:
                silhouette_str = f"_{sample_size}-samplesize"
            else:
                silhouette_str = ""
            if not compression:
                fname = f"kmeansDic({k_lowest},{k_highest})_{kmeans_input}-kmeans-input_{normalize_samples!s}-normalize-samples_{data_processing}-data-processing_{silhouette_setting!s}-silhouette{silhouette_str}.h5"
            else:
                fname = f"kmeansDic({k_lowest},{k_highest})_{kmeans_input}-kmeans-input_{normalize_samples!s}-normalize-samples_{data_processing}-data-processing_{silhouette_setting!s}-silhouette{silhouette_str}_COMPRESSED.h5"

            dicttoh5(kmeansDic, "Cache/"+ fname, create_dataset_args=create_ds_args)
            print("The nested dictionary was saved as \"" + fname + "\" in the Cache folder.")
        if saveh5.lower() in ["n", "no"]:
            now = datetime.now()
            dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
            fname = "kmeansDic_no-save_" + dt_string + ".h5"
            dicttoh5(kmeansDic, "Cache/" + fname)
            print("The nested dictionary was saved as \"" + fname + "\" in the Cache folder.")
  2547.  
  2548.  
  2549. # In[435]:
  2550.  
  2551.  
# Create a nested dictionary for all cluster arrays, sum of squared distances, silhouette and calinski_harabasz scores
# (single run for kmeans_input = 1.1; the sibling cell above loops over all variants).

start = time.time()

kmeansDic = {}

silhouette_setting = False # True should be selected for complete data. If False is selected, then silhouette will just be assigned to 0 and omitted, saving a lot of computation time.
sample_size = 300000 # this number refers to the sample size in the silhouette_score() function which is computationally very expensive
k_lowest = 2
k_highest = 3
kmeans_input = 1.1
data_processing = 1
normalize_samples = False

for k in range(k_lowest, k_highest+1):
    start_k = time.time()
    cluster_numbers, ssqd, silhouette, calinski_harabasz, threshold = kmeans_clustering(data_processing=data_processing, kmeans_input=kmeans_input, normalize_samples=normalize_samples, k=k, sample_size=sample_size, silhouette=silhouette_setting)
    kmeansDic[str(k)] = {"cluster_numbers": cluster_numbers, "ssqd": ssqd, "silhouette": silhouette, "calinski_harabasz": calinski_harabasz, "threshold": threshold}
    end = time.time()
    print("Finished entry in nested dictionary for k = " + str(k) + ". That took " + str(round((end-start_k), 2)) + " seconds.")

end = time.time()
print("Calculating all cluster arrays took", round((end-start), 2), "seconds.")

### SAVE CACHE ###
# with these settings (copied from http://www.silx.org/doc/silx/0.2.0/modules/io/dictdump.html), the file size is much smaller, but loading takes much longer, so better not use it!

SAVE = False

if SAVE:

    compression = False

    os.chdir("/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan") # just make sure that we are talking about the correct location in the file system

    if compression:
        create_ds_args = {'compression': "gzip",
                          'shuffle': True,
                          'fletcher32': True}
    else:
        create_ds_args = None

    saveh5 = input("Do you want to save the dictionary as a h5 file that other functions will refer to ? (y/n)\n")
    if saveh5.lower() in ["y", "yes"]:
        if silhouette_setting:
            silhouette_str = f"_{sample_size}-samplesize"
        else:
            silhouette_str = ""
        if not compression:
            fname = f"kmeansDic({k_lowest},{k_highest})_{kmeans_input}-kmeans-input_{normalize_samples!s}-normalize-samples_{data_processing}-data-processing_{silhouette_setting!s}-silhouette{silhouette_str}.h5"
        else:
            fname = f"kmeansDic({k_lowest},{k_highest})_{kmeans_input}-kmeans-input_{normalize_samples!s}-normalize-samples_{data_processing}-data-processing_{silhouette_setting!s}-silhouette{silhouette_str}_COMPRESSED.h5"

        dicttoh5(kmeansDic, "Cache/"+ fname, create_dataset_args=create_ds_args)
        print("The nested dictionary was saved as \"" + fname + "\" in the Cache folder.")
    if saveh5.lower() in ["n", "no"]:
        now = datetime.now()
        dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
        fname = "kmeansDic_no-save_" + dt_string + ".h5"
        dicttoh5(kmeansDic, "Cache/" + fname)
        print("The nested dictionary was saved as \"" + fname + "\" in the Cache folder.")
  2613.  
  2614.  
  2615. # In[22]:
  2616.  
  2617.  
  2618. def load_cache_currents():
  2619.  
  2620. os.chdir("/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan")
  2621.  
  2622. fpath = "/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan/Cache/CurrentsDic.h5"
  2623. CurrentsDic = h5py.File(fpath, "r")
  2624.  
  2625. return CurrentsDic
  2626.  
  2627. CurrentsDic = load_cache_currents()
  2628.  
  2629.  
  2630. # In[23]:
  2631.  
  2632.  
  2633. # load the respective cached dictionary for the definition of "kmeansDic" throughout this project
  2634. def load_cache_IR():
  2635.  
  2636. os.chdir("/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan")
  2637.  
  2638. fpath = "/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan/Cache/ImageRegistrationDic.h5"
  2639. ImageRegistrationDic = h5py.File(fpath, "r")
  2640.  
  2641. return ImageRegistrationDic
  2642.  
  2643. ImageRegistrationDic = load_cache_IR()
  2644.  
  2645.  
  2646. # In[24]:
  2647.  
  2648.  
  2649. # load the respective cached dictionary for the definition of "kmeansDic" throughout this project
  2650. def load_cache_kmeans(silhouette_setting = False, sample_size = 300000, k_lowest = 2, k_highest = 3, kmeans_input = 1, data_processing = 1, normalize_samples = False):
  2651.  
  2652. os.chdir("/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan")
  2653.  
  2654. if silhouette_setting:
  2655. silhouette_str = f"_{sample_size}-samplesize"
  2656. else:
  2657. silhouette_str = ""
  2658.  
  2659. fpath = "/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan/Cache/"+ f"kmeansDic({k_lowest},{k_highest})_{kmeans_input}-kmeans-input_{normalize_samples!s}-normalize-samples_{data_processing}-data-processing_{silhouette_setting!s}-silhouette{silhouette_str}.h5"
  2660.  
  2661. kmeansDic = h5py.File(fpath, "r")
  2662.  
  2663. return kmeansDic
  2664.  
  2665. kmeansDic = load_cache_kmeans()
  2666.  
  2667.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement