Advertisement
OreganoHauch

All code (as of 15th Nov, 2020)

Nov 15th, 2020
109
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 57.43 KB | None | 0 0
  1. # import external modules
  2. import h5py
  3. import time
  4. import math
  5. import cv2
  6. import os
  7. import os.path
  8. import shutil
  9. import pylatex
  10.  
  11. import numpy as np
  12. import matplotlib as mpl
  13. import matplotlib.pyplot as plt
  14. import matplotlib.cm as cm
  15. import matplotlib.colors as colors
  16. import scipy.constants as constants
  17.  
  18. from IPython.core.display import display, HTML
  19. from shutil import copyfile
  20. from os import path
  21. from datetime import datetime # for saving figures and files with current data/time in file name
  22. from scipy.interpolate import griddata
  23. from mpl_toolkits.axes_grid.inset_locator import (inset_axes, InsetPosition, mark_inset) # create tiny plot inside of another plot
  24. from mpl_toolkits.axes_grid1 import make_axes_locatable
  25. from mpl_toolkits.axes_grid1.colorbar import colorbar
  26. from matplotlib.colors import ListedColormap, LinearSegmentedColormap
  27. from matplotlib.ticker import (MultipleLocator, FormatStrFormatter, AutoMinorLocator, AutoLocator)
  28. from sklearn.preprocessing import MinMaxScaler, minmax_scale
  29. from sklearn.cluster import KMeans
  30. from sklearn.metrics import silhouette_score, calinski_harabasz_score
  31. from scipy.optimize import curve_fit
  32. from silx.io.dictdump import dicttoh5 # save computationally expensive dictionaries as h5 files
  33.  
  34.  
  35. def load_h5(scan, datatype, show_time=False):
  36. '''This function loads only a few selected h5 files, but the definition should suffice for all the files I might need throughout the work on my bachelor's thesis.'''
  37.  
  38. start = time.time()
  39.  
  40. try:
  41. if datatype == "positions":
  42. h5_file = h5py.File("0016_EMPA_MSH_003c/scan_00" + str(
  43. scan) + "/positions.h5", "r")
  44. else:
  45. h5_file = h5py.File("0016_EMPA_MSH_003c/scan_00" + str(
  46. scan) + "/data/" + datatype + ".h5", "r")
  47. return h5_file
  48. except:
  49. print("You entered '" + str(
  50. scan) + "' for the argument 'scan'. The function only takes an integer as first argument. Its value must lie between 178 and 192, except for 184, because this scan was aborted.\nThe second argument must be a string for the datatype, e.g. 'positions' or 'fluo_roi'.")
  51.  
  52. end = time.time()
  53.  
  54. if show_time:
  55. print("Loading file " + datatype + ".h5 from scan 00" + str(scan) + "took " + str(end - start) + "seconds.")
  56.  
  57.  
  58. def counts2amps(scan, nA=True, negative=False, solar_cell_channel_only=False):
  59. '''This function takes the integer scan number as an argument, looks for the counter data inside of the h5 file,
  60. converts all the counts into XBIC in (nA) and returns the values as an ndarray.'''
  61.  
  62. # load counts and turn it into 2d array
  63. if scan in [179, 187, 188, 189, 190]: # XBIC measurements without lock-in amplification
  64. loaded_counts = load_h5(scan, "counter")["/data/solar_cell_0-10V"]
  65. counts_as_array = np.array(loaded_counts)
  66. else:
  67. loaded_counts = load_h5(scan, "counter")["/data/lock_in_R_0-10V"]
  68. counts_as_array = np.array(loaded_counts).astype(
  69. float) # this integer array must be converted to float, otherwise np.reciprocal will return only zeros
  70.  
  71. if negative:
  72. if scan in [182, 186, 187,
  73. 190]: # these scans show a current signal at the negative branch of the V2F converters
  74. counts_as_array *= -1
  75.  
  76. if solar_cell_channel_only:
  77. if scan in [178, 179, 180, 181, 183, 185, 188, 191, 192]:
  78. loaded_counts = load_h5(scan, "counter")["/data/solar_cell_0-10V"]
  79. counts_as_array = np.array(loaded_counts)
  80. if scan in [182, 186]:
  81. loaded_counts = load_h5(scan, "counter")["/data/solar_cell_-10-0V"]
  82. counts_as_array = np.array(loaded_counts) * (-1)
  83. if scan in [187, 189, 190]:
  84. counts_as_array = np.zeros(40200)
  85.  
  86. # load photon counts
  87. loaded_photon_counts = load_h5(scan, "counter")["/data/qbpm_micro_sum"]
  88. photon_counts_as_array = np.array(loaded_photon_counts).astype(
  89. float) # this integer array must be converted to float, otherwise np.reciprocal will return only zeros
  90.  
  91. # load exposure times and turn it into 2d array
  92. loaded_exposure_times = load_h5(scan, "counter")["/data/exposure_time"]
  93. exposure_times_as_array = np.array(loaded_exposure_times)
  94.  
  95. # normalize counts and photons to one second
  96. c_rate = (counts_as_array / exposure_times_as_array)
  97. ph_rate = photon_counts_as_array / exposure_times_as_array
  98.  
  99. # normalize counts to the average photon rate and one second
  100. f_DAQ = c_rate * np.mean(
  101. ph_rate) / ph_rate # multiply each LIA count by the ratio of the average photon count to the respective photon count (e.g. twice as much photons as the average should be normalized to half of the XBIC curent)
  102.  
  103. # calculate the conversion factor
  104. if scan in [178, 179, 180, 181, 182, 183, 185, 186, 191, 192]: # these scans have a sensitivity of 1 μA/V
  105. A_PA = 10 ** 6 # "P_A" = 1/sensitivity: Preamplifier amplification factor in V/A, values taken from LogbuchStrahlzeitPetraIII31.10.-6.11.2018.pptx, slide 54f.
  106. if scan in [187, 188, 189, 190]: # these scans have a sensitivity of 100 mA/V
  107. A_PA = 10
  108. A_LIA = 1 # "A_LIA" = 1/scaling = Lock-in amplifier amplification factor in V/V, values also taken from the pptx logbook
  109. V2f = 10 ** 6
  110. Wff = math.sqrt(
  111. 2) # the output signal (in this case a sine signal) of the LIA is the RMS amplitude, but we are interested in the peak-to-peak value (see jove paper, p.4)
  112. H_conv = 2 * Wff / (V2f * A_LIA * A_PA)
  113.  
  114. # finally multiply the conversion factor with the normalized count of each pixel
  115. if not nA:
  116. S_XBIC = f_DAQ * H_conv
  117. S_XBIC = f_DAQ * H_conv * 10 ** 9
  118.  
  119. # return S_XBIC_raw_nA
  120. return S_XBIC
  121.  
  122.  
  123. def supergrid(yvalues, zvalues, currents, gridres):
  124. """This function creates a rectangular grid where both axes have the given length. The given datapoints, which
  125. location is characterized by a tuple of y- and z-values, are interpolated such that every point in the grid gets
  126. assigned a value according to the nearest neighbor method.
  127.  
  128. ARGUMENTS:
  129. yvalues (1D-array): Corresponding y-values for every measurement
  130. zvalues (1D-array): Corresponding x-values for every measurement
  131. currents (1D-array): All currents measured on the respective y- and z-coordinates
  132. gridres (float): Resolution of the grid insofar as it is the length of both axes of the grid in pixels
  133.  
  134. RETURNS:
  135. (yvalues_new, zvalues_new, currents_supergrid) (Tuple of three 2D-Arrays): three grids with corresponding
  136. y-values, z-values and currents at every single point in the grid.
  137. """
  138.  
  139. if np.size(yvalues) != np.size(zvalues):
  140. raise Exception("Arrays 'yvalues' and 'zvalues' must have same size.")
  141. else:
  142. if np.size(yvalues) - np.size(currents) == -1: # This paragraph is to catch an error that is raised when
  143. currents = currents[:np.size(currents) - 1] # dealing with the XRF scan XBIC data. It appears that
  144. if np.size(yvalues) - np.size(currents) == 1: # the y- and z-arrays or the currents are too long by one
  145. yvalues = yvalues[:np.size(yvalues) - 1] # element where no measurement was done. This element is
  146. zvalues = zvalues[:np.size(zvalues) - 1] # removed from the respective array to align points and data.
  147. if np.size(yvalues) - np.size(currents) == 0:
  148. pass
  149. else:
  150. raise Exception(
  151. "Positions (y, z) and corresponding data points can't be aligned due to their different size.")
  152.  
  153. yvalues_shift = yvalues - yvalues.min() # shift y-values to obtain the relative position
  154. zvalues_shift = zvalues - zvalues.min() # shift y-values to obtain the relative position
  155.  
  156. # Create grid of y- and z-values with respect to the selected grid resolution (gridres)
  157. yvalues_new, zvalues_new = np.mgrid[0:yvalues_shift.max():gridres * 1j, 0:zvalues_shift.max():gridres * 1j]
  158.  
  159. # Interpolate given currents such that they fit the grid of x- and y-values
  160. currents_supergrid = griddata((yvalues_shift, zvalues_shift), currents, (yvalues_new, zvalues_new),
  161. method="nearest")
  162.  
  163. return yvalues_new, zvalues_new, currents_supergrid.T
  164.  
  165.  
  166. def plotXBIC_loop(scan, fig, fontsize, voltage_dic, loop_count, compareB=None, compareB_counter=None, compare=False):
  167. # fig.suptitle("Reduced LIA count normalized", fontsize=2*fontsize, x=0.7, y=0.92) # optional title for the figure (with this x and y it is centered and has a good distance)
  168.  
  169. # load position and define y, z and f(y,z) as arrays that can be interpreted by the imshow() function
  170. positions = load_h5(scan, "positions")
  171. y_axis_h5 = positions["/data/encoder_fast/data"]
  172. z_axis_h5 = positions["/data/encoder_slow/data"]
  173. y = np.array(y_axis_h5)
  174. z = np.array(z_axis_h5)
  175. current = counts2amps(scan)
  176. if compare:
  177. counts2amps_A = current
  178. counts2amps_B = counts2amps(compareB[compareB_counter])
  179.  
  180. # append NaN: scan 180 has the shape (40199,) and thus cannot be broadcast together with the other scans which have shapes of (40200,)
  181. if scan == 180:
  182. counts2amps_A = np.append(current, np.nan)
  183. if compareB[compareB_counter] == 180:
  184. counts2amps_B = np.append(counts2amps(compareB[compareB_counter]), np.nan)
  185. current = counts2amps_A - counts2amps_B
  186.  
  187. y_grid, z_grid, current_array = supergrid(y, z, current, 1000)
  188.  
  189. # axes for new subplot
  190. ax = fig.add_subplot(4, 4, loop_count)
  191.  
  192. # spacing to other subplots
  193. plt.subplots_adjust(right=1.3, hspace=0.5, wspace=0.5)
  194.  
  195. # y and z labels
  196. ax.set_xlabel("Position Y [μm]", labelpad=0, fontsize=fontsize)
  197. ax.set_ylabel("Position Z [μm]", labelpad=0, fontsize=fontsize)
  198.  
  199. # title
  200. bias_voltage = voltage_dic[scan]
  201. if compare:
  202. plt.title("Scan #" + str(scan) + " minus Scan #" + str(compareB[compareB_counter]), pad=15,
  203. fontsize=round(9 / 8 * fontsize))
  204. else:
  205. plt.title("Scan #" + str(scan) + bias_voltage, pad=15, fontsize=round(9 / 8 * fontsize))
  206.  
  207. # individual ticks: it's an extrapolated plot from 201x201 to 1000x1000 pixels, but this corresponds to 10x10 micrometers on the solar cell
  208. ax.xaxis.set_ticklabels([0, 5, 10], fontsize=round(4 / 5 * fontsize))
  209. ax.xaxis.set_ticks([0, 500, 999]) # if 1000 instead of 999 it won't display
  210. ax.yaxis.set_ticklabels([0, 5, 10], fontsize=round(4 / 5 * fontsize))
  211. ax.yaxis.set_ticks([0, 500, 999])
  212. ax.xaxis.set_minor_locator(MultipleLocator(50)) # Minor ticks are set to multiples of 50
  213. ax.yaxis.set_minor_locator(MultipleLocator(50))
  214.  
  215. # plot the data
  216. im = ax.imshow(current_array, interpolation='nearest', cmap=cm.afmhot, origin='lower')
  217.  
  218. # add a colorbar as legend
  219. ax_divider = make_axes_locatable(ax) # Introduce a divider next to the axis already in place
  220. cax = ax_divider.append_axes("right", size="15%",
  221. pad="5%") # Append a new axis object with name "cax" to the divider. Width ("size") and distance to the 2D-Plot ("pad") can be given in % of the width of the x axis.
  222. legend = colorbar(im, cax=cax) # Introduce a legend (colorbar) where the new axis has been placed
  223. cax.set_ylabel(r"$\frac{XBIC}{pixel \cdot photons_{avg}}$" + " in (nA)", rotation=-90, labelpad=50,
  224. fontsize=5 / 4 * fontsize) # Place this label next to the colorbar; labelpad denotes the distance of the colorbar
  225. cax.yaxis.set_major_locator(AutoLocator())
  226. cax.yaxis.set_minor_locator(AutoMinorLocator())
  227.  
  228.  
  229. def plotXBIC(plot_range=None, singleplots=None, compareA=None, compareB=None, figsize=(30, 30), savepng=False,
  230. dpi=None):
  231. '''
  232. This function plots the XBIC, using data from the previously defined counts2amps() function. Look at the examples to see how to use it:
  233.  
  234. plotXBIC(plot_range=14) will do the following:
  235. It will plot 14 datasets, starting with #178 and ending with #192 (and omitting 184 which is still counted).
  236.  
  237. plotXBIC(singleplots=[178,185,192]) will do the following:
  238. It will plot #178, #185 and #192.
  239.  
  240. plotXBIC(compareA=[178,178,185], compareB=[187,189,189]) will do the following:
  241. It will plot
  242. Scan 178 minus Scan 187
  243. Scan 178 minus Scan 189
  244. Scan 185 minus Scan 189.
  245. '''
  246.  
  247. start = time.time() # measure time for each plot
  248.  
  249. # the best looking fontsize is about half of the width of the figsize
  250. fontsize = figsize[0] // 2
  251.  
  252. # individualize titles for each scan if there is a bias voltage applied (information from LogbuchStrahlzeitPetraIII31.10.-6.11.2018.pptx, slide 54f., corresponding to handwritten logbook)
  253. voltage_dic = {
  254. 178: "",
  255. 179: "",
  256. 180: "",
  257. 181: "",
  258. 182: " (+ 100 mV)",
  259. 183: " (- 100 mV)",
  260. 185: " (- 200 mV)",
  261. 186: " (+ 200 mV)",
  262. 187: " (+ 500 mV)",
  263. 188: "(- 500 mV)",
  264. 189: " (- 1 V)",
  265. 190: " (+ 600 mv)",
  266. 191: "",
  267. 192: ""
  268. }
  269.  
  270. # create a figure for all plots
  271. fig = plt.figure(figsize=figsize)
  272.  
  273. loop_count = 1 # the subplot number should be independent of omitted scans (e.g. scan #184); otherwise you would see an empty space at that position
  274.  
  275. if type(plot_range) is int:
  276. if plot_range > 6:
  277. plot_range += 1 # scan #184 is omitted, but still counted by the for-loop
  278. for scan in range(178, 178 + plot_range):
  279.  
  280. if scan == 184:
  281. continue # scan 184 was aborted
  282.  
  283. plotXBIC_loop(scan=scan, fig=fig, fontsize=fontsize, voltage_dic=voltage_dic, loop_count=loop_count)
  284.  
  285. loop_count += 1
  286.  
  287. plt.show()
  288.  
  289. if type(singleplots) is list:
  290. for scan in singleplots:
  291.  
  292. if scan == 184:
  293. continue # scan 184 was aborted
  294.  
  295. plotXBIC_loop(scan=scan, fig=fig, fontsize=fontsize, voltage_dic=voltage_dic, loop_count=loop_count)
  296.  
  297. loop_count += 1
  298.  
  299. plt.show()
  300.  
  301. if type(compareA) is list:
  302. compareB_counter = 0
  303. for scan in compareA:
  304. plotXBIC_loop(scan=scan, fig=fig, fontsize=fontsize, voltage_dic=voltage_dic, loop_count=loop_count,
  305. compare=True, compareB=compareB, compareB_counter=compareB_counter)
  306.  
  307. loop_count += 1
  308. compareB_counter += 1
  309.  
  310. plt.show()
  311.  
  312. end = time.time()
  313. if type(compareA) is list:
  314. print("Scan " + str(scan) + " minus Scan " + str(compareB[compareB_counter]) + " took " + str(
  315. round(end - start, 2)) + " seconds to plot.")
  316. else:
  317. print("Scan " + str(scan) + " took " + str(round(end - start, 2)) + " seconds to plot.")
  318.  
  319. if savepng:
  320. now = datetime.now()
  321. dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
  322. if dpi is None:
  323. fig.savefig("savefig/plotXBIC_" + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
  324. else:
  325. fig.savefig("savefig/plotXBIC_" + dt_string + ".png", dpi=dpi, bbox_inches="tight")
  326.  
  327.  
  328. def ImageRegistration(scan1, scan2, cell=None, printdetails=False):
  329. positions = load_h5(scan1, "positions")
  330. y_axis_h5 = positions["/data/encoder_fast/data"]
  331. z_axis_h5 = positions["/data/encoder_slow/data"]
  332. y = np.array(y_axis_h5)
  333. z = np.array(z_axis_h5)
  334. current1 = counts2amps(scan1,
  335. negative=False) # negative argument needs to be false, because otherwise the arrays with negative Amps will appear inverted which will make findTransformEEC fail
  336. current2 = counts2amps(scan2, negative=False)
  337.  
  338. y_grid1, z_grid1, current1_array_original = supergrid(y, z, current1, 1000)
  339. y_grid2, z_grid2, current2_array_original = supergrid(y, z, current2, 1000)
  340.  
  341. # convert current1_array to a grayscale image format (values between 0 and 255)
  342. current1_array_8bitgrayscale = current1_array_original / current1_array_original.max()
  343. current1_array_8bitgrayscale *= (1 / 1 - current1_array_8bitgrayscale.min())
  344. current1_array_8bitgrayscale += 1 - current1_array_8bitgrayscale.max()
  345. current1_array_8bitgrayscale[current1_array_8bitgrayscale < 0] = 0
  346. current1_array_8bitgrayscale = np.array(current1_array_8bitgrayscale * 255, dtype=np.uint8)
  347.  
  348. current2_array_8bitgrayscale = current2_array_original / current2_array_original.max()
  349. current2_array_8bitgrayscale *= (1 / 1 - current2_array_8bitgrayscale.min())
  350. current2_array_8bitgrayscale += 1 - current2_array_8bitgrayscale.max()
  351. current2_array_8bitgrayscale[current2_array_8bitgrayscale < 0] = 0
  352. current2_array_8bitgrayscale = np.array(current2_array_8bitgrayscale * 255, dtype=np.uint8)
  353.  
  354. shape = np.shape(current1_array_8bitgrayscale)
  355.  
  356. warp_mode = cv2.MOTION_TRANSLATION # we only need translation in direction of y and z (no rotation etc.)
  357.  
  358. warp_matrix = np.eye(2, 3,
  359. dtype=np.float32) # this is the matrix that the results of the ECC algorithm are stored in
  360.  
  361. number_of_iterations = 5000 # Specify the number of iterations
  362. termination_eps = 1e-10 # Specify the threshold of the increment in the correlation coefficient between two iterations
  363.  
  364. # Specify the overall criteria
  365. criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)
  366.  
  367. (cc, warp_matrix) = cv2.findTransformECC(current1_array_8bitgrayscale, current2_array_8bitgrayscale, warp_matrix,
  368. warp_mode, criteria, None, 1)
  369.  
  370. if printdetails == True:
  371. scan2_aligned = cv2.warpAffine(current2_array_8bitgrayscale, warp_matrix, (shape[1], shape[0]),
  372. flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP, borderValue=float("NaN")).astype(
  373. np.float32)
  374. scan2_aligned[scan2_aligned == 0] = float("NaN")
  375.  
  376. # show where the values of the registered image lie (XBIC as a function of the 40200 datapoints)
  377. scan2_aligned_ravel = scan2_aligned.ravel()
  378. x = np.arange(np.shape(scan2_aligned_ravel)[0])
  379. plt.figure(figsize=(50, 5))
  380. plt.scatter(x, scan2_aligned_ravel)
  381. plt.show()
  382.  
  383. fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 15), constrained_layout=True)
  384.  
  385. im1 = ax1.imshow(current1_array_8bitgrayscale, origin="lower", cmap=cm.afmhot)
  386. ax1.set_title("Scan #" + str(scan1))
  387.  
  388. ax1_divider = make_axes_locatable(ax1) # Introduce a divider next to the axis already in place
  389. cax1 = ax1_divider.append_axes("right", size="15%",
  390. pad="5%") # Append a new axis object with name "cax" to the divider. Width ("size") and distance to the 2D-Plot ("pad") can be given in % of the width of the x axis.
  391. legend = colorbar(im1, cax=cax1) # Introduce a legend (colorbar) where the new axis has been placed
  392. cax1.yaxis.set_major_locator(AutoLocator())
  393. cax1.yaxis.set_minor_locator(AutoMinorLocator())
  394.  
  395. im2 = ax2.imshow(current2_array_8bitgrayscale, origin="lower", cmap=cm.afmhot)
  396. ax2.set_title("Scan #" + str(scan2))
  397.  
  398. ax2_divider = make_axes_locatable(ax2) # Introduce a divider next to the axis already in place
  399. cax2 = ax2_divider.append_axes("right", size="15%",
  400. pad="5%") # Append a new axis object with name "cax" to the divider. Width ("size") and distance to the 2D-Plot ("pad") can be given in % of the width of the x axis.
  401. legend = colorbar(im2, cax=cax2) # Introduce a legend (colorbar) where the new axis has been placed
  402. cax2.yaxis.set_major_locator(AutoLocator())
  403. cax2.yaxis.set_minor_locator(AutoMinorLocator())
  404.  
  405. im3 = ax3.imshow(scan2_aligned, origin="lower", cmap="inferno")
  406. ax3.set_title("Scan #" + str(scan2) + " aligned")
  407. ax3.set_ylim(bottom=0)
  408.  
  409. ax3_divider = make_axes_locatable(ax3) # Introduce a divider next to the axis already in place
  410. cax3 = ax3_divider.append_axes("right", size="15%",
  411. pad="5%") # Append a new axis object with name "cax" to the divider. Width ("size") and distance to the 2D-Plot ("pad") can be given in % of the width of the x axis.
  412. legend = colorbar(im3, cax=cax3) # Introduce a legend (colorbar) where the new axis has been placed
  413. cax3.yaxis.set_major_locator(AutoLocator())
  414. cax3.yaxis.set_minor_locator(AutoMinorLocator())
  415.  
  416. plt.tight_layout(w_pad=7)
  417.  
  418. return warp_matrix
  419.  
  420.  
  421. # plotXBIC_loop2 differs from plotXBIC_loop in that it translates the current array according to Image Registration (axes ticks correspondingly and AutoMinorLocator instead of MultipleLocator)
  422.  
  423. def plotXBIC_loop2(scan, fig, fontsize, voltage_dic, loop_count, compareB=None, compareB_counter=None, compare=False):
  424. # fig.suptitle("Reduced LIA count normalized", fontsize=2*fontsize, x=0.7, y=0.92) # optional title for the figure (with this x and y it is centered and has a good distance)
  425.  
  426. # load position and define y, z and f(y,z) as arrays that can be interpreted by the imshow() function
  427. positions = load_h5(scan, "positions")
  428. y_axis_h5 = positions["/data/encoder_fast/data"]
  429. z_axis_h5 = positions["/data/encoder_slow/data"]
  430. y = np.array(y_axis_h5)
  431. z = np.array(z_axis_h5)
  432. current = counts2amps(scan)
  433. if compare:
  434. counts2amps_A = current
  435. counts2amps_B = counts2amps(compareB[compareB_counter])
  436.  
  437. # append NaN: scan 180 has the shape (40199,) and thus cannot be broadcast together with the other scans which have shapes of (40200,)
  438. if scan == 180:
  439. counts2amps_A = np.append(current, np.nan)
  440. if compareB[compareB_counter] == 180:
  441. counts2amps_B = np.append(counts2amps(compareB[compareB_counter]), np.nan)
  442. current = counts2amps_A - counts2amps_B
  443.  
  444. y_grid, z_grid, current_array = supergrid(y, z, current, 1000)
  445.  
  446. # axes for new subplot
  447. ax = fig.add_subplot(4, 4, loop_count)
  448.  
  449. # spacing to other subplots
  450. plt.subplots_adjust(right=1.3, hspace=0.5, wspace=0.5)
  451.  
  452. # y and z labels
  453. ax.set_xlabel("Position Y [μm]", labelpad=0, fontsize=fontsize)
  454. ax.set_ylabel("Position Z [μm]", labelpad=0, fontsize=fontsize)
  455.  
  456. # title
  457. bias_voltage = voltage_dic[scan]
  458. if compare:
  459. plt.title("Scan #" + str(scan) + " minus Scan #" + str(compareB[compareB_counter]), pad=15,
  460. fontsize=round(9 / 8 * fontsize))
  461. else:
  462. plt.title("Scan #" + str(scan) + bias_voltage, pad=15, fontsize=round(9 / 8 * fontsize))
  463.  
  464. # ticks and ticklabels need to be shifted according to Image Registration
  465. if scan in [179, 180, 182, 183, 184, 185, 186, 191, 192]:
  466. IRS_y = ImageRegistrationDic[scan][0][2]
  467. IRS_z = ImageRegistrationDic[scan][1][2]
  468. else:
  469. IRS_y = 0
  470. IRS_z = 0
  471.  
  472. # individual ticks: it's an extrapolated plot from 201x201 to 1000x1000 pixels, but this corresponds to 10x10 micrometers on the solar cell
  473. ax.xaxis.set_ticklabels([0, 5, 10], fontsize=round(4 / 5 * fontsize))
  474. ax.xaxis.set_ticks([0 - IRS_y, 500 - IRS_y, 999 - IRS_y]) # if 1000 instead of 999 it won't display
  475. ax.yaxis.set_ticklabels([0, 5, 10], fontsize=round(4 / 5 * fontsize))
  476. ax.yaxis.set_ticks([0 - IRS_z, 500 - IRS_z, 999 - IRS_z])
  477. ax.xaxis.set_minor_locator(AutoMinorLocator(5)) # 1 minor tick for each micrometer
  478. ax.yaxis.set_minor_locator(AutoMinorLocator(5))
  479.  
  480. if scan in [179, 180, 182, 183, 184, 185, 186, 191, 192]:
  481. warp_matrix = ImageRegistrationDic[scan]
  482.  
  483. if scan not in [178, 181, 187, 188, 189,
  484. 190]: # scan 178 and 181 are reference scans for the warp matrices, so they don't need to be transformed, and Image Registration would fail for scan #187 to #190
  485. current_array = cv2.warpAffine(current_array, warp_matrix, (1000, 1000),
  486. flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP, borderValue=float("NaN")).astype(
  487. np.float32)
  488.  
  489. # plot the data
  490. im = ax.imshow(current_array, interpolation='nearest', cmap=cm.afmhot, origin='lower')
  491.  
  492. # add a colorbar as legend
  493. ax_divider = make_axes_locatable(ax) # Introduce a divider next to the axis already in place
  494. cax = ax_divider.append_axes("right", size="15%",
  495. pad="5%") # Append a new axis object with name "cax" to the divider. Width ("size") and distance to the 2D-Plot ("pad") can be given in % of the width of the x axis.
  496. legend = colorbar(im, cax=cax) # Introduce a legend (colorbar) where the new axis has been placed
  497. cax.set_ylabel(r"$\frac{XBIC}{pixel \cdot photons_{avg}}$" + " in (nA)", rotation=-90, labelpad=50,
  498. fontsize=5 / 4 * fontsize) # Place this label next to the colorbar; labelpad denotes the distance of the colorbar
  499. cax.yaxis.set_major_locator(AutoLocator())
  500. cax.yaxis.set_minor_locator(AutoMinorLocator())
  501.  
  502.  
  503. def plotXBIC2(plot_range=None, singleplots=None, compareA=None, compareB=None, figsize=(30, 30), savepng=False,
  504. dpi=None):
  505. '''
  506. This function plots the XBIC, using data from the previously defined counts2amps() function. Look at the examples to see how to use it:
  507.  
  508. plotXBIC(plot_range=14) will do the following:
  509. It will plot 14 datasets, starting with #178 and ending with #192 (and omitting 184 which is still counted).
  510.  
  511. plotXBIC(singleplots=[178,185,192]) will do the following:
  512. It will plot #178, #185 and #192.
  513.  
  514. plotXBIC(compareA=[178,178,185], compareB=[187,189,189]) will do the following:
  515. It will plot
  516. Scan 178 minus Scan 187
  517. Scan 178 minus Scan 189
  518. Scan 185 minus Scan 189.
  519. '''
  520.  
  521. start = time.time() # measure time for each plot
  522.  
  523. # the best looking fontsize is about half of the width of the figsize
  524. fontsize = figsize[0] // 2
  525.  
  526. # individualize titles for each scan if there is a bias voltage applied (information from LogbuchStrahlzeitPetraIII31.10.-6.11.2018.pptx, slide 54f., corresponding to handwritten logbook)
  527. voltage_dic = {
  528. 178: "",
  529. 179: "",
  530. 180: "",
  531. 181: "",
  532. 182: " (+ 100 mV)",
  533. 183: " (- 100 mV)",
  534. 185: " (- 200 mV)",
  535. 186: " (+ 200 mV)",
  536. 187: " (+ 500 mV)",
  537. 188: "(- 500 mV)",
  538. 189: " (- 1 V)",
  539. 190: " (+ 600 mv)",
  540. 191: "",
  541. 192: ""
  542. }
  543.  
  544. # create a figure for all plots
  545. fig = plt.figure(figsize=figsize)
  546.  
  547. loop_count = 1 # the subplot number should be independent of omitted scans (e.g. scan #184); otherwise you would see an empty space at that position
  548.  
  549. if type(plot_range) is int:
  550. if plot_range > 6:
  551. plot_range += 1 # scan #184 is omitted, but still counted by the for-loop
  552. for scan in range(178, 178 + plot_range):
  553.  
  554. if scan == 184:
  555. continue # scan 184 was aborted
  556.  
  557. plotXBIC_loop2(scan=scan, fig=fig, fontsize=fontsize, voltage_dic=voltage_dic, loop_count=loop_count)
  558.  
  559. loop_count += 1
  560.  
  561. plt.show()
  562. end = time.time()
  563. print(
  564. "Plotting scan 178 to scan " + str(178 + plot_range) + " took " + str(round(end - start, 2)) + " seconds.")
  565.  
  566. if type(singleplots) is list:
  567. for scan in singleplots:
  568.  
  569. if scan == 184:
  570. continue # scan 184 was aborted
  571.  
  572. plotXBIC_loop2(scan=scan, fig=fig, fontsize=fontsize, voltage_dic=voltage_dic, loop_count=loop_count)
  573.  
  574. loop_count += 1
  575.  
  576. plt.show()
  577. end = time.time()
  578. print("Scan " + str(scan) + " took " + str(round(end - start, 2)) + " seconds to plot.")
  579.  
  580. if type(compareA) is list:
  581. compareB_counter = 0
  582. for scan in compareA:
  583. plotXBIC_loop2(scan=scan, fig=fig, fontsize=fontsize, voltage_dic=voltage_dic, loop_count=loop_count,
  584. compare=True, compareB=compareB, compareB_counter=compareB_counter)
  585.  
  586. loop_count += 1
  587. compareB_counter += 1
  588.  
  589. plt.show()
  590. end = time.time()
  591. print("Scan " + str(scan) + " minus Scan " + str(compareB[compareB_counter]) + " took " + str(
  592. round(end - start, 2)) + " seconds to plot.")
  593.  
  594. if savepng:
  595. now = datetime.now()
  596. dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
  597. if dpi is None:
  598. fig.savefig("savefig/plotXBIC_" + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
  599. else:
  600. fig.savefig("savefig/plotXBIC_" + dt_string + ".png", dpi=dpi, bbox_inches="tight")
  601.  
  602.  
  603. def plotIV(coordinate=None, sample_size=100, set_region=(0, 999, 0, 999), showPosition=False, inset=False,
  604. figsize=(30, 30), savepng=False, dpi=None, savepdf=False, annotations=False, data_processing=1,
  605. return_only=False):
  606. start = time.time()
  607. # voltages
  608. voltages = [-1000, -500, -200, -100, 0, 100, 200, 500, 600]
  609. # scans (ordered by voltages)
  610. scans_ordered = [189, 188, 185, 183, 181, 182, 186, 187, 190]
  611.  
  612. # convert list to array
  613. voltages, scans_ordered = np.array(voltages), np.array(scans_ordered)
  614.  
  615. current_array_list = []
  616. current_mean = []
  617.  
  618. # plot either with or without negative values and select if the solar cell channel should be selected for the current
  619. options_dic = {
  620. 1: (False, False, "Lock in R , ohne Negativwerte"),
  621. 2: (False, True, "Lock in R, mit Negativwerten"),
  622. 3: (True, False, "solar cell channel, ohne Negativwerte"),
  623. 4: (True, True, "solar cell channel, mit Negativwerten")
  624. }
  625. if not return_only:
  626. print(options_dic[data_processing][2])
  627. A = 2 # plot only from scan_ordered[A] to ...
  628. B = -2 # ... scan_ordered[B]
  629. counter = 0
  630. for scan in scans_ordered[A:B]:
  631. positions = load_h5(scan, "positions")
  632. y_axis_h5 = positions["/data/encoder_fast/data"]
  633. z_axis_h5 = positions["/data/encoder_slow/data"]
  634. y_axis = np.array(y_axis_h5)
  635. z_axis = np.array(z_axis_h5)
  636. current = counts2amps(scan, solar_cell_channel_only=options_dic[data_processing][0],
  637. negative=options_dic[data_processing][1])
  638.  
  639. y_grid, z_grid, current_array = supergrid(y_axis, z_axis, current, 1000)
  640.  
  641. if scan != 181:
  642. warp_matrix = ImageRegistrationDic[scan]
  643. current_array = cv2.warpAffine(current_array, warp_matrix, (1000, 1000),
  644. flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP,
  645. borderValue=float("NaN")).astype(np.float32)
  646.  
  647. current_array_list.append(current_array)
  648.  
  649. current_mean.append(np.nanmean(current_array))
  650.  
  651. print(
  652. f"Mean current for scan {scan} is {str(round(current_mean[counter], 4))} nA. Its min/max current is {str(round(np.nanmin(current_array), 4))}/{str(round(np.nanmax(current_array), 4))} nA.") # the values are only shown to a few decimal places if not using e.g. {round(current_mean[counter],4):.20f}
  653. counter += 1
  654.  
  655. pixelplotting_array = np.stack(current_array_list)
  656.  
  657. if return_only:
  658. return current_array_list, pixelplotting_array
  659.  
  660. fontsize = 1.5 * figsize[0] // 2
  661. fig = plt.figure(figsize=figsize)
  662. ax1 = fig.add_subplot(221)
  663.  
  664. # y and z labels
  665. ax1.set_xlabel("Bias voltage [mV]", labelpad=10, fontsize=9 / 5 * fontsize)
  666. ax1.set_ylabel("X-Ray beam induced current (nA)", labelpad=10, fontsize=9 / 5 * fontsize)
  667. plt.title("IV curve(s) for single pixel(s)", pad=15, fontsize=round(9 / 5 * fontsize))
  668. ax1.tick_params(labelsize=9 / 5 * fontsize, length=fontsize)
  669. ax1.set_xticks(voltages[A:B])
  670.  
  671. # allows to plot only a specific location on the y-z current array (else plot each 'sample_size'-th IV curve):
  672. if coordinate != None:
  673. ax1.plot(voltages[A:B], pixelplotting_array[:, coordinate[0], coordinate[1]], "-o", markersize=8, linewidth=1.2,
  674. color="blue")
  675.  
  676. if inset:
  677. # Create a set of inset Axes: these should fill the bounding box allocated to them.
  678. ax1_2 = plt.axes([0, 0, 1, 1])
  679. # Manually set the position and relative size of the inset axes within ax1
  680. ip = InsetPosition(ax1, [0.2, 0.2, 0.3, 0.3])
  681. ax1_2.set_axes_locator(ip)
  682. else:
  683. # Create a new subplot
  684. ax1_2 = fig.add_subplot(223)
  685. # ticks and ticklabels need to be shifted according to Image Registration
  686. scan = scans_ordered[B - 1] # show the scan of the last IV curve point as an example
  687. IRS_y = ImageRegistrationDic[scan][0][2] # IRS = Image Registration Shift (in y or z)
  688. IRS_z = ImageRegistrationDic[scan][1][2]
  689. # individual ticks: it's an extrapolated plot from 201x201 to 1000x1000 pixels, but this corresponds to 10x10 micrometers on the solar cell
  690. ax1_2.xaxis.set_ticklabels([0, 5, 10], fontsize=round(9 / 5 * fontsize))
  691. ax1_2.xaxis.set_ticks([0 - IRS_y, 500 - IRS_y, 999 - IRS_y]) # if 1000 instead of 999 it won't display
  692. ax1_2.yaxis.set_ticklabels([0, 5, 10], fontsize=round(9 / 5 * fontsize))
  693. ax1_2.yaxis.set_ticks([0 - IRS_z, 500 - IRS_z, 999 - IRS_z])
  694. ax1_2.xaxis.set_minor_locator(AutoMinorLocator(5)) # 1 minor tick for each micrometer
  695. ax1_2.yaxis.set_minor_locator(AutoMinorLocator(5))
  696.  
  697. ax1_2.imshow(current_array, interpolation='nearest', cmap=cm.afmhot, origin='lower')
  698. ax1_2.plot([coordinate[0]], coordinate[1], "bo", markersize=8)
  699. # y and z labels
  700. ax1_2.set_xlabel("Position Y [μm]", labelpad=10, fontsize=fontsize)
  701. ax1_2.set_ylabel("Position Z [μm]", labelpad=10, fontsize=fontsize)
  702.  
  703. print("Number of NaN values on shown IV curve:",
  704. str(len(np.where(np.isnan(pixelplotting_array[:, coordinate[0], coordinate[1]]))[0])))
  705.  
  706. if coordinate == None:
  707. countnumberlines = 0
  708. countNaN = 0
  709.  
  710. # define the colormap for the curves
  711. cmap = cm.gist_rainbow
  712. norm = colors.Normalize(vmin=1, vmax=sample_size)
  713.  
  714. for z in np.linspace(set_region[0], set_region[1], sample_size ** 0.5, dtype=int):
  715. for y in np.linspace(set_region[2], set_region[3], sample_size ** 0.5, dtype=int):
  716. # ax1.plot(voltages[A:B], pixelplotting_array[:,y,z], "-o", markersize=8, linewidth=1.2, color=cmap(norm(countnumberlines)))
  717. ax1.plot(voltages[A:B], pixelplotting_array[:, y, z], "-", linewidth=0.5, color="blue")
  718. ax1.plot(voltages[A:B], current_mean, "-o", markersize=12, linewidth=4, color="red")
  719.  
  720. if showPosition:
  721. if inset:
  722. # Create a set of inset Axes: these should fill the bounding box allocated to them.
  723. ax1_2 = plt.axes([0, 0, 1, 1])
  724. # Manually set the position and relative size of the inset axes within ax1
  725. ip = InsetPosition(ax1, [0.2, 0.05, figsize[0] / 120, figsize[0] / 120])
  726. ax1_2.set_axes_locator(ip)
  727. else:
  728. # Create a new subplot
  729. ax1_2 = fig.add_subplot(223)
  730. # ticks and ticklabels need to be shifted according to Image Registration
  731. scan = scans_ordered[B - 1] # show the scan of the last IV curve point as an example
  732. IRS_y = ImageRegistrationDic[scan][0][2] # IRS = Image Registration Shift (in y or z)
  733. IRS_z = ImageRegistrationDic[scan][1][2]
  734. # individual ticks: it's an extrapolated plot from 201x201 to 1000x1000 pixels, but this corresponds to 10x10 micrometers on the solar cell
  735. ax1_2.xaxis.set_ticklabels([0, 5, 10], fontsize=round(fontsize))
  736. ax1_2.xaxis.set_ticks(
  737. [0 - IRS_y, 500 - IRS_y, 999 - IRS_y]) # if 1000 instead of 999 it won't display
  738. ax1_2.yaxis.set_ticklabels([0, 5, 10], fontsize=round(fontsize))
  739. ax1_2.yaxis.set_ticks([0 - IRS_z, 500 - IRS_z, 999 - IRS_z])
  740. ax1_2.xaxis.set_minor_locator(AutoMinorLocator(5)) # 1 minor tick for each micrometer
  741. ax1_2.yaxis.set_minor_locator(AutoMinorLocator(5))
  742.  
  743. ax1_2.imshow(current_array, interpolation='nearest', cmap=cm.afmhot, origin='lower')
  744. # ax1_2.plot(y,z, "bo", markersize=8, color=cmap(norm(countnumberlines)))
  745. ax1_2.plot(y, z, "bo", markersize=8, color="k")
  746.  
  747. # y and z labels
  748. ax1_2.set_xlabel("Position Y [μm]", labelpad=10, fontsize=fontsize)
  749. ax1_2.set_ylabel("Position Z [μm]", labelpad=10, fontsize=fontsize)
  750. countnumberlines += 1
  751. countNaN += len(np.where(np.isnan(pixelplotting_array[:, y, z]))[0])
  752. print("Number of IV curves shown in plot:", str(countnumberlines))
  753. print("Number of NaN values on shown IV curves:", str(countNaN))
  754.  
  755. ax2 = fig.add_subplot(222)
  756. ax2.plot(voltages[A:B], current_mean, "-o", markersize=8, linewidth=1.2, color="blue",
  757. label="Mean XBIC for all (y,z) positions (nA)")
  758. ax2.legend(fontsize=7 / 5 * fontsize)
  759.  
  760. # fitting
  761. popt, pcov = curve_fit(lambda V, I_0, I_Ph: shockley_function(V, I_0, I_Ph) * (-1e9), voltages[A:B] / 1000,
  762. current_mean, p0=[5e-12, current_mean[2] * 1e-9],
  763. bounds=([1e-13, (current_mean[2] - 3) * 1e-9], [1e-11, (current_mean[2] + 3) * 1e-9]))
  764.  
  765. ax2.plot(voltages[A:B], shockley_function(voltages[A:B] / 1000, popt[0], popt[1]) * (-1e9), "-o", markersize=6,
  766. linewidth=0.6, color="red", label="Fitted IV curve")
  767. ax2.legend(fontsize=7 / 5 * fontsize)
  768. print("-----\nActually measured:\nShort-circuit current:", str(round(current_mean[2], 4)),
  769. "nA\n-----\nResult of fitting:\nSaturation current I_0:", str(round(popt[0] * 1e9, 4)),
  770. "nA\nPhotocurrent I_Ph:", str(round(popt[1] * 1e9, 4)),
  771. "nA\nOpen Circuit Voltage V_OC:", str(round(shockley_function_VOC(popt[0], popt[1]) * 1e3, 4)), "mV\n-----")
  772.  
  773. if ax1.get_ylim()[0] < 202 and ax1.get_ylim()[
  774. 1] > 218: # this condition makes sure that the min and max mean current values are still shown
  775. ax2.set_ylim(
  776. ax1.get_ylim()) # make sure that the current interval is the same so that the shape of the curves can be compared
  777. ax2.set_xlabel("Bias voltage [mV]", labelpad=10, fontsize=9 / 5 * fontsize)
  778. ax2.set_ylabel("X-Ray beam induced current (nA)", labelpad=10, fontsize=9 / 5 * fontsize)
  779. plt.title("Mean IV curve", pad=15, fontsize=round(9 / 5 * fontsize))
  780. ax2.tick_params(labelsize=9 / 5 * fontsize, length=fontsize)
  781. ax2.set_xticks(voltages[A:B])
  782.  
  783. if annotations:
  784. for i in range(len(current_mean)):
  785. ax2.annotate("Scan " + str(scans_ordered[i + A]), xy=(voltages[i + A], current_mean[i]),
  786. xytext=(voltages[i + A], current_mean[i] + 1.3),
  787. arrowprops=dict(facecolor='black', shrink=0.12, width=2, headwidth=8))
  788.  
  789. '''#distance of ticklabels from ticks
  790. ax1.tick_params(axis='both', which='major', pad=25)
  791. ax1_2.tick_params(axis='both', which='major', pad=25)
  792. ax2.tick_params(axis='both', which='major', pad=25)'''
  793.  
  794. plt.subplots_adjust(wspace=0.35) # adjust the width between the subplots
  795.  
  796. plt.show()
  797.  
  798. end = time.time()
  799. print("Plotting took {} seconds.".format(str(round(end - start, 2))))
  800.  
  801. if savepng:
  802. now = datetime.now()
  803. dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
  804. if dpi is None:
  805. fig.savefig("savefig/IV_curve_" + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
  806. else:
  807. fig.savefig("savefig/IV_curve_" + dt_string + ".png", dpi=dpi, bbox_inches="tight")
  808. if savepdf:
  809. fig.savefig("savefig/IV_curve_" + dt_string + ".pdf", format="pdf")
  810.  
  811. def shockley_function(V,I_0,I_Ph): # arguments and output in SI units
  812. return I_0*(np.exp(constants.e*V/(constants.k*300))-1)-I_Ph
  813.  
  814. def shockley_function_VOC(I_0,I_Ph): # arguments and output in SI units
  815. return constants.k*300*np.log(I_Ph/I_0 + 1)/constants.e
  816.  
  817.  
  818. def plotClusterCurves(k=None, k_curves=None, NaN=False, sample_size=100, background=True, data_processing=1,
  819. compare=False, dpi=None, savepng=False):
  820. start = time.time()
  821. # use variables from plotIV function
  822. current_array = plotIV(data_processing=data_processing, return_only=True)[0][
  823. -1] # select scan of last IV curve point
  824. pixelplotting_array = plotIV(data_processing=data_processing, return_only=True)[1]
  825. voltages = [-1000, -500, -200, -100, 0, 100, 200, 500, 600]
  826. A = 2 # plot only from scan_ordered[A] to ...
  827. B = -2 # ... scan_ordered[B]
  828.  
  829. figsize = (30, 15)
  830. fontsize = figsize[0] // 2
  831. fig = plt.figure(figsize=figsize)
  832. ax1 = fig.add_subplot(121)
  833.  
  834. # ticks and ticklabels need to be shifted according to Image Registration
  835. scan = 186 # show the scan of the last IV curve point as an example <-- here it is scan 186, at B = -2 in the plotIV() function
  836. IRS_y = ImageRegistrationDic[scan][0][2] # IRS = Image Registration Shift (in y or z)
  837. IRS_z = ImageRegistrationDic[scan][1][2]
  838. # individual ticks: it's an extrapolated plot from 201x201 to 1000x1000 pixels, but this corresponds to 10x10 micrometers on the solar cell
  839. ax1.xaxis.set_ticklabels([0, 5, 10], fontsize=round(4 / 5 * fontsize))
  840. ax1.xaxis.set_ticks([0 - IRS_y, 500 - IRS_y, 999 - IRS_y]) # if 1000 instead of 999 it won't display
  841. ax1.yaxis.set_ticklabels([0, 5, 10], fontsize=round(4 / 5 * fontsize))
  842. ax1.yaxis.set_ticks([0 - IRS_z, 500 - IRS_z, 999 - IRS_z])
  843. ax1.xaxis.set_minor_locator(AutoMinorLocator(5)) # 1 minor tick for each micrometer
  844. ax1.yaxis.set_minor_locator(AutoMinorLocator(5))
  845.  
  846. if background:
  847. ax1.imshow(current_array, interpolation='nearest', cmap=cm.afmhot, origin='lower')
  848. # y and z labels
  849. ax1.set_xlabel("Position Y [μm]", labelpad=0, fontsize=fontsize)
  850. ax1.set_ylabel("Position Z [μm]", labelpad=0, fontsize=fontsize)
  851.  
  852. if NaN: # this option was only used, because there was something wrong with the plotClusterCurve "if NaN" condition itself, adding weird NaN values to the current grid
  853. NaN_where = np.where(np.isnan(pixelplotting_array))
  854. NaN_where = np.flip(NaN_where, 0) # np.flip is necessary, because np.isnan reads from bottom to top
  855. ax1.scatter(NaN_where[0], NaN_where[1])
  856. ax1.set_title("Position of NaN values (in blue)", pad=15, fontsize=round(9 / 8 * fontsize))
  857.  
  858. if k != None:
  859. for z in np.linspace(0, 999, sample_size ** 0.5, dtype=int):
  860. for y in np.linspace(0, 999, sample_size ** 0.5, dtype=int):
  861. if np.array(kmeansDic[str(k)]["cluster_numbers"])[y][z] == k_curves:
  862. ax1.scatter(y, z, color="red")
  863. else:
  864. ax1.scatter(y, z, color="black")
  865. ax1.set_title("Position of the displayed curve", pad=15, fontsize=round(9 / 8 * fontsize))
  866.  
  867. if k_curves != None: # plot curves belonging to the k_th cluster
  868. ax2 = fig.add_subplot(122)
  869. k_curves_values = pixelplotting_array[:, np.array(kmeansDic[str(k)]["cluster_numbers"]) == k_curves].T
  870. mean_array = np.empty(0)
  871. for scan_index in range(5):
  872. mean_array = np.append(mean_array, np.nanmean(k_curves_values[:, scan_index]))
  873. print(f"Mean currents for curve #{k_curves} (with k = {k}):", str(mean_array))
  874. countnumberlines = 0
  875. for z in np.linspace(0, 999, sample_size ** 0.5, dtype=int):
  876. for y in np.linspace(0, 999, sample_size ** 0.5, dtype=int):
  877. if not math.isnan(np.array(kmeansDic[str(k)]["cluster_numbers"])[y][z]):
  878. if int(np.array(kmeansDic[str(k)]["cluster_numbers"])[y][z]) == k_curves:
  879. ax2.plot(voltages[A:B], pixelplotting_array[:, y, z], linewidth=0.5, color="blue")
  880. countnumberlines += 1
  881. ax2.plot(voltages[A:B], mean_array, "-o", linewidth=1.2, color="red")
  882. ax2.set_title(f"IV curves for group {k_curves} (with k = {k})", pad=15, fontsize=round(9 / 8 * fontsize))
  883.  
  884. if compare:
  885. for comparing_curve in range(k + 1):
  886. if comparing_curve == k_curves: # don't plot again the red curve for the k_curve that is already selected
  887. continue
  888. k_curves_values = pixelplotting_array[:,
  889. np.array(kmeansDic[str(k)]["cluster_numbers"]) == comparing_curve].T
  890. mean_array = np.empty(0)
  891. for scan_index in range(5):
  892. mean_array = np.append(mean_array, np.nanmean(k_curves_values[:, scan_index]))
  893. ax2.plot(voltages[A:B], mean_array, "-o", linewidth=1.2, color="black")
  894. print("For comparison, the mean curve for group", str(comparing_curve), "has been plotted:",
  895. str(mean_array))
  896.  
  897. ax2.set_title(
  898. f"IV curves for group {k_curves} (with k = {k}), compared to the mean IV curves of the other groups",
  899. pad=15, fontsize=round(9 / 8 * fontsize))
  900.  
  901. print("Number of IV curves shown in plot:", str(countnumberlines))
  902.  
  903. plt.show()
  904. end = time.time()
  905. print("Plotting took {} seconds.".format(str(round(end - start, 2))))
  906.  
  907. if savepng:
  908. now = datetime.now()
  909. dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
  910. if dpi is None:
  911. fig.savefig("savefig/cluster_curve_" + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
  912. else:
  913. fig.savefig("savefig/cluster_curve_" + dt_string + ".png", dpi=dpi, bbox_inches="tight")
  914.  
  915.  
  916. def plotClusterPosition(k=2, dpi=None, savepng=False):
  917. start = time.time()
  918.  
  919. figsize = (10, 10)
  920. fig = plt.figure(figsize=figsize)
  921.  
  922. hsv_modified = cm.get_cmap('hsv', 256) # create new hsv colormaps in range of 0.3 (green) to 0.7 (blue)
  923. newcmp = ListedColormap(hsv_modified(np.linspace(0, 0.7, 256))) # show figure
  924. new_inferno = cm.get_cmap(newcmp, 5) # visualize with the new_inferno colormaps
  925.  
  926. im = plt.pcolormesh(kmeansDic[str(k)]["cluster_numbers"], cmap=new_inferno)
  927. plt.colorbar()
  928.  
  929. # im = ax.imshow(np.array(kmeansDic[str(k)]["cluster_numbers"]), interpolation='nearest', cmap=cm.afmhot, origin='lower')
  930.  
  931. '''
  932. # add a colorbar as legend
  933. ax_divider = make_axes_locatable(ax) # Introduce a divider next to the axis already in place
  934. cax = ax_divider.append_axes("right", size = "15%", pad = "5%") # Append a new axis object with name "cax" to the divider. Width ("size") and distance to the 2D-Plot ("pad") can be given in % of the width of the x axis.
  935. legend = colorbar(im, cax = cax) # Introduce a legend (colorbar) where the new axis has been placed
  936.  
  937. cax.set_ylabel("Number of cluster", rotation = -90, labelpad = 50 , style = "italic", fontsize=5/4*fontsize) # Place this label next to the colorbar; labelpad denotes the distance of the colorbar
  938. cax.yaxis.set_major_locator(AutoLocator())
  939. cax.yaxis.set_minor_locator(AutoMinorLocator())
  940. '''
  941.  
  942. end = time.time()
  943. print("Plotting took {} seconds.".format(str(round(end - start, 2))))
  944.  
  945. if savepng:
  946. now = datetime.now()
  947. dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
  948. if dpi is None:
  949. fig.savefig("savefig/ClusterPosition_" + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
  950. else:
  951. fig.savefig("savefig/ClusterPosition_" + dt_string + ".png", dpi=dpi, bbox_inches="tight")
  952.  
  953.  
  954. plotClusterPosition(k=2)
  955.  
  956.  
  957. def kmeans_clustering(data_processing=1, k=3, random_state=22, scale_all=True, absolute_values=False, sample_size=50000,
  958. silhouette=True):
  959. pixelplotting_array = plotIV(data_processing=data_processing, return_only=True)[1]
  960. pixelvectors = pixelplotting_array.reshape(5,
  961. 1000000).T # each 5D vector represents 1 pixel with 5 different bias voltages from - to + 200 mV
  962. if not absolute_values:
  963. pixelvectors = np.array([
  964. pixelvectors.T[0] - pixelvectors.T[1],
  965. pixelvectors.T[1] - pixelvectors.T[2],
  966. pixelvectors.T[2] - pixelvectors.T[3],
  967. pixelvectors.T[3] - pixelvectors.T[4]
  968. ]).T
  969. data = pixelvectors[~np.isnan(pixelvectors).any(axis=1)] # delete all vectors which contain NaN values
  970.  
  971. if scale_all: # maximum of ALL values is 1 and minimum of ALL values is 0
  972. scaler = MinMaxScaler()
  973. scaler.fit(data)
  974. data_transformed = scaler.transform(data)
  975. else: # maximum of each curve is 1 and minimum of each curve is 0
  976. data_transformed = data / np.linalg.norm(data, ord=2, axis=1, keepdims=True)
  977.  
  978. km = KMeans(n_clusters=k)
  979. groups = km.fit_predict(
  980. data_transformed) + 1 # Adding 1 calls the first group "group 1" instead of "group 0". Also, it avoids confusion with the zeros_array
  981. number_sqdist = km.inertia_
  982. if silhouette: # silhouette slows down the function extremely, so it is given as optional
  983. silhouette = silhouette_score(data_transformed, groups, sample_size=sample_size)
  984. else:
  985. silhouette = 0
  986. calinski_harabasz = calinski_harabasz_score(data_transformed, groups)
  987.  
  988. no_data = np.isnan(pixelvectors).any(
  989. axis=1) # returns a 1D array with True if a vector contains a NaN value, else False
  990. no_data_count = no_data.sum() # counts all the vectors with NaNs
  991. data_count = 1000000 - no_data_count
  992.  
  993. zeros_array = np.zeros(1000000)
  994. zeros_array[~no_data] = groups # fills in all the group numbers where there are no NaNs in a vector
  995. zeros_array[no_data] = np.nan # where there are NaNs in a vector: assign NaN as "group number"
  996. cluster_array = zeros_array.reshape(1000, 1000)
  997.  
  998. return cluster_array, number_sqdist, silhouette, calinski_harabasz
  999.  
  1000.  
  1001. def find_k(method="elbow", dpi=None, savepng=False):
  1002. figsize = (12, 12)
  1003. fontsize = figsize[0]
  1004.  
  1005. fig = plt.figure(figsize=figsize)
  1006. ax1 = fig.add_subplot(111)
  1007.  
  1008. x = []
  1009. y = []
  1010.  
  1011. if method == "elbow": # elbow method
  1012. for k in kmeansDic.keys():
  1013. x.append(int(k))
  1014. y.append(np.array(kmeansDic[str(k)]["ssqd"]))
  1015. plt.scatter(x, y)
  1016. string1 = "Elbow method"
  1017. string2 = "Sum of squared distances to respective centroid"
  1018. string3 = "elbow" # only for printing PNGs
  1019.  
  1020. if method == "silhouette": # silhouette score := (b-a) divided by max(a,b) ; where a is the intra-cluster distance and b is the nearest-cluster distance
  1021. for k in kmeansDic.keys():
  1022. x.append(int(k))
  1023. y.append(np.array(kmeansDic[str(k)]["silhouette"]))
  1024. plt.scatter(x, y)
  1025. string1 = "Silhouette score"
  1026. string2 = "(b-a)/max(a,b)"
  1027. string3 = "silhouette" # only for printing PNGs
  1028.  
  1029. if method == "calinski_harabasz": # Calinski-Harabasz score := within-cluster disperson divided by between-cluster dispersion
  1030. for k in kmeansDic.keys():
  1031. x.append(int(k))
  1032. y.append(np.array(kmeansDic[str(k)]["calinski_harabasz"]))
  1033. plt.scatter(x, y)
  1034. string1 = "Calinski-Harabasz score"
  1035. string2 = "ratio between within-cluster and between-cluster disperson"
  1036. string3 = "calinski_harabasz" # only for printing PNGs
  1037.  
  1038. if method == "all": # compare all scores next to each other
  1039. for k in kmeansDic.keys():
  1040. x.append(int(k))
  1041. y.append(np.array(kmeansDic[str(k)]["ssqd"]))
  1042. y_normalized = []
  1043. for entry in y:
  1044. y_normalized.append(entry / np.max(y))
  1045. print("elbow:", str(y_normalized))
  1046. elbow_scatter = ax1.scatter(x, y_normalized, c="red")
  1047.  
  1048. x.clear() # don't take over
  1049. y.clear() # values from ssqd!
  1050. for k in kmeansDic.keys():
  1051. x.append(int(k))
  1052. y.append(np.array(kmeansDic[str(k)]["silhouette"]))
  1053. y_normalized = []
  1054. for entry in y:
  1055. y_normalized.append(entry / np.max(y))
  1056. print("silhouette:", str(y_normalized))
  1057. silhouette_scatter = ax1.scatter(x, y_normalized, c="green")
  1058.  
  1059. x.clear() # don't take over
  1060. y.clear() # values from silhouette!
  1061. for k in kmeansDic.keys():
  1062. x.append(int(k))
  1063. y.append(np.array(kmeansDic[str(k)]["calinski_harabasz"]))
  1064. y_normalized = []
  1065. for entry in y:
  1066. y_normalized.append(entry / np.max(y))
  1067. print("calinski_harabasz:", str(y_normalized))
  1068. calinski_harabasz_scatter = ax1.scatter(x, y_normalized, c="blue")
  1069.  
  1070. string1 = "All scores in comparison"
  1071. string2 = "Normalized scores"
  1072.  
  1073. plt.legend((elbow_scatter, silhouette_scatter, calinski_harabasz_scatter),
  1074. ("Elbow Method", "Silhouette score", "Calinski-Harabasz score"), scatterpoints=1, loc='lower left',
  1075. ncol=3, fontsize=fontsize)
  1076.  
  1077. ax1.set_xlabel("Chosen k for clustering", labelpad=10, fontsize=9 / 5 * fontsize, fontweight="bold")
  1078. ax1.set_ylabel(string2, labelpad=10, fontsize=9 / 5 * fontsize, fontweight="bold")
  1079. plt.title(string1, pad=15, fontsize=round(9 / 5 * fontsize), fontweight="bold")
  1080. ax1.tick_params(labelsize=9 / 5 * fontsize, length=fontsize)
  1081. ax1.set_xticks(x)
  1082.  
  1083. if savepng:
  1084. now = datetime.now()
  1085. dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
  1086. if dpi is None:
  1087. fig.savefig("savefig/find_k_" + string3 + dt_string + ".png", dpi=fig.dpi, bbox_inches="tight")
  1088. else:
  1089. fig.savefig("savefig/find_k_" + string3 + dt_string + ".png", dpi=dpi, bbox_inches="tight")
  1090.  
  1091. # To speed up all calculations remarkably: Create a dictionary with all the warp matrices:
  1092. # Run each time you start working this notebook to save the values in the global namespace.
  1093. start = time.time()
  1094. ImageRegistrationDic = {
  1095. 179: ImageRegistration(178,179),
  1096. 180: ImageRegistration(178,180),
  1097. 182: ImageRegistration(181,182),
  1098. 183: ImageRegistration(181,183),
  1099. 185: ImageRegistration(181,185),
  1100. 186: ImageRegistration(181,186),
  1101. 191: ImageRegistration(181,191),
  1102. 192: ImageRegistration(181,192)
  1103. }
  1104. end = time.time()
  1105. print("Calculating all warp matrices took", round((end-start), 2), "seconds.")
  1106. '''
  1107. # Create a nested dictionary for all cluster arrays, sum of squared distances, silhouette and calinski_harabasz scores
  1108.  
  1109. start = time.time()
  1110.  
  1111. kmeansDic = {}
  1112.  
  1113. silhouette_setting = True # True should be selected for complete data. If False is selected, then silhouette will just be assigned to 0 and omitted, saving a lot of computation time.
  1114. sample_size = 300000 # this number refers to the sample size in the silhouette_score() function which is computationally very expensive
  1115. k_lowest = 2
  1116. k_highest = 3
  1117. data_processing = 1
  1118. scale_all = True
  1119. absolute_values = False
  1120.  
  1121. compression = True
  1122.  
  1123. for k in range(k_lowest, k_highest+1):
  1124. start_k = time.time()
  1125. cluster_numbers, ssqd, silhouette, calinski_harabasz = kmeans_clustering(data_processing=data_processing, scale_all=scale_all, absolute_values=absolute_values, k=k, sample_size=sample_size, silhouette=silhouette_setting)
  1126. kmeansDic[str(k)] = {"cluster_numbers": cluster_numbers, "ssqd": ssqd, "silhouette": silhouette, "calinski_harabasz": calinski_harabasz}
  1127. end = time.time()
  1128. print("Finished entry in nested dictionary for k = " + str(k) + ". That took " + str(round((end-start_k), 2)) + " seconds.")
  1129.  
  1130. end = time.time()
  1131. print("Calculating all cluster arrays took", round((end-start), 2), "seconds.")
  1132.  
  1133. ### SAVE CACHE ###
  1134. # with these settings (copied from http://www.silx.org/doc/silx/0.2.0/modules/io/dictdump.html), the file size is much smaller, but loading takes much longer, so better not use it!
  1135.  
  1136. os.chdir("/asap3/petra3/gpfs/p06/2018/data/11005475/scratch_cc/Jan") # just make sure that we are talking about the correct location in the file system
  1137.  
  1138. if compression:
  1139. create_ds_args = {'compression': "gzip",
  1140. 'shuffle': True,
  1141. 'fletcher32': True}
  1142. else:
  1143. create_ds_args = None
  1144.  
  1145. saveh5 = input("Do you want to save the dictionary as a h5 file that other functions will refer to ? (y/n)\n")
  1146. if saveh5.lower() == "y":
  1147. if not compression:
  1148. fname = f"kmeansDic({k_lowest},{k_highest})_{sample_size}-samplesize_{silhouette_setting!s}-silhouette_{data_processing}-data-processing_{scale_all!s}-scale-all_{absolute_values!s}-absolute-values.h5"
  1149. else:
  1150. fname = f"kmeansDic({k_lowest},{k_highest})_{sample_size}-samplesize_{silhouette_setting!s}-silhouette_{data_processing}-data-processing_{scale_all!s}-scale-all_{absolute_values!s}-absolute-values_COMPRESSED.h5"
  1151. dicttoh5(kmeansDic, "Cache/"+ fname, create_dataset_args=create_ds_args)
  1152. print("The nested dictionary was saved as \"" + fname + "\" in the Cache folder.")
  1153. if saveh5.lower() == "n":
  1154. now = datetime.now()
  1155. dt_string = now.strftime("%d-%m-%Y_%H_%M_%S")
  1156. fname = "kmeansDic_no-save_" + dt_string + ".h5"
  1157. dicttoh5(kmeansDic, "Cache/" + fname)
  1158. print("The nested dictionary was saved as \"" + fname + "\" in the Cache folder.")
  1159.  
  1160. '''
  1161.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement