Advertisement
VladNitu

1_histo_vlad

Mar 31st, 2024
102
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.08 KB | None | 0 0
  1. def custom_histogram(data, bins, density = False):
  2. """
  3. A custom function to compute histogram data without using np.histogram, with an option to scale the histogram
  4. to represent a probability distribution.
  5.  
  6. Parameters:
  7. - data: array-like, the input data.
  8. - bins: int, the number of bins.
  9. - density: bool, if True, scale the histogram counts to represent a probability distribution.
  10.  
  11. Returns:
  12. - counts: array of the counts in each bin, or the probability densities if density=True.
  13. - bin_edges: array of the bin edges.
  14. """
  15. counts = [0] * bins # create array of zeros
  16.  
  17.  
  18.  
  19. bin_edges = [0] * (bins + 1)
  20.  
  21. # YOUR CODE HERE
  22.  
  23. Min, Max = 1e9, -1e9
  24. for point in data:
  25. Min = min(Min, point)
  26. Max = max(Max, point)
  27.  
  28. bin_width = (Max - Min) / bins # All bins have the same width
  29.  
  30. bin_edges[0]= Min
  31. bin_edges[bins] = Max
  32. for i in range(1, bins): # For all bins beside the last one: l <= x < r (inclusive left, exclusive right)
  33. bin_edges[i] = bin_edges[i - 1] + bin_width
  34.  
  35. for i in range(bins - 1):
  36. l, r = bin_edges[i], bin_edges[i + 1] # bounds of the bin
  37. cnt = 0
  38. for point in data:
  39. if l <= point < r:
  40. cnt += 1
  41. counts[i] = cnt
  42.  
  43. i = bins - 1 # Edge case: last bin: inclusidve to the right
  44. l, r = bin_edges[i], bin_edges[i + 1] # bounds of the bin
  45. cnt = 0
  46. for point in data:
  47. if l <= point <= r:
  48. cnt += 1
  49. counts[i] = cnt
  50.  
  51. if density: # Create histogram w/ density scaling
  52. area_histogram = sum([counts[i] * bin_width for i in range(bins)])
  53. else: # NO density scaling
  54. area_histogram = 1
  55.  
  56. counts = [counts[i] / area_histogram for i in range(len(counts))]
  57.  
  58.  
  59. return counts, bin_edges
  60.  
  61. # Example usage with density=True
  62. samples = np.random.randn(10000) # Generate some data
  63. n_bins = 10 # Number of bins
  64.  
  65. # Generate histogram data with density scaling
  66. hist, bin_edges = custom_histogram(samples, n_bins, density = True)
  67.  
  68. # Generate reference histogram data with density scaling, using NumPy
  69. reference_hist, reference_bin_edges = np.histogram(samples, bins = n_bins, density = True)
  70.  
  71. # Plotting the histogram as a probability distribution
  72. fig, ax = plt.subplots(1, 2, figsize = (15, 7))
  73. bin_centers = (np.diff(bin_edges) / 2 + bin_edges[:-1])
  74. ax[0].bar(bin_centers, hist, width = np.diff(bin_centers)[0])
  75. ax[0].set_xlabel('Value')
  76. ax[0].set_ylabel('Probability Density')
  77. ax[0].set_title('Custom Histogram as Probability Distribution')
  78.  
  79. # Checking the answer using plt.hist()
  80. ax[1].hist(samples, bins = n_bins, density = True)
  81. ax[1].set_xlabel('Value')
  82. ax[1].set_ylabel('Probability Density')
  83. ax[1].set_title('Actual Histogram as Probability Distribution')
  84. plt.show()
  85.  
  86. assert len(hist) == len(reference_hist) and (np.abs(hist - reference_hist) < 1e-6).all() # histogram
  87. assert len(bin_edges) == len(reference_bin_edges) and (np.abs(bin_edges - reference_bin_edges) < 1e-6).all() # bin edges
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement