Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def custom_histogram(data, bins, density = False):
    """
    Compute histogram counts and bin edges without using np.histogram, with an option to
    scale the histogram so that it represents a probability distribution.

    The range [min(data), max(data)] is split into `bins` equal-width bins. Every bin is
    half-open, [left, right), except the last one, which also includes its right edge so
    that the maximum value is counted.

    Degenerate inputs follow the np.histogram range convention: empty data uses the range
    [0, 1], and data whose values are all equal (say x) uses [x - 0.5, x + 0.5], so the
    bin width is never zero.

    Parameters:
    - data: iterable of real numbers, the input data (a one-shot iterator is accepted).
    - bins: int, the number of bins; must be at least 1.
    - density: bool, if True, scale the histogram counts to represent a probability
      distribution (the bar areas sum to 1).

    Returns:
    - counts: list with the number of points in each bin, or the probability densities
      if density=True. For empty data with density=True every density is 0.0.
    - bin_edges: list of the bins + 1 bin edges.

    Raises:
    - ValueError: if bins is smaller than 1.
    """
    # Local import so the function does not depend on the file's import block.
    from bisect import bisect_right

    if bins < 1:
        raise ValueError("bins must be a positive integer")

    # Materialise once: the data is traversed several times below.
    values = list(data)

    # Take the range from the data itself instead of from fixed sentinel values,
    # which would be wrong for data lying outside the sentinels.
    if values:
        lower, upper = min(values), max(values)
    else:
        lower, upper = 0.0, 1.0
    if lower == upper:
        # All points are identical: widen the range so the bins have non-zero width.
        lower, upper = lower - 0.5, upper + 0.5

    bin_width = (upper - lower) / bins  # All bins have the same width

    # Compute each inner edge directly from the lower bound (no accumulated rounding
    # error) and pin the last edge to the exact maximum.
    bin_edges = [lower + i * bin_width for i in range(bins)]
    bin_edges.append(upper)

    # bisect_right(...) - 1 is the largest i with bin_edges[i] <= point, i.e. the bin
    # with l <= x < r. A point equal to the maximum lands one past the end and is
    # clamped into the last bin, which is inclusive to the right.
    counts = [0] * bins
    last_bin = bins - 1
    for point in values:
        index = bisect_right(bin_edges, point) - 1
        counts[min(index, last_bin)] += 1

    if density:  # Create histogram w/ density scaling
        total_count = sum(counts)
        if total_count == 0:
            # No data: there is nothing to normalise.
            return [0.0] * bins, bin_edges
        area_histogram = total_count * bin_width
        counts = [count / area_histogram for count in counts]

    return counts, bin_edges
# Demo: compare custom_histogram against NumPy / Matplotlib with density=True
samples = np.random.randn(10000)  # 10000 draws used as test data
n_bins = 10  # Number of bins shared by both implementations

# Density-scaled histogram from the custom implementation ...
hist, bin_edges = custom_histogram(samples, n_bins, density=True)
# ... and the NumPy reference computed on the same samples
reference_hist, reference_bin_edges = np.histogram(samples, bins=n_bins, density=True)

# Two panels side by side: custom result on the left, plt.hist() on the right
fig, ax = plt.subplots(1, 2, figsize=(15, 7))

# Bars are drawn at the bin midpoints; the spacing of the midpoints is the bar width
bin_centers = (np.diff(bin_edges) / 2 + bin_edges[:-1])
ax[0].bar(bin_centers, hist, width=np.diff(bin_centers)[0])

# Checking the answer using plt.hist()
ax[1].hist(samples, bins=n_bins, density=True)

panel_titles = (
    'Custom Histogram as Probability Distribution',
    'Actual Histogram as Probability Distribution',
)
for panel, title in zip(ax, panel_titles):
    panel.set_xlabel('Value')
    panel.set_ylabel('Probability Density')
    panel.set_title(title)

plt.show()

# The custom output must agree with NumPy in length and, element-wise, to within 1e-6
assert len(hist) == len(reference_hist)  # histogram
assert (np.abs(hist - reference_hist) < 1e-6).all()
assert len(bin_edges) == len(reference_bin_edges)  # bin edges
assert (np.abs(bin_edges - reference_bin_edges) < 1e-6).all()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement