import librosa
import numpy as np
import scipy.signal
import soundfile as sf

# Let's be real. Most of this was generated by AI.

# Load your stereo audio file (sr=None keeps the file's native sample rate)
y_stereo, sr = librosa.load('/mnt/d/instrumentals/motm.wav', sr=None, mono=False, dtype=np.float32)
# Ensure the audio is stereo
if y_stereo.ndim != 2 or y_stereo.shape[0] != 2:
    raise ValueError("Input file must be a stereo file.")
# Thresholds for classifying a frame as mono
coh_threshold = 0.85          # high inter-channel correlation indicates mono content
energy_diff_threshold = 0.15  # low left/right energy difference indicates mono content

# Analysis frame parameters (50% overlap)
frame_length = 2048
hop_length = frame_length // 2
def is_mono_segment(y_left, y_right, silence_threshold=1e-5):
    """
    Determine if a segment is mono based on inter-channel correlation and energy difference.
    Uses the module-level coh_threshold and energy_diff_threshold.

    Parameters:
        y_left (numpy.ndarray): Left channel segment.
        y_right (numpy.ndarray): Right channel segment.
        silence_threshold (float): Minimum energy below which a segment is treated as silent.

    Returns:
        bool: True if the segment is mono, False otherwise.
    """
    # Skip silent frames: treat them as "not mono" so they are left untouched
    total_energy = np.sum(y_left ** 2) + np.sum(y_right ** 2)
    if total_energy < silence_threshold:
        return False
    # Pearson correlation between channels; near 1.0 means both carry the same signal
    coh = np.corrcoef(y_left, y_right)[0, 1]
    # Energy of the left/right difference, normalized by the total energy
    diff_energy = np.sum((y_left - y_right) ** 2) / (total_energy + 1e-10)
    return coh > coh_threshold and diff_energy < energy_diff_threshold
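# Quick sanity check (not in the original paste; synthetic data only):
# duplicated channels should be flagged mono, independent noise should not.
_tone = np.sin(2 * np.pi * 440 * np.arange(frame_length) / 44100).astype(np.float32)
assert is_mono_segment(_tone, _tone.copy())
_rng = np.random.default_rng(0)
assert not is_mono_segment(_rng.standard_normal(frame_length).astype(np.float32),
                           _rng.standard_normal(frame_length).astype(np.float32))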
def add_stereo_delay(mono_signal, delay_ms, sr):
    """
    Simulate stereo by introducing a small (Haas-style) delay on one channel.
    (Defined as an alternative widener; not used in the processing below.)

    Parameters:
        mono_signal (numpy.ndarray): The mono signal.
        delay_ms (float): Delay in milliseconds.
        sr (int): Sampling rate of the audio.

    Returns:
        numpy.ndarray: (n_samples, 2) stereo signal with a simulated stereo effect.
    """
    # Convert the delay from milliseconds to samples
    delay_samples = int((delay_ms / 1000) * sr)
    # Prepend zeros, then truncate so the delayed copy keeps the original length
    delayed_signal = np.pad(mono_signal, (delay_samples, 0))[:len(mono_signal)]
    # Left channel is the original, right channel is the delayed copy
    return np.column_stack((mono_signal, delayed_signal))
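# Example usage (illustrative values, not part of the original script):
# a 10 ms delay at 44.1 kHz shifts the right channel by 441 samples.
_demo = add_stereo_delay(np.ones(1000, dtype=np.float32), delay_ms=10, sr=44100)
assert _demo.shape == (1000, 2)
assert np.all(_demo[:441, 1] == 0)  # the delayed channel opens with silence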
def simulate_stereo_with_phase_shift(mono_signal, phase_shift_degrees, sr):
    """
    Simulate stereo by introducing a time offset ("phase shift") between channels.

    Parameters:
        mono_signal (numpy.ndarray): The mono signal.
        phase_shift_degrees (float): Phase shift in degrees, mapped onto a one-second cycle.
        sr (int): Sampling rate of the audio.

    Returns:
        numpy.ndarray: (n_samples, 2) stereo signal with a simulated stereo effect.
    """
    # Map degrees to samples relative to a one-second cycle; np.roll wraps,
    # so the effective shift is phase_shift_samples % len(mono_signal)
    phase_shift_samples = int((phase_shift_degrees / 360) * sr)
    shifted_signal = np.roll(mono_signal, phase_shift_samples)
    # Pre-emphasis (a first-order high-pass, not a true reverb) mixed in quietly
    # to decorrelate the channels
    brightened_signal = librosa.effects.preemphasis(shifted_signal) * 0.2
    # Left channel is the original, right channel is the shifted + brightened copy
    return np.column_stack((mono_signal, shifted_signal + brightened_signal))
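# Worked example (illustrative values, not from the original paste): at
# sr = 44100, a 270-degree setting maps to int(0.75 * 44100) = 33075 samples,
# and np.roll reduces that modulo the frame length: 33075 % 2048 == 307.
assert int((270 / 360) * 44100) % 2048 == 307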
def calculate_phase_shift(y_stereo, sr, frame_length=2048, hop_length=1024):
    """
    Calculate the average phase shift between the left and right channels of a stereo signal.
    (Diagnostic helper; not used in the processing below.)

    Parameters:
        y_stereo (numpy.ndarray): Stereo audio signal of shape (2, n_samples).
        sr (int): Sampling rate of the audio.
        frame_length (int): STFT frame length.
        hop_length (int): STFT hop length.

    Returns:
        float: Mean phase shift in degrees between the left and right channels.
    """
    # Extract the left and right channels
    y_left = y_stereo[0]
    y_right = y_stereo[1]
    # Compute the STFT of each channel
    S_left = librosa.stft(y_left, n_fft=frame_length, hop_length=hop_length)
    S_right = librosa.stft(y_right, n_fft=frame_length, hop_length=hop_length)
    # Per-bin phase difference in radians
    phase_diff = np.angle(S_right) - np.angle(S_left)
    # Unwrap along the frequency axis to avoid 2*pi discontinuities
    phase_diff_unwrapped = np.unwrap(phase_diff, axis=0)
    # Average across all frequencies and frames, then convert to degrees
    return np.degrees(np.mean(phase_diff_unwrapped))
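# Example diagnostic call (assumption: illustrative; it runs a full-file STFT,
# so it is left commented out here):
# print(f"Mean L/R phase shift: {calculate_phase_shift(y_stereo, sr):.1f} degrees")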
# Replace detected mono sections with simulated-stereo versions
def restore_mono_sections(signal, indices, frame_length, sr):
    restored_signal = np.copy(signal)
    GAIN_FACTOR = 1.25  # boosts the widened sections slightly; may clip on hot mixes
    for t in indices:
        y_left = signal[0, t:t+frame_length]
        y_right = signal[1, t:t+frame_length]
        # Collapse the near-identical channels into a single mono frame
        mono_frame = (y_left + y_right) / 2
        # Re-widen the mono frame with the phase-shift simulator
        stereo_frame = simulate_stereo_with_phase_shift(mono_frame, 270, sr) * GAIN_FACTOR
        # Overlapping frames simply overwrite one another (no crossfade); each frame
        # is rebuilt from the original signal, so the result stays consistent
        restored_signal[0, t:t+frame_length] = stereo_frame[:, 0]  # left channel
        restored_signal[1, t:t+frame_length] = stereo_frame[:, 1]  # right channel
    return restored_signal
# Classify each analysis frame as mono (1) or stereo (0)
frame_status = np.zeros(y_stereo.shape[1] // hop_length)
for t in range(len(frame_status)):
    start = t * hop_length
    end = start + frame_length
    if is_mono_segment(y_stereo[0, start:end], y_stereo[1, start:end]):
        frame_status[t] = 1

# Smooth the binary decisions with a 5-frame moving average to suppress
# isolated false positives and negatives
smoothed_status = scipy.signal.convolve(frame_status, np.ones(5) / 5, mode='same')

# Convert the smoothed frame decisions back to sample offsets
mono_indices = np.where(smoothed_status > 0.5)[0] * hop_length
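# Optional report (not in the original paste): summarize the detected mono
# regions in seconds using librosa's sample-to-time conversion.
if len(mono_indices):
    mono_times = librosa.samples_to_time(mono_indices, sr=sr)
    print(f"Detected {len(mono_indices)} mono frames, "
          f"first at {mono_times[0]:.2f}s, last at {mono_times[-1]:.2f}s")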
# Restore the audio
y_restored_stereo = restore_mono_sections(y_stereo, mono_indices, frame_length, sr)

# Save the restored file; soundfile expects (n_samples, n_channels), hence the transpose
sf.write('/mnt/d/instrumentals/motm_test.wav', y_restored_stereo.T, sr)