Advertisement
zefie

restore mono segments to stereo

Nov 22nd, 2024
241
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.36 KB | Source Code | 0 0
  1. import librosa
  2. import numpy as np
  3. import soundfile as sf
  4.  
  5. # Let's be real. Most of this was generated by AI.
  6.  
  7. # Load your stereo audio file
  8. y_stereo, sr = librosa.load('/mnt/d/instrumentals/motm.wav', sr=None, mono=False, dtype=np.float32)
  9.  
  10. # Ensure the audio is stereo
  11. if y_stereo.ndim != 2 or y_stereo.shape[0] != 2:
  12.     raise ValueError("Input file must be a stereo file.")
  13.  
  14. coh_threshold = 0.85  # High coherence indicates mono content
  15. # Define the threshold for determining mono sections
  16. energy_diff_threshold = 0.15  # Low energy difference indicates mono content
  17.  
  18. # Initialize an array to store the indices of mono sections
  19. frame_length = 2048
  20. hop_length = frame_length // 2
  21. mono_indices = []
  22.  
  23. def is_mono_segment(y_left, y_right, silence_threshold=1e-5):
  24.  
  25.     """
  26.    Determine if a segment is mono based on coherence and energy difference.
  27.    
  28.    Parameters:
  29.        y_left (numpy.ndarray): Left channel segment.
  30.        y_right (numpy.ndarray): Right channel segment.
  31.        coh_threshold (float): Threshold for coherence.
  32.        energy_diff_threshold (float): Threshold for energy difference.
  33.        silence_threshold (float): Minimum energy to detect non-silent segments.
  34.        
  35.    Returns:
  36.        bool: True if the segment is mono, False otherwise.
  37.    """
  38.     # Check if the segment is silent (skip silent frames)
  39.     if np.sum(y_left ** 2) + np.sum(y_right ** 2) < silence_threshold:
  40.         return False
  41.  
  42.     # Compute coherence
  43.     coh = np.corrcoef(y_left, y_right)[0, 1]
  44.  
  45.     # Compute energy difference
  46.     diff_energy = np.sum((y_left - y_right) ** 2) / (np.sum(y_left ** 2) + np.sum(y_right ** 2) + 1e-10)
  47.  
  48.     # Determine if the segment is mono
  49.     return coh > coh_threshold and diff_energy < energy_diff_threshold
  50.  
  51.  
  52. def add_stereo_delay(mono_signal, delay_ms, sr):
  53.     """
  54.    Simulate stereo by introducing a small delay to one channel.
  55.    
  56.    Parameters:
  57.        mono_signal (numpy.ndarray): The mono signal.
  58.        delay_ms (float): Delay in milliseconds.
  59.        sr (int): Sampling rate of the audio.
  60.    
  61.    Returns:
  62.        numpy.ndarray: Stereo signal with simulated stereo effect.
  63.    """
  64.     # Convert delay from milliseconds to samples
  65.     delay_samples = int((delay_ms / 1000) * sr)
  66.  
  67.     # Create a delayed version of the signal
  68.     delayed_signal = np.pad(mono_signal, (delay_samples, 0))[:len(mono_signal)]
  69.  
  70.     # Combine into stereo: left channel is original, right channel is delayed
  71.     stereo_signal = np.column_stack((mono_signal, delayed_signal))
  72.  
  73.     return stereo_signal
  74.  
  75. def simulate_stereo_with_phase_shift(mono_signal, phase_shift_degrees, sr):
  76.     """
  77.    Simulate stereo by introducing a phase shift between channels.
  78.    
  79.    Parameters:
  80.        mono_signal (numpy.ndarray): The mono signal.
  81.        phase_shift_degrees (float): Phase shift in degrees.
  82.        sr (int): Sampling rate of the audio.
  83.    
  84.    Returns:
  85.        numpy.ndarray: Stereo signal with simulated stereo effect.
  86.    """
  87.     phase_shift_samples = int((phase_shift_degrees / 360) * sr)
  88.     shifted_signal = np.roll(mono_signal, phase_shift_samples)
  89.     reverb_signal = librosa.effects.preemphasis(shifted_signal) * 0.2
  90.  
  91.     # Combine into stereo
  92.     stereo_signal = np.column_stack((mono_signal, shifted_signal + reverb_signal))
  93.    
  94.     #return np.column_stack((mono_signal, shifted_signal))
  95.     return stereo_signal
  96.  
  97. def calculate_phase_shift(y_stereo, sr, frame_length=2048, hop_length=1024):
  98.     """
  99.    Calculate the phase shift between the left and right channels in a stereo signal.
  100.  
  101.    Parameters:
  102.        y_stereo (numpy.ndarray): Stereo audio signal (2, n_samples).
  103.        sr (int): Sampling rate of the audio.
  104.        frame_length (int): STFT frame length.
  105.        hop_length (int): STFT hop length.
  106.  
  107.    Returns:
  108.        float: Average phase shift in degrees between the left and right channels.
  109.    """
  110.     # Extract the left and right channels
  111.     y_left = y_stereo[0]
  112.     y_right = y_stereo[1]
  113.  
  114.     # Compute STFT for both channels
  115.     S_left = librosa.stft(y_left, n_fft=frame_length, hop_length=hop_length)
  116.     S_right = librosa.stft(y_right, n_fft=frame_length, hop_length=hop_length)
  117.  
  118.     # Compute phase difference between left and right channels
  119.     phase_left = np.angle(S_left)
  120.     phase_right = np.angle(S_right)
  121.     phase_diff = phase_right - phase_left  # Phase difference in radians
  122.  
  123.     # Unwrap the phase to avoid discontinuities
  124.     phase_diff_unwrapped = np.unwrap(phase_diff, axis=0)
  125.  
  126.     # Calculate the mean phase difference across frequencies and frames
  127.     mean_phase_diff = np.mean(phase_diff_unwrapped)
  128.  
  129.     # Convert to degrees
  130.     phase_shift_degrees = np.degrees(mean_phase_diff)
  131.  
  132.     return phase_shift_degrees
  133.  
  134. # Function to restore mono sections
  135. def restore_mono_sections(signal, indices, frame_length, hop_length):
  136.     restored_signal = np.copy(signal)
  137.     GAIN_FACTOR = 1.25
  138.     for t in indices:
  139.         y_left = signal[0, t:t+frame_length]
  140.         y_right = signal[1, t:t+frame_length]
  141.  
  142.         # Use the average of the channels to create a mono section
  143.         mono_frame = (y_left + y_right) / 2
  144.  
  145.         # Generate simulated stereo for the mono frame
  146.         stereo_frame = simulate_stereo_with_phase_shift(mono_frame, 270, sr)
  147.  
  148.         stereo_frame *= GAIN_FACTOR
  149.        
  150.         # Assign the stereo frame to the correct slices of the `restored_signal`
  151.         restored_signal[0, t:t+frame_length] = stereo_frame[:, 0]  # Left channel
  152.         restored_signal[1, t:t+frame_length] = stereo_frame[:, 1]  # Right channel
  153.  
  154.     return restored_signal
  155.    
  156. mono_indices = []
  157.  
  158. import scipy.signal
  159.  
  160. # Smooth mono detection with a moving average filter
  161. frame_status = np.zeros(y_stereo.shape[1] // hop_length)
  162. for t in range(len(frame_status)):
  163.     start = t * hop_length
  164.     end = start + frame_length
  165.     if is_mono_segment(y_stereo[0, start:end], y_stereo[1, start:end]):
  166.         frame_status[t] = 1
  167.  
  168. # Smooth the binary decision with a moving average
  169. smoothed_status = scipy.signal.convolve(frame_status, np.ones(5) / 5, mode='same')
  170.  
  171. # Identify smoothed mono indices
  172. mono_indices = np.where(smoothed_status > 0.5)[0] * hop_length
  173.  
  174. # Restore the audio
  175. y_restored_stereo = restore_mono_sections(y_stereo, mono_indices, frame_length, hop_length)
  176.  
  177. # Save your restored audio file
  178. sf.write('/mnt/d/instrumentals/motm_test.wav', y_restored_stereo.T, sr)
  179.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement