import librosa
import numpy as np
import scipy.signal
import soundfile as sf

# Let's be real. Most of this was generated by AI.

# Load your stereo audio file (sr=None keeps the file's native sample rate)
y_stereo, sr = librosa.load('/mnt/d/instrumentals/motm.wav', sr=None, mono=False, dtype=np.float32)
# Ensure the audio is stereo
if y_stereo.ndim != 2 or y_stereo.shape[0] != 2:
    raise ValueError("Input file must be a stereo file.")
# Thresholds for classifying a frame as mono
coh_threshold = 0.85          # high inter-channel correlation indicates mono content
energy_diff_threshold = 0.15  # low left/right energy difference indicates mono content

# Analysis frame parameters (50% overlap)
frame_length = 2048
hop_length = frame_length // 2
def is_mono_segment(y_left, y_right, silence_threshold=1e-5):
    """
    Determine if a segment is mono based on inter-channel correlation and energy difference.
    Uses the module-level coh_threshold and energy_diff_threshold.

    Parameters:
        y_left (numpy.ndarray): Left channel segment.
        y_right (numpy.ndarray): Right channel segment.
        silence_threshold (float): Minimum energy below which a segment is treated as silent.

    Returns:
        bool: True if the segment is mono, False otherwise.
    """
    # Skip silent frames: treat them as "not mono" so they are left untouched
    total_energy = np.sum(y_left ** 2) + np.sum(y_right ** 2)
    if total_energy < silence_threshold:
        return False
    # Pearson correlation between channels; near 1.0 means both carry the same signal
    coh = np.corrcoef(y_left, y_right)[0, 1]
    # Energy of the left/right difference, normalized by the total energy
    diff_energy = np.sum((y_left - y_right) ** 2) / (total_energy + 1e-10)
    return coh > coh_threshold and diff_energy < energy_diff_threshold
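# Quick sanity check (not in the original paste; synthetic data only):
# duplicated channels should be flagged mono, independent noise should not.
_tone = np.sin(2 * np.pi * 440 * np.arange(frame_length) / 44100).astype(np.float32)
assert is_mono_segment(_tone, _tone.copy())
_rng = np.random.default_rng(0)
assert not is_mono_segment(_rng.standard_normal(frame_length).astype(np.float32),
                           _rng.standard_normal(frame_length).astype(np.float32))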
def add_stereo_delay(mono_signal, delay_ms, sr):
    """
    Simulate stereo by introducing a small (Haas-style) delay on one channel.
    (Defined as an alternative widener; not used in the processing below.)

    Parameters:
        mono_signal (numpy.ndarray): The mono signal.
        delay_ms (float): Delay in milliseconds.
        sr (int): Sampling rate of the audio.

    Returns:
        numpy.ndarray: (n_samples, 2) stereo signal with a simulated stereo effect.
    """
    # Convert the delay from milliseconds to samples
    delay_samples = int((delay_ms / 1000) * sr)
    # Prepend zeros, then truncate so the delayed copy keeps the original length
    delayed_signal = np.pad(mono_signal, (delay_samples, 0))[:len(mono_signal)]
    # Left channel is the original, right channel is the delayed copy
    return np.column_stack((mono_signal, delayed_signal))
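# Example usage (illustrative values, not part of the original script):
# a 10 ms delay at 44.1 kHz shifts the right channel by 441 samples.
_demo = add_stereo_delay(np.ones(1000, dtype=np.float32), delay_ms=10, sr=44100)
assert _demo.shape == (1000, 2)
assert np.all(_demo[:441, 1] == 0)  # the delayed channel opens with silence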
def simulate_stereo_with_phase_shift(mono_signal, phase_shift_degrees, sr):
    """
    Simulate stereo by introducing a time offset ("phase shift") between channels.

    Parameters:
        mono_signal (numpy.ndarray): The mono signal.
        phase_shift_degrees (float): Phase shift in degrees, mapped onto a one-second cycle.
        sr (int): Sampling rate of the audio.

    Returns:
        numpy.ndarray: (n_samples, 2) stereo signal with a simulated stereo effect.
    """
    # Map degrees to samples relative to a one-second cycle; np.roll wraps,
    # so the effective shift is phase_shift_samples % len(mono_signal)
    phase_shift_samples = int((phase_shift_degrees / 360) * sr)
    shifted_signal = np.roll(mono_signal, phase_shift_samples)
    # Pre-emphasis (a first-order high-pass, not a true reverb) mixed in quietly
    # to decorrelate the channels
    brightened_signal = librosa.effects.preemphasis(shifted_signal) * 0.2
    # Left channel is the original, right channel is the shifted + brightened copy
    return np.column_stack((mono_signal, shifted_signal + brightened_signal))
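# Worked example (illustrative values, not from the original paste): at
# sr = 44100, a 270-degree setting maps to int(0.75 * 44100) = 33075 samples,
# and np.roll reduces that modulo the frame length: 33075 % 2048 == 307.
assert int((270 / 360) * 44100) % 2048 == 307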
def calculate_phase_shift(y_stereo, sr, frame_length=2048, hop_length=1024):
    """
    Calculate the average phase shift between the left and right channels of a stereo signal.
    (Diagnostic helper; not used in the processing below.)

    Parameters:
        y_stereo (numpy.ndarray): Stereo audio signal of shape (2, n_samples).
        sr (int): Sampling rate of the audio.
        frame_length (int): STFT frame length.
        hop_length (int): STFT hop length.

    Returns:
        float: Mean phase shift in degrees between the left and right channels.
    """
    # Extract the left and right channels
    y_left = y_stereo[0]
    y_right = y_stereo[1]
    # Compute the STFT of each channel
    S_left = librosa.stft(y_left, n_fft=frame_length, hop_length=hop_length)
    S_right = librosa.stft(y_right, n_fft=frame_length, hop_length=hop_length)
    # Per-bin phase difference in radians
    phase_diff = np.angle(S_right) - np.angle(S_left)
    # Unwrap along the frequency axis to avoid 2*pi discontinuities
    phase_diff_unwrapped = np.unwrap(phase_diff, axis=0)
    # Average across all frequencies and frames, then convert to degrees
    return np.degrees(np.mean(phase_diff_unwrapped))
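# Example diagnostic call (assumption: illustrative; it runs a full-file STFT,
# so it is left commented out here):
# print(f"Mean L/R phase shift: {calculate_phase_shift(y_stereo, sr):.1f} degrees")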
# Replace detected mono sections with simulated-stereo versions
def restore_mono_sections(signal, indices, frame_length, sr):
    restored_signal = np.copy(signal)
    GAIN_FACTOR = 1.25  # boosts the widened sections slightly; may clip on hot mixes
    for t in indices:
        y_left = signal[0, t:t+frame_length]
        y_right = signal[1, t:t+frame_length]
        # Collapse the near-identical channels into a single mono frame
        mono_frame = (y_left + y_right) / 2
        # Re-widen the mono frame with the phase-shift simulator
        stereo_frame = simulate_stereo_with_phase_shift(mono_frame, 270, sr) * GAIN_FACTOR
        # Overlapping frames simply overwrite one another (no crossfade); each frame
        # is rebuilt from the original signal, so the result stays consistent
        restored_signal[0, t:t+frame_length] = stereo_frame[:, 0]  # left channel
        restored_signal[1, t:t+frame_length] = stereo_frame[:, 1]  # right channel
    return restored_signal
# Classify each analysis frame as mono (1) or stereo (0)
frame_status = np.zeros(y_stereo.shape[1] // hop_length)
for t in range(len(frame_status)):
    start = t * hop_length
    end = start + frame_length
    if is_mono_segment(y_stereo[0, start:end], y_stereo[1, start:end]):
        frame_status[t] = 1

# Smooth the binary decisions with a 5-frame moving average to suppress
# isolated false positives and negatives
smoothed_status = scipy.signal.convolve(frame_status, np.ones(5) / 5, mode='same')

# Convert the smoothed frame decisions back to sample offsets
mono_indices = np.where(smoothed_status > 0.5)[0] * hop_length
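# Optional report (not in the original paste): summarize the detected mono
# regions in seconds using librosa's sample-to-time conversion.
if len(mono_indices):
    mono_times = librosa.samples_to_time(mono_indices, sr=sr)
    print(f"Detected {len(mono_indices)} mono frames, "
          f"first at {mono_times[0]:.2f}s, last at {mono_times[-1]:.2f}s")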
# Restore the audio
y_restored_stereo = restore_mono_sections(y_stereo, mono_indices, frame_length, sr)

# Save the restored file; soundfile expects (n_samples, n_channels), hence the transpose
sf.write('/mnt/d/instrumentals/motm_test.wav', y_restored_stereo.T, sr)