ria-toolkit-oss/ria_toolkit_oss_cli/ria_toolkit_oss/combine.py

495 lines
18 KiB
Python

"""Combine command - Combine multiple recordings into a single file."""
import copy
import time
from pathlib import Path
import click
import numpy as np
from utils.data import Recording
from utils.io import from_npy_legacy, load_recording
from utils_cli.utils.common import (
echo_progress,
echo_verbose,
format_sample_count,
save_recording,
)
def load_recording_list(inputs, legacy, verbose, quiet):
    """Load every input path into a recording, preserving input order.

    Args:
        inputs: Iterable of input file paths (str or Path).
        legacy: If True, each file is read with ``from_npy_legacy``;
            otherwise ``load_recording`` is used.
        verbose: Forwarded to ``echo_verbose`` for the per-file progress line.
        quiet: Accepted for signature compatibility; not used in this function.

    Returns:
        list: The loaded recordings, one per input, in input order.

    Raises:
        click.ClickException: If loading or inspecting any input fails. The
            underlying exception is attached as the cause.
    """
    recordings = []
    for input_path in inputs:
        input_path = Path(input_path)
        try:
            if legacy:
                recording = from_npy_legacy(str(input_path))
            else:
                recording = load_recording(str(input_path))
            # Store original filename in metadata if not present, so the
            # combined output can list where each input came from.
            if "original_file" not in recording._metadata:
                recording._metadata["original_file"] = input_path.name
            num_samples = recording.data.shape[1]
            echo_verbose(f" Loading {input_path.name} ({format_sample_count(num_samples)} samples)... Done", verbose)
            recordings.append(recording)
        except Exception as e:
            # Chain explicitly so the root cause stays attached to the
            # user-facing error.
            raise click.ClickException(f"Failed to load {input_path}: {e}") from e
    return recordings
def pad(recordings, max_len, verbose):
    """Zero-pad the end of every recording shorter than ``max_len``.

    Args:
        recordings: Objects exposing a 2-D ``data`` array; axis 1 is padded.
        max_len: Target length along axis 1.
        verbose: If True, echo a header and one line per padded recording.

    Returns:
        list: One array per recording, in input order. Recordings that are
        not shorter than ``max_len`` contribute their ``data`` array as-is.
    """
    if verbose:
        click.echo(f"Aligning (zero-pad to {format_sample_count(max_len)} samples)...")
    aligned = []
    for number, rec in enumerate(recordings, start=1):
        shortfall = max_len - rec.data.shape[1]
        if shortfall <= 0:
            # Already long enough: pass the original array through.
            aligned.append(rec.data)
            continue
        if verbose:
            click.echo(f" Recording {number}: +{format_sample_count(shortfall)} zeros at end")
        aligned.append(np.pad(rec.data, ((0, 0), (0, shortfall)), mode="constant"))
    return aligned
def pad_start(recordings, max_len, pad_start_sample, verbose):
    """Place each shorter recording at a fixed sample offset in a zero buffer.

    Recordings that are not shorter than ``max_len`` are passed through
    untouched (their ``data`` array is returned as-is).

    Args:
        recordings: Objects exposing a 2-D ``data`` array; axis 1 is padded.
        max_len: Target length along axis 1.
        pad_start_sample: Number of zeros inserted before each shorter
            recording; the remainder up to ``max_len`` is padded after it.
        verbose: If True, echo a header and one line per padded recording.

    Returns:
        list: One array per recording, in input order.

    Raises:
        click.ClickException: If a recording needs padding and
            ``pad_start_sample`` is negative, or if the offset plus the
            recording's length exceeds ``max_len``.
    """
    if verbose:
        click.echo(f"Aligning (pad-start at sample {format_sample_count(pad_start_sample)})...")
    aligned = []
    for i, rec in enumerate(recordings):
        rec_len = rec.data.shape[1]
        if rec_len >= max_len:
            aligned.append(rec.data)
            continue
        if pad_start_sample < 0:
            # np.pad rejects negative widths with a bare ValueError; report
            # the bad option value as a CLI error instead.
            raise click.ClickException(
                f"Invalid --pad-start-sample\n"
                f"Start sample {pad_start_sample} must be >= 0"
            )
        pad_before = pad_start_sample
        pad_after = max_len - rec_len - pad_before
        if pad_after < 0:
            raise click.ClickException(
                f"Invalid --pad-start-sample\n"
                f"Start sample {format_sample_count(pad_start_sample)} with recording length "
                f"{format_sample_count(rec_len)} exceeds max length {format_sample_count(max_len)}"
            )
        padded = np.pad(rec.data, ((0, 0), (pad_before, pad_after)), mode="constant")
        if verbose:
            click.echo(
                f" Recording {i+1}: +{format_sample_count(pad_before)} zeros before, "
                f"+{format_sample_count(pad_after)} zeros after"
            )
        aligned.append(padded)
    return aligned
def pad_center(recordings, max_len, verbose):
    """Center every shorter recording inside a zero buffer of ``max_len``.

    When the total padding is odd, the extra zero goes after the data.

    Args:
        recordings: Objects exposing a 2-D ``data`` array; axis 1 is padded.
        max_len: Target length along axis 1.
        verbose: If True, echo a header and one line per padded recording.

    Returns:
        list: One array per recording, in input order. Recordings that are
        not shorter than ``max_len`` contribute their ``data`` array as-is.
    """
    if verbose:
        click.echo(f"Aligning (pad-center in {format_sample_count(max_len)} samples)...")
    aligned = []
    for number, rec in enumerate(recordings, start=1):
        shortfall = max_len - rec.data.shape[1]
        if shortfall <= 0:
            aligned.append(rec.data)
            continue
        # Floor half in front; whatever is left (half, or half + 1) behind.
        leading, odd = divmod(shortfall, 2)
        trailing = leading + odd
        if verbose:
            click.echo(
                f" Recording {number}: +{format_sample_count(leading)} zeros before, "
                f"+{format_sample_count(trailing)} zeros after"
            )
        aligned.append(np.pad(rec.data, ((0, 0), (leading, trailing)), mode="constant"))
    return aligned
def pad_end(recordings, max_len, verbose):
    """Align recordings at their ends by zero-padding the front of shorter ones.

    Args:
        recordings: Objects exposing a 2-D ``data`` array; axis 1 is padded.
        max_len: Target length along axis 1.
        verbose: If True, echo a header and one line per padded recording.

    Returns:
        list: One array per recording, in input order. Recordings that are
        not shorter than ``max_len`` contribute their ``data`` array as-is.
    """
    if verbose:
        click.echo(f"Aligning (pad-end, align to {format_sample_count(max_len)} samples)...")
    aligned = []
    for number, rec in enumerate(recordings, start=1):
        shortfall = max_len - rec.data.shape[1]
        if shortfall <= 0:
            aligned.append(rec.data)
            continue
        if verbose:
            click.echo(f" Recording {number}: +{format_sample_count(shortfall)} zeros at beginning")
        aligned.append(np.pad(rec.data, ((0, 0), (shortfall, 0)), mode="constant"))
    return aligned
def repeat(recordings, max_len, verbose):
    """Tile every shorter recording back-to-back until it spans ``max_len``.

    The tiled data is cut at exactly ``max_len`` samples, so the final
    repetition may be partial.

    Args:
        recordings: Objects exposing a 2-D ``data`` array; axis 1 is tiled.
        max_len: Target length along axis 1.
        verbose: If True, echo a header and one line per repeated recording.

    Returns:
        list: One array per recording, in input order. Recordings that are
        not shorter than ``max_len`` contribute their ``data`` array as-is.
    """
    if verbose:
        click.echo(f"Aligning (repeat pattern to match {format_sample_count(max_len)} samples)...")
    aligned = []
    for number, rec in enumerate(recordings, start=1):
        samples = rec.data
        if samples.shape[1] >= max_len:
            aligned.append(samples)
            continue
        # Enough whole copies to cover max_len, then trim the overshoot.
        n_repeats = int(np.ceil(max_len / samples.shape[1]))
        tiled = np.tile(samples, (1, n_repeats))[:, :max_len]
        if verbose:
            click.echo(
                f" Recording {number}: repeated {n_repeats} times, "
                f"truncated to {format_sample_count(max_len)} samples"
            )
        aligned.append(tiled)
    return aligned
def repeat_spaced(recordings, max_len, repeat_spacing, verbose):
    """Repeat every shorter recording with a gap of zeros between copies.

    Copies start at sample 0 and then every ``pattern length + repeat_spacing``
    samples; a copy that would run past ``max_len`` is cut short.

    Args:
        recordings: Objects exposing a 2-D ``data`` array; axis 1 is filled.
        max_len: Target length along axis 1.
        repeat_spacing: Number of zero samples between consecutive copies.
        verbose: If True, echo a header and one line per repeated recording.

    Returns:
        list: One array per recording, in input order. Recordings that are
        not shorter than ``max_len`` contribute their ``data`` array as-is.

    Raises:
        click.ClickException: If ``repeat_spacing`` is not positive.
    """
    if repeat_spacing <= 0:
        raise click.ClickException("Error: --align-mode repeat-spaced requires --repeat-spacing SAMPLES (must be > 0)")
    if verbose:
        click.echo(f"Aligning (repeat with {format_sample_count(repeat_spacing)} sample spacing)...")
    aligned = []
    for number, rec in enumerate(recordings, start=1):
        pattern = rec.data
        pattern_len = pattern.shape[1]
        if pattern_len >= max_len:
            aligned.append(pattern)
            continue
        canvas = np.zeros((pattern.shape[0], max_len), dtype=pattern.dtype)
        # Each copy begins one full period (pattern + gap) after the previous.
        starts = range(0, max_len, pattern_len + repeat_spacing)
        for start in starts:
            stop = min(start + pattern_len, max_len)
            canvas[:, start:stop] = pattern[:, : stop - start]
        repetitions = len(starts)
        if verbose:
            click.echo(
                f" Recording {number}: {repetitions} repetitions "
                f"({format_sample_count(pattern_len)} samples + {format_sample_count(repeat_spacing)} spacing)"
            )
        aligned.append(canvas)
    return aligned
def align_for_add(recordings, align_mode, pad_start_sample=0, repeat_spacing=0, verbose=False):
    """Align recordings for add mode based on alignment strategy.

    If every recording already has the same length along axis 1, the
    ``data`` arrays are returned unchanged regardless of ``align_mode``.

    Args:
        recordings: List of Recording objects
        align_mode: Alignment mode string; one of ``error``, ``truncate``,
            ``pad``, ``pad-start``, ``pad-center``, ``pad-end``, ``repeat``,
            ``repeat-spaced``
        pad_start_sample: Sample offset for pad-start mode
        repeat_spacing: Spacing between repetitions for repeat-spaced mode
        verbose: Verbose output

    Returns:
        List of aligned numpy arrays

    Raises:
        click.ClickException: If ``recordings`` is empty, if lengths differ
            under ``error`` mode, if the mode is unknown, or if the selected
            strategy rejects its arguments
    """
    if not recordings:
        # max()/min() below would otherwise fail with a bare ValueError.
        raise click.ClickException("No recordings to align")
    lengths = [rec.data.shape[1] for rec in recordings]
    max_len = max(lengths)
    min_len = min(lengths)
    # All same length, no alignment needed
    if len(set(lengths)) == 1:
        if verbose:
            click.echo(f" All recordings same length ({format_sample_count(max_len)} samples)")
        return [rec.data for rec in recordings]
    if align_mode == "error":
        raise click.ClickException(
            f"Recordings have different lengths: {[format_sample_count(n) for n in lengths]}\n"
            f"Use --align-mode to specify alignment strategy:\n"
            f" --align-mode truncate (use shortest: {format_sample_count(min_len)} samples)\n"
            f" --align-mode pad (zero-pad to longest: {format_sample_count(max_len)} samples)\n"
            f" --align-mode pad-center (center shorter in longer)\n"
            f" --align-mode pad-end (align end of recordings)\n"
            f" --align-mode repeat (repeat shorter to match longest)"
        )
    if align_mode == "truncate":
        # Progress lines are verbose-only, matching every other align mode.
        if verbose:
            click.echo(f"Aligning (truncate to {format_sample_count(min_len)} samples)...")
            for i, rec in enumerate(recordings):
                if rec.data.shape[1] > min_len:
                    click.echo(f" Recording {i+1}: truncated from {format_sample_count(rec.data.shape[1])} samples")
        return [rec.data[:, :min_len] for rec in recordings]
    if align_mode == "pad":
        return pad(recordings, max_len, verbose)
    if align_mode == "pad-start":
        return pad_start(recordings, max_len, pad_start_sample, verbose)
    if align_mode == "pad-center":
        return pad_center(recordings, max_len, verbose)
    if align_mode == "pad-end":
        return pad_end(recordings, max_len, verbose)
    if align_mode == "repeat":
        return repeat(recordings, max_len, verbose)
    if align_mode == "repeat-spaced":
        return repeat_spaced(recordings, max_len, repeat_spacing, verbose)
    raise click.ClickException(f"Unknown alignment mode: {align_mode}")
def concat_recordings(recordings, verbose=False):
    """Join recordings end-to-end along the sample axis.

    Annotations from every input are deep-copied and their ``sample_start``
    is shifted by the number of samples that precede that input in the
    combined data. Metadata is a copy of the first recording's metadata with
    ``combined_from``, ``combine_mode``, ``num_inputs`` and
    ``combine_timestamp`` added.

    Args:
        recordings: List of Recording objects
        verbose: Verbose output

    Returns:
        Recording: Combined recording
    """
    if verbose:
        click.echo("Concatenating...")

    # Stitch the sample arrays together along axis 1.
    combined_data = np.concatenate([rec.data for rec in recordings], axis=1)

    # Re-base each input's annotations onto the combined timeline.
    shifted_annotations = []
    samples_so_far = 0
    for rec in recordings:
        for annotation in rec._annotations:
            moved = copy.deepcopy(annotation)
            moved.sample_start += samples_so_far
            shifted_annotations.append(moved)
        samples_so_far += rec.data.shape[1]

    # Start from the first recording's metadata and record the provenance.
    source_names = [rec._metadata.get("original_file", "unknown") for rec in recordings]
    combined_metadata = recordings[0]._metadata.copy()
    combined_metadata["combined_from"] = source_names
    combined_metadata["combine_mode"] = "concat"
    combined_metadata["num_inputs"] = len(recordings)
    combined_metadata["combine_timestamp"] = time.time()

    result = Recording(data=combined_data, metadata=combined_metadata)
    result._annotations = shifted_annotations
    if verbose:
        click.echo(f"Total: {format_sample_count(combined_data.shape[1])} samples")
    return result
def add_recordings(recordings, align_mode="error", pad_start_sample=0, repeat_spacing=0, verbose=False):
    """Add/mix recordings sample-by-sample.

    Only the first recording's annotations are kept. They are deep-copied,
    and their ``sample_start`` is shifted by the number of zeros the chosen
    alignment inserts before the first recording's data, so they keep
    pointing at the same samples in the combined output. Metadata is a copy
    of the first recording's metadata with ``combined_from``,
    ``combine_mode``, ``align_mode``, ``num_inputs`` and
    ``combine_timestamp`` added.

    Args:
        recordings: List of Recording objects
        align_mode: Alignment mode for different-length recordings
        pad_start_sample: Sample offset for pad-start mode
        repeat_spacing: Spacing for repeat-spaced mode
        verbose: Verbose output

    Returns:
        Recording: Combined recording

    Raises:
        click.ClickException: Propagated from ``align_for_add`` when the
            recordings cannot be aligned.
    """
    # Align recordings
    aligned_data = align_for_add(
        recordings, align_mode, pad_start_sample=pad_start_sample, repeat_spacing=repeat_spacing, verbose=verbose
    )
    if verbose:
        click.echo("Adding signals...")
    # Add all signals
    combined_data = sum(aligned_data)
    # Keep first recording's annotations only
    combined_metadata = recordings[0]._metadata.copy()
    combined_metadata["combined_from"] = [rec._metadata.get("original_file", "unknown") for rec in recordings]
    combined_metadata["combine_mode"] = "add"
    combined_metadata["align_mode"] = align_mode
    combined_metadata["num_inputs"] = len(recordings)
    combined_metadata["combine_timestamp"] = time.time()
    # Warn if other recordings had annotations
    if any(len(rec._annotations) > 0 for rec in recordings[1:]):
        click.echo("Warning: Only first recording's annotations preserved (others discarded in add mode)", err=True)
    # Deep-copy (as concat mode does) so the result never shares annotation
    # objects with the input, then re-base them onto the aligned data.
    shift = _leading_pad_of_first(recordings, align_mode, pad_start_sample)
    kept_annotations = [copy.deepcopy(ann) for ann in recordings[0]._annotations]
    if shift:
        for ann in kept_annotations:
            ann.sample_start += shift
    # Create combined recording
    result = Recording(data=combined_data, metadata=combined_metadata)
    result._annotations = kept_annotations
    if verbose:
        click.echo(f"Total: {format_sample_count(combined_data.shape[1])} samples")
    return result


def _leading_pad_of_first(recordings, align_mode, pad_start_sample):
    """Return how many zeros alignment places before the first recording's data.

    Mirrors the padding rules of the pad-start, pad-center and pad-end
    strategies; every other mode leaves the first recording starting at 0.
    """
    first_len = recordings[0].data.shape[1]
    max_len = max(rec.data.shape[1] for rec in recordings)
    if first_len >= max_len:
        # The padding strategies only touch recordings shorter than the longest.
        return 0
    if align_mode == "pad-start":
        return pad_start_sample
    if align_mode == "pad-center":
        return (max_len - first_len) // 2
    if align_mode == "pad-end":
        return max_len - first_len
    return 0
@click.command()
@click.argument("inputs", nargs=-1, required=True, type=click.Path(exists=True))
@click.argument("output", nargs=1, required=True, type=click.Path())
@click.option(
    "--mode",
    type=click.Choice(["concat", "add"], case_sensitive=False),
    default="concat",
    help="Combination mode (default: concat)",
)
@click.option(
    "--align-mode",
    type=click.Choice(
        ["error", "truncate", "pad", "pad-start", "pad-center", "pad-end", "repeat", "repeat-spaced"],
        case_sensitive=False,
    ),
    default="error",
    help="Add mode alignment strategy (default: error)",
)
@click.option("--pad-start-sample", type=int, default=0, metavar="N", help="Sample offset for pad-start mode")
@click.option(
    "--repeat-spacing",
    type=int,
    default=0,
    metavar="SAMPLES",
    help="Spacing between repetitions for repeat-spaced mode",
)
@click.option("--legacy", is_flag=True, help="Load inputs as legacy NPY format")
@click.option("--normalize", is_flag=True, help="Normalize after combining")
@click.option(
    "--output-format",
    type=click.Choice(["sigmf", "npy", "wav", "blue"], case_sensitive=False),
    help="Force output format",
)
@click.option("--overwrite", is_flag=True, help="Overwrite existing output file")
@click.option(
    "--metadata", multiple=True, metavar="KEY=VALUE", help="Add custom metadata (can be used multiple times)"
)
@click.option("--verbose", is_flag=True, help="Verbose output")
@click.option("--quiet", is_flag=True, help="Suppress output")
def combine(
    inputs,
    output,
    mode,
    align_mode,
    pad_start_sample,
    repeat_spacing,
    legacy,
    normalize,
    output_format,
    overwrite,
    metadata,
    verbose,
    quiet,
):
    """Combine multiple recordings into a single file.

    \b
    INPUTS Input recording files (2 or more)
    OUTPUT Output filename

    \b
    Modes:
      concat Concatenate recordings end-to-end (default)
      add Add signals sample-by-sample (mix/superimpose)

    \b
    Examples:
      # Concatenate recordings
      utils combine chunk1.npy chunk2.npy chunk3.npy full.npy

    \b
      # Add signal and noise
      utils combine signal.npy noise.npy noisy.npy --mode add\n

    \b
      # Add with center alignment
      utils combine long.npy short.npy output.npy --mode add --align-mode pad-center\n

    \b
      # Repeat pattern with spacing
      utils combine signal.npy pattern.npy output.npy --mode add --align-mode repeat-spaced --repeat-spacing 10000
    """
    # Validate inputs: the last positional argument is OUTPUT, so INPUTS must
    # still hold at least two paths.
    if len(inputs) < 2:
        raise click.ClickException(
            "Error: At least 2 input files required\n" "Usage: utils combine INPUT1 INPUT2 [INPUT3 ...] OUTPUT"
        )
    mode = mode.lower()
    align_mode = align_mode.lower()
    # Load recordings
    align_str = f", {align_mode} alignment" if mode == "add" and align_mode != "error" else ""
    echo_progress(
        f"Combining {len(inputs)} recordings ({mode} mode{align_str})...",
        quiet,
    )
    recordings = load_recording_list(inputs, legacy, verbose, quiet)
    # Validate for empty recordings
    for i, rec in enumerate(recordings):
        if rec.data.shape[1] == 0:
            raise click.ClickException(
                f"Error: Input file '{inputs[i]}' has 0 samples\n" "Cannot combine empty recordings"
            )
    # Validate for add mode
    if mode == "add":
        # Check sample rates match (recordings without a sample rate are ignored)
        sample_rates = [rec._metadata.get("sample_rate") for rec in recordings]
        sample_rates = [sr for sr in sample_rates if sr is not None]
        if len(sample_rates) > 1 and len(set(sample_rates)) > 1:
            raise click.ClickException(
                f"Error: Recordings have different sample rates (add mode)\n"
                f"Sample rates: {sample_rates}\n"
                "All recordings must have matching sample rates for add mode"
            )
    # Check channel counts match in every mode: adding needs identical shapes,
    # and concatenating along the sample axis needs identical channel counts.
    channel_counts = [rec.data.shape[0] for rec in recordings]
    if len(set(channel_counts)) > 1:
        raise click.ClickException(
            f"Error: Recordings have different channel counts\n"
            f"Channels: {channel_counts}\n"
            "All recordings must have same number of channels"
        )
    # Combine recordings
    if mode == "concat":
        combined = concat_recordings(recordings, verbose=verbose)
    elif mode == "add":
        combined = add_recordings(
            recordings,
            align_mode=align_mode,
            pad_start_sample=pad_start_sample,
            repeat_spacing=repeat_spacing,
            verbose=verbose,
        )
    else:
        raise click.ClickException(f"Unknown mode: {mode}")
    # Add custom metadata; only the first "=" splits, so values may contain "="
    for meta_item in metadata:
        if "=" not in meta_item:
            raise click.ClickException(f"Invalid metadata format: {meta_item} (expected KEY=VALUE)")
        key, value = meta_item.split("=", 1)
        combined.update_metadata(key, value)
    # Normalize if requested
    if normalize:
        echo_verbose("Normalizing...", verbose)
        combined = combined.normalize()
        combined.update_metadata("normalized", True)
    # Save output
    try:
        save_recording(combined, output, output_format=output_format, overwrite=overwrite, verbose=verbose)
        echo_progress(f"Saved to: {output}", quiet)
    except Exception as e:
        # Chain explicitly so the root cause stays attached to the CLI error.
        raise click.ClickException(f"Failed to save output: {e}") from e
# Run the click command when this module is executed directly as a script.
if __name__ == "__main__":
    combine()