495 lines
18 KiB
Python
495 lines
18 KiB
Python
"""Combine command - Combine multiple recordings into a single file."""
|
|
|
|
import copy
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import click
|
|
import numpy as np
|
|
|
|
from utils.data import Recording
|
|
from utils.io import from_npy_legacy, load_recording
|
|
from utils_cli.utils.common import (
|
|
echo_progress,
|
|
echo_verbose,
|
|
format_sample_count,
|
|
save_recording,
|
|
)
|
|
|
|
|
|
def load_recording_list(inputs, legacy, verbose, quiet):
    """Load every input file and return the recordings in input order.

    Args:
        inputs: Iterable of input file paths (anything ``Path`` accepts).
        legacy: When true, read each file with ``from_npy_legacy``; otherwise
            read it with ``load_recording``.
        verbose: Forwarded to ``echo_verbose`` for the per-file progress line.
        quiet: Accepted so callers can pass their output flags uniformly;
            this function does not read it.

    Returns:
        list: One loaded recording per entry of ``inputs``, in the same order.

    Raises:
        click.ClickException: If loading or inspecting any file fails. The
            underlying exception is chained as ``__cause__``.
    """

    def _read(path_str):
        # Resolve the loader lazily so nothing is looked up for empty input.
        return from_npy_legacy(path_str) if legacy else load_recording(path_str)

    recordings = []
    for raw_path in inputs:
        input_path = Path(raw_path)

        try:
            recording = _read(str(input_path))

            # Store original filename in metadata if not present
            if "original_file" not in recording._metadata:
                recording._metadata["original_file"] = input_path.name

            num_samples = recording.data.shape[1]
            echo_verbose(
                f" Loading {input_path.name} ({format_sample_count(num_samples)} samples)... Done", verbose
            )
            recordings.append(recording)

        except Exception as e:
            # Chain the cause explicitly so debugging tools still see the
            # original failure behind the user-facing message.
            raise click.ClickException(f"Failed to load {input_path}: {e}") from e

    return recordings
|
|
|
|
|
|
def pad(recordings, max_len, verbose):
    """Zero-pad the end of every short recording up to ``max_len`` samples.

    Args:
        recordings: Sequence of objects exposing a 2-D ``data`` array; the
            second axis is the one that gets padded.
        max_len: Target length of the second axis.
        verbose: When true, echo what was done for each padded recording.

    Returns:
        list: One array per recording. Recordings that are not shorter than
        ``max_len`` contribute their ``data`` array unchanged.
    """
    if verbose:
        click.echo(f"Aligning (zero-pad to {format_sample_count(max_len)} samples)...")

    aligned = []
    for number, rec in enumerate(recordings, start=1):
        shortfall = max_len - rec.data.shape[1]
        if shortfall <= 0:
            # Already long enough: hand the original array back as-is.
            aligned.append(rec.data)
            continue

        extended = np.pad(rec.data, ((0, 0), (0, shortfall)), mode="constant")
        if verbose:
            click.echo(f" Recording {number}: +{format_sample_count(shortfall)} zeros at end")
        aligned.append(extended)

    return aligned
|
|
|
|
|
|
def pad_start(recordings, max_len, pad_start_sample, verbose):
    """Place each short recording at a fixed offset inside ``max_len`` zeros.

    Args:
        recordings: Sequence of objects exposing a 2-D ``data`` array; the
            second axis is the one that gets padded.
        max_len: Target length of the second axis.
        pad_start_sample: Number of zeros inserted before each short
            recording. Must be non-negative.
        verbose: When true, echo what was done for each padded recording.

    Returns:
        list: One array per recording. Recordings that are not shorter than
        ``max_len`` contribute their ``data`` array unchanged.

    Raises:
        click.ClickException: If a recording needs padding and
            ``pad_start_sample`` is negative, or if the offset plus the
            recording length does not fit into ``max_len``.
    """
    if verbose:
        click.echo(f"Aligning (pad-start at sample {format_sample_count(pad_start_sample)})...")

    aligned = []
    for i, rec in enumerate(recordings):
        length = rec.data.shape[1]
        if length >= max_len:
            aligned.append(rec.data)
            continue

        # np.pad rejects negative widths with a bare ValueError; report the
        # bad option value as a normal CLI error instead.
        if pad_start_sample < 0:
            raise click.ClickException(
                f"Invalid --pad-start-sample\n" f"Start sample must be >= 0 (got {pad_start_sample})"
            )

        pad_before = pad_start_sample
        pad_after = max_len - length - pad_before
        if pad_after < 0:
            raise click.ClickException(
                f"Invalid --pad-start-sample\n"
                f"Start sample {format_sample_count(pad_start_sample)} with recording length "
                f"{format_sample_count(length)} exceeds max length {format_sample_count(max_len)}"
            )

        padded = np.pad(rec.data, ((0, 0), (pad_before, pad_after)), mode="constant")
        if verbose:
            click.echo(
                f" Recording {i + 1}: +{format_sample_count(pad_before)} zeros before, "
                f"+{format_sample_count(pad_after)} zeros after"
            )
        aligned.append(padded)

    return aligned
|
|
|
|
|
|
def pad_center(recordings, max_len, verbose):
    """Centre every short recording inside ``max_len`` samples of zeros.

    When the amount of padding is odd, the extra zero goes after the data.

    Args:
        recordings: Sequence of objects exposing a 2-D ``data`` array; the
            second axis is the one that gets padded.
        max_len: Target length of the second axis.
        verbose: When true, echo what was done for each padded recording.

    Returns:
        list: One array per recording. Recordings that are not shorter than
        ``max_len`` contribute their ``data`` array unchanged.
    """
    if verbose:
        click.echo(f"Aligning (pad-center in {format_sample_count(max_len)} samples)...")

    def _centered(number, samples):
        # Split the slack as evenly as possible, remainder on the trailing side.
        slack = max_len - samples.shape[1]
        if slack <= 0:
            return samples
        lead = slack // 2
        trail = slack - lead
        framed = np.pad(samples, ((0, 0), (lead, trail)), mode="constant")
        if verbose:
            click.echo(
                f" Recording {number}: +{format_sample_count(lead)} zeros before, "
                f"+{format_sample_count(trail)} zeros after"
            )
        return framed

    return [_centered(number, rec.data) for number, rec in enumerate(recordings, start=1)]
|
|
|
|
|
|
def pad_end(recordings, max_len, verbose):
    """Right-align short recordings by inserting zeros at their beginning.

    Args:
        recordings: Sequence of objects exposing a 2-D ``data`` array; the
            second axis is the one that gets padded.
        max_len: Target length of the second axis.
        verbose: When true, echo what was done for each padded recording.

    Returns:
        list: One array per recording. Recordings that are not shorter than
        ``max_len`` contribute their ``data`` array unchanged.
    """
    if verbose:
        click.echo(f"Aligning (pad-end, align to {format_sample_count(max_len)} samples)...")

    aligned = []
    for number, rec in enumerate(recordings, start=1):
        current_len = rec.data.shape[1]
        needs_padding = current_len < max_len
        if needs_padding:
            missing = max_len - current_len
            # All of the padding goes in front so the data ends at max_len.
            shifted = np.pad(rec.data, ((0, 0), (missing, 0)), mode="constant")
            if verbose:
                click.echo(f" Recording {number}: +{format_sample_count(missing)} zeros at beginning")
        else:
            shifted = rec.data
        aligned.append(shifted)

    return aligned
|
|
|
|
|
|
def repeat(recordings, max_len, verbose):
    """Tile every short recording back-to-back until it spans ``max_len`` samples.

    The last repetition is cut off where needed so the result is exactly
    ``max_len`` samples long.

    Args:
        recordings: Sequence of objects exposing a 2-D ``data`` array; the
            second axis is the one that gets extended.
        max_len: Target length of the second axis.
        verbose: When true, echo what was done for each extended recording.

    Returns:
        list: One array per recording. Recordings that are not shorter than
        ``max_len`` contribute their ``data`` array unchanged.
    """
    if verbose:
        click.echo(f"Aligning (repeat pattern to match {format_sample_count(max_len)} samples)...")

    aligned = []
    for number, rec in enumerate(recordings, start=1):
        pattern = rec.data
        pattern_len = pattern.shape[1]
        if not pattern_len < max_len:
            aligned.append(pattern)
            continue

        # Enough whole copies to cover max_len, then trim the overshoot.
        n_repeats = int(np.ceil(max_len / pattern_len))
        looped = np.tile(pattern, (1, n_repeats))[:, :max_len]
        if verbose:
            click.echo(
                f" Recording {number}: repeated {n_repeats} times, "
                f"truncated to {format_sample_count(max_len)} samples"
            )
        aligned.append(looped)

    return aligned
|
|
|
|
|
|
def repeat_spaced(recordings, max_len, repeat_spacing, verbose):
    """Repeat every short recording with a gap of zeros between repetitions.

    Each short recording is written into a zero-filled buffer of ``max_len``
    samples at positions ``0, L + gap, 2 * (L + gap), ...`` where ``L`` is
    the recording's length and ``gap`` is ``repeat_spacing``. A repetition
    that would run past ``max_len`` is cut off at the end of the buffer.

    Args:
        recordings: Sequence of objects exposing a 2-D ``data`` array; the
            second axis is the one that gets extended.
        max_len: Target length of the second axis.
        repeat_spacing: Number of zero samples between repetitions (> 0).
        verbose: When true, echo what was done for each extended recording.

    Returns:
        list: One array per recording. Recordings that are not shorter than
        ``max_len`` contribute their ``data`` array unchanged.

    Raises:
        click.ClickException: If ``repeat_spacing`` is not positive.
    """
    if repeat_spacing <= 0:
        # click prints ClickException messages behind its own "Error: "
        # prefix, so the message itself must not repeat it.
        raise click.ClickException("--align-mode repeat-spaced requires --repeat-spacing SAMPLES (must be > 0)")
    if verbose:
        click.echo(f"Aligning (repeat with {format_sample_count(repeat_spacing)} sample spacing)...")

    aligned = []
    for i, rec in enumerate(recordings):
        pattern_len = rec.data.shape[1]
        if pattern_len >= max_len:
            aligned.append(rec.data)
            continue

        result = np.zeros((rec.data.shape[0], max_len), dtype=rec.data.dtype)
        # Every repetition starts one pattern plus one gap after the previous.
        starts = range(0, max_len, pattern_len + repeat_spacing)
        for pos in starts:
            end = min(pos + pattern_len, max_len)
            result[:, pos:end] = rec.data[:, : end - pos]
        repetitions = len(starts)

        if verbose:
            click.echo(
                f" Recording {i + 1}: {repetitions} repetitions "
                f"({format_sample_count(pattern_len)} samples + {format_sample_count(repeat_spacing)} spacing)"
            )
        aligned.append(result)

    return aligned
|
|
|
|
|
|
def align_for_add(recordings, align_mode, pad_start_sample=0, repeat_spacing=0, verbose=False):
    """Align recordings for add mode based on alignment strategy.

    Args:
        recordings: List of Recording objects
        align_mode: Alignment mode string
        pad_start_sample: Sample offset for pad-start mode
        repeat_spacing: Spacing between repetitions for repeat-spaced mode
        verbose: Verbose output

    Returns:
        List of aligned numpy arrays (empty when ``recordings`` is empty).
        When all recordings already share one length their ``data`` arrays
        are returned unchanged, whatever ``align_mode`` is.

    Raises:
        click.ClickException: If alignment fails or is invalid
    """
    # Nothing to align; avoids max()/min() failing on an empty sequence.
    if not recordings:
        return []

    lengths = [rec.data.shape[1] for rec in recordings]
    max_len = max(lengths)
    min_len = min(lengths)

    # All same length, no alignment needed
    if min_len == max_len:
        if verbose:
            click.echo(f" All recordings same length ({format_sample_count(max_len)} samples)")
        return [rec.data for rec in recordings]

    if align_mode == "error":
        formatted_lengths = [format_sample_count(length) for length in lengths]
        raise click.ClickException(
            f"Recordings have different lengths: {formatted_lengths}\n"
            f"Use --align-mode to specify alignment strategy:\n"
            f" --align-mode truncate (use shortest: {format_sample_count(min_len)} samples)\n"
            f" --align-mode pad (zero-pad to longest: {format_sample_count(max_len)} samples)\n"
            f" --align-mode pad-center (center shorter in longer)\n"
            f" --align-mode pad-end (align end of recordings)\n"
            f" --align-mode repeat (repeat shorter to match longest)"
        )

    if align_mode == "truncate":
        if verbose:
            click.echo(f"Aligning (truncate to {format_sample_count(min_len)} samples)...")
            for i, rec in enumerate(recordings):
                if rec.data.shape[1] > min_len:
                    click.echo(f" Recording {i + 1}: truncated from {format_sample_count(rec.data.shape[1])} samples")
        return [rec.data[:, :min_len] for rec in recordings]

    # Remaining strategies live in their own functions. The lambdas defer
    # both the lookup and the call until a strategy is actually selected.
    strategies = {
        "pad": lambda: pad(recordings, max_len, verbose),
        "pad-start": lambda: pad_start(recordings, max_len, pad_start_sample, verbose),
        "pad-center": lambda: pad_center(recordings, max_len, verbose),
        "pad-end": lambda: pad_end(recordings, max_len, verbose),
        "repeat": lambda: repeat(recordings, max_len, verbose),
        "repeat-spaced": lambda: repeat_spaced(recordings, max_len, repeat_spacing, verbose),
    }
    strategy = strategies.get(align_mode)
    if strategy is None:
        raise click.ClickException(f"Unknown alignment mode: {align_mode}")
    return strategy()
|
|
|
|
|
|
def concat_recordings(recordings, verbose=False):
    """Join recordings one after another along the sample axis.

    Annotations from every input are deep-copied and their ``sample_start``
    is moved forward by the total length of the recordings that precede
    them. Metadata is copied from the first recording and extended with
    ``combined_from``, ``combine_mode``, ``num_inputs`` and
    ``combine_timestamp``.

    Args:
        recordings: List of Recording objects
        verbose: Verbose output

    Returns:
        Recording: Combined recording
    """
    if verbose:
        click.echo("Concatenating...")

    # Sample data: stack along axis 1.
    combined_data = np.concatenate([rec.data for rec in recordings], axis=1)

    # Annotations: shift each copy by the length of everything before it.
    shifted_annotations = []
    samples_before = 0
    for rec in recordings:
        for original in rec._annotations:
            moved = copy.deepcopy(original)
            moved.sample_start += samples_before
            shifted_annotations.append(moved)
        samples_before += rec.data.shape[1]

    # Metadata: start from the first recording, then record provenance.
    source_names = [rec._metadata.get("original_file", "unknown") for rec in recordings]
    combined_metadata = recordings[0]._metadata.copy()
    combined_metadata["combined_from"] = source_names
    combined_metadata["combine_mode"] = "concat"
    combined_metadata["num_inputs"] = len(recordings)
    combined_metadata["combine_timestamp"] = time.time()

    result = Recording(data=combined_data, metadata=combined_metadata)
    result._annotations = shifted_annotations

    if verbose:
        click.echo(f"Total: {format_sample_count(combined_data.shape[1])} samples")

    return result
|
|
|
|
|
|
def _first_recording_shift(recordings, align_mode, pad_start_sample):
    """Return how many zeros alignment inserts in front of the first recording."""
    lengths = [rec.data.shape[1] for rec in recordings]
    shortfall = max(lengths) - lengths[0]
    if shortfall <= 0:
        # The first recording is (one of) the longest: it is never padded.
        return 0
    if align_mode == "pad-start":
        return pad_start_sample
    if align_mode == "pad-center":
        return shortfall // 2
    if align_mode == "pad-end":
        return shortfall
    # error / truncate / pad / repeat / repeat-spaced keep sample 0 in place.
    return 0


def add_recordings(recordings, align_mode="error", pad_start_sample=0, repeat_spacing=0, verbose=False):
    """Add/mix recordings sample-by-sample.

    Only the first recording's annotations are carried over. They are
    deep-copied, and when the alignment step puts zeros in front of the
    first recording (pad-start, pad-center, pad-end) their ``sample_start``
    is moved by that amount so they still point at the same samples.

    Args:
        recordings: List of Recording objects
        align_mode: Alignment mode for different-length recordings
        pad_start_sample: Sample offset for pad-start mode
        repeat_spacing: Spacing for repeat-spaced mode
        verbose: Verbose output

    Returns:
        Recording: Combined recording

    Raises:
        click.ClickException: Propagated from ``align_for_add`` when the
            recordings cannot be aligned.
    """
    # Align recordings
    aligned_data = align_for_add(
        recordings, align_mode, pad_start_sample=pad_start_sample, repeat_spacing=repeat_spacing, verbose=verbose
    )

    if verbose:
        click.echo("Adding signals...")

    # Add all signals
    combined_data = sum(aligned_data)

    # Metadata comes from the first recording plus provenance fields.
    combined_metadata = recordings[0]._metadata.copy()
    combined_metadata["combined_from"] = [rec._metadata.get("original_file", "unknown") for rec in recordings]
    combined_metadata["combine_mode"] = "add"
    combined_metadata["align_mode"] = align_mode
    combined_metadata["num_inputs"] = len(recordings)
    combined_metadata["combine_timestamp"] = time.time()

    # Warn if other recordings had annotations
    if any(len(rec._annotations) > 0 for rec in recordings[1:]):
        click.echo("Warning: Only first recording's annotations preserved (others discarded in add mode)", err=True)

    # Copy the first recording's annotations so the result does not share
    # objects with its input, and keep them lined up with the shifted data.
    shift = _first_recording_shift(recordings, align_mode, pad_start_sample)
    kept_annotations = []
    for ann in recordings[0]._annotations:
        new_ann = copy.deepcopy(ann)
        if shift:
            new_ann.sample_start += shift
        kept_annotations.append(new_ann)

    # Create combined recording
    result = Recording(data=combined_data, metadata=combined_metadata)
    result._annotations = kept_annotations

    if verbose:
        click.echo(f"Total: {format_sample_count(combined_data.shape[1])} samples")

    return result
|
|
|
|
|
|
# CLI entry point: load the inputs, validate them, merge them with the chosen
# mode, optionally attach metadata / normalize, then save the result.
# The function docstring doubles as the --help text, so developer notes are
# kept in comments. All user-facing failures are raised as
# click.ClickException; click prints those behind its own "Error: " prefix,
# which is why the messages below do not start with "Error:" themselves.
@click.command()
@click.argument("inputs", nargs=-1, required=True, type=click.Path(exists=True))
@click.argument("output", nargs=1, required=True, type=click.Path())
@click.option(
    "--mode",
    type=click.Choice(["concat", "add"], case_sensitive=False),
    default="concat",
    help="Combination mode (default: concat)",
)
@click.option(
    "--align-mode",
    type=click.Choice(
        ["error", "truncate", "pad", "pad-start", "pad-center", "pad-end", "repeat", "repeat-spaced"],
        case_sensitive=False,
    ),
    default="error",
    help="Add mode alignment strategy (default: error)",
)
@click.option("--pad-start-sample", type=int, default=0, metavar="N", help="Sample offset for pad-start mode")
@click.option(
    "--repeat-spacing",
    type=int,
    default=0,
    metavar="SAMPLES",
    help="Spacing between repetitions for repeat-spaced mode",
)
@click.option("--legacy", is_flag=True, help="Load inputs as legacy NPY format")
@click.option("--normalize", is_flag=True, help="Normalize after combining")
@click.option(
    "--output-format",
    type=click.Choice(["sigmf", "npy", "wav", "blue"], case_sensitive=False),
    help="Force output format",
)
@click.option("--overwrite", is_flag=True, help="Overwrite existing output file")
@click.option(
    "--metadata", multiple=True, metavar="KEY=VALUE", help="Add custom metadata (can be used multiple times)"
)
@click.option("--verbose", is_flag=True, help="Verbose output")
@click.option("--quiet", is_flag=True, help="Suppress output")
def combine(
    inputs,
    output,
    mode,
    align_mode,
    pad_start_sample,
    repeat_spacing,
    legacy,
    normalize,
    output_format,
    overwrite,
    metadata,
    verbose,
    quiet,
):
    """Combine multiple recordings into a single file.

    \b
    INPUTS Input recording files (2 or more)
    OUTPUT Output filename

    \b
    Modes:
      concat Concatenate recordings end-to-end (default)
      add Add signals sample-by-sample (mix/superimpose)

    \b
    Examples:
      # Concatenate recordings
      utils combine chunk1.npy chunk2.npy chunk3.npy full.npy
    \b
      # Add signal and noise
      utils combine signal.npy noise.npy noisy.npy --mode add\n
    \b
      # Add with center alignment
      utils combine long.npy short.npy output.npy --mode add --align-mode pad-center\n
    \b
      # Repeat pattern with spacing
      utils combine signal.npy pattern.npy output.npy --mode add --align-mode repeat-spaced --repeat-spacing 10000
    """
    # Validate inputs (click only guarantees at least one INPUTS value).
    if len(inputs) < 2:
        raise click.ClickException(
            "At least 2 input files required\n" "Usage: utils combine INPUT1 INPUT2 [INPUT3 ...] OUTPUT"
        )

    mode = mode.lower()
    align_mode = align_mode.lower()

    # Load recordings
    align_str = f", {align_mode} alignment" if mode == "add" and align_mode != "error" else ""
    echo_progress(f"Combining {len(inputs)} recordings ({mode} mode{align_str})...", quiet)
    recordings = load_recording_list(inputs, legacy, verbose, quiet)

    # Validate for empty recordings
    for input_path, rec in zip(inputs, recordings):
        if rec.data.shape[1] == 0:
            raise click.ClickException(
                f"Input file '{input_path}' has 0 samples\n" "Cannot combine empty recordings"
            )

    # Validate for add mode
    if mode == "add":
        # Sample rates must match; recordings without a rate are ignored.
        sample_rates = [rec._metadata.get("sample_rate") for rec in recordings]
        sample_rates = [sr for sr in sample_rates if sr is not None]
        if len(set(sample_rates)) > 1:
            raise click.ClickException(
                f"Recordings have different sample rates (add mode)\n"
                f"Sample rates: {sample_rates}\n"
                "All recordings must have matching sample rates for add mode"
            )

    # Channel counts must match in both modes: concatenation along the
    # sample axis and element-wise addition both need the same first axis.
    channel_counts = [rec.data.shape[0] for rec in recordings]
    if len(set(channel_counts)) > 1:
        raise click.ClickException(
            f"Recordings have different channel counts\n"
            f"Channels: {channel_counts}\n"
            "All recordings must have same number of channels"
        )

    # Combine recordings
    if mode == "concat":
        combined = concat_recordings(recordings, verbose=verbose)
    elif mode == "add":
        combined = add_recordings(
            recordings,
            align_mode=align_mode,
            pad_start_sample=pad_start_sample,
            repeat_spacing=repeat_spacing,
            verbose=verbose,
        )
    else:
        raise click.ClickException(f"Unknown mode: {mode}")

    # Add custom metadata (split on the first "=" so values may contain "=").
    for meta_item in metadata:
        if "=" not in meta_item:
            raise click.ClickException(f"Invalid metadata format: {meta_item} (expected KEY=VALUE)")
        key, value = meta_item.split("=", 1)
        combined.update_metadata(key, value)

    # Normalize if requested
    if normalize:
        echo_verbose("Normalizing...", verbose)
        combined = combined.normalize()
        combined.update_metadata("normalized", True)

    # Save output
    try:
        save_recording(combined, output, output_format=output_format, overwrite=overwrite, verbose=verbose)
        echo_progress(f"Saved to: {output}", quiet)
    except Exception as e:
        raise click.ClickException(f"Failed to save output: {e}") from e
|
|
|
|
|
|
# Invoke the click command when this module is executed as a script.
if __name__ == "__main__":
    combine()
|