# ria-toolkit-oss/ria_toolkit_oss_cli/ria_toolkit_oss/convert.py
#
# 304 lines
# 11 KiB
# Python

"""Convert command - Convert recordings between file formats."""
import os
from pathlib import Path
import click
from utils.io.recording import (
from_npy,
load_recording,
to_blue,
to_npy,
to_sigmf,
to_wav,
)
from utils_cli.utils.common import (
check_for_overwriting,
detect_file_format,
echo_progress,
echo_verbose,
format_sample_count,
)
from .config import load_user_config
def parse_metadata_override(metadata_str):
    """Parse a ``KEY=VALUE`` metadata override string.

    The string is split on the first ``=`` only, so the value may itself
    contain ``=``. The value is converted to ``int`` when it parses as one,
    otherwise to a finite ``float`` (decimal or scientific notation such as
    ``2.4e9``), otherwise it is returned unchanged as a string.

    Args:
        metadata_str: String in format "key=value".

    Returns:
        tuple: (key, value) where value is an int, a float, or the original
        string.

    Raises:
        click.BadParameter: If the string contains no ``=`` or the key is empty.
    """
    import math  # function-scope import: only this helper needs it

    key, separator, value = metadata_str.partition("=")
    if not separator:
        raise click.BadParameter(f"Metadata must be in KEY=VALUE format, got: {metadata_str}")
    if not key.strip():
        raise click.BadParameter(f"Metadata key must not be empty, got: {metadata_str}")

    # Integers first so "30" stays an int rather than becoming 30.0.
    try:
        return (key, int(value))
    except ValueError:
        pass

    # Then floats. Testing for "." alone misses exponent forms like "1e6".
    try:
        number = float(value)
    except ValueError:
        return (key, value)

    # float() also accepts "nan"/"inf"/"infinity"; keep those as text so a
    # word-like value is not turned into a non-finite number.
    if math.isfinite(number):
        return (key, number)
    return (key, value)
def _default_output_path(input_file, output_format, output_dir):
    """Build the auto-generated output path: input stem plus the extension for output_format."""
    stem = Path(input_file).stem
    # Path.stem only drops the final suffix; strip a SigMF suffix that is still
    # left on the stem.
    for sigmf_suffix in (".sigmf-data", ".sigmf-meta", ".sigmf"):
        if stem.endswith(sigmf_suffix):
            stem = stem[: -len(sigmf_suffix)]
            break

    extension_map = {"sigmf": ".sigmf", "npy": ".npy", "wav": ".wav", "blue": ".blue"}
    out_dir = Path(output_dir) if output_dir else Path(".")  # default: current directory
    return str(out_dir / f"{stem}{extension_map[output_format]}")


def _sample_and_channel_counts(data):
    """Return (num_samples, num_channels) for the recording data."""
    shape = getattr(data, "shape", None)
    if shape is not None and len(shape) == 2:
        # 2-D data is read as (channels, samples).
        return shape[1], shape[0]
    return len(data), 1


def _apply_config_metadata(recording, user_config, verbose):
    """Copy default metadata from the user config onto the recording, never replacing existing keys."""
    echo_verbose("Applying user config metadata...", verbose)

    # (section, keys) pairs: top-level fields first, then the optional "sigmf" section.
    sources = [(user_config, ("author", "organization", "project", "location", "testbed"))]
    if "sigmf" in user_config:
        sources.append((user_config["sigmf"], ("license", "hw", "dataset")))

    for section, keys in sources:
        for key in keys:
            if key in section and key not in recording.metadata:
                recording._metadata[key] = section[key]
                echo_verbose(f" {key} = {section[key]} (from config)", verbose)


@click.command()
@click.argument("input", type=click.Path(exists=True))
@click.argument("output", type=click.Path(), required=False)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["npy", "sigmf", "wav", "blue"]),
    help="Output format (required if OUTPUT not specified, otherwise auto-detected from extension)",
)
@click.option("--output-dir", type=click.Path(), help="Output directory (default: current directory)")
@click.option("--legacy", is_flag=True, help="Load input as legacy NPY format")
@click.option("--wav-sample-rate", type=float, default=48000, show_default=True, help="Target WAV sample rate in Hz")
@click.option(
    "--wav-bits", type=click.Choice(["16", "32"]), default="32", show_default=True, help="WAV bits per sample"
)
@click.option(
    "--blue-format",
    type=click.Choice(["CI", "CF", "CD"]),
    default="CI",
    show_default=True,
    help="MIDAS Blue format: CI (int16), CF (float32), CD (float64)",
)
@click.option("--overwrite", is_flag=True, help="Overwrite output if it exists")
@click.option("--metadata", multiple=True, help="Add/override metadata as KEY=VALUE (can be repeated)")
@click.option("--verbose", "-v", is_flag=True, help="Verbose output")
@click.option("--quiet", "-q", is_flag=True, help="Suppress output")
def convert(  # noqa: C901
    input,
    output,
    output_format,
    output_dir,
    legacy,
    wav_sample_rate,
    wav_bits,
    blue_format,
    overwrite,
    metadata,
    verbose,
    quiet,
):
    """Convert recordings between file formats.

    Automatically detects input format and converts to desired output format.
    Supports SigMF, NumPy (.npy), WAV IQ stereo, and MIDAS Blue formats.

    If OUTPUT is not specified, the input filename is used with a new extension
    based on the --format option.

    \b
    Examples:
        # SigMF to NumPy (explicit output)
        utils convert recording.sigmf-data output.npy

    \b
        # Auto-generate output filename
        utils convert recording.npy --format sigmf

    \b
        # Convert to specific directory
        utils convert long_path/recording.npy --format sigmf --output-dir converted

    \b
        # NumPy to WAV with decimation
        utils convert high_rate.npy audio.wav --wav-sample-rate 48000

    \b
        # Legacy NPY to SigMF
        utils convert old.npy --format sigmf --legacy --overwrite

    \b
        # Add metadata during conversion
        utils convert raw.npy --format sigmf --metadata "location=lab" --metadata "antenna=dipole"
    """
    # The docstring above is the CLI help text, so developer notes live in
    # comments. Flow: resolve output path -> load -> merge metadata (recording
    # < user config < --metadata) -> write in the requested format.
    # Failures surface as click.ClickException.

    # Generate output filename if not provided
    if output is None:
        if output_format is None:
            raise click.ClickException(
                "Either OUTPUT or --format must be specified\n"
                "Examples:\n"
                " utils convert input.npy output.sigmf\n"
                " utils convert input.npy --format sigmf"
            )
        output = _default_output_path(input, output_format, output_dir)
        echo_verbose(f"Auto-generated output: {output}", verbose)

    # Detect input and output formats
    input_format = detect_file_format(input)
    if output_format is None:
        output_format = detect_file_format(output)

    # Check for overwriting
    output_path = Path(output)
    check_for_overwriting(overwrite, output_format, output_path)

    echo_progress(f"Converting: {os.path.basename(input)} -> {os.path.basename(output)}", quiet)
    echo_progress(f"Input format: {input_format.upper()}", quiet)
    echo_progress(f"Output format: {output_format.upper()}", quiet)

    # Load input recording
    echo_verbose("Reading input...", verbose)
    try:
        if legacy:
            echo_verbose("Using legacy NPY loader", verbose)
            recording = from_npy(input, legacy=True)
        else:
            recording = load_recording(input)
    except Exception as e:
        raise click.ClickException(f"Failed to load input file: {e}") from e

    num_samples, num_channels = _sample_and_channel_counts(recording.data)
    echo_progress(f"Samples: {format_sample_count(num_samples)}", quiet)
    if num_channels > 1:
        echo_progress(f"Channels: {num_channels}", quiet)
    echo_verbose("Input loaded successfully", verbose)

    # Load user config and apply default metadata (lowest priority)
    user_config = load_user_config()
    if user_config:
        _apply_config_metadata(recording, user_config, verbose)

    # Apply metadata overrides from CLI (highest priority)
    if metadata:
        echo_verbose("Applying metadata overrides from CLI...", verbose)
        for meta_str in metadata:
            key, value = parse_metadata_override(meta_str)
            recording._metadata[key] = value
            echo_verbose(f" {key} = {value} (CLI override)", verbose)

    # Convert to output format
    echo_verbose(f"Writing {output_format.upper()} output...", verbose)

    # The to_* writers take (recording, filename, path), so split the output
    # path. A separate local name keeps the --output-dir option value intact.
    target_dir = output_path.parent
    output_filename = output_path.name
    if str(target_dir) == ".":
        # No directory component: pass None for the path.
        target_dir = None
    elif not target_dir.exists():
        # Create output directory if it doesn't exist
        target_dir.mkdir(parents=True, exist_ok=True)

    try:
        if output_format == "sigmf":
            to_sigmf(recording, filename=output_filename, path=target_dir, overwrite=overwrite)
            echo_progress(
                (
                    f"Conversion complete: {output_path.with_suffix('.sigmf-data').name}, "
                    f"{output_path.with_suffix('.sigmf-meta').name}"
                ),
                quiet,
            )
        elif output_format == "npy":
            to_npy(recording, filename=output_filename, path=target_dir, overwrite=overwrite)
            echo_progress(f"Conversion complete: {output}", quiet)
        elif output_format == "wav":
            # Check for multichannel
            if num_channels > 1:
                raise click.ClickException(
                    "WAV export not supported for multichannel recordings\n"
                    f"Input has {num_channels} channels, WAV export requires single channel"
                )

            # Show decimation info if applicable. A missing or None sample
            # rate falls back to the target rate, so no decimation is reported.
            original_sample_rate = recording.metadata.get("sample_rate") or wav_sample_rate
            if original_sample_rate > wav_sample_rate:
                decimation_factor = int(original_sample_rate / wav_sample_rate)
                new_sample_count = num_samples // decimation_factor
                echo_progress(f"Original sample rate: {original_sample_rate / 1e6:.1f} MHz", quiet)
                echo_progress(f"Target sample rate: {wav_sample_rate / 1e3:.1f} kHz", quiet)
                echo_progress(f"Decimation factor: {decimation_factor}", quiet)
                echo_progress(f"Output samples: {format_sample_count(new_sample_count)}", quiet)
                echo_verbose("Decimating...", verbose)

            to_wav(
                recording,
                filename=output_filename,
                path=target_dir,
                target_sample_rate=wav_sample_rate,
                bits_per_sample=int(wav_bits),
                overwrite=overwrite,
            )
            echo_progress(f"Conversion complete: {output}", quiet)
        elif output_format == "blue":
            # The CLI choices (CI/CF/CD) are passed to to_blue unchanged.
            blue_data_format = blue_format
            echo_verbose(f"Using MIDAS Blue format: {blue_format} ({blue_data_format})", verbose)
            to_blue(
                recording, filename=output_filename, path=target_dir, data_format=blue_data_format, overwrite=overwrite
            )
            echo_progress(f"Conversion complete: {output}", quiet)
    except click.ClickException:
        # Already a user-facing error (e.g. multichannel WAV); re-wrapping it
        # would mislabel it as a write failure.
        raise
    except Exception as e:
        raise click.ClickException(f"Failed to write output file: {e}") from e

    # Show metadata preservation info in verbose mode
    if verbose and recording.metadata:
        echo_verbose("\nMetadata preserved:", verbose)
        for key, value in recording.metadata.items():
            echo_verbose(f" {key}: {value}", verbose)
# Invoke the convert command when this file is executed directly as a script.
if __name__ == "__main__":
    convert()