"""Convert command - Convert recordings between file formats.""" import os from pathlib import Path import click from utils.io.recording import ( from_npy, load_recording, to_blue, to_npy, to_sigmf, to_wav, ) from utils_cli.utils.common import ( check_for_overwriting, detect_file_format, echo_progress, echo_verbose, format_sample_count, ) from .config import load_user_config def parse_metadata_override(metadata_str): """Parse KEY=VALUE metadata string. Args: metadata_str: String in format "key=value" Returns: tuple: (key, value) where value is converted to appropriate type """ if "=" not in metadata_str: raise click.BadParameter(f"Metadata must be in KEY=VALUE format, got: {metadata_str}") key, value = metadata_str.split("=", 1) # Try to convert to number if possible try: # Try int first if "." not in value: return (key, int(value)) else: return (key, float(value)) except ValueError: # Keep as string return (key, value) @click.command() @click.argument("input", type=click.Path(exists=True)) @click.argument("output", type=click.Path(), required=False) @click.option( "--format", "output_format", type=click.Choice(["npy", "sigmf", "wav", "blue"]), help="Output format (required if OUTPUT not specified, otherwise auto-detected from extension)", ) @click.option("--output-dir", type=click.Path(), help="Output directory (default: current directory)") @click.option("--legacy", is_flag=True, help="Load input as legacy NPY format") @click.option("--wav-sample-rate", type=float, default=48000, show_default=True, help="Target WAV sample rate in Hz") @click.option( "--wav-bits", type=click.Choice(["16", "32"]), default="32", show_default=True, help="WAV bits per sample" ) @click.option( "--blue-format", type=click.Choice(["CI", "CF", "CD"]), default="CI", show_default=True, help="MIDAS Blue format: CI (int16), CF (float32), CD (float64)", ) @click.option("--overwrite", is_flag=True, help="Overwrite output if it exists") @click.option("--metadata", multiple=True, help="Add/override metadata as KEY=VALUE (can be repeated)") @click.option("--verbose", "-v", is_flag=True, help="Verbose output") @click.option("--quiet", "-q", is_flag=True, help="Suppress output") def convert( # noqa: C901 input, output, output_format, output_dir, legacy, wav_sample_rate, wav_bits, blue_format, overwrite, metadata, verbose, quiet, ): """Convert recordings between file formats. Automatically detects input format and converts to desired output format. Supports SigMF, NumPy (.npy), WAV IQ stereo, and MIDAS Blue formats. If OUTPUT is not specified, the input filename is used with a new extension based on the --format option. \b Examples: # SigMF to NumPy (explicit output) utils convert recording.sigmf-data output.npy \b # Auto-generate output filename utils convert recording.npy --format sigmf \b # Convert to specific directory utils convert long_path/recording.npy --format sigmf --output-dir converted \b # NumPy to WAV with decimation utils convert high_rate.npy audio.wav --wav-sample-rate 48000 \b # Legacy NPY to SigMF utils convert old.npy --format sigmf --legacy --overwrite \b # Add metadata during conversion utils convert raw.npy --format sigmf --metadata "location=lab" --metadata "antenna=dipole" """ # Generate output filename if not provided if output is None: if output_format is None: raise click.ClickException( "Either OUTPUT or --format must be specified\n" "Examples:\n" " utils convert input.npy output.sigmf\n" " utils convert input.npy --format sigmf" ) # Get input filename without extension input_path = Path(input) input_stem = input_path.stem # For SigMF input, remove .sigmf-data or .sigmf-meta suffix if input_stem.endswith(".sigmf-data") or input_stem.endswith(".sigmf-meta"): input_stem = input_stem[:-11] # Remove '.sigmf-data'/'.sigmf-meta' elif input_stem.endswith(".sigmf"): input_stem = input_stem[:-6] # Remove '.sigmf' # Determine output directory if output_dir: out_dir = Path(output_dir) else: out_dir = Path(".") # Current directory # Generate output filename with new extension extension_map = {"sigmf": ".sigmf", "npy": ".npy", "wav": ".wav", "blue": ".blue"} output = str(out_dir / f"{input_stem}{extension_map[output_format]}") echo_verbose(f"Auto-generated output: {output}", verbose) # Detect input and output formats input_format = detect_file_format(input) if output_format is None: output_format = detect_file_format(output) # Check for overwriting output_path = Path(output) check_for_overwriting(overwrite, output_format, output_path) echo_progress(f"Converting: {os.path.basename(input)} → {os.path.basename(output)}", quiet) echo_progress(f"Input format: {input_format.upper()}", quiet) echo_progress(f"Output format: {output_format.upper()}", quiet) # Load input recording echo_verbose("Reading input...", verbose) try: if legacy: echo_verbose("Using legacy NPY loader", verbose) recording = from_npy(input, legacy=True) else: recording = load_recording(input) except Exception as e: raise click.ClickException(f"Failed to load input file: {e}") # Get sample count if hasattr(recording.data, "shape"): if len(recording.data.shape) == 2: num_samples = recording.data.shape[1] num_channels = recording.data.shape[0] else: num_samples = len(recording.data) num_channels = 1 else: num_samples = len(recording.data) num_channels = 1 echo_progress(f"Samples: {format_sample_count(num_samples)}", quiet) if num_channels > 1: echo_progress(f"Channels: {num_channels}", quiet) echo_verbose("Input loaded successfully", verbose) # Load user config and apply default metadata user_config = load_user_config() if user_config: echo_verbose("Applying user config metadata...", verbose) # Add standard metadata fields from config (if not already present) for key in ["author", "organization", "project", "location", "testbed"]: if key in user_config and key not in recording.metadata: recording._metadata[key] = user_config[key] echo_verbose(f" {key} = {user_config[key]} (from config)", verbose) # Add SigMF fields from config (if not already present) if "sigmf" in user_config: sigmf = user_config["sigmf"] for key in ["license", "hw", "dataset"]: if key in sigmf and key not in recording.metadata: recording._metadata[key] = sigmf[key] echo_verbose(f" {key} = {sigmf[key]} (from config)", verbose) # Apply metadata overrides from CLI (highest priority) if metadata: echo_verbose("Applying metadata overrides from CLI...", verbose) for meta_str in metadata: key, value = parse_metadata_override(meta_str) recording._metadata[key] = value echo_verbose(f" {key} = {value} (CLI override)", verbose) # Convert to output format echo_verbose(f"Writing {output_format.upper()} output...", verbose) # Split output into directory and filename for functions that need it output_dir = output_path.parent output_filename = output_path.name # If output_dir is empty (relative path with no dir), use current directory if str(output_dir) == ".": output_dir = None elif not output_dir.exists(): # Create output directory if it doesn't exist output_dir.mkdir(parents=True, exist_ok=True) try: # Note: All to_* functions use (recording, filename, path) signature # We split the output path into directory and filename components if output_format == "sigmf": to_sigmf(recording, filename=output_filename, path=output_dir, overwrite=overwrite) echo_progress( ( f"Conversion complete: {output_path.with_suffix('.sigmf-data').name}, " f"{output_path.with_suffix('.sigmf-meta').name}" ), quiet, ) elif output_format == "npy": to_npy(recording, filename=output_filename, path=output_dir, overwrite=overwrite) echo_progress(f"Conversion complete: {output}", quiet) elif output_format == "wav": # Check for multichannel if num_channels > 1: raise click.ClickException( f"WAV export not supported for multichannel recordings\n" f"Input has {num_channels} channels, WAV export requires single channel" ) # Show decimation info if applicable original_sample_rate = recording.metadata.get("sample_rate", wav_sample_rate) if original_sample_rate > wav_sample_rate: decimation_factor = int(original_sample_rate / wav_sample_rate) new_sample_count = num_samples // decimation_factor echo_progress(f"Original sample rate: {original_sample_rate / 1e6:.1f} MHz", quiet) echo_progress(f"Target sample rate: {wav_sample_rate / 1e3:.1f} kHz", quiet) echo_progress(f"Decimation factor: {decimation_factor}", quiet) echo_progress(f"Output samples: {format_sample_count(new_sample_count)}", quiet) echo_verbose("Decimating...", verbose) to_wav( recording, filename=output_filename, path=output_dir, target_sample_rate=wav_sample_rate, bits_per_sample=int(wav_bits), overwrite=overwrite, ) echo_progress(f"Conversion complete: {output}", quiet) elif output_format == "blue": # Convert blue format string to format expected by to_blue format_map = {"CI": "CI", "CF": "CF", "CD": "CD"} # Complex int16 # Complex float32 # Complex float64 blue_data_format = format_map[blue_format] echo_verbose(f"Using MIDAS Blue format: {blue_format} ({blue_data_format})", verbose) to_blue( recording, filename=output_filename, path=output_dir, data_format=blue_data_format, overwrite=overwrite ) echo_progress(f"Conversion complete: {output}", quiet) except Exception as e: raise click.ClickException(f"Failed to write output file: {e}") # Show metadata preservation info in verbose mode if verbose and recording.metadata: echo_verbose("\nMetadata preserved:", verbose) for key, value in recording.metadata.items(): echo_verbose(f" {key}: {value}", verbose) if __name__ == "__main__": convert()