ria-toolkit-oss/src/ria_toolkit_oss/io/recording.py

1268 lines
44 KiB
Python

"""
Utilities for input/output operations on the ria_toolkit_oss.datatypes.Recording object.
"""
import datetime
import datetime as dt
import numbers
import os
import re
import struct
from datetime import timezone
from typing import Any, List, Optional
import numpy as np
import sigmf
from quantiphy import Quantity
from sigmf import SigMFFile, sigmffile
from sigmf.utils import get_data_type_str
from ria_toolkit_oss.datatypes import Annotation
from ria_toolkit_oss.datatypes.recording import Recording
# --- MIDAS Blue (X-Midas) constants, shared by to_blue/from_blue and helpers ---

# Prefix used to tag RIA metadata keywords in the Blue extended header.
# NOTE(review): consumed by _encode_blue_extended_header / _meta_key_from_tag,
# which are outside this view — confirm the exact tagging scheme there.
_BLUE_META_PREFIX = "META_"
# Maximum length allowed for a Blue extended-header keyword tag.
_BLUE_META_TAG_MAX_LEN = 60
# Metadata keys that from_blue derives from the Blue header itself and that
# therefore must not be round-tripped back into the extended header.
_BLUE_SKIP_METADATA_KEYS = {"blue_data_format", "blue_endian", "blue_keywords"}
# Blue numeric type codes -> numpy dtype strings.
# NOTE(review): used by the extended-header encode/decode helpers outside this
# view; endianness is applied by the caller.
_BLUE_NUMERIC_DTYPE = {
    "B": "i1",
    "I": "i2",
    "L": "i4",
    "F": "f4",
    "D": "f8",
}
# Mapping between SigMF "core:" keys and Recording metadata keys.
# to_sigmf uses it in the metadata-key -> SigMF-key direction; from_sigmf uses
# it to translate "core:" keys back. Values prefixed "sigmf:" have no native
# Recording metadata key and are kept under that namespaced name.
SIGMF_KEY_CONVERSION = {
    SigMFFile.AUTHOR_KEY: "author",
    SigMFFile.COLLECTION_KEY: "sigmf:collection",
    SigMFFile.DATASET_KEY: "sigmf:dataset",
    SigMFFile.DATATYPE_KEY: "datatype",
    SigMFFile.DATA_DOI_KEY: "data_doi",
    SigMFFile.DESCRIPTION_KEY: "description",
    SigMFFile.EXTENSIONS_KEY: "sigmf:extensions",
    SigMFFile.GEOLOCATION_KEY: "geolocation",
    SigMFFile.HASH_KEY: "sigmf:hash",
    SigMFFile.HW_KEY: "sdr",
    SigMFFile.LICENSE_KEY: "license",
    SigMFFile.META_DOI_KEY: "metadata",
    SigMFFile.METADATA_ONLY_KEY: "sigmf:metadata_only",
    SigMFFile.NUM_CHANNELS_KEY: "sigmf:num_channels",
    SigMFFile.RECORDER_KEY: "source_software",
    SigMFFile.SAMPLE_RATE_KEY: "sample_rate",
    SigMFFile.START_OFFSET_KEY: "sigmf:start_offset",
    SigMFFile.TRAILING_BYTES_KEY: "sigmf:trailing_bytes",
    SigMFFile.VERSION_KEY: "sigmf:version",
}
def to_npy(
    recording: Recording,
    filename: Optional[str] = None,
    path: Optional[os.PathLike | str] = None,
    overwrite: bool = False,
) -> str:
    """Write recording to ``.npy`` binary file.

    Three objects are written back-to-back into one file with :func:`numpy.save`:
    the sample data, the metadata mapping, and the annotation list.

    :param recording: The recording to be written to file.
    :type recording: ria_toolkit_oss.datatypes.Recording
    :param filename: Name of the output file. Defaults to an auto-generated name.
    :type filename: os.PathLike or str, optional
    :param path: Directory to save into. Defaults to ``recordings/``.
    :type path: os.PathLike or str, optional
    :param overwrite: Overwrite an existing file instead of raising. Default False.
    :type overwrite: bool, optional
    :raises IOError: If there is an issue encountered during the file writing process.
    :return: Path where the file was saved.
    :rtype: str

    **Examples:**

    >>> from ria_toolkit_oss.sdr import Synth
    >>> from ria_toolkit_oss.data import Recording
    >>> from ria_toolkit_oss.io import to_npy
    >>> sdr = Synth()
    >>> rec = sdr.record(center_frequency=2.4e9, sample_rate=20e6)
    >>> to_npy(recording=rec, file="sample_recording.npy")
    """
    filename, path, fullpath = generate_fullpath(
        recording=recording, filename=filename, path=path, extension=".npy", overwrite=overwrite
    )
    # Order matters: from_npy reads these three back in the same sequence.
    payload = (np.array(recording.data), recording.metadata, recording.annotations)
    with open(file=fullpath, mode="wb") as handle:
        for part in payload:
            np.save(handle, part)
    return str(fullpath)
def from_npy(file: os.PathLike | str, legacy: bool = False) -> Recording:
    """Load a recording from a ``.npy`` binary file.

    :param file: The directory path to the recording file, with or without the
        ``.npy`` file extension.
    :type file: str or os.PathLike
    :param legacy: If True, load legacy format (iqdata, meta[4], extended_meta dict).
        If False, load current format (data, metadata dict, annotations list).
        Default is False.
    :type legacy: bool, optional
    :raises IOError: If there is an issue encountered during the file reading process.
    :raises ValueError: If the file has an extension other than ``.npy``.
    :return: The recording, as initialized from the ``.npy`` file.
    :rtype: ria_toolkit_oss.datatypes.Recording
    """
    base, ext = os.path.splitext(file)
    if ext not in ("", ".npy"):
        raise ValueError("Cannot use from_npy if file extension is not .npy")
    # Normalize so the target always carries the .npy extension.
    target = str(base) + ".npy"
    if legacy:
        return from_npy_legacy(target)
    with open(file=target, mode="rb") as handle:
        data = np.load(handle, allow_pickle=True)
        metadata = np.load(handle, allow_pickle=True).tolist()
        try:
            annotations = list(np.load(handle, allow_pickle=True))
        except EOFError:
            # Older files may omit the annotations array entirely.
            annotations = []
    return Recording(data=data, metadata=metadata, annotations=annotations)
def from_npy_legacy(file: os.PathLike | str) -> Recording:
    """Load a recording from legacy NPY format.

    Legacy format (pre-utils) stores three numpy arrays:

    1. iqdata: shape (2, N) with I and Q as separate rows (float32)
    2. meta: shape (4,) with [center_freq, rec_length, decimation, sample_rate]
    3. extended_meta: dict with additional metadata

    :param file: The directory path to the recording file, with or without the
        ``.npy`` file extension.
    :type file: str or os.PathLike
    :raises IOError: If there is an issue encountered during the file reading process.
    :raises ValueError: If the file has an extension other than ``.npy``.
    :return: The recording, as initialized from the legacy ``.npy`` file.
    :rtype: ria_toolkit_oss.datatypes.Recording

    **Examples:**

    Load legacy SRS recordings:

    >>> from ria_toolkit_oss.io import from_npy_legacy
    >>> rec = from_npy_legacy("~/sample_recs/srs/example_srs_recordings/bw40M_Youtube_sr46.08/iq3775MHz053601.npy")
    >>> print(rec.metadata.get('protocol'))
    5G40
    """
    base, ext = os.path.splitext(file)
    if ext not in ("", ".npy"):
        raise ValueError("Cannot use from_npy_legacy if file extension is not .npy")
    target = str(base) + ".npy"
    with open(target, "rb") as handle:
        iqdata = np.load(handle)  # (2, N): row 0 = I, row 1 = Q
        meta = np.load(handle)  # [center_freq, rec_length, decimation, sample_rate]
        extended_meta = np.load(handle, allow_pickle=True)[0]
    # Collapse the two real rows into one complex vector.
    complex_data = iqdata[0, :] + 1j * iqdata[1, :]
    metadata = {}
    if len(meta) >= 4:
        metadata = {
            "center_frequency": float(meta[0]),
            "legacy_rec_length": int(meta[1]),
            "legacy_decimation": int(meta[2]),
            "sample_rate": float(meta[3]),
        }
    # Merge extended metadata (lower-cased keys) without clobbering basic fields.
    if isinstance(extended_meta, dict):
        for key, value in extended_meta.items():
            metadata.setdefault(key.lower(), value)
    return Recording(data=complex_data, metadata=metadata)
def to_sigmf(
    recording: Recording,
    filename: Optional[str] = None,
    path: Optional[os.PathLike | str] = None,
    overwrite: bool = False,
) -> None:
    """Write recording to a set of SigMF files.

    The SigMF io format is defined by the `SigMF Specification Project
    <https://github.com/sigmf/SigMF>`_.

    :param recording: The recording to be written to file.
    :type recording: ria_toolkit_oss.datatypes.Recording
    :param filename: The name of the file where the recording is to be saved.
        Defaults to auto generated filename.
    :type filename: os.PathLike or str, optional
    :param path: The directory path to where the recording is to be saved.
        Defaults to recordings/.
    :type path: os.PathLike or str, optional
    :param overwrite: Whether to overwrite existing files. Default is False.
    :type overwrite: bool, optional
    :raises IOError: If a target file already exists and ``overwrite`` is False,
        or if there is an issue during the file writing process.
    :raises NotImplementedError: If the recording has more than one channel.
    :return: None

    **Examples:**

    >>> from ria_toolkit_oss.sdr import Synth
    >>> from ria_toolkit_oss.data import Recording
    >>> from ria_toolkit_oss.io import to_sigmf
    >>> sdr = Synth()
    >>> rec = sdr.record(center_frequency=2.4e9, sample_rate=20e6)
    >>> to_sigmf(recording=rec, file="sample_recording")
    """
    # overwrite=True here because the real overwrite check happens below against
    # the .sigmf-data/.sigmf-meta paths; generate_fullpath only sees the bare name.
    filename, path, _ = generate_fullpath(
        recording=recording, filename=filename, path=path, extension="", overwrite=True
    )
    multichannel_samples = recording.data
    metadata = recording.metadata
    annotations = recording.annotations
    if multichannel_samples.shape[0] > 1:
        raise NotImplementedError("SigMF File Saving Not Implemented for Multichannel Recordings")
    # Extract single channel.
    samples = multichannel_samples[0]
    # BUG FIX: these paths previously embedded the literal text "(unknown)"
    # instead of the generated/requested base name, so every export collided on
    # the same file pair. Use the base name from generate_fullpath.
    data_file_path = os.path.join(path, f"{filename}.sigmf-data")
    meta_file_path = os.path.join(path, f"{filename}.sigmf-meta")
    if not overwrite:
        if os.path.isfile(data_file_path):
            raise IOError("File already exists")
        if os.path.isfile(meta_file_path):
            raise IOError("File already exists")
    samples.tofile(data_file_path)
    global_info = {
        SigMFFile.DATATYPE_KEY: get_data_type_str(samples),
        SigMFFile.VERSION_KEY: sigmf.__version__,
        SigMFFile.RECORDER_KEY: "RIA",
        SigMFFile.EXTENSIONS_KEY: [
            {
                "name": "ria",
                "version": "0.1.4",
                "optional": True,
            }
        ],
    }
    # Map Recording metadata keys onto SigMF core keys where a mapping exists.
    converted_metadata = {
        sigmf_key: metadata[metadata_key]
        for sigmf_key, metadata_key in SIGMF_KEY_CONVERSION.items()
        if metadata_key in metadata
    }
    # Merge dictionaries, giving priority to the fixed global_info values.
    global_info = {**converted_metadata, **global_info}
    # Also store the full metadata under the "ria:" namespace for round-tripping.
    ria_metadata = {f"ria:{key}": value for key, value in metadata.items()}
    ria_metadata = convert_to_serializable(ria_metadata)
    global_info.update(ria_metadata)
    sigMF_metafile = SigMFFile(
        data_file=data_file_path,
        global_info=global_info,
    )
    for annotation_object in annotations:
        annotation_dict = annotation_object.to_sigmf_format()
        annotation_dict = convert_to_serializable(annotation_dict)
        sigMF_metafile.add_annotation(
            start_index=annotation_dict[SigMFFile.START_INDEX_KEY],
            length=annotation_dict[SigMFFile.LENGTH_INDEX_KEY],
            metadata=annotation_dict["metadata"],
        )
    sigMF_metafile.add_capture(
        0,
        metadata={
            SigMFFile.FREQUENCY_KEY: metadata.get("center_frequency", 0),
            SigMFFile.DATETIME_KEY: dt.datetime.fromtimestamp(float(metadata.get("timestamp", 0)), tz=timezone.utc)
            .isoformat()
            .replace("+00:00", "Z"),
        },
    )
    meta_dict = sigMF_metafile.ordered_metadata()
    meta_dict["ria"] = metadata
    sigMF_metafile.tofile(meta_file_path)
def from_sigmf(file: os.PathLike | str) -> Recording:
"""Load a recording from a set of SigMF files.
:param file: The directory path to the SigMF recording files, without any file extension.
The recording will be initialized from ``file_name.sigmf-data`` and ``file_name.sigmf-meta``.
Both the data and meta files must be present for a successful read.
:type file: str or os.PathLike
:raises IOError: If there is an issue encountered during the file reading process.
:return: The recording, as initialized from the SigMF files.
:rtype: ria_toolkit_oss.datatypes.Recording
"""
file = str(file)
if len(file) > 11:
if file[-11:-5] != ".sigmf":
file = file + ".sigmf-data"
sigmf_file = sigmffile.fromfile(file)
data = sigmf_file.read_samples()
global_metadata = sigmf_file.get_global_info()
dict_annotations = sigmf_file.get_annotations()
processed_metadata = {}
for key, value in global_metadata.items():
# Process core keys
if key.startswith("core:"):
base_key = key[5:] # Remove 'core:' prefix
converted_key = SIGMF_KEY_CONVERSION.get(base_key, base_key)
# Process ria keys
elif key.startswith("ria:"):
converted_key = key[4:] # Remove 'ria:' prefix
else:
# Load non-core/ria keys as is
converted_key = key
processed_metadata[converted_key] = value
annotations = []
for dict in dict_annotations:
annotations.append(
Annotation(
sample_start=dict[SigMFFile.START_INDEX_KEY],
sample_count=dict[SigMFFile.LENGTH_INDEX_KEY],
freq_lower_edge=dict.get(SigMFFile.FLO_KEY, None),
freq_upper_edge=dict.get(SigMFFile.FHI_KEY, None),
label=dict.get(SigMFFile.LABEL_KEY, None),
comment=dict.get(SigMFFile.COMMENT_KEY, None),
detail=dict.get("ria:detail", None),
)
)
output_recording = Recording(data=data, metadata=processed_metadata, annotations=annotations)
return output_recording
def to_wav(
    recording: Recording,
    filename: Optional[str] = None,
    path: Optional[os.PathLike | str] = None,
    target_sample_rate: Optional[int] = 48000,
    bits_per_sample: int = 32,
    overwrite: bool = False,
) -> str:
    """Write recording to WAV file with embedded YAML metadata in LIST INFO chunk.

    WAV format uses stereo audio with I (in-phase) in left channel and Q (quadrature)
    in right channel. Metadata is stored in standard LIST INFO chunks with RF-specific
    metadata encoded as YAML in the ICMT (comment) field for human readability.

    :param recording: The recording to be written to file.
    :type recording: ria_toolkit_oss.datatypes.Recording
    :param filename: The name of the file where the recording is to be saved.
        Defaults to auto-generated filename.
    :type filename: str, optional
    :param path: The directory path to where the recording is to be saved.
        Defaults to recordings/.
    :type path: os.PathLike or str, optional
    :param target_sample_rate: Sample rate written to the WAV header when the recording
        metadata does not specify one. Defaults to 48 kHz. No decimation is performed --
        IQ samples are written sample-for-sample exactly as provided.
    :type target_sample_rate: int, optional
    :param bits_per_sample: Bits per sample (32 for float32, 16 for int16).
        Default is 32 (float32).
    :type bits_per_sample: int, optional
    :param overwrite: Whether to overwrite existing files. Default is False.
    :type overwrite: bool, optional
    :raises IOError: If file already exists and overwrite is False.
    :raises ValueError: If recording has multiple channels.
    :raises ValueError: If bits_per_sample is not 16 or 32.
    :raises ValueError: If 16-bit export is requested but samples fall outside [-1, 1).
    :return: Path where the file was saved.
    :rtype: str
    """
    # Imported locally so the dependency is only paid when exporting WAV.
    import wave
    if recording.n_chan > 1:
        raise ValueError("WAV export not supported for multichannel recordings")
    if bits_per_sample not in [16, 32]:
        raise ValueError("bits_per_sample must be 16 or 32")
    # Generate filename if not provided
    filename, path, fullpath = generate_fullpath(
        recording=recording, filename=filename, path=path, extension=".wav", overwrite=overwrite
    )
    # Extract single channel
    iq_samples = np.asarray(recording.data[0])
    # Determine WAV header sample rate (metadata only; sample data is untouched)
    wav_sample_rate = recording.sample_rate or target_sample_rate or 48000
    # Convert complex to stereo (I and Q channels)
    i_channel = np.real(iq_samples)
    q_channel = np.imag(iq_samples)
    # Convert to target data type
    if bits_per_sample == 32:
        # 32-bit float
        # NOTE(review): the stdlib wave module writes a PCM-format header; these
        # float32 frames are therefore stored under a PCM header. from_wav in
        # this module reads them back symmetrically (sampwidth 4 -> float32),
        # but third-party tools may misinterpret the file — confirm if external
        # interoperability is required.
        i_data = np.ascontiguousarray(i_channel, dtype=np.float32)
        q_data = np.ascontiguousarray(q_channel, dtype=np.float32)
        sample_width = 4
    else:
        # 16-bit int
        max_mag = np.max(np.abs(np.concatenate([i_channel, q_channel])))
        # NOTE(review): a sample of exactly +1.0 passes this check (`> 1.0`) and
        # is then clipped to 1.0 - 1/32767 below, slightly at odds with the
        # "[-1, 1)" wording of the error message.
        if max_mag > 1.0:
            raise ValueError("16-bit WAV export requires samples within [-1, 1). Use float32 or normalize manually.")
        scale = np.iinfo(np.int16).max
        # Clip just below +1.0 so rounding cannot overflow int16.
        i_scaled = np.clip(i_channel, -1.0, 1.0 - (1.0 / scale))
        q_scaled = np.clip(q_channel, -1.0, 1.0 - (1.0 / scale))
        i_data = np.ascontiguousarray(np.round(i_scaled * scale).astype(np.int16))
        q_data = np.ascontiguousarray(np.round(q_scaled * scale).astype(np.int16))
        sample_width = 2
    # Interleave I and Q: [I0, Q0, I1, Q1, ...] == stereo frames (left=I, right=Q)
    stereo = np.empty(len(iq_samples) * 2, dtype=i_data.dtype)
    stereo[0::2] = i_data
    stereo[1::2] = q_data
    # Write WAV file
    with wave.open(fullpath, "wb") as wav:
        wav.setnchannels(2)  # Stereo (I and Q)
        wav.setsampwidth(sample_width)
        wav.setframerate(int(wav_sample_rate))
        if bits_per_sample == 32:
            wav.setcomptype("NONE", "not compressed")
        wav.writeframes(stereo.tobytes())
    # Prepare metadata for LIST INFO chunk
    rf_metadata = recording.metadata.copy()
    # Record both RF and WAV header sample rates for clarity
    if recording.sample_rate:
        rf_metadata["rf_sample_rate_hz"] = float(recording.sample_rate)
    rf_metadata["wav_sample_rate_hz"] = float(wav_sample_rate)
    # Rename common keys to more descriptive versions
    if "center_frequency" in rf_metadata:
        rf_metadata["center_frequency_hz"] = rf_metadata.pop("center_frequency")
    if "sample_rate" in rf_metadata and "rf_sample_rate_hz" not in rf_metadata:
        rf_metadata["rf_sample_rate_hz"] = rf_metadata.pop("sample_rate")
    # Append LIST INFO chunk with metadata (written after the audio data)
    _append_wav_list_info_chunk(fullpath, rf_metadata)
    return fullpath
def from_wav(file: os.PathLike | str) -> Recording:
    """Load recording from WAV file and extract RF metadata.

    :param file: The path to the WAV file to load.
    :type file: str or os.PathLike
    :raises IOError: If there is an issue reading the file.
    :raises ValueError: If file is not stereo or has unsupported format.
    :return: The recording, as initialized from the WAV file.
    :rtype: ria_toolkit_oss.datatypes.Recording
    """
    import wave

    wav_path = str(file)
    if not wav_path.endswith(".wav"):
        wav_path = wav_path + ".wav"
    # Pull the audio frames and format parameters from the WAV container.
    with wave.open(wav_path, "rb") as reader:
        n_channels = reader.getnchannels()
        sample_width = reader.getsampwidth()
        header_rate = reader.getframerate()
        comp_type = reader.getcomptype()
        audio_bytes = reader.readframes(reader.getnframes())
    if n_channels != 2:
        raise ValueError(f"Expected stereo WAV file, got {n_channels} channels")
    # Map container format to a numpy dtype.
    if sample_width == 4 and comp_type == "NONE":
        dtype = np.float32  # 32-bit float
    elif sample_width == 2:
        dtype = np.int16  # 16-bit int
    else:
        raise ValueError(f"Unsupported WAV format: {sample_width} bytes per sample, comp_type={comp_type}")
    interleaved = np.frombuffer(audio_bytes, dtype=dtype)
    # De-interleave: left channel holds I, right channel holds Q.
    i_channel = interleaved[0::2]
    q_channel = interleaved[1::2]
    if dtype == np.int16:
        # Undo the int16 scaling applied by to_wav.
        i_channel = i_channel.astype(np.float32) / 32767.0
        q_channel = q_channel.astype(np.float32) / 32767.0
    iq_samples = i_channel + 1j * q_channel
    # Recover the YAML metadata embedded in the LIST INFO chunk, if any.
    metadata = _extract_wav_list_info(wav_path) or {}
    if "sample_rate" not in metadata:
        # Prefer the RF sample rate, then the recorded WAV header rate, then
        # fall back to the rate actually stored in this file's header.
        for fallback_key in ("rf_sample_rate_hz", "wav_sample_rate_hz"):
            if fallback_key in metadata:
                metadata["sample_rate"] = metadata[fallback_key]
                break
        else:
            metadata["sample_rate"] = float(header_rate)
    # Restore original keys for compatibility.
    if "center_frequency_hz" in metadata and "center_frequency" not in metadata:
        metadata["center_frequency"] = metadata["center_frequency_hz"]
    return Recording(data=iq_samples, metadata=metadata)
def to_blue(
    recording: Recording,
    filename: Optional[str] = None,
    path: Optional[os.PathLike | str] = None,
    data_format: str = "CI",
    overwrite: bool = False,
) -> str:
    """
    Write recording to MIDAS Blue file format.

    MIDAS Blue is a legacy RF file format with a 512-byte binary header.
    Commonly used with X-Midas and other RF/radar signal processing tools.

    :param recording: The recording to be written to file.
    :type recording: ria_toolkit_oss.datatypes.Recording
    :param filename: The name of the file where the recording is to be saved.
        Defaults to auto-generated filename.
    :type filename: str, optional
    :param path: The directory path to where the recording is to be saved.
        Defaults to recordings/.
    :type path: os.PathLike or str, optional
    :param data_format: Format code (default 'CI' = complex int16).
        Common formats: 'CI' (complex int16), 'CF' (complex float32), 'CD' (complex float64).
    :type data_format: str, optional
    :param overwrite: Whether to overwrite existing files. Default is False.
    :type overwrite: bool, optional
    :raises IOError: If file already exists and overwrite is False.
    :raises ValueError: If recording has multiple channels.
    :raises ValueError: If data_format is not supported.
    :raises ValueError: If integer formats are requested but samples fall outside [-1, 1).
    :return: Path where the file was saved.
    :rtype: str
    """
    if recording.n_chan > 1:
        raise ValueError("MIDAS Blue export not supported for multichannel recordings")
    if recording.sample_rate is None:
        raise ValueError("Recording metadata must include 'sample_rate' for MIDAS Blue export.")
    # Generate filename if not provided
    filename, path, fullpath = generate_fullpath(
        recording=recording, filename=filename, path=path, extension=".blue", overwrite=overwrite
    )
    # Extract single channel
    iq_samples = np.asarray(recording.data[0])
    sample_rate = float(recording.sample_rate)
    metadata = recording.metadata or {}
    # Data format: first letter C=complex (interleaved IQ) / S=scalar,
    # second letter I=int16, F=float32, D=float64.
    if data_format not in ["CI", "CF", "CD", "SI", "SF", "SD"]:
        raise ValueError(f"Unsupported data format: {data_format}. Use CI, CF, CD, SI, SF, or SD")
    # Convert IQ samples to specified format
    dtype_map = {
        "CI": np.int16,
        "CF": np.float32,
        "CD": np.float64,
        "SI": np.int16,
        "SF": np.float32,
        "SD": np.float64,
    }
    dtype = dtype_map[data_format]
    # Separate I and Q for complex formats
    if data_format.startswith("C"):
        # Convert using requested data type
        if np.issubdtype(dtype, np.integer):
            i_data = np.real(iq_samples)
            q_data = np.imag(iq_samples)
            max_mag = np.max(np.abs(np.concatenate([i_data, q_data])))
            # NOTE(review): this branch uses `> 1.0` while the scalar branch
            # below uses `>= 1.0`; a sample of exactly +/-1.0 is accepted here
            # and clipped — confirm which boundary is intended.
            if max_mag > 1.0:
                raise ValueError(
                    "Integer MIDAS Blue export requires samples within [-1, 1). "
                    "Normalize or export using a float format (CF/CD)."
                )
            max_val = np.iinfo(dtype).max
            # Clip just below +1.0 so rounding cannot overflow the integer type.
            eps = 1.0 / max_val
            i_scaled = np.clip(i_data, -1.0, 1.0 - eps)
            q_scaled = np.clip(q_data, -1.0, 1.0 - eps)
            i_converted = np.round(i_scaled * max_val).astype(dtype)
            q_converted = np.round(q_scaled * max_val).astype(dtype)
        else:
            i_converted = np.real(iq_samples).astype(dtype, copy=False)
            q_converted = np.imag(iq_samples).astype(dtype, copy=False)
        # Interleave I and Q: [I0, Q0, I1, Q1, ...]
        interleaved = np.empty(len(iq_samples) * 2, dtype=dtype)
        interleaved[0::2] = i_converted
        interleaved[1::2] = q_converted
    else:
        # Real-valued data (use only I channel)
        if np.issubdtype(dtype, np.integer):
            real_channel = np.real(iq_samples)
            max_mag = np.max(np.abs(real_channel))
            if max_mag >= 1.0:
                raise ValueError(
                    "Integer MIDAS Blue export requires samples within [-1, 1). "
                    "Normalize or export using a float format (SF/SD)."
                )
            max_val = np.iinfo(dtype).max
            eps = 1.0 / max_val
            clipped = np.clip(real_channel, -1.0, 1.0 - eps)
            interleaved = np.round(clipped * max_val).astype(dtype)
        else:
            interleaved = np.real(iq_samples).astype(dtype, copy=False)
    # Create 512-byte header. Field offsets mirror the reads in from_blue.
    header = bytearray(512)
    header[0:4] = b"BLUE"  # magic
    header[4:8] = b"EEEI"  # header representation: little-endian (from_blue maps "IEEE" -> big)
    header[8:12] = b"EEEI"  # data representation: little-endian
    header[52:54] = data_format.encode("ascii")  # 2-char format code
    struct.pack_into("<d", header, 264, 1.0 / sample_rate)  # xdelta: sample interval in seconds
    # NOTE(review): presumably the Blue file "type" field (1000 = 1-D data);
    # from_blue does not read this offset — confirm against the Blue ICD.
    struct.pack_into("<i", header, 48, 1000)
    # Timecode (optional)
    timecode_value = metadata.get("timecode", metadata.get("timestamp"))
    if isinstance(timecode_value, numbers.Real):
        struct.pack_into("<d", header, 56, float(timecode_value))
    data_start_offset = 512
    data_size_bytes = int(interleaved.nbytes)
    struct.pack_into("<d", header, 32, float(data_start_offset))  # data start, bytes
    struct.pack_into("<d", header, 40, float(data_size_bytes))  # data size, bytes
    # Extended header (metadata keywords), appended after the data and aligned
    # to the next 512-byte block boundary.
    ext_blob = _encode_blue_extended_header(metadata)
    pad_len = 0
    ext_start_blocks = 0
    ext_size = 0
    if ext_blob:
        data_end = data_start_offset + data_size_bytes
        pad_len = (512 - (data_end % 512)) % 512
        ext_start_offset = data_end + pad_len
        ext_start_blocks = ext_start_offset // 512  # stored in 512-byte blocks
        ext_size = len(ext_blob)
    struct.pack_into("<i", header, 24, ext_start_blocks)
    struct.pack_into("<i", header, 28, ext_size)
    # Write file
    # NOTE(review): tofile() writes native byte order; on a big-endian host this
    # would not match the "EEEI" declaration above — confirm if big-endian hosts
    # must be supported.
    with open(fullpath, "wb") as f:
        f.write(header)
        interleaved.tofile(f)
        if ext_blob:
            if pad_len:
                f.write(b"\x00" * pad_len)
            f.write(ext_blob)
    return fullpath
def from_blue(file: os.PathLike | str) -> Recording:
    """
    Load recording from MIDAS Blue file.

    :param file: The path to the MIDAS Blue file to load.
    :type file: str or os.PathLike
    :raises IOError: If there is an issue reading the file.
    :raises ValueError: If file format is not valid or unsupported.
    :return: The recording, as initialized from the Blue file.
    :rtype: ria_toolkit_oss.datatypes.Recording
    """
    filename = str(file)
    if not filename.endswith(".blue"):
        filename = filename + ".blue"
    # --- Parse the fixed 512-byte header (offsets mirror those written by to_blue) ---
    with open(filename, "rb") as f:
        header_bytes = f.read(512)
    if len(header_bytes) < 512:
        raise ValueError("File too small to be a valid MIDAS Blue file")
    magic = header_bytes[0:4].decode("ascii", errors="ignore")
    if magic != "BLUE":
        raise ValueError(f"Not a Blue file (magic={magic})")
    # "IEEE" marks big-endian; anything else (normally "EEEI") is treated as little-endian.
    header_rep = header_bytes[4:8].decode("ascii", errors="ignore")
    data_rep = header_bytes[8:12].decode("ascii", errors="ignore")
    header_endian = ">" if header_rep == "IEEE" else "<"
    data_endian = ">" if data_rep == "IEEE" else "<"
    ext_start = struct.unpack(f"{header_endian}i", header_bytes[24:28])[0]  # extended header start, 512-byte blocks
    ext_size = struct.unpack(f"{header_endian}i", header_bytes[28:32])[0]  # extended header size, bytes
    data_start_offset = int(struct.unpack(f"{header_endian}d", header_bytes[32:40])[0])
    data_size_bytes = int(struct.unpack(f"{header_endian}d", header_bytes[40:48])[0])
    data_format = header_bytes[52:54].decode("ascii", errors="ignore")  # 2-char format code
    timecode = struct.unpack(f"{header_endian}d", header_bytes[56:64])[0]
    time_interval = struct.unpack(f"{header_endian}d", header_bytes[264:272])[0]  # xdelta: sample interval, seconds
    sample_rate = 1.0 / time_interval if time_interval > 0 else 0
    file_size = os.path.getsize(filename)
    # Fall back to sensible defaults when the header fields are zero/invalid.
    if data_start_offset <= 0:
        data_start_offset = 512
    if data_size_bytes <= 0:
        # Data runs to the extended header (if any), otherwise to end of file.
        data_end = ext_start * 512 if ext_start > 0 else file_size
        data_size_bytes = max(0, data_end - data_start_offset)
    # Map format code to numpy dtype; the bool flags complex (interleaved IQ) data.
    dtype_map = {
        "CB": (np.int8, True),
        "CI": (np.int16, True),
        "CL": (np.int32, True),
        "CF": (np.float32, True),
        "CD": (np.float64, True),
        "SB": (np.int8, False),
        "SI": (np.int16, False),
        "SL": (np.int32, False),
        "SF": (np.float32, False),
        "SD": (np.float64, False),
    }
    base_dtype, is_complex = dtype_map.get(data_format, (None, False))
    if base_dtype is None:
        raise ValueError(f"Unsupported format: {data_format}")
    # Apply endianness
    dtype = np.dtype(base_dtype).newbyteorder(data_endian)
    ext_keywords: dict[str, Any] = {}
    with open(filename, "rb") as f:
        f.seek(data_start_offset)
        # Guard against a zero itemsize (cannot happen for the dtypes above,
        # but keeps the division safe).
        num_elements = data_size_bytes // dtype.itemsize if dtype.itemsize else 0
        data = np.fromfile(f, dtype=dtype, count=num_elements)
        if ext_start > 0 and ext_size > 0:
            f.seek(ext_start * 512)
            ext_bytes = f.read(ext_size)
            ext_keywords = _decode_blue_keywords(ext_bytes, header_rep)
    # Convert to complex if needed
    if is_complex:
        # Interleaved IQ: [I0, Q0, I1, Q1, ...]
        i_samples = data[0::2]
        q_samples = data[1::2]
        # Normalize integer data back to ~[-1, 1] floats (inverse of to_blue scaling)
        if np.issubdtype(base_dtype, np.integer):
            max_val = np.iinfo(base_dtype).max
            i_samples = i_samples.astype(np.float32) / max_val
            q_samples = q_samples.astype(np.float32) / max_val
        iq_samples = i_samples + 1j * q_samples
    else:
        # Real data - convert to complex
        if np.issubdtype(base_dtype, np.integer):
            max_val = np.iinfo(base_dtype).max
            real_samples = data.astype(np.float32) / max_val
        else:
            real_samples = data.astype(np.float32)
        iq_samples = real_samples.astype(np.complex64)
    # Create metadata
    metadata = {
        "sample_rate": float(sample_rate),
        "blue_data_format": data_format,
        "blue_endian": data_rep,
    }
    if ext_keywords:
        metadata["blue_keywords"] = ext_keywords
        # Promote recognized extended-header tags to top-level keys without clobbering.
        for tag, value in ext_keywords.items():
            meta_key = _meta_key_from_tag(tag)
            if meta_key and meta_key not in metadata:
                metadata[meta_key] = value
    if isinstance(timecode, numbers.Real) and timecode != 0:
        metadata.setdefault("timestamp", timecode)
        metadata["timecode"] = timecode
    return Recording(data=iq_samples, metadata=metadata)
def load_recording(file: os.PathLike) -> Recording:
    """Load a recording from file.

    :param file: The directory path to the file(s) to load, **with** the file extension.
        To load from SigMF, the file extension must be one of *sigmf*, *sigmf-data*,
        or *sigmf-meta*; either way both the SigMF data and meta files must be
        present for a successful read.
    :type file: os.PathLike
    :raises IOError: If there is an issue encountered during the file reading process.
    :raises ValueError: If the inferred file extension is not supported.
    :return: The recording, as initialized from file(s).
    :rtype: ria_toolkit_oss.datatypes.Recording
    """
    _, extension = os.path.splitext(file)
    extension = extension.lstrip(".")
    # Dispatch on the (case-insensitive) extension.
    dispatch = {
        "sigmf": from_sigmf,
        "sigmf-data": from_sigmf,
        "sigmf-meta": from_sigmf,
        "npy": from_npy,
        "wav": from_wav,
        "blue": from_blue,
    }
    loader = dispatch.get(extension.lower())
    if loader is None:
        raise ValueError(f"File extension {extension} not supported.")
    return loader(file=file)
def convert_to_serializable(obj):
    """
    Recursively convert a JSON-compatible structure into a fully JSON-serializable one.

    Handles NumPy scalar/array types, nested dicts, lists, tuples, and sets.
    Infinities (Python or NumPy floats) map to ``None``, since strict JSON has
    no representation for them.

    :param obj: The value to convert.
    :return: An equivalent structure built only from ``dict``, ``list``, ``str``,
        ``int``, ``float``, ``bool``, and ``None``.
    :raises TypeError: If a value of an unsupported type is encountered.
    """
    if obj is None:
        return None
    # BUG FIX: np.bool_ is not a subclass of Python bool/int, so it previously
    # fell through every branch and raised TypeError.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)  # Convert NumPy int to Python int
    if isinstance(obj, np.floating):
        # Recurse so non-finite NumPy floats get the same inf -> None mapping
        # as native Python floats (previously they leaked through as inf).
        return convert_to_serializable(float(obj))
    if isinstance(obj, np.ndarray):
        return obj.tolist()  # Convert NumPy array to nested lists
    if isinstance(obj, (list, tuple)):
        return [convert_to_serializable(item) for item in obj]
    if isinstance(obj, dict):
        return {key: convert_to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, set):
        # BUG FIX: elements are converted too; plain list(obj) left NumPy
        # scalars inside the result.
        return [convert_to_serializable(item) for item in obj]
    if isinstance(obj, float) and obj in (float("inf"), float("-inf")):
        return None  # JSON cannot represent infinities
    if isinstance(obj, (str, int, float, bool)):
        return obj  # Base case: already serializable
    raise TypeError(f"Value of type {type(obj)} is not JSON serializable: {obj}")
def generate_filename(recording: Recording, tag: Optional[str] = "rec") -> str:
    """Generate a filename from metadata.

    The result has the shape ``<tag>_<source>_<freq>Hz_<local datetime>_<rec_id[:7]>``
    (the ``source`` segment is omitted when the metadata has no "source" key).

    :param recording: The recording whose metadata seeds the name.
    :type recording: ria_toolkit_oss.datatypes.Recording
    :param tag: The string at the beginning of the generated filename. Default is "rec".
        NOTE(review): passing ``tag=None`` would raise (``None + "_"``) despite the
        Optional annotation — confirm intended contract.
    :type tag: str, optional
    :return: A filename without an extension.
    :rtype: str
    """
    tag = tag + "_"
    source = recording.metadata.get("source", "")
    if source != "":
        source = source + "_"
    # converts 1000 to 1k for example
    # NOTE(review): assumes str(Quantity(...)) renders as "<number><SI prefix>"
    # with the prefix as the final character (e.g. "2.4G") — confirm against the
    # quantiphy formatting settings in use.
    center_frequency = str(Quantity(recording.metadata.get("center_frequency", 0)))
    if center_frequency != "0":
        # Split off the single-character SI prefix and round the numeric part.
        num = center_frequency[:-1]
        suffix = center_frequency[-1]
        num = int(np.round(float(num)))
    else:
        num = 0
        suffix = ""
    center_frequency = str(num) + suffix + "Hz_"
    timestamp = int(recording.timestamp)
    # NOTE(review): fromtimestamp() renders in the machine's local timezone, so
    # the same recording gets different filenames on differently-configured
    # hosts — confirm whether UTC was intended.
    timestamp = datetime.datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d_%H-%M-%S") + "_"
    # Add first seven characters of rec_id for uniqueness
    rec_id = recording.rec_id[0:7]
    return tag + source + center_frequency + timestamp + rec_id
def generate_fullpath(
    recording: Recording,
    filename: Optional[str],
    path: Optional[os.PathLike | str],
    extension: str,
    overwrite: bool,
):
    """
    Generate the filename, path, and fullpath of the given recording.

    :param recording: Recording used to auto-generate a name when ``filename`` is None.
    :type recording: ria_toolkit_oss.datatypes.Recording
    :param filename: Desired file name; any extension it carries is stripped and
        replaced with ``extension``. If None, a name is generated from metadata.
    :type filename: str, optional
    :param path: Target directory, created if missing. Defaults to ``recordings``.
    :type path: os.PathLike or str, optional
    :param extension: Extension (including the dot) to append to the file name.
    :type extension: str
    :param overwrite: If False, raise when the target file already exists.
    :type overwrite: bool
    :raises IOError: If the target file exists and ``overwrite`` is False.
    :return: Tuple of (filename with extension, directory path, joined full path).
    :rtype: tuple
    """
    # Generate filename if not provided.
    if filename is not None:
        filename, _ = os.path.splitext(filename)
    else:
        filename = generate_filename(recording=recording)
    filename = filename + extension
    if path is None:
        path = "recordings"
    # exist_ok avoids the exists()/makedirs() race of the previous version.
    os.makedirs(path, exist_ok=True)
    fullpath = os.path.join(path, filename)
    if not overwrite and os.path.isfile(fullpath):
        raise IOError(f"File already exists: {fullpath}")
    return filename, path, fullpath
def _append_wav_list_info_chunk(filename: str, rf_metadata: dict) -> None:
    """Append LIST INFO chunk to existing WAV file.

    Uses the ICMT field for YAML-formatted RF metadata and an ISFT field naming
    the producing software, then patches the RIFF size field in the file header.

    :param filename: Path to WAV file.
    :type filename: str
    :param rf_metadata: Dictionary of RF metadata to embed.
    :type rf_metadata: dict
    :raises ValueError: If the target file does not start with a RIFF header.
    """
    import yaml
    # Convert metadata to YAML string
    yaml_str = "# RF Recording Metadata\n"
    yaml_str += yaml.dump(rf_metadata, default_flow_style=False, sort_keys=False)
    # Create LIST INFO chunk data
    info_data = b""
    # Add ICMT (comments) tag with YAML metadata
    icmt_value = yaml_str.encode("utf-8", errors="ignore")
    icmt_value += b"\x00"  # NULL terminator
    # Pad to even length (RIFF requirement)
    # NOTE(review): the size written below includes the pad byte; strict RIFF
    # stores the unpadded size with the pad byte outside it. The reader in this
    # module tolerates this, but external tools may not — confirm.
    if len(icmt_value) % 2:
        icmt_value += b"\x00"
    info_data += b"ICMT"
    info_data += len(icmt_value).to_bytes(4, "little")
    info_data += icmt_value
    # Add ISFT (software) tag
    isft_value = b"riatoolkit oss SDR toolchain\x00"
    if len(isft_value) % 2:
        isft_value += b"\x00"
    info_data += b"ISFT"
    info_data += len(isft_value).to_bytes(4, "little")
    info_data += isft_value
    # Create LIST chunk
    list_chunk = b"LIST"
    list_chunk += (4 + len(info_data)).to_bytes(4, "little")  # Size includes "INFO" tag
    list_chunk += b"INFO"  # List type
    list_chunk += info_data
    # Append to WAV file
    with open(filename, "r+b") as f:
        # Read RIFF header
        f.seek(0)
        riff_header = f.read(4)
        if riff_header != b"RIFF":
            raise ValueError("Not a valid RIFF/WAV file")
        old_size = int.from_bytes(f.read(4), "little")
        # Update RIFF chunk size to account for the appended LIST chunk
        f.seek(4)
        new_size = old_size + len(list_chunk)
        f.write(new_size.to_bytes(4, "little"))
        # Append LIST INFO chunk at end
        f.seek(0, 2)  # End of file
        f.write(list_chunk)
def _extract_wav_list_info(filename: str) -> Optional[dict]:
"""Extract LIST INFO chunk and parse ICMT field as YAML.
:param filename: Path to WAV file.
:type filename: str
:return: Dictionary of metadata from ICMT field, or None if not found.
:rtype: dict or None
"""
with open(filename, "rb") as f:
# Read RIFF header
riff_header = f.read(4)
if riff_header != b"RIFF":
return None
file_size = int.from_bytes(f.read(4), "little")
wave_header = f.read(4)
if wave_header != b"WAVE":
return None
# Skip to chunks after header (12 bytes = RIFF + size + WAVE)
f.seek(12)
while f.tell() < file_size + 8:
chunk_id = f.read(4)
if len(chunk_id) < 4:
break
chunk_size = int.from_bytes(f.read(4), "little")
if chunk_id == b"LIST":
list_type = f.read(4)
if list_type == b"INFO":
# Read INFO chunk data
info_data = f.read(chunk_size - 4)
return _parse_wav_info_chunk(info_data)
else:
# Skip this LIST chunk
f.seek(chunk_size - 4, 1)
else:
# Skip chunk (align to even boundary)
skip_size = chunk_size
if chunk_size % 2:
skip_size += 1
f.seek(skip_size, 1)
return None
def _parse_wav_info_chunk(info_data: bytes) -> Optional[dict]:
    """Parse raw INFO chunk data and decode its ICMT field as YAML.

    :param info_data: Raw bytes from an INFO chunk (a sequence of
        tag/size/value records).
    :type info_data: bytes
    :return: Dictionary parsed from the YAML in the ICMT field, a
        ``{"raw_comment": ...}`` wrapper when the comment is not structured
        YAML, or None when no ICMT field is present.
    :rtype: dict or None
    """
    import yaml

    offset = 0
    # Each record is an 8-byte header (4-byte tag + 4-byte little-endian size)
    # followed by `size` value bytes. Use `offset + 8 <= len(...)` (not
    # `offset < len(...) - 8`) so a record that ends exactly at the end of the
    # data — e.g. a trailing zero-length value — is still examined.
    while offset + 8 <= len(info_data):
        tag = info_data[offset : offset + 4]
        size = int.from_bytes(info_data[offset + 4 : offset + 8], "little")
        value_bytes = info_data[offset + 8 : offset + 8 + size]
        if tag == b"ICMT":
            # Found comments field — strip NUL terminators and try YAML.
            icmt_str = value_bytes.decode("utf-8", errors="ignore").rstrip("\x00")
            try:
                metadata = yaml.safe_load(icmt_str)
            except yaml.YAMLError:
                # Not parseable as YAML: preserve the text verbatim.
                return {"raw_comment": icmt_str}
            # safe_load returns a plain string when there is no YAML
            # structure — wrap it so callers always receive a dict.
            if isinstance(metadata, str):
                return {"raw_comment": metadata}
            return metadata
        # Advance to the next record, honoring RIFF even-byte alignment.
        offset += 8 + size
        if size % 2:
            offset += 1
    return None
def _blue_meta_tag_from_key(key: str) -> str:
    """Build a BLUE keyword tag from an arbitrary metadata key.

    Non-alphanumeric runs become single underscores, the result is
    upper-cased, truncated to the tag length cap, and prefixed with
    ``META_``. Returns "" when nothing usable remains after sanitizing.
    """
    sanitized = re.sub(r"[^0-9A-Za-z]+", "_", key).strip("_")
    if not sanitized:
        return ""
    tag_body = sanitized.upper()[:_BLUE_META_TAG_MAX_LEN]
    return _BLUE_META_PREFIX + tag_body
def _encode_blue_value(value: Any) -> Optional[tuple[str, bytes]]:
if value is None:
return None
if isinstance(value, np.generic):
value = value.item()
if isinstance(value, bool):
value = int(value)
if isinstance(value, numbers.Integral):
if -(2**31) <= int(value) < 2**31:
return "L", struct.pack("<i", int(value))
return "D", struct.pack("<d", float(value))
if isinstance(value, numbers.Real):
return "D", struct.pack("<d", float(value))
if isinstance(value, (bytes, bytearray)):
return "A", bytes(value)
if isinstance(value, str):
return "A", value.encode("utf-8", errors="ignore")
if isinstance(value, (datetime.datetime, datetime.date)):
return "A", value.isoformat().encode("utf-8", errors="ignore")
if isinstance(value, (list, tuple, set, dict, np.ndarray, Annotation)):
return None
return "A", str(value).encode("utf-8", errors="ignore")
def _encode_blue_keyword(tag: str, value: Any) -> Optional[bytes]:
    """Serialize one BLUE extended-header keyword record.

    Layout: 4-byte lkey (total record length), 2-byte lext (header + tag +
    padding length), 1-byte tag length, 1-byte type char, value payload, tag
    text, then zero padding so the record length is a multiple of 8. Returns
    None when the value has no scalar encoding.
    """
    encoded = _encode_blue_value(value)
    if encoded is None:
        return None
    type_char, payload = encoded
    tag_ascii = tag.encode("ascii", errors="ignore")
    unpadded = 8 + len(payload) + len(tag_ascii)
    pad = -unpadded % 8  # bytes needed to reach the next 8-byte boundary
    lkey = unpadded + pad
    lext = 8 + len(tag_ascii) + pad
    return b"".join(
        (
            struct.pack("<i", lkey),
            struct.pack("<h", lext),
            struct.pack("<B", len(tag_ascii)),
            type_char.encode("ascii", errors="ignore")[:1],
            payload,
            tag_ascii,
            b"\x00" * pad,
        )
    )
def _encode_blue_extended_header(metadata: dict[str, Any]) -> bytes:
    """Serialize recording metadata into a BLUE extended header.

    Emits one keyword record per encodable entry, iterating keys in sorted
    order, skipping format-control keys and keys that sanitize to an empty
    tag or have unencodable values.
    """
    if not metadata:
        return b""
    records = (
        _encode_blue_keyword(tag, metadata[name])
        for name, tag in (
            (name, _blue_meta_tag_from_key(name))
            for name in sorted(metadata)
            if name not in _BLUE_SKIP_METADATA_KEYS
        )
        if tag
    )
    return b"".join(record for record in records if record)
def _decode_blue_keyword_value(type_char: str, value_bytes: bytes, endian: str) -> Any:
if type_char == "A":
return value_bytes.decode("utf-8", errors="ignore").rstrip("\x00")
dtype_code = _BLUE_NUMERIC_DTYPE.get(type_char)
if dtype_code is None or not value_bytes:
return value_bytes if value_bytes else None
dtype = np.dtype(endian + dtype_code)
array = np.frombuffer(value_bytes, dtype=dtype)
if array.size == 0:
return None
if array.size == 1:
return array[0].item()
return array.tolist()
def _decode_blue_keywords(data: bytes, endian: str) -> dict[str, Any]:
if not data:
return {}
metadata: dict[str, Any] = {}
offset = 0
endian_prefix = "<" if endian in ["EEEI", "VAX", ""] else ">"
while offset + 8 <= len(data):
lkey = struct.unpack_from(f"{endian_prefix}i", data, offset)[0]
if lkey <= 0 or offset + lkey > len(data):
break
lext = struct.unpack_from(f"{endian_prefix}h", data, offset + 4)[0]
ltag = data[offset + 6]
type_char = chr(data[offset + 7])
value_len = lkey - lext
value_start = offset + 8
value_end = value_start + value_len
tag_start = value_end
tag_end = tag_start + ltag
if value_len < 0 or tag_end > offset + lkey:
break
value_bytes = data[value_start:value_end]
tag = data[tag_start:tag_end].decode("ascii", errors="ignore").strip()
metadata[tag] = _decode_blue_keyword_value(type_char, value_bytes, endian_prefix)
offset += lkey
return metadata
def _meta_key_from_tag(tag: str) -> str:
    """Recover a metadata key from a ``META_``-prefixed BLUE tag.

    Strips the prefix, lower-cases, collapses repeated underscores, and trims
    leading/trailing underscores. Returns "" for tags without the prefix.
    """
    if not tag.startswith(_BLUE_META_PREFIX):
        return ""
    stripped = tag[len(_BLUE_META_PREFIX) :].lower()
    collapsed = re.sub(r"__+", "_", stripped)
    return collapsed.strip("_")