reorganized file structure

liyuxiao2 2025-05-21 15:46:28 -04:00
parent 3df27cf012
commit ba7d0d9f67
32 changed files with 0 additions and 355 deletions

29 binary files not shown.

View File

@@ -1,69 +0,0 @@
from utils.data import Recording
import numpy as np
from utils.signal import block_generator

mods = {
    "bpsk": {"num_bits_per_symbol": 1, "constellation_type": "psk"},
    "qpsk": {"num_bits_per_symbol": 2, "constellation_type": "psk"},
    "qam16": {"num_bits_per_symbol": 4, "constellation_type": "qam"},
    "qam64": {"num_bits_per_symbol": 6, "constellation_type": "qam"},
}


def generate_modulated_signals():
    for modulation in ["bpsk", "qpsk", "qam16", "qam64"]:
        for snr in np.arange(-6, 13, 3):
            recording_length = 1024
            beta = 0.3  # the rolloff factor, can be changed to add variety
            sps = 4  # samples per symbol, or the relative bandwidth of the digital signal; can also be changed
            # blocks don't directly take the string 'qpsk', so we use the dict 'mods' to get parameters
            constellation_type = mods[modulation]["constellation_type"]
            num_bits_per_symbol = mods[modulation]["num_bits_per_symbol"]
            # construct the digital modulation blocks with these parameters:
            # bit source -> mapper -> upsampling -> pulse shaping
            bit_source = block_generator.RandomBinarySource()
            mapper = block_generator.Mapper(
                constellation_type=constellation_type,
                num_bits_per_symbol=num_bits_per_symbol,
            )
            upsampler = block_generator.Upsampling(factor=sps)
            pulse_shaping_filter = block_generator.RaisedCosineFilter(
                upsampling_factor=sps, beta=beta
            )
            pulse_shaping_filter.connect_input([upsampler])
            upsampler.connect_input([mapper])
            mapper.connect_input([bit_source])
            modulation_recording = pulse_shaping_filter.record(
                num_samples=recording_length
            )
            # add noise by calculating the power of the modulation recording
            # and generating AWGN from the snr parameter
            signal_power = np.mean(np.abs(modulation_recording.data[0] ** 2))
            awgn_source = block_generator.AWGNSource(
                variance=(signal_power / 2) * (10 ** ((-1 * snr) / 20))
            )
            noise = awgn_source.record(num_samples=recording_length)
            samples_with_noise = modulation_recording.data + noise.data
            output_recording = Recording(data=samples_with_noise)
            # add metadata for ML later
            output_recording.add_to_metadata(key="modulation", value=modulation)
            output_recording.add_to_metadata(key="snr", value=int(snr))
            output_recording.add_to_metadata(key="beta", value=beta)
            output_recording.add_to_metadata(key="sps", value=sps)
            # view if you want
            # output_recording.view()
            # save to file
            output_recording.to_npy()  # optionally add path and filename parameters


if __name__ == "__main__":
    generate_modulated_signals()
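
A minimal sketch of loading one of the saved recordings back, assuming the from_npy helper from utils.io (used by the dataset script below) returns a Recording with .data and .metadata; the file name here is hypothetical, since to_npy() picks its own default path:

# hypothetical file name; to_npy() chooses its own default path and name
from utils.io import from_npy

rec = from_npy("recordings/qpsk_snr3.npy")
print(rec.data.shape)                                    # complex baseband samples, e.g. (1, 1024)
print(rec.metadata["modulation"], rec.metadata["snr"])   # e.g. "qpsk", 3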

View File

@@ -1,159 +0,0 @@
import os

import h5py
import numpy as np

from utils.io import from_npy
from split_dataset import split
from helpers.app_settings import get_app_settings

meta_dtype = np.dtype(
    [
        ("rec_id", "S256"),
        ("snippet_idx", np.int32),
        ("modulation", "S32"),
        ("snr", np.int32),
        ("beta", np.float32),
        ("sps", np.int32),
    ]
)

info_dtype = np.dtype(
    [
        ("num_records", np.int32),
        ("dataset_name", "S64"),  # up to 64-byte UTF-8 strings
        ("creator", "S64"),
    ]
)


def write_hdf5_file(records, output_path, dataset_name="data"):
    """
    Writes a list of records to an HDF5 file.

    Parameters:
        records (list): List of (data, metadata) tuples to be written to the file
        output_path (str): Path to the output HDF5 file
        dataset_name (str): Name of the dataset in the HDF5 file (default: "data")

    Returns:
        str: Path to the created HDF5 file
    """
    meta_arr = np.empty(len(records), dtype=meta_dtype)
    for i, (_, md) in enumerate(records):
        meta_arr[i] = (
            md["rec_id"].encode("utf-8"),
            md["snippet_idx"],
            md["modulation"].encode("utf-8"),
            int(md["snr"]),
            float(md["beta"]),
            int(md["sps"]),
        )

    first_rec, _ = records[0]  # records[0] is a tuple of (data, md)
    sample = first_rec
    shape, dtype = sample.shape, sample.dtype

    with h5py.File(output_path, "w") as hf:
        dset = hf.create_dataset(
            dataset_name, shape=(len(records),) + shape, dtype=dtype, compression="gzip"
        )
        for idx, (snip, md) in enumerate(records):
            dset[idx, ...] = snip

        mg = hf.create_group("metadata")
        mg.create_dataset("metadata", data=meta_arr, compression="gzip")
        print(dset.shape, f"snippets created in {dataset_name}")

        info_arr = np.array(
            [
                (
                    len(records),
                    dataset_name.encode("utf-8"),
                    b"generate_dataset.py",  # already bytes
                )
            ],
            dtype=info_dtype,
        )
        mg.create_dataset("dataset_info", data=info_arr)

    return output_path


def split_recording(recording_list, num_snippets):
    """
    Splits a list of recordings into smaller chunks.

    Parameters:
        recording_list (list): List of (data, metadata) tuples to be split
        num_snippets (int): Number of snippets to cut each recording into

    Returns:
        list: List of (snippet, metadata) tuples
    """
    snippet_list = []
    for data, md in recording_list:
        C, N = data.shape
        L = N // num_snippets
        for i in range(num_snippets):
            start = i * L
            end = (i + 1) * L
            snippet = data[:, start:end]
            # copy the metadata, adding a snippet index
            snippet_md = md.copy()
            snippet_md["snippet_idx"] = i
            snippet_list.append((snippet, snippet_md))
    return snippet_list


def generate_datasets(cfg):
    """
    Generates train and validation datasets from a folder of .npy files and
    saves them to HDF5 files.

    Parameters:
        cfg: Dataset configuration with input_dir, output_dir, num_slices,
            train_split, and seed attributes

    Returns:
        tuple: Paths to the created train and validation HDF5 files
    """
    os.makedirs(cfg.output_dir, exist_ok=True)

    # we assume the recordings are in .npy format
    files = os.listdir(cfg.input_dir)
    if not files:
        raise ValueError("No files found in the specified directory.")

    records = []
    for fname in files:
        rec = from_npy(os.path.join(cfg.input_dir, fname))
        data = rec.data
        md = rec.metadata  # pull metadata from the recording
        md.setdefault("rec_id", str(len(records)))
        records.append((data, md))

    # split each recording into cfg.num_slices snippets
    records = split_recording(records, cfg.num_slices)

    train_records, val_records = split(records, cfg.train_split, cfg.seed)

    train_path = os.path.join(cfg.output_dir, "train.h5")
    val_path = os.path.join(cfg.output_dir, "val.h5")

    write_hdf5_file(train_records, train_path, "training_data")
    write_hdf5_file(val_records, val_path, "validation_data")

    return train_path, val_path


def main():
    settings = get_app_settings()
    dataset_cfg = settings.dataset
    train_path, val_path = generate_datasets(dataset_cfg)
    print(f"✅ Train: {train_path}\n✅ Val: {val_path}")


if __name__ == "__main__":
    main()
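
The files written above can be inspected with h5py directly; a minimal sketch, assuming cfg.output_dir is "dataset" (the real value comes from the app settings):

# minimal sketch; the path depends on cfg.output_dir in the app settings
import h5py

with h5py.File("dataset/train.h5", "r") as hf:
    snippets = hf["training_data"][:]        # shape: (num_records, C, L)
    meta = hf["metadata"]["metadata"][:]     # structured array with meta_dtype fields
    info = hf["metadata"]["dataset_info"][0]
    print(snippets.shape, meta["modulation"][:5], info["creator"])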

View File

@@ -1,43 +0,0 @@
import random
from collections import defaultdict


def split(dataset, train_frac=0.8, seed=42):
    """
    Splits a dataset into train and validation sets, keeping all snippets
    from the same recording in the same split.

    Parameters:
        dataset (list): List of (data, metadata) tuples to be split.
        train_frac (float): Fraction of the dataset to place in the train set.
        seed (int): Random seed used when shuffling recordings.

    Returns:
        tuple: (train_records, val_records)
    """
    N = len(dataset)
    target = int(N * train_frac)

    # group snippet indices by their source recording
    by_rec = defaultdict(list)
    for i, (_, md) in enumerate(dataset):
        by_rec[md["rec_id"]].append(i)

    rec_ids = list(by_rec.keys())
    random.seed(seed)
    random.shuffle(rec_ids)

    # greedily add whole recordings to the train set until the target size is reached
    train_set = set()
    count = 0
    for rec_id in rec_ids:
        indices = by_rec[rec_id]
        if count + len(indices) <= target:
            train_set.update(indices)
            count += len(indices)

    validation_set = set(range(N)) - train_set

    print(f"Train set: {len(train_set)}")
    print(f"Val set: {len(validation_set)}")

    train_records = [dataset[i] for i in sorted(train_set)]
    val_records = [dataset[i] for i in sorted(validation_set)]
    return train_records, val_records
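
A quick usage sketch with hypothetical toy data, showing that snippets from the same recording always land in the same split:

# toy data only; real callers pass the snippet list built by split_recording
import numpy as np

toy = [
    (np.zeros((1, 128)), {"rec_id": str(r), "snippet_idx": i})
    for r in range(4)
    for i in range(8)
]
train, val = split(toy, train_frac=0.75, seed=0)
train_ids = {md["rec_id"] for _, md in train}
val_ids = {md["rec_id"] for _, md in val}
assert train_ids.isdisjoint(val_ids)  # recordings never straddle the split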