2025-10-22 12:04:53 -04:00
1 changed files with 430 additions and 0 deletions
--- a/src/ria_toolkit_oss/viz/radio_dataset.py
+++ b/src/ria_toolkit_oss/viz/radio_dataset.py
@ -0,0 +1,430 @@
 """
 Simple, clean visualization utilities for RadioDataset analysis.
 """
 import random
 from typing import Optional
 import numpy as np
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
 from plotly.graph_objects import Figure
 from plotly.subplots import make_subplots
 def create_styled_error_figure(title: str, message: str, suggestion: str = None) -> Figure:
    """Build a dark-themed placeholder figure that displays an error message.

    The figure has hidden axes and consists of a single boxed annotation
    centred on the canvas, styled with inline HTML markup.

    Args:
        title: Headline shown in bold after a warning emoji.
        message: Explanatory text shown beneath the headline.
        suggestion: Optional hint; when truthy, a "Suggestion" section is
            appended below the message.

    Returns:
        A 400px-high Plotly figure containing only the styled annotation.
    """
    background = "#1a202c"
    foreground = "#e2e8f0"

    # Assemble the annotation markup piece by piece, then join once.
    segments = [
        f"<b style='color:#f56565;font-size:18px'>⚠️ {title}</b><br><br>",
        f"<span style='color:#e2e8f0;font-size:14px'>{message}</span>",
    ]
    if suggestion:
        segments.append("<br><br><span style='color:#63b3ed;font-size:13px'>💡 <b>Suggestion:</b></span><br>")
        segments.append(f"<span style='color:#cbd5e0;font-size:12px'>{suggestion}</span>")
    annotation_text = "".join(segments)

    error_fig = go.Figure()

    # One annotation anchored at the middle of the paper area.
    error_fig.add_annotation(
        text=annotation_text,
        xref="paper", yref="paper",
        x=0.5, y=0.5,
        xanchor='center', yanchor='middle',
        showarrow=False,
        align="center",
        borderwidth=2,
        bordercolor="#4a5568",
        bgcolor="#2d3748",
        font={"family": "Arial, sans-serif", "size": 14, "color": foreground},
    )

    # Dark theme, no title.
    error_fig.update_layout(
        title="",
        height=400,
        template="plotly_dark",
        margin={"l": 40, "r": 40, "t": 40, "b": 40},
        plot_bgcolor=background,
        paper_bgcolor=background,
        font={"color": foreground},
    )

    # Nothing is plotted, so hide both axes (and with them the grid).
    error_fig.update_xaxes(visible=False)
    error_fig.update_yaxes(visible=False)
    return error_fig
 def _check_dataset_compatibility(dataset, plot_type: str) -> tuple[bool, str]:
    """Check whether a dataset can be rendered by a specific plot type.

    Args:
        dataset: Object exposing a ``metadata`` table (inspected through
            ``len``, ``.columns`` and ``.dtypes``) and, for spectrograms,
            optional ``dataset[i]`` sample access.
        plot_type: ``"class_distribution"`` or ``"sample_spectrogram"``; any
            other value only triggers the empty-dataset check.

    Returns:
        ``(is_compatible, error_message)``; the message is ``""`` when the
        dataset is compatible.  Exceptions raised while inspecting the
        dataset are reported as an incompatible result instead of being
        raised.
    """
    try:
        metadata = dataset.metadata
        if len(metadata) == 0:
            return False, "Dataset is empty"

        if plot_type == "class_distribution":
            alternatives = ["class", "label", "modulation", "impairment", "use_case", "category", "labels"]
            has_class_col = any(alt in metadata.columns for alt in alternatives)
            # Label columns are not always plain ``object`` dtype: pandas
            # ``category`` columns and the dedicated string dtypes are just
            # as categorical, so accept all three families.  Reading
            # ``metadata.dtypes`` also stays valid with duplicate column
            # labels, where ``metadata[col]`` would be a DataFrame.
            has_categorical = any(
                pd.api.types.is_object_dtype(dtype)
                or pd.api.types.is_string_dtype(dtype)
                or isinstance(dtype, pd.CategoricalDtype)
                for dtype in metadata.dtypes
            )
            if not has_class_col and not has_categorical:
                return False, "No categorical columns found for class distribution"

        elif plot_type == "sample_spectrogram":
            # Basic probe of the first sample.
            try:
                sample_data = dataset[0] if hasattr(dataset, '__getitem__') else None
                if sample_data is None or len(sample_data) < 32:
                    return False, "Insufficient sample data for spectrogram (need at least 32 points)"
            except Exception:
                # Deliberate best effort: if the sample cannot be read, the
                # plot falls back to synthetic data, so do not reject here.
                pass

        return True, ""
    except Exception as e:
        return False, f"Dataset compatibility check failed: {str(e)}"
 def class_distribution_plot(dataset, class_key: str = "modulation") -> Figure:
    """Generate a bar plot of the number of examples per class.

    Args:
        dataset: Object exposing a ``metadata`` table with one row per
            example.
        class_key: Metadata column holding the class labels.  If it is
            missing, well-known label names are tried, then the first column
            that is object-typed or has fewer than 50 distinct values.

    Returns:
        A bar chart of the (at most 20) most frequent classes, ordered by
        label.  Problems are reported as a styled error figure rather than
        raised.
    """
    try:
        is_compatible, error_msg = _check_dataset_compatibility(dataset, "class_distribution")
        if not is_compatible:
            # The compatibility heuristic only recognises well-known label
            # names and text-like columns.  A column the caller asked for
            # explicitly (e.g. integer class ids) is still plottable, so only
            # give up when that column is absent or the dataset is empty.
            metadata = getattr(dataset, "metadata", None)
            requested_available = (
                metadata is not None
                and len(metadata) > 0
                and class_key in metadata.columns
            )
            if not requested_available:
                # Surface the specific reason instead of a generic message,
                # which was misleading for e.g. an empty dataset.
                return create_styled_error_figure(
                    "Dataset Not Compatible",
                    f"This dataset can't be used for class distribution analysis: {error_msg}",
                    "Try using the Dataset Overview widget to explore the available data columns."
                )

        metadata = dataset.metadata

        # Resolve the label column when the requested one is missing.
        if class_key not in metadata.columns:
            alternatives = ["class", "label", "modulation", "impairment", "use_case", "category", "labels"]
            resolved = next((alt for alt in alternatives if alt in metadata.columns), None)
            if resolved is None:
                # Fall back to the first column that looks categorical.
                resolved = next(
                    (
                        col for col in metadata.columns
                        if metadata[col].dtype == 'object' or metadata[col].nunique() < 50
                    ),
                    None,
                )
            if resolved is None:
                return create_styled_error_figure(
                    "No Class Labels Found",
                    "This dataset contains numerical data without categorical labels.",
                    "Try using the Dataset Overview widget for data analysis, or check if your dataset has hidden categorical columns."
                )
            class_key = resolved

        # Keep the 20 most frequent classes (for performance), then order
        # them by label for a stable x axis.
        class_counts = metadata[class_key].value_counts().head(20).sort_index()

        # Column labels are not guaranteed to be strings (e.g. integer
        # labels picked by the fallback), so stringify before title-casing.
        axis_label = str(class_key).title()

        fig = px.bar(
            x=class_counts.index,
            y=class_counts.values,
            title=f'Class Distribution: {axis_label}'
        )
        fig.update_traces(texttemplate='%{y}', textposition='outside')
        fig.update_layout(
            xaxis_title=axis_label,
            yaxis_title='Number of Examples',
            showlegend=False,
            height=400,
            template="plotly_dark"
        )
        return fig
    except Exception as e:
        return create_styled_error_figure(
            "Class Distribution Error",
            "An error occurred while generating the class distribution plot.",
            f"Technical details: {str(e)}"
        )
 def dataset_overview_plot(dataset) -> Figure:
    """Generate a 2x2 overview figure with key dataset statistics.

    Panels: total example count (top left), number of columns per dtype
    (top right), distribution of the first categorical column or, failing
    that, a histogram of the first numeric column (bottom left), and a
    summary table for up to five columns (bottom right).

    Args:
        dataset: Object exposing a ``metadata`` table with one row per
            example.

    Returns:
        The overview figure, or a styled error figure if anything fails
        while building it.
    """
    try:
        metadata = dataset.metadata
        total_examples = len(metadata)

        # Classify columns once, by dtype family rather than by exact dtype
        # name: labels may be ``category`` or string dtype, and numeric
        # metadata is often int32/float32/unsigned rather than int64/float64.
        # Only NumPy numeric dtypes count as numeric here, so nullable
        # extension dtypes keep being summarised by unique count.
        categorical_cols = []
        numeric_cols = []
        for col, dtype in metadata.dtypes.items():
            if (
                pd.api.types.is_object_dtype(dtype)
                or pd.api.types.is_string_dtype(dtype)
                or isinstance(dtype, pd.CategoricalDtype)
            ):
                categorical_cols.append(col)
            elif isinstance(dtype, np.dtype) and dtype.kind in "iuf":
                numeric_cols.append(col)

        def _short(label) -> str:
            # Column labels need not be strings; stringify, then truncate
            # long names so the table stays readable.
            text = str(label)
            return text[:15] + "..." if len(text) > 15 else text

        dist_title = "Value Distribution" if categorical_cols else "Data Distribution"
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=("Dataset Size", "Data Types", dist_title, "Statistics Summary"),
            specs=[[{"type": "indicator"}, {"type": "bar"}],
                   [{"type": "histogram" if not categorical_cols else "bar"}, {"type": "table"}]]
        )

        # Top left: dataset size indicator
        fig.add_trace(
            go.Indicator(
                mode="number",
                value=total_examples,
                title={"text": "Total Examples"},
                number={"font": {"size": 40}}
            ),
            row=1, col=1
        )

        # Top right: how many columns there are of each dtype
        dtype_counts = metadata.dtypes.value_counts()
        fig.add_trace(
            go.Bar(
                x=[str(dt) for dt in dtype_counts.index],
                y=dtype_counts.values,
                name="Data Types",
                showlegend=False
            ),
            row=1, col=2
        )

        # Bottom left: first categorical column if any, else first numeric
        if categorical_cols:
            col = categorical_cols[0]
            value_counts = metadata[col].value_counts().head(10)
            fig.add_trace(
                go.Bar(
                    x=value_counts.index,
                    y=value_counts.values,
                    name=f"{col} Distribution",
                    showlegend=False
                ),
                row=2, col=1
            )
        elif numeric_cols:
            col = numeric_cols[0]
            fig.add_trace(
                go.Histogram(
                    x=metadata[col],
                    name=f"{col} Distribution",
                    showlegend=False,
                    nbinsx=20
                ),
                row=2, col=1
            )

        # Bottom right: summary table.  Numeric columns take priority; when
        # there are none, the first columns are summarised by unique count.
        stats_data = []
        if numeric_cols:
            for col in numeric_cols[:5]:
                series = metadata[col]
                stats_data.append([
                    _short(col),
                    f"{series.mean():.3f}",
                    f"{series.std():.3f}",
                    f"{series.min():.3f}",
                    f"{series.max():.3f}"
                ])
        else:
            for col in metadata.columns[:5]:
                stats_data.append([
                    _short(col),
                    "N/A", "N/A",
                    f"{metadata[col].nunique()} unique",
                    "N/A"
                ])

        if stats_data:
            fig.add_trace(
                go.Table(
                    header=dict(
                        values=["Column", "Mean", "Std", "Min/Unique", "Max"],
                        fill_color="rgba(30, 30, 30, 0.8)",
                        align="center",
                        font=dict(color="white", size=12)
                    ),
                    cells=dict(
                        # Table cells are column-major, so transpose the rows.
                        values=list(zip(*stats_data)),
                        fill_color="rgba(50, 50, 50, 0.6)",
                        align="center",
                        font=dict(color="white", size=11)
                    )
                ),
                row=2, col=2
            )

        # Informative title
        total_cols = len(metadata.columns)
        title = f"Dataset Overview - {total_examples} samples, {total_cols} columns"
        if total_cols > 5:
            title += " (showing first 5)"

        fig.update_layout(
            title=title,
            height=600,
            showlegend=False,
            template="plotly_dark"
        )
        return fig
    except Exception as e:
        return create_styled_error_figure(
            "Dataset Overview Error",
            "An error occurred while generating the dataset overview.",
            f"Technical details: {str(e)}"
        )
 def sample_spectrogram_plot(dataset, class_key: str = "modulation", sample_idx: Optional[int] = None) -> Figure:
    """Generate a spectrogram heatmap for one sample of the dataset.

    The spectrogram is a plain short-time FFT computed with NumPy: segments
    of 32 to 256 points with 50% overlap, power in dB.  Only the first half
    of each segment's FFT bins is kept and plotted against a 0..0.5
    (cycles/sample) frequency axis; the time axis is the segment start as a
    fraction of the sample length.

    Args:
        dataset: Object exposing a ``metadata`` table and, ideally,
            ``dataset[i]`` access to a 1-D signal.
        class_key: Metadata column whose value is appended to the title;
            well-known alternatives are tried when it is missing.
        sample_idx: Positional index of the sample; a random one is drawn
            when ``None``.

    Returns:
        The spectrogram figure, or a styled error figure on failure.  If the
        sample cannot be read, a synthetic noisy tone is plotted instead and
        the title says so.
    """
    try:
        is_compatible, error_msg = _check_dataset_compatibility(dataset, "sample_spectrogram")
        if not is_compatible:
            # Surface the specific reason (e.g. "Dataset is empty") rather
            # than a generic message.
            return create_styled_error_figure(
                "Spectrogram Not Available",
                f"This dataset can't be used for spectrogram visualization: {error_msg}",
                "Ensure your dataset contains complex-valued signal samples with at least 32 data points per sample."
            )

        metadata = dataset.metadata
        if len(metadata) == 0:
            raise ValueError("Dataset is empty")

        # Resolve the label column used in the title.
        if class_key not in metadata.columns:
            alternatives = ("class", "label", "modulation", "impairment", "use_case")
            class_key = next((alt for alt in alternatives if alt in metadata.columns), class_key)

        if sample_idx is None:
            sample_idx = random.randint(0, len(metadata) - 1)
        sample_metadata = metadata.iloc[sample_idx]

        # Try to read the real sample; fall back to a synthetic tone.
        is_synthetic = False
        try:
            sample_data = dataset[sample_idx]
        except Exception:  # not a bare except: keep Ctrl-C / SystemExit working
            is_synthetic = True
            n_synthetic = 1024
            # Normalised tone frequency in cycles/sample, one of
            # 0.10 .. 0.30 depending on the index.  The parentheses matter:
            # ``0.05 * sample_idx % 5`` would evaluate as
            # ``(0.05 * sample_idx) % 5``.
            freq = 0.1 + 0.05 * (sample_idx % 5)
            # Phase advances per sample so the tone lands at ``freq`` on the
            # 0..0.5 cycles/sample axis used below.
            sample_axis = np.arange(n_synthetic)
            sample_data = np.exp(1j * 2 * np.pi * freq * sample_axis)
            sample_data += 0.1 * (np.random.randn(n_synthetic) + 1j * np.random.randn(n_synthetic))

        # Accept list-like samples too, and make sure the data is complex.
        sample_data = np.asarray(sample_data)
        if not np.iscomplexobj(sample_data):
            sample_data = sample_data.astype(complex)
        if sample_data.ndim != 1:
            raise ValueError(f"Expected a 1-D signal, got an array with shape {sample_data.shape}")

        n_samples = sample_data.shape[0]
        if n_samples < 32:
            raise ValueError(f"Insufficient data: need at least 32 samples, got {n_samples}")

        # With n_samples >= 32, nperseg is in [32, 256] and never exceeds
        # n_samples, so at least one full segment always fits.
        nperseg = min(256, max(32, n_samples // 4))
        hop_length = nperseg // 2
        n_frames = (n_samples - nperseg) // hop_length + 1
        freq_bins = nperseg // 2

        # Gather all overlapping segments as rows, then FFT them in one call
        # (no scipy dependency).
        frame_starts = np.arange(n_frames) * hop_length
        frames = sample_data[frame_starts[:, None] + np.arange(nperseg)]
        Sxx = np.abs(np.fft.fft(frames, axis=1)[:, :freq_bins].T) ** 2

        # Convert to dB; the small offset avoids log10(0).
        Sxx_db = 10 * np.log10(Sxx + 1e-10)

        t = frame_starts / n_samples
        f = np.linspace(0, 0.5, freq_bins)

        fig = go.Figure(data=go.Heatmap(
            z=Sxx_db,
            x=t,
            y=f,
            colorscale='viridis',
            colorbar=dict(title="Power (dB)")
        ))

        title = f"Sample Spectrogram (Index: {sample_idx})"
        if class_key in sample_metadata:
            title += f" - {class_key}: {sample_metadata[class_key]}"
        if is_synthetic:
            # Never present placeholder data as if it were the real sample.
            title += " [synthetic placeholder signal]"

        fig.update_layout(
            title=title,
            xaxis_title="Time",
            yaxis_title="Frequency",
            height=400,
            template="plotly_dark"
        )
        return fig
    except Exception as e:
        return create_styled_error_figure(
            "Spectrogram Error",
            "An error occurred while generating the spectrogram plot.",
            f"Technical details: {str(e)}"
        )