Source code for src.conv

import torch
import torch.nn as nn
from typing import Optional, Union, Dict, Any, List
from .utils import ACTIVATION_MAP, POOLING_MAP
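# For reference: per the docstrings below, ACTIVATION_MAP and POOLING_MAP must
# map lowercase names to nn.Module classes (not instances). A minimal sketch of
# what src.utils might contain (the exact entries are an assumption, not the
# actual contents of that module):
#
#     ACTIVATION_MAP = {"relu": nn.ReLU, "leaky_relu": nn.LeakyReLU,
#                       "sigmoid": nn.Sigmoid, "tanh": nn.Tanh}
#     POOLING_MAP = {"max": nn.MaxPool2d, "avg": nn.AvgPool2d}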

class ConvBlock(nn.Module):
    """A modular convolutional block for PyTorch models.

    A configurable 2D convolutional block that composes a Conv2d layer with
    optional BatchNorm, a selectable activation, and an optional pooling
    layer. The block is assembled into an nn.Sequential and applied as-is
    in forward.

    Parameters
    ----------
    in_channels : int
        Number of channels in the input image/tensor.
    out_channels : int
        Number of channels produced by the convolution.
    kernel_size : int
        Size of the convolving kernel (assumed square).
    stride : int, optional (default=1)
        Stride of the convolution.
    padding : Union[int, str, bool], optional (default=False)
        Padding applied to the input before convolution. Behaviors:

        - If the string ``same`` (case-insensitive) is provided, padding is
          set to ``kernel_size // 2`` (which preserves spatial dimensions
          for odd kernel sizes when ``stride=1``).
        - If a positive integer is provided, that value is used as padding.
        - Any other value (including ``False`` or ``0``) results in zero
          padding.
    activation : str, optional (default=``'relu'``)
        Name of the activation to apply after convolution (looked up via
        ``ACTIVATION_MAP``). Unknown names fall back to ``nn.ReLU``.
    activation_kwargs : Optional[Dict[str, Any]], optional (default=None)
        Optional keyword arguments to instantiate the activation class.
        If None, the activation is created with its default constructor.
    use_batch_norm : bool, optional (default=False)
        If True, a nn.BatchNorm2d(out_channels) layer is inserted after
        the convolution. When batch normalization is used, the convolution
        is created with bias=False.
    pooling_type : Optional[str], optional
        Name of the pooling operation to apply (looked up via
        ``POOLING_MAP``), e.g. ``max``, ``avg``. If None, no pooling layer
        is appended.
    pooling_kernel : Optional[int], optional
        Kernel size for the pooling layer. If not provided, pooling is not
        added.
    pooling_stride : Optional[int], optional
        Stride for the pooling layer. If None, defaults to pooling_kernel.

    Attributes
    ----------
    block : nn.Sequential
        The assembled sequential block containing the convolution, optional
        batch norm, activation, and optional pooling.

    Raises
    ------
    ValueError
        If pooling_type and pooling_kernel are both provided but
        pooling_type is not found in POOLING_MAP.

    Notes
    -----
    - The convolution's bias is disabled when batch normalization is
      enabled to avoid redundant affine parameters.
    - Activation and pooling implementations are looked up using
      ACTIVATION_MAP and POOLING_MAP, respectively; those mappings must be
      defined in the module scope and map lowercase names to callable
      nn.Module classes (not instances).
    - The forward pass simply delegates to self.block(x) and returns the
      transformed tensor.

    Example
    -------
    >>> # Construct a conv block with same-padding, batch norm and max pooling
    >>> ConvBlock(3, 64, kernel_size=3, padding="same", activation="relu",
    ...           use_batch_norm=True, pooling_type="max", pooling_kernel=2)
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int = 1,
        padding: Union[int, str, bool] = False,
        activation: str = "relu",
        activation_kwargs: Optional[Dict[str, Any]] = None,
        use_batch_norm: bool = False,
        pooling_type: Optional[str] = None,
        pooling_kernel: Optional[int] = None,
        pooling_stride: Optional[int] = None,
    ):
        super().__init__()

        # Resolve the padding argument into an integer for nn.Conv2d.
        padding_value = 0
        if isinstance(padding, str) and padding.lower() == "same":
            padding_value = kernel_size // 2
        elif isinstance(padding, int) and padding > 0:
            padding_value = padding

        layers = []
        conv_layer = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding=padding_value,
            bias=not use_batch_norm,
        )
        layers.append(conv_layer)

        if use_batch_norm:
            layers.append(nn.BatchNorm2d(out_channels))

        # Unknown activation names fall back to nn.ReLU.
        activation_class = ACTIVATION_MAP.get(activation.lower(), nn.ReLU)
        layers.append(activation_class(**(activation_kwargs or {})))

        # Pooling is added only when both the type and the kernel are given.
        if pooling_type and pooling_kernel:
            pooling_class = POOLING_MAP.get(pooling_type.lower())
            if not pooling_class:
                raise ValueError(f"Pooling type '{pooling_type}' is not supported.")
            pool_stride = (
                pooling_stride if pooling_stride is not None else pooling_kernel
            )
            layers.append(pooling_class(kernel_size=pooling_kernel, stride=pool_stride))

        self.block = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.block(x)
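# Illustrative shape check for ConvBlock (a sketch, assuming ACTIVATION_MAP
# resolves "relu" and POOLING_MAP resolves "max", as documented above):
#
#     >>> block = ConvBlock(3, 16, kernel_size=3, padding="same",
#     ...                   pooling_type="max", pooling_kernel=2)
#     >>> x = torch.randn(1, 3, 32, 32)
#     >>> block(x).shape  # "same" keeps 32x32; the 2x2 max pool halves it
#     torch.Size([1, 16, 16, 16])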
class ConvTransposeBlock(nn.Module):
    """A modular transposed-convolution block for PyTorch models.

    Performs a ConvTranspose2d followed optionally by BatchNorm2d and a
    configurable activation.

    Parameters
    ----------
    in_channels : int
        Number of channels in the input feature map.
    out_channels : int
        Number of channels produced by the transposed convolution.
    kernel_size : int
        Size of the convolution kernel.
    stride : int, optional
        Stride of the transposed convolution. Default is 2.
    padding : int, optional
        Padding added to both sides of the input. Default is 1.
    output_padding : int, optional
        Additional size added to one side of the output shape. Default is 0.
    activation : str, optional
        Name of the activation to apply after convolution (looked up via
        ACTIVATION_MAP). Defaults to "relu".
    activation_kwargs : Optional[Dict[str, Any]], optional (default=None)
        Optional keyword arguments to instantiate the activation class.
        If None, the activation is created with its default constructor.
    use_batch_norm : bool, optional
        If True, insert nn.BatchNorm2d after the transposed convolution.
        Default is False.

    Behavior
    --------
    - Constructs an nn.Sequential block containing:

      1. nn.ConvTranspose2d(..., bias=not use_batch_norm)
      2. nn.BatchNorm2d(out_channels) if use_batch_norm is True
      3. The activation layer resolved from ACTIVATION_MAP
    - The ConvTranspose2d layer's bias is disabled when batch normalization
      is used (bias=False) to avoid redundant affine parameters.

    Input / Output shapes
    ---------------------
    Input: Tensor of shape (N, in_channels, H, W)
    Output: Tensor of shape (N, out_channels, H_out, W_out), where
    H_out = (H - 1) * stride - 2 * padding + kernel_size + output_padding
    (and analogously for W_out).

    Examples
    --------
    >>> block = ConvTransposeBlock(128, 64, kernel_size=4, stride=2, padding=1)
    >>> out = block(x)  # x: (N, 128, H, W) -> out: (N, 64, 2*H, 2*W)
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int = 2,
        padding: int = 1,
        output_padding: int = 0,
        activation: str = "relu",
        activation_kwargs: Optional[Dict[str, Any]] = None,
        use_batch_norm: bool = False,
    ):
        super().__init__()

        layers = []
        layers.append(
            nn.ConvTranspose2d(
                in_channels,
                out_channels,
                kernel_size,
                stride=stride,
                padding=padding,
                output_padding=output_padding,
                bias=not use_batch_norm,
            )
        )

        if use_batch_norm:
            layers.append(nn.BatchNorm2d(out_channels))

        # Unknown activation names fall back to nn.ReLU.
        activation_class = ACTIVATION_MAP.get(activation.lower(), nn.ReLU)
        layers.append(activation_class(**(activation_kwargs or {})))

        self.block = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.block(x)
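# Worked example of the output-size formula above (a sketch, not part of the
# module API): with kernel_size=4 and the defaults stride=2, padding=1,
# output_padding=0, a 16x16 input is exactly doubled:
# (16 - 1) * 2 - 2 * 1 + 4 + 0 = 32.
#
#     >>> up = ConvTransposeBlock(64, 32, kernel_size=4)
#     >>> up(torch.randn(1, 64, 16, 16)).shape
#     torch.Size([1, 32, 32, 32])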
class ConfigurableCNN(nn.Module):
    """A customizable convolutional neural network (CNN) built from a
    sequence of ConvBlock and ConvTransposeBlock layers defined by the user.

    Parameters
    ----------
    layers_config : List[Dict[str, Dict[str, Any]]]
        A list of single-key dictionaries. The key selects the block type
        (``"conv"`` or ``"conv_transpose"``) and the value is a dictionary
        of keyword arguments for that block's constructor (e.g.,
        in_channels, out_channels, kernel_size, ...).

    Attributes
    ----------
    cnn : nn.Sequential
        The assembled sequential CNN composed of the specified ConvBlock
        and ConvTransposeBlock layers.

    Example
    -------
    >>> layers_config = [
    ...     {"conv": {"in_channels": 3, "out_channels": 32,
    ...               "kernel_size": 3, "padding": "same"}},
    ...     {"conv": {"in_channels": 32, "out_channels": 64,
    ...               "kernel_size": 3, "padding": "same",
    ...               "pooling_type": "max", "pooling_kernel": 2}},
    ...     {"conv_transpose": {"in_channels": 64, "out_channels": 128,
    ...                         "kernel_size": 3, "padding": 1}},
    ... ]
    >>> model = ConfigurableCNN(layers_config)
    """

    def __init__(self, layers_config: List[Dict[str, Dict[str, Any]]]):
        super().__init__()

        layers = []
        for layer_cfg in layers_config:
            if not isinstance(layer_cfg, dict) or len(layer_cfg) != 1:
                raise ValueError(
                    "Each layer configuration must be a dictionary with exactly one key."
                )
            layer_type = next(iter(layer_cfg))
            layer_params = layer_cfg[layer_type]
            if layer_type == "conv":
                layers.append(ConvBlock(**layer_params))
            elif layer_type == "conv_transpose":
                layers.append(ConvTransposeBlock(**layer_params))
            else:
                raise ValueError(f"Unsupported layer type: {layer_type}")

        self.cnn = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.cnn(x)
if __name__ == "__main__":
    # Example usage
    layers_config = [
        {"conv": {"in_channels": 3, "out_channels": 32,
                  "kernel_size": 3, "padding": "same"}},
        {"conv": {"in_channels": 32, "out_channels": 64,
                  "kernel_size": 3, "padding": "same",
                  "pooling_type": "max", "pooling_kernel": 2}},
        # Note: ConvTransposeBlock takes an integer padding;
        # nn.ConvTranspose2d does not accept the string "same".
        {"conv_transpose": {"in_channels": 64, "out_channels": 128,
                            "kernel_size": 3, "padding": 1}},
    ]
    model = ConfigurableCNN(layers_config)
    print(model)
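    # A quick smoke test of the assembled model. The expected shape follows
    # from the config above: "same" padding keeps 32x32, the max pool halves
    # it to 16x16, and the transposed conv (kernel_size=3, stride=2,
    # padding=1) maps H to (16 - 1) * 2 - 2 + 3 = 31.
    x = torch.randn(1, 3, 32, 32)
    out = model(x)
    print(out.shape)  # expected: torch.Size([1, 128, 31, 31])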