# Source code for lumin.nn.models.blocks.conv_blocks

from typing import Any, Callable, Optional, Tuple, Union

import numpy as np
import torch
import torch.nn as nn
from fastcore.all import store_attr
from torch import Tensor

from ..initialisations import lookup_normal_init
from ..layers.activations import lookup_act

# Public API of this module: the names exported by `from ... import *`.
__all__ = [
    "Conv1DBlock",
    "Res1DBlock",
    "ResNeXt1DBlock",
    "AdaptiveAvgMaxConcatPool1d",
    "AdaptiveAvgMaxConcatPool2d",
    "AdaptiveAvgMaxConcatPool3d",
    "SEBlock1d",
    "SEBlock2d",
    "SEBlock3d",
]


class Conv1DBlock(nn.Module):
    r"""
    Basic building block for building and applying a single 1D convolutional layer.

    Arguments:
        in_c: number of input channels (number of features per object / rows in input matrix)
        out_c: number of output channels (number of features / rows in output matrix)
        kernel_sz: width of kernel, i.e. the number of columns to overlay
        padding: amount of padding columns to add at start and end of convolution.
            If left as 'auto', padding will be automatically computed via :meth:`get_padding`.
        stride: number of columns to move kernel when computing convolutions. Stride 1 = kernel centred on each column,
            stride 2 = kernel centred on every other column and input size halved, et cetera.
        act: string representation of argument to pass to lookup_act. If 'linear', no activation layer is added.
        bn: whether to use batch normalisation (default order weights->activation->batchnorm)
        lookup_init: function taking choice of activation function, number of inputs, and number of outputs and returning a function to initialise layer weights.
        lookup_act: function taking choice of activation function and returning an activation function layer
        bn_class: class to use for BatchNorm, default is `nn.BatchNorm1d`

    Examples::
        >>> conv = Conv1DBlock(in_c=3, out_c=16, kernel_sz=3)
        >>>
        >>> conv = Conv1DBlock(in_c=16, out_c=32, kernel_sz=3, stride=2)
        >>>
        >>> conv = Conv1DBlock(in_c=3, out_c=16, kernel_sz=3, act='swish', bn=True)
    """

    def __init__(
        self,
        in_c: int,
        out_c: int,
        kernel_sz: int,
        padding: Union[int, str] = "auto",
        stride: int = 1,
        act: str = "relu",
        bn: bool = False,
        lookup_init: Callable[[str, Optional[int], Optional[int]], Callable[[Tensor], None]] = lookup_normal_init,
        lookup_act: Callable[[str], Any] = lookup_act,
        bn_class: Callable[[int], nn.Module] = nn.BatchNorm1d,
    ):
        super().__init__()
        # Every argument except padding and kernel_sz is stored under its own name;
        # those two are stored under the shorter attribute names `pad` and `ks`.
        store_attr(but=["padding", "kernel_sz"])
        self.pad, self.ks = padding, kernel_sz
        if self.pad == "auto":
            self.pad = self.get_padding(self.ks)
        # Subclasses override set_layers, so this call builds whichever architecture the concrete class defines.
        self.set_layers()

    @staticmethod
    def get_padding(kernel_sz: int) -> int:
        r"""
        Automatically computes the padding intended to keep the number of columns equal before and after convolution.
        The value returned is `kernel_sz // 2`; at stride 1 this conserves the length exactly for odd kernel sizes,
        whereas an even kernel size produces one extra output column.

        Arguments:
            kernel_sz: width of convolutional kernel

        Returns:
            size of padding
        """

        return kernel_sz // 2

    def set_layers(self) -> None:
        r"""
        One of the main functions to overload when inheriting from this class. By default calls `self.get_conv_layer` once but can be changed to produce more
        complicated architectures. Sets `self.layers` to the constructed architecture.
        """

        self.layers = self.get_conv_layer(
            in_c=self.in_c, out_c=self.out_c, kernel_sz=self.ks, padding=self.pad, stride=self.stride
        )

    def get_conv_layer(
        self,
        in_c: int,
        out_c: int,
        kernel_sz: int,
        padding: Union[int, str] = "auto",
        stride: int = 1,
        pre_act: bool = False,
        groups: int = 1,
    ) -> nn.Module:
        r"""
        Builds a sandwich of layers with a single convolutional layer, plus any requested batch norm and activation.
        Also initialises the convolution: weights via `self.lookup_init(self.act)` and biases to zero.

        Arguments:
            in_c: number of input channels (number of features per object / rows in input matrix)
            out_c: number of output channels (number of features / rows in output matrix)
            kernel_sz: width of kernel, i.e. the number of columns to overlay
            padding: amount of padding columns to add at start and end of convolution.
                If left as 'auto', padding will be automatically computed via :meth:`get_padding`.
            stride: number of columns to move kernel when computing convolutions. Stride 1 = kernel centred on each column,
                stride 2 = kernel centred on every other column and input size halved, et cetera.
            pre_act: whether to apply batchnorm and activation layers prior to the weight layer, or afterwards
            groups: number of blocks of connections from input channels to output channels

        Returns:
            `nn.Sequential` containing the requested layers in order of application
        """

        if padding == "auto":
            padding = self.get_padding(kernel_sz)
        layers = []
        if pre_act:
            # Pre-activation order: batchnorm -> activation -> weights, so batchnorm acts on the input channels
            if self.bn:
                layers.append(self.bn_class(in_c))
            if self.act != "linear":
                layers.append(self.lookup_act(self.act))

        conv = nn.Conv1d(
            in_channels=in_c,
            out_channels=out_c,
            kernel_size=kernel_sz,
            padding=padding,
            stride=stride,
            groups=groups,
        )
        self.lookup_init(self.act)(conv.weight)
        nn.init.zeros_(conv.bias)
        layers.append(conv)

        if not pre_act:
            # Default order: weights -> activation -> batchnorm, so batchnorm acts on the output channels
            if self.act != "linear":
                layers.append(self.lookup_act(self.act))
            if self.bn:
                layers.append(self.bn_class(out_c))
        return nn.Sequential(*layers)

    def forward(self, x: Tensor) -> Tensor:
        r"""
        Passes input through the layers.
        Might need to be overloaded in inheritance, depending on architecture.

        Arguments:
            x: input tensor

        Returns:
            Resulting tensor
        """

        return self.layers(x)


class Res1DBlock(Conv1DBlock):
    r"""
    Basic building block for building and applying a pair of residually connected 1D convolutional layers (https://arxiv.org/abs/1512.03385).
    Batchnorm is applied 'pre-activation' as per https://arxiv.org/pdf/1603.05027.pdf, and convolutional shortcuts (again https://arxiv.org/pdf/1603.05027.pdf)
    are used when the stride of the first layer is not equal to 1, or the number of input channels does not equal the number of output channels.

    Arguments:
        in_c: number of input channels (number of features per object / rows in input matrix)
        out_c: number of output channels (number of features / rows in output matrix)
        kernel_sz: width of kernel, i.e. the number of columns to overlay
        padding: amount of padding columns to add at start and end of convolution.
            If left as 'auto', padding will be automatically computed to conserve the number of columns.
        stride: number of columns to move kernel when computing convolutions. Stride 1 = kernel centred on each column,
            stride 2 = kernel centred on every other column and input size halved, et cetera.
        act: string representation of argument to pass to lookup_act
        bn: whether to use batch normalisation (order is pre-activation: batchnorm->activation->weights)
        lookup_init: function taking choice of activation function, number of inputs, and number of outputs and returning a function to initialise layer weights.
        lookup_act: function taking choice of activation function and returning an activation function layer
        bn_class: class to use for BatchNorm, default is `nn.BatchNorm1d`

    Examples::
        >>> conv = Res1DBlock(in_c=16, out_c=16, kernel_sz=3)
        >>>
        >>> conv = Res1DBlock(in_c=16, out_c=32, kernel_sz=3, stride=2)
        >>>
        >>> conv = Res1DBlock(in_c=16, out_c=16, kernel_sz=3, act='swish', bn=True)
    """

    def set_layers(self) -> None:
        r"""
        Constructs a pair of pre-activation convolutional layers, and a shortcut layer if necessary.
        Sets `self.layers` to the pair of layers and `self.shortcut` to the shortcut convolution, or `None` if no shortcut is required.
        """

        self.layers = nn.Sequential(
            # Only the first convolution applies the requested stride; the second always uses stride 1
            self.get_conv_layer(
                in_c=self.in_c, out_c=self.out_c, kernel_sz=self.ks, padding=self.pad, stride=self.stride, pre_act=True
            ),
            self.get_conv_layer(
                in_c=self.out_c, out_c=self.out_c, kernel_sz=self.ks, padding=self.pad, stride=1, pre_act=True
            ),
        )
        if self.stride != 1 or self.in_c != self.out_c:
            # A width-1 convolution matches the skip path's channel count and stride to the main path.
            # It keeps PyTorch's default initialisation (lookup_init is not applied to it).
            self.shortcut = nn.Conv1d(in_channels=self.in_c, out_channels=self.out_c, kernel_size=1, stride=self.stride)
        else:
            self.shortcut = None

    def forward(self, x: Tensor) -> Tensor:
        r"""
        Passes input through the pair of layers and then adds the resulting tensor to the original input,
        which may be passed through a shortcut connection if necessary.

        Arguments:
            x: input tensor

        Returns:
            Resulting tensor
        """

        skip = x if self.shortcut is None else self.shortcut(x)
        return skip + self.layers(x)


class ResNeXt1DBlock(Conv1DBlock):
    r"""
    Basic building block for building and applying a set of residually connected groups of 1D convolutional layers (https://arxiv.org/abs/1611.05431).
    Batchnorm is applied 'pre-activation' as per https://arxiv.org/pdf/1603.05027.pdf, and convolutional shortcuts (again https://arxiv.org/pdf/1603.05027.pdf)
    are used when the stride of the first layer is not equal to 1, or the number of input channels does not equal the number of output channels.

    Arguments:
        in_c: number of input channels (number of features per object / rows in input matrix)
        inter_c: number of intermediate channels in groups
        cardinality: number of groups
        out_c: number of output channels (number of features / rows in output matrix)
        kernel_sz: width of kernel, i.e. the number of columns to overlay
        padding: amount of padding columns to add at start and end of convolution.
            If left as 'auto', padding will be automatically computed to conserve the number of columns.
        stride: number of columns to move kernel when computing convolutions. Stride 1 = kernel centred on each column,
            stride 2 = kernel centred on every other column and input size halved, et cetera.
        act: string representation of argument to pass to lookup_act
        bn: whether to use batch normalisation (order is pre-activation: batchnorm->activation->weights)
        lookup_init: function taking choice of activation function, number of inputs, and number of outputs and returning a function to initialise layer weights.
        lookup_act: function taking choice of activation function and returning an activation function layer
        bn_class: class to use for BatchNorm, default is `nn.BatchNorm1d`

    Examples::
        >>> conv = ResNeXt1DBlock(in_c=32, inter_c=4, cardinality=4, out_c=32, kernel_sz=3)
        >>>
        >>> conv = ResNeXt1DBlock(in_c=32, inter_c=4, cardinality=4, out_c=32, kernel_sz=3, stride=2)
        >>>
        >>> conv = ResNeXt1DBlock(in_c=32, inter_c=4, cardinality=4, out_c=32, kernel_sz=3, act='swish', bn=True)
    """

    def __init__(
        self,
        in_c: int,
        inter_c: int,
        cardinality: int,
        out_c: int,
        kernel_sz: int,
        padding: Union[int, str] = "auto",
        stride: int = 1,
        act: str = "relu",
        bn: bool = False,
        lookup_init: Callable[[str, Optional[int], Optional[int]], Callable[[Tensor], None]] = lookup_normal_init,
        lookup_act: Callable[[str], Any] = lookup_act,
        bn_class: Callable[[int], nn.Module] = nn.BatchNorm1d,
    ):
        # These must be set before calling the parent constructor, since it invokes self.set_layers(),
        # which reads both attributes.
        self.inter_c, self.cardinality = inter_c, cardinality
        super().__init__(
            in_c=in_c,
            out_c=out_c,
            kernel_sz=kernel_sz,
            padding=padding,
            stride=stride,
            act=act,
            bn=bn,
            lookup_init=lookup_init,
            lookup_act=lookup_act,
            bn_class=bn_class,
        )

    def set_layers(self) -> None:
        r"""
        Constructs a set of grouped pre-activation convolutional layers, and a shortcut layer if necessary.
        Sets `self.layers` to the three-layer stack and `self.shortcut` to the shortcut convolution, or `None` if no shortcut is required.
        """

        # Total width of the grouped stage: inter_c channels for each of the cardinality groups
        grouped_c = self.inter_c * self.cardinality
        self.layers = nn.Sequential(
            # Width-1 convolution into the grouped width; this is the only layer applying the requested stride
            self.get_conv_layer(in_c=self.in_c, out_c=grouped_c, kernel_sz=1, stride=self.stride, pre_act=True),
            # Grouped convolution using the requested kernel size and padding
            self.get_conv_layer(
                in_c=grouped_c,
                out_c=grouped_c,
                kernel_sz=self.ks,
                padding=self.pad,
                stride=1,
                groups=self.cardinality,
                pre_act=True,
            ),
            # Width-1 convolution out to the requested number of output channels
            self.get_conv_layer(in_c=grouped_c, out_c=self.out_c, kernel_sz=1, stride=1, pre_act=True),
        )
        if self.stride != 1 or self.in_c != self.out_c:
            # A width-1 convolution matches the skip path's channel count and stride to the main path.
            # It keeps PyTorch's default initialisation (lookup_init is not applied to it).
            self.shortcut = nn.Conv1d(in_channels=self.in_c, out_channels=self.out_c, kernel_size=1, stride=self.stride)
        else:
            self.shortcut = None

    def forward(self, x: Tensor) -> Tensor:
        r"""
        Passes input through the set of layers and then adds the resulting tensor to the original input,
        which may be passed through a shortcut connection if necessary.

        Arguments:
            x: input tensor

        Returns:
            Resulting tensor
        """

        skip = x if self.shortcut is None else self.shortcut(x)
        return skip + self.layers(x)


class AdaptiveAvgMaxConcatPool1d(nn.Module):
    r"""
    Layer that reduces the size of each channel to the specified size, via two methods: average pooling and max pooling.
    The outputs are then concatenated channelwise (dimension 1), max-pooled channels first, so the number of channels is doubled.

    Arguments:
        sz: Requested output size. If `None`, each channel is reduced to a single element per pooling method,
            i.e. for every input channel the output holds its maximum value and its average value.
    """

    def __init__(self, sz: Optional[Union[int, Tuple[int, ...]]] = None):
        super().__init__()
        self._setup(sz)

    def _setup(self, sz: Optional[Union[int, Tuple[int, ...]]] = None) -> None:
        r"""
        Builds the average and max pooling layers; overridden by subclasses to change dimensionality.

        Arguments:
            sz: Requested output size, `None` defaults to 1
        """

        if sz is None:
            sz = 1
        self.ap = nn.AdaptiveAvgPool1d(sz)
        self.mp = nn.AdaptiveMaxPool1d(sz)

    def forward(self, x: Tensor) -> Tensor:
        r"""
        Passes input through the adaptive pooling.

        Arguments:
            x: input tensor

        Returns:
            Resulting tensor: max-pooled channels followed by average-pooled channels along dimension 1
        """

        return torch.cat([self.mp(x), self.ap(x)], 1)


class AdaptiveAvgMaxConcatPool2d(AdaptiveAvgMaxConcatPool1d):
    r"""
    Layer that reduces the size of each channel to the specified size, via two methods: average pooling and max pooling.
    The outputs are then concatenated channelwise (dimension 1), max-pooled channels first, so the number of channels is doubled.

    Arguments:
        sz: Requested output size. If `None`, each channel is reduced to a single (1x1) element per pooling method,
            i.e. for every input channel the output holds its maximum value and its average value.
    """

    def _setup(self, sz: Optional[Union[int, Tuple[int, int]]] = None) -> None:
        r"""
        Builds the 2D average and max pooling layers.

        Arguments:
            sz: Requested output size, `None` defaults to (1, 1)
        """

        if sz is None:
            sz = (1, 1)
        self.ap = nn.AdaptiveAvgPool2d(sz)
        self.mp = nn.AdaptiveMaxPool2d(sz)


class AdaptiveAvgMaxConcatPool3d(AdaptiveAvgMaxConcatPool1d):
    r"""
    Layer that reduces the size of each channel to the specified size, via two methods: average pooling and max pooling.
    The outputs are then concatenated channelwise (dimension 1), max-pooled channels first, so the number of channels is doubled.

    Arguments:
        sz: Requested output size. If `None`, each channel is reduced to a single (1x1x1) element per pooling method,
            i.e. for every input channel the output holds its maximum value and its average value.
    """

    def _setup(self, sz: Optional[Union[int, Tuple[int, int, int]]] = None) -> None:
        r"""
        Builds the 3D average and max pooling layers.

        Arguments:
            sz: Requested output size, `None` defaults to (1, 1, 1)
        """

        if sz is None:
            sz = (1, 1, 1)
        self.ap = nn.AdaptiveAvgPool3d(sz)
        self.mp = nn.AdaptiveMaxPool3d(sz)


class SEBlock1d(nn.Module):
    r"""
    Squeeze-excitation block [Hu, Shen, Albanie, Sun, & Wu, 2017](https://arxiv.org/abs/1709.01507).
    Incoming data is averaged per channel, fed through a single layer of width `max(2, n_in//r)` and the chosen activation,
    then a second layer of width `n_in` and a sigmoid activation.
    Channels in the original data are then multiplied by the learned channel weights.

    Arguments:
        n_in: number of incoming channels
        r: the reduction ratio for the channel compression
        act: string representation of argument to pass to lookup_act
        lookup_init: function taking choice of activation function, number of inputs, and number of outputs and returning a function to initialise layer weights.
        lookup_act: function taking choice of activation function and returning an activation function layer
    """

    def __init__(
        self,
        n_in: int,
        r: int,
        act: str = "relu",
        lookup_init: Callable[[str, Optional[int], Optional[int]], Callable[[Tensor], None]] = lookup_normal_init,
        lookup_act: Callable[[str], Any] = lookup_act,
    ):
        super().__init__()
        self.n_in, self.r, self.act, self.lookup_init, self.lookup_act = n_in, r, act, lookup_init, lookup_act
        self.layers = self._get_layers()
        self._set_pooling()

    def _set_pooling(self) -> None:
        r"""
        Sets the per-channel averaging layer and the matching trailing shape `self.sz`; overridden by subclasses to change dimensionality.
        """

        self.sz = [1]
        self.pool = nn.AdaptiveAvgPool1d(self.sz)

    def _get_layers(self) -> nn.Sequential:
        r"""
        Builds and initialises the excitation network: linear -> activation -> linear -> sigmoid.

        Returns:
            `nn.Sequential` mapping `n_in` averaged channel values to `n_in` channel weights
        """

        # Hidden width is n_in // r, but never narrower than 2. The built-in max keeps it a plain Python int.
        c = max(2, self.n_in // self.r)
        squeeze, excite = nn.Linear(self.n_in, c), nn.Linear(c, self.n_in)
        # Each linear layer is initialised for the activation that follows it
        self.lookup_init(self.act)(squeeze.weight)
        self.lookup_init("sigmoid")(excite.weight)
        nn.init.zeros_(squeeze.bias)
        nn.init.zeros_(excite.bias)
        return nn.Sequential(squeeze, self.lookup_act(self.act), excite, nn.Sigmoid())

    def forward(self, x: Tensor) -> Tensor:
        r"""
        Rescale the incoming tensor by the learned channel weights

        Arguments:
            x: incoming tensor

        Returns:
            x*y, where y is the output of the squeeze-excitation network
        """

        # Flatten the pooled tensor to rows of n_in values for the linear layers
        y = self.pool(x).view(-1, self.n_in)
        # Restore the size-1 trailing dimensions so the weights broadcast across each channel of x
        y = self.layers(y).view(-1, self.n_in, *self.sz)
        return x * y


class SEBlock2d(SEBlock1d):
    r"""
    Squeeze-excitation block [Hu, Shen, Albanie, Sun, & Wu, 2017](https://arxiv.org/abs/1709.01507).
    Incoming data is averaged per channel, fed through a single layer of width `n_in//r` and the chosen activation, then a second layer of width `n_in` and a sigmoid activation.
    Channels in the original data are then multiplied by the learned channel weights.

    Arguments:
        n_in: number of incoming channels
        r: the reduction ratio for the channel compression
        act: string representation of argument to pass to lookup_act
        lookup_init: function taking choice of activation function, number of inputs, and number of outputs and returning a function to initialise layer weights.
        lookup_act: function taking choice of activation function and returning an activation function layer
    """

    def _set_pooling(self) -> None:
        r"""
        Sets the per-channel averaging layer to 2D adaptive average pooling with a 1x1 output.
        """

        self.sz = [1, 1]
        self.pool = nn.AdaptiveAvgPool2d(self.sz)


class SEBlock3d(SEBlock1d):
    r"""
    Squeeze-excitation block [Hu, Shen, Albanie, Sun, & Wu, 2017](https://arxiv.org/abs/1709.01507).
    Incoming data is averaged per channel, fed through a single layer of width `n_in//r` and the chosen activation, then a second layer of width `n_in` and a sigmoid activation.
    Channels in the original data are then multiplied by the learned channel weights.

    Arguments:
        n_in: number of incoming channels
        r: the reduction ratio for the channel compression
        act: string representation of argument to pass to lookup_act
        lookup_init: function taking choice of activation function, number of inputs, and number of outputs and returning a function to initialise layer weights.
        lookup_act: function taking choice of activation function and returning an activation function layer
    """

    def _set_pooling(self) -> None:
        r"""
        Sets the per-channel averaging layer to 3D adaptive average pooling with a 1x1x1 output.
        """

        self.sz = [1, 1, 1]
        self.pool = nn.AdaptiveAvgPool3d(self.sz)
# Source code for lumin.nn.models.blocks.conv_blocks

# Docs

# Tutorials