Source code for neurophox.initializers

from typing import Tuple, Union, Optional

import tensorflow as tf

try:
    import torch
    from torch.nn import Parameter
except ImportError:
    # if the user did not install pytorch, just do tensorflow stuff
    pass

import numpy as np

from .config import TF_FLOAT, NP_FLOAT, TEST_SEED, T_FLOAT
from .helpers import get_alpha_checkerboard_general, get_default_coarse_grain_block_sizes, \
    get_efficient_coarse_grain_block_sizes
from scipy.special import betaincinv


class MeshPhaseInitializer:
    """
    Base class for mesh phase initializers.

    Subclasses provide the actual values by overriding :meth:`to_np`; the
    TensorFlow and PyTorch conversions defined here wrap whatever array that
    method returns.
    """

    def __init__(self, units: int, num_layers: int):
        """

        Args:
            units: Input dimension, :math:`N`
            num_layers: Number of layers :math:`L`
        """
        self.units = units
        self.num_layers = num_layers

    def to_np(self) -> np.ndarray:
        """
        Build the initial phases as a Numpy array (must be overridden).

        Returns:
            Initialized Numpy array

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError('Need to implement numpy initialization')

    def to_tf(self, phase_varname: str) -> tf.Variable:
        """
        Wrap the array from :meth:`to_np` in a Tensorflow variable.

        Args:
            phase_varname: Name given to the created variable

        Returns:
            Initialized Tensorflow Variable
        """
        initial_phase = self.to_np()
        return tf.Variable(initial_value=initial_phase, name=phase_varname, dtype=TF_FLOAT)

    def to_torch(self, is_trainable: bool = True):
        """
        Wrap the array from :meth:`to_np` in a torch Parameter.

        Args:
            is_trainable: Passed through as :code:`requires_grad`

        Returns:
            Initialized torch Parameter
        """
        # ``torch`` and ``Parameter`` are only bound when the optional import
        # at the top of the module succeeded.
        phase_np = self.to_np()
        return Parameter(
            torch.tensor(phase_np, dtype=T_FLOAT),
            requires_grad=is_trainable,
        )


class PhaseInitializer(MeshPhaseInitializer):
    """
    User-specified initialization of rectangular and triangular mesh architectures.

    The number of layers is read from the first axis of :code:`phase`.

    Args:
        phase: Phase to initialize
        units: Input dimension, :math:`N`
    """

    def __init__(self, phase: np.ndarray, units: int):
        self.phase = phase
        self.units = units
        # One row of ``phase`` per mesh layer.
        layer_count = phase.shape[0]
        super().__init__(units, layer_count)

    def to_np(self) -> np.ndarray:
        """
        Returns:
            A copy of the stored phase array cast to :code:`NP_FLOAT`
        """
        stored_phase = self.phase
        return stored_phase.astype(NP_FLOAT)


class HaarRandomPhaseInitializer(MeshPhaseInitializer):
    """
    Haar-random initialization of rectangular and triangular mesh architectures.

    Args:
        units: Input dimension, :math:`N`
        num_layers: Number of layers, :math:`L`. Falls back to :code:`units` when
            omitted (or otherwise falsy), and is ignored when :code:`tri` is set.
        hadamard: Whether to use Hadamard convention
        tri: Initializer for the triangular mesh architecture
            (forces :math:`L = 2N - 3`)
    """

    def __init__(self, units: int, num_layers: int = None, hadamard: bool = False, tri: bool = False):
        self.tri = tri
        self.hadamard = hadamard
        if tri:
            depth = 2 * units - 3
        elif num_layers:
            depth = num_layers
        else:
            depth = units
        self.num_layers = depth
        super().__init__(units, depth)

    def to_np(self) -> np.ndarray:
        """
        Sample the phases and interleave them layer by layer.

        Returns:
            Array of shape :code:`(num_layers, units // 2)` cast to :code:`NP_FLOAT`
        """
        even_layers, odd_layers = get_haar_theta(
            self.units, self.num_layers, hadamard=self.hadamard, tri=self.tri
        )
        theta = np.zeros((self.num_layers, self.units // 2))
        theta[::2, :] = even_layers
        # For even ``units`` the last column of every odd layer is left at zero.
        odd_columns = slice(None) if self.units % 2 else slice(None, -1)
        theta[1::2, odd_columns] = odd_layers
        return theta.astype(NP_FLOAT)


class PRMPhaseInitializer(MeshPhaseInitializer):
    def __init__(self, units: int, hadamard: bool, tunable_layers_per_block: Optional[int] = None):
        """
        A useful initialization of permuting mesh architectures based on the Haar random initialization above.
        This currently only works if using default permuting mesh architecture or setting
        :code:`tunable_layers_per_block`.

        Args:
            units: Input dimension, :math:`N`
            hadamard: Whether to use Hadamard convention
            tunable_layers_per_block: Number of tunable layers per block (same behavior as :code:`PermutingRectangularMeshModel`).
        """
        if tunable_layers_per_block is None:
            block_size_pair = get_default_coarse_grain_block_sizes(units)
        else:
            block_size_pair = get_efficient_coarse_grain_block_sizes(units, tunable_layers_per_block)
        # Only the first element of the returned pair is used here.
        self.tunable_block_sizes, _ = block_size_pair
        self.hadamard = hadamard
        self.num_layers = int(np.sum(self.tunable_block_sizes))
        super().__init__(units, self.num_layers)

    def _block_to_np(self, block_size) -> np.ndarray:
        """Sample one block of ``block_size`` layers and interleave its even/odd layers."""
        even_layers, odd_layers = get_haar_theta(self.units, block_size, hadamard=self.hadamard)
        block_theta = np.zeros((block_size, self.units // 2))
        block_theta[::2, :] = even_layers
        # For even ``units`` the last column of every odd layer is left at zero.
        odd_columns = slice(None) if self.units % 2 else slice(None, -1)
        block_theta[1::2, odd_columns] = odd_layers
        return block_theta.astype(NP_FLOAT)

    def to_np(self) -> np.ndarray:
        """
        Sample every tunable block independently and stack the blocks vertically.

        Returns:
            Array with one row per tunable layer and :code:`units // 2` columns
        """
        return np.vstack([self._block_to_np(block_size) for block_size in self.tunable_block_sizes])


class UniformRandomPhaseInitializer(MeshPhaseInitializer):
    def __init__(self, units: int, num_layers: int, max_phase: float, min_phase: float = 0):
        r"""
        Defines a uniform random initializer up to some maximum phase,
        e.g. :math:`\theta \in [0, \pi]` or :math:`\phi \in [0, 2\pi)`.

        Args:
            units: Input dimension, :math:`N`.
            num_layers: Number of layers, :math:`L`.
            max_phase: Maximum phase
            min_phase: Minimum phase (usually 0)
        """
        # The docstring above is a raw string so that the LaTeX backslashes are
        # not parsed as (invalid) string escape sequences.
        self.max_phase = max_phase
        self.min_phase = min_phase
        # The base class stores ``units`` and ``num_layers``; they are not
        # assigned here to avoid a misleading intermediate value.
        super().__init__(units, num_layers)

    def to_np(self) -> np.ndarray:
        """
        Draw phases uniformly from :code:`[min_phase, max_phase)` using Numpy's
        global random state.

        Returns:
            Array of shape :code:`(num_layers, units // 2)` cast to :code:`NP_FLOAT`
        """
        phase_range = self.max_phase - self.min_phase
        phase = phase_range * np.random.rand(self.num_layers, self.units // 2) + self.min_phase
        return phase.astype(NP_FLOAT)


class ConstantPhaseInitializer(MeshPhaseInitializer):
    def __init__(self, units: int, num_layers: int, constant_phase: float):
        """
        Initializer that sets every phase to the same value.

        Args:
            units: Input dimension, :math:`N`
            num_layers: Number of layers, :math:`L`
            constant_phase: The constant phase to set all array elements
        """
        self.constant_phase = constant_phase
        super().__init__(units, num_layers)

    def to_np(self) -> np.ndarray:
        """
        Returns:
            Array of shape :code:`(num_layers, units // 2)` filled with :code:`constant_phase`
        """
        phase_shape = (self.num_layers, self.units // 2)
        return self.constant_phase * np.ones(phase_shape)


def get_haar_theta(units: int, num_layers: int, hadamard: bool,
                   tri: bool = False) -> Union[Tuple[np.ndarray, np.ndarray],
                                               Tuple[tf.Variable, tf.Variable],
                                               tf.Variable]:
    """
    Sample random :math:`\\theta` phases for the even and odd mesh layers.

    Each phase is drawn as :math:`2 \\arcsin(u^{1 / (2\\alpha)})` with :math:`u`
    uniform on :math:`[0, 1)` from Numpy's global random state, and replaced by
    :math:`\\pi - \\theta` unless the Hadamard convention is requested.

    Args:
        units: Input dimension, :math:`N`
        num_layers: Number of layers, :math:`L` (not used when :code:`tri` is set)
        hadamard: Whether to use Hadamard convention
        tri: Use the triangular-mesh :math:`\\alpha` layout instead of the checkerboard helper

    Returns:
        Tuple :code:`(theta_0, theta_1)` of :code:`NP_FLOAT` arrays for the even and odd layers
    """
    if tri:
        # Every one of the 2N - 3 layers carries alpha = 1, ..., N - 1 along its columns.
        column_alpha = np.linspace(1, units - 1, units - 1)[:, np.newaxis]
        alpha = np.repeat(column_alpha, units * 2 - 3, axis=1).T
    else:
        alpha = get_alpha_checkerboard_general(units, num_layers).T
    roots = (2 * alpha[::2, ::2], 2 * alpha[1::2, 1::2])
    # Even layers are drawn before odd layers; the order matters for seeded runs.
    thetas = [2 * np.arcsin(np.random.rand(*root.shape) ** (1 / root)) for root in roots]
    if not hadamard:
        thetas = [np.pi - theta for theta in thetas]
    even_theta, odd_theta = thetas
    return even_theta.astype(dtype=NP_FLOAT), odd_theta.astype(dtype=NP_FLOAT)


def get_ortho_haar_theta(units: int, num_layers: int,
                         hadamard: bool) -> Union[Tuple[np.ndarray, np.ndarray],
                                                  Tuple[tf.Variable, tf.Variable],
                                                  tf.Variable]:
    """
    Sample random :math:`\\theta` phases for the even and odd layers via the
    inverse regularized incomplete beta function.

    Each phase is :math:`2 \\arcsin(I^{-1}((\\alpha - 1) / 2, 1 / 2, u))` with
    :math:`u` uniform on :math:`[0, 1)` from Numpy's global random state, and is
    replaced by :math:`\\pi - \\theta` unless the Hadamard convention is requested.

    Args:
        units: Input dimension, :math:`N`
        num_layers: Number of layers, :math:`L`
        hadamard: Whether to use Hadamard convention

    Returns:
        Tuple :code:`(theta_0, theta_1)` of :code:`NP_FLOAT` arrays for the even and odd layers
    """
    alpha = get_alpha_checkerboard_general(units, num_layers).T
    roots = (alpha[::2, ::2] - 1, alpha[1::2, 1::2] - 1)
    thetas = []
    # Even layers are drawn before odd layers; the order matters for seeded runs.
    for root in roots:
        theta = 2 * np.arcsin(betaincinv(0.5 * root, 0.5, np.random.rand(*root.shape)))
        thetas.append(theta if hadamard else np.pi - theta)
    even_theta, odd_theta = thetas
    return even_theta.astype(dtype=NP_FLOAT), odd_theta.astype(dtype=NP_FLOAT)


def get_initializer(units: int, num_layers: int, initializer_name: str,
                    hadamard: bool = False, testing: bool = False) -> MeshPhaseInitializer:
    """
    Look up a phase initializer by name.

    Only the requested initializer is constructed, so a constructor failure in
    one entry cannot break requests for a different name.

    Args:
        units: Input dimension, :math:`N`
        num_layers: Number of layers, :math:`L`
        initializer_name: One of the keys of the table below
        hadamard: Whether to use Hadamard convention
        testing: If set, seed Numpy's global random state with :code:`TEST_SEED` first

    Returns:
        The initializer registered under :code:`initializer_name`

    Raises:
        KeyError: If :code:`initializer_name` is not a known name
    """
    if testing:
        np.random.seed(TEST_SEED)
    # Drawn unconditionally (even when another name is requested) so that the
    # global random stream advances exactly as it did when every initializer
    # was built eagerly; seeded runs therefore keep producing the same values.
    random_constant_phase = np.pi * np.random.rand()
    initializer_name_to_factory = {
        'haar_rect': lambda: HaarRandomPhaseInitializer(units, num_layers, hadamard),
        'haar_tri': lambda: HaarRandomPhaseInitializer(units, num_layers, hadamard, tri=True),
        'haar_prm': lambda: PRMPhaseInitializer(units, hadamard=hadamard),
        'random_phi': lambda: UniformRandomPhaseInitializer(units, num_layers, 2 * np.pi),
        'random_gamma': lambda: UniformRandomPhaseInitializer(2 * units, 1, 2 * np.pi),
        # Uniform on [0, 0] is identically zero.
        'constant_gamma': lambda: UniformRandomPhaseInitializer(2 * units, 1, 0.0),
        # Previously a uniform draw on [0, 2*pi) (identical to 'random_gamma');
        # now a true constant, mirroring 'constant_max_phi'.
        'constant_max_gamma': lambda: ConstantPhaseInitializer(2 * units, 1, 2 * np.pi),
        'random_constant': lambda: ConstantPhaseInitializer(units, num_layers, random_constant_phase),
        'random_theta': lambda: UniformRandomPhaseInitializer(units, num_layers, np.pi),
        'constant_phi': lambda: ConstantPhaseInitializer(units, num_layers, 0.0),
        'constant_max_phi': lambda: ConstantPhaseInitializer(units, num_layers, 2 * np.pi),
        'bar': lambda: ConstantPhaseInitializer(units, num_layers, 0.0 if hadamard else np.pi),
        'cross': lambda: ConstantPhaseInitializer(units, num_layers, np.pi if hadamard else 0),
        'transmissive': lambda: UniformRandomPhaseInitializer(units, num_layers,
                                                              min_phase=np.pi / 2,
                                                              max_phase=np.pi),
    }
    return initializer_name_to_factory[initializer_name]()