How do I safely pass the training argument to Keras layers in a dynamic list when I don’t know whether each layer supports it?


I am building a custom Keras layer that consists of a list of sub-layers, but I don’t know ahead of time which layers accept a training argument (e.g., BatchNormalization does, ReLU does not).

Here’s my current approach in the call method:

from tensorflow import keras           # imports assumed; adjust if you use
from tensorflow.keras import layers    # standalone Keras instead of tf.keras


class SingleConv(keras.layers.Layer):
    """
    Basic convolutional module consisting of a Conv2d/Conv3d, non-linearity
    and optional batchnorm/groupnorm. The order of operations can be
    specified via the `order` parameter.

    Args:
        out_channels (int): number of output channels
        kernel_size (int or tuple): size of the convolving kernel
        order (string): determines the order of layers, e.g.
            'cr'  -> conv + ReLU
            'crg' -> conv + ReLU + groupnorm
            'cl'  -> conv + LeakyReLU
            'ce'  -> conv + ELU
        padding (str): padding mode passed to the conv layer ('same' or 'valid')
        dropout_prob (float): dropout probability, default 0.1
        is3d (bool): if True use Conv3d, otherwise use Conv2d
    """

    def __init__(self, out_channels, padding='same', kernel_size=3, order='cr',
                 dropout_prob=0.1, is3d=True, in_channels=3, **kwargs):
        super().__init__(**kwargs)
        self.out_channels = out_channels
        self.padding = padding
        self.kernel_size = kernel_size
        self.order = order
        self.dropout_prob = dropout_prob
        self.is3d = is3d
        self.in_channels = in_channels

        # get layers from create_conv
        # (assumes no groupnorm, padding='same' and channels_last)
        layers_list = self.create_conv(out_channels=self.out_channels,
                                       kernel_size=self.kernel_size,
                                       order=self.order,
                                       dropout_prob=self.dropout_prob,
                                       is3d=self.is3d,
                                       padding=self.padding)

        self.module_layers = []
        for i, (name, layer) in enumerate(layers_list):
            # Giving each layer a unique attribute name (e.g., self.layer_0_conv)
            # ensures Keras "sees" these layers and saves their weights.
            attr_name = f"layer_{i}_{name}"
            setattr(self, attr_name, layer)
            self.module_layers.append(layer)

    def create_conv(self, out_channels, kernel_size, order, dropout_prob,
                    is3d, padding='same'):
        """
        Create a CONV block. Padding is always "same". This function assumes
        channels_last (the Keras default).

        Args:
            out_channels (int): number of output channels
            kernel_size (int or tuple): size of the convolving kernel
            order (string): order of things, e.g.
                'cr'   -> conv + ReLU
                'cl'   -> conv + LeakyReLU
                'ce'   -> conv + ELU
                'bcr'  -> batchnorm + conv + ReLU
                'cbrd' -> conv + batchnorm + ReLU + dropout
                'cbrD' -> conv + batchnorm + ReLU + dropout2d
            dropout_prob (float): dropout probability
            is3d (bool): if True use Conv3d, otherwise use Conv2d

        Returns a list of (name, layer) tuples in the specified `order`.
        """
        assert 'c' in order, "Conv layer MUST be present"
        assert order[0] not in 'rle', \
            "Non-linearity cannot be the first operation in the layer"

        Conv = layers.Conv3D if is3d else layers.Conv2D
        Dropout = layers.Dropout if is3d else layers.SpatialDropout2D
        Dropout3D = layers.SpatialDropout3D

        modules = []
        for i, char in enumerate(order):
            if char == 'r':
                modules.append(('ReLU', layers.ReLU()))
            elif char == 'l':
                modules.append(('LeakyReLU', layers.LeakyReLU()))
            elif char == 'e':
                modules.append(('ELU', layers.ELU()))
            elif char == 'c':
                # disable bias if normalization follows
                use_bias = not ('b' in order)
                modules.append(('conv', Conv(out_channels, kernel_size,
                                             use_bias=use_bias,
                                             padding=padding,
                                             data_format='channels_last')))
            elif char == 'b':
                # Determine whether the norm sits before or after the conv
                is_before_conv = i < order.index('c')
                bn_axis = -1  # channels_last
                modules.append(('batchnorm',
                                layers.BatchNormalization(axis=bn_axis)))
            elif char == 'd':
                modules.append(('dropout', Dropout(rate=dropout_prob)))
            elif char == 'D':
                if is3d:
                    modules.append(('dropout',
                                    layers.SpatialDropout3D(rate=dropout_prob)))
                else:
                    modules.append(('dropout',
                                    layers.SpatialDropout2D(rate=dropout_prob)))
            else:
                raise ValueError(f"Unsupported layer type '{char}'. "
                                 "MUST be one of ['b', 'g', 'r', 'l', 'e', 'c', 'd', 'D']")
        return modules

    def call(self, inputs, training=None):
        x = inputs
        for layer in self.module_layers:
            try:
                x = layer(x, training=training)
            except TypeError as e:
                if "training" in str(e):
                    x = layer(x)
                else:
                    raise  # real bug → don't hide it
        return x

My questions:

Is this a reasonable approach to dynamically handle the training argument without knowing which layer supports it?

Are there any performance overheads or pitfalls with using try/except inside the loop?

Is there a cleaner/best-practice way to detect if a layer accepts training before calling it?
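
One alternative I have been considering (the helper below is my own, not a Keras API, so I am not sure it is idiomatic) is to inspect each sub-layer's call signature once when the layer list is built, cache a flag per layer, and then just branch in call() instead of probing with try/except on every forward pass:

import inspect

def accepts_training_arg(layer):
    # Hypothetical helper, not part of Keras: check the call() signature once.
    params = inspect.signature(layer.call).parameters
    return "training" in params or any(
        p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()
    )

# Built once in __init__, right after self.module_layers is filled:
#   self._takes_training = [accepts_training_arg(l) for l in self.module_layers]
#
# Then call() becomes branch-only, with no exception handling:
#   for layer, takes_training in zip(self.module_layers, self._takes_training):
#       x = layer(x, training=training) if takes_training else layer(x)

Would something like that be preferable to the try/except, or does Keras already handle forwarding the training argument for me?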
