Source code for beyondml.tflow.utils.transformer

import tensorflow as tf
from beyondml.tflow.layers import MultiDense, SelectorLayer


def build_transformer_block(
        input_shape,
        embed_dim,
        num_heads,
        neurons,
        dropout_rate=0.1,
):
    """
    Build a Transformer Block

    Parameters
    ----------
    input_shape : int or tuple of int
        The input shape for the model to use
    embed_dim : int
        The dimension of the embedding
    num_heads : int
        The number of attention heads to use
    neurons : int
        The number of hidden neurons to use in the hidden layer
    dropout_rate : float (default 0.1)
        Rate at which dropout is applied

    Returns
    -------
    transformer_block : TensorFlow Keras Functional model
        The transformer block, which can then be used alone or as a layer
        in another model
    """
    input_layer = tf.keras.layers.Input(input_shape)

    # Project the input into per-head query, key, and value representations
    query = MultiDense(embed_dim)([input_layer] * num_heads)
    key = MultiDense(embed_dim)([input_layer] * num_heads)
    value = MultiDense(embed_dim)([input_layer] * num_heads)

    # Select each head's projection so attention can be applied per head
    query_selectors = [
        SelectorLayer(i)(query) for i in range(num_heads)
    ]
    key_selectors = [
        SelectorLayer(i)(key) for i in range(num_heads)
    ]
    value_selectors = [
        SelectorLayer(i)(value) for i in range(num_heads)
    ]

    attention_layers = [
        tf.keras.layers.Attention()(
            [query_selectors[i], key_selectors[i], value_selectors[i]]
        )
        for i in range(num_heads)
    ]

    # Merge the attention heads back into a single tensor
    concat = tf.keras.layers.Concatenate()(attention_layers)
    merge = tf.keras.layers.Reshape((input_shape[0], -1))(concat)

    # Residual feed-forward sub-block with dropout and layer normalization
    x = tf.keras.layers.Dropout(dropout_rate)(merge)
    out1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x)
    x = tf.keras.layers.Dense(neurons, activation='relu')(out1)
    x = tf.keras.layers.Dense(embed_dim * num_heads)(x)
    x = tf.keras.layers.Dropout(dropout_rate)(x)
    x = tf.keras.layers.Add()([out1, x])
    output_layer = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x)

    return tf.keras.models.Model(input_layer, output_layer)
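
# Usage sketch (illustrative addition, not part of the original module): a
# minimal example of how `build_transformer_block` might be called. The
# sequence length (64), feature dimension (32), head count (4), and hidden
# width (128) are assumed values chosen for this example; per the layers
# above, the returned model maps inputs of shape (64, 32) to outputs of
# shape (64, embed_dim * num_heads).
def _example_transformer_block():
    transformer = build_transformer_block(
        input_shape=(64, 32),
        embed_dim=32,
        num_heads=4,
        neurons=128,
        dropout_rate=0.1,
    )
    # The block is an ordinary Keras Functional model, so it can be inspected
    # directly or used as a layer inside a larger model.
    transformer.summary()
    return transformer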

def build_token_position_embedding_block(
        sequence_length,
        vocab_size,
        embed_dim
):
    """
    Builds a token and position embedding block

    Parameters
    ----------
    sequence_length : int
        The length of each sequence
    vocab_size : int
        The size of the vocabulary used
    embed_dim : int
        The desired embedding dimension

    Returns
    -------
    embedding_block : TensorFlow Keras Functional model
        The embedding block, which can be used alone or as a layer
        in another model
    """
    tok_input = tf.keras.layers.Input(sequence_length)
    pos_input = tf.keras.layers.Input(sequence_length)
    tok_embed = tf.keras.layers.Embedding(
        vocab_size, output_dim=embed_dim)(tok_input)
    pos_embed = tf.keras.layers.Embedding(
        sequence_length, output_dim=embed_dim)(pos_input)
    output_layer = tf.keras.layers.Add()([tok_embed, pos_embed])

    return tf.keras.models.Model([tok_input, pos_input], output_layer)
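
# Usage sketch (illustrative addition, not part of the original module):
# building the embedding block and running token ids plus their positions
# through it. The batch size (8), sequence length (64), vocabulary size
# (1000), and embedding dimension (32) are assumed values; the resulting
# (8, 64, 32) output could then be fed to a block from
# `build_transformer_block`.
def _example_token_position_embedding():
    import numpy as np

    embedder = build_token_position_embedding_block(
        sequence_length=64, vocab_size=1000, embed_dim=32
    )
    tokens = np.random.randint(0, 1000, size=(8, 64))    # integer token ids
    positions = np.tile(np.arange(64), (8, 1))           # positions 0..63 per row
    embeddings = embedder.predict([tokens, positions])   # shape (8, 64, 32)
    return embeddings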