  return output
~~~
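The block above is the tail of `embedding_postprocessor`, which enriches the word embeddings with the other two embeddings BERT uses. As a rough orientation, the NumPy sketch below (made-up shapes and values, not the actual TensorFlow variables) shows the essence of what it returns: the element-wise sum of the word, token-type (segment), and position embeddings; the real function additionally applies layer normalization and dropout.

~~~python
import numpy as np

batch_size, seq_length, hidden_size = 2, 8, 16
type_vocab_size, max_position = 2, 512

rng = np.random.default_rng(0)
word_embeddings = rng.normal(size=(batch_size, seq_length, hidden_size))  # output of embedding_lookup
token_type_ids = np.zeros((batch_size, seq_length), dtype=np.int64)       # segment A/B ids

token_type_table = rng.normal(size=(type_vocab_size, hidden_size))        # learned segment embeddings
full_position_embeddings = rng.normal(size=(max_position, hidden_size))   # learned position embeddings

# The core of embedding_postprocessor: word + segment + position, element-wise.
output = word_embeddings
output = output + token_type_table[token_type_ids]                    # [batch, seq, hidden]
output = output + full_position_embeddings[np.newaxis, :seq_length]   # broadcast over the batch
# (the real function then applies layer_norm and dropout before returning)
print(output.shape)  # (2, 8, 16)
~~~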
#### Mask mechanism
~~~python
class BertModel(object):
  """BERT model ("Bidirectional Encoder Representations from Transformers").

  Example usage:

  ```python
  # Already been converted into WordPiece token ids
  input_ids = tf.constant([[31, 51, 99], [15, 5, 0]])
  input_mask = tf.constant([[1, 1, 1], [1, 1, 0]])
  token_type_ids = tf.constant([[0, 0, 1], [0, 2, 0]])

  config = modeling.BertConfig(vocab_size=32000, hidden_size=512,
    num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024)

  model = modeling.BertModel(config=config, is_training=True,
    input_ids=input_ids, input_mask=input_mask, token_type_ids=token_type_ids)

  label_embeddings = tf.get_variable(...)
  pooled_output = model.get_pooled_output()
  logits = tf.matmul(pooled_output, label_embeddings)
  ...
  ```
  """

  def __init__(self,
               config,
               is_training,
               input_ids,
               input_mask=None,
               token_type_ids=None,
               use_one_hot_embeddings=False,
               scope=None):
    """Constructor for BertModel.

    Args:
      config: `BertConfig` instance.
      is_training: bool. true for training model, false for eval model. Controls
        whether dropout will be applied.
      input_ids: int32 Tensor of shape [batch_size, seq_length].
      input_mask: (optional) int32 Tensor of shape [batch_size, seq_length].
      token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length].
      use_one_hot_embeddings: (optional) bool. Whether to use one-hot word
        embeddings or tf.embedding_lookup() for the word embeddings.
      scope: (optional) variable scope. Defaults to "bert".

    Raises:
      ValueError: The config is invalid or one of the input tensor shapes
        is invalid.
    """
    # ... (input checks and the "embeddings" scope, i.e. embedding_lookup and
    # embedding_postprocessor, are omitted here) ...
    with tf.variable_scope(scope, default_name="bert"):
      with tf.variable_scope("encoder"):
        # This converts a 2D mask of shape [batch_size, seq_length] to a 3D
        # mask of shape [batch_size, seq_length, seq_length] which is used
        # for the attention scores.
        attention_mask = create_attention_mask_from_input_mask(
            input_ids, input_mask)  # build the mask matrix
        # For example, a batch of padded token-id sequences:
        #   [45, 54, 85, ..., 0, 0, 0]
        #   [12, 31, 11, ..., 0, 0, 0]
        #   [91, 51, 18, ..., 12, 21, 0]
        # Positions beyond the actual sentence length are padded with 0. After
        # masking, positions that carry information become 1, padding becomes 0:
        #   [1, 1, 1, ..., 0, 0, 0]
        #   [1, 1, 1, ..., 0, 0, 0]
        #   [1, 1, 1, ..., 1, 1, 0]
        # We need not only this 2D mask but also the 3D one, as the comment above
        # says: "This converts a 2D mask of shape [batch_size, seq_length] to a 3D".
        # Every position gets its own mask row, e.g. for the 45 in the top-left corner:
        #   [1, 1, 1, ..., 0, 0, 0]
        # The 1s mark the positions that 45 is allowed to attend to (they take part
        # in its attention computation); positions marked 0 are excluded.

        # Run the stacked transformer.
        # `sequence_output` shape = [batch_size, seq_length, hidden_size].
        self.all_encoder_layers = transformer_model(  # Ctrl-click to jump to transformer_model
            input_tensor=self.embedding_output,  # sum of the three embeddings (word + segment + position)
            attention_mask=attention_mask,  # the 0/1 mask built above; 1 means the position is attended to
            hidden_size=config.hidden_size,  # hidden (feature) size of each token
            num_hidden_layers=config.num_hidden_layers,  # number of stacked Transformer (encoder) layers
            num_attention_heads=config.num_attention_heads,  # multi-head attention; see the illustrated BERT post
            intermediate_size=config.intermediate_size,  # size of the feed-forward (fully connected) layer
            intermediate_act_fn=get_activation(config.hidden_act),
            hidden_dropout_prob=config.hidden_dropout_prob,
            attention_probs_dropout_prob=config.attention_probs_dropout_prob,
            initializer_range=config.initializer_range,
            do_return_all_layers=True)
~~~
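To make the annotated comments concrete, here is a small NumPy sketch (illustrative values, not the library code) of the two steps the mask goes through in `modeling.py`: `create_attention_mask_from_input_mask` broadcasts the 2D padding mask of shape `[batch_size, seq_length]` into a 3D mask of shape `[batch_size, seq_length, seq_length]` whose row i marks which positions token i may attend to, and the attention layer later turns that mask into a large negative additive bias so padded positions receive near-zero probability after the softmax.

~~~python
import numpy as np

# 2D padding mask from the docstring example: 1 = real token, 0 = padding
input_mask = np.array([[1, 1, 1],
                       [1, 1, 0]], dtype=np.float32)  # [batch_size=2, seq_length=3]
batch_size, seq_length = input_mask.shape

# Step 1: broadcast to [batch_size, seq_length, seq_length];
# attention_mask[b, i, j] == 1 iff position j of example b is a real token.
attention_mask = np.ones((batch_size, seq_length, 1), np.float32) * input_mask[:, None, :]
print(attention_mask[1])
# [[1. 1. 0.]
#  [1. 1. 0.]
#  [1. 1. 0.]]

# Step 2 (inside the attention layer): masked positions get a -10000 bias added
# to the raw attention scores, so the softmax pushes their weight to ~0.
scores = np.zeros((batch_size, seq_length, seq_length), np.float32)  # stand-in for Q·K^T / sqrt(d)
scores += (1.0 - attention_mask) * -10000.0
probs = np.exp(scores) / np.exp(scores).sum(-1, keepdims=True)
print(probs[1].round(3))
# [[0.5 0.5 0. ]
#  [0.5 0.5 0. ]
#  [0.5 0.5 0. ]]
~~~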