From 3869199a3158ce6392280526df24c6cdaff63a55 Mon Sep 17 00:00:00 2001
From: benjas <909336740@qq.com>
Date: Fri, 8 Jan 2021 11:32:38 +0800
Subject: [PATCH] Add. mask function
---
 .../BERT流程解读.md | 87 +++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/NLP通用框架BERT项目实战/第二章——BERT源码解读与应用实例/BERT流程解读.md b/NLP通用框架BERT项目实战/第二章——BERT源码解读与应用实例/BERT流程解读.md
index 359587f..3b0c48b 100644
--- a/NLP通用框架BERT项目实战/第二章——BERT源码解读与应用实例/BERT流程解读.md
+++ b/NLP通用框架BERT项目实战/第二章——BERT源码解读与应用实例/BERT流程解读.md
@@ -512,3 +512,90 @@ def embedding_postprocessor(input_tensor,
    return output
~~~


#### The mask mechanism

~~~python
class BertModel(object):
  """BERT model ("Bidirectional Encoder Representations from Transformers").

  Example usage:

  ```python
  # Already been converted into WordPiece token ids
  input_ids = tf.constant([[31, 51, 99], [15, 5, 0]])
  input_mask = tf.constant([[1, 1, 1], [1, 1, 0]])
  token_type_ids = tf.constant([[0, 0, 1], [0, 2, 0]])

  config = modeling.BertConfig(vocab_size=32000, hidden_size=512,
    num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024)

  model = modeling.BertModel(config=config, is_training=True,
    input_ids=input_ids, input_mask=input_mask, token_type_ids=token_type_ids)

  label_embeddings = tf.get_variable(...)
  pooled_output = model.get_pooled_output()
  logits = tf.matmul(pooled_output, label_embeddings)
  ...
  ```
  """

  def __init__(self,
               config,
               is_training,
               input_ids,
               input_mask=None,
               token_type_ids=None,
               use_one_hot_embeddings=False,
               scope=None):
    """Constructor for BertModel.

    Args:
      config: `BertConfig` instance.
      is_training: bool. true for training model, false for eval model. Controls
        whether dropout will be applied.
      input_ids: int32 Tensor of shape [batch_size, seq_length].
      input_mask: (optional) int32 Tensor of shape [batch_size, seq_length].
      token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length].
      use_one_hot_embeddings: (optional) bool. Whether to use one-hot word
        embeddings or tf.embedding_lookup() for the word embeddings.
      scope: (optional) variable scope. Defaults to "bert".

    Raises:
      ValueError: The config is invalid or one of the input tensor shapes
        is invalid.
    """

    with tf.variable_scope("encoder"):
      # This converts a 2D mask of shape [batch_size, seq_length] to a 3D
      # mask of shape [batch_size, seq_length, seq_length] which is used
      # for the attention scores.
      attention_mask = create_attention_mask_from_input_mask(
          input_ids, input_mask)  # build the mask matrix
      # For example, a batch of padded token-id sequences:
      #   [45, 54, 85, ...,  0,  0, 0]
      #   [12, 31, 11, ...,  0,  0, 0]
      #   [91, 51, 18, ..., 12, 21, 0]
      # Sequences shorter than seq_length are padded with 0 at the end. After
      # masking, positions that carry real tokens become 1 and padding
      # positions become 0:
      #   [1, 1, 1, ..., 0, 0, 0]
      #   [1, 1, 1, ..., 0, 0, 0]
      #   [1, 1, 1, ..., 1, 1, 0]
      # We need not only this 2D mask but also a 3D one, as the comment above
      # says: it "converts a 2D mask of shape [batch_size, seq_length] to a 3D
      # mask". Each position gets a mask row of its own; take the 45 in the
      # top-left corner as an example:
      #   [1, 1, 1, ..., 0, 0, 0]
      # A 1 marks a position whose information 45 is allowed to see, so that
      # position is included in the attention computation; a 0 means the
      # position is ignored.

      # Run the stacked transformer.
      # `sequence_output` shape = [batch_size, seq_length, hidden_size].
      self.all_encoder_layers = transformer_model(  # Ctrl+click to jump to transformer_model
          input_tensor=self.embedding_output,  # the sum of the 3 kinds of embeddings
          attention_mask=attention_mask,  # the 0/1 mask built above; 1 means the position takes part in the attention computation
          hidden_size=config.hidden_size,  # dimensionality of the hidden states (feature size)
          num_hidden_layers=config.num_hidden_layers,  # number of stacked Transformer layers
          num_attention_heads=config.num_attention_heads,  # multi-head attention, covered in the illustrated BERT section
          intermediate_size=config.intermediate_size,  # number of neurons in the fully connected (intermediate) layer
          intermediate_act_fn=get_activation(config.hidden_act),
          hidden_dropout_prob=config.hidden_dropout_prob,
          attention_probs_dropout_prob=config.attention_probs_dropout_prob,
          initializer_range=config.initializer_range,
          do_return_all_layers=True)
~~~
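
To make the 2D → 3D conversion concrete, here is a minimal NumPy sketch of what `create_attention_mask_from_input_mask` effectively computes. The toy mask values are made up for illustration, and this is a sketch of the idea rather than the library code itself:

~~~python
import numpy as np

# Toy padding mask: 2 sequences of length 5; 1 = real token, 0 = padding.
input_mask = np.array([[1, 1, 1, 0, 0],
                       [1, 1, 1, 1, 0]])            # [batch_size, seq_length]

batch_size, seq_length = input_mask.shape

# Broadcast the 2D key mask across every query position, giving a
# [batch_size, seq_length, seq_length] mask: row i says which key
# positions query i is allowed to attend to.
broadcast_ones = np.ones((batch_size, seq_length, 1), dtype=np.float32)
to_mask = input_mask.reshape(batch_size, 1, seq_length).astype(np.float32)
attention_mask = broadcast_ones * to_mask

print(attention_mask[0])
# [[1. 1. 1. 0. 0.]
#  [1. 1. 1. 0. 0.]
#  [1. 1. 1. 0. 0.]
#  [1. 1. 1. 0. 0.]
#  [1. 1. 1. 0. 0.]]
~~~

The real function does the same broadcast on TensorFlow tensors (a `tf.ones` column of queries multiplied by the reshaped, cast key mask).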
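
Why 0/1 values? Downstream, inside BERT's `attention_layer`, the mask is not multiplied into the scores but converted into an additive bias, roughly `adder = (1.0 - attention_mask) * -10000.0`, which is added to the raw attention scores before the softmax. The NumPy sketch below reproduces that idea with made-up scores:

~~~python
import numpy as np

def softmax(x):
    e = np.exp(x - x.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

# Hypothetical raw attention scores of one query over 5 key positions.
scores = np.array([2.0, 1.0, 0.5, 3.0, 3.0])
mask = np.array([1.0, 1.0, 1.0, 0.0, 0.0])   # last two keys are padding

# 1 -> add 0; 0 -> add a large negative number, so the softmax drives the
# attention weight of padded positions to (essentially) zero.
adder = (1.0 - mask) * -10000.0
probs = softmax(scores + adder)

print(probs.round(3))   # e.g. [0.629 0.231 0.14 0. 0.]
~~~

So a 1 in the mask leaves a position's score untouched, while a 0 pushes it far below every real score and it receives (essentially) no attention weight.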