diff --git a/assets/image-20240427182734627.png b/assets/image-20240427182734627.png new file mode 100644 index 0000000..0b9412a Binary files /dev/null and b/assets/image-20240427182734627.png differ diff --git a/assets/image-20240427183034655.png b/assets/image-20240427183034655.png new file mode 100644 index 0000000..fbb64fa Binary files /dev/null and b/assets/image-20240427183034655.png differ diff --git a/人人都能看懂的Transformer/第三章——位置编码.md b/人人都能看懂的Transformer/第三章——位置编码.md index 7ae9504..06cc697 100644 --- a/人人都能看懂的Transformer/第三章——位置编码.md +++ b/人人都能看懂的Transformer/第三章——位置编码.md @@ -172,6 +172,60 @@ tf.Tensor( -### 向量加法 +### 矩阵同位置相加 + +不管是Transformer原文中的方法还是GPT的方法,都是通过矩阵的简单相加。这里以GPT-2为例(Hugging Face开源的GPT),具体代码如下: + +~~~python +import torch +from transformers import GPT2Tokenizer, GPT2Model + +tokenizer = GPT2Tokenizer.from_pretrained('gpt2') # 初始化 +model = GPT2Model.from_pretrained('gpt2') + +text = "LLM with me" # 待处理的文本 +# 分词并转换为索引 +inputs = tokenizer(text, return_tensors="pt") +input_ids = inputs["input_ids"] +embeddings = model.get_input_embeddings() # 获取模型的嵌入层 +input_embeddings = embeddings(input_ids) # 将索引转换为嵌入向量 +# 获取位置编码矩阵 +position_ids = torch.arange(0, input_ids.size(1)).unsqueeze(0).to(input_ids.device) +position_embeddings = model.wpe(position_ids) +final_embeddings = input_embeddings + position_embeddings # 将位置编码与词嵌入相加以获得最终的输入嵌入 + +# 查看最终的输入嵌入 +print(final_embeddings) +print(final_embeddings.shape) +"""out: +tensor([[[ 0.2321, -0.3849, 0.1550, ..., 0.0664, 0.1922, 0.3908], + [ 0.0081, -0.1923, 0.1255, ..., -0.0160, 0.1091, -0.0756], + [ 0.0686, -0.0744, 0.0838, ..., 0.0598, 0.1280, 0.0136], + [ 0.1512, -0.0985, 0.1991, ..., -0.1582, 0.1241, 0.0501]]], + grad_fn=<AddBackward0>) +torch.Size([1, 4, 768]) +""" +~~~ + +![image-20240427182734627](assets/image-20240427182734627.png) + +从代码来看,embedding跟position_embeddings就是同位置元素相加。以下面的方法来检验 + +~~~python +print(final_embeddings[0][0][0]) +print(input_embeddings[0][0][0] + position_embeddings[0][0][0]) +"""out: +tensor(0.2321, grad_fn=<SelectBackward0>) +tensor(0.2321, grad_fn=<AddBackward0>)
+""" + +print(final_embeddings[0][1][1]) +print(input_embeddings[0][1][1] + position_embeddings[0][1][1]) +"""out: +tensor(-0.1923, grad_fn=<SelectBackward0>) +tensor(-0.1923, grad_fn=<AddBackward0>) +""" +~~~ + +![image-20240427183034655](assets/image-20240427183034655.png) -以GPT-2为例(Hugging Face开源的GPT), \ No newline at end of file