|
|
|
@ -172,6 +172,60 @@ tf.Tensor(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
### 向量加法
|
|
|
|
|
### 矩阵同位置相加
|
|
|
|
|
|
|
|
|
|
不管是Transformer原文中的方法还是GPT的方法,位置编码都是与词嵌入矩阵的简单同位置相加。这里以GPT-2为例(OpenAI的模型,通过Hugging Face开源的transformers库加载),具体代码如下:
|
|
|
|
|
|
|
|
|
|
~~~python
|
|
|
|
|
import torch

from transformers import GPT2Tokenizer, GPT2Model

# Load the GPT-2 tokenizer and pretrained weights (downloaded from the
# Hugging Face hub on first use).
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')  # initialize
model = GPT2Model.from_pretrained('gpt2')

text = "LLM with me"  # the text to process

# Tokenize and convert to token indices ("pt" -> return PyTorch tensors)
inputs = tokenizer(text, return_tensors="pt")
input_ids = inputs["input_ids"]

embeddings = model.get_input_embeddings()  # fetch the model's token-embedding layer
input_embeddings = embeddings(input_ids)  # map token indices to embedding vectors

# Build the position-id matrix: 0 .. seq_len-1, with a leading batch axis,
# on the same device as the input ids.
position_ids = torch.arange(0, input_ids.size(1)).unsqueeze(0).to(input_ids.device)
# wpe is GPT-2's learned positional-embedding table
position_embeddings = model.wpe(position_ids)

# Element-wise add positional embeddings to token embeddings to obtain the
# final input embeddings fed to the transformer blocks.
final_embeddings = input_embeddings + position_embeddings

# Inspect the final input embeddings
print(final_embeddings)
print(final_embeddings.shape)

"""out:
tensor([[[ 0.2321, -0.3849, 0.1550, ..., 0.0664, 0.1922, 0.3908],
[ 0.0081, -0.1923, 0.1255, ..., -0.0160, 0.1091, -0.0756],
[ 0.0686, -0.0744, 0.0838, ..., 0.0598, 0.1280, 0.0136],
[ 0.1512, -0.0985, 0.1991, ..., -0.1582, 0.1241, 0.0501]]],
grad_fn=<AddBackward0>)
torch.Size([1, 4, 768])
"""
|
|
|
|
|
~~~
|
|
|
|
|
|
|
|
|
|
<img src="../assets/image-20240427182734627.png" alt="image-20240427182734627" style="zoom:50%;" />
|
|
|
|
|
|
|
|
|
|
从代码来看,input_embeddings跟position_embeddings就是同位置元素相加。可以用下面的方法来检验:
|
|
|
|
|
|
|
|
|
|
~~~python
|
|
|
|
|
# Spot-check one element: final_embeddings must equal the element-wise sum
# of input_embeddings and position_embeddings at the same position.
print(final_embeddings[0][0][0])
print(input_embeddings[0][0][0] + position_embeddings[0][0][0])
"""out:
tensor(0.2321, grad_fn=<SelectBackward0>)
tensor(0.2321, grad_fn=<AddBackward0>)
"""

# Repeat the check at a different (token, feature) position.
print(final_embeddings[0][1][1])
print(input_embeddings[0][1][1] + position_embeddings[0][1][1])
"""out:
tensor(-0.1923, grad_fn=<SelectBackward0>)
tensor(-0.1923, grad_fn=<AddBackward0>)
"""
|
|
|
|
|
~~~
|
|
|
|
|
|
|
|
|
|
<img src="../assets/image-20240427183034655.png" alt="image-20240427183034655" style="zoom:50%;" />
|
|
|
|
|
|
|
|
|
|
以GPT-2为例(Hugging Face开源的GPT),
|