Demo of Logits and Embeddings from a Language Model¶

In [1]:
# Install transformers if it isn't already available:
!pip install transformers
import torch
from torch import tensor
from transformers import AutoTokenizer, AutoModelForCausalLM
import pandas as pd
# Avoid a warning message
import os; os.environ["TOKENIZERS_PARALLELISM"] = "false"
In [2]:
tokenizer = AutoTokenizer.from_pretrained("distilgpt2", add_prefix_space=True) # smaller version of GPT-2
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'left'
model = AutoModelForCausalLM.from_pretrained("distilgpt2")
In [3]:
print(f"The tokenizer has {len(tokenizer.get_vocab())} strings in its vocabulary.")
print(f"The model has {model.num_parameters():,d} parameters.")
The tokenizer has 50257 strings in its vocabulary.
The model has 81,912,576 parameters.

Padding¶

The model can be trained and evaluated on several independent sequences at a time (a batch). It wasn't set up for padding at training time, which is why we set a few flags above; with those in place, this works:

In [4]:
phrase = "This weekend I plan to"
In [5]:
batch = tokenizer(["Hi", phrase], padding=True, return_tensors='pt')
batch
Out[5]:
{'input_ids': tensor([[50256, 50256, 50256, 50256, 15902],
        [  770,  5041,   314,  1410,   284]]), 'attention_mask': tensor([[0, 0, 0, 0, 1],
        [1, 1, 1, 1, 1]])}
In [6]:
batch['input_ids'].shape
Out[6]:
torch.Size([2, 5])

Notice that input_ids is 2 (the number of sequences in the batch) by 5 (the number of tokens in the longest sequence).

The attention_mask tells the model to ignore the padding tokens in all of its calculations. We won't need it in these demos, but it is generally passed in alongside input_ids.
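
For illustration, here's roughly how the mask gets passed along with the ids (the variable name masked_output is just for this sketch; we don't use the result below):

with torch.no_grad():
    masked_output = model(input_ids=batch['input_ids'],
                          attention_mask=batch['attention_mask'])
masked_output.logits.shape  # one logit vector per position: (2, 5, 50257)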

Going forward we'll use this simple example:

In [7]:
input_ids = tokenizer(phrase, return_tensors='pt')['input_ids']; input_ids
Out[7]:
tensor([[ 770, 5041,  314, 1410,  284]])

Embeddings¶

The model includes two modules that are very important: one at the very beginning, one at the very end.

In [8]:
token_embedding_module = model.transformer.wte
token_embedding_module
Out[8]:
Embedding(50257, 768)
In [9]:
lm_head_module = model.lm_head
lm_head_module
Out[9]:
Linear(in_features=768, out_features=50257, bias=False)

Notice that the dimensionality is exactly symmetrical: token_embedding_module maps each token id to one of the 50,257 possible token embeddings (each 768-dimensional); lm_head_module maps a 768-dimensional embedding to a logit for each of the 50,257 vocabulary entries.

It turns out that for this model, the token embeddings are identical on the input and output side. This is called "tied weights" and is now quite common, since it saves parameters. It is easy to check and to implement in PyTorch because a Linear layer already stores its weight matrix as (out_features, in_features), which matches the shape of the Embedding's weight.

In [10]:
(token_embedding_module.weight.data == lm_head_module.weight.data).all()
Out[10]:
tensor(True)
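
For reference, here is a minimal sketch of how such tying is typically wired up in PyTorch (illustrative names, not the library's own code):

import torch.nn as nn
vocab_size, d_model = 50257, 768
wte = nn.Embedding(vocab_size, d_model)            # weight shape: (50257, 768)
head = nn.Linear(d_model, vocab_size, bias=False)  # weight shape: (50257, 768) as well
head.weight = wte.weight                           # both modules now share a single Parameter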

Example of mapping¶

The last token id is:

In [11]:
print(input_ids[0, -1],
    "which corresponds to",
    repr(tokenizer.decode(input_ids[0, -1])))
tensor(284) which corresponds to ' to'

It has vector:

In [12]:
with torch.no_grad():
    vec = token_embedding_module(input_ids[0, -1])
vec.shape
Out[12]:
torch.Size([768])

(The specific numbers in there are illegible, so we hide them.)

Passing a vector through a linear layer (with no bias) is equivalent to taking its dot product with each row of the layer's weight matrix, so below we are effectively computing the dot product of vec with every token embedding (see the sanity check after the next cell).

In [13]:
with torch.no_grad():
    logits = lm_head_module(vec)
logits.shape
Out[13]:
torch.Size([50257])
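
As a quick sanity check of that claim (a sketch, nothing model-specific), the same logits come out of a plain matrix-vector product with the layer's weight:

with torch.no_grad():
    manual_logits = lm_head_module.weight @ vec  # (50257, 768) @ (768,) -> (50257,)
torch.allclose(manual_logits, logits)            # expected: True
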
In [14]:
[tokenizer.decode(x) for x in logits.topk(k=10).indices]
Out[14]:
[' to', 'to', ' To', 'To', ' for', ' in', ' with', ' on', ' TO', ' and']

Astute observers will notice that vocabulary slots are wasted on those minor variants of the same token. Current research has improved on this slightly by letting such related tokens share information, but it doesn't make a big difference.

If we do this for all the input tokens at the same time, we get the most similar tokens for each input token. That will almost always be the token itself, but note that the token embeddings are not explicitly normalized, so the dot product with a different token's embedding can occasionally be largest simply because that embedding has a larger magnitude.
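
To see that the magnitudes really do vary, we can look at the norms of the embedding rows (a quick check; the exact values aren't important):

with torch.no_grad():
    norms = token_embedding_module.weight.norm(dim=1)
norms.min(), norms.median(), norms.max()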

In [15]:
import pandas as pd
logits = lm_head_module(token_embedding_module(input_ids))
pd.DataFrame([
    [tokenizer.decode(x) for x in y]
    for y in logits.topk(k=10).indices[0]
])
Out[15]:
0 1 2 3 4 5 6 7 8 9
0 This This These this THIS It this These The That
1 weekend Weekend weekends week afternoon evening week Sunday Friday Saturday
2 I I we my We they me My My you
3 plan plans plan Plan Plans Plan PLAN intend planning planned
4 to to To To for in with on TO and

What the model does¶

When the model processes its input, it first looks up the embedding for each input token to produce its initial "hidden states". It then applies each layer of the model in turn (consisting, in this case, of a self-attention "mixing" layer followed by a position-wise feed-forward "mapping" layer), producing incrementally more refined hidden states that approach the context vector for the next token.
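
The loop below sketches that process by hand, using the submodules this model exposes (wte and wpe for token and position embeddings, the block list h, and the final layer norm ln_f). It ignores dropout and caching, so treat it as an illustration rather than the library's exact forward pass:

with torch.no_grad():
    positions = torch.arange(input_ids.shape[1]).unsqueeze(0)
    hidden = model.transformer.wte(input_ids) + model.transformer.wpe(positions)
    for block in model.transformer.h:
        hidden = block(hidden)[0]              # each block returns a tuple; element 0 is the hidden states
    hidden = model.transformer.ln_f(hidden)    # final layer norm
    sketch_logits = model.lm_head(hidden)
    full_logits = model(input_ids).logits
torch.allclose(sketch_logits, full_logits, atol=1e-4)  # should be True up to numerical noise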

In [16]:
with torch.no_grad():
    model_output = model(input_ids, output_hidden_states=True)
hidden_states = model_output.hidden_states
In [17]:
len(hidden_states) # this is model.config.n_layer + 1, to include the input embeddings.
Out[17]:
7
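
Each of those hidden states holds one 768-dimensional vector per input token; a quick shape check:

{tuple(h.shape) for h in hidden_states}  # every entry: (batch, sequence, hidden size) = (1, 5, 768)
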
In [18]:
logits = lm_head_module(hidden_states[0])
pd.DataFrame([
    [tokenizer.decode(x) for x in y]
    for y in logits.topk(k=10).indices[0]
]).T
Out[18]:
0 1 2 3 4
0 This weekend I plan to
1 This weekends I plans to
2 These Weekend we plan To
3 <|endoftext|> week my Plan for
4 theless afternoon me Plans To
5 It evening We Plan in
6 THIS week you PLAN on
7 There fortnight myself intend TO
8 this holidays My planning and
9 You Saturdays You proposal of
In [19]:
logits = lm_head_module(hidden_states[-1])
pd.DataFrame([
    [tokenizer.decode(x) for x in y]
    for y in logits.topk(k=10).indices[0]
]).T
Out[19]:
0 1 2 3 4
0 The , was to go
1 A in had on take
2 . was got a spend
3 \n 's went for make
4 The at decided my do
5 , is took not be
6 This I received an attend
7 I � started the visit
8 the we spent and run
9 It the met this have

Note: the logits for the position after the first token look off. I suspect an issue with the "distilling" part of this model's training. All of the other token distributions look reasonable.