require(tok)
require(torch)
source('GPT.R')
Learning ChatGPT 1: Probabilities for the next word
Inspired by the blog post by Stephen Wolfram about the workings of the GPT-2 system, I decided to learn a bit about ChatGPT myself. Luckily, GPT-2 is now available for R. My first task is simply to learn to run the model and generate the probability table for the words that can follow the text, “The best thing about AI is its ability to”. The following contents are mainly based on the blog post by Stephen Wolfram. Additional resources include the OpenAI GitHub page for GPT-2 and the paper describing GPT-2.
Load the libraries and the R implementation of GPT-2.
Create a tokenizer to process inputs.
# Pretrained GPT-2 tokenizer. The object deliberately shares its name with the
# `tok` package; `tok::` still resolves to the package namespace.
tok <- tok::tokenizer$from_pretrained("gpt2")
Load a GPT-2 model
# Build the GPT-2 network on the "meta" device; the weights are attached in a
# separate step from a saved state dict.
torch::with_device(device = "meta", {
  Model0 <- GPT(
    block_size = 1024,
    n_embd = 768,
    N_Layers = 12,
    nvoc = 50257,
    Head = 12
  )
})
Apply the model weights
# Attach the pretrained weights stored in "Model-weights.pt" to the model.
Model0$load_state_dict(
  state_dict = torch::torch_load("Model-weights.pt"),
  .refer_to_state_dict = TRUE
)
Create a function that lists the top_k most likely next words together with their probabilities.
#' List the most likely next tokens and their probabilities
#'
#' Encodes `raw_text` with the global tokenizer `tok`, runs `model` in
#' evaluation mode, and keeps the `top_k` highest logits at the last input
#' position. The softmax is taken over those `top_k` logits only, so the
#' returned probabilities sum to 1 across the rows.
#'
#' @param model A callable torch module returning logits for a batch of ids.
#' @param device Device for the input tensor; `NULL` uses torch's default.
#' @param raw_text A single character string used as the prompt.
#' @param temperature Divisor applied to the logits; `NULL` is treated as 1.
#' @param top_k Number of candidate tokens to return.
#' @return A data.frame with columns `token` and `probability`, one row per
#'   candidate, ordered from most to least likely.
top_k_words <- function(model = NULL,
                        device = NULL,
                        raw_text = NULL,
                        temperature = NULL,
                        top_k = 10) {
  stopifnot(is.character(raw_text), length(raw_text) == 1L)
  if (is.null(temperature)) {
    temperature <- 1
  }

  # The +1 here and the -1 when decoding convert between the tokenizer's ids
  # and the 1-based indices used on the torch side.
  idx <- tok$encode(raw_text)$ids
  idx <- torch::torch_tensor(
    idx + 1,
    dtype = torch::torch_int(),
    device = device
  )
  idx <- torch::torch_unsqueeze(idx, 1)

  logits <- model$eval()(idx)
  # Logits at the last input position; 1024 matches the block_size the model
  # in this file is built with.
  logits <- logits[, min(idx$size(2), 1024), ] / temperature

  top <- logits$topk(top_k)
  probs <- torch::nnf_softmax(top[[1]], -1)

  candidate_ids <- top[[2]]
  tokens <- vapply(
    seq_len(top_k),
    function(i) {
      idx_next <- candidate_ids[, as.integer(i)]$unsqueeze(1)
      tok$decode(as.integer(idx_next$cpu() - 1))
    },
    character(1)
  )

  # Move to the CPU before converting so this also works for GPU tensors.
  data.frame(token = tokens, probability = as.numeric(probs$cpu()))
}
Run the model on the GPU when one is available, otherwise on the CPU
# Choose the compute device once, then move the model onto it.
device0 <- if (torch::cuda_is_available()) "cuda" else "cpu"
Model0 <- if (identical(device0, "cuda")) Model0$cuda() else Model0$cpu()
Generate the results
set.seed(1) # for reproducibility, GPT-2 output is random.
df <- top_k_words(
  model = Model0,
  device = device0,
  raw_text = "The best thing about AI is its ability to",
  temperature = 0.8,
  top_k = 5
)
names(df) <- c("Token", "Probability")
knitr::kable(df)
| Token | Probability |
|---|---|
| learn | 0.2770396 |
| predict | 0.2047849 |
| make | 0.1826125 |
| understand | 0.1745481 |
| do | 0.1610149 |