library(rentrez)
# Build a PubMed query covering publication dates from two days ago through
# yesterday. Dates are written as YYYY/MM/DD and the range is given as
# start:end followed by the [dp] (date of publication) field tag.
date_start <- format(Sys.Date() - 2, "%Y/%m/%d")
date_end <- format(Sys.Date() - 1, "%Y/%m/%d")
search_query <- paste0(
  "(typhoid OR cholera) AND ", date_start, ":", date_end, "[dp]"
) # the search query

# Run the search; the matching PubMed IDs are available as search_results$ids.
# NOTE(review): other useful parameters -- entrez_search() limits the number of
# returned IDs through `retmax` (20 by default according to the rentrez
# documentation); raise it if more hits per window are expected.
search_results <- entrez_search(db = "pubmed", term = search_query)
PubMed search, ChatGPT summary, and sending an email in R
ChatGPT
R
xml
httr
- Search the PubMed database. Use the `entrez_search` function from the `rentrez` package to query PubMed.
- Fetch the details of the article in xml format
# Retrieve the details of the articles in XML format based on the PubMed IDs
# returned by the search (search_results$ids). The result is kept as raw XML
# text so that it can be parsed with xml2 afterwards.
article_details <- entrez_fetch(
  db = "pubmed",
  id = search_results$ids,
  rettype = "xml"
)
- Parse the XML using the xml2 package
library(xml2)
# Parse the XML data
doc <- read_xml(article_details)

# Work article by article so that titles, abstracts and DOIs stay aligned.
# xml_find_first() returns exactly one result per article (NA after
# xml_text() when the element is absent), whereas a document-wide
# xml_find_all() simply skips articles that lack an abstract or a DOI and
# shifts every later entry by one position.
articles <- xml_find_all(doc, "//PubmedArticle")

# Extract the titles and abstracts
titles <- xml_text(xml_find_first(articles, ".//ArticleTitle"))
# The whole <Abstract> element is used rather than <AbstractText>
# (presumably because a structured abstract holds several AbstractText
# nodes, which would give more than one entry per article).
abstracts <- xml_text(xml_find_first(articles, ".//Abstract"))
# DOIs: only the article's own ArticleIdList directly under PubmedData.
dois <- xml_text(xml_find_first(
  articles,
  "./PubmedData/ArticleIdList/ArticleId[@IdType='doi']"
))
- Call ChatGPT to summarize the abstract in 1-2 sentences. This is a subscription-based service: you must have an OpenAI (ChatGPT) API key and must have signed up for the paid API service.
# gpt-4o
#' Send a single user prompt to the OpenAI chat-completions endpoint.
#'
#' @param prompt Character string sent as the content of the user message.
#' @param api_key API key, sent in the Authorization header as a Bearer token.
#' @param model Full or partial model name. It is matched (as a pattern)
#'   against "gpt-3.5-turbo" and "gpt-4o" and must select exactly one of
#'   them, e.g. "gpt-3.5" resolves to "gpt-3.5-turbo".
#' @param temperature Degree of randomness of the model's output.
#' @return The message content of the first choice in the response.
prompt_chatgpt <- function(prompt, api_key=NULL, model="gpt-4o", temperature=0.8){
  # Fail early with a readable message instead of sending an unauthenticated
  # request and tripping over the error payload afterwards.
  if (length(api_key) != 1 || is.na(api_key) || !nzchar(api_key)) {
    stop("`api_key` is missing; supply your API key.", call. = FALSE)
  }

  supported_models <- c("gpt-3.5-turbo", "gpt-4o")
  matched_model <- grep(model, supported_models, value = TRUE)
  # Zero matches would send an empty model, several matches a JSON array;
  # neither is a valid request body.
  if (length(matched_model) != 1) {
    stop(
      "Unsupported model '", paste(model, collapse = ", "),
      "'; it must match exactly one of: ",
      paste(supported_models, collapse = ", "),
      call. = FALSE
    )
  }

  response <- httr::POST(
    url = "https://api.openai.com/v1/chat/completions",
    httr::add_headers(Authorization = paste("Bearer", api_key)),
    httr::content_type_json(),
    encode = "json",
    body = list(
      model = matched_model,
      temperature = temperature, # this is the degree of randomness of the model's output
      messages = list(list(
        role = "user",
        content = prompt
      ))
    )
  )

  # Turn HTTP errors (bad key, rate limit, ...) into an R error that names
  # the status instead of a "subscript out of bounds" further down.
  httr::stop_for_status(response, task = "query the chat-completions API")

  choices <- httr::content(response)$choices
  if (length(choices) == 0) {
    stop("The API response contained no choices.", call. = FALSE)
  }
  choices[[1]]$message$content
}
- Make R send you an email every day. Use simple HTML tags (`<p></p>` or `<b></b>`) to compose an email message using the `blastula` package.
library(blastula)
#' Format each article as one HTML paragraph for the email body.
#'
#' Each paragraph holds the title in bold, linked to the article's PubMed
#' page, followed by the summary, the PMID and the DOI.
#'
#' @param titles Character vector of article titles.
#' @param abstract_summary Character vector of summaries; its length decides
#'   how many paragraphs are produced.
#' @param ids Vector of PubMed IDs, used for the link and the PMID label.
#' @param dois Character vector of DOIs.
#' @return A character vector with one "<p>...</p>" string per summary
#'   (`character(0)` when `abstract_summary` is empty).
create_summary <- function(titles, abstract_summary, ids, dois) {
  # seq_along() gives no iterations for empty input (1:length() would give
  # c(1, 0)), and vapply() always returns a character vector.
  vapply(
    seq_along(abstract_summary),
    function(i) {
      paste0(
        "<p>", " <b>", " <a href=https://pubmed.ncbi.nlm.nih.gov/", ids[i], "/> ",
        titles[i], "</a>", " </b> ", abstract_summary[i],
        " PMID=", ids[i], " DOI=", dois[i], "</p>"
      )
    },
    character(1)
  )
}
# Compose the message: the per-article HTML paragraphs from create_summary()
# are passed through md() and used as the email body.
# NOTE(review): `abstract_summary` and `ids` must already exist at this
# point; the snippets above only create `titles`, `abstracts` and `dois`.
email <- compose_email(
  title = "Test Email",
  body = md(create_summary(titles, abstract_summary, ids, dois))
)

# Send the message over SMTP using credentials stored in the file
# "gmail_cred".
smtp_send(
  email,
  from = "kimfinale@gmail.com",
  to = "jonghoon.kim@ivi.int",
  subject = "Daily summary of PubMed search",
  # credentials = creds_key(id = "gmail"),
  credentials = creds_file("gmail_cred")
)
- Save all as a single R script
# ---- PubMed digest: search, summarize with ChatGPT, send by email ----
# Requires prompt_chatgpt() to be defined before this section runs.

# Publication-date window: from two days ago through yesterday, written as
# YYYY/MM/DD and combined into a start:end[dp] range.
date_start <- format(Sys.Date() - 2, "%Y/%m/%d")
date_end <- format(Sys.Date() - 1, "%Y/%m/%d")
search_query <- paste0(
  "(typhoid OR cholera OR transmission OR modeling) AND ",
  date_start, ":", date_end, "[dp]"
)

# The API key is read from the environment rather than stored in the script.
chatgpt_api_key <- Sys.getenv("CHATGPT_API_KEY")

# NOTE(review): entrez_search() limits the number of returned IDs through
# `retmax` (20 by default according to the rentrez documentation).
search_res <- rentrez::entrez_search(db = "pubmed", term = search_query)
model <- "gpt-3.5"

if (length(search_res$ids) > 0) { # one or more hits
  ids <- search_res$ids
  details <- rentrez::entrez_fetch(db = "pubmed", id = ids, rettype = "xml")
  doc <- xml2::read_xml(details)

  # Extract every field per article so the vectors stay aligned.
  # xml_find_first() returns one result per article (NA after xml_text()
  # when the element is absent); document-wide xml_find_all() calls would
  # skip articles without an abstract or DOI and shift all later entries.
  articles <- xml2::xml_find_all(doc, "//PubmedArticle")
  # Take the PMIDs from the same records so that they match the other
  # fields position by position.
  ids <- xml2::xml_text(
    xml2::xml_find_first(articles, "./MedlineCitation/PMID")
  )
  titles <- xml2::xml_text(
    xml2::xml_find_first(articles, ".//ArticleTitle")
  )
  abstracts <- xml2::xml_text(
    xml2::xml_find_first(articles, ".//Abstract")
  )
  dois <- xml2::xml_text(xml2::xml_find_first(
    articles,
    "./PubmedData/ArticleIdList/ArticleId[@IdType='doi']"
  ))

  abstract_summary <- rep(NA_character_, length(abstracts))

  # seq_along() also covers the single-hit case, which a
  # `length(abstracts) > 1` guard would leave unsummarized.
  for (i in seq_along(abstracts)) {
    if (is.na(abstracts[i])) {
      # Nothing to summarize; skip the API call for this article.
      abstract_summary[i] <- "No abstract available."
      next
    }
    prompt <- paste0("Your task is to generate a short summary of a scientific article based on its title and abstract. Summarize the text delimited by triple backticks into a single sentence. Please do not repeat the title. ``` Title: ", titles[i], ". Abstract: ", abstracts[i], "```")
    abstract_summary[i] <- prompt_chatgpt(
      prompt = prompt,
      api_key = chatgpt_api_key,
      model = model
    )
  }

  # create a summary for the email: one HTML paragraph per article
  summary <- vapply(
    seq_along(abstract_summary),
    function(i) paste0("<p>", " <b>", " <a href=https://pubmed.ncbi.nlm.nih.gov/", ids[i], "/> ", titles[i], "</a>", " </b> ", abstract_summary[i], " PMID=", ids[i], ". DOI=", dois[i], ". </p>"),
    character(1)
  )

  intro <- paste0("<p>Please enjoy the articles retrieved from PubMed based on your search query, ", search_query, ", published between ", date_start, ", and ", date_end, ". Each article is accompanied by a one-sentence summary provided by the ChatGPT, ", model, ". For feedback, please contact Jong-Hoon Kim at jonghoon.kim@ivi.int.</p>")

  summary <- c(intro, summary)

  # NOTE(review): the title says "Weekly" while the subject says "Daily" --
  # confirm which one is intended.
  email <- blastula::compose_email(
    title = "Weekly summary of PubMed search",
    body = blastula::md(summary)
  )

  blastula::smtp_send(
    email,
    from = "kimfinale@gmail.com",
    to = "jonghoon.kim@ivi.int",
    subject = "Daily summary of PubMed search",
    credentials = blastula::creds_file("gmail_creds")
  )
} else {
  # message() reports the outcome however the script is run; a bare string
  # is only shown when top-level values are auto-printed.
  message("No articles matched your query.")
}
- Register the file using the Windows task scheduler
library(taskscheduleR)
# Register the script with the Windows Task Scheduler. The task is created
# with a WEEKLY schedule and a start time of 22:00.
# NOTE(review): the accompanying text describes an email sent every day,
# but the schedule requested here is "WEEKLY" -- confirm the frequency.
# NOTE(review): `startdate` is presumably read in the system's date format
# (02/06/2024 could be 2 June or 6 February) -- confirm.
taskscheduler_create(
  taskname = "PubMed_ChatGPT_Summary",
  rscript = "~/myblog/pubmed_chatgpt.R",
  schedule = "WEEKLY",
  starttime = "22:00",
  startdate = "02/06/2024"
)