## chatgpt queries ####

#' @title writeChatGPTquery
#' @name writeChatGPTquery
#' @description This function writes a query as a .txt file that can be used
#' with ChatGPT or a similar LLM service to find the most likely cell types
#' based on the top differentially expressed genes (DEGs) between identified
#' clusters.
#' @param DEG_output the output format from the differential expression
#' functions; must contain the columns `cluster` and `feats`
#' @param top_n_genes number of genes for each cluster
#' @param tissue_type tissue type
#' @param folder_name path to the folder where you want to save the .txt file
#' @param file_name name of .txt file
#' @returns writes a .txt file to the desired location and invisibly returns
#' the path of the written file
#' @details This function does not run any LLM service. It simply creates the
#' .txt file that can then be used with any LLM service (e.g. OpenAI, Gemini,
#' ...). The first row of the file is the prompt; every following row holds one
#' cluster label followed by its top genes, separated by spaces. Clusters are
#' written in order of first appearance in `DEG_output`, and cluster labels may
#' be numeric (including 0-based or non-contiguous ids) or character.
#' @export
writeChatGPTquery <- function(DEG_output,
                              top_n_genes = 10,
                              tissue_type = "human breast cancer",
                              folder_name = getwd(),
                              file_name = "chatgpt_query.txt") {
  # Accept plain data.frames too; the grouping below needs a data.table.
  if (!data.table::is.data.table(DEG_output)) {
    DEG_output <- data.table::as.data.table(DEG_output)
  }

  missing_cols <- setdiff(c("cluster", "feats"), colnames(DEG_output))
  if (length(missing_cols) > 0) {
    stop(
      "DEG_output is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  if (!dir.exists(folder_name)) {
    stop("folder_name does not exist: ", folder_name, call. = FALSE)
  }

  chatgpt_query <- paste0("Identify cell types of ", tissue_type, " tissue using the following markers. Identify one cell type for each row. Only provide the cell type name and the marker genes used for cell type identification.")

  # Keep the first top_n_genes rows of every cluster.
  selected_DEG_output <- DEG_output[, head(.SD, top_n_genes), by = "cluster"]

  # Work on character vectors so factor columns contribute their labels
  # rather than their integer codes.
  cluster_labels <- as.character(selected_DEG_output[["cluster"]])
  feats <- as.character(selected_DEG_output[["feats"]])
  cluster_ids <- unique(cluster_labels[!is.na(cluster_labels)])

  # Build one row per cluster positionally. Using the cluster id itself as a
  # list index breaks for 0-based, non-contiguous or non-numeric labels.
  cluster_rows <- lapply(cluster_ids, function(id) {
    c(id, feats[which(cluster_labels == id)])
  })
  finallist <- c(list(chatgpt_query), cluster_rows)

  # A single list column: fwrite joins the items of each row using sep2.
  outputdt <- data.table::data.table(finallist)

  output_path <- file.path(folder_name, file_name)

  cat("\n start writing \n")
  data.table::fwrite(
    x = outputdt,
    file = output_path,
    sep2 = c("", " ", ""),
    col.names = FALSE
  )

  invisible(output_path)
}

# IDs ####