Okay, I just worked something out. You will first need to create a directory named Biostars on your Desktop. This script builds a data frame of matched ChEMBL, UniProt, and Ensembl gene IDs; you can then do the rest of what you need from that data frame.

# Download the ChEMBL <-> UniProt mapping file and load it as a two-column
# data frame (`uniprot`, `chembl`).
# The destination directory (~/Desktop/Biostars) must already exist.
chembl_file <- "~/Desktop/Biostars/chembl_uniprot_mapping.txt"
curl::curl_download(
  url = "ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/latest/chembl_uniprot_mapping.txt",
  destfile = chembl_file,
  quiet = FALSE
)

# The file is tab-separated, so the separator must be "\t" (a literal "t"
# would split fields on the letter t). Quote handling is disabled so that
# apostrophes or quotes inside free-text fields are not treated as string
# delimiters. The default comment.char ("#") is kept on purpose.
# NOTE(review): presumably the file starts with a "#" header line - confirm.
chembl <- read.table(
  chembl_file,
  sep = "\t",
  quote = "",
  stringsAsFactors = FALSE
)

# Keep only the first two columns (UniProt accession, ChEMBL ID); the
# remaining columns (V3, V4) are not needed for the ID mapping.
chembl <- chembl[, c("V1", "V2")]
colnames(chembl) <- c("uniprot", "chembl")


# Download the human UniProt ID-mapping table, decompress it, and build a
# two-column data frame (`ensembl`, `uniprot`).
# The destination directory (~/Desktop/Biostars) must already exist.
curl::curl_download(
  url = "ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping_selected.tab.gz",
  destfile = "~/Desktop/Biostars/HUMAN_9606_idmapping_selected.tab.gz",
  quiet = FALSE
)

# Decompress next to the download (requires a `gunzip` executable on PATH).
# NOTE(review): gunzip may refuse to overwrite an existing .tab file when the
# script is re-run - delete the old file first if that happens.
system("gunzip ~/Desktop/Biostars/HUMAN_9606_idmapping_selected.tab.gz")

# The table is tab-separated, so the separator must be "\t" (a literal "t"
# would split fields on the letter t and shift every column). Quote handling
# is disabled so stray quote characters cannot swallow field boundaries.
uniprot.db <- read.table(
  "~/Desktop/Biostars/HUMAN_9606_idmapping_selected.tab",
  sep = "\t",
  quote = "",
  stringsAsFactors = FALSE
)

# Column 1 is used as the UniProt accession and column 19 as the Ensembl ID.
# NOTE(review): column positions follow the UniProt idmapping_selected layout;
# verify against the README of the current release.
ensembl.uniprot <- data.frame(
  ensembl = uniprot.db$V19,
  uniprot = uniprot.db$V1,
  stringsAsFactors = FALSE
)

# Join the ChEMBL and Ensembl tables on their shared `uniprot` column;
# merge() keeps only the rows whose key is present in both inputs.
ensembl.uniprot.chembl <- merge(
  x = chembl,
  y = ensembl.uniprot,
  by = "uniprot"
)

Here is a sample of the data frame:

enter image description here


Login
before adding your answer.

Traffic: 1389 users visited in the last hour



Source link