Hi DN99,

I think this may be what you need.

First download this file. It is the most current gene ontology database file: current.geneontology.org/annotations/goa_human.gaf.gz

Then extract the gzip file onto your Desktop. The following script will give you a master list of all gene IDs and their associated GO IDs in a data frame. You can then use the merge() function to merge the GO IDs with your gene list data frame.

# Build a two-column data frame (GENEID, GOID) from the extracted GAF file.
#
# GAF header/comment lines start with "!". They are filtered by that prefix
# rather than by skipping a fixed number of lines, because the header length
# can change between releases.
gaf_lines <- readLines("~/Desktop/goa_human.gaf")
gaf_lines <- gaf_lines[!startsWith(gaf_lines, "!")]

# Keep writing the header-free copy so the intermediate file still exists.
writeLines(gaf_lines, "~/Desktop/goa_human_no_header.txt")
rm(gaf_lines)

# The annotation rows are tab-separated. Quoting and comment handling are
# disabled so that quote characters or "#" inside a field cannot merge or
# truncate rows.
GO <- read.delim(
  "~/Desktop/goa_human_no_header.txt",
  header = FALSE,
  sep = "\t",
  quote = "",
  comment.char = "",
  stringsAsFactors = FALSE
)

# Keep only columns 3 and 5 (per the GAF spec: DB Object Symbol and GO ID);
# every other column is discarded.
GO <- GO[, c("V3", "V5")]
colnames(GO) <- c("GENEID", "GOID")

If you want more information, such as GO terms, in a data frame as well, you can use the following script:

# Build GENESwithGO: gene / GO ID pairs from the GAF file, joined with the
# term information exposed by the GO.db package.

# Install GO.db only when it is missing, instead of on every run.
if (!requireNamespace("GO.db", quietly = TRUE)) {
  BiocManager::install("GO.db")
}
library(GO.db)

# GAF header/comment lines start with "!". They are filtered by that prefix
# rather than by skipping a fixed number of lines, because the header length
# can change between releases.
gaf_lines <- readLines("~/Desktop/goa_human.gaf")
gaf_lines <- gaf_lines[!startsWith(gaf_lines, "!")]

# Keep writing the header-free copy so the intermediate file still exists.
writeLines(gaf_lines, "~/Desktop/goa_human_no_header.txt")
rm(gaf_lines)

# The annotation rows are tab-separated. Quoting and comment handling are
# disabled so that quote characters or "#" inside a field cannot merge or
# truncate rows.
GO <- read.delim(
  "~/Desktop/goa_human_no_header.txt",
  header = FALSE,
  sep = "\t",
  quote = "",
  comment.char = "",
  stringsAsFactors = FALSE
)

# Keep only columns 3 and 5 (per the GAF spec: DB Object Symbol and GO ID);
# every other column is discarded.
GO <- GO[, c("V3", "V5")]
colnames(GO) <- c("GENEID", "GOID")

# Flatten the GOTERM annotation object into a data frame and rename its
# first column so it can serve as the merge key.
GOdb <- as.data.frame(GOTERM)
colnames(GOdb)[1] <- "GOID"

# Drop the last row of the GO.db table.
# NOTE(review): presumably a non-term placeholder entry -- confirm.
GOdb <- head(GOdb, -1)

# Inner join: annotations whose GO ID is absent from GO.db are dropped.
GENESwithGO <- merge(GO, GOdb, by = "GOID")
rm(GOdb, GO)

# Remove the leftover go_id column (presumably a duplicate of the merge key
# produced by as.data.frame(GOTERM) -- confirm).
GENESwithGO$go_id <- NULL

This should create a master data frame for you of GO IDs and their gene ontology terms. It might be overkill for your purposes, but just throwing this here.

Hope this helps!



Source link