It is not uncommon for genes, especially non-coding ones, to lack an HGNC symbol.
## Attach biomaRt here so this snippet runs on its own; getBM() and useMart()
## come from that package.
library(biomaRt)

## Query Ensembl BioMart (needs network access) for every human gene's
## Ensembl ID together with its HGNC symbol.
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
symbol <- getBM(
  attributes = c("ensembl_gene_id", "hgnc_symbol"),
  mart = ensembl
)

## About 24000 Ensembl genes (67159 - 42848 = 24311) have no HGNC symbol.
## The counts below are numbers of returned rows with a non-empty value.
sum(symbol$ensembl_gene_id != "") ## 67159
sum(symbol$hgnc_symbol != "") ## 42848
Code for that plot:
library(biomaRt)
library(ggplot2)

## Query Ensembl BioMart (needs network access) for every human gene's
## Ensembl ID, HGNC symbol and biotype.
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
symbol <- getBM(
  attributes = c("ensembl_gene_id", "hgnc_symbol", "gene_biotype"),
  mart = ensembl
)

## About 24000 Ensembl genes (67159 - 42848 = 24311) have no HGNC symbol.
## The counts below are numbers of returned rows with a non-empty value.
sum(symbol$ensembl_gene_id != "") ## 67159
sum(symbol$hgnc_symbol != "") ## 42848

## Tabulate the biotypes of the rows that have an Ensembl ID but an empty
## HGNC symbol.
no_symbol <- symbol$ensembl_gene_id != "" & symbol$hgnc_symbol == ""
df <- data.frame(table(symbol$gene_biotype[no_symbol]))
colnames(df) <- c("Gene_Biotype", "Number")

## Sort ascending by count using base R. The earlier `%>% arrange(Freq)`
## needed dplyr, which this script never loads.
df <- df[order(df$Number), ]
rownames(df) <- NULL

## Fix the factor levels in sorted order so ggplot keeps that order on the
## axis instead of falling back to alphabetical order.
df$Gene_Biotype <- factor(
  df$Gene_Biotype,
  levels = as.character(df$Gene_Biotype)
)

## Bar chart of counts per biotype; coord_flip() puts the biotype labels on
## the vertical axis.
ggplot(data = df, aes(x = Gene_Biotype, y = Number)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  ggtitle("Genes without HGNC symbol")