gravatar for Kevin Blighe

1 hour ago by

Republic of Ireland

[following from comment trail]

Hi, it may be your lucky day because I have been processing other microarray studies all morning. Here is how you could process this (after you download the CEL files and unzip them):

[Note: you may be asked to install one or both of the packages loaded at the top of the code below, and you will notice another package being downloaded automatically when you run rma().]

  # Reads the raw CEL files of one GEO series, RMA-normalises them, maps each
  # probe ID to an Ensembl gene ID and gene symbol, and assembles a single
  # data frame ('final') with one row per annotated probe.

  # workspace setup
    # library() stops with an error if a package is missing, whereas require()
    # only warns and lets the script fail later with a less obvious message
    library(oligo)
    library(drosophila2.db)

    gseID <- 'GSE21344'

    # directory holding the unzipped CEL files, derived from the series ID so
    # that only gseID needs changing to process another series
    celDir <- paste0(gseID, '_RAW')

  # read in and prepare data
    # raw CEL files
      message('--loading CEL files for ', gseID)

      # 'pattern' is a regular expression, not a glob: anchor on the extension
      # so that e.g. '*.cel.gz' leftovers are skipped, and ignore case so that
      # both '.cel' and '.CEL' are found
      celFiles <- list.files(celDir, pattern = '\\.cel$',
        ignore.case = TRUE, full.names = TRUE)
      if (length(celFiles) == 0) {
        stop('no CEL files found in ', celDir, call. = FALSE)
      }

      # file paths and sample names come from the same listing, so they
      # cannot get out of step with each other
      raw <- read.celfiles(
        filenames = celFiles,
        sampleNames = sub('\\.cel$', '', basename(celFiles),
          ignore.case = TRUE))

    # RMA
      message('--RMA normalising...')
      gset <- rma(raw)
      message('--Done.')

      # keep only the expression matrix (rows = probes, columns = samples)
      gset <- exprs(gset)
      probes <- rownames(gset)
      samIDs <- colnames(gset)

    # annotate
      # namespace-qualified because select() is frequently masked by other
      # attached packages (e.g. dplyr)
      annotLookup <- AnnotationDbi::select(drosophila2.db, keys = probes,
        keytype = 'PROBEID',
        columns = c('PROBEID', 'ENSEMBL', 'SYMBOL'))

      # remove probes with any NA mapping
        hasMapping <- !is.na(annotLookup$ENSEMBL) & !is.na(annotLookup$SYMBOL)
        annotLookup <- annotLookup[hasMapping, ]

        # a probe can map to several genes; keep the first mapping only
        annotLookup <- annotLookup[!duplicated(annotLookup$PROBEID), ]

      # look up the ensembl ID and gene symbol
        probes <- probes[probes %in% annotLookup$PROBEID]

        # drop = FALSE keeps gset a matrix even if a single probe remains
        gset <- gset[probes, , drop = FALSE]
        idx <- match(probes, annotLookup$PROBEID)

        # abort, rather than merely print FALSE, if the expression matrix and
        # the annotation table are not aligned row for row
        stopifnot(
          all(rownames(gset) == probes),
          all(probes == annotLookup$PROBEID[idx]))

        geneid <- annotLookup$SYMBOL[idx]
        ens <- annotLookup$ENSEMBL[idx]

  # finalise the dataset
    # one row per probe: Ensembl ID, gene symbol, then one column per sample
    final <- data.frame(ens = ens, symbol = geneid, gset)
    head(final)
                         ens  symbol GSM533369 GSM533370 GSM533371 GSM533372
    1616608_a_at FBgn0001128   Gpdh1  3.075607  2.987812  3.121998  2.794935
    1622892_s_at FBgn0035889   mkg-p 11.411883 11.415364 11.400395 11.779498
    1622893_at   FBgn0040736     IM3  3.061561  3.233674  3.483630  3.316230
    1622894_at   FBgn0034454 CG15120  3.930225  3.829407  3.770370  3.873030
    1622895_at   FBgn0052075 CG32075 10.966323 10.857122 10.971413 10.743497
    1622896_at   FBgn0038966   pinta 13.159087 13.212790 13.309891 13.335578
                 GSM533373 GSM533374 GSM533375 GSM533376 GSM533377 GSM533378
    1616608_a_at  2.827791  2.918710  3.072793  3.108860  2.946866  3.167440
    1622892_s_at 11.803834 11.717940 11.691620 11.720805 11.688627 11.429850
    1622893_at    3.207360  3.162072  3.193889  3.101028  3.034796  3.258224
    1622894_at    4.131397  3.974184  3.777796  3.719844  3.854079  3.788482
    1622895_at   10.745481 10.780364 10.729215 10.632876 10.657724 11.078941
    1622896_at   13.271371 13.348126 13.644451 13.747381 13.740111 11.819523
                 GSM533379 GSM533380 GSM533381 GSM533382 GSM533383 GSM533384
    1616608_a_at  3.171195  3.078535  3.147919  3.067052  3.046781  3.056133
    1622892_s_at 11.484802 11.503235 11.868650 11.820685 11.868820 11.618499
    1622893_at    3.468744  3.510779  3.456605  3.573743  3.275730  2.915941
    1622894_at    3.904618  3.906584  3.704304  3.772455  3.656190  3.888913
    1622895_at   11.129996 11.103732 10.970281 10.920947 10.922826 10.798052
    1622896_at   11.855323 11.790097 11.928086 11.896962 11.949402 12.527205
                 GSM533385 GSM533386
    1616608_a_at  2.987426  2.987495
    1622892_s_at 11.594103 11.531154
    1622893_at    3.274847  3.020663
    1622894_at    3.529930  3.878161
    1622895_at   10.853586 10.790171
    1622896_at   12.542879 12.492722

Hopefully you can retrieve the metadata to match these GSM IDs. Please check the GEO record.

Kevin



Source link