gravatar for aranyak111

2 hours ago by

I am trying to do RNA seq analysis and my goal is to filter gene counts less than 5 using custom made python scripts. The code chunk goes as follows

 #  filter reads lower than five

        nano filtercounts.py

        #!/usr/bin/python

        #Load relevant modules

        import sys

        #Read in the HTseq gene count output files for each of the 8 samples

        inputlist = [sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5], sys.argv[6], sys.argv[7], sys.argv[8]]  

        ##Determine the genes with sufficient coverage
        #Set up a variable to hold genes with sufficient coverage in any of the 8 samples

        genelist = []

        #For each HTseq file

        for i in range(len(inputlist)):

            #Open it

            inputfile = open(inputlist[i], 'r')

            #For each line

            for line in inputfile:

                #Determine the number of reads/transcripts for this gene

                splitline = line.strip().split('t')

                #Does it pass the threshold?

                if int(splitline[1]) >= 5: 

                    #If so, add it to list

                    if not splitline[0] in genelist:

                        genelist.append(splitline[0])

            #Close the file

            inputfile.close()

        #Write out the list of sufficiently expressed genes

        outputlist = open(sys.argv[9], 'w') 

        for gene in genelist:

            outputlist.write(gene + 'n')

        outputlist.close()
    ##Filter each of the HTseq files for only sufficiently expressed genes across our samples
    #For each HTseq file

    for i in range(len(inputlist)):

        #Open it

        inputfile = open(inputlist[i], 'r')

        #Open an output 'filtered' file

        outputfile = open(sys.argv[i+10], 'w')

        #For each line

        for line in inputfile:

            #Determine the gene

            splitline = line.strip().split('t')

            #Is it in our list?

            if splitline[0] in genelist:

                #If so, write out the original line

                outputfile.write(line)

        #Close the files

        inputfile.close()
        outputfile.close()

python filtercounts.py 'wth1count.txt' 'wth2count.txt' 'wth3count.txt' 'wth4count.txt' '99h1count.txt' '99h2count.txt' '99h3count.txt' '99h4count.txt' 'genelist.txt' 'wth1filtered.txt' 'wth2filtered.txt' 'wth3filtered.txt' 'wth4filtered.txt' '99h1filtered.txt' '99h2filtered.txt' '99h3filtered.txt' '99h4filtered.txt'

The program works fine but at the end of the code execution I am not getting the last two output files '99h3filtered.txt' '99h4filtered.txt'.



Source link