gravatar for danekhoffman0319

2 hours ago by

import gzip
input_file = open("example.bed","rb")#compress existing file
data = input_file.read()
with gzip.open("example.bed.gz", "wb") as filez:
    filez.write(data)
    filez.close()

import pandas as pd#converts gz file to .txt 
df=pd.read_csv("example.bed.gz", delimiter='t',header=1 )
df.to_csv('exampleziptotxt.bed', index=False) 



import gzip
import os
file_name = "exampleziptotxt.bed"
out_file_root = "example_by_chrom"
file_handle_dict = {}
with open(file_name, "rb") as file_reader:

   for line in file_reader:
      ff = line.split()
      chrom_name = ff[0].decode("utf-8")

      if not (chrom_name in file_handle_dict):
          out_file_chrom_name = out_file_root + "." + chrom_name + ".bed.gz"

          with gzip.open(out_file_chrom_name, "wb") as out_file_chrom_name_handle:
                file_handle_dict[chrom_name] = out_file_chrom_name_handle
               file_handle_dict[chrom_name].write(line)

          file_handle_dict[chrom_name].write(gzip.compress(line))
file_reader.close()

(Desired) program takes a .bed file compresses it, reconverts the gzipped file to a .txt file , and then reads the contents and produces individual gzipped .bed files for each chromosome containing each gene belonging to that chromosome. vs. (Reality) the current script produces gzipped files for every gene for each chromosome and then eventually throws an error

FileNotFoundError: [Errno 2] No such file or directory: example_by_chrom.chr12,11733136,11733137,Cyp3a23/3a1,1,-.bed.gz'

Any help with solving this problem will be greatly appreciated. I have been stuck for days with this issue.



Source link