gravatar for yzhao140

2 hours ago by

Hi, all! I did a project using this dataset: support.10xgenomics.com/single-cell-multiome-atac-gex/datasets/1.0.0/human_brain_3k. You can download the data using this link: cf.10xgenomics.com/samples/cell-arc/1.0.0/human_brain_3k/human_brain_3k_filtered_feature_bc_matrix.tar.gz. But when I analyzed the single-cell ATAC-seq data, I found that the maximum count in the matrix can be as high as 400. Why? The code I used is listed below.

import time
import numpy as np
import csv
import gzip
import os
import scipy.io
import codecs
import torch
from torch.utils.data import Dataset
from scipy.sparse import coo_matrix

def _read_tsv_rows(tsv_gz_path: str) -> list:
    """Return every row of a gzip-compressed, tab-separated file.

    Each row is a list of string fields. The file is opened in text mode
    (UTF-8) inside a ``with`` block so the handle is closed once the rows
    have been read.
    """
    # newline="" is the mode the csv module asks for on files it parses.
    with gzip.open(tsv_gz_path, "rt", encoding="utf-8", newline="") as handle:
        # The delimiter must be a real tab character ("\t"); a plain "t"
        # would split every field on the letter t.
        return list(csv.reader(handle, delimiter="\t"))


# Directory holding matrix.mtx.gz, features.tsv.gz and barcodes.tsv.gz.
# NOTE(review): this literal looks like a Windows path whose backslashes were
# lost when the snippet was pasted -- confirm the real location before running.
path = r"F:zymfiltered_feature_bc_matrix"

# Load the Matrix Market file; rows are features, which is what the row
# slicing below relies on.
mat = scipy.io.mmread(os.path.join(path, "matrix.mtx.gz"))
# todense() materializes the whole matrix in memory. Kept so that `mat`, `X`
# and `Y` have the same type as before for any code that uses them later.
mat = mat.todense()

# Parse the feature table once and pull the three columns from the same rows
# instead of decompressing and parsing the file three times.
features_path = os.path.join(path, "features.tsv.gz")
feature_rows = _read_tsv_rows(features_path)
feature_ids = [row[0] for row in feature_rows]
gene_names = [row[1] for row in feature_rows]
feature_types = [row[2] for row in feature_rows]

barcodes_path = os.path.join(path, "barcodes.tsv.gz")
barcodes = [row[0] for row in _read_tsv_rows(barcodes_path)]

# Number of 'Gene Expression' rows (the original author noted 36601 for this
# dataset). Computed here rather than hard-coded so the split follows the file.
n_gene_features = feature_types.count('Gene Expression')

# The slices below are only valid if every 'Gene Expression' row comes before
# all other feature types, so verify that instead of assuming it.
if any(kind != 'Gene Expression' for kind in feature_types[:n_gene_features]):
    raise ValueError(
        "'Gene Expression' features are not a contiguous leading block; "
        "cannot split the matrix by row position"
    )

X = mat[:n_gene_features,]   # gene-expression rows
Y = mat[n_gene_features:,]   # all remaining rows (the non-expression features)

# NOTE(review): the values in Y are presumably raw per-feature counts rather
# than binarized 0/1 accessibility, which would explain maxima far above 1 --
# confirm against the 10x Genomics description of the matrix contents.
Y.max()

link

modified 2 hours ago

written
2 hours ago
by

yzhao1400



Source link