Examples

This section contains short examples that use gefpy to generate and read bgef and cgef files, correct cells, and convert gef files to gem.

Generate bgef

from gefpy.bgef_writer_cy import generate_bgef

# Input files (either one can be the source) and the output bgef path.
gem_file = "FP200000617TL_B6.gem"
ingef_file = "FP200000617TL_B6.bin1.gef"
bgef_file = "FP200000617TL_B6.bgef"

# Options passed unchanged to both generate_bgef calls below.
stromics = "Transcriptomics"
bin_sizes = [1, 10, 20, 50, 100, 200, 500]
region = [1000, 2000, 1000, 2000]
shared_options = dict(
    bgef_file=bgef_file,
    stromics=stromics,
    n_thread=8,
    bin_sizes=bin_sizes,
    region=region,
)

# Generate the bgef from a gem file.
generate_bgef(input_file=gem_file, **shared_options)

# Generate the bgef from a bin1 gef file (same output path as above).
generate_bgef(input_file=ingef_file, **shared_options)

Read bgef

from gefpy.bgef_reader_cy import BgefR

# Open the bgef file. The two numeric arguments are presumably the bin size
# and the thread count -- TODO confirm against the BgefR documentation.
bgef = BgefR("FP200000617TL_B6.gef", 50, 4)

# get expression num
expnum = bgef.get_expression_num()

# get cell num
cellnum = bgef.get_cell_num()

# get gene num
genenum = bgef.get_gene_num()

# get gene name list
genelist = bgef.get_gene_names()

# get cell id list (each cellid item is (x<<32|y))
cellid = bgef.get_cell_names()

# get gene data
# gene_index is a list that saves the gene idx of each expression
# gene_names is a list of gene names
gene_index, gene_names = bgef.get_gene_data()

# get all expression, each item is (x, y, count, exon)
explist = bgef.get_expression()

# get sparse matrix indexes of expression data
# uniq_cell is a list that saves all cells, each cell val is (x<<32|y)
# cell_index is a list that saves the cell idx of each expression
# count is a list that saves the midcnt of each expression
uniq_cell, cell_index, count = bgef.get_exp_data()

# get the bgef offset (minx, miny); kept under its own names so it is not
# overwritten by the attribute query below
offset_x, offset_y = bgef.get_offset()

# get bgef attr
minx, miny, maxx, maxy, maxexp, resolution = bgef.get_exp_attr()

# get the explist by the specified gene name in the region
# (the bounds must be assigned before this call, hence the query above;
# the result gets its own name so the full explist is not discarded)
region_explist = bgef.get_genedata_in_region(minx, maxx, miny, maxy, "xxx")

Generate cgef

from gefpy.cgef_writer_cy import generate_cgef

# Inputs: the raw bgef and the mask image.
bgef_file = "FP200000617TL_B6.raw.bgef"
mask_file = "FP200000617TL_B6_mask.tif"

# Output cgef path and the block sizes handed to generate_cgef.
cgef_file = "FP200000617TL_B6.cgef"
block_sizes = [256, 256]

# Build the cgef from the bgef and the mask.
generate_cgef(cgef_file, bgef_file, mask_file, block_sizes)

Read cgef

from gefpy.cgef_reader_cy import CgefR

# Open the cgef file for reading.
cgef = CgefR("FP200000617TL_B6.cgef")

# get the number of expression
expnum = cgef.get_expression_num()

# get cell num
cellnum = cgef.get_cell_num()

# get gene num
genenum = cgef.get_gene_num()

# get a list of gene names
genelist = cgef.get_gene_names()

# get a list of cell ids, each cell id is (cell.x <<32 | cell.y)
cellidlist = cgef.get_cell_names()

# get all cell
celllist = cgef.get_cells()

# get all gene (separate name so the gene-name list above is kept)
genes = cgef.get_genes()

# get the count of each cell in each gene
cell_id, cell_count = cgef.get_cellid_and_count()

# get the count of each gene in each cell
# (separate name so the per-cell counts above are kept)
gene_id, gene_count = cgef.get_geneid_and_count()

# get the borders
border = cgef.get_cellborders()

Correct cell

import numpy as np

from gefpy.cgef_adjust_cy import CgefAdjust

adjust = CgefAdjust()

# 1. get gene and cell info by bgef and cgef
bgef = "FP200000617TL_B6.raw.bgef"
cgef = "FP200000617TL_B6.cgef"

# genelist is a list of gene names
# cell is a list of cell data, every item includes (geneid, x, y, midcnt, cellid)
genelist, cell = adjust.get_cell_data(bgef, cgef)

# 2. do cell correct in stereopy

# 3. write result to cgef
path = "FP200000617TL_B6.adjust.cgef"

# Structured dtypes for the two arrays passed to write_cgef_adjustdata.
celltype = np.dtype({
    'names': ['cellid', 'offset', 'count'],
    'formats': [np.uint32, np.uint32, np.uint32],
})
dnbtype = np.dtype({
    'names': ['x', 'y', 'count', 'gene_id'],
    'formats': [np.int32, np.int32, np.uint16, np.uint16],
})

# Illustrative values only: replace them with the corrected data from step 2,
# one tuple per record. A literal "..." placeholder must not be left inside
# the lists, because NumPy cannot convert Ellipsis to these structured dtypes.
celldata = np.array([(0, 0, 10), (10, 10, 5), (13, 15, 20)], dtype=celltype)
dnbdata = np.array([(400, 400, 6, 456), (5000, 5000, 7, 258)], dtype=dnbtype)

adjust.write_cgef_adjustdata(path, celldata, dnbdata)

Generate gem by gef

from gefpy.gef_to_gem_cy import gefToGem

# Paths and settings used by the conversions below, grouped up front.
strsn = "FP200000617TL_B6"
strout = "FP200000617TL_B6.gem"
strbgef = "FP200000617TL_B6.bgef"
strcgef = "FP200000617TL_B6.cgef"
strmask = "FP200000617TL_B6_mask.tif"
binsize = 10

# Converter constructed from the output gem path and the sn string.
obj = gefToGem(strout, strsn)

# generate bgem from the bgef at the given bin size
obj.bgef2gem(strbgef, binsize)

# generate cgem from the cgef and the bgef
obj.cgef2gem(strcgef, strbgef)

# generate cgem from the mask and the bgef
obj.bgef2cgem(strmask, strbgef)