Skip to content

UC Davis TAMA Tutorial 1

GenomeRIK edited this page Oct 1, 2020 · 14 revisions

Setup and running TAMA Collapse on aligned cluster/polish reads

/share/workshop/isoseq_workshop/rkuo/folder_copy.tar.gz

cp /share/workshop/isoseq_workshop/rkuo/folder_copy.tar.gz .

tar zxvf /share/workshop/isoseq_workshop/rkuo/folder_copy.tar.gz

Get data

path

  /share/workshop/isoseq_workshop/rkuo/1_data

files

  alz.aligned.bam
  alz.collapsed.gff
  alz.flnc.bam
  alz.polished.cluster_report.csv
  gencode.v33.gtf
  hg38.fa

make working folders

  1_data
  2_map
  3_collapse
  4_merge
  5_degsig
  6_orf_nmd

Load modules

  samtools/1.10
  bedtools2/2.29.2
  biopython/1.71
  bamtools/2.4.2 
  minimap2/2.17

TAMA Collapse for Iso-Seq3 Cluster/Polish sequences

go to folder

  3_collapse

make sub folders

  1_tc_cluster_nolde_nc
  2_tc_flnc_nolde_nc
  3_tc_flnc_nolde_c
  4_tc_flnc_lde220_nc

go to folder

  1_tc_cluster_nolde_nc

make bash script (this is for running TAMA Collapse)

  run_tama_collapse.sh

Fill bash script

  # Runs tama_collapse.py on the aligned input file; the -p value below is
  # passed as the output prefix.
  # Written for plain POSIX sh because the tutorial runs it with `sh`.

  # Folder and file name of the TAMA Collapse script. The folder must end in
  # "/" because the full path is built by plain concatenation.
  spath='/share/workshop/isoseq_workshop/rkuo/tama/'
  pscript='tama_collapse.py'

  # Value handed to the -x option.
  capflag='no_cap'

  # Input data folder (trailing "/" required, see above) and input file name.
  # NOTE: the variable is called "sam" but it holds a BAM file; -b BAM is
  # passed to the script accordingly.
  fpath='/share/workshop/isoseq_workshop/rkuo/1_data/'
  sam='alz.aligned.bam'
  fasta="${fpath}hg38.fa"

  # Output prefix: the input file name without its ".bam" suffix, tagged with
  # the run settings, e.g. tc_nc_nolde_cluster_alz.aligned
  prefix="tc_nc_nolde_cluster_${sam%.bam}"

  # Every expansion is quoted so paths containing spaces stay single arguments.
  python "${spath}${pscript}" -s "${fpath}${sam}" -f "${fasta}" -p "${prefix}" \
    -d merge_dup -x "${capflag}" -a 100 -z 100 -sj sj_priority -sjt 10 \
    -log log_off -b BAM

run script

  sh run_tama_collapse.sh

make bash script (this provides a summary of the resulting annotation bed12 file)

  run_summary_bed.sh

fill in

  # Prints summary counts for a tab-separated annotation file whose 4th column
  # has the form "<gene_id>;<transcript_id>" and whose 10th column is a number
  # (rows with a value > 1 there are counted as multi-exon).
  #
  # Usage:   sh run_summary_bed.sh <annotation.bed>
  # Outputs: four label lines, each followed by a count, on stdout.
  # Returns: 0 on success, 1 if the file argument is missing or unreadable.
  summarize_bed() {
    # Without this guard a missing argument would leave the tools reading
    # from stdin, which makes the script appear to hang.
    if [ "$#" -lt 1 ] || [ ! -r "$1" ]; then
      printf 'Usage: sh run_summary_bed.sh <annotation.bed>\n' >&2
      return 1
    fi

    # split() breaks column 4 on ";" so id[1] is the gene ID and id[2] is the
    # transcript ID; sort -u | wc -l then counts the distinct values.
    echo "Genes"
    awk -F '\t' '{split($4, id, ";"); print id[1]}' "$1" | sort -u | wc -l
    echo "Transcripts"
    awk -F '\t' '{split($4, id, ";"); print id[2]}' "$1" | sort -u | wc -l
    echo "Multi-exon Transcripts"
    awk -F '\t' '$10 > 1 {split($4, id, ";"); print id[2]}' "$1" | sort -u | wc -l
    echo "Multi-exon Genes"
    awk -F '\t' '$10 > 1 {split($4, id, ";"); print id[1]}' "$1" | sort -u | wc -l
  }

  summarize_bed "$@"

run script

  sh run_summary_bed.sh output.bed