-->
-->ID: 9E46A0E9-54C6-AD98-F32A-8E350D9B060D
Yu Lab's preprocessing pipeline for PRO-cap libraries (two biological replicates)
Software | Parameter |
---|---|
mkdir |
-p {{Workspace}}/raw_qc |
fastqc |
--out {{Workspace}}/raw_qc --threads 16 {{InputFile}} |
fastp |
-i {{InputFile:1}} -I {{InputFile:2}} --umi --umi_len={{UMI_LEN1||6}} --umi_loc={{UMI_LOC1||per_read}} -g --low_complexity_filter -w 16 -c -h ./{{JobName}}_rep1.html -j ./{{JobName}}_rep1.json -o {{JobName}}_rep1.1.fq.gz -O {{JobName}}_rep1.2.fq.gz --adapter_sequence {{ADAPT1_1||TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC}} --adapter_sequence_r2 {{ADAPT1_2||GATCGTCGGACTGTAGAACTCTGAAC}} -p --overlap_len_require 18 -l 18 |
fastp |
-i {{InputFile:3}} -I {{InputFile:4}} --umi --umi_len={{UMI_LEN2||6}} --umi_loc={{UMI_LOC2||per_read}} -g --low_complexity_filter -w 16 -c -h ./{{JobName}}_rep2.html -j ./{{JobName}}_rep2.json -o {{JobName}}_rep2.1.fq.gz -O {{JobName}}_rep2.2.fq.gz --adapter_sequence {{ADAPT2_1||TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC}} --adapter_sequence_r2 {{ADAPT2_2||GATCGTCGGACTGTAGAACTCTGAAC}} -p --overlap_len_require 18 -l 18 |
fastqc |
--threads 16 -o . {{JobName}}_rep1.1.fq.gz |
fastqc |
--threads 16 -o . {{JobName}}_rep1.2.fq.gz |
fastqc |
--threads 16 -o . {{JobName}}_rep2.1.fq.gz |
fastqc |
--threads 16 -o . {{JobName}}_rep2.2.fq.gz |
STAR |
--readFilesCommand zcat --runThreadN {{ThreadN}} --alignMatesGapMax 1000 --outFilterMultimapNmax 10 --outFilterMismatchNmax 1 --outFilterMultimapScoreRange 0 --outSAMtype BAM SortedByCoordinate --genomeDir {{STAR_INDEX}} --outFileNamePrefix {{JobName}}_rep1_ --readFilesIn {{JobName}}_rep1.1.fq.gz {{JobName}}_rep1.2.fq.gz --alignIntronMax 1000 --quantMode GeneCounts --outSAMattributes All |
STAR |
--readFilesCommand zcat --runThreadN {{ThreadN}} --alignMatesGapMax 1000 --outFilterMultimapNmax 10 --outFilterMismatchNmax 1 --outFilterMultimapScoreRange 0 --outSAMtype BAM SortedByCoordinate --genomeDir {{STAR_INDEX}} --outFileNamePrefix {{JobName}}_rep2_ --readFilesIn {{JobName}}_rep2.1.fq.gz {{JobName}}_rep2.2.fq.gz --alignIntronMax 1000 --quantMode GeneCounts --outSAMattributes All |
samtools |
view -hb -q 255 -o {{JobName}}_rep1_with_dup.bam {{JobName}}_rep1_Aligned.sortedByCoord.out.bam |
samtools |
index {{JobName}}_rep1_with_dup.bam |
umi_tools |
dedup --unpaired-reads=discard --umi-separator=: --paired -I {{JobName}}_rep1_with_dup.bam --output-stats={{JobName}}_rep1 -S {{JobName}}_rep1.bam |
samtools |
view -hb -q 255 -o {{JobName}}_rep2_with_dup.bam {{JobName}}_rep2_Aligned.sortedByCoord.out.bam |
samtools |
index {{JobName}}_rep2_with_dup.bam |
umi_tools |
dedup --unpaired-reads=discard --umi-separator=: --paired -I {{JobName}}_rep2_with_dup.bam --output-stats={{JobName}}_rep2 -S {{JobName}}_rep2.bam |
samtools |
index {{JobName}}_rep1.bam |
samtools |
index {{JobName}}_rep2.bam |
multiBamSummary |
bins --bamfiles {{JobName}}_rep1.bam {{JobName}}_rep2.bam -o {{JobName}}.npz -bs 500 -p 16 |
awk |
'BEGIN{OFS="\t";FS="\t"}{print $1,$3}' {{JobName}}_rep1_ReadsPerGene.out.tab > {{JobName}}_rep1.counts |
awk |
'BEGIN{OFS="\t";FS="\t"}{print $1,$3}' {{JobName}}_rep2_ReadsPerGene.out.tab > {{JobName}}_rep2.counts |
samtools |
merge -fh {{JobName}}_rep1.bam {{JobName}}_merged.bam {{JobName}}_rep1.bam {{JobName}}_rep2.bam |
pints_visualizer |
-b {{JobName}}_rep1.bam -e CoPRO --mapq-threshold 255 --chromosome-start-with chr -o {{JobName}}_rep1_rpm --filters U13369 chrM _ EBV Mycoplasma --rpm --cache |
pints_visualizer |
-b {{JobName}}_rep2.bam -e CoPRO --mapq-threshold 255 --chromosome-start-with chr -o {{JobName}}_rep2_rpm --filters U13369 chrM _ EBV Mycoplasma --rpm --cache |
pints_visualizer |
-b {{JobName}}_rep1.bam -e CoPRO --mapq-threshold 255 --chromosome-start-with chr -o {{JobName}}_rep1 --filters U13369 chrM _ EBV Mycoplasma |
pints_visualizer |
-b {{JobName}}_rep2.bam -e CoPRO --mapq-threshold 255 --chromosome-start-with chr -o {{JobName}}_rep2 --filters U13369 chrM _ EBV Mycoplasma |
pints_visualizer |
-b {{JobName}}_merged.bam -e CoPRO --mapq-threshold 255 --chromosome-start-with chr -o {{JobName}}_merged_rpm --filters U13369 chrM _ EBV Mycoplasma --rpm --cache |
pints_visualizer |
-b {{JobName}}_merged.bam -e CoPRO --mapq-threshold 255 --chromosome-start-with chr -o {{JobName}}_merged --filters U13369 chrM _ EBV Mycoplasma |
samtools |
merge -fh {{JobName}}_rep1_with_dup.bam {{JobName}}_merged_with_dup.bam {{JobName}}_rep1_with_dup.bam {{JobName}}_rep2_with_dup.bam |
pints_caller |
--file-prefix {{JobName}} --bw-pl {{JobName}}_merged_pl.bw --bw-mn {{JobName}}_merged_mn.bw --save-to . --thread 16 --fdr-target {{FDR||0.1}} {{FlexiblePars}} |