bwa |
aln -t {{ThreadN}} {{JBL_GENOME_95BP_HG19_FASTA}} {{InputFile:1}} > PE1.sai |
bwa |
aln -t {{ThreadN}} {{JBL_GENOME_95BP_HG19_FASTA}} {{InputFile:2}} > PE2.sai |
bwa |
samse -n4 {{JBL_GENOME_95BP_HG19_FASTA}} PE1.sai {{InputFile:1}} > PE1.sam |
bwa |
samse -n4 {{JBL_GENOME_95BP_HG19_FASTA}} PE2.sai {{InputFile:2}} > PE2.sam |
python |
{{UserBin}}/merge_sam.py -o merged.sam |
java |
-cp {UserBin} convertCoordinates < merged.sam > alignment.converted.sam |
rm |
merged.sam |
samtools |
rmdup alignment.converted.sam alignment.converted.dedupped.sam |
samtools |
view -@ {ThreadN} -b -q 10 -F 4 -o alignment.bam alignment.converted.dedupped.sam |
rm |
alignment.converted.dedupped.sam |
samtools |
sort -@ {ThreadN} -o aligned.sorted.bam alignment.bam |
java |
-jar {UserBin}/picard.jar CleanSam I=aligned.sorted.bam O=alignments.cleaned.bam |
rm |
alignment.bam |
java |
-jar {UserBin}/picard.jar AddOrReplaceReadGroups I=alignments.cleaned.bam O=alignments.cleaned.added.bam SO=coordinate RGLB=lib1 RGPL=illumina RGPU=unit1 RGSM={EXP} VALIDATION_STRINGENCY=SILENT |
rm |
alignments.cleaned.bam |
java |
-jar {UserBin}/picard.jar ReorderSam I=alignments.cleaned.added.bam O=alignments.cleaned.added.reordered.bam R={GENOME_HG19_FA} CREATE_INDEX=TRUE VALIDATION_STRINGENCY=SILENT |
rm |
alignments.cleaned.added.bam |
java |
-jar {UserBin}/GenomeAnalysisTK.jar -T BaseRecalibrator -R {GENOME_HG19_FA} -I alignments.cleaned.added.reordered.bam -o recal_data.table -knownSites {dbSNP138_HG19_VCF} -nct {ThreadN} -U ALLOW_N_CIGAR_READS |
java |
-jar {UserBin}/GenomeAnalysisTK.jar -T PrintReads -R {GENOME_HG19_FA} -I alignments.cleaned.added.reordered.bam -BQSR recal_data.table -o alignments.cleaned.added.reordered.bsqr.bam -nct {ThreadN} -U ALLOW_N_CIGAR_READS |
java |
-jar {UserBin}/GenomeAnalysisTK.jar -T UnifiedGenotyper -R {GENOME_HG19_FA} -I alignments.cleaned.added.reordered.bsqr.bam -o variants.vcf -out_mode EMIT_VARIANTS_ONLY -glm SNP -stand_call_conf 0 -stand_emit_conf 0 -U ALLOW_N_CIGAR_READS |
python |
{UserBin}/giremi_wrapper.py -m remove_homo -i {Workspace}/variants.vcf -o {Workspace}/variants.homozygous.vcf |
grep |
-v '#' {Workspace}/variants.homozygous.vcf | awk -v qual=20 '$6>=qual' | awk '{OFS="\t";split($10,a,":");split(a[2],b,",");print $1"\t"$2"\t"b[1]+b[2]","b[2]"\t"$4"\t"$5"\t"b[2]/(b[1]+b[2])}' > variants.homozygous.txt |
python |
{UserBin}/variant_filter.py -m ft -t {dbSNP150_HG19_GZ} -i variants.homozygous.txt -o variants.homozygous.desnp.txt |
perl |
{UserBin}/filter_mismatch_first6bp.pl -infile {Workspace}/variants.homozygous.desnp.txt -outfile {Workspace}/variants.homozygous.desnp.rmhex.txt -bamfile {Workspace}/alignments.cleaned.added.reordered.bsqr.bam |
AnnotateTable.py |
-a {RMSK_HG19} -i variants.homozygous.desnp.rmhex.txt -u -c1,2,3 -n rmsk -o variants.homozygous.desnp.rmhex.repeat_anned.txt |
grep |
"Alu" variants.homozygous.desnp.rmhex.repeat_anned.txt > alu_editing_sites.txt |
grep |
-v "Alu" variants.homozygous.desnp.rmhex.repeat_anned.txt > non_alu_candidates.txt |
awk |
'{OFS="\t";$2=$2-1"\t"$2;print $0}' non_alu_candidates.txt | intersectBed -a stdin -b {SIMPLE_REPEAT_HG19} -v | cut -f1,3-7 > non_alu_candidates.rmsk.txt |
perl |
{UserBin}/filter_intron_near_splicejuncts.pl -infile non_alu_candidates.rmsk.txt -outfile non_alu_candidates.rmsk.rmintron.txt -genefile {GENE_ANNOTATION_HG19_TXT} |
python |
{UserBin}/variant_filter.py -m rh -t {GENOME_HG19_FA} -i {Workspace}/non_alu_candidates.rmsk.rmintron.txt -o {Workspace}/non_alu_candidates.rmsk.rmintron.rmhom.txt |
perl |
{UserBin}/BLAT_candidates.pl -infile {Workspace}/non_alu_candidates.rmsk.rmintron.rmhom.txt -outfile {Workspace}/non_alu_candidates.rmsk.rmintron.rmhom.rmblat.txt -bamfile alignments.cleaned.added.reordered.bsqr.bam -refgenome {GENOME_HG19_FA} |
cat |
alu_editing_sites.txt non_alu_candidates.rmsk.rmintron.rmhom.rmblat.txt > {JobName}_editing_sites.txt |