--> -->

Environment

  • Operation System: Linux/Unix
  • CPU cores: 24
  • Memory: 125GB

Steps

Software Parameter
bwa aln -t {{ThreadN}} {{JBL_GENOME_95BP_HG19_FASTA}} {{InputFile:1}} > PE1.sai
bwa aln -t {{ThreadN}} {{JBL_GENOME_95BP_HG19_FASTA}} {{InputFile:2}} > PE2.sai
bwa samse -n4 {{JBL_GENOME_95BP_HG19_FASTA}} PE1.sai {{InputFile:1}} > PE1.sam
bwa samse -n4 {{JBL_GENOME_95BP_HG19_FASTA}} PE2.sai {{InputFile:2}} > PE2.sam
python {{UserBin}}/merge_sam.py -o merged.sam
java -cp {UserBin} convertCoordinates < merged.sam > alignment.converted.sam
rm merged.sam
samtools rmdup alignment.converted.sam alignment.converted.dedupped.sam
samtools view -@ {ThreadN} -b -q 10 -F 4 -o alignment.bam alignment.converted.dedupped.sam
rm alignment.converted.dedupped.sam
samtools sort -@ {ThreadN} -o aligned.sorted.bam alignment.bam
java -jar {UserBin}/picard.jar CleanSam I=aligned.sorted.bam O=alignments.cleaned.bam
rm alignment.bam
java -jar {UserBin}/picard.jar AddOrReplaceReadGroups I=alignments.cleaned.bam O=alignments.cleaned.added.bam SO=coordinate RGLB=lib1 RGPL=illumina RGPU=unit1 RGSM={EXP} VALIDATION_STRINGENCY=SILENT
rm alignments.cleaned.bam
java -jar {UserBin}/picard.jar ReorderSam I=alignments.cleaned.added.bam O=alignments.cleaned.added.reordered.bam R={GENOME_HG19_FA} CREATE_INDEX=TRUE VALIDATION_STRINGENCY=SILENT
rm alignments.cleaned.added.bam
java -jar {UserBin}/GenomeAnalysisTK.jar -T BaseRecalibrator -R {GENOME_HG19_FA} -I alignments.cleaned.added.reordered.bam -o recal_data.table -knownSites {dbSNP138_HG19_VCF} -nct {ThreadN} -U ALLOW_N_CIGAR_READS
java -jar {UserBin}/GenomeAnalysisTK.jar -T PrintReads -R {GENOME_HG19_FA} -I alignments.cleaned.added.reordered.bam -BQSR recal_data.table -o alignments.cleaned.added.reordered.bsqr.bam -nct {ThreadN} -U ALLOW_N_CIGAR_READS
java -jar {UserBin}/GenomeAnalysisTK.jar -T UnifiedGenotyper -R {GENOME_HG19_FA} -I alignments.cleaned.added.reordered.bsqr.bam -o variants.vcf -out_mode EMIT_VARIANTS_ONLY -glm SNP -stand_call_conf 0 -stand_emit_conf 0 -U ALLOW_N_CIGAR_READS
python {UserBin}/giremi_wrapper.py -m remove_homo -i {Workspace}/variants.vcf -o {Workspace}/variants.homozygous.vcf
grep -v '#' {Workspace}/variants.homozygous.vcf | awk -v qual=20 '$6>=qual' | awk '{OFS="\t";split($10,a,":");split(a[2],b,",");print $1"\t"$2"\t"b[1]+b[2]","b[2]"\t"$4"\t"$5"\t"b[2]/(b[1]+b[2])}' > variants.homozygous.txt
python {UserBin}/variant_filter.py -m ft -t {dbSNP150_HG19_GZ} -i variants.homozygous.txt -o variants.homozygous.desnp.txt
perl {UserBin}/filter_mismatch_first6bp.pl -infile {Workspace}/variants.homozygous.desnp.txt -outfile {Workspace}/variants.homozygous.desnp.rmhex.txt -bamfile {Workspace}/alignments.cleaned.added.reordered.bsqr.bam
AnnotateTable.py -a {RMSK_HG19} -i variants.homozygous.desnp.rmhex.txt -u -c1,2,3 -n rmsk -o variants.homozygous.desnp.rmhex.repeat_anned.txt
grep "Alu" variants.homozygous.desnp.rmhex.repeat_anned.txt > alu_editing_sites.txt
grep -v "Alu" variants.homozygous.desnp.rmhex.repeat_anned.txt > non_alu_candidates.txt
awk '{OFS="\t";$2=$2-1"\t"$2;print $0}' non_alu_candidates.txt | intersectBed -a stdin -b {SIMPLE_REPEAT_HG19} -v | cut -f1,3-7 > non_alu_candidates.rmsk.txt
perl {UserBin}/filter_intron_near_splicejuncts.pl -infile non_alu_candidates.rmsk.txt -outfile non_alu_candidates.rmsk.rmintron.txt -genefile {GENE_ANNOTATION_HG19_TXT}
python {UserBin}/variant_filter.py -m rh -t {GENOME_HG19_FA} -i {Workspace}/non_alu_candidates.rmsk.rmintron.txt -o {Workspace}/non_alu_candidates.rmsk.rmintron.rmhom.txt
perl {UserBin}/BLAT_candidates.pl -infile {Workspace}/non_alu_candidates.rmsk.rmintron.rmhom.txt -outfile {Workspace}/non_alu_candidates.rmsk.rmintron.rmhom.rmblat.txt -bamfile alignments.cleaned.added.reordered.bsqr.bam -refgenome {GENOME_HG19_FA}
cat alu_editing_sites.txt non_alu_candidates.rmsk.rmintron.rmhom.rmblat.txt > {JobName}_editing_sites.txt

References

Reference Description
dbSNP138_HG19_VCF
GENOME_HG19_FA
SIMPLE_REPEAT_HG19
GENE_ANNOTATION_HG19_TXT
dbSNP150_HG19_GZ
JBL_GENOME_95BP_HG19_FASTA
RMSK_HG19
Export