#!/usr/bin/env bash
#
# P. citri all data (VF+MF SUSPHIRE and Edinburgh all Pcitri samples)
# rnaSPAdes transcriptome assembly.
# Run on bufo.
#
# NOTE(review): the original title said "RNAspades v3.13.0", but the binaries
# invoked below live under SPAdes-3.13.1-Linux - confirm which version to cite.
#
# Stages:
#   1. copy + decompress raw reads, pool them into three libraries
#   2. adapter/quality trimming and artifact/PhiX filtering (BBDuk)
#   3. error correction (BayesHammer, BBMerge ecco, Clumpify, Tadpole)
#   4. overlap merging of read pairs (BBMerge)
#   5. assembly (rnaSPAdes) and assembly statistics
#
# Strict mode: abort on the first failing command so that the cleanup steps
# (rm) never delete intermediate files after a failed stage.
set -euo pipefail

readonly WORK_DIR=/DATA/markop/Pcitri_rnaspades_alldata
readonly BBMAP=/DATA/markop/bbmap
readonly SPADES_BIN=/home/administrator/Software/SPAdes-3.13.1-Linux/bin

# Library order matters: it defines the --pe1/--pe2/--pe3 numbering given to
# SPAdes and therefore the ".00.<index>_0" infix in the corrected file names.
readonly -a LIBS=(SUSPHIRE Edinburgh50 Edinburgh75)

# The original ran `mkdir Pcitri_rnaspades_alldata` only and then copied into
# ./input, which did not exist yet; create both levels (safe to re-run).
mkdir -p "${WORK_DIR}/input"
cd "${WORK_DIR}"

# Copy and decompress fastq files
cp /NFSs-datarepo/ngs_omics/NGS_pci_rna-seq_2018_mpe_Pcitri/data/*.gz ./input/
cp /NFSs-datarepo/ngs_omics/NGS_pci_rna-seq_2019_mpe_SUSPHIRE-Edinburgh/data/*.gz ./input/
pigz -d -v ./input/*.gz

# Combine all SUSPHIRE (150 nt) reads into one library and the Edinburgh
# RNAseq reads into two libraries (75 nt and 50 nt kept separate)
cat ./input/MF_*_1.fastq ./input/VF_*_1.fastq > ./input/SUSPHIRE_1.fastq
cat ./input/MF_*_2.fastq ./input/VF_*_2.fastq > ./input/SUSPHIRE_2.fastq
cat ./input/PC_ALL_1_1.fastq ./input/PC_F_1_1.fastq ./input/PC_M_1_1.fastq > ./input/Edinburgh75_1.fastq
cat ./input/PC_ALL_1_2.fastq ./input/PC_F_1_2.fastq ./input/PC_M_1_2.fastq > ./input/Edinburgh75_2.fastq
cat ./input/PC_F_2_1.fastq ./input/PC_M_3_1.fastq > ./input/Edinburgh50_1.fastq
cat ./input/PC_F_2_2.fastq ./input/PC_M_3_2.fastq > ./input/Edinburgh50_2.fastq

# Drop the per-sample files now that they are pooled. -f: a pattern without
# matches (e.g. PF*) must not abort the run under `set -e`.
rm -f ./input/MF*.fastq ./input/VF*.fastq ./input/PF*.fastq ./input/PC*.fastq

# Pipeline for cleaning reads suggested here:
# http://seqanswers.com/forums/showthread.php?p=204724#post204724

# Trim adapters (paired input, single interleaved output per library)
for lib in "${LIBS[@]}"; do
  "${BBMAP}/bbduk.sh" -Xmx230g \
    in="./input/${lib}_1.fastq" \
    in2="./input/${lib}_2.fastq" \
    out="./input/${lib}_trimmed.fastq" \
    ktrim=r k=23 mink=11 hdist=1 \
    ref="${BBMAP}/resources/adapters.fa" \
    tbo tpe maxns=0 trimq=20 qtrim=r maq=12
done

# Remove small contaminants (sequencing artifacts and PhiX)
contaminant_refs="${BBMAP}/resources/sequencing_artifacts.fa.gz,${BBMAP}/resources/phix174_ill.ref.fa.gz"
for lib in "${LIBS[@]}"; do
  "${BBMAP}/bbduk.sh" -Xmx230g \
    in="./input/${lib}_trimmed.fastq" \
    out="./input/${lib}_filtered.fastq" \
    k=31 \
    ref="${contaminant_refs}"
done
rm ./input/*_trimmed.fastq

# SPAdes BayesHammer error correction
# You cannot specify --only-error-correction in RNA-Seq mode, hence spades.py
# (not rnaspades.py) is used for this step.
"${SPADES_BIN}/spades.py" \
  --only-error-correction \
  -m 230 \
  -t 32 \
  -o ./input \
  --pe1-fr --pe2-fr --pe3-fr \
  --pe1-12 ./input/SUSPHIRE_filtered.fastq \
  --pe2-12 ./input/Edinburgh50_filtered.fastq \
  --pe3-12 ./input/Edinburgh75_filtered.fastq
rm -rf ./input/tmp ./input/split_input

# Error-correct 1: overlap-based correction (BBMerge ecco) on the
# BayesHammer output, paired and unpaired reads handled separately
for idx in "${!LIBS[@]}"; do
  lib=${LIBS[idx]}
  corrected="./input/corrected/${lib}_filtered"
  "${BBMAP}/bbmerge.sh" -Xmx230g \
    in1="${corrected}_1.00.${idx}_0.cor.fastq.gz" \
    in2="${corrected}_2.00.${idx}_0.cor.fastq.gz" \
    out="./input/${lib}_ecco_PE.fastq" \
    ecco mix vstrict adapters=default
  "${BBMAP}/bbmerge.sh" -Xmx230g \
    in="${corrected}__unpaired.00.${idx}_0.cor.fastq.gz" \
    out="./input/${lib}_ecco_SE.fastq" \
    ecco mix vstrict adapters=default
done
rm ./input/*_filtered.fastq

# Error-correct 2 (Clumpify)
# Used parameters that were suggested by Brian Bushnell here:
# http://seqanswers.com/forums/archive/index.php/t-72901.html
# and https://www.biostars.org/p/225338/
for lib in "${LIBS[@]}"; do
  for kind in PE SE; do
    "${BBMAP}/clumpify.sh" -Xmx200g \
      in="./input/${lib}_ecco_${kind}.fastq" \
      out="./input/${lib}_eccc_${kind}.fastq" \
      ecc passes=6 minid=0.98
  done
done

# Error-correct 3 (Tadpole)
for lib in "${LIBS[@]}"; do
  for kind in PE SE; do
    "${BBMAP}/tadpole.sh" -Xmx200g \
      in="./input/${lib}_eccc_${kind}.fastq" \
      out="./input/${lib}_ecct_${kind}.fastq" \
      ecc
  done
done
rm ./input/*_ecco_*.fastq
rm ./input/*_eccc_*.fastq

# Merge overlapping pairs; pairs that do not merge go to *_unmerged.fastq
# NOTE(review): k=62 exceeds the 50 nt read length stated above for the
# Edinburgh50 library - confirm rem/extend2 are meaningful for that library.
for lib in "${LIBS[@]}"; do
  "${BBMAP}/bbmerge.sh" -Xmx200g \
    in="./input/${lib}_ecct_PE.fastq" \
    out="./input/${lib}_merged.fastq" \
    outu="./input/${lib}_unmerged.fastq" \
    rem k=62 extend2=50 adapters=default
done

# Split unmerged into two files
# Not needed: the unmerged reads are passed to rnaSPAdes below as a single
# file via --peN-12, and the split files were never used. Kept for reference:
# "${BBMAP}/reformat.sh" in=./input/SUSPHIRE_unmerged.fastq out1=./input/SUSPHIRE_unmerged_1.fastq out2=./input/SUSPHIRE_unmerged_2.fastq overwrite=true

rm ./input/corrected/*.fastq.gz
# The original pattern was "*._ecct_PE.fastq" (stray dot), which matches no
# file produced above; the paired ecct files are what is obsolete here.
rm ./input/*_ecct_PE.fastq

# Running rnaSPAdes (BayesHammer error correction turned off)
# rnaSPAdes can take as an input only paired-end and single-end libraries;
# merged reads will therefore be treated as single!
# rnaSPAdes does not support --careful and --cov-cutoff options.
# By default rnaSPAdes uses 2 k-mer sizes, which are automatically detected
# using read length (approximately one third and half of the maximal read
# length). We recommend not to change this parameter because smaller k-mer
# sizes typically result in multiple chimeric (misassembled) transcripts.

# Raise the open-file limit for the assembly; warn instead of aborting if the
# hard limit on this machine is lower.
ulimit -n 32000 || printf 'warning: could not raise open-file limit to 32000\n' >&2

# Single-end input per library = merged pairs + error-corrected unpaired reads
for lib in "${LIBS[@]}"; do
  cat "./input/${lib}_merged.fastq" "./input/${lib}_ecct_SE.fastq" \
    > "./input/${lib}_SE.fastq"
done

"${SPADES_BIN}/rnaspades.py" \
  --only-assembler \
  -k 29,49 \
  -m 245 \
  -t 32 \
  -o ./output \
  --ss-rf \
  --pe1-fr --pe2-fr --pe3-fr \
  --pe1-12 ./input/SUSPHIRE_unmerged.fastq \
  --pe1-s ./input/SUSPHIRE_SE.fastq \
  --pe2-12 ./input/Edinburgh50_unmerged.fastq \
  --pe2-s ./input/Edinburgh50_SE.fastq \
  --pe3-12 ./input/Edinburgh75_unmerged.fastq \
  --pe3-s ./input/Edinburgh75_SE.fastq

# Only reached when the assembly succeeded (set -e), so the cleaned reads are
# never deleted after a failed rnaSPAdes run.
rm ./input/*.fastq

# Produced xx hard-filtered, x normal-filtered, and x soft-filtered transcripts.
# Should be mapped to genome scaffolds.

# Evaluate assembly
"${BBMAP}/stats.sh" -Xmx200g \
  in=./output/soft_filtered_transcripts.fasta \
  out=./output/soft_filtered_transcripts.bbmap.stats