#!/usr/bin/env bash
#
# metaSPAdes v3.13.0 de novo assembly of DNA seq reads from all gut content
# samples.
# @bufo
#
# Pipeline: copy + decompress raw reads -> concatenate all samples ->
# adapter/quality trimming -> contaminant filtering -> three rounds of error
# correction -> read merging -> metaSPAdes assembly -> contig statistics and
# length-based contig filtering.
#
# Read-cleaning pipeline follows the suggestion here:
#   http://seqanswers.com/forums/showthread.php?p=204724#post204724
#
# Usage: run without arguments; all paths are fixed in the constants below.

# Abort on the first failing command so that intermediate files are never
# deleted after the step that was supposed to replace them has failed.
set -euo pipefail

readonly WORK_DIR=/DATA/markop/CPB_metaSpades
readonly RAW_DIR=/STORK_ngs/NGS_mpe_2018_CPB-gut-metagenome_DNAseq
readonly BBMAP_DIR=/DATA/markop/bbmap
readonly SPADES=/home/administrator/SPAdes-3.13.0-Linux/bin/spades.py
readonly INPUT_DIR=./input
readonly OUTPUT_DIR=./output

cd "${WORK_DIR}" || { printf 'cannot cd to %s\n' "${WORK_DIR}" >&2; exit 1; }

# -p: do not fail when the directories are left over from an earlier run.
mkdir -p "${OUTPUT_DIR}" "${INPUT_DIR}"

cp "${RAW_DIR}"/original_data_pt1/*.fq.gz "${INPUT_DIR}"
cp "${RAW_DIR}"/original_data_pt2/*.fq.gz "${INPUT_DIR}"
gunzip "${INPUT_DIR}"/*.gz

# Combine all DNA reads into two files.
# Stale combined files from a previous run would themselves match the
# *_1.fq / *_2.fq globs below and be fed back into cat, so remove them first.
rm -f -- "${INPUT_DIR}/CPB_metaDNA_1.fq" "${INPUT_DIR}/CPB_metaDNA_2.fq"
cat "${INPUT_DIR}"/*_1.fq > "${INPUT_DIR}/CPB_metaDNA_1.fq"
cat "${INPUT_DIR}"/*_2.fq > "${INPUT_DIR}/CPB_metaDNA_2.fq"

# Trim adapters.
# Paired input (in/in2) is written to a single output file, which every later
# step reads back through a single in= argument.
"${BBMAP_DIR}/bbduk.sh" -Xmx230g \
  in="${INPUT_DIR}/CPB_metaDNA_1.fq" \
  in2="${INPUT_DIR}/CPB_metaDNA_2.fq" \
  out="${INPUT_DIR}/trimmed.fq" \
  ktrim=r k=23 mink=11 hdist=1 \
  ref="${BBMAP_DIR}/resources/adapters.fa" \
  tbo tpe maxns=0 trimq=20 qtrim=r maq=12

# Per-sample and combined raw reads are no longer needed once trimming is done.
rm -f -- "${INPUT_DIR}"/RI0*.fq
rm -f -- "${INPUT_DIR}"/CPB_metaDNA*.fq

# Remove small contaminants.
"${BBMAP_DIR}/bbduk.sh" -Xmx230g \
  in="${INPUT_DIR}/trimmed.fq" \
  out="${INPUT_DIR}/filtered.fq" \
  k=31 \
  ref="${BBMAP_DIR}/resources/sequencing_artifacts.fa.gz,${BBMAP_DIR}/resources/phix174_ill.ref.fa.gz"
rm -- "${INPUT_DIR}/trimmed.fq"

# Error-correct 1.
"${BBMAP_DIR}/bbmerge.sh" -Xmx230g \
  in="${INPUT_DIR}/filtered.fq" \
  out="${INPUT_DIR}/ecco.fq" \
  ecco mix vstrict adapters=default

# Error-correct 2.
# Parameters are those suggested by Brian Bushnell here:
#   http://seqanswers.com/forums/archive/index.php/t-72901.html
#   https://www.biostars.org/p/225338/
"${BBMAP_DIR}/clumpify.sh" -Xmx200g \
  in="${INPUT_DIR}/ecco.fq" \
  out="${INPUT_DIR}/eccc.fq" \
  ecc passes=6 minid=0.98

# Error-correct 3.
"${BBMAP_DIR}/tadpole.sh" -Xmx200g \
  in="${INPUT_DIR}/eccc.fq" \
  out="${INPUT_DIR}/ecct.fq" \
  ecc
rm -- "${INPUT_DIR}/filtered.fq"
rm -- "${INPUT_DIR}/ecco.fq"

# Merge.
"${BBMAP_DIR}/bbmerge.sh" -Xmx200g \
  in="${INPUT_DIR}/ecct.fq" \
  out="${INPUT_DIR}/merged.fq" \
  outu="${INPUT_DIR}/unmerged.fq" \
  rem k=62 extend2=50 adapters=default
# rm ./input/ecct.fq

# Split unmerged reads into two files (forward / reverse) for SPAdes -1 / -2.
"${BBMAP_DIR}/reformat.sh" \
  in="${INPUT_DIR}/unmerged.fq" \
  out1="${INPUT_DIR}/unmerged_1.fq" \
  out2="${INPUT_DIR}/unmerged_2.fq" \
  overwrite=true
rm -- "${INPUT_DIR}/unmerged.fq"

# Run metaSPAdes.
# k 127 gave out-of-mem error code (-6), so the k-mer list stops at 101.
python "${SPADES}" --meta -k 21,41,71,101 -m 230 -t 32 \
  -o "${OUTPUT_DIR}" \
  -1 "${INPUT_DIR}/unmerged_1.fq" \
  -2 "${INPUT_DIR}/unmerged_2.fq" \
  -s "${INPUT_DIR}/merged.fq"
# To resume the interrupted run, params.txt was changed to omit the k-mer 127
# assembly and SPAdes was restarted with:
# python /home/administrator/SPAdes-3.13.0-Linux/bin/spades.py --continue -o ./output

# Evaluate.
"${BBMAP_DIR}/stats.sh" -Xmx200g in="${OUTPUT_DIR}/contigs.fasta"

# Filter contigs into two length classes.
# NOTE(review): maxlength was 5999, which put 5000-5999 bp contigs into both
# output files and contradicted the "1to5k" file name; 4999 makes the two
# classes disjoint. Revert if the overlap was intentional.
"${BBMAP_DIR}/reformat.sh" \
  in="${OUTPUT_DIR}/contigs.fasta" \
  out="${OUTPUT_DIR}/contigs1to5k.fasta" \
  minlength=1000 maxlength=4999
"${BBMAP_DIR}/reformat.sh" \
  in="${OUTPUT_DIR}/contigs.fasta" \
  out="${OUTPUT_DIR}/contigs5kup.fasta" \
  minlength=5000