#!/usr/bin/env bash
# P. citri all data (VF+MF susphire and Edinburgh all Pcitri samples)
#
# Maps the assembled (soft-filtered) transcripts to the genome with STARlong,
# converts the alignments to a chromosome/position-sorted SAM, converts the
# GFF3 annotation to GTF and compares both with MatchAnnot. The last step
# extracts selected columns of the MatchAnnot "result:" lines.
#
# Inputs (relative to the working directory):
#   ./output/soft_filtered_transcripts.fasta
#   ./input/Planococcus_citri_Pcitri.v1_mod.gff3
# Outputs: everything is written to ./STAR_mapSpadesTRtoGenome/, plus
#   ./input/Planococcus_citri_Pcitri.v1.gtf

# Stop at the first failing step so later steps never run on missing or
# half-written files. pipefail is deliberately not enabled: the only pipeline
# is the final grep | tr | awk, and grep finding no "result:" lines keeps
# producing an empty parsed file rather than an error, as before.
set -eu

## mapping assembled transcripts to genome
# The genome is 486392826 nt long and has 17212 scaffolds, therefore
# genomeChrBinNbits was set to 15 [log2(486392826/17202)=14.8].
# NOTE(review): the scaffold count is written as 17212 in the sentence and as
# 17202 in the formula; both give ~14.8, but confirm the true count.
#
# The index was generated once with the commands below and is now only copied:
#mkdir star_genome_index
#
#STARlong --runThreadN 32 --runMode genomeGenerate \
#--genomeDir ./star_genome_index \
#--genomeFastaFiles ./input/Planococcus_citri_Pcitri.v1.scaffolds.fa \
#--sjdbGTFtagExonParentTranscript Parent \
#--sjdbGTFfile ./input/Planococcus_citri_Pcitri.v1.gff3 \
#--genomeChrBinNbits 15 \
#--limitGenomeGenerateRAM=220000000000

# Paths and settings shared by several steps (values unchanged).
readonly threads=28
readonly samtools_bin=/DATA/majak/samtools-1.6/samtools
readonly out_dir=./STAR_mapSpadesTRtoGenome
# STAR appends its own file names directly to this prefix (note trailing dot).
readonly star_prefix="${out_dir}/PcSpadesTRsofttoGenome_STARlong."
readonly coord_bam="${star_prefix}Aligned.sortedByCoord.out.bam"
readonly coord_sam="${star_prefix}Aligned.sortedByCoord.out.sam"
readonly sorted_sam="${star_prefix}Aligned.sorted2.out.sam"
readonly matchannot_txt="${star_prefix}Aligned.sorted2.out.matchAnnot.txt"
readonly matchannot_parsed="${star_prefix}Aligned.sorted2.out.matchAnnot.parsed.txt"
readonly annotation_gff3=./input/Planococcus_citri_Pcitri.v1_mod.gff3
readonly annotation_gtf=./input/Planococcus_citri_Pcitri.v1.gtf

cp -avr /DATA/markop/Pcitri_rnaspades/star_genome_index .

# -p: do not fail when the directory is left over from a previous run.
mkdir -p "${out_dir}"

# mapping to soft filtered transcriptome (soft_filtered_transcripts.fasta)
# parameters same as for the potato tr mapping but export BAM instead of SAM file
STARlong \
  --runMode alignReads \
  --outSAMattributes NH HI NM MD \
  --outFilterType BySJout \
  --outFilterMultimapNmax 20 \
  --alignSJoverhangMin 8 \
  --alignSJDBoverhangMin 1 \
  --outFilterMismatchNmax 999 \
  --outFilterMismatchNoverReadLmax 0.08 \
  --alignIntronMin 20 \
  --alignIntronMax 1000000 \
  --alignMatesGapMax 1000000 \
  --outSAMtype BAM SortedByCoordinate \
  --limitBAMsortRAM 100000000000 \
  --runThreadN "${threads}" \
  --genomeDir ./star_genome_index \
  --readFilesIn ./output/soft_filtered_transcripts.fasta \
  --outFileNamePrefix "${star_prefix}" \
  --seedPerReadNmax 100000 \
  --seedPerWindowNmax 1000 \
  --seedSearchLmax 30 \
  --seedSearchStartLmax 30 \
  --alignTranscriptsPerReadNmax 100000 \
  --alignTranscriptsPerWindowNmax 10000 \
  --scoreGapNoncan -20 \
  --scoreDelOpen -1 \
  --scoreDelBase -1 \
  --scoreInsOpen -1 \
  --scoreInsBase -1 \
  --chimMultimapNmax 20 \
  --chimSegmentMin 100

# index bam file
"${samtools_bin}" index "${coord_bam}"

# convert bam to sam for MatchAnnot
# NOTE(review): view is called without -h, so no header lines are expected in
# the SAM and the plain text sort below should only see alignment records;
# confirm for this samtools version.
"${samtools_bin}" view -@ "${threads}" -O SAM -o "${coord_sam}" "${coord_bam}"

# Re-sort by reference name (column 3), then numerically by position (column 4).
sort -k 3,3 -k 4,4n "${coord_sam}" > "${sorted_sam}"

# The unsorted SAM is only an intermediate; it is removed only after the sort
# above has succeeded (set -e aborts before this line otherwise).
rm -- "${coord_sam}"

# gff3 to gtf
gffread "${annotation_gff3}" -T -E -F -O --gene2exon -o "${annotation_gtf}"

# MatchAnnot
python /home/administrator/Software/MatchAnnot/matchAnnot.py \
  --gtf="${annotation_gtf}" \
  --format=alt \
  "${sorted_sam}" > "${matchannot_txt}"

# parsing the matchannot results: EvigenetrID DMgeneID DMtrID exon_match match_score
# Keep only the "result:" lines, squeeze runs of spaces so that a single space
# is a reliable field separator, then print fields 2, 3, 4, 6 and 8.
grep "result:" "${matchannot_txt}" \
  | tr -s ' ' \
  | awk -F'[ ]' '{print $2, $3, $4, $6, $8}' > "${matchannot_parsed}"