#!/usr/bin/env bash
#
# InterProScan of Iso-seq transcripts that were not mapped to the P. citri
# genome (or were ignored due to too many differences with the genome).
#
# The precalculated-match server lookup is disabled because these are
# non-published sequences!
#
# Run on bufo.
#
# NOTE(review): `conda activate` needs conda's shell integration; when this
# file is executed non-interactively the conda profile script presumably has
# to be sourced first - confirm for this host.

readonly PROJECT_DIR=/DATB/markop/_S_P4_Pcitri_IsoSeq/_A_03-IPS
readonly SCRIPTS_DIR="${PROJECT_DIR}/scripts"
readonly TRANSCRIPTS=../../_A_02_cDNAcupcake-dry/output/Pcitri.ignored_ids.fasta
readonly DUMB_PREFIX=../output/Pcitri.ignored_ids.dumb
readonly CPUS=30

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# -p makes the layout step safe to repeat when the directories already exist.
mkdir -p \
  "${PROJECT_DIR}/input" \
  "${PROJECT_DIR}/output" \
  "${SCRIPTS_DIR}" \
  || die "cannot create directory layout under ${PROJECT_DIR}"

# All relative paths below are resolved from the scripts directory.
cd "${SCRIPTS_DIR}" || die "cannot cd to ${SCRIPTS_DIR}"

conda activate isoseq

# Clone ANGEL repo and install (clone is skipped if a checkout already exists).
if [[ ! -d ANGEL/.git ]]; then
  git clone https://github.com/Magdoll/ANGEL.git || die "git clone of ANGEL failed"
fi
cd ANGEL || die "cannot cd to ANGEL checkout"
# does not work with biopython version >1.77 !
conda install -n isoseq -c cctbx202008 biopython
python setup.py build
python setup.py install
cd .. || die "cannot cd back to ${SCRIPTS_DIR}"

# run ANGEL - get best ORF from Iso-seq transcripts
## does not work due to problems with classifier_pickle file
#angel_predict.py \
#  --output_mode=all \
#  --min_angel_aa_length 100 \
#  --min_dumb_aa_length 100 \
#  --cpus 30 \
#  ../../_A_02_cDNAcupcake-dry/output/Pcitri.ignored_ids.fasta ./ANGEL/training_example/test_angel.ANGEL.pickle ../output/Pcitri.ignored_ids

# outputs the longest ORF (or first ORF) that exceed the user-defined minimum
# length and have a positive log-odds scores based on hexamer frequencies
dumb_predict.py "${TRANSCRIPTS}" "${DUMB_PREFIX}" \
  --min_aa_length 100 \
  --cpus "${CPUS}"

conda deactivate

# switch to java 11
update-java-alternatives --list
sudo update-alternatives --set java /usr/lib/jvm/java-11-oracle
java -version

cd "${SCRIPTS_DIR}" || die "cannot cd to ${SCRIPTS_DIR}"

# Strip the '*' stop-codon characters from the predicted peptides.
readonly PEP="${DUMB_PREFIX}.final.pep"
readonly PEP_NOSTAR="${PEP}.rmstar"
readonly IPS_INPUT="${PEP_NOSTAR}.IPS"

tr -d '*' < "${PEP}" > "${PEP_NOSTAR}" || die "cannot write ${PEP_NOSTAR}"

# The InterProScan call reads "<pep>.rmstar.IPS", which no earlier step
# produced. Derive it from the star-stripped peptides unless a file of that
# name is already present.
# NOTE(review): if the .IPS file was meant to carry additional manual
# processing, confirm and document that step here.
if [[ ! -e "${IPS_INPUT}" ]]; then
  cp -- "${PEP_NOSTAR}" "${IPS_INPUT}" || die "cannot create ${IPS_INPUT}"
fi

# IPS run on translated aa (dumb predictions)
./interproscan-5.46-81.0/interproscan.sh \
  --applications Pfam \
  --seqtype p \
  -i "${IPS_INPUT}" \
  --output-dir ../output/ \
  --cpu "${CPUS}" \
  --disable-precalc \
  --formats TSV,GFF3,HTML

## training ANGEL (non-dumb) - NOT WORKING
## Previous attempt failed with:
##   running CD-HIT to generate non-redundant set....
##   /bin/sh: cd-hit: command not found
# NOTE(review): this step runs after `conda deactivate`; confirm that
# angel_make_training_set.py is still on PATH at this point.
mkdir -p "${PROJECT_DIR}/output/trainingset" \
  || die "cannot create ${PROJECT_DIR}/output/trainingset"

if command -v cd-hit >/dev/null 2>&1; then
  angel_make_training_set.py \
    "${DUMB_PREFIX}.final" \
    ../output/trainingset/Pcitri.ignored_ids.dumb.training \
    --random \
    --cpus "${CPUS}"
else
  printf 'warning: cd-hit not found on PATH; skipping ANGEL training set generation\n' >&2
fi