# zagor
# Build the two combined FASTA inputs (protein "aa" and coding "cds"):
# one reference FASTA followed by every per-cultivar stCuSTr FASTA.
ref_pep_fasta=../../_A_02_cdhit_3cvs-GFFmerged/input/StPGSC4.04n_seq_4_PGSC-p-rep_ITAG-pep_gff-2019-04-23.fasta
ref_cds_fasta=../../_A_01_GC_content-count/input/StPGSC4.04n_seq_3_PGSC-c-rep_ITAG-cds_gff-2019-04-23.fasta
per_cv_fasta_dir=../../../_S_03_stCuSTr/_A_03.2_components/output/fasta_2_all-per-cv

cat "$ref_pep_fasta" "$per_cv_fasta_dir"/stCuSTr-*_aa_all.fasta > ../input/5cv.aa.fasta
cat "$ref_cds_fasta" "$per_cv_fasta_dir"/stCuSTr-*_cds_all.fasta > ../input/5cv.cds.fasta

# Keep only the first space-delimited field of every line, rewriting each file
# in place; sponge soaks up all input before it overwrites the file being read.
for combined_fasta in ../input/5cv.aa.fasta ../input/5cv.cds.fasta; do
  cut -d " " -f1 "$combined_fasta" | sponge "$combined_fasta"
done

# Start the alignment driver.
python MSA.py
### example for python MSA.py input ###
# $ python MSA.py
# Enter your choice [1-3] : 1
# Path to your reference .fasta file: ../input/my.test.aa.fasta
# Path to your folder containing files with written gene IDs of interest, line by line: ../input/folderWithIds
# Where would you like to store your results (directory name, e.g. here/and/there)?: aa
# Preferred width of the alignment is (full or integer): full
# Consensus on or off: off
# Enter your choice [1-3] : 3
# Number of iterations: 1
# Number of CPU (up to 30): 1
# Delete the combined FASTA files that were written to ../input earlier in
# this script.
for combined_fasta in ../input/5cv.aa.fasta ../input/5cv.cds.fasta; do
  rm "$combined_fasta"
done
#############################
# to merge Mview HTML files #
#
# mkdir ./aa_ClustalOmega_singletonsReclustered_tetraDM_45_90_180_off_8_28/html
# mv ./aa_ClustalOmega_singletonsReclustered_tetraDM_45_90_180_off_8_28/*mview.html ./aa_ClustalOmega_singletonsReclustered_tetraDM_45_90_180_off_8_28/html/
# # merge .htmls
# grep -n "TABLE" ./aa_ClustalOmega_singletonsReclustered_tetraDM_45_90_180_off_8_28/html/Cluster_39_cdhit_090_045.txt.aligned.mview.html
# x=$(grep -n "" ./aa_ClustalOmega_singletonsReclustered_tetraDM_45_90_180_off_8_28/html/Cluster_39_cdhit_090_045.txt.aligned.mview.html)
# y=$(echo $y | cut -f1 -d ":")
# head -n$(expr $x - 1) ./aa_ClustalOmega_singletonsReclustered_tetraDM_45_90_180_off_8_28/html/Cluster_39_cdhit_090_045.txt.aligned.mview.html > ./aa_ClustalOmega_singletonsReclustered_tetraDM_45_90_180_off_8_28/Merged_tetra-to-DM.aa.html
# for f in ./aa_ClustalOmega_singletonsReclustered_tetraDM_45_90_180_off_8_28/html/*.html
# do
# sed -n "$x,$(expr $y + 1) p " $f | sed "$ d" >> ./aa_ClustalOmega_singletonsReclustered_tetraDM_45_90_180_off_8_28/Merged_tetra-to-DM.aa.html
# done
# tail -3 ./aa_ClustalOmega_singletonsReclustered_tetraDM_45_90_180_off_8_28/html/Cluster_39_cdhit_090_045.txt.aligned.mview.html >> ./aa_ClustalOmega_singletonsReclustered_tetraDM_45_90_180_off_8_28/Merged_tetra-to-DM.aa.html
#
#
# mkdir ./aa_ClustalOmega_singletonsReclustered_DMtetra_45_90_180_off_8_28/html
# mv ./aa_ClustalOmega_singletonsReclustered_DMtetra_45_90_180_off_8_28/*mview.html ./aa_ClustalOmega_singletonsReclustered_DMtetra_45_90_180_off_8_28/html/
# # merge .htmls
# grep -n "TABLE" ./aa_ClustalOmega_singletonsReclustered_DMtetra_45_90_180_off_8_28/html/Cluster_3_cdhit_045_090.txt.aligned.mview.html
# x=$(grep -n "" ./aa_ClustalOmega_singletonsReclustered_DMtetra_45_90_180_off_8_28/html/Cluster_3_cdhit_045_090.txt.aligned.mview.html)
# y=$(echo $y | cut -f1 -d ":")
# head -n$(expr $x - 1) ./aa_ClustalOmega_singletonsReclustered_DMtetra_45_90_180_off_8_28/html/Cluster_3_cdhit_045_090.txt.aligned.mview.html > ./aa_ClustalOmega_singletonsReclustered_DMtetra_45_90_180_off_8_28/Merged_DM-to-tetra.aa.html
# for f in ./aa_ClustalOmega_singletonsReclustered_DMtetra_45_90_180_off_8_28/html/*.html
# do
# sed -n "$x,$(expr $y + 1) p " $f | sed "$ d" >> ./aa_ClustalOmega_singletonsReclustered_DMtetra_45_90_180_off_8_28/Merged_DM-to-tetra.aa.html
# done
# tail -3 ./aa_ClustalOmega_singletonsReclustered_DMtetra_45_90_180_off_8_28/html/Cluster_3_cdhit_045_090.txt.aligned.mview.html >> ./aa_ClustalOmega_singletonsReclustered_DMtetra_45_90_180_off_8_28/Merged_DM-to-tetra.aa.html
#
# mkdir ./aa_ClustalOmega_moreThan20_180_off_8_28/html
# mv ./aa_ClustalOmega_moreThan20_180_off_8_28/*mview.html ./aa_ClustalOmega_moreThan20_180_off_8_28/html/
# # merge .htmls
# grep -n "TABLE" ./aa_ClustalOmega_moreThan20_180_off_8_28/html/Cluster_122_cdhit-est.txt.aligned.mview.html
# x=$(grep -n "" ./aa_ClustalOmega_moreThan20_180_off_8_28/html/Cluster_122_cdhit-est.txt.aligned.mview.html)
# y=$(echo $y | cut -f1 -d ":")
# head -n$(expr $x - 1) ./aa_ClustalOmega_moreThan20_180_off_8_28/html/Cluster_122_cdhit-est.txt.aligned.mview.html > ./aa_ClustalOmega_moreThan20_180_off_8_28/Merged_cdhit-est_moreThan20.aa.html
# for f in ./aa_ClustalOmega_moreThan20_180_off_8_28/html/*.html
# do
# sed -n "$x,$(expr $y + 1) p " $f | sed "$ d" >> ./aa_ClustalOmega_moreThan20_180_off_8_28/Merged_cdhit-est_moreThan20.aa.html
# done
# tail -3 ./aa_ClustalOmega_moreThan20_180_off_8_28/html/Cluster_122_cdhit-est.txt.aligned.mview.html >> ./aa_ClustalOmega_moreThan20_180_off_8_28/Merged_cdhit-est_moreThan20.aa.html
#
#
# mkdir ./aa_ClustalOmega_upto20_180_off_8_28/html
# mv ./aa_ClustalOmega_upto20_180_off_8_28/*mview.html ./aa_ClustalOmega_upto20_180_off_8_28/html/
# # merge .htmls
# grep -n "TABLE" ./aa_ClustalOmega_upto20_180_off_8_28/html/Cluster_191_cdhit-est.txt.aligned.mview.html
# # 259:
# x=$(grep -n "" ./aa_ClustalOmega_upto20_180_off_8_28/html/Cluster_191_cdhit-est.txt.aligned.mview.html)
# y=$(echo $y | cut -f1 -d ":")
# head -n$(expr $x - 1) ./aa_ClustalOmega_upto20_180_off_8_28/html/Cluster_191_cdhit-est.txt.aligned.mview.html > ./aa_ClustalOmega_upto20_180_off_8_28/Merged_cdhit-est_upTo20.aa.html
#
# for f in ./aa_ClustalOmega_upto20_180_off_8_28/html/*.html
# do
# sed -n "$x,$(expr $y + 1) p " $f | sed "$ d" >> ./aa_ClustalOmega_upto20_180_off_8_28/Merged_cdhit-est_upTo20.aa.html
# done
# tail -3 ./aa_ClustalOmega_upto20_180_off_8_28/html/Cluster_191_cdhit-est.txt.aligned.mview.html >> ./aa_ClustalOmega_upto20_180_off_8_28/Merged_cdhit-est_upTo20.aa.html
#
#
# find ./aa_ClustalOmega_upto10_180_off_8_28 -name "*aligned.mview.html" -size +550 -print
#
# mkdir ./aa_ClustalOmega_upto10_180_off_8_28/html
# mv ./aa_ClustalOmega_upto10_180_off_8_28/*mview.html ./aa_ClustalOmega_upto10_180_off_8_28/html/
#
# # merge .htmls
# grep -n "TABLE" ./aa_ClustalOmega_upto10_180_off_8_28/html/Cluster_12_cdhit-est.txt.aligned.mview.html
# x=$(grep -n "" ./aa_ClustalOmega_upto10_180_off_8_28/html/Cluster_12_cdhit-est.txt.aligned.mview.html)
# y=$(echo $y | cut -f1 -d ":")
#
# head -n$(expr $x - 1) ./aa_ClustalOmega_upto10_180_off_8_28/html/Cluster_12_cdhit-est.txt.aligned.mview.html > ./aa_ClustalOmega_upto10_180_off_8_28/Merged_cdhit-est_upTo10.aa.html
#
# for f in ./aa_ClustalOmega_upto10_180_off_8_28/html/*.html
# do
# sed -n "$x,$(expr $y + 1) p " $f | sed "$ d" >> ./aa_ClustalOmega_upto10_180_off_8_28/Merged_cdhit-est_upTo10.aa.html
# done
#
# tail -3 ./aa_ClustalOmega_upto10_180_off_8_28/html/Cluster_12_cdhit-est.txt.aligned.mview.html >> ./aa_ClustalOmega_upto10_180_off_8_28/Merged_cdhit-est_upTo10.aa.html
# ---------------------------------------------------------------------------
# Tidy the MAFFT "upto5" result directory and merge all of its MView HTML
# reports into one HTML page.
# ---------------------------------------------------------------------------

# marker_line WHICH FILE MARKER
# Print the line number of the first (WHICH=first) or last (WHICH=last) line
# of FILE that contains the fixed string MARKER.
# Returns 1 (printing nothing) when no line matches, 2 on an unknown WHICH.
marker_line() {
  local which="$1" file="$2" marker="$3" numbers
  # grep -n prefixes every hit with "<line number>:"; keep only the number.
  numbers=$(grep -n -F -- "$marker" "$file" | cut -d: -f1)
  [ -n "$numbers" ] || return 1
  case "$which" in
    first) printf '%s\n' "$numbers" | head -n 1 ;;
    last) printf '%s\n' "$numbers" | tail -n 1 ;;
    *) return 2 ;;
  esac
}

# merge_mview_html REFERENCE OUTPUT PAGE...
# Write OUTPUT as:
#   1. every line of REFERENCE that precedes its first "TABLE" line,
#   2. for each PAGE, the lines from its first to its last "TABLE" line,
#   3. the last three lines of REFERENCE.
# The boundaries are looked up per page, so pages of different length are cut
# correctly. Pages without a "TABLE" line are skipped with a warning.
# NOTE(review): the earlier version searched with an empty grep pattern (which
# matches every line) and derived the end line from an unset variable, so the
# intended markers are not recoverable from the script. "TABLE" is the string
# the script itself greps for; confirm that the first/last "TABLE" lines
# really delimit the alignment in the MView output.
# Returns 1 when REFERENCE has no "TABLE" line or OUTPUT cannot be written.
merge_mview_html() {
  local reference="$1" merged="$2" marker=TABLE start end page
  shift 2

  start=$(marker_line first "$reference" "$marker") || {
    printf 'merge_mview_html: no %s line in %s\n' "$marker" "$reference" >&2
    return 1
  }
  # Page header: everything in front of the table.
  head -n "$((start - 1))" "$reference" > "$merged" || return 1

  for page in "$@"; do
    start=$(marker_line first "$page" "$marker") || {
      printf 'merge_mview_html: skipping %s (no %s line)\n' "$page" "$marker" >&2
      continue
    }
    end=$(marker_line last "$page" "$marker")
    sed -n "${start},${end}p" "$page" >> "$merged"
  done

  # Page footer.
  tail -n 3 "$reference" >> "$merged"
}

msa_dir=./MAFFT_MAFFT_upto5_180_off_15_28
html_dir=$msa_dir/html
# Report used as the source of the shared header and footer.
# NOTE(review): presumably picked by hand from the "largest file" listing
# printed below - confirm.
reference_html=$html_dir/Cluster_9_cdhit-est.txt.aligned.mview.html

if [ -d "$msa_dir" ]; then
  # -p keeps a rerun from failing when html/ already exists.
  mkdir -p "$html_dir"
  for report in "$msa_dir"/*mview.html; do
    # An unmatched glob stays literal; skip it instead of letting mv fail.
    [ -e "$report" ] || continue
    mv -- "$report" "$html_dir"/
  done

  # list file extensions: print each distinct last dot-separated field once
  find "$msa_dir"/ -type f | awk -F. '!a[$NF]++{print $NF}'

  # Drop the intermediate files; the glob does not descend into html/.
  rm -f -- "$msa_dir"/*.txt "$msa_dir"/*.aligned "$msa_dir"/*.fasta "$msa_dir"/*.ph

  # `ll` is an interactive alias and is not defined inside a script.
  ls -l "$msa_dir"/

  # largest file
  du -hsx "$html_dir"/* | sort -rh | head -1

  # Show where the table markers sit in the reference report.
  grep -n "TABLE" "$reference_html"

  # merge .htmls
  merge_mview_html "$reference_html" \
    "$msa_dir/Merged_cdhit-est_upTo5.aa.html" \
    "$html_dir"/*.html
else
  printf 'skipping HTML merge: %s is not a directory\n' "$msa_dir" >&2
fi