diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..f823690 Binary files /dev/null and b/.DS_Store differ diff --git a/.RData b/.RData new file mode 100644 index 0000000..7b3ed70 Binary files /dev/null and b/.RData differ diff --git a/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged b/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged deleted file mode 100644 index 86f632f..0000000 --- a/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged +++ /dev/null @@ -1,198 +0,0 @@ -AA Codon1 Codon2 Paired_total Total_codon1 Total_codon2 -Ala GCA GCA 15509 49169 49169 -Ala GCA GCC 9744 49169 36509 -Ala GCA GCG 5723 49169 18758 -Ala GCA GCT 16225 49169 60888 -Ala GCC GCA 9705 36509 49169 -Ala GCC GCC 8333 36509 36509 -Ala GCC GCG 3868 36509 18758 -Ala GCC GCT 13109 36509 60888 -Ala GCG GCA 5705 18758 49169 -Ala GCG GCC 3908 18758 36509 -Ala GCG GCG 2300 18758 18758 -Ala GCG GCT 6081 18758 60888 -Ala GCT GCA 16259 60888 49169 -Ala GCT GCC 13044 60888 36509 -Ala GCT GCG 6002 60888 18758 -Ala GCT GCT 23172 60888 60888 -Arg AGA AGA 30245 63169 63169 -Arg AGA AGG 12611 63169 28460 -Arg AGA CGA 3789 63169 9715 -Arg AGA CGC 3118 63169 8258 -Arg AGA CGG 2140 63169 5792 -Arg AGA CGT 8181 63169 19143 -Arg AGG AGA 12433 28460 63169 -Arg AGG AGG 6167 28460 28460 -Arg AGG CGA 2092 28460 9715 -Arg AGG CGC 1695 28460 8258 -Arg AGG CGG 1222 28460 5792 -Arg AGG CGT 3490 28460 19143 -Arg CGA AGA 3913 9715 63169 -Arg CGA AGG 2054 9715 28460 -Arg CGA CGA 907 9715 9715 -Arg CGA CGC 667 9715 8258 -Arg CGA CGG 544 9715 5792 -Arg CGA CGT 1191 9715 19143 -Arg CGC AGA 3094 8258 63169 -Arg CGC AGG 1631 8258 28460 -Arg CGC CGA 599 8258 9715 -Arg CGC CGC 664 8258 8258 -Arg CGC CGG 524 8258 5792 -Arg CGC CGT 1264 8258 19143 -Arg CGG AGA 2179 5792 63169 -Arg CGG AGG 1225 5792 28460 -Arg CGG CGA 474 5792 9715 -Arg CGG CGC 425 5792 8258 -Arg CGG CGG 366 5792 5792 -Arg CGG CGT 773 5792 19143 -Arg CGT AGA 8385 19143 63169 -Arg CGT AGG 3406 19143 28460 -Arg CGT CGA 1260 19143 9715 -Arg CGT CGC 1202 19143 8258 -Arg CGT CGG 672 19143 5792 -Arg CGT CGT 3297 19143 19143 -Gly GGA GGA 8457 33972 33972 -Gly GGA GGC 6884 33972 29617 -Gly GGA GGG 4279 33972 18369 -Gly GGA GGT 12662 33972 67526 -Gly GGC GGA 6776 29617 33972 -Gly GGC GGC 6180 29617 29617 -Gly GGC GGG 3681 29617 18369 -Gly GGC GGT 11597 29617 67526 -Gly GGG GGA 4442 18369 33972 -Gly GGG GGC 3684 18369 29617 -Gly GGG GGG 2412 18369 18369 -Gly GGG GGT 6993 18369 67526 -Gly GGT GGA 12611 67526 33972 -Gly GGT GGC 11446 67526 29617 -Gly GGT GGG 7074 67526 18369 -Gly GGT GGT 33684 67526 67526 -Ile ATA ATA 17537 56141 56141 -Ile ATA ATC 12815 56141 51253 -Ile ATA ATT 23895 56141 91241 -Ile ATC ATA 12861 51253 56141 -Ile ATC ATC 14044 51253 51253 -Ile ATC ATT 22614 51253 91241 -Ile ATT ATA 23544 91241 56141 -Ile ATT ATC 22697 91241 51253 -Ile ATT ATT 41955 91241 91241 -Leu CTA CTA 5965 40807 40807 -Leu CTA CTC 2347 40807 17519 -Leu CTA CTG 4517 40807 32446 -Leu CTA CTT 5289 40807 38489 -Leu CTA TTA 11155 40807 80235 -Leu CTA TTG 10611 40807 79741 -Leu CTC CTA 2315 17519 40807 -Leu CTC CTC 1451 17519 17519 -Leu CTC CTG 2103 17519 32446 -Leu CTC CTT 2488 17519 38489 -Leu CTC TTA 4293 17519 80235 -Leu CTC TTG 4369 17519 79741 -Leu CTG CTA 4565 32446 40807 -Leu CTG CTC 2081 32446 17519 -Leu CTG CTG 4185 32446 32446 -Leu CTG CTT 4266 32446 38489 -Leu CTG TTA 8204 32446 80235 -Leu CTG TTG 8451 32446 79741 -Leu CTT CTA 5163 38489 40807 -Leu CTT CTC 2517 38489 17519 -Leu CTT CTG 3976 38489 32446 -Leu CTT CTT 5708 38489 38489 -Leu CTT TTA 10331 38489 80235 -Leu CTT TTG 9849 38489 79741 -Leu TTA CTA 11266 80235 40807 -Leu TTA CTC 4445 80235 17519 -Leu TTA CTG 8393 80235 32446 -Leu TTA CTT 10237 80235 38489 -Leu TTA TTA 23244 80235 80235 -Leu TTA TTG 20816 80235 79741 -Leu TTG CTA 10567 79741 40807 -Leu TTG CTC 4181 79741 17519 -Leu TTG CTG 8498 79741 32446 -Leu TTG CTT 9547 79741 38489 -Leu TTG TTA 21155 79741 80235 -Leu TTG TTG 23978 79741 79741 -Pro CCA CCA 22189 53715 53715 -Pro CCA CCC 7373 53715 21058 -Pro CCA CCG 6096 53715 16552 -Pro CCA CCT 15317 53715 41144 -Pro CCC CCA 7432 21058 53715 -Pro CCC CCC 3581 21058 21058 -Pro CCC CCG 2636 21058 16552 -Pro CCC CCT 6336 21058 41144 -Pro CCG CCA 5929 16552 53715 -Pro CCG CCC 2754 16552 21058 -Pro CCG CCG 2160 16552 16552 -Pro CCG CCT 4909 16552 41144 -Pro CCT CCA 15618 41144 53715 -Pro CCT CCC 6211 41144 21058 -Pro CCT CCG 4740 41144 16552 -Pro CCT CCT 12547 41144 41144 -Ser AGC AGC 4132 30389 30389 -Ser AGC AGT 5480 30389 44212 -Ser AGC TCA 5667 30389 58166 -Ser AGC TCC 4194 30389 42870 -Ser AGC TCG 2899 30389 26681 -Ser AGC TCT 7056 30389 71104 -Ser AGT AGC 5488 44212 30389 -Ser AGT AGT 7912 44212 44212 -Ser AGT TCA 8910 44212 58166 -Ser AGT TCC 6232 44212 42870 -Ser AGT TCG 4001 44212 26681 -Ser AGT TCT 10442 44212 71104 -Ser TCA AGC 5918 58166 30389 -Ser TCA AGT 9079 58166 44212 -Ser TCA TCA 13273 58166 58166 -Ser TCA TCC 8552 58166 42870 -Ser TCA TCG 5712 58166 26681 -Ser TCA TCT 14334 58166 71104 -Ser TCC AGC 4290 42870 30389 -Ser TCC AGT 6442 42870 44212 -Ser TCC TCA 8459 42870 58166 -Ser TCC TCC 7202 42870 42870 -Ser TCC TCG 3962 42870 26681 -Ser TCC TCT 11482 42870 71104 -Ser TCG AGC 3026 26681 30389 -Ser TCG AGT 4005 26681 44212 -Ser TCG TCA 5644 26681 58166 -Ser TCG TCC 4133 26681 42870 -Ser TCG TCG 2905 26681 26681 -Ser TCG TCT 6407 26681 71104 -Ser TCT AGC 6774 71104 30389 -Ser TCT AGT 10200 71104 44212 -Ser TCT TCA 14782 71104 58166 -Ser TCT TCC 11480 71104 42870 -Ser TCT TCG 6546 71104 26681 -Ser TCT TCT 19700 71104 71104 -Thr ACA ACA 17506 54938 54938 -Thr ACA ACC 10284 54938 37813 -Thr ACA ACG 7851 54938 24772 -Thr ACA ACT 17315 54938 60844 -Thr ACC ACA 10436 37813 54938 -Thr ACC ACC 8390 37813 37813 -Thr ACC ACG 4731 37813 24772 -Thr ACC ACT 12750 37813 60844 -Thr ACG ACA 7756 24772 54938 -Thr ACG ACC 4726 24772 37813 -Thr ACG ACG 3674 24772 24772 -Thr ACG ACT 7654 24772 60844 -Thr ACT ACA 17192 60844 54938 -Thr ACT ACC 13016 60844 37813 -Thr ACT ACG 7571 60844 24772 -Thr ACT ACT 20832 60844 60844 -Val GTA GTA 8820 37107 37107 -Val GTA GTC 6609 37107 33983 -Val GTA GTG 7180 37107 32562 -Val GTA GTT 12940 37107 64798 -Val GTC GTA 6493 33983 37107 -Val GTC GTC 7295 33983 33983 -Val GTC GTG 5997 33983 32562 -Val GTC GTT 12867 33983 64798 -Val GTG GTA 7205 32562 37107 -Val GTG GTC 6058 32562 33983 -Val GTG GTG 6817 32562 32562 -Val GTG GTT 11264 32562 64798 -Val GTT GTA 12894 64798 37107 -Val GTT GTC 12790 64798 33983 -Val GTT GTG 11210 64798 32562 -Val GTT GTT 25333 64798 64798 diff --git a/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED b/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED deleted file mode 100644 index e8734d1..0000000 --- a/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED +++ /dev/null @@ -1,198 +0,0 @@ -AA Codon1 Codon2 Paired_total Total_codon1 Total_codon2 -Ala GCA GCA 15509 49169 49169 -Ala GCA GCC 9744 49169 36509 -Ala GCA GCG 5723 49169 18758 -Ala GCA GCT 16225 49169 60888 -Ala GCC GCA 9705 36509 49169 -Ala GCC GCC 8333 36509 36509 -Ala GCC GCG 3868 36509 18758 -Ala GCC GCT 13109 36509 60888 -Ala GCG GCA 5705 18758 49169 -Ala GCG GCC 3908 18758 36509 -Ala GCG GCG 2300 18758 18758 -Ala GCG GCT 6081 18758 60888 -Ala GCT GCA 16259 60888 49169 -Ala GCT GCC 13044 60888 36509 -Ala GCT GCG 6002 60888 18758 -Ala GCT GCT 23172 60888 60888 -Arg AGA AGA 30245 63169 63169 -Arg AGA AGG 12611 63169 28460 -Arg AGA CGA 3789 63169 9715 -Arg AGA CGC 3118 63169 8258 -Arg AGA CGG 2140 63169 5792 -Arg AGA CGT 8181 63169 19143 -Arg AGG AGA 12433 28460 63169 -Arg AGG AGG 6167 28460 28460 -Arg AGG CGA 2092 28460 9715 -Arg AGG CGC 1695 28460 8258 -Arg AGG CGG 1222 28460 5792 -Arg AGG CGT 3490 28460 19143 -Arg CGA AGA 3913 9715 63169 -Arg CGA AGG 2054 9715 28460 -Arg CGA CGA 907 9715 9715 -Arg CGA CGC 667 9715 8258 -Arg CGA CGG 544 9715 5792 -Arg CGA CGT 1191 9715 19143 -Arg CGC AGA 3094 8258 63169 -Arg CGC AGG 1631 8258 28460 -Arg CGC CGA 599 8258 9715 -Arg CGC CGC 664 8258 8258 -Arg CGC CGG 524 8258 5792 -Arg CGC CGT 1264 8258 19143 -Arg CGG AGA 2179 5792 63169 -Arg CGG AGG 1225 5792 28460 -Arg CGG CGA 474 5792 9715 -Arg CGG CGC 425 5792 8258 -Arg CGG CGG 366 5792 5792 -Arg CGG CGT 773 5792 19143 -Arg CGT AGA 8385 19143 63169 -Arg CGT AGG 3406 19143 28460 -Arg CGT CGA 1260 19143 9715 -Arg CGT CGC 1202 19143 8258 -Arg CGT CGG 672 19143 5792 -Arg CGT CGT 3297 19143 19143 -Gly GGA GGA 8457 33972 33972 -Gly GGA GGC 6884 33972 29617 -Gly GGA GGG 4279 33972 18369 -Gly GGA GGT 12662 33972 67526 -Gly GGC GGA 6776 29617 33972 -Gly GGC GGC 6180 29617 29617 -Gly GGC GGG 3681 29617 18369 -Gly GGC GGT 11597 29617 67526 -Gly GGG GGA 4442 18369 33972 -Gly GGG GGC 3684 18369 29617 -Gly GGG GGG 2412 18369 18369 -Gly GGG GGT 6993 18369 67526 -Gly GGT GGA 12611 67526 33972 -Gly GGT GGC 11446 67526 29617 -Gly GGT GGG 7074 67526 18369 -Gly GGT GGT 33684 67526 67526 -Ile ATA ATA 17537 56141 56141 -Ile ATA ATC 12815 56141 51253 -Ile ATA ATT 23895 56141 91241 -Ile ATC ATA 12861 51253 56141 -Ile ATC ATC 14044 51253 51253 -Ile ATC ATT 22614 51253 91241 -Ile ATT ATA 23544 91241 56141 -Ile ATT ATC 22697 91241 51253 -Ile ATT ATT 41955 91241 91241 -Leu CTA CTA 5965 40807 40807 -Leu CTA CTC 2347 40807 17519 -Leu CTA CTG 4517 40807 32446 -Leu CTA CTT 5289 40807 38489 -Leu CTA TTA 11155 40807 80235 -Leu CTA TTG 10611 40807 79741 -Leu CTC CTA 2315 17519 40807 -Leu CTC CTC 1451 17519 17519 -Leu CTC CTG 2103 17519 32446 -Leu CTC CTT 2488 17519 38489 -Leu CTC TTA 4293 17519 80235 -Leu CTC TTG 4369 17519 79741 -Leu CTG CTA 4565 32446 40807 -Leu CTG CTC 2081 32446 17519 -Leu CTG CTG 4185 32446 32446 -Leu CTG CTT 4266 32446 38489 -Leu CTG TTA 8204 32446 80235 -Leu CTG TTG 8451 32446 79741 -Leu CTT CTA 5163 38489 40807 -Leu CTT CTC 2517 38489 17519 -Leu CTT CTG 3976 38489 32446 -Leu CTT CTT 5708 38489 38489 -Leu CTT TTA 10331 38489 80235 -Leu CTT TTG 9849 38489 79741 -Leu TTA CTA 11266 80235 40807 -Leu TTA CTC 4445 80235 17519 -Leu TTA CTG 8393 80235 32446 -Leu TTA CTT 10237 80235 38489 -Leu TTA TTA 23244 80235 80235 -Leu TTA TTG 20816 80235 79741 -Leu TTG CTA 10567 79741 40807 -Leu TTG CTC 4181 79741 17519 -Leu TTG CTG 8498 79741 32446 -Leu TTG CTT 9547 79741 38489 -Leu TTG TTA 21155 79741 80235 -Leu TTG TTG 23978 79741 79741 -Pro CCA CCA 22189 53715 53715 -Pro CCA CCC 7373 53715 21058 -Pro CCA CCG 6096 53715 16552 -Pro CCA CCT 15317 53715 41144 -Pro CCC CCA 7432 21058 53715 -Pro CCC CCC 3581 21058 21058 -Pro CCC CCG 2636 21058 16552 -Pro CCC CCT 6336 21058 41144 -Pro CCG CCA 5929 16552 53715 -Pro CCG CCC 2754 16552 21058 -Pro CCG CCG 2160 16552 16552 -Pro CCG CCT 4909 16552 41144 -Pro CCT CCA 15618 41144 53715 -Pro CCT CCC 6211 41144 21058 -Pro CCT CCG 4740 41144 16552 -Pro CCT CCT 12547 41144 41144 -Ser AGC AGC 4132 30389 30389 -Ser AGC AGT 5480 30389 44212 -Ser AGC TCA 5667 30389 58166 -Ser AGC TCC 4194 30389 42870 -Ser AGC TCG 2899 30389 26681 -Ser AGC TCT 7056 30389 71104 -Ser AGT AGC 5488 44212 30389 -Ser AGT AGT 7912 44212 44212 -Ser AGT TCA 8910 44212 58166 -Ser AGT TCC 6232 44212 42870 -Ser AGT TCG 4001 44212 26681 -Ser AGT TCT 10442 44212 71104 -Ser TCA AGC 5918 58166 30389 -Ser TCA AGT 9079 58166 44212 -Ser TCA TCA 13273 58166 58166 -Ser TCA TCC 8552 58166 42870 -Ser TCA TCG 5712 58166 26681 -Ser TCA TCT 14334 58166 71104 -Ser TCC AGC 4290 42870 30389 -Ser TCC AGT 6442 42870 44212 -Ser TCC TCA 8459 42870 58166 -Ser TCC TCC 7202 42870 42870 -Ser TCC TCG 3962 42870 26681 -Ser TCC TCT 11482 42870 71104 -Ser TCG AGC 3026 26681 30389 -Ser TCG AGT 4005 26681 44212 -Ser TCG TCA 5644 26681 58166 -Ser TCG TCC 4133 26681 42870 -Ser TCG TCG 2905 26681 26681 -Ser TCG TCT 6407 26681 71104 -Ser TCT AGC 6774 71104 30389 -Ser TCT AGT 10200 71104 44212 -Ser TCT TCA 14782 71104 58166 -Ser TCT TCC 11480 71104 42870 -Ser TCT TCG 6546 71104 26681 -Ser TCT TCT 19700 71104 71104 -Thr ACA ACA 17506 54938 54938 -Thr ACA ACC 10284 54938 37813 -Thr ACA ACG 7851 54938 24772 -Thr ACA ACT 17315 54938 60844 -Thr ACC ACA 10436 37813 54938 -Thr ACC ACC 8390 37813 37813 -Thr ACC ACG 4731 37813 24772 -Thr ACC ACT 12750 37813 60844 -Thr ACG ACA 7756 24772 54938 -Thr ACG ACC 4726 24772 37813 -Thr ACG ACG 3674 24772 24772 -Thr ACG ACT 7654 24772 60844 -Thr ACT ACA 17192 60844 54938 -Thr ACT ACC 13016 60844 37813 -Thr ACT ACG 7571 60844 24772 -Thr ACT ACT 20832 60844 60844 -Val GTA GTA 8820 37107 37107 -Val GTA GTC 6609 37107 33983 -Val GTA GTG 7180 37107 32562 -Val GTA GTT 12940 37107 64798 -Val GTC GTA 6493 33983 37107 -Val GTC GTC 7295 33983 33983 -Val GTC GTG 5997 33983 32562 -Val GTC GTT 12867 33983 64798 -Val GTG GTA 7205 32562 37107 -Val GTG GTC 6058 32562 33983 -Val GTG GTG 6817 32562 32562 -Val GTG GTT 11264 32562 64798 -Val GTT GTA 12894 64798 37107 -Val GTT GTC 12790 64798 33983 -Val GTT GTG 11210 64798 32562 -Val GTT GTT 25333 64798 64798 diff --git a/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED.R b/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED.R deleted file mode 100644 index f06c460..0000000 --- a/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED.R +++ /dev/null @@ -1,703 +0,0 @@ -################################################# -### SCRIPT TO ANALYZE CODON AUTOCORRELATION ### -################################################# -# Eva Maria Novoa, August 2013 - - -# STEPS: -# 1. BUILD OBSERVED CODON CO-OCURRENCE MATRICES AND CODON COUNT VECTORS for each AA ### -# 2. METHOD 1: COMPUTES for each pair, STANDARD DEVIATIONS from EXPECTED. -# 3. METHOD 2: COMPUTES LOG LIKELIHOOD RATIOS & MAXIMUM LIKELIHOOD TEST. Outputs a p-value - - -# INFO: -## Input: merged files from "codon counts" and "codon pairs" -## Analysis only for codons from the AA: Ala, Arg, Gly, Ile, Leu, Ser, Pro, Thr, Val -## Thus, expects a total of 197 lines (all possible pair combinations within each amino acid) plus a header line -## Header line expected: AA Codon1 Codon2 Paired_total Total_codon1 Total_codon2 - -##### 1. READ DATA -data<-read.table("Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED",header=T) # file must be "***.merged" -head(data) -ala_data<-data[1:16,] -arg_data<-data[17:52,] -gly_data<-data[53:68,] -ile_data<-data[69:77,] -leu_data<-data[78:113,] -pro_data<-data[114:129,] -ser_data<-data[130:165,] -thr_data<-data[166:181,] -val_data<-data[182:197,] - -##### 2. TOTAL SINGLE CODON READS for each codon and Amino acid (single sum) -ala_per_codon<-as.matrix(ala_data[1:4,6]) -arg_per_codon<-as.matrix(arg_data[1:6,6]) -gly_per_codon<-as.matrix(gly_data[1:4,6]) -ile_per_codon<-as.matrix(ile_data[1:3,6]) -leu_per_codon<-as.matrix(leu_data[1:6,6]) -pro_per_codon<-as.matrix(pro_data[1:4,6]) -ser_per_codon<-as.matrix(ser_data[1:6,6]) -thr_per_codon<-as.matrix(thr_data[1:4,6]) -val_per_codon<-as.matrix(val_data[1:4,6]) - -rownames(ala_per_codon)<-ala_data[1:4,3] -rownames(arg_per_codon)<-arg_data[1:6,3] -rownames(gly_per_codon)<-gly_data[1:4,3] -rownames(ile_per_codon)<-ile_data[1:3,3] -rownames(leu_per_codon)<-leu_data[1:6,3] -rownames(pro_per_codon)<-pro_data[1:4,3] -rownames(ser_per_codon)<-ser_data[1:6,3] -rownames(thr_per_codon)<-thr_data[1:4,3] -rownames(val_per_codon)<-val_data[1:4,3] - -##### 3. TOTAL PAIRED CODON READS for each amino acid (build combination by combination and then bind to have a matrix) - -#Codon1 (A-ended) -ala_pairs_codon1<-as.matrix(ala_data[1:4,4]) -arg_pairs_codon1<-as.matrix(arg_data[1:6,4]) -gly_pairs_codon1<-as.matrix(gly_data[1:4,4]) -ile_pairs_codon1<-as.matrix(ile_data[1:3,4]) -leu_pairs_codon1<-as.matrix(leu_data[1:6,4]) -pro_pairs_codon1<-as.matrix(pro_data[1:4,4]) -ser_pairs_codon1<-as.matrix(ser_data[1:6,4]) -thr_pairs_codon1<-as.matrix(thr_data[1:4,4]) -val_pairs_codon1<-as.matrix(val_data[1:4,4]) -rownames(ala_pairs_codon1)<-ala_data[1:4,3] -rownames(arg_pairs_codon1)<-arg_data[1:6,3] -rownames(gly_pairs_codon1)<-gly_data[1:4,3] -rownames(ile_pairs_codon1)<-ile_data[1:3,3] -rownames(leu_pairs_codon1)<-leu_data[1:6,3] -rownames(pro_pairs_codon1)<-pro_data[1:4,3] -rownames(ser_pairs_codon1)<-ser_data[1:6,3] -rownames(thr_pairs_codon1)<-thr_data[1:4,3] -rownames(val_pairs_codon1)<-val_data[1:4,3] - -#Codon2 (C-ended) -ala_pairs_codon2<-as.matrix(ala_data[5:8,4]) -arg_pairs_codon2<-as.matrix(arg_data[7:12,4]) -gly_pairs_codon2<-as.matrix(gly_data[5:8,4]) -ile_pairs_codon2<-as.matrix(ile_data[4:6,4]) -leu_pairs_codon2<-as.matrix(leu_data[7:12,4]) -pro_pairs_codon2<-as.matrix(pro_data[5:8,4]) -ser_pairs_codon2<-as.matrix(ser_data[7:12,4]) -thr_pairs_codon2<-as.matrix(thr_data[5:8,4]) -val_pairs_codon2<-as.matrix(val_data[5:8,4]) -rownames(ala_pairs_codon2)<-ala_data[1:4,3] -rownames(arg_pairs_codon2)<-arg_data[1:6,3] -rownames(gly_pairs_codon2)<-gly_data[1:4,3] -rownames(ile_pairs_codon2)<-ile_data[1:3,3] -rownames(leu_pairs_codon2)<-leu_data[1:6,3] -rownames(pro_pairs_codon2)<-pro_data[1:4,3] -rownames(ser_pairs_codon2)<-ser_data[1:6,3] -rownames(thr_pairs_codon2)<-thr_data[1:4,3] -rownames(val_pairs_codon2)<-val_data[1:4,3] - -#Codon3 (G-ended) -ala_pairs_codon3<-as.matrix(ala_data[9:12,4]) -arg_pairs_codon3<-as.matrix(arg_data[13:18,4]) -gly_pairs_codon3<-as.matrix(gly_data[9:12,4]) -#ile_pairs_codon3<-as.matrix(ile_data[7:9,4]) --> doesn't exist G-ended in Ile -leu_pairs_codon3<-as.matrix(leu_data[13:18,4]) -pro_pairs_codon3<-as.matrix(pro_data[9:12,4]) -ser_pairs_codon3<-as.matrix(ser_data[13:18,4]) -thr_pairs_codon3<-as.matrix(thr_data[9:12,4]) -val_pairs_codon3<-as.matrix(val_data[9:12,4]) -rownames(ala_pairs_codon3)<-ala_data[1:4,3] -rownames(arg_pairs_codon3)<-arg_data[1:6,3] -rownames(gly_pairs_codon3)<-gly_data[1:4,3] -#rownames(ile_pairs_codon3)<-ile_data[1:3,3] -rownames(leu_pairs_codon3)<-leu_data[1:6,3] -rownames(pro_pairs_codon3)<-pro_data[1:4,3] -rownames(ser_pairs_codon3)<-ser_data[1:6,3] -rownames(thr_pairs_codon3)<-thr_data[1:4,3] -rownames(val_pairs_codon3)<-val_data[1:4,3] - -#Codon4 (T-ended) -ala_pairs_codon4<-as.matrix(ala_data[13:16,4]) -arg_pairs_codon4<-as.matrix(arg_data[19:24,4]) -gly_pairs_codon4<-as.matrix(gly_data[13:16,4]) -ile_pairs_codon4<-as.matrix(ile_data[7:9,4]) -leu_pairs_codon4<-as.matrix(leu_data[19:24,4]) -pro_pairs_codon4<-as.matrix(pro_data[13:16,4]) -ser_pairs_codon4<-as.matrix(ser_data[19:24,4]) -thr_pairs_codon4<-as.matrix(thr_data[13:16,4]) -val_pairs_codon4<-as.matrix(val_data[13:16,4]) -rownames(ala_pairs_codon4)<-ala_data[1:4,3] -rownames(arg_pairs_codon4)<-arg_data[1:6,3] -rownames(gly_pairs_codon4)<-gly_data[1:4,3] -rownames(ile_pairs_codon4)<-ile_data[1:3,3] -rownames(leu_pairs_codon4)<-leu_data[1:6,3] -rownames(pro_pairs_codon4)<-pro_data[1:4,3] -rownames(ser_pairs_codon4)<-ser_data[1:6,3] -rownames(thr_pairs_codon4)<-thr_data[1:4,3] -rownames(val_pairs_codon4)<-val_data[1:4,3] - -#Codon5 (only for 6box) -arg_pairs_codon5<-as.matrix(arg_data[25:30,4]) -leu_pairs_codon5<-as.matrix(leu_data[25:30,4]) -ser_pairs_codon5<-as.matrix(ser_data[25:30,4]) -rownames(arg_pairs_codon5)<-arg_data[1:6,3] -rownames(leu_pairs_codon5)<-leu_data[1:6,3] -rownames(ser_pairs_codon5)<-ser_data[1:6,3] - -#Codon6 (only for 6box) -arg_pairs_codon6<-as.matrix(arg_data[31:36,4]) -leu_pairs_codon6<-as.matrix(leu_data[31:36,4]) -ser_pairs_codon6<-as.matrix(ser_data[31:36,4]) -rownames(arg_pairs_codon6)<-arg_data[1:6,3] -rownames(leu_pairs_codon6)<-leu_data[1:6,3] -rownames(ser_pairs_codon6)<-ser_data[1:6,3] - -# Merge all into matrix of codon pairs -ala_pairs_all<-cbind(ala_pairs_codon1,ala_pairs_codon2,ala_pairs_codon3,ala_pairs_codon4) -arg_pairs_all<-cbind(arg_pairs_codon1,arg_pairs_codon2,arg_pairs_codon3,arg_pairs_codon4,arg_pairs_codon5,arg_pairs_codon6) -gly_pairs_all<-cbind(gly_pairs_codon1,gly_pairs_codon2,gly_pairs_codon3,gly_pairs_codon4) -ile_pairs_all<-cbind(ile_pairs_codon1,ile_pairs_codon2,ile_pairs_codon4) -leu_pairs_all<-cbind(leu_pairs_codon1,leu_pairs_codon2,leu_pairs_codon3,leu_pairs_codon4,leu_pairs_codon5,leu_pairs_codon6) -pro_pairs_all<-cbind(pro_pairs_codon1,pro_pairs_codon2,pro_pairs_codon3,pro_pairs_codon4) -ser_pairs_all<-cbind(ser_pairs_codon1,ser_pairs_codon2,ser_pairs_codon3,ser_pairs_codon4,ser_pairs_codon5,ser_pairs_codon6) -thr_pairs_all<-cbind(thr_pairs_codon1,thr_pairs_codon2,thr_pairs_codon3,thr_pairs_codon4) -val_pairs_all<-cbind(val_pairs_codon1,val_pairs_codon2,val_pairs_codon3,val_pairs_codon4) -colnames(ala_pairs_all)<-rownames(ala_pairs_codon4) -colnames(arg_pairs_all)<-rownames(arg_pairs_codon4) -colnames(gly_pairs_all)<-rownames(gly_pairs_codon4) -colnames(ile_pairs_all)<-rownames(ile_pairs_codon4) -colnames(leu_pairs_all)<-rownames(leu_pairs_codon4) -colnames(pro_pairs_all)<-rownames(pro_pairs_codon4) -colnames(ser_pairs_all)<-rownames(ser_pairs_codon4) -colnames(thr_pairs_all)<-rownames(thr_pairs_codon4) -colnames(val_pairs_all)<-rownames(val_pairs_codon4) - - -##### 4. GET TOTALS PER COLUMN, PER ROW AND PER AMINO ACID - -ala_row_sums<-rowSums(ala_pairs_all) -ala_col_sums<-colSums(ala_pairs_all) -ala_total_sum<-sum(colSums(ala_pairs_all)) - -arg_row_sums<-rowSums(arg_pairs_all) -arg_col_sums<-colSums(arg_pairs_all) -arg_total_sum<-sum(colSums(arg_pairs_all)) - -gly_row_sums<-rowSums(gly_pairs_all) -gly_col_sums<-colSums(gly_pairs_all) -gly_total_sum<-sum(colSums(gly_pairs_all)) - -ile_row_sums<-rowSums(ile_pairs_all) -ile_col_sums<-colSums(ile_pairs_all) -ile_total_sum<-sum(colSums(ile_pairs_all)) - -leu_row_sums<-rowSums(leu_pairs_all) -leu_col_sums<-colSums(leu_pairs_all) -leu_total_sum<-sum(colSums(leu_pairs_all)) - -pro_row_sums<-rowSums(pro_pairs_all) -pro_col_sums<-colSums(pro_pairs_all) -pro_total_sum<-sum(colSums(pro_pairs_all)) - -ser_row_sums<-rowSums(ser_pairs_all) -ser_col_sums<-colSums(ser_pairs_all) -ser_total_sum<-sum(colSums(ser_pairs_all)) - -thr_row_sums<-rowSums(thr_pairs_all) -thr_col_sums<-colSums(thr_pairs_all) -thr_total_sum<-sum(colSums(thr_pairs_all)) - -val_row_sums<-rowSums(val_pairs_all) -val_col_sums<-colSums(val_pairs_all) -val_total_sum<-sum(colSums(val_pairs_all)) - - -#For a given amino acid, there will be a difference between the total number of pairs and the total number of codons that corresponds to the number of sequences analyzed (because for example, if the protein has 7 Ala codons, that will give 6 Ala codon pairs). -ala_per_codon -colSums(ala_per_codon) -ala_pairs_all -colSums(ala_pairs_all) - -ala_num_of_first_codons<-colSums(ala_per_codon)-ala_total_sum -arg_num_of_first_codons<-colSums(arg_per_codon)-arg_total_sum -gly_num_of_first_codons<-colSums(gly_per_codon)-gly_total_sum -ile_num_of_first_codons<-colSums(ile_per_codon)-ile_total_sum -leu_num_of_first_codons<-colSums(leu_per_codon)-leu_total_sum -ser_num_of_first_codons<-colSums(ser_per_codon)-ser_total_sum -pro_num_of_first_codons<-colSums(pro_per_codon)-pro_total_sum -thr_num_of_first_codons<-colSums(thr_per_codon)-thr_total_sum -val_num_of_first_codons<-colSums(val_per_codon)-val_total_sum - -### PROBABILITIES - -# Codon counts probability vectors -prob_ala_codon<-ala_per_codon/sum(ala_per_codon) -prob_arg_codon<-arg_per_codon/sum(arg_per_codon) -prob_gly_codon<-gly_per_codon/sum(gly_per_codon) -prob_ile_codon<-ile_per_codon/sum(ile_per_codon) -prob_leu_codon<-leu_per_codon/sum(leu_per_codon) -prob_pro_codon<-pro_per_codon/sum(pro_per_codon) -prob_ser_codon<-ser_per_codon/sum(ser_per_codon) -prob_thr_codon<-thr_per_codon/sum(thr_per_codon) -prob_val_codon<-val_per_codon/sum(val_per_codon) - -# Codon pairs probability matrices (normalized per row) -prob_ala_pairs<-ala_pairs_all/ala_row_sums -prob_arg_pairs<-arg_pairs_all/arg_row_sums -prob_gly_pairs<-gly_pairs_all/gly_row_sums -prob_ile_pairs<-ile_pairs_all/ile_row_sums -prob_leu_pairs<-leu_pairs_all/leu_row_sums -prob_pro_pairs<-pro_pairs_all/pro_row_sums -prob_ser_pairs<-ser_pairs_all/ser_row_sums -prob_thr_pairs<-thr_pairs_all/thr_row_sums -prob_val_pairs<-val_pairs_all/val_row_sums - - - -##### 5. ANALYSIS METHOD 1: Z-SCORES AND STANDARD DEVIATIONS FROM EXPECTED - -## 5.1 OBTAIN EXPECTED CO-OCCURENCE MATRICES - -ala_exp<-matrix(0,ncol=length(ala_row_sums),nrow=length(ala_row_sums)) -for (i in 1:length(ala_row_sums)) { - for (j in 1:length(ala_row_sums)) { - ala_exp[i,j]<-prob_ala_codon[i]*prob_ala_codon[j]* colSums(ala_per_codon) - } -} -colnames(ala_exp)<-rownames(ala_pairs_all) -rownames(ala_exp)<-rownames(ala_pairs_all) - -arg_exp<-matrix(0,ncol=length(arg_row_sums),nrow=length(arg_row_sums)) -for (i in 1:length(arg_row_sums)) { - for (j in 1:length(arg_row_sums)) { - arg_exp[i,j]<-prob_arg_codon[i]*prob_arg_codon[j]* colSums(arg_per_codon) - } -} -colnames(arg_exp)<-rownames(arg_pairs_all) -rownames(arg_exp)<-rownames(arg_pairs_all) - -gly_exp<-matrix(0,ncol=length(gly_row_sums),nrow=length(gly_row_sums)) -for (i in 1:length(gly_row_sums)) { - for (j in 1:length(gly_row_sums)) { - gly_exp[i,j]<-prob_gly_codon[i]*prob_gly_codon[j]* colSums(gly_per_codon) - } -} -colnames(gly_exp)<-rownames(gly_pairs_all) -rownames(gly_exp)<-rownames(gly_pairs_all) - -ile_exp<-matrix(0,ncol=length(ile_row_sums),nrow=length(ile_row_sums)) -for (i in 1:length(ile_row_sums)) { - for (j in 1:length(ile_row_sums)) { - ile_exp[i,j]<-prob_ile_codon[i]*prob_ile_codon[j]* colSums(ile_per_codon) - } -} -colnames(ile_exp)<-rownames(ile_pairs_all) -rownames(ile_exp)<-rownames(ile_pairs_all) - -leu_exp<-matrix(0,ncol=length(leu_row_sums),nrow=length(leu_row_sums)) -for (i in 1:length(leu_row_sums)) { - for (j in 1:length(leu_row_sums)) { - leu_exp[i,j]<-prob_leu_codon[i]*prob_leu_codon[j]* colSums(leu_per_codon) - } -} -colnames(leu_exp)<-rownames(leu_pairs_all) -rownames(leu_exp)<-rownames(leu_pairs_all) - -pro_exp<-matrix(0,ncol=length(pro_row_sums),nrow=length(pro_row_sums)) -for (i in 1:length(pro_row_sums)) { - for (j in 1:length(pro_row_sums)) { - pro_exp[i,j]<-prob_pro_codon[i]*prob_pro_codon[j]* colSums(pro_per_codon) - } -} -colnames(pro_exp)<-rownames(pro_pairs_all) -rownames(pro_exp)<-rownames(pro_pairs_all) - -ser_exp<-matrix(0,ncol=length(ser_row_sums),nrow=length(ser_row_sums)) -for (i in 1:length(ser_row_sums)) { - for (j in 1:length(ser_row_sums)) { - ser_exp[i,j]<-prob_ser_codon[i]*prob_ser_codon[j]* colSums(ser_per_codon) - } -} -colnames(ser_exp)<-rownames(ser_pairs_all) -rownames(ser_exp)<-rownames(ser_pairs_all) - -thr_exp<-matrix(0,ncol=length(thr_row_sums),nrow=length(thr_row_sums)) -for (i in 1:length(thr_row_sums)) { - for (j in 1:length(thr_row_sums)) { - thr_exp[i,j]<-prob_thr_codon[i]*prob_thr_codon[j]* colSums(thr_per_codon) - } -} -colnames(thr_exp)<-rownames(thr_pairs_all) -rownames(thr_exp)<-rownames(thr_pairs_all) - -val_exp<-matrix(0,ncol=length(val_row_sums),nrow=length(val_row_sums)) -for (i in 1:length(val_row_sums)) { - for (j in 1:length(val_row_sums)) { - val_exp[i,j]<-prob_val_codon[i]*prob_val_codon[j]* colSums(val_per_codon) - } -} -colnames(val_exp)<-rownames(val_pairs_all) -rownames(val_exp)<-rownames(val_pairs_all) - - -## 5.2 COMPUTE Z-SCORES (=observed-expected/SE) - -#Observed: e.g. ala_pairs_all -#Expected: e.g. ala_exp -#Standard dev from expected = (obs-exp)/sqrt(exp*(1-exp/total) - -ala_sdevs<-matrix(0,ncol=length(ala_row_sums),nrow=length(ala_row_sums)) -for (i in 1:length(ala_row_sums)) { # i = row pos - for (j in 1:length(ala_row_sums)) { # j = col pos - ala_sdevs[i,j]<-(ala_pairs_all[i,j]-ala_exp[i,j])/(sqrt (ala_exp[i,j]*(1-ala_exp[i,j]/ala_total_sum))) - } -} -colnames(ala_sdevs)<-rownames(ala_pairs_all) -rownames(ala_sdevs)<-rownames(ala_pairs_all) - - -arg_sdevs<-matrix(0,ncol=length(arg_row_sums),nrow=length(arg_row_sums)) -for (i in 1:length(arg_row_sums)) { # i = row pos - for (j in 1:length(arg_row_sums)) { # j = col pos - arg_sdevs[i,j]<-(arg_pairs_all[i,j]-arg_exp[i,j])/(sqrt (arg_exp[i,j]*(1-arg_exp[i,j]/arg_total_sum))) - } -} -colnames(arg_sdevs)<-rownames(arg_pairs_all) -rownames(arg_sdevs)<-rownames(arg_pairs_all) - - -gly_sdevs<-matrix(0,ncol=length(gly_row_sums),nrow=length(gly_row_sums)) -for (i in 1:length(gly_row_sums)) { # i = row pos - for (j in 1:length(gly_row_sums)) { # j = col pos - gly_sdevs[i,j]<-(gly_pairs_all[i,j]-gly_exp[i,j])/(sqrt (gly_exp[i,j]*(1-gly_exp[i,j]/gly_total_sum))) - } -} -colnames(gly_sdevs)<-rownames(gly_pairs_all) -rownames(gly_sdevs)<-rownames(gly_pairs_all) - - -ile_sdevs<-matrix(0,ncol=length(ile_row_sums),nrow=length(ile_row_sums)) -for (i in 1:length(ile_row_sums)) { # i = row pos - for (j in 1:length(ile_row_sums)) { # j = col pos - ile_sdevs[i,j]<-(ile_pairs_all[i,j]-ile_exp[i,j])/(sqrt (ile_exp[i,j]*(1-ile_exp[i,j]/ile_total_sum))) - } -} -colnames(ile_sdevs)<-rownames(ile_pairs_all) -rownames(ile_sdevs)<-rownames(ile_pairs_all) - - -leu_sdevs<-matrix(0,ncol=length(leu_row_sums),nrow=length(leu_row_sums)) -for (i in 1:length(leu_row_sums)) { # i = row pos - for (j in 1:length(leu_row_sums)) { # j = col pos - leu_sdevs[i,j]<-(leu_pairs_all[i,j]-leu_exp[i,j])/(sqrt (leu_exp[i,j]*(1-leu_exp[i,j]/leu_total_sum))) - } -} -colnames(leu_sdevs)<-rownames(leu_pairs_all) -rownames(leu_sdevs)<-rownames(leu_pairs_all) - - -pro_sdevs<-matrix(0,ncol=length(pro_row_sums),nrow=length(pro_row_sums)) -for (i in 1:length(pro_row_sums)) { # i = row pos - for (j in 1:length(pro_row_sums)) { # j = col pos - pro_sdevs[i,j]<-(pro_pairs_all[i,j]-pro_exp[i,j])/(sqrt (pro_exp[i,j]*(1-pro_exp[i,j]/pro_total_sum))) - } -} -colnames(pro_sdevs)<-rownames(pro_pairs_all) -rownames(pro_sdevs)<-rownames(pro_pairs_all) - - -ser_sdevs<-matrix(0,ncol=length(ser_row_sums),nrow=length(ser_row_sums)) -for (i in 1:length(ser_row_sums)) { # i = row pos - for (j in 1:length(ser_row_sums)) { # j = col pos - ser_sdevs[i,j]<-(ser_pairs_all[i,j]-ser_exp[i,j])/(sqrt (ser_exp[i,j]*(1-ser_exp[i,j]/ser_total_sum))) - } -} -colnames(ser_sdevs)<-rownames(ser_pairs_all) -rownames(ser_sdevs)<-rownames(ser_pairs_all) - - -thr_sdevs<-matrix(0,ncol=length(thr_row_sums),nrow=length(thr_row_sums)) -for (i in 1:length(thr_row_sums)) { # i = row pos - for (j in 1:length(thr_row_sums)) { # j = col pos - thr_sdevs[i,j]<-(thr_pairs_all[i,j]-thr_exp[i,j])/(sqrt (thr_exp[i,j]*(1-thr_exp[i,j]/thr_total_sum))) - } -} -colnames(thr_sdevs)<-rownames(thr_pairs_all) -rownames(thr_sdevs)<-rownames(thr_pairs_all) - - -val_sdevs<-matrix(0,ncol=length(val_row_sums),nrow=length(val_row_sums)) -for (i in 1:length(val_row_sums)) { # i = row pos - for (j in 1:length(val_row_sums)) { # j = col pos - val_sdevs[i,j]<-(val_pairs_all[i,j]-val_exp[i,j])/(sqrt (val_exp[i,j]*(1-val_exp[i,j]/val_total_sum))) - } -} -colnames(val_sdevs)<-rownames(val_pairs_all) -rownames(val_sdevs)<-rownames(val_pairs_all) - -sink("Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_sdevs.txt",append=FALSE, split=FALSE) -ala_sdevs -arg_sdevs -gly_sdevs -ile_sdevs -leu_sdevs -ser_sdevs -pro_sdevs -thr_sdevs -val_sdevs -sink() - - -##### 6. ANALYSIS THROUGH LIKELIHOOD RATIO MODELS - -## 6.1. OBTAIN PROBABILITY MATRICES - -# Codon pairs probability matrices (normalized per matrix) ; ala_total_sum --> total sum of pairs --> "OBSERVED" MATRIX NORMALIZED TO 1 -prob_matrix_ala_pairs<-ala_pairs_all/ala_total_sum -prob_matrix_arg_pairs<-arg_pairs_all/arg_total_sum -prob_matrix_gly_pairs<-gly_pairs_all/gly_total_sum -prob_matrix_ile_pairs<-ile_pairs_all/ile_total_sum -prob_matrix_leu_pairs<-leu_pairs_all/leu_total_sum -prob_matrix_pro_pairs<-pro_pairs_all/pro_total_sum -prob_matrix_ser_pairs<-ser_pairs_all/ser_total_sum -prob_matrix_thr_pairs<-thr_pairs_all/thr_total_sum -prob_matrix_val_pairs<-val_pairs_all/val_total_sum - - -# Codon pairs probability matrices (normalized per matrix) --> "EXPECTED" MATRIX NORMALIZED TO 1 -prob_matrix_ala_exp<-ala_exp/colSums(ala_per_codon) -prob_matrix_arg_exp<-arg_exp/colSums(arg_per_codon) -prob_matrix_gly_exp<-gly_exp/colSums(gly_per_codon) -prob_matrix_ile_exp<-ile_exp/colSums(ile_per_codon) -prob_matrix_leu_exp<-leu_exp/colSums(leu_per_codon) -prob_matrix_pro_exp<-pro_exp/colSums(pro_per_codon) -prob_matrix_ser_exp<-ser_exp/colSums(ser_per_codon) -prob_matrix_thr_exp<-thr_exp/colSums(thr_per_codon) -prob_matrix_val_exp<-val_exp/colSums(val_per_codon) - -## 6.2. LOG LIKELIHOOD RATIOS AND THE P-VALUE of THE LOG LIKELIHOOD STATISTIC - -# Step 1 : -# Compute the log likelihood ratios -# log P(null)/P(alt) = log P(null) - log P(alt) -# P(null)= P(codon1)^num(codon1)*P(codon2)^num(codon2).... -# log P(null) = num(codon1)*log P(codon1) + num(codon2)*log P(codon2).... -# log P(alt)= log P(codon pairs) + log P(first codon) -# log P(alt)= num( - -# Step 2: -# Compute the lrt between two log likelihoods (Gerald Quon) - -lrt <- function(lnull,lalt,df=1) { - stat=-2*(lnull-lalt); - pval=1-pchisq(q=stat,df,ncp=0); - pval; -}; - -# Remember to change df for each case!! -# 3box --> df(null)=2;df(alt)=6+2 -->df2-1--> 6 -# 4box --> df(null)=3;df(alt)=12+3 -->df2-1--> 12 -# 6box --> df(null)=5;df(alt)=30+5 -->df2-1--> 30 - -#Ala -log_Pnull=0 # individual codons probabilities -for (i in 1:length(ala_row_sums)) { - log_Pnull<-log_Pnull+ala_per_codon[i]*log(prob_ala_codon[i]) -} - -log_Palt_part1=0 # first codon probabilities --> assumed from general codon abundances -for (i in 1:length(ala_row_sums)) { - log_Palt_part1<-log_Palt_part1+prob_ala_codon[i]*ala_num_of_first_codons*log(prob_ala_codon[i]) -} - -log_Palt_part2=0 # pair probability -for (i in 1:length(ala_row_sums)) { - for (j in 1:length(ala_row_sums)) { - log_Palt_part2<-log_Palt_part2+ala_pairs_all[i,j]*log(prob_ala_pairs[i,j]) # prob_ala_pairs OR prob_matrix_ala_pairs? - } -} -log_Palt<-log_Palt_part1+log_Palt_part2 -pval_ala<-lrt(log_Pnull,log_Palt,12) - -colSums(ala_per_codon) # num of ind. ala codons -ala_total_sum # num of ala pairs -ala_num_of_first_codons -log_Pnull -log_Palt -log_Palt_part1 -log_Palt_part2 -pval_ala -prob_ala_codon -ala_num_of_first_codons -ala_per_codon -ala_exp -prob_ala_pairs -ala_pairs_all - -# Ala codon pair by codon pair - -#a) Using number of codons multiplying the probabilities -ala_pval_matrix<-matrix(0,ncol=length(ala_row_sums),nrow=length(ala_row_sums)) -colnames(ala_pval_matrix)<-rownames(ala_pairs_all) -rownames(ala_pval_matrix)<-rownames(ala_pairs_all) - -for (i in 1:length(ala_row_sums)) { - for (j in 1:length(ala_row_sums)) { - log_Pnull<-ala_per_codon[i]*log(prob_ala_codon[i])+ala_per_codon[j]*log(prob_ala_codon[j]) - log_Palt<-ala_pairs_all[i,j]*log(prob_matrix_ala_pairs[i,j]) - ala_pval_matrix[i,j]<-lrt(log_Pnull,log_Palt,0) # df? - } -} - -log_Pnull -log_Palt -ala_per_codon -prob_ala_codon -ala_pairs_all -prob_matrix_ala_pairs -ala_pval_matrix - -#b) Only using the probabilities -for (i in 1:length(ala_row_sums)) { - for (j in 1:length(ala_row_sums)) { - log_Pnull<-log(prob_ala_codon[i])+log(prob_ala_codon[j]) - log_Palt<-log(prob_matrix_ala_pairs[i,j]) - ala_pval_matrix[i,j]<-lrt(log_Pnull,log_Palt,0) # df? - } -} - -log_Pnull -log_Palt -ala_per_codon -prob_ala_codon -ala_pairs_all -prob_matrix_ala_pairs -ala_pval_matrix - -#Val -#b) Only using the probabilities -val_pval_matrix<-matrix(0,ncol=length(val_row_sums),nrow=length(val_row_sums)) -colnames(val_pval_matrix)<-rownames(val_pairs_all) -rownames(val_pval_matrix)<-rownames(val_pairs_all) - -for (i in 1:length(val_row_sums)) { - for (j in 1:length(val_row_sums)) { - log_Pnull<-log(prob_val_codon[i])+log(prob_val_codon[j]) - log_Palt<-log(prob_matrix_val_pairs[i,j]) - val_pval_matrix[i,j]<-lrt(log_Pnull,log_Palt,1) # df? - } -} - -log_Pnull -log_Palt -prob_val_codon -prob_matrix_val_pairs -val_pval_matrix - - -## NOT WORKING YET - -##### 7. TO ADJUST FOR EFFECT SIZE (COMPARISON ACROSS GENOMES) BUILD MATRICES OF (P(obs)-P(exp))/P(exp) -- Irwin recomendation - -#prob_matrix_ala_pairs --> observed matrix (norm to sum 1) -#prob_matrix_ala_exp --> expected matrix (norm to sum 1) - -ala_effect_matrix<-(prob_matrix_ala_pairs-prob_matrix_ala_exp)/prob_matrix_ala_exp -arg_effect_matrix<-(prob_matrix_arg_pairs-prob_matrix_arg_exp)/prob_matrix_arg_exp -gly_effect_matrix<-(prob_matrix_gly_pairs-prob_matrix_gly_exp)/prob_matrix_gly_exp -ile_effect_matrix<-(prob_matrix_ile_pairs-prob_matrix_ile_exp)/prob_matrix_ile_exp -leu_effect_matrix<-(prob_matrix_leu_pairs-prob_matrix_leu_exp)/prob_matrix_leu_exp -pro_effect_matrix<-(prob_matrix_pro_pairs-prob_matrix_pro_exp)/prob_matrix_pro_exp -ser_effect_matrix<-(prob_matrix_ser_pairs-prob_matrix_ser_exp)/prob_matrix_ser_exp -thr_effect_matrix<-(prob_matrix_thr_pairs-prob_matrix_thr_exp)/prob_matrix_thr_exp -val_effect_matrix<-(prob_matrix_val_pairs-prob_matrix_val_exp)/prob_matrix_val_exp - -sink("Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_effect_matrix.txt",append=FALSE, split=FALSE) -ala_effect_matrix -arg_effect_matrix -gly_effect_matrix -ile_effect_matrix -leu_effect_matrix -ser_effect_matrix -pro_effect_matrix -thr_effect_matrix -val_effect_matrix -sink() - -# Convert into appended vectors, to have one vector per species -ala_effect_vector<-as.vector(t(ala_effect_matrix)) -arg_effect_vector<-as.vector(t(arg_effect_matrix)) -gly_effect_vector<-as.vector(t(gly_effect_matrix)) -ile_effect_vector<-as.vector(t(ile_effect_matrix)) -leu_effect_vector<-as.vector(t(leu_effect_matrix)) -ser_effect_vector<-as.vector(t(ser_effect_matrix)) -pro_effect_vector<-as.vector(t(pro_effect_matrix)) -thr_effect_vector<-as.vector(t(thr_effect_matrix)) -val_effect_vector<-as.vector(t(val_effect_matrix)) - -Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED<-c(ala_effect_vector, arg_effect_vector, gly_effect_vector, ile_effect_vector, leu_effect_vector, ser_effect_vector, pro_effect_vector, thr_effect_vector, val_effect_vector) # all_all_vector - -write.table(data.frame(Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED), "Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_effect_vector.txt", row.names=F) - -##### 8. BUILD NEW INDEX: Relative synonymous codon pair usage (RSCPU) (P(obs)-/P(exp) - -## Way A) (P(obs)-P(exp))/P(exp) -ala_rscpu_matrix<-prob_matrix_ala_pairs/prob_matrix_ala_exp -arg_rscpu_matrix<-prob_matrix_arg_pairs/prob_matrix_arg_exp -gly_rscpu_matrix<-prob_matrix_gly_pairs/prob_matrix_gly_exp -ile_rscpu_matrix<-prob_matrix_ile_pairs/prob_matrix_ile_exp -leu_rscpu_matrix<-prob_matrix_leu_pairs/prob_matrix_leu_exp -pro_rscpu_matrix<-prob_matrix_pro_pairs/prob_matrix_pro_exp -ser_rscpu_matrix<-prob_matrix_ser_pairs/prob_matrix_ser_exp -thr_rscpu_matrix<-prob_matrix_thr_pairs/prob_matrix_thr_exp -val_rscpu_matrix<-prob_matrix_val_pairs/prob_matrix_val_exp - - -# Convert into appended vectors, to have one vector per species -ala_rscpu_vector<-as.vector(t(ala_rscpu_matrix)) -arg_rscpu_vector<-as.vector(t(arg_rscpu_matrix)) -gly_rscpu_vector<-as.vector(t(gly_rscpu_matrix)) -ile_rscpu_vector<-as.vector(t(ile_rscpu_matrix)) -leu_rscpu_vector<-as.vector(t(leu_rscpu_matrix)) -ser_rscpu_vector<-as.vector(t(ser_rscpu_matrix)) -pro_rscpu_vector<-as.vector(t(pro_rscpu_matrix)) -thr_rscpu_vector<-as.vector(t(thr_rscpu_matrix)) -val_rscpu_vector<-as.vector(t(val_rscpu_matrix)) - -Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED<-c(ala_rscpu_vector, arg_rscpu_vector, gly_rscpu_vector, ile_rscpu_vector, leu_rscpu_vector, ser_rscpu_vector, pro_rscpu_vector, thr_rscpu_vector, val_rscpu_vector) # all_all_vector - -write.table(data.frame(Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED), "Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_rscpu_vector.txt", row.names=F) - - -## Way B) (P(obs)-P(exp))/P(exp) - - -##Relative synonymous codon usage -#RSCU(codon)= Obs frequency of codon(compared to total for that AA)/ Exp frequency of codon(compared to total for that AA). -#Ex. Ala(AGC)= obs frequency/0.25 - -##Relative synonumous codon pair usage -#RSCU(codon pair) = Obs frequency of codon(compared to total for that AA)/ Exp frequency of codon(compared to total for that AA -#Ex. Ala(AGC-AGC)= obs frequency/0.0625 - - -ala_rscpu2_matrix<-prob_matrix_ala_pairs/(1/16) -arg_rscpu2_matrix<-prob_matrix_arg_pairs/(1/36) -gly_rscpu2_matrix<-prob_matrix_gly_pairs/(1/16) -ile_rscpu2_matrix<-prob_matrix_ile_pairs/(1/9) -leu_rscpu2_matrix<-prob_matrix_leu_pairs/(1/36) -pro_rscpu2_matrix<-prob_matrix_pro_pairs/(1/16) -ser_rscpu2_matrix<-prob_matrix_ser_pairs/(1/36) -thr_rscpu2_matrix<-prob_matrix_thr_pairs/(1/16) -val_rscpu2_matrix<-prob_matrix_val_pairs/(1/16) - -# Convert into appended vectors, to have one vector per species -ala_rscpu2_vector<-as.vector(t(ala_rscpu2_matrix)) -arg_rscpu2_vector<-as.vector(t(arg_rscpu2_matrix)) -gly_rscpu2_vector<-as.vector(t(gly_rscpu2_matrix)) -ile_rscpu2_vector<-as.vector(t(ile_rscpu2_matrix)) -leu_rscpu2_vector<-as.vector(t(leu_rscpu2_matrix)) -ser_rscpu2_vector<-as.vector(t(ser_rscpu2_matrix)) -pro_rscpu2_vector<-as.vector(t(pro_rscpu2_matrix)) -thr_rscpu2_vector<-as.vector(t(thr_rscpu2_matrix)) -val_rscpu2_vector<-as.vector(t(val_rscpu2_matrix)) - -Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED<-c(ala_rscpu2_vector, arg_rscpu2_vector, gly_rscpu2_vector, ile_rscpu2_vector, leu_rscpu2_vector, ser_rscpu2_vector, pro_rscpu2_vector, thr_rscpu2_vector, val_rscpu2_vector) # all_all_vector - -write.table(data.frame(Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED), "Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_rscpu_vector_way2.txt", row.names=F) diff --git a/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_effect_matrix.txt b/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_effect_matrix.txt deleted file mode 100644 index 2755d9a..0000000 --- a/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_effect_matrix.txt +++ /dev/null @@ -1,50 +0,0 @@ - GCA GCC GCG GCT -GCA 0.10492170 -0.06881776 0.06538898 -0.06459173 -GCC -0.06507576 0.07679236 -0.01712442 0.01067053 -GCG 0.06875042 -0.02718456 0.12586218 -0.09487573 -GCT -0.06654781 0.01570684 -0.08296223 0.07654097 - AGA AGG CGA CGC CGG CGT -AGA 0.072659577 -0.02129149 -0.09764097 -0.16062166 -0.15716802 -0.01869187 -AGG -0.007279574 0.07750796 0.05133003 -0.01788913 0.05169297 -0.11525915 -CGA -0.126236039 0.07078014 0.35999718 0.05663616 0.19212917 -0.04118665 -CGC -0.154110644 0.02064863 0.17658818 0.37795378 0.25748215 0.07605859 -CGG -0.172253129 0.04911739 0.36818200 0.55040152 0.54397516 -0.14227701 -CGT -0.042566275 -0.09343936 -0.09369310 0.13156245 -0.01336329 0.27325467 - GGA GGC GGG GGT -GGA 0.14616408 0.05337719 0.11338309 -0.1401354 -GGC 0.07016656 0.10199352 0.05917016 -0.1048118 -GGG 0.07252729 0.05830764 0.11809568 -0.1079664 -GGT -0.13665801 -0.09300214 -0.11818053 0.1554584 - ATA ATC ATT -ATA 0.14364516 -0.081304506 -0.055272947 -ATC -0.08459041 0.098875579 -0.002402359 -ATT -0.04118871 -0.006050445 0.035857311 - CTA CTC CTG CTT TTA TTG -CTA 0.060694673 -0.04113760 0.02092591 -0.026625185 0.01887404 -0.03842174 -CTC -0.027883343 0.39990338 0.08405391 0.105316240 -0.06362918 -0.11378631 -CTG 0.010191092 0.09551435 0.17712453 -0.057247137 -0.04535593 -0.02742487 -CTT -0.002870541 0.09258117 0.01151502 0.140932588 -0.01842951 -0.07891885 -TTA 0.008835429 -0.09564906 -0.06685334 -0.009416356 0.06913471 -0.02092317 -TTG -0.034417814 -0.07393743 -0.03280390 -0.049782317 -0.03661247 0.11660322 - AGC AGT TCA TCC TCG TCT -AGC 0.254119220 0.14490344 -0.06157260 -0.07700632 0.04607239 -0.121288967 -AGT 0.143234487 0.13453340 -0.01044489 -0.04733896 -0.04836242 -0.090553185 -TCA -0.101374096 -0.02886485 0.09961847 -0.04915950 0.01935870 0.001800617 -TCC -0.097660722 -0.07839435 -0.03870576 0.09839230 0.01279407 0.055615359 -TCG 0.002169156 -0.04931287 0.03164013 -0.02910958 0.14380834 -0.032855884 -TCT -0.084708437 -0.06897611 -0.02856108 0.05579926 -0.05339255 0.092168104 - CCA CCC CCG CCT -CCA 0.07250121 -0.08368621 -0.069992149 -0.0144593339 -CCC -0.09096050 0.12621498 0.101913640 -0.0002552812 -CCG -0.04379695 0.05470020 0.099522239 -0.0293278377 -CCT -0.03345330 0.01986516 0.005280516 0.0336609209 - ACA ACC ACG ACT -ACA 0.07483160 -0.06906516 0.05609522 -0.04690763 -ACC -0.08262420 0.08737385 -0.06504501 0.04837786 -ACG 0.06903089 -0.06405585 0.10947163 -0.06916306 -ACT -0.04008874 0.02695280 -0.05895840 0.04278468 - GTA GTC GTG GTT -GTA 0.12355820 -0.09683621 0.04593753 -0.05939151 -GTC -0.08070083 0.10800199 -0.03972603 0.01879289 -GTG 0.04230832 -0.04939534 0.12774218 -0.06809506 -GTT -0.05603584 0.02492635 -0.06360595 0.05828367 diff --git a/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_effect_vector.txt b/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_effect_vector.txt deleted file mode 100644 index fd7dcfd..0000000 --- a/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_effect_vector.txt +++ /dev/null @@ -1,198 +0,0 @@ -"Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED" -0.104921701888251 --0.0688177572888358 -0.0653889808394691 --0.0645917293309439 --0.0650757575499656 -0.0767923606771531 --0.0171244197931674 -0.0106705308585436 -0.0687504184652553 --0.0271845587922137 -0.125862183931156 --0.0948757310444828 --0.0665478078845295 -0.015706837551721 --0.0829622326693603 -0.0765409665237561 -0.0726595774723123 --0.0212914875019273 --0.0976409657738499 --0.160621659067065 --0.157168022049154 --0.0186918734740361 --0.00727957442988851 -0.0775079584868218 -0.0513300263608309 --0.0178891333167876 -0.0516929663137883 --0.115259153460923 --0.12623603867036 -0.0707801436936991 -0.359997176745521 -0.0566361589934048 -0.192129165569057 --0.0411866540335954 --0.15411064414063 -0.0206486321447241 -0.176588177042739 -0.377953784479803 -0.257482147087888 -0.0760585916291342 --0.1722531285843 -0.0491173917024076 -0.368181995927357 -0.550401517821301 -0.543975158782316 --0.14227701141219 --0.0425662751211794 --0.0934393557189139 --0.0936930991698508 -0.131562445773066 --0.0133632884250336 -0.273254670986774 -0.146164083083823 -0.0533771947759265 -0.113383087202063 --0.140135377486621 -0.0701665597457907 -0.101993517159654 -0.0591701609242743 --0.104811803748051 -0.0725272917914517 -0.0583076445065834 -0.118095682354466 --0.107966402046631 --0.13665800886017 --0.0930021394431371 --0.118180527214036 -0.155458375795056 -0.143645161785697 --0.0813045058661671 --0.0552729472251948 --0.0845904084188578 -0.0988755791083539 --0.00240235922844972 --0.0411887136402494 --0.00605044506287885 -0.0358573114227829 -0.060694673300244 --0.0411375962714988 -0.020925910037705 --0.0266251847323358 -0.0188740420590131 --0.0384217362952973 --0.0278833427426382 -0.399903383049219 -0.0840539085406802 -0.105316240422842 --0.0636291794889333 --0.11378631302906 -0.0101910921446471 -0.0955143535132392 -0.17712453133286 --0.057247136672282 --0.0453559321564203 --0.0274248672918068 --0.00287054078042293 -0.0925811705093491 -0.0115150188521239 -0.140932588407342 --0.0184295070167115 --0.0789188529506892 -0.00883542864976827 --0.0956490590654647 --0.0668533381879271 --0.00941635606033492 -0.0691347084782484 --0.0209231747797454 --0.0344178143114791 --0.0739374316249617 --0.0328039013277312 --0.0497823172422057 --0.0366124701590725 -0.116603221244518 -0.254119219501185 -0.144903442330671 --0.0615725958205222 --0.0770063175049681 -0.0460723927678094 --0.121288967419996 -0.143234486875379 -0.134533397499429 --0.0104448924691821 --0.0473389602707081 --0.0483624172623195 --0.0905531848155133 --0.101374096065377 --0.028864852065251 -0.099618468611429 --0.0491595006892021 -0.0193586961057754 -0.00180061738241349 --0.0976607215887731 --0.0783943496440629 --0.0387057630800396 -0.0983922980403901 -0.0127940653810512 -0.0556153591727642 -0.00216915619097141 --0.0493128667831562 -0.031640126179339 --0.0291095845536596 -0.143808342971038 --0.0328558835899142 --0.0847084372771619 --0.068976113317999 --0.0285610844568046 -0.0557992642876025 --0.0533925521174121 -0.0921681043299867 -0.0725012130923806 --0.0836862118444973 --0.0699921493170756 --0.0144593338838623 --0.0909605005287242 -0.126214981791138 -0.101913639928946 --0.000255281179636255 --0.0437969543324158 -0.0547002014715696 -0.0995222391049838 --0.029327837708489 --0.033453298572104 -0.0198651647795564 -0.00528051575717876 -0.0336609208838984 -0.0748316010699172 --0.069065161554672 -0.0560952190679225 --0.0469076310450297 --0.0826241971472064 -0.087373848565119 --0.0650450101158224 -0.0483778583379641 -0.0690308876872434 --0.0640558491024029 -0.109471630883217 --0.0691630601975962 --0.0400887407831951 -0.0269528037652922 --0.0589584021598733 -0.0427846847087729 -0.123558197078949 --0.0968362065338968 -0.0459375260081441 --0.0593915065106629 --0.0807008299680462 -0.108001991863246 --0.0397260283217286 -0.0187928851503963 -0.0423083187700868 --0.0493953436522625 -0.127742184340468 --0.0680950576654202 --0.0560358379283371 -0.0249263528717865 --0.0636059526800439 -0.0582836730532565 diff --git a/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_rscpu_vector.txt b/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_rscpu_vector.txt deleted file mode 100644 index af2ee1c..0000000 --- a/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_rscpu_vector.txt +++ /dev/null @@ -1,198 +0,0 @@ -"Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED" -1.10492170188825 -0.931182242711164 -1.06538898083947 -0.935408270669056 -0.934924242450034 -1.07679236067715 -0.982875580206833 -1.01067053085854 -1.06875041846526 -0.972815441207786 -1.12586218393116 -0.905124268955517 -0.93345219211547 -1.01570683755172 -0.91703776733064 -1.07654096652376 -1.07265957747231 -0.978708512498073 -0.90235903422615 -0.839378340932935 -0.842831977950846 -0.981308126525964 -0.992720425570112 -1.07750795848682 -1.05133002636083 -0.982110866683212 -1.05169296631379 -0.884740846539077 -0.87376396132964 -1.0707801436937 -1.35999717674552 -1.0566361589934 -1.19212916556906 -0.958813345966405 -0.84588935585937 -1.02064863214472 -1.17658817704274 -1.3779537844798 -1.25748214708789 -1.07605859162913 -0.8277468714157 -1.04911739170241 -1.36818199592736 -1.5504015178213 -1.54397515878232 -0.85772298858781 -0.957433724878821 -0.906560644281086 -0.906306900830149 -1.13156244577307 -0.986636711574966 -1.27325467098677 -1.14616408308382 -1.05337719477593 -1.11338308720206 -0.859864622513379 -1.07016655974579 -1.10199351715965 -1.05917016092427 -0.895188196251949 -1.07252729179145 -1.05830764450658 -1.11809568235447 -0.892033597953369 -0.86334199113983 -0.906997860556863 -0.881819472785964 -1.15545837579506 -1.1436451617857 -0.918695494133833 -0.944727052774805 -0.915409591581142 -1.09887557910835 -0.99759764077155 -0.958811286359751 -0.993949554937121 -1.03585731142278 -1.06069467330024 -0.958862403728501 -1.02092591003771 -0.973374815267664 -1.01887404205901 -0.961578263704703 -0.972116657257362 -1.39990338304922 -1.08405390854068 -1.10531624042284 -0.936370820511067 -0.88621368697094 -1.01019109214465 -1.09551435351324 -1.17712453133286 -0.942752863327718 -0.95464406784358 -0.972575132708193 -0.997129459219577 -1.09258117050935 -1.01151501885212 -1.14093258840734 -0.981570492983288 -0.921081147049311 -1.00883542864977 -0.904350940934535 -0.933146661812073 -0.990583643939665 -1.06913470847825 -0.979076825220255 -0.965582185688521 -0.926062568375038 -0.967196098672269 -0.950217682757794 -0.963387529840928 -1.11660322124452 -1.25411921950118 -1.14490344233067 -0.938427404179478 -0.922993682495032 -1.04607239276781 -0.878711032580004 -1.14323448687538 -1.13453339749943 -0.989555107530818 -0.952661039729292 -0.951637582737681 -0.909446815184487 -0.898625903934623 -0.971135147934749 -1.09961846861143 -0.950840499310798 -1.01935869610578 -1.00180061738241 -0.902339278411227 -0.921605650355937 -0.96129423691996 -1.09839229804039 -1.01279406538105 -1.05561535917276 -1.00216915619097 -0.950687133216844 -1.03164012617934 -0.97089041544634 -1.14380834297104 -0.967144116410086 -0.915291562722838 -0.931023886682001 -0.971438915543195 -1.0557992642876 -0.946607447882588 -1.09216810432999 -1.07250121309238 -0.916313788155503 -0.930007850682924 -0.985540666116138 -0.909039499471276 -1.12621498179114 -1.10191363992895 -0.999744718820364 -0.956203045667584 -1.05470020147157 -1.09952223910498 -0.970672162291511 -0.966546701427896 -1.01986516477956 -1.00528051575718 -1.0336609208839 -1.07483160106992 -0.930934838445328 -1.05609521906792 -0.95309236895497 -0.917375802852794 -1.08737384856512 -0.934954989884178 -1.04837785833796 -1.06903088768724 -0.935944150897597 -1.10947163088322 -0.930836939802404 -0.959911259216805 -1.02695280376529 -0.941041597840127 -1.04278468470877 -1.12355819707895 -0.903163793466103 -1.04593752600814 -0.940608493489337 -0.919299170031954 -1.10800199186325 -0.960273971678271 -1.0187928851504 -1.04230831877009 -0.950604656347737 -1.12774218434047 -0.93190494233458 -0.943964162071663 -1.02492635287179 -0.936394047319956 -1.05828367305326 diff --git a/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_rscpu_vector_way2.txt b/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_rscpu_vector_way2.txt deleted file mode 100644 index 15206b3..0000000 --- a/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_rscpu_vector_way2.txt +++ /dev/null @@ -1,198 +0,0 @@ -"Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED" -1.5637323788338 -0.978530062323946 -0.57522040242742 -1.6393529400644 -0.982462331507937 -0.840194848979438 -0.394033537718906 -1.31519280092257 -0.577035295896954 -0.390000441119941 -0.231903054440502 -0.605166144674737 -1.63592480795528 -1.32174658289589 -0.613131510457693 -2.33637285978057 -8.51312363661952 -3.499542607839 -1.10140032369291 -0.870874674547885 -0.613327703891352 -2.36014355077053 -3.54964464147491 -1.73583843501513 -0.57814369150658 -0.45908099359651 -0.344803321370769 -0.958693969460277 -1.06649778340722 -0.58883963127155 -0.255295193863908 -0.168601787347829 -0.133417774963057 -0.354654844838505 -0.877630004925762 -0.477095207937513 -0.187741890085145 -0.186897473787911 -0.119625642108226 -0.338329463091971 -0.602350292027303 -0.343958905073535 -0.153120821898529 -0.147491379916966 -0.103018788262613 -0.189149250580536 -2.30272324255858 -0.982337625782844 -0.335233270002111 -0.355780733234818 -0.217577932587432 -0.928013510660756 -0.947151796838907 -0.758886197869272 -0.497487085439095 -1.41238397894472 -0.770981786619255 -0.69213646736011 -0.412593971804959 -1.28190841511389 -0.479231706121992 -0.41225798322857 -0.270134815416276 -0.792261063123854 -1.41809578474332 -1.29881984012544 -0.783189371561367 -3.77247973568899 -0.822209603984122 -0.602978714537252 -1.10384346901991 -0.600822037694961 -0.658442816807493 -1.06413248455423 -1.12029985101218 -1.06024108938227 -1.96702993300757 -0.760071639424336 -0.294981700799218 -0.58168097803388 -0.657879274827804 -1.4355351365892 -1.34646722779496 -0.299059201631 -0.184889178341108 -0.265164975966814 -0.320720924799841 -0.566390349914698 -0.532750968052498 -0.575564726786207 -0.267968257788664 -0.533260655656471 -0.506629478348895 -1.06945201503578 -1.08283131464007 -0.673934434352945 -0.317025689671039 -0.543581829636918 -0.727324210869088 -1.30441800046721 -1.21649688878192 -1.42139130557896 -0.547022220963734 -1.04536927574807 -1.31639565916057 -2.96179466668554 -2.69561031551078 -1.35207379143866 -0.556706285439216 -1.07684248529339 -1.25497830288186 -2.6524142910741 -3.05532234201454 -0.557708458308338 -0.740731853629274 -0.79877024595081 -0.579034193161368 -0.408428314337133 -0.914307138572286 -0.739652069586083 -1.06790641871626 -1.2254199160168 -0.869496100779844 -0.540566886622675 -1.37672465506899 -0.764892021595681 -1.20260947810438 -1.79149670065987 -1.14173665266947 -0.761787642471506 -1.99517096580684 -0.566076784643071 -0.841151769646071 -1.15428914217157 -0.972075584883023 -0.557843431313737 -1.5494901019796 -0.39128674265147 -0.54002699460108 -0.770965806838632 -0.534763047390522 -0.392096580683863 -0.883533293341332 -0.952369526094781 -1.40938812237552 -1.93470305938812 -1.5497600479904 -0.864772045590882 -2.65896820635873 -2.8215023683123 -0.94503608099946 -0.753918046857615 -1.98594907333821 -0.937533776266014 -0.455351750007947 -0.350192326032362 -0.789776520329338 -0.775153383984487 -0.33518771656547 -0.274660647868519 -0.602727532822583 -1.9476746034269 -0.805671233747655 -0.624217185364148 -1.59544775407699 -1.63146245427646 -0.972577526152699 -0.722816336991216 -1.6021993895762 -0.958411966170406 -0.781901633233149 -0.440437082081033 -1.21301926795741 -0.73166981198015 -0.440903054448871 -0.342396495887794 -0.705575359381189 -1.61366230982503 -1.1882295379884 -0.713310500687309 -1.94142727336269 -0.872338847266523 -0.642187770442351 -0.712607867863413 -1.27527631481344 -0.653660707662636 -0.721509284672255 -0.599164255866281 -1.26499023316767 -0.710135252083179 -0.59313107336251 -0.674232870954182 -1.10872091585688 -1.27982592784907 -1.27260588977079 -1.11406176594219 -2.50555102242663 diff --git a/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_sdevs.txt b/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_sdevs.txt deleted file mode 100644 index 014d842..0000000 --- a/results_examples/Saccharomyces_cerevisiae.CDS.fa.codon_counts.merged.FIXED_sdevs.txt +++ /dev/null @@ -1,50 +0,0 @@ - GCA GCC GCG GCT -GCA 7.686571 -11.465556 1.719912 -14.603882 -GCC -11.077783 3.093353 -3.690202 -3.624453 -GCG 1.965255 -4.319967 3.746540 -11.151449 -GCT -14.872322 -3.038529 -10.179594 5.385028 - AGA AGG CGA CGC CGG CGT -AGA 3.878029 -8.49960371 -9.77743712 -12.7756411 -10.476738300 -6.5984385 -AGG -6.872431 1.93499011 -0.02477794 -2.7921298 -0.006933527 -10.2764822 -CGA -11.647065 0.82027347 7.77909246 0.1101982 2.730793204 -3.3081073 -CGC -12.384235 -1.25031472 2.90136461 6.9924686 3.690161736 0.7908914 -CGG -11.232676 -0.09305286 6.15921691 8.9480190 7.394145827 -5.3165035 -CGT -8.830041 -8.93506408 -5.17407125 2.6079724 -1.786893586 11.1013820 - GGA GGC GGG GGT -GGA 8.617695 0.5642447 4.2008900 -23.36939 -GGC 1.912802 4.1597661 0.7486052 -17.55219 -GGG 1.640410 0.6982309 3.2835952 -13.84253 -GGT -22.933626 -16.1809331 -14.7587433 20.53521 - ATA ATC ATT -ATA 13.83906 -14.040293 -15.0177226 -ATC -14.43778 7.384145 -5.8833605 -ATT -12.66849 -6.460875 0.2450823 - CTA CTC CTG CTT TTA TTG -CTA 2.7662595 -3.165142 -0.1884080 -3.661634 -0.5175876 -6.573966 -CTC -2.5186507 11.991263 2.6202843 3.862703 -6.0021608 -9.417727 -CTG -0.9036737 3.118285 9.0969876 -5.239133 -6.5089101 -4.804768 -CTT -1.9350964 3.259586 -0.7915904 8.266788 -4.3404164 -10.530606 -TTA -1.5824174 -8.201548 -8.5335910 -3.413011 6.8902539 -6.760610 -TTG -6.1506204 -6.689166 -5.3097663 -7.542215 -9.1347585 14.003290 - AGC AGT TCA TCC TCG TCT -AGC 13.064987 8.266873 -6.8840767 -6.939433 1.1188480 -12.889368 -AGT 8.151682 9.147157 -3.4262448 -5.963585 -4.7472848 -12.369482 -TCA -10.044358 -5.200411 8.2773930 -7.041808 -0.4285444 -2.881516 -TCC -8.342787 -8.519270 -6.0508777 5.934674 -0.7843306 3.207953 -TCG -1.226401 -4.808682 0.4838044 -3.449150 5.9363337 -4.773979 -TCT -9.669102 -10.062292 -6.6320316 3.227303 -6.4648365 9.218045 - CCA CCC CCG CCT -CCA 3.040494 -12.40629152 -9.819201 -8.857431 -CCC -13.067613 4.09063443 2.419478 -4.184182 -CCG -7.724107 0.09463201 2.036009 -5.709773 -CCT -11.359563 -2.59674544 -3.303064 -2.165700 - ACA ACC ACG ACT -ACA 4.735030 -11.618715 1.476702 -11.9828681 -ACC -13.077523 4.276175 -7.365768 1.0743771 -ACG 2.589292 -7.295692 4.023313 -9.8082084 -ACT -11.030914 -1.361016 -8.882220 0.5706565 - GTA GTC GTG GTT -GTA 7.33153326 -11.751400 0.3874841 -12.096846 -GTC -10.37854982 5.421614 -6.4373031 -2.575402 -GTG 0.08553048 -7.205691 6.7197048 -12.239188 -GTT -11.69362489 -1.872959 -11.7368539 2.803035 diff --git a/results_examples/Saccharomyces_cerevisiae.CDS.fa.paired_codons b/results_examples/Saccharomyces_cerevisiae.CDS.fa.paired_codons deleted file mode 100644 index 30a6652..0000000 --- a/results_examples/Saccharomyces_cerevisiae.CDS.fa.paired_codons +++ /dev/null @@ -1,197 +0,0 @@ -Ala GCA GCA 15509 -Ala GCA GCC 9744 -Ala GCA GCG 5723 -Ala GCA GCT 16225 -Ala GCC GCA 9705 -Ala GCC GCC 8333 -Ala GCC GCG 3868 -Ala GCC GCT 13109 -Ala GCG GCA 5705 -Ala GCG GCC 3908 -Ala GCG GCG 2300 -Ala GCG GCT 6081 -Ala GCT GCA 16259 -Ala GCT GCC 13044 -Ala GCT GCG 6002 -Ala GCT GCT 23172 -Arg AGA AGA 30245 -Arg AGA AGG 12611 -Arg AGA CGA 3789 -Arg AGA CGC 3118 -Arg AGA CGG 2140 -Arg AGA CGT 8181 -Arg AGG AGA 12433 -Arg AGG AGG 6167 -Arg AGG CGA 2092 -Arg AGG CGC 1695 -Arg AGG CGG 1222 -Arg AGG CGT 3490 -Arg CGA AGA 3913 -Arg CGA AGG 2054 -Arg CGA CGA 907 -Arg CGA CGC 667 -Arg CGA CGG 544 -Arg CGA CGT 1191 -Arg CGC AGA 3094 -Arg CGC AGG 1631 -Arg CGC CGA 599 -Arg CGC CGC 664 -Arg CGC CGG 524 -Arg CGC CGT 1264 -Arg CGG AGA 2179 -Arg CGG AGG 1225 -Arg CGG CGA 474 -Arg CGG CGC 425 -Arg CGG CGG 366 -Arg CGG CGT 773 -Arg CGT AGA 8385 -Arg CGT AGG 3406 -Arg CGT CGA 1260 -Arg CGT CGC 1202 -Arg CGT CGG 672 -Arg CGT CGT 3297 -Gly GGA GGA 8457 -Gly GGA GGC 6884 -Gly GGA GGG 4279 -Gly GGA GGT 12662 -Gly GGC GGA 6776 -Gly GGC GGC 6180 -Gly GGC GGG 3681 -Gly GGC GGT 11597 -Gly GGG GGA 4442 -Gly GGG GGC 3684 -Gly GGG GGG 2412 -Gly GGG GGT 6993 -Gly GGT GGA 12611 -Gly GGT GGC 11446 -Gly GGT GGG 7074 -Gly GGT GGT 33684 -Ile ATA ATA 17537 -Ile ATA ATC 12815 -Ile ATA ATT 23895 -Ile ATC ATA 12861 -Ile ATC ATC 14044 -Ile ATC ATT 22614 -Ile ATT ATA 23544 -Ile ATT ATC 22697 -Ile ATT ATT 41955 -Leu CTA CTA 5965 -Leu CTA CTC 2347 -Leu CTA CTG 4517 -Leu CTA CTT 5289 -Leu CTA TTA 11155 -Leu CTA TTG 10611 -Leu CTC CTA 2315 -Leu CTC CTC 1451 -Leu CTC CTG 2103 -Leu CTC CTT 2488 -Leu CTC TTA 4293 -Leu CTC TTG 4369 -Leu CTG CTA 4565 -Leu CTG CTC 2081 -Leu CTG CTG 4185 -Leu CTG CTT 4266 -Leu CTG TTA 8204 -Leu CTG TTG 8451 -Leu CTT CTA 5163 -Leu CTT CTC 2517 -Leu CTT CTG 3976 -Leu CTT CTT 5708 -Leu CTT TTA 10331 -Leu CTT TTG 9849 -Leu TTA CTA 11266 -Leu TTA CTC 4445 -Leu TTA CTG 8393 -Leu TTA CTT 10237 -Leu TTA TTA 23244 -Leu TTA TTG 20816 -Leu TTG CTA 10567 -Leu TTG CTC 4181 -Leu TTG CTG 8498 -Leu TTG CTT 9547 -Leu TTG TTA 21155 -Leu TTG TTG 23978 -Pro CCA CCA 22189 -Pro CCA CCC 7373 -Pro CCA CCG 6096 -Pro CCA CCT 15317 -Pro CCC CCA 7432 -Pro CCC CCC 3581 -Pro CCC CCG 2636 -Pro CCC CCT 6336 -Pro CCG CCA 5929 -Pro CCG CCC 2754 -Pro CCG CCG 2160 -Pro CCG CCT 4909 -Pro CCT CCA 15618 -Pro CCT CCC 6211 -Pro CCT CCG 4740 -Pro CCT CCT 12547 -Ser AGC AGC 4132 -Ser AGC AGT 5480 -Ser AGC TCA 5667 -Ser AGC TCC 4194 -Ser AGC TCG 2899 -Ser AGC TCT 7056 -Ser AGT AGC 5488 -Ser AGT AGT 7912 -Ser AGT TCA 8910 -Ser AGT TCC 6232 -Ser AGT TCG 4001 -Ser AGT TCT 10442 -Ser TCA AGC 5918 -Ser TCA AGT 9079 -Ser TCA TCA 13273 -Ser TCA TCC 8552 -Ser TCA TCG 5712 -Ser TCA TCT 14334 -Ser TCC AGC 4290 -Ser TCC AGT 6442 -Ser TCC TCA 8459 -Ser TCC TCC 7202 -Ser TCC TCG 3962 -Ser TCC TCT 11482 -Ser TCG AGC 3026 -Ser TCG AGT 4005 -Ser TCG TCA 5644 -Ser TCG TCC 4133 -Ser TCG TCG 2905 -Ser TCG TCT 6407 -Ser TCT AGC 6774 -Ser TCT AGT 10200 -Ser TCT TCA 14782 -Ser TCT TCC 11480 -Ser TCT TCG 6546 -Ser TCT TCT 19700 -Thr ACA ACA 17506 -Thr ACA ACC 10284 -Thr ACA ACG 7851 -Thr ACA ACT 17315 -Thr ACC ACA 10436 -Thr ACC ACC 8390 -Thr ACC ACG 4731 -Thr ACC ACT 12750 -Thr ACG ACA 7756 -Thr ACG ACC 4726 -Thr ACG ACG 3674 -Thr ACG ACT 7654 -Thr ACT ACA 17192 -Thr ACT ACC 13016 -Thr ACT ACG 7571 -Thr ACT ACT 20832 -Val GTA GTA 8820 -Val GTA GTC 6609 -Val GTA GTG 7180 -Val GTA GTT 12940 -Val GTC GTA 6493 -Val GTC GTC 7295 -Val GTC GTG 5997 -Val GTC GTT 12867 -Val GTG GTA 7205 -Val GTG GTC 6058 -Val GTG GTG 6817 -Val GTG GTT 11264 -Val GTT GTA 12894 -Val GTT GTC 12790 -Val GTT GTG 11210 -Val GTT GTT 25333 diff --git a/results_examples/Saccharomyces_cerevisiae.CDS.fa.total_codons b/results_examples/Saccharomyces_cerevisiae.CDS.fa.total_codons deleted file mode 100644 index 8ce7f6d..0000000 --- a/results_examples/Saccharomyces_cerevisiae.CDS.fa.total_codons +++ /dev/null @@ -1,41 +0,0 @@ -Ala GCA 49169 -Ala GCC 36509 -Ala GCG 18758 -Ala GCT 60888 -Arg CGA 9715 -Arg CGC 8258 -Arg CGG 5792 -Arg CGT 19143 -Arg AGA 63169 -Arg AGG 28460 -Gly GGT 67526 -Gly GGG 18369 -Gly GGC 29617 -Gly GGA 33972 -Ile ATT 91241 -Ile ATC 51253 -Ile ATA 56141 -Leu CTT 38489 -Leu TTG 79741 -Leu CTG 32446 -Leu CTC 17519 -Leu TTA 80235 -Leu CTA 40807 -Pro CCT 41144 -Pro CCG 16552 -Pro CCC 21058 -Pro CCA 53715 -Ser AGT 44212 -Ser TCT 71104 -Ser TCG 26681 -Ser AGC 30389 -Ser TCC 42870 -Ser TCA 58166 -Thr ACT 60844 -Thr ACG 24772 -Thr ACC 37813 -Thr ACA 54938 -Val GTT 64798 -Val GTG 32562 -Val GTC 33983 -Val GTA 37107 diff --git a/src/codon_autocorrelation_multiple_sequences.ALL_AA.codonVSotherAAbox.py b/src/codon_autocorrelation_multiple_sequences.ALL_AA.codonVSotherAAbox.py deleted file mode 100755 index 62572c8..0000000 --- a/src/codon_autocorrelation_multiple_sequences.ALL_AA.codonVSotherAAbox.py +++ /dev/null @@ -1,215 +0,0 @@ -#!/usr/bin/python - -################################## -## CODON AUTOCORRELATION ######### -## Eva Maria Novoa, March 2015 ## -################################## - - -import sys -import re - - -## USAGE - -if sys.argv[1]=="-h" or len(sys.argv)<3: - print "\nUsage:",sys.argv[0],"