Advertisement
icarussiano

Untitled

Mar 20th, 2024
34
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 53.64 KB | None | 0 0
  1. #include <iostream>
  2. #include <sstream>
  3. #include <stdio.h>
  4. #include <fstream>
  5. #include <string>
  6. #include <cstdlib>
  7. #include <ctime>
  8. #include <cmath>
  9. #include <iomanip>
  10. #include <vector>
  11. #include <stdlib.h>
  12.  
  13.  
  14.  
  15. using namespace std;
  16.  
  17. class Computer {
  18.  
  19. public:
  20.  
  21.  
  22. void read(string address2){
  23. ifstream inFile;
  24.  
  25.  
  26. input=address2;
  27.  
  28. int k1,k2,k3;
  29. bool f,f1,f2,f3,f4,f5,f6,f7,f8,f9;
  30. f=f1=f2=f3=f4=f5=f6=f7=f8=f9=false;
  31.  
  32. string md1,p1,lm1,rm1,ming1,maxg1,t1,output,a,b,s1;
  33.  
  34.  
  35.  
  36. output="output.txt";
  37.  
  38. t1="30";
  39. p1="1";
  40. md1="1";
  41. g1="1";
  42. lm1="6";
  43. rm1="6";
  44. ming1="0";
  45. maxg1="0";
  46. s1="1";
  47. //getline(cin,input);
  48. input=input+' ';
  49.  
  50. x=input.length();
  51.  
  52.  
  53. i=0;
  54. j=0;
  55. k1=k2=k3=0;
  56. for(i=0 ; i<=x ; i++){
  57. if((input[i]=='-')&&(input[i+1]=='h')){
  58. cout<<"Help commands:"<<"\n";
  59. cout<<"----------------------------------------------------------------------------------------------------"<<"\n";
  60. cout<<"-i input file"<<"\n";
  61. cout<<"-m left motif width (default 6)"<<"\n";
  62. cout<<"-M right motif width (default 6)"<<"\n";
  63. cout<<"-g min gap between two motif blocks (default 0)"<<"\n";
  64. cout<<"-G max gap between two motif blocks (default 0)"<<"\n";
  65. cout<<"-t number of times trying to repeat process to find best motif (default 30)"<<"\n";
  66. cout<<"-o output file (default output.txt)"<<"\n";
  67. cout<<"-f 1 for fasta file, or 2 for text file (no header) (default 1)"<<"\n";
  68. cout<<"-p 1 for the given strand, or 2 for both the given and reverse complement strands (default 1)"<<"\n";
  69. cout<<"-n 1 for PWM, or 2 for DWM (default 1)"<<"\n";
  70. cout<<"-s 1 for one occurrence motif site per sequence, or 2 for any number of repetitions (default 1)"<<"\n";
  71. cout<<"----------------------------------------------------------------------------------------------------"<<"\n";
  72. exit(0);
  73. }
  74.  
  75. if((input[i]=='-')&&(input[i+1]=='i')){
  76. i=i+3;
  77. k3=i;
  78. address.clear();
  79. a=input[i];
  80. i++;
  81. while (input[i] != ' '){
  82. a+=input[i];
  83. k1++;
  84. j++;
  85. i++;}
  86. //address=input.substr(k3,k1);
  87. address=a;
  88. }
  89. k1=0;
  90. j=0;
  91. if((input[i]=='-')&&(input[i+1]=='o')){
  92. i=i+3;
  93. output.clear();
  94. k3=i;
  95. b=input[i];
  96. i++;
  97. while (input[i] != ' '){
  98. b+=input[i];
  99. k1++;
  100. j++;
  101. i++;
  102. }
  103. output=b;
  104. }
  105. }
  106.  
  107.  
  108. cout<<"\n";
  109.  
  110.  
  111. i=0;
  112. j=0;
  113.  
  114. while(i <= x){
  115. if((input[i]=='-')&&(input[i+1]=='f')){
  116. j=0;
  117. i+=3;
  118. g1=input[i];
  119. i++;
  120. if(input[i] != ' '){
  121. g1=g1+input[i];
  122. i++;}
  123. //while (input[i] != ' ') {
  124. //lm1[j]=input[i];
  125. //j++;
  126. //i++;}
  127. }
  128.  
  129.  
  130. else if((input[i]=='-')&&(input[i+1]=='n')){
  131. j=0;
  132. i+=3;
  133. md1=input[i];
  134. i++;
  135. if(input[i] != ' '){
  136. md1=md1+input[i];
  137. i++;}
  138. //while (input[i] != ' ') {
  139. //lm1[j]=input[i];
  140. //j++;
  141. //i++;}
  142. }
  143.  
  144.  
  145.  
  146. else if((input[i]=='-')&&(input[i+1]=='p')){
  147. j=0;
  148. i+=3;
  149. p1=input[i];
  150. i++;
  151. if(input[i] != ' '){
  152. p1=p1+input[i];
  153. i++;}
  154. //while (input[i] != ' ') {
  155. //lm1[j]=input[i];
  156. //j++;
  157. //i++;}
  158. }
  159.  
  160. else if((input[i]=='-')&&(input[i+1]=='s')){
  161. j=0;
  162. i+=3;
  163. s1=input[i];
  164. i++;
  165. if(input[i] != ' '){
  166. s1=s1+input[i];
  167. i++;}
  168. //while (input[i] != ' ') {
  169. //lm1[j]=input[i];
  170. //j++;
  171. //i++;}
  172. }
  173.  
  174.  
  175. else if((input[i]=='-')&&(input[i+1]=='m')){
  176. j=0;
  177. i+=3;
  178. lm1=input[i];
  179. i++;
  180. if(input[i] != ' '){
  181. lm1=lm1+input[i];
  182. i++;}
  183. //while (input[i] != ' ') {
  184. //lm1[j]=input[i];
  185. //j++;
  186. //i++;}
  187. }
  188.  
  189.  
  190. else if((input[i]=='-')&&(input[i+1]=='M')){
  191. j=0;
  192. i+=3;
  193. rm1=input[i];
  194. i++;
  195. if(input[i] != ' '){
  196. rm1=rm1+input[i];
  197. i++;}
  198. //while (input[i] != ' ') {
  199. //rm1[j]=input[i];
  200. //j++;
  201. //i++;}
  202. }
  203.  
  204.  
  205. else if((input[i]=='-')&&(input[i+1]=='g')){
  206. j=0;
  207. i+=3;
  208. ming1=input[i];
  209. i++;
  210. if(input[i] != ' '){
  211. ming1=ming1+input[i];
  212. i++;}
  213. //while (input[i] != ' ') {
  214. //ming1[j]=input[i];
  215. //j++;
  216. //i++;}
  217. }
  218.  
  219.  
  220. else if((input[i]=='-')&&(input[i+1]=='G')){
  221.  
  222. j=0;
  223. i+=3;
  224. maxg1=input[i];
  225. i++;
  226. if(input[i] != ' '){
  227. maxg1=maxg1+input[i];
  228. i++;}
  229. //while (input[i] != ' ') {
  230. //maxg1[j]=input[i];
  231. //j++;
  232. //i++;}
  233. }
  234.  
  235.  
  236. else if((input[i]=='-')&&(input[i+1]=='t')){
  237. j=0;
  238. i+=3;
  239. t1=input[i];
  240. i++;
  241. if(input[i] != ' '){
  242. t1=t1+input[i];
  243. i++;}
  244. //while (input[i] != ' ') {
  245. //t1[j]=input[i];
  246. //j++;
  247. //i++;}
  248. }
  249.  
  250.  
  251. else
  252. i++;
  253.  
  254. }
  255.  
  256. g=atoi(g1.c_str());
  257. p=atoi(p1.c_str());
  258. s=atoi(s1.c_str());
  259. md=atoi(md1.c_str());
  260. lml=atoi(lm1.c_str());
  261. rml=atoi(rm1.c_str());
  262. ming=atoi(ming1.c_str());
  263. maxg=atoi(maxg1.c_str());
  264. tf=atoi(t1.c_str());
  265.  
  266.  
  267. cout<<"\n";
  268.  
  269.  
  270. while(f==false){
  271. //cout<<"\n If you want choose Fasta file, please input 1 and Text file, please input 2:";
  272. //cin>>g;
  273. if(g==1 || g==2)
  274. f=true;
  275. else
  276. {cout<<"\n Please input just number 1 (Text file) or 2 (Fasta file) for type of input file:";
  277. cin>>g;}
  278. }
  279.  
  280.  
  281. //cout<<"\n Please enter a your data file address:";
  282. //getline(cin,address3);
  283.  
  284. //getline(cin,address);
  285. while(f7==false){
  286. cout << "\n";
  287. myfile4.open(address.c_str() , ios::out | ios::app | ios::binary);
  288. if(!myfile4.is_open()){
  289. cout << "\n ERROR Your Data is not correct.\n";
  290. cout<<"\n Please enter a your data file address:";
  291. getline(cin,address);}
  292. else
  293. f7=true;
  294. }
  295.  
  296.  
  297.  
  298. while(f6==false){
  299. //cout<<"\n Please enter your method, for Mono-Nucleotide enter number 1 and Di-Nucleotide enter number 2:";
  300. //cin>>md;
  301. if(md==1 || md==2)
  302. f6=true;
  303. else
  304. {cout<<"\n Please input just number 1 (Mono-Nucleotide) or 2 (Di-Nucleotide) for analysis method:";
  305. cin>>md;}
  306. }
  307.  
  308. while(f5==false){
  309. // cout<<"\n Please choose read pattern (For just Positive direction input 1 and Positive and Negative direction input 2):";
  310. // cin>>p;
  311. if(p==1 || p==2)
  312. f5=true;
  313. else
  314. {cout<<"\n Please input just number 1 (Positive direction) or 2 (Positive and Negative direction) for pattern:";
  315. cin>>p;}
  316. }
  317.  
  318. while(f9==false){
  319.  
  320. if(s==1 || s==2 || s==3)
  321. f9=true;
  322. else
  323. {cout<<"\n Please input just number 1 for one occurrence motif site per sequence, or 2 for any number of repetitions of motifs, or 3 for zero or one occurrence motif site per sequence:";
  324. cin>>s;}
  325. }
  326.  
  327. while(f1==false){
  328. //cout<<"\n Please enter Length of left motif:";
  329. // cin>>lml;
  330. if(lml>0 && lml<=25)
  331. f1=true;
  332. else
  333. {cout<<"\n Please enter length of left motif between 1 ~ 25:";
  334. cin>>lml;}
  335. }
  336.  
  337. while(f2==false){
  338. // cout<<"\n Please enter Length of right motif:";
  339. // cin>>rml;
  340. if(rml>=0 && rml<=25)
  341. f2=true;
  342. else
  343. { cout<<"\n Please enter length of right motif between 0 ~ 24:";
  344. cin>>rml;}
  345. }
  346.  
  347. if(rml>0){
  348. while(f3==false){
  349. // cout<<"\n Please enter minimum Length of Gap between two motif blocks:";
  350. // cin>>ming;
  351. if(ming>=0 && ming<=25)
  352. f3=true;
  353. else
  354. { cout<<"\n Please enter minimum Length of Gap between 0 ~ 25:";
  355. cin>>ming;
  356. }
  357. }
  358. }
  359. while(f4==false){
  360. // cout<<"\n Please enter maximum Length of Gap between two motif blocks:";
  361. // cin>>maxg;
  362. if(maxg>=0 && maxg<=25)
  363. f4=true;
  364. else
  365. {
  366. cout<<"\n Please enter maximum Length of Gap between 0 ~ 25:";
  367. cin>>maxg;
  368.  
  369. }
  370. }
  371.  
  372. while(f4==false){
  373.  
  374. if(tf>=0 && tf<=100)
  375. f4=true;
  376. else
  377. {
  378. cout<<"\n Please enter number of time trying to find motif(Maximum trying is 100):";
  379. cin>>tf;
  380. }}
  381.  
  382.  
  383.  
  384. cout<<"\n\t\t\t Please waiting...\n";
  385.  
  386. //o="/home/mohammad/Documents/test1.txt";
  387.  
  388.  
  389. student=freopen(output.c_str(), "w",stdout);
  390. if( student == NULL)
  391. {
  392. printf("Can't reopen textfile1.txt\n");
  393.  
  394. }
  395. }
  396.  
  397.  
  398.  
  399.  
  400. void output_information()
  401. {
  402.  
  403. cout<<"\n-------------------------------------------------------------------------\n";
  404. //lml=22;
  405. //rml=0;
  406. lms=lml+rml;
  407. //md=2;
  408. if(md==1){
  409. nu=4;
  410. lm=lms;}
  411. else if (md==2){
  412. nu=16;
  413. lm=lms-1;}
  414. //max=3
  415. //tf=3;
  416. //p=2;
  417. x=1;
  418. a=0;
  419. l=0;
  420. l2=20;
  421. z=0;
  422. b=0;
  423. initial=1;
  424. r=0;
  425. ns=0;
  426. nmax=30;
  427. //g=2;
  428. //ming=0;
  429. //maxg=0;
  430. min=0;
  431. if(md==1){
  432. da[0]="A"; da[1]="C"; da[2]="G"; da[3]="T";
  433. }
  434. else if (md==2){
  435. da[0]="AA"; da[1]="AC"; da[2]="AG"; da[3]="AT"; da[4]="CA"; da[5]="CC"; da[6]="CG"; da[7]="CT";
  436. da[8]="GA"; da[9]="GC"; da[10]="GG"; da[11]="GT"; da[12]="TA"; da[13]="TC"; da[14]="TG"; da[15]="TT";
  437. }
  438.  
  439.  
  440. }
  441.  
  442.  
  443. void textmining()
  444. {
  445.  
  446.  
  447. if(!myfile4.is_open()){
  448. cout << "\n ERROR Your Dtat Is Not Correct.\n";
  449. }
  450. else
  451. {
  452. i1=0;
  453. i=0;
  454. j=0;
  455. flag=false;
  456. while((myfile4 >> noskipws >> ch ) && (!myfile4.eof())) {
  457. /* if (k==0){
  458. cout<<">"<<i+1;
  459. cout<<'\n';
  460. k=1;
  461. }*/
  462.  
  463. if (ch!='\n'){
  464. cout << ch ;
  465. if(j<2000 and i<2000){
  466. str4[i][j]=(ch);
  467. j++;}
  468. }
  469. else{
  470. if(g==2){
  471. lenstr2[i1]=j;
  472. cout<<'\t'<<lenstr2[i1];
  473. sum=sum+lenstr2[i1];
  474. i1++;
  475. }
  476. else if(g==1){
  477.  
  478. lenstr2[i1]=j;
  479. if(flag==true)
  480. //cout<<'\t'<<lenstr2[i1];
  481. sum=sum+lenstr2[i1];
  482. i1++;
  483.  
  484.  
  485. }
  486.  
  487. str4[i][j]='\n';
  488. flag= !flag;
  489. j=0;
  490. i++;
  491. cout<<'\n';
  492. k=0;
  493.  
  494. }
  495.  
  496. }
  497.  
  498.  
  499. myfile4.close();
  500. }
  501. cout<<'\n';
  502. max2=i;
  503. max=i;
  504. k=0;
  505. y=0;
  506. if(g==1){
  507. i=1;
  508. while(i<i1){
  509. if(str4[i][0] != '>'){
  510. for(j=0; j<lenstr2[i]; j++)
  511. {
  512. str4[i][j]=toupper(str4[i][j]);
  513. if(str4[i][j]=='A' || str4[i][j]=='C' || str4[i][j]=='G' || str4[i][j]=='T')
  514. //cout<<str4[i][j];
  515. str2[k][y]=str4[i][j];
  516. y++;
  517. }}
  518. if ((str4[i][0] == '>')||(i+1==i1)){
  519.  
  520. lenstr2[k]=y;
  521. k++;
  522.  
  523. y=0;
  524. }
  525. i++;
  526. }
  527. max=k;
  528. }
  529.  
  530.  
  531. // cout<<'\n'<<"Number of sequence:"<<max<<" Number of line:"<<max<<'\n';
  532.  
  533.  
  534. i1=0;
  535. /*
  536. if(g==1){
  537. for(j=1 ; j<(max*2) ; j+=2){
  538. flag=true;
  539. x=0;
  540. ns++;
  541. for(k=0 ; k<lenstr2[i1] ; k++) {
  542. str2[i1][k]=str4[j][k];
  543.  
  544. if (((str2[i1][k]=='A')||(str2[i1][k]=='C')||(str2[i1][k]=='G')||(str2[i1][k]=='T'))){
  545. x++;
  546. if(flag==true){
  547. sigb[i1][0]=k;
  548. flag=false;}
  549. }
  550. }
  551. sigb[i1][1]=x;
  552. i1++;
  553. }
  554. }
  555. else */
  556. if(g==2){
  557. for(j=0 ; j<max ; j++){
  558. flag=true;
  559. x=0;
  560. ns++;
  561. for(k=0 ; k<lenstr2[i1] ; k++) {
  562. str4[j][k]=toupper(str4[j][k]);
  563. str2[i1][k]=str4[j][k];
  564. if (((str2[i1][k]=='A')||(str2[i1][k]=='C')||(str2[i1][k]=='G')||(str2[i1][k]=='T'))){
  565. x++;
  566. if(flag==true){
  567. sigb[i1][0]=k;
  568. flag=false;}
  569. }
  570. }
  571. sigb[i1][1]=x;
  572. i1++;
  573. }}
  574.  
  575.  
  576. for(i=0 ; i<ns ; i++){
  577. //cout<<">"<<i+1<< "\t"<<sigb[i][0]<<"\n";
  578.  
  579.  
  580. for(k=0 ; k<lenstr2[i] ; k++)
  581. {
  582. str2[i][k]=toupper(str2[i][k]);
  583.  
  584. //cout<<str2[i][k];
  585.  
  586. }
  587. // cout<<"\t"<<lenstr2[i]<<"\t"<<sigb[i][1]<<'\n';
  588. //cout<<'\n';
  589. }
  590. sum=0;
  591. g=0;
  592. k=0;
  593. // printf("\n--------------------------------------------------------------------\n");
  594. for(i=0 ; i<(max2) ; i++)
  595. if(str4[i][0] =='>'){
  596.  
  597. for(j=0 ; str4[i][j] !='\0' ; j++){
  598. str5[k][j]=str4[i][j];
  599.  
  600. }
  601. k++;
  602. }
  603.  
  604.  
  605.  
  606.  
  607. }
  608. //--------------------------------------------------------------
  609. void reverse(){
  610. for(i=0 ; i<max ; i++){
  611. k=0;
  612. for(j=lenstr2[i]-1 ; j>=0 ; j--){
  613. if(str2[i][j]=='A')
  614. str3[i][k]='T';
  615. else
  616. if(str2[i][j]=='C')
  617. str3[i][k]='G';
  618. else
  619. if(str2[i][j]=='G')
  620. str3[i][k]='C';
  621. else
  622. if(str2[i][j]=='T')
  623. str3[i][k]='A';
  624.  
  625. k++;
  626. }
  627.  
  628.  
  629.  
  630.  
  631. }
  632. /*
  633. cout<<"\n----------REVERSE------------------\n";
  634. for(i=0 ; i<max ; i++) {
  635. cout<<">"<<i+1<<'\n';
  636. for(j=0 ; j<lenstr2[i] ; j++){
  637. cout<<str3[i][j];}
  638. cout<<'\n';
  639. }
  640. */
  641. }
  642. //--------------------------------------------------------------
  643. void selectrandom()
  644. {
  645.  
  646. srand((unsigned)time(NULL)*(r+x+1));
  647.  
  648. for(i=0 ; i<max ; i++)
  649. rndnuml[i][r]=(rand() % (lenstr2[i]-maxg-lm-2));
  650.  
  651. for(i=0 ; i<max ; i++){
  652. j=rndnuml[i][r]+maxg+lml;
  653. k=rndnuml[i][r]+ming+lml;
  654. rndnumr[i][r]=(rand() % (j-k+1))+k;
  655.  
  656.  
  657. }
  658.  
  659. // for(i=0 ; i<max ; i++)
  660. // cout<<rndnuml[i][r]<<":"<<rndnumr[i][r]<<" "<<rndnumr[i][r]-rndnuml[i][r]-4<<"\n";
  661.  
  662.  
  663.  
  664.  
  665. }
  666.  
  667. //-------------------------------------------------------------
  668.  
  669. void startlocation()
  670. {
  671.  
  672. //const int n1=lm , n2=max;
  673. //char ran1[n2][n1];
  674.  
  675.  
  676.  
  677. //char ran1[n2][n1];
  678.  
  679. for(j=0 ; j<max ; j++){
  680. k=0;
  681. rnd=rndnuml[j][r];
  682. for(i=rnd ; i<rnd+lml ; i++){
  683. ran1[j][k]=str2[j][i];
  684. k++;
  685. }
  686.  
  687. rnd=rndnumr[j][r];
  688. for(i=rnd ; i<rnd+rml ; i++){
  689. ran1[j][k]=str2[j][i];
  690. k++;
  691. }
  692.  
  693. }
  694.  
  695. count_nucleotide(ran1);
  696.  
  697. }
  698.  
  699.  
  700. //-------------------------------------------------------------
  701. void count_nucleotide(char test1[1000][50]){
  702.  
  703.  
  704.  
  705. for(i=0 ; i<nu ; i++){
  706. for(j=0 ; j<(lm) ; j++)
  707. freq[i][j]=0;
  708. }
  709.  
  710. for(j=0 ; j<lm ; j++){
  711. for(i=0 ; i<max ; i++){
  712. subnuc=test1[i][j];
  713.  
  714. if (md==2)
  715. subnuc+=test1[i][j+1];
  716.  
  717. for(k=0 ; k<nu ; k++)
  718. if (da[k]==subnuc)
  719. freq[k][j]=freq[k][j]+1;
  720.  
  721. }
  722. }
  723. }
  724.  
  725.  
  726. //-----------------------------------------------------------------
  727.  
  728. void PPM(){
  729.  
  730. // printf("\nTherefore the resulting Position Probability Matrix (PPM):\n");
  731. // printf("\n Column1 Column2 Column3 Column4 Column5\n");
  732. sum=0;
  733. for(i=0 ; i<lm ; i++){
  734. // cout<<da[i]<<"\t";
  735. for(j=0 ; j<lm ; j++){
  736. freq1[i][j]=(float(freq[i][j])/(max+1));
  737. sum=sum+freq1[i][j];
  738. // if(j<11)
  739. // printf(" %.2f ",freq1[i][j]);
  740. }
  741.  
  742. //cout<<'\n' ;
  743. }
  744. // printf("\nThe Added total Table PPM:%.2f \n",sum);
  745.  
  746. }
  747.  
  748. //-----------------------------------------------------------------
  749.  
  750.  
  751. void selectseq()
  752. {
  753.  
  754.  
  755.  
  756.  
  757. for(j=0 ; j<max ; j++){
  758. k=0;
  759. rnd=rndnuml[j][r];
  760. for(i=rnd ; i<rnd+lml ; i++){
  761. ran1[j][k]=str2[j][i];
  762. k++;
  763. }
  764.  
  765. rnd=rndnumr[j][r];
  766. for(i=rnd ; i<rnd+rml ; i++){
  767. ran1[j][k]=str2[j][i];
  768. k++;
  769. }
  770.  
  771. }
  772.  
  773.  
  774. for(i=0 ; i<max ; i++){
  775. for(j=0 ; j<lms ; j++)
  776. if(selseq != i)
  777. seq1[i][j]=ran1[i][j];
  778. else
  779. seq1[i][j]='*';
  780. }
  781. // cout<<"\n------------------------------------------------------\n";
  782. if(p==2){
  783.  
  784. for(j=0 ; j<max ; j++){
  785. k=0;
  786. rnd=rndnuml[j][r];
  787. for(i=rnd ; i<rnd+lml ; i++){
  788. ran1[j][k]=str3[j][i];
  789. k++;
  790. }
  791.  
  792. rnd=rndnumr[j][r];
  793. for(i=rnd ; i<rnd+rml ; i++){
  794. ran1[j][k]=str3[j][i];
  795. k++;
  796. }
  797.  
  798. }
  799.  
  800.  
  801. for(i=0 ; i<max ; i++){
  802. for(j=0 ; j<lms ; j++)
  803. if(selseq != i)
  804. seq5[i][j]=ran1[i][j];
  805. else
  806. seq5[i][j]='*';
  807. }
  808.  
  809. }
  810.  
  811.  
  812. }
  813. //--------------------------------------------------------------------------------
  814. void Background_count(){
  815. int sum=0;
  816. int sum2=0;
  817.  
  818.  
  819. count_nucleotide(seq1);
  820.  
  821.  
  822. for(i=0 ; i<nu ; i++)
  823. BG[i]=0;
  824.  
  825. for(i=0 ; i<max ; i++){
  826. if(i!=selseq)
  827. for(j=0 ; j<lenstr2[i]-1 ; j++){
  828. for(k=0 ; k<nu ; k++){
  829. if(rml==0){
  830. if (j<rndnuml[i][r]-1 || (j>(rndnuml[i][r]+rml-1))){
  831. subnuc=str2[i][j];
  832.  
  833. if(md==2)
  834. subnuc+=str2[i][j+1];
  835.  
  836. if(subnuc==da[k])
  837. BG[k]++;}
  838. }
  839. else
  840. {
  841. if (j<rndnuml[i][r]-1 || ((j>(rndnuml[i][r]+lml-1)) && (j<rndnumr[i][r]-1)) || (j>(rndnumr[i][r]+rml-1))){
  842. subnuc=str2[i][j];
  843.  
  844. if(md==2)
  845. subnuc+=str2[i][j+1];
  846.  
  847. if(subnuc==da[k])
  848. BG[k]++;}
  849. }
  850. }
  851. }
  852.  
  853. }
  854.  
  855.  
  856. if(p==2){
  857.  
  858. for(i=0 ; i<nu ; i++)
  859. BG2[i]=0;
  860.  
  861. for(i=0 ; i<max ; i++){
  862. if(i!=selseq)
  863. for(j=0 ; j<lenstr2[i]-1 ; j++){
  864. for(k=0 ; k<nu ; k++){
  865. if(rml==0){
  866. if (j<rndnuml[i][r]-1 || (j>(rndnuml[i][r]+rml-1))){
  867. subnuc=str3[i][j];
  868.  
  869. if(md==2)
  870. subnuc+=str2[i][j+1];
  871.  
  872. if(subnuc==da[k])
  873. BG2[k]++;}
  874. }
  875. else
  876. {
  877. if (j<rndnuml[i][r]-1 || ((j>(rndnuml[i][r]+lml-1)) && (j<rndnumr[i][r]-1)) || (j>(rndnumr[i][r]+rml-1))){
  878.  
  879. subnuc=str3[i][j];
  880.  
  881. if(md==2)
  882. subnuc+=str2[i][j+1];
  883.  
  884. if(subnuc==da[k])
  885. BG2[k]++;}
  886. }
  887. }
  888. }
  889.  
  890. }
  891. //if(zm==84)
  892. //for(i=0 ; i<nu ; i++)
  893. //BG2[i]=1;
  894.  
  895. for(i=0 ; i<nu ; i++)
  896. sum2=sum2+BG2[i];
  897.  
  898. }
  899.  
  900. //if(zm==84)
  901. //for(i=0 ; i<nu ; i++)
  902. //BG[i]=1;
  903. // cout<<"\nprint nucleotide count of random select PFM:\n";
  904. // printf(" Backgrand \n");
  905. for(i=0 ; i<nu ; i++){
  906. // cout<<da[i]<<"\t"<<BG[i]<<"\t";
  907. sum=sum+BG[i];
  908.  
  909. //sum2=sum2+BG2[i];
  910. // for(j=0 ; j<(lm-1) ; j++)
  911. // printf("%d ",freq[i][j]);
  912. // cout<<"\n";
  913. }
  914.  
  915. // cout<<"\nprint LIKELIHOOD: ((MatrixPFM[i]+0.25 )/ (total number of colum+1))\n";
  916. for(i=0 ; i<nu ; i++){
  917. // cout<<da[i]<<" ";
  918. if(md==1){
  919. BG[i]=(BG[i]+0.25)/sum;
  920. if(p==2)
  921. BG2[i]=(BG2[i]+0.25)/sum2;
  922. }
  923. else if(md==2){
  924. BG[i]=(BG[i]+0.0625)/sum;
  925. if(p==2)
  926. BG2[i]=(BG2[i]+0.0625)/sum2;
  927. }
  928. // printf("%.3f ", BG[i]);
  929. for(j=0 ; j<(lm) ; j++){
  930. if(md==1)
  931. freq1[i][j]=(float((freq[i][j])+0.25)/max);
  932. else if(md==2)
  933. freq1[i][j]=(float((freq[i][j])+0.0625)/max);
  934.  
  935. // printf("%.2f ",freq1[i][j]);
  936. }}
  937.  
  938. if(p==2){
  939. count_nucleotide(seq5);
  940. for(i=0 ; i<nu ; i++)
  941. for(j=0 ; j<(lm) ; j++){
  942. if(md==1)
  943. freq2[i][j]=(float((freq[i][j])+0.25)/max);
  944. else if(md==2)
  945. freq2[i][j]=(float((freq[i][j])+0.0625)/max);
  946. // cout<<"\n";
  947. }
  948. }
  949. }
  950.  
  951. //--------------------------------------------------------------
  952. void choosemethod(){
  953.  
  954.  
  955.  
  956. }
  957.  
  958. //--------------------------------------------------------------
  959. void subseqlength3(){
  960.  
  961. // cout<<"\n"<<"--------------------subsequence--------------------------\n";
  962.  
  963. a=0;
  964.  
  965. for(i=ming ; i<=maxg; i++){
  966. for(l=0 ; l<(lenstr2[z]-lms-i-2) ; l++){
  967. k=0;
  968. for(j=0 ; j<lms ; j++)
  969. {
  970. if(j<lml)
  971. {
  972. seq2[a][j] =str2[z][l+j];
  973.  
  974. }
  975. else
  976. {
  977. seq2[a][j] =str2[z][l+j+i];
  978.  
  979. }
  980. }
  981. positionl[a][0]=l;
  982. positionr[a][0]=l+lml+i;
  983.  
  984. a++;
  985. }
  986. }
  987.  
  988. /*
  989. for(i=a ; i<9000 ; i++)
  990. for(j=0 ; j<lms ; j++)
  991. seq2[i][j]='\0';
  992.  
  993.  
  994. for (l=0 ; l<a ; l++){
  995. cout<<l<<":";
  996. for (j=0 ; j<lm ; j++)
  997. cout<<seq2[l][j];
  998.  
  999. cout<<"\t"<<" "<<positionl[l]<<","<<positionr[l];
  1000. // if (l%3==0)
  1001. cout<<"\n";
  1002.  
  1003. }*/
  1004. }
  1005.  
  1006. //--------------------------------------------------------------
  1007.  
  1008. void score(){
  1009.  
  1010. // cout<<"\n-----------------calculate score for seq"<<selseq<<"----------------\n";
  1011. // cout<<" \n-----------------------------------------\nselect sequance "<<selseq<< "\n";
  1012.  
  1013. sum=0;
  1014.  
  1015.  
  1016. for(i=0 ; i<a ; i++){
  1017. score1[i]=0;
  1018. div=0;
  1019. for(j=0 ; j<(lm) ; j++){
  1020. subnuc=seq2[i][j];
  1021.  
  1022. if(md==2)
  1023. subnuc+=seq2[i][j+1];
  1024.  
  1025. for(k=0 ; k<nu ; k++)
  1026.  
  1027. if (subnuc==da[k]){
  1028. // printf("Log%.2f ",freq1[k][j]);
  1029. div=div+log(BG[k]);
  1030. score1[i]=score1[i]+log(freq1[k][j]);
  1031. }
  1032. /*
  1033. if(j<(lm-1))
  1034. cout<<"+";
  1035. else
  1036. printf("/%.6f",div); */
  1037.  
  1038. }
  1039. score1[i]=score1[i]/div;
  1040. // cout<<"="<<score1[i]<<" number:"<<i<<"\n";
  1041. //sum=sum+score1[i];
  1042.  
  1043. }
  1044. if(zm != 84){
  1045. maxscor[0]=score1[0];
  1046. for(i=min ; i<a ; i++){
  1047. if(maxscor[0]>score1[i]){
  1048. maxscor[0]=score1[i];
  1049. k=i;
  1050. }
  1051.  
  1052. }
  1053. // cout<<"\nMaximume Value Number:"<<score1[k]<<"\n";
  1054.  
  1055. rndnuml[z][r]=positionl[k][0];
  1056. rndnumr[z][r]=positionr[k][0];
  1057. state[z][r]='P';
  1058. }
  1059. /*else{
  1060. cout<<"\n---------test score1--------\n";
  1061. for(i=0; i<a; i++)
  1062. cout<<score1[i]<<"\n";
  1063. cout<<"\n---------test Freq1--------\n";
  1064. for(j=0 ; j<(nu) ; j++){
  1065. for(k=0 ; k<lm ; k++)
  1066. cout<<freq1[j][k]<<"\t";
  1067. cout<<"\n";}
  1068. cout<<"\n---------test BG--------\n";
  1069. for(i=0; i<nu; i++)
  1070. cout<<BG[i]<<"\n";
  1071. }*/
  1072.  
  1073.  
  1074.  
  1075. // cout<<"\n-------------------Random Number between 0.000 until 1.000--------------------\n";
  1076. }
  1077.  
  1078. //--------------------------------------------------------------
  1079. void selectreverseseq()
  1080. {
  1081.  
  1082.  
  1083.  
  1084. a=0;
  1085.  
  1086. for(i=ming ; i<=maxg; i++){
  1087. for(l=0 ; l<(lenstr2[z]-lms-i-2) ; l++){
  1088. k=0;
  1089. for(j=0 ; j<lms ; j++)
  1090.  
  1091. if(j<lml)
  1092. {
  1093. seq4[a][j] =str3[z][l+j];
  1094. }
  1095. else
  1096. {
  1097. seq4[a][j] =str3[z][l+j+i];
  1098.  
  1099. }
  1100.  
  1101. positionl[a][1]=l;
  1102. positionr[a][1]=l+lml+i;
  1103.  
  1104. a++;
  1105.  
  1106. }
  1107. }
  1108. /*
  1109. //if(z==6) {
  1110. for(i=a ; i<9000 ; i++)
  1111. for(j=0 ; j<lms ; j++)
  1112. seq4[i][j]='\0';
  1113.  
  1114.  
  1115. for (l=0 ; l<a ; l++){
  1116. cout<<l<<":";
  1117. for (j=0 ; j<lm ; j++)
  1118. cout<<seq4[l][j];
  1119.  
  1120. cout<<"\t"<<" "<<positionl[l][1]<<","<<positionr[l][1]<<" : "<<positionr[l][0]-positionl[l][0]-lml;
  1121. // if (l%3==0)
  1122. cout<<"\n";
  1123. }
  1124. } */
  1125. }
  1126.  
  1127. //--------------------------------------------------------------
  1128.  
  1129. //---------------------------------------------------------
  1130. void score2twoway(){
  1131.  
  1132. // cout<<"\n-----------------calculate score for seq"<<selseq<<"----------------\n";
  1133. // cout<<" \n-----------------------------------------\nselect sequance "<<selseq<< "\n";
  1134.  
  1135. // sum2[0]=0;
  1136. // sum2[1]=0;
  1137.  
  1138.  
  1139. for(i=min ; i<a ; i++){
  1140. score2[i][0]=0;
  1141. score2[i][1]=0;
  1142. div1[0]=0;
  1143. div1[1]=0;
  1144. for(j=0 ; j<lm ; j++){
  1145. subnuc=seq2[i][j];
  1146. subnuc1=seq4[i][j];
  1147.  
  1148. if(md==2){
  1149. subnuc+=seq2[i][j+1];
  1150. subnuc1+=seq4[i][j+1];}
  1151.  
  1152. for(k=0 ; k<nu ; k++){
  1153.  
  1154. if (subnuc==da[k]){
  1155. // printf("Log%.2f ",freq1[k][j]);
  1156. div1[0]=div1[0]+log(BG[k]);
  1157. score2[i][0]=score2[i][0]+log(freq1[k][j]);
  1158. }
  1159. if (subnuc1==da[k]){
  1160. // printf("Log%.2f ",freq1[k][j]);
  1161. div1[1]=div1[1]+log(BG2[k]);
  1162. score2[i][1]=score2[i][1]+log(freq2[k][j]);
  1163. }
  1164.  
  1165.  
  1166. }
  1167. /* if(j<lm-1)
  1168. cout<<"+";
  1169. else
  1170. printf("/%.6f",div[1]);
  1171. */
  1172. }
  1173. score2[i][0]=score2[i][0]/div1[0];
  1174. score2[i][1]=score2[i][1]/div1[1];
  1175. //cout<<"="<<score2[i][0]<<" number1:"<<i<<"\n";
  1176. //cout<<"="<<score2[i][1]<<" number2:"<<i<<"\n";
  1177.  
  1178. // sum2[0]=sum2[0]+score1[i][0];
  1179. // sum2[1]=sum2[1]+score1[i][1];
  1180.  
  1181. }
  1182.  
  1183. maxscor[0]=score2[0][0];
  1184. for(i=0 ; i<a ; i++){
  1185. if(maxscor[0]>score2[i][0]){
  1186. maxscor[0]=score2[i][0];
  1187. k1=i;
  1188. }}
  1189.  
  1190.  
  1191. maxscor[1]=score2[0][1];
  1192. for(i=0 ; i<a ; i++){
  1193. if(maxscor[1]>score2[i][1]){
  1194. maxscor[1]=score2[i][1];
  1195. k2=i;
  1196. }}
  1197.  
  1198.  
  1199. // cout<<"\nMaximume Value Number:"<<score1[k1][0]<<"\t"<<score1[k2][1]<<"\n";
  1200. rndnuml[z][r]=positionl[k1][0];
  1201. rndnumr[z][r]=positionr[k1][0];
  1202. state[z][r]='P';
  1203.  
  1204. // cout<<"\nMaximume Value Number:"<<score1[k1][0]<<"\t"<<score1[k2][1]<<"\n";
  1205. if (maxscor[0]>maxscor[1])
  1206. {
  1207. rndnuml[z][r]=positionl[k2][1];
  1208. rndnumr[z][r]=positionr[k2][1];
  1209. state[z][r]='N';
  1210. }
  1211. //for(i=0; i<max; i++)
  1212. //cout<<i<<"*"<<rndnuml[i][r]<<"\t"<<rndnumr[i][r]<<"\n";
  1213.  
  1214. }
  1215.  
  1216. //---------------------------------------------------------
  1217.  
  1218. void randomgenerate(){
  1219.  
  1220. int RND_MAX=1000;
  1221. srand((unsigned)time(NULL)*(sum+r));
  1222. sum=(rand() % (RND_MAX));
  1223. rndsel=sum/RND_MAX;
  1224. }
  1225.  
  1226.  
  1227.  
  1228.  
  1229. //--------------------------------------------------------------
  1230.  
  1231. void PFM(){
  1232.  
  1233.  
  1234. // cout<<"\n-----------------------------------------------------";
  1235. // printf("\nAfter Calculate 64 step , We have 64 SubSequnce:\n");
  1236.  
  1237. for(j=0 ; j<max ; j++){
  1238.  
  1239. k=0;
  1240.  
  1241. for(i=rndnuml[j][r] ; i<rndnuml[j][r]+lml ; i++){
  1242. if(state[j][r]=='P')
  1243. seq1[j][k]=str2[j][i];
  1244. else if(state[j][r]=='N')
  1245. seq1[j][k]=str3[j][i];
  1246. k++;
  1247. }
  1248. for(i=rndnumr[j][r] ; i<rndnumr[j][r]+rml ; i++){
  1249. if(state[j][r]=='P')
  1250. seq1[j][k]=str2[j][i];
  1251. else if(state[j][r]=='N')
  1252. seq1[j][k]=str3[j][i];
  1253. k++;
  1254. }
  1255. }
  1256.  
  1257.  
  1258.  
  1259.  
  1260. /*
  1261. for(i=0 ; i<max ; i++){
  1262. cout<<">"<<i+1;
  1263. cout<<"\n";
  1264. for(j=0 ; j<12 ; j++)
  1265. printf("%c",seq1[i][j]);
  1266. cout<<"\n";
  1267. } */
  1268.  
  1269. count_nucleotide(seq1);
  1270.  
  1271.  
  1272.  
  1273. /* cout<<"\nprint nucleotide count of random select PFM:\n";
  1274. printf(" \n");
  1275. for(i=0 ; i<=15 ; i++){
  1276. cout<<da[i]<<" ";
  1277. for(j=0 ; j<11; j++)
  1278. printf("%d ",freq[i][j]);
  1279. cout<<"\n";
  1280. } */
  1281.  
  1282. // cout<<"\nprint LIKELIHOOD: ((MatrixPFM[i]+0.0625 )/ (total number of colum+1))\n";
  1283. for(i=0 ; i<nu ; i++){
  1284. // cout<<da[i]<<"\t";
  1285. for(j=0 ; j<(lm) ; j++){
  1286. if(md==1)
  1287. freq1[i][j]=(float((freq[i][j])+0.25)/(max+1));
  1288. else if (md==2)
  1289. freq1[i][j]=(float((freq[i][j])+0.0625)/(max+1));
  1290. // printf("%.2f ",freq1[i][j]);
  1291. }
  1292. // cout<<"\n";
  1293. }
  1294.  
  1295.  
  1296.  
  1297. // cout<<"\nEntropy for each colum: -[i(1..n) Px*log(Px)]:\n";
  1298. for(i=0 ; i<(lm) ; i++){
  1299. entropy[i]=0;
  1300. for(j=0 ; j<nu ; j++){
  1301.  
  1302. entropy[i]=entropy[i]+(freq1[j][i]*log2(freq1[j][i]));}
  1303.  
  1304. entropy[i]=entropy[i]*(-1);
  1305. // printf("%.4f ", entropy[i]);
  1306.  
  1307. }
  1308.  
  1309. entropytotal[r][0]=0;
  1310. for(i=0 ; i<(lm) ; i++){
  1311. entropytotal[r][0]=entropytotal[r][0]+entropy[i];
  1312. }
  1313. //cout<<"\nTotal Added all Colums is:"<<entropytotal[r][0];
  1314.  
  1315.  
  1316.  
  1317.  
  1318. }
  1319. //--------------------------------------------------------------
  1320. void step1(){
  1321. // cout<<"\n\n------Step1 By New Sequence Computional (Seq1 , Seq3)-------\n";
  1322.  
  1323. selseq=z;
  1324. //max=lenstr2[z];
  1325. selectseq();
  1326. Background_count();
  1327. subseqlength3();
  1328.  
  1329. if(p==1){
  1330. score();
  1331. }
  1332. else if (p==2) {
  1333. selectreverseseq();
  1334. score2twoway();
  1335. }
  1336. //randomgenerate();
  1337. initial=z;
  1338. //randomly();
  1339.  
  1340.  
  1341. }
  1342.  
  1343. //--------------------------------------------------------------
  1344. void step2(){
  1345. z=0;
  1346. while(z<max){
  1347.  
  1348. step1();
  1349. z++;
  1350.  
  1351. }
  1352.  
  1353.  
  1354. // cout<<"\n";
  1355. PFM();
  1356.  
  1357.  
  1358.  
  1359. }
  1360. //--------------------------------------------------------------
  1361.  
  1362.  
  1363. void step11(){
  1364. //cout<<"\n********************************************************************************";
  1365. float f1,f2,f0;
  1366. f2=0;
  1367.  
  1368. int x2=0;
  1369. //float check[200];
  1370. temp_e[0]=0;
  1371. temp_e[1]=0;
  1372.  
  1373.  
  1374. while((bb==false)&&(temp_e[1]<30)){
  1375.  
  1376. step2();
  1377. f1=entropytotal[r][0];
  1378. f0=abs(f1-f2);
  1379. if ((f0 < 0.00000001)){
  1380. bb=true;
  1381. //entropytotal[r][0]=f2;
  1382. testi[r]=f1;
  1383. //cout<<"\nentropytotal[0]:"<<f2<<"\tentropytotal[1]:"<<f1;
  1384. //cout<<"\nNumber Total Step Repeat is:"<<temp_e[1];
  1385.  
  1386. x2++;
  1387. //check[x2]=f1;
  1388.  
  1389.  
  1390. }
  1391. else
  1392. {
  1393.  
  1394. x2=temp_e[1];
  1395.  
  1396. //check[x2]=f1;
  1397. temp_e[1]++;
  1398.  
  1399. f2=f1;
  1400. }
  1401. }
  1402. if(temp_e[1]>=10){
  1403. testi[r]=f1;
  1404. bb=true;
  1405. //cout<<"\nentropytotal[0]:"<<f2<<"\tentropytotal[1]:"<<f1;
  1406. // cout<<"\nNumber Total Step Repeat is:"<<temp_e[1];
  1407. }
  1408.  
  1409. //cout<<"\n";
  1410.  
  1411.  
  1412. //for(i=0 ; i<x2 ; i++)
  1413. //cout<<check[i]<<"\n";
  1414.  
  1415. x=0;
  1416. }
  1417. //--------------------------------------------------------------
  1418. void step21(){
  1419. k=lml+rml+maxg;
  1420.  
  1421. for(i=0; i<max; i++){
  1422. if(k>(lenstr2[i]-2)){
  1423.  
  1424. cout<<"ERROR: Length of the site "<<i+1<<" is less than your input parameters(Left Motif + Right Motif + Gap).\n";
  1425.  
  1426. exit (EXIT_FAILURE);
  1427. }}
  1428.  
  1429. for(i1=0 ; i1<tf ; i1++){
  1430. //cout<<"\n----------------------------------------------------------------------\n";
  1431.  
  1432. bb=false;
  1433.  
  1434. step11();
  1435.  
  1436. // cout<<"\n##########NEW Genrate for Calculate to find motif of Step: "<<r+2<<" ############\n" ;
  1437.  
  1438.  
  1439. r++;
  1440. b=1;
  1441. selectrandom();
  1442.  
  1443.  
  1444. }
  1445.  
  1446.  
  1447. cout<<"\n-----------------------------------------\n";
  1448.  
  1449.  
  1450.  
  1451. sum=testi[0];
  1452. l=0;
  1453. cout<<"\nEntropy is :\n";
  1454. for(i=0 ; i<tf ; i++){
  1455. printf("%4f \t",testi[i]);
  1456. if(testi[i]<sum){
  1457. sum=testi[i];
  1458. l=i;
  1459. }
  1460. cout<<"\n";
  1461. }
  1462. cout<<"minentropy["<<l<<"]: "<<sum<<"\n";
  1463.  
  1464. if(s==1)
  1465. cout<<"\n-------------------One Occurrence Per Sequence (oops) of Motifs-----------------\n";
  1466. else if(s==2)
  1467. cout<<"\n-------------------Any Number of Repetitions (anr) of Motifs-----------------\n";
  1468. else if(s==3)
  1469. cout<<"\n-------------------Zero or One Occurrence per Sequence (zoops) of Motifs-----------------\n";
  1470.  
  1471.  
  1472.  
  1473. cout<<"\n"<<"Start position of left motif:"<<"\t"<<"Start position of right motif:"<<"\n";
  1474.  
  1475.  
  1476. for(i=0 ; i<max ; i++){
  1477. initialmotif[i][0]=rndnuml[i][l];
  1478. initialmotif[i][1]=rndnumr[i][l];
  1479. if(s != 3)
  1480. cout<<initialmotif[i][0]<<" \t\t\t\t "<<initialmotif[i][1];
  1481. state[i][0]=state[i][l];
  1482. if(s != 3)
  1483. cout<<"\n";
  1484. }
  1485. cout<<"\n";
  1486.  
  1487.  
  1488. for(i=0 ; i<max ; i++){
  1489. k=0;
  1490. for(j=initialmotif[i][0] ; j<initialmotif[i][0]+lml ; j++){
  1491. if(state[i][0]=='P')
  1492. finalmotif[i][k]=str2[i][j];
  1493. else if(state[i][0]=='N')
  1494. finalmotif[i][k]=str3[i][j];
  1495. k++;
  1496. }
  1497. for(j=initialmotif[i][1] ; j<initialmotif[i][1]+rml ; j++){
  1498. if(state[i][0]=='P')
  1499. finalmotif[i][k]=str2[i][j];
  1500. else if(state[i][0]=='N')
  1501. finalmotif[i][k]=str3[i][j];
  1502. k++;
  1503. }
  1504. }
  1505.  
  1506. if(s==3){
  1507. zeromotif();
  1508.  
  1509. }
  1510.  
  1511. else if (s==1){
  1512. for(i=0 ; i<max ; i++){
  1513. if(g1=="1"){
  1514. for(j=0 ; str5[i][j] !='\n' ; j++)
  1515. cout<<str5[i][j];
  1516. }
  1517. else {
  1518. cout<<">"<<i+1;
  1519. }
  1520.  
  1521. cout<<"\t"<<"Direction: "<<state[i][0];
  1522. cout<<"\n";
  1523.  
  1524. for(j=0 ; j<lms ; j++){
  1525. printf("%c",finalmotif[i][j]);}
  1526. cout<<"\n";
  1527. } }
  1528.  
  1529.  
  1530. // for(i=0 ; i<max ; i++)
  1531. //cout<<rndnuml[i][l]<<"\t"<<rndnumr[i][l]<<"\n";
  1532.  
  1533.  
  1534. cout<<"\n***********************************************\n";
  1535.  
  1536.  
  1537. if(s==1 || s==2)
  1538. count_nucleotide(finalmotif);
  1539. else
  1540. count_nucleotide(finalmotifz);
  1541.  
  1542.  
  1543. cout<<"\n Nucleotide count of select Position Frequency Matrix(PFM):\n\n ";
  1544. for(i=0 ; i<lm ; i++){
  1545. if (i<=9)
  1546. cout<<" ("<<i+1<<")";
  1547. else
  1548. cout<<" ("<<i+1<<")";}
  1549.  
  1550. cout<<"\n";
  1551.  
  1552. for(i=0 ; i<nu ; i++){
  1553. cout<<da[i]<<"\t";
  1554. for(j=0 ; j<(lm) ; j++)
  1555. cout<<freq[i][j]<<"\t";
  1556. cout<<"\n";
  1557. }
  1558.  
  1559. if(s==2){
  1560. secondmotif();
  1561. }
  1562.  
  1563.  
  1564.  
  1565.  
  1566. }
  1567. //-------------------------------ZOOPS---------------------------------------
  1568. void zeromotif(){
  1569. zm=84;
  1570. r=0;
  1571. z=0;
  1572. sum=0;
  1573. sum2=0;
  1574. sum3=0;
  1575.  
  1576. //double sumscore[1000],BG4[16];
  1577.  
  1578. int BG3[16];
  1579.  
  1580. float landa=0.99;
  1581. long double sumbg3=1, scorez[2000], div, sumQ[2000], score1[2000], sumscore[2000],BG4[16],y1=0;
  1582.  
  1583. if(md==2)
  1584. landa=0.999;
  1585.  
  1586. while(z<max){
  1587.  
  1588. for(i=0 ; i<max ; i++){
  1589. rndnuml[i][r]=initialmotif[i][0];
  1590. rndnumr[i][r]=initialmotif[i][1];
  1591. }
  1592. selseq=z;
  1593. //max=lenstr2[z];
  1594. selectseq();
  1595.  
  1596. Background_count();
  1597. for(k1=0 ; k1<nu ; k1++){
  1598. BG4[k1]=BG[k1];
  1599. //cout<<BG4[k1]<<"\t";
  1600. }
  1601. subseqlength3();
  1602.  
  1603. sum=0;
  1604.  
  1605.  
  1606. for(i=0 ; i<a ; i++){
  1607. if(i<2000){
  1608. score1[i]=1;
  1609. div=1;
  1610. for(j=0 ; j<(lm) ; j++){
  1611. subnuc=seq2[i][j];
  1612.  
  1613. if(md==2)
  1614. subnuc+=seq2[i][j+1];
  1615.  
  1616. for(k=0 ; k<nu ; k++)
  1617.  
  1618. if ((subnuc==da[k])){
  1619. score1[i]=score1[i]*(freq1[k][j]);
  1620. }
  1621. }
  1622.  
  1623.  
  1624. for(k1=0; k1<lenstr2[z]-1; k1++){
  1625. if(k1<2000){
  1626. if(rml==0){
  1627. if((k1<positionl[i][0] || k1>positionl[i][0]+lml))
  1628. for(k=0 ; k<nu ; k++){
  1629. subnuc=str2[z][k1];
  1630. if(md==2)
  1631. subnuc+=str2[z][k1+1];
  1632. if(subnuc == da[k])
  1633. div=((div)*(BG[k]));
  1634. }}
  1635. else if(rml>0){
  1636. if((((k1<positionl[i][0]) || (k1>positionl[i][0]+lml && k1<positionr[i][0]) || (k1>positionr[i][0]))))
  1637. for(k=0 ; k<nu ; k++){
  1638. subnuc=str2[z][k1];
  1639. if(md==2)
  1640. subnuc+=str2[z][k1+1];
  1641. if(subnuc == da[k])
  1642. div=((div)*(BG[k]));
  1643. }
  1644. }
  1645. }
  1646. }
  1647.  
  1648. //div=1;
  1649. score1[i]=score1[i]*(div);
  1650.  
  1651. y1=(landa)/(float)(lenstr2[z]-lm+1);
  1652.  
  1653. y1=1-y1;
  1654.  
  1655. }
  1656. }
  1657.  
  1658. sumscore[z]=0;
  1659.  
  1660. for(i=0 ; i<a ; i++){
  1661. if(i<2000)
  1662. sumscore[z]+=score1[i];
  1663. }
  1664.  
  1665. //cout<<"sumscore:"<<sumscore[z]<<"\n";
  1666.  
  1667. for(i1=0; i1<nu ; i1++){
  1668. BG3[i1]=0;}
  1669.  
  1670.  
  1671.  
  1672. for(k1=0; k1<lenstr2[z]-1; k1++){
  1673. if(k1<2000){
  1674. subnuc1=str2[z][k1];
  1675. if(md==2)
  1676. subnuc1+=str2[z][k1+1];
  1677.  
  1678. for(k=0 ; k<nu ; k++){
  1679. if(subnuc1 == da[k])
  1680. BG3[k]++;
  1681. }
  1682. }
  1683. }
  1684.  
  1685.  
  1686. sumbg3=1;
  1687. for(i1=0; i1<nu ; i1++){
  1688. //sumbg3=sumbg3 * (BG4[i1]*BG3[i1]);
  1689. sumbg3=sumbg3 * pow(BG4[i1], BG3[i1]);
  1690. //cout<<BG4[i1]<<"\t"<<BG3[i1]<<"\n";
  1691. }
  1692. //cout<<"\nsumBG:"<<sumbg3<<"\n";
  1693.  
  1694.  
  1695.  
  1696. //cout<<"\n--------Sum SCORE-----\n"<<z<<" "<<sumscore[z];
  1697.  
  1698. sum=0;
  1699. sumQ[z]=0;
  1700. for(i1=0 ; i1<a ; i1++){
  1701. if(i1<2000){
  1702. scorez[i1]=score1[i1]/(sumbg3*y1+sumscore[z]);
  1703. sumQ[z]+=scorez[i1];}
  1704. }
  1705. //cout<<"sumQ:"<<sumQ[z]<<"\n";
  1706.  
  1707. initial=z;
  1708. z++;
  1709.  
  1710. }
  1711.  
  1712. //cout<<"\n------------------------------------------\nLanda"<<landa<<"\n";
  1713. /* cout<<"MIN"<<j<<":"<<min1;
  1714.  
  1715. for(i=0; i<max; i++){
  1716. sum3=abs(sumscore[i]-sum);
  1717. sum3=pow(sum3,2);
  1718. sum2+=sum3;
  1719. }
  1720. cout<<"\n"<<sum2<<"\n";
  1721. sum2=sum2/(max-1);
  1722. sum2=sqrt(sum2);
  1723. cout<<sum2<<"\n----------\n";
  1724. sum2*=1;
  1725. cout<<"\n------Sum2----\n";
  1726. //cout<<sum2;
  1727.  
  1728. if(md==1)
  1729. sum2=sum/10;
  1730. else if(md==2)
  1731. sum2=sum/10000000;
  1732. */
  1733. //cout<<sum2<<"\n-------------------------------\n";
  1734. int TP,TN,FN,FP;
  1735. TP=TN=FN=FP=0;
  1736. //(sumscore[i]<=(sum+sum2)) &&
  1737. for(i=0; i<max; i++){
  1738. if(i<323 && sumQ[i]>=(landa))
  1739. TP++;
  1740. if(i<323 && sumQ[i]<(landa))
  1741. FN++;
  1742. if(i>322 && sumQ[i]>=(landa))
  1743. FP++;
  1744. if(i>322 && sumQ[i]<(landa))
  1745. TN++;
  1746. }
  1747.  
  1748. //cout<<"\n---------TP--------\n";
  1749. //cout<<"TP:"<<TP<<"\n"<<"FN:"<<FN<<"\n"<<"FP:"<<FP<<"\n"<<"TN:"<<TN<<"\n";
  1750. for(i=0 ; i<max ; i++){
  1751. if(sumQ[i]<(landa))
  1752. cout<<"N/A \t\t\t\t N/A\n";
  1753. else{
  1754. cout<<initialmotif[i][0]<<" \t\t\t\t "<<initialmotif[i][1];
  1755. cout<<"\n";}
  1756.  
  1757. cout<<"\n";}
  1758.  
  1759.  
  1760. for (i=0; i<max; i++)
  1761. {
  1762. if(g1=="1"){
  1763. for(j=0 ; str5[i][j] !='\n' ; j++){
  1764. cout<<str5[i][j];}
  1765. cout<<"\tQ_Score:"<<sumQ[i];
  1766. }
  1767. else if(g1=="2"){
  1768. cout<<">"<<i+1<<"\tQ_Score:"<<sumQ[i];
  1769. }
  1770. cout<<"\t"<<"Direction: "<<state[i][0];
  1771. cout<<"\n";
  1772. if(sumQ[i]<(landa))
  1773. cout<<" N/A";
  1774. else
  1775. for (j = 0; j < lms; j++)
  1776. {
  1777. cout<<finalmotif[i][j];
  1778. }
  1779.  
  1780. cout<<"\n";}
  1781. x=0;
  1782. for(i=0 ; i<max; i++){
  1783. if(sumQ[i]>=(landa))
  1784. for(j=0 ; j<(lm) ; j++)
  1785. finalmotifz[i][j]=finalmotif[i][j];
  1786. else
  1787. for(j=0 ; j<(lm) ; j++)
  1788. finalmotifz[i][j]=' ';
  1789.  
  1790. }
  1791.  
  1792.  
  1793. cout<<"\n-------------------------------------------------------------\n";
  1794. cout<<"\n-------------------------------------------------------------\n";
  1795. cout<<"\n-------------------------------------------------------------\n";
  1796. }
  1797. //----------------------------------------------------------------------------------------
  1798. void zeromotif1(){
  1799. float score11[2000];
  1800. float z8,z9;
  1801. zm=84;
  1802. count_nucleotide(finalmotif);
  1803.  
  1804. sum=0;
  1805. sum2=0;
  1806. sum3=0;
  1807. a=max;
  1808.  
  1809. for(i=0 ; i<max ; i++){
  1810. if(i!=selseq)
  1811. for(j=0 ; j<lenstr2[i]-1 ; j++){
  1812. for(k=0 ; k<nu ; k++){
  1813. if(rml==0){
  1814. if (j<initialmotif[i][0]-1 || (j>(initialmotif[i][0]+rml-1))){
  1815. subnuc=str2[i][j];
  1816.  
  1817. if(md==2)
  1818. subnuc+=str2[i][j+1];
  1819.  
  1820. if(subnuc==da[k])
  1821. BG[k]++;}
  1822. }
  1823. else
  1824. {
  1825. if (j<initialmotif[i][0]-1 || ((j>(initialmotif[i][0]+lml-1)) && (j<initialmotif[i][1]-1)) || (j>(initialmotif[i][1]+rml-1))){
  1826. subnuc=str2[i][j];
  1827.  
  1828. if(md==2)
  1829. subnuc+=str2[i][j+1];
  1830.  
  1831. if(subnuc==da[k])
  1832. BG[k]++;}
  1833. }
  1834. }
  1835. }
  1836.  
  1837. for(i=0 ; i<nu ; i++){
  1838. sum=sum+BG[i];
  1839.  
  1840. }
  1841. }
  1842.  
  1843. for(i=0 ; i<nu ; i++){
  1844.  
  1845. if(md==1){
  1846. BG[i]=(BG[i]+0.25)/sum;
  1847. if(p==2)
  1848. BG2[i]=(BG2[i]+0.25)/sum2;
  1849. }
  1850. else if(md==2){
  1851. BG[i]=(BG[i]+0.0625)/sum;
  1852. if(p==2)
  1853. BG2[i]=(BG2[i]+0.0625)/sum2;
  1854. }
  1855.  
  1856. for(j=0 ; j<(lm) ; j++){
  1857. if(md==1)
  1858. freq1[i][j]=(float((freq[i][j])+0.25)/max);
  1859. else if(md==2)
  1860. freq1[i][j]=(float((freq[i][j])+0.0625)/max);
  1861.  
  1862.  
  1863. }}
  1864.  
  1865.  
  1866. score1[i]=0;
  1867. div=0;
  1868. for(j=0 ; j<(lm) ; j++){
  1869. subnuc=finalmotif[i][j];
  1870.  
  1871. if(md==2)
  1872. subnuc+=finalmotif[i][j+1];
  1873.  
  1874. for(k=0 ; k<nu ; k++)
  1875.  
  1876. if (subnuc==da[k]){
  1877. //printf("Log%.2f ",freq1[k][j]);
  1878. div=div+log(BG[k]);
  1879. score1[i]=score1[i]+(freq1[k][j]);
  1880. }
  1881.  
  1882.  
  1883.  
  1884.  
  1885.  
  1886. score1[i]=-1 * score1[i]/div;
  1887. //cout<<"="<<score1[i]<<" number:"<<i<<"\n";
  1888. //sum=sum+score1[i];
  1889.  
  1890. }
  1891.  
  1892. cout<<"\n-------------------Score------------------\n";
  1893. for(i=0; i<max; i++){
  1894. //cout<<i+1<<"-"<<score1[i]<<"\n";
  1895. sum+=score1[i];
  1896. score11[i]=score1[i];
  1897. position[i]=i+1;
  1898. }
  1899.  
  1900. for(i=(max-1); i>=0; i--)
  1901. for(j=0; j<i ; j++)
  1902. if(score11[j]>score11[j+1]){
  1903. z9=score11[j];
  1904. z8=position[j];
  1905. score11[j]=score11[j+1];
  1906. position[j]=position[j+1];
  1907. score11[j+1]=z9;
  1908. position[j+1]=z8;}
  1909.  
  1910. i=(max)/2;
  1911. sum=sum/max;
  1912. //cout<<"\n------Middle----\n"<<sum<<"\n----------\n";
  1913.  
  1914. for(i=0; i<max; i++){
  1915. sum3=abs(score1[i]-sum);
  1916. //sum3=pow(sum3,2);
  1917. sum2+=sum3;
  1918. }
  1919. //cout<<sum2<<"\n";
  1920. sum2=sum2/(max-1);
  1921. sum2=sqrt(sum2);
  1922. //cout<<sum2<<"\n----------\n";
  1923. sum2*=1;
  1924. cout<<sum2<<"\n----------\n";
  1925. cout<<"\n-------------------Score------------------\n";
  1926. int TP,TN,FN,FP;
  1927. TP=TN=FN=FP=0;
  1928. for(i=0; i<max; i++){
  1929. if(i<323 && score1[i]>sum2)
  1930. TP++;
  1931. if(i<323 && score1[i]<sum2)
  1932. FN++;
  1933. if(i>322 && score1[i]>sum2)
  1934. FP++;
  1935. if(i>322 && score1[i]<sum2)
  1936. TN++;
  1937. }
  1938.  
  1939. cout<<"\n---------TP--------\n";
  1940. cout<<"TP:"<<TP<<"\n"<<"FN:"<<FN<<"\n"<<"FP:"<<FP<<"\n"<<"TN:"<<TN<<"\n";
  1941. //cout<<i+1<<" "<<position[i]<<"-"<<score11[i]<<"\n";
  1942.  
  1943.  
  1944.  
  1945.  
  1946. for(i=0 ; i<max ; i++){
  1947. if(sum2 > score1[i])
  1948. cout<<"N/A \t\t\t\t N/A\n";
  1949. else{
  1950. cout<<initialmotif[i][0]<<" \t\t\t\t "<<initialmotif[i][1];
  1951. cout<<"\n";}
  1952.  
  1953. cout<<"\n";}
  1954.  
  1955.  
  1956. for (i=0; i<max; i++)
  1957. {
  1958. if(g1=="1"){
  1959. for(j=0 ; str5[i][j] !='\n' ; j++)
  1960. cout<<str5[i][j];
  1961. }
  1962. else {
  1963. cout<<">"<<i+1;
  1964. }
  1965. cout<<"\t"<<"Direction: "<<state[i][0];
  1966. cout<<"\n";
  1967. if(sum2 > score1[i])
  1968. cout<<" N/A";
  1969. else
  1970. for (j = 0; j < lms; j++)
  1971. {
  1972. cout<<finalmotif[i][j];
  1973. }
  1974.  
  1975. cout<<"\n";}}
  1976. //----------------------------------------------------------------------------------------------
  1977. void secondmotif(){
  1978.  
  1979. //int k5,j3,i3,a1,l3=0;
  1980. //int scoremotif1[max];
  1981. char match[100];
  1982. //int scoremotif2[1000][200];
  1983. //string secmotif[1000][2000];
  1984.  
  1985.  
  1986. cout<<"\n---------------------------New motifs discovery position------------------------------\n";
  1987. md=1;
  1988. nu=4;
  1989. lm=lms;
  1990. da[0]="A"; da[1]="C"; da[2]="G"; da[3]="T";
  1991. count_nucleotide(finalmotif);
  1992.  
  1993.  
  1994.  
  1995. for (i = 0; i < lms; i++)
  1996. {
  1997.  
  1998. k = 0;
  1999. l = 0;
  2000. for (j = 0; j < nu; j++)
  2001. {
  2002. if (freq[j][i] > k)
  2003. {
  2004. k = freq[j][i];
  2005. l = j;
  2006.  
  2007. }
  2008.  
  2009. }
  2010. fsum=fsum+k;
  2011.  
  2012. if (l == 0)
  2013. {
  2014. match[i] = 'A';
  2015. }
  2016. else if (l == 1)
  2017. {
  2018. match[i] = 'C';
  2019. }
  2020. else if (l == 2)
  2021. {
  2022. match[i] = 'G';
  2023. }
  2024. else if (l == 3)
  2025. {
  2026. match[i] = 'T';
  2027. }
  2028.  
  2029. }
  2030.  
  2031. cout<<"The best match of sequences for motif detection:\n\t";
  2032. for (i = 0; i <lms; i++)
  2033. cout<<match[i]<<"\t";
  2034. cout<<"\n";
  2035.  
  2036.  
  2037. for(i=0; i<max; i++)
  2038. {
  2039. temp_e[i]=0;
  2040. for(j=0; j<lm; j++)
  2041. {
  2042. subnuc="";
  2043. for(k=0; k<nu; k++)
  2044. {
  2045. subnuc=finalmotif[i][j];
  2046. if(md==2)
  2047. subnuc=subnuc+finalmotif[i][j];
  2048.  
  2049. if (subnuc== da[k])
  2050. {
  2051. temp_e[i] += freq[k][j];
  2052.  
  2053. }
  2054. }
  2055. }
  2056. if (i == 0)
  2057. {
  2058. l2 = 0;
  2059. min = temp_e[0];
  2060. }
  2061.  
  2062. if(min > temp_e[i])
  2063. {
  2064. min = temp_e[i];
  2065. l2 = i;
  2066. }
  2067.  
  2068. //cout<< i << ":" << temp_e[i] << "\n";
  2069. }
  2070. //cout<< "\n Minimum:" << min<<"\n";
  2071. //cout<< "Min:" << l2 << "-" << temp_e[l2];
  2072. //cout<< "\n";
  2073. subnuc="";
  2074.  
  2075. for (i=0; i<max; i++)
  2076. {
  2077.  
  2078. a = 0;
  2079.  
  2080. for (i1 = ming; i1 <= maxg; i1++)
  2081. {
  2082.  
  2083. for (l = 0; l < (lenstr2[i] - lms - i1); l++)
  2084. {
  2085. score2[i][l] = 0;
  2086. k = 0;
  2087. if ( l > (initialmotif[i][1] + rml) || l< (initialmotif[i][0]-lms-i1))
  2088. {
  2089. subnuc="";
  2090. for (j = 0; j < lms; j++)
  2091. {
  2092.  
  2093. if (j < lml)
  2094. {
  2095. seq2[a][j] = str2[i][l + j];
  2096. for (k2 = 0; k2 < nu; k2++)
  2097. {
  2098. subnuc=seq2[a][j];
  2099. if(md==2)
  2100. subnuc=subnuc+seq2[a][j];
  2101.  
  2102. if (subnuc == da[k2])
  2103. {
  2104. score2[i][l] += freq[k2][j];
  2105. }
  2106. }
  2107. }
  2108. else
  2109. {
  2110. seq2[a][j] = str2[i][l + j + i1];
  2111. for (k2 = 0; k2 < nu; k2++)
  2112. {
  2113. subnuc=seq2[a][j];
  2114. if(md==2)
  2115. subnuc=subnuc+seq2[a][j];
  2116.  
  2117. if (subnuc == da[k2])
  2118. {
  2119. score2[i][l] += freq[k2][j];
  2120. }
  2121. }
  2122. }
  2123.  
  2124. //cout<< seq2[a][j];
  2125. //secmotif[i][a1] += seq2[a][j];
  2126. //subnuc1 = subnuc1+seq2[a][j];
  2127. }
  2128. subnuc1 =seq2[a];
  2129.  
  2130. //cout<< " - " << scoremotif2[i][l] << "\n";
  2131. //cout<< " - " << subnuc << "\n";
  2132.  
  2133. if (min <= score2[i][l])
  2134. {
  2135. x=l+1+i1+lml;
  2136. if(g1=="1")
  2137. for(A=0 ; str5[i][A] !='\n' ; A++)
  2138. cout<<str5[i][A];
  2139. else
  2140. cout<<"Site:" << (i) ;
  2141.  
  2142. cout<< "\tLeft Position:" << (l)<<", Right Position:"<<(x) << " - " << subnuc1 << "\n";
  2143.  
  2144. subnuc1="";
  2145. }
  2146.  
  2147. a++;
  2148. }
  2149. }
  2150.  
  2151. }
  2152. }
  2153.  
  2154.  
  2155. //exit:
  2156.  
  2157. }
  2158.  
  2159.  
  2160. //std::vector<int> lenstr2;
  2161.  
  2162. char ch,str2[1000][1000],str3[1000][1000],str4[2000][1000],str5[1000][1000],state[1000][100];
  2163. int i , i1,j,k1,k2,k,l,l2,md,nu,lms,tf,p,rnd,rnd1,rnd2,rnd3,nmax,min,max,max2,ming,maxg,x,y,A,B,C,D,a,b,r,ns,g,z,lml,rml,lm,s,zm;
  2164. int selseq,initial,lenstr2[1000],freq[16][50],rndnuml[1000][100],rndnumr[1000][100],rndnuml2[1000][100],rndnumr2[1000][100],initialmotif[1000][2],sigb[1000][2];
  2165. int temp_e[1000],positionl[9000][2],positionr[9000][2], position[2000];
  2166. float freq1[16][50],freq2[4][50],score1[9000],score2[9000][2];
  2167. float BG[16],BG2[16],entropytotal[100][2],entropy[50],minentropy[100],testi[100];
  2168. float fsum,rndsel,sum,sum2,sum3,t,div,div1[2],maxscor[2];
  2169. string subnuc,subnuc1;
  2170. bool flag,bb;
  2171. char line,finalmotif[1000][50],finalmotifz[1000][50],strmotif[1000][50],seq1[1000][50],seq5[1000][50],seq2[9000][50],seq4[9000][50],ran1[1000][50];
  2172. string da[16];
  2173. string g1,seq,temp,temp2,temp3,address,input;
  2174. ifstream myfile1,myfile2,myfile3,myfile4;
  2175. FILE *student;
  2176. ostringstream buff;
  2177. };
  2178.  
  2179. int main(int argc, char *argv[])
  2180. {
  2181.  
  2182.  
  2183. //Some stuff to interact with the class will go here
  2184. Computer computer_one;
  2185.  
  2186. string input2;
  2187. input2=argv[1];
  2188. input2+=" ";
  2189. int i=2;
  2190. while(i < argc)
  2191. {
  2192. input2+=argv[i];
  2193. input2+=" ";
  2194. i++;
  2195. }
  2196.  
  2197.  
  2198. computer_one.read(input2);
  2199. computer_one.output_information();
  2200.  
  2201. computer_one.textmining();
  2202.  
  2203. computer_one.reverse();
  2204. //computer_one.subseqlength3();
  2205. computer_one.selectrandom();
  2206. //computer_one.randomgenerate();
  2207. computer_one.startlocation();
  2208. computer_one.PPM();
  2209. //computer_one.count_nucleotide();
  2210. //computer_one.PFM();
  2211. //computer_one.selectseq();
  2212. //computer_one.test();
  2213. //computer_one.score();
  2214.  
  2215.  
  2216. //computer_one.step1();
  2217. //computer_one.step2();
  2218.  
  2219.  
  2220. //computer_one.step11();
  2221.  
  2222. computer_one.step21();
  2223.  
  2224.  
  2225. //computer_one.secondmotif();
  2226.  
  2227.  
  2228.  
  2229.  
  2230.  
  2231. fclose(stdout);
  2232.  
  2233.  
  2234. return 0;
  2235. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement