/* local pair probabilities for RNA secondary structures Stephan Bernhart, Ivo L Hofacker Vienna RNA package */ /* todo: compute energy z-score for each window */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #include #include /* #defines FLT_MAX ... */ #include "ViennaRNA/utils.h" #include "ViennaRNA/energy_par.h" #include "ViennaRNA/fold_vars.h" #include "ViennaRNA/pair_mat.h" #include "ViennaRNA/PS_dot.h" #include "ViennaRNA/part_func.h" #include "ViennaRNA/params.h" #include "ViennaRNA/loop_energies.h" #include "ViennaRNA/LPfold.h" #include "ViennaRNA/Lfold.h" #ifdef _OPENMP #include #endif #define ISOLATED 256.0 /* ################################# # GLOBAL VARIABLES # ################################# */ /* ################################# # PRIVATE VARIABLES # ################################# */ PRIVATE float cutoff; PRIVATE int num_p=0; /* for counting basepairs in pairlist pl, can actually be moved into pfl_fold */ PRIVATE FLT_OR_DBL *expMLbase=NULL; PRIVATE FLT_OR_DBL **q=NULL, **qb=NULL, **qm=NULL, *qqm=NULL, *qqm1=NULL, *qq=NULL, *qq1=NULL, **pR=NULL, **qm2=NULL, **QI5=NULL, **q2l=NULL, **qmb=NULL;/*,**QI3,*/ PRIVATE FLT_OR_DBL *prml=NULL, *prm_l=NULL, *prm_l1=NULL, *q1k=NULL, *qln=NULL; PRIVATE FLT_OR_DBL *scale=NULL; PRIVATE char **ptype=NULL; /* precomputed array of pair types */ PRIVATE int *jindx=NULL; PRIVATE int *my_iindx=NULL; PRIVATE vrna_exp_param_t *pf_params=NULL; PRIVATE short *S=NULL, *S1=NULL; PRIVATE int ulength; PRIVATE int pUoutput; PRIVATE double alpha = 1.0; #ifdef _OPENMP /* NOTE: all variables are assumed to be uninitialized if they are declared as threadprivate */ #pragma omp threadprivate(cutoff, num_p, scale, ptype, jindx, my_iindx, pf_params,\ expMLbase, q, qb, qm, qqm, qqm1, qq, qq1, pR, qm2, QI5, q2l, qmb,\ prml, prm_l, prm_l1, q1k, qln,\ S, S1, ulength, pUoutput, alpha) #endif /* ################################# # PRIVATE FUNCTION DECLARATIONS # ################################# */ PRIVATE void init_partfunc_L(int length, vrna_exp_param_t *parameters); PRIVATE void get_arrays_L(unsigned int length); PRIVATE void free_pf_arrays_L(void); PRIVATE void scale_pf_params(unsigned int length, vrna_exp_param_t *parameters); PRIVATE void GetPtype(int j, int pairsize, const short *S, int n); PRIVATE void FreeOldArrays(int i); PRIVATE void GetNewArrays(int j, int winSize); PRIVATE void printpbar(FLT_OR_DBL **prb,int winSize, int i, int n); PRIVATE plist *get_deppp(plist *pl, int start, int pairsize, int length); PRIVATE plist *get_plistW(plist *pl, int length, int start, FLT_OR_DBL **Tpr, int winSize); PRIVATE void print_plist(int length, int start, FLT_OR_DBL **Tpr, int winSize, FILE *fp); PRIVATE void compute_pU(int k, int ulength, double **pU, int winSize, int n, char *sequence); PRIVATE void putoutpU(double **pU,int k, int ulength, FILE *fp); /*PRIVATE void make_ptypes(const short *S, const char *structure);*/ PRIVATE void putoutpU_splitup(double **pUx, int k, int ulength, FILE *fp, char ident); PRIVATE void compute_pU_splitup(int k, int ulength, double **pU, double **pUO, double **pUH, double **pUI, double **pUM, int winSize,int n, char *sequence); /* ################################# # BEGIN OF FUNCTION DEFINITIONS # ################################# */ PRIVATE void init_partfunc_L(int length, vrna_exp_param_t *parameters){ if (length<1) vrna_message_error("init_partfunc_L: length must be greater 0"); #ifdef _OPENMP /* Explicitly turn off dynamic threads */ omp_set_dynamic(0); free_pf_arrays_L(); /* free previous allocation */ #else free_pf_arrays_L(); /* free previous allocation */ #endif #ifdef SUN4 nonstandard_arithmetic(); #else #ifdef HP9 fpsetfastmode(1); #endif #endif make_pair_matrix(); get_arrays_L((unsigned) length); scale_pf_params((unsigned) length, parameters); } PRIVATE void get_arrays_L(unsigned int length){ /*arrays in 2 dimensions*/ q = (FLT_OR_DBL **) vrna_alloc(sizeof(FLT_OR_DBL *)*(length+1)); qb = (FLT_OR_DBL **) vrna_alloc(sizeof(FLT_OR_DBL *)*(length+1)); qm = (FLT_OR_DBL **) vrna_alloc(sizeof(FLT_OR_DBL *)*(length+1)); pR = (FLT_OR_DBL **) vrna_alloc(sizeof(FLT_OR_DBL *)*(length+1)); q1k = (FLT_OR_DBL *) vrna_alloc(sizeof(FLT_OR_DBL) *(length+1)); qln = (FLT_OR_DBL *) vrna_alloc(sizeof(FLT_OR_DBL) *(length+2)); qq = (FLT_OR_DBL *) vrna_alloc(sizeof(FLT_OR_DBL) *(length+2)); qq1 = (FLT_OR_DBL *) vrna_alloc(sizeof(FLT_OR_DBL) *(length+2)); qqm = (FLT_OR_DBL *) vrna_alloc(sizeof(FLT_OR_DBL) *(length+2)); qqm1 = (FLT_OR_DBL *) vrna_alloc(sizeof(FLT_OR_DBL) *(length+2)); prm_l = (FLT_OR_DBL *) vrna_alloc(sizeof(FLT_OR_DBL) *(length+2)); prm_l1 = (FLT_OR_DBL *) vrna_alloc(sizeof(FLT_OR_DBL) *(length+2)); prml = (FLT_OR_DBL *) vrna_alloc(sizeof(FLT_OR_DBL) *(length+2)); expMLbase = (FLT_OR_DBL *) vrna_alloc(sizeof(FLT_OR_DBL) *(length+1)); scale = (FLT_OR_DBL *) vrna_alloc(sizeof(FLT_OR_DBL) *(length+1)); ptype = (char **) vrna_alloc(sizeof(char *) *(length+2)); if (ulength>0) { /* QI3 = (FLT_OR_DBL **) vrna_alloc((length+1)*sizeof(FLT_OR_DBL *));*/ QI5 = (FLT_OR_DBL **) vrna_alloc((length+1)*sizeof(FLT_OR_DBL *)); qmb = (FLT_OR_DBL **) vrna_alloc((length+1)*sizeof(FLT_OR_DBL *)); qm2 = (FLT_OR_DBL **) vrna_alloc((length+1)*sizeof(FLT_OR_DBL *)); q2l = (FLT_OR_DBL **) vrna_alloc((length+1)*sizeof(FLT_OR_DBL *)); } my_iindx = vrna_idx_row_wise(length); iindx = vrna_idx_row_wise(length); /* for backward compatibility and Perl wrapper */ jindx = vrna_idx_col_wise(length); } PRIVATE void free_pf_arrays_L(void){ if(q) free(q); if(qb) free(qb); if(qm) free(qm); if(pR) free(pR); if(qm2) free(qm2); if(qq) free(qq); if(qq1) free(qq1); if(qqm) free(qqm); if(qqm1) free(qqm1); if(q1k) free(q1k); if(qln) free(qln); if(prm_l) free(prm_l); if(prm_l1) free(prm_l1); if(prml) free(prml); if(expMLbase) free(expMLbase); if(scale) free(scale); if(my_iindx) free(my_iindx); if(iindx) free(iindx); /* for backward compatibility and Perl wrapper */ if(jindx) free(jindx); if(ptype) free(ptype); if(QI5) free(QI5); if(qmb) free(qmb); if(q2l) free(q2l); if(pf_params) free(pf_params); q = qb = qm = pR = QI5 = qmb = qm2 = q2l = NULL; qq = qq1 = qqm = qqm1 = q1k = qln = prml = prm_l = prm_l1 = expMLbase = NULL; my_iindx = jindx = iindx = NULL; pf_params = NULL; ptype = NULL; scale = NULL; #ifdef SUN4 standard_arithmetic(); #else #ifdef HP9 fpsetfastmode(0); #endif #endif } PUBLIC void update_pf_paramsLP(int length){ update_pf_paramsLP_par(length, NULL); } PUBLIC void update_pf_paramsLP_par(int length, vrna_exp_param_t *parameters){ init_partfunc_L(length, parameters); } PUBLIC plist *pfl_fold( char *sequence, int winSize, int pairSize, float cutoffb, double **pU, plist **dpp2, FILE *pUfp, FILE *spup){ return pfl_fold_par(sequence, winSize, pairSize, cutoffb, pU, dpp2, pUfp, spup, NULL); } PUBLIC plist *pfl_fold_par( char *sequence, int winSize, int pairSize, float cutoffb, double **pU, plist **dpp2, FILE *pUfp, FILE *spup, vrna_exp_param_t *parameters){ int n, m, i, j, k, l, u, u1, type, type_2, tt, ov, do_dpp, simply_putout, noGUclosure; double max_real; FLT_OR_DBL temp, Qmax, prm_MLb, prmt, prmt1, qbt1, *tmp, expMLclosing; plist *dpp, *pl; int split=0; ov = 0; Qmax = 0; do_dpp = 0; simply_putout = 0; dpp = NULL; pl = NULL; pUoutput = 0; ulength = 0; cutoff = cutoffb; if(pU != NULL) ulength = (int)pU[0][0]+0.49; if(spup !=NULL) simply_putout = 1; /*can't have one without the other*/ if(pUfp!=NULL) pUoutput = 1; else if((pUoutput)&&(ulength!=0)){ vrna_message_warning("There was a problem with non existing File Pointer for unpaireds, terminating process\n"); return pl; } dpp = *dpp2; if(dpp !=NULL) do_dpp=1; /*here, I allocate memory for pU, if has to be saved, I allocate all in one go, if pU is put out and freed, I only allocate what I really need*/ n = (int) strlen(sequence); /* allocate memory and initialize unpaired probabilities */ if (ulength > 0) { if (pUoutput) { for (i = 1; i <= ulength; i++) pU[i] = (double *)vrna_alloc((MAX2(MAXLOOP,ulength)+2)*sizeof(double)); } else { for (i = 1; i <= n; i++) pU[i]=(double *)vrna_alloc((MAX2(MAXLOOP,ulength)+2)*sizeof(double)); } } if (n < TURN + 2) { if (ulength > 0) { if (pUoutput) { for (i = 1; i <= ulength; i++) { for (j = 0; j < MAX2(MAXLOOP,ulength) + 1; j++) pU[i][j] = 1.; } } else { for (i = 1; i <= n; i++) { for (j = 0; j < MAX2(MAXLOOP,ulength) + 1; j++) pU[i][j] = 1.; } } } return pl; } /* always init everything since all global static variables are uninitialized when entering a thread */ init_partfunc_L(n, parameters); expMLclosing = pf_params->expMLclosing; noGUclosure = pf_params->model_details.noGUclosure; max_real = (sizeof(FLT_OR_DBL) == sizeof(float)) ? FLT_MAX : DBL_MAX; S = encode_sequence(sequence, 0); S1 = encode_sequence(sequence, 1); /* make_ptypes(S, structure); das machmadochlieber lokal, ey!*/ /*array initialization ; qb,qm,q qb,qm,q (i,j) are stored as ((n+1-i)*(n-i) div 2 + n+1-j */ num_p = 0; pl = (plist *)vrna_alloc(1000*sizeof(plist)); /*ALWAYS q[i][j] => i>j!!*/ for (j=1; j=MAX2(1,(j-winSize+1)); i--) { /* construction of partition function of segment i,j*/ /*firstly that given i bound to j : qb(i,j) */ u = j-i-1; type = ptype[i][j]; if (type!=0) { /*hairpin contribution*/ if (((type==3)||(type==4))&&noGUclosure) qbt1 = 0; else qbt1 = exp_E_Hairpin(u, type, S1[i+1], S1[j-1], sequence+i-1, pf_params) * scale[u+2]; /* interior loops with interior pair k,l */ for (k=i+1; k<=MIN2(i+MAXLOOP+1,j-TURN-2); k++) { u1 = k-i-1; for (l=MAX2(k+TURN+1,j-1-MAXLOOP+u1); l1) ? S1[i-1] : -1, (j0) qm2[i][j]=temp;/*new qm2 computation done here*/ for (k=i+1; k<=j; k++) temp += expMLbase[k-i] * qqm[k]; qm[i][j] = (temp + qqm[i]); /*auxiliary matrix qq for cubic order q calculation below */ qbt1 = qb[i][j]; if (type) { qbt1 *= exp_E_ExtLoop(type, (i>1) ? S1[i-1] : -1, (j < n) ? S1[j+1] : -1, pf_params); } qq[i] = qq1[i]*scale[1] + qbt1; /*construction of partition function for segment i,j */ temp = 1.0*scale[1+j-i] + qq[i]; for (k=i; k<=j-1; k++) temp += q[i][k]*qq[k+1]; q[i][j] = temp; if (temp>Qmax) { Qmax = temp; if (Qmax>max_real/10.) vrna_message_warning("Q close to overflow: %d %d %g\n", i,j,temp); } if (temp>=max_real) { vrna_message_error("overflow in pf_fold while calculating q[%d,%d]\n" "use larger pf_scale", i,j); } } /*end for i*/ tmp = qq1; qq1 =qq; qq =tmp; tmp = qqm1; qqm1=qqm; qqm=tmp; } /* just as a general service, I save here the free energy of the windows no output is generated, however,... */ if ((j>=winSize) && (j<=n) && (ulength) && !(pUoutput)) { double Fwindow=0.; Fwindow=(-log(q[j-winSize+1][j])-winSize*log(pf_params->pf_scale))*pf_params->kT/1000.0; pU[j][0]=Fwindow; /* if (ulength>=winSize) pU[j][winSize]=scale[winSize]/q[j-winSize+1][j]; */ } if (j>winSize) { Qmax=0; /* i=j-winSize; */ /* initialize multiloopfs */ for (k=j-winSize; k<=MIN2(n,j); k++) { prml[k]=0; prm_l[k]=0; /* prm_l1[k]=0; others stay*/ } prm_l1[j-winSize]=0; k=j-winSize; for (l=k+TURN+1; l<=MIN2(n,k+winSize-1); l++) { int a; pR[k][l] = 0; /* set zero at start */ type=ptype[k][l]; if (qb[k][l]==0) continue; for (a=MAX2(1,l-winSize+2); a=1) /*l outermost*/ pR[k][l]+=q[l-winSize+1][k-1]/q[l-winSize+1][l]; } pR[k][l] *= exp_E_ExtLoop(type, (k>1) ? S1[k-1] : -1, (l0)) pR[k][l] += pR[i][m]*exp_E_IntLoop(k-i-1, m-l-1, type, type_2, S1[i+1], S1[m-1], S1[k-1], S1[l+1], pf_params) * scale[k-i+m-l]; } } if (ulength) { /* NOT IF WITHIN INNER LOOP */ for (i=MAX2(MAX2(l-winSize+1,k-MAXLOOP-1),1); i<=k-1; i++) { for (m=l+1; m<=MIN2(MIN2(l+ MAXLOOP -k+i+2,i+winSize-1),n); m++) { type = ptype[i][m]; if ((pR[i][m]>0)){ temp=pR[i][m]*qb[k][l]*exp_E_IntLoop(k-i-1, m-l-1, type, type_2, S1[i+1], S1[m-1], S1[k-1], S1[l+1], pf_params) * scale[k-i+m-l]; QI5[l][m-l-1]+=temp; QI5[i][k-i-1]+=temp; } } } } } /* 3. bonding k,l as substem of multi-loop enclosed by i,m */ prm_MLb = 0.; if(k>1) /*sonst nix!*/ for (l=MIN2(n-1,k+winSize-2); l>=k+TURN+1; l--) { /* opposite direction */ m=l+1; prmt = prmt1 = 0.0; tt = ptype[k-1][m]; tt=rtype[tt]; prmt1 = pR[k-1][m] * expMLclosing * exp_E_MLstem(tt, S1[l], S1[k], pf_params); for (i=MAX2(1,l-winSize+2); ik; i--) prm_MLb += prml[i]*expMLbase[k-i-1]; */ prml[m] = prml[ m] + prm_l[m]; if (qb[k][l] == 0.) continue; temp = prm_MLb; if (ulength) { double dang; /* coefficient for computations of unpairedarrays */ dang = qb[k][l] * exp_E_MLstem(tt, S1[k-1], S1[l+1], pf_params) * scale[2]; for (m=MIN2(k+winSize-2,n);m>=l+2; m--){ qmb[l][m-l-1] += prml[m]*dang; q2l[l][m-l-1] += (prml[m]-prm_l[m])*dang; } } for (m=MIN2(k+winSize-2,n);m>=l+2; m--) temp += prml[m]*qm[l+1][m-1]; temp *= exp_E_MLstem(tt, (k>1) ? S1[k-1] : -1, (lQmax) { Qmax = pR[k][l]; if (Qmax>max_real/10.) vrna_message_warning("P close to overflow: %d %d %g %g\n", i, m, pR[k][l], qb[k][l]); } if (pR[k][l]>=max_real) { ov++; pR[k][l]=FLT_MAX; } } /* end for (l=..) */ tmp = prm_l1; prm_l1=prm_l; prm_l=tmp; /* end for (l=..) */ if ((ulength)&&(k-MAXLOOP-1>0)){ /* if (pUoutput) pU[k-MAXLOOP-1]=(double *)vrna_alloc((ulength+2)*sizeof(double)); */ if(split){ /*generate the new arrays, if you want them somewhere else, you have to generate them and overgive them ;)*/ double **pUO; double **pUI; double **pUM; double **pUH; pUO= (double **) vrna_alloc((n+1)*sizeof(double *)); pUI= (double **) vrna_alloc((n+1)*sizeof(double *)); pUM= (double **) vrna_alloc((n+1)*sizeof(double *)); pUH= (double **) vrna_alloc((n+1)*sizeof(double *)); if (pUoutput) { for (i=1; i<=ulength; i++) { pUH[i]=(double *)vrna_alloc((MAX2(MAXLOOP,ulength)+2)*sizeof(double)); pUI[i]=(double *)vrna_alloc((MAX2(MAXLOOP,ulength)+2)*sizeof(double)); pUO[i]=(double *)vrna_alloc((MAX2(MAXLOOP,ulength)+2)*sizeof(double)); pUM[i]=(double *)vrna_alloc((MAX2(MAXLOOP,ulength)+2)*sizeof(double)); } } //dont want to have that yet? /* else { for (i=1; i<=n; i++) pU[i]=(double *)vrna_alloc((MAX2(MAXLOOP,ulength)+2)*sizeof(double)); }*/ compute_pU_splitup(k-MAXLOOP-1,ulength,pU,pUO,pUH, pUI, pUM, winSize, n, sequence); if (pUoutput) { putoutpU_splitup(pUO,k-MAXLOOP-1, ulength, pUfp,'E'); putoutpU_splitup(pUH,k-MAXLOOP-1, ulength, pUfp,'H'); putoutpU_splitup(pUI,k-MAXLOOP-1, ulength, pUfp,'I'); putoutpU_splitup(pUM,k-MAXLOOP-1, ulength, pUfp,'M'); } } else { compute_pU(k-MAXLOOP-1,ulength,pU, winSize, n, sequence); /* here, we put out and free pUs not in use any more (hopefully) */ if (pUoutput) putoutpU(pU,k-MAXLOOP-1, ulength, pUfp); } } if (j-(2*winSize+MAXLOOP+1)>0) { printpbar(pR,winSize,j-(2*winSize+MAXLOOP+1),n); if (simply_putout) { print_plist(n, j-(2*winSize+MAXLOOP+1), pR, winSize, spup); } else{ pl=get_plistW(pl, n, j-(2*winSize+MAXLOOP+1), pR, winSize); } if (do_dpp)dpp=get_deppp(dpp,j-(2*winSize-MAXLOOP),pairSize, n); FreeOldArrays(j-(2*winSize+MAXLOOP+1)); } } /* end if (do_backtrack)*/ }/* end for j */ /* finish output and free */ for (j=MAX2(1,n-MAXLOOP); j<=n;j++) { /* if (pUoutput) pU[j]=(double *)vrna_alloc((ulength+2)*sizeof(double)); */ if (ulength) compute_pU(j,ulength,pU, winSize, n, sequence); /*here, we put out and free pUs not in use any more (hopefully)*/ if (pUoutput) putoutpU(pU,j, ulength, pUfp); } for (j=MAX2(n-winSize-MAXLOOP,1); j<=n; j++) { printpbar(pR,winSize,j,n); if (simply_putout) { print_plist(n, j, pR, winSize, spup); } else { pl=get_plistW(pl, n, j, pR, winSize); } if ((do_dpp)&&j 0) vrna_message_warning("%d overflows occurred while backtracking;\n" "you might try a smaller pf_scale than %g\n", ov, pf_params->pf_scale); *dpp2=dpp; return pl; } PRIVATE void scale_pf_params(unsigned int length, vrna_exp_param_t *parameters){ unsigned int i; double kT, scaling_factor; if(pf_params) free(pf_params); if(parameters){ pf_params = vrna_exp_params_copy(parameters); } else { vrna_md_t md; set_model_details(&md); pf_params = vrna_exp_params(&md); } scaling_factor = pf_params->pf_scale; kT = pf_params->kT; /* kT in cal/mol */ /* scaling factors (to avoid overflows) */ if (scaling_factor == -1) { /* mean energy for random sequences: 184.3*length cal */ scaling_factor = exp(-(-185+(pf_params->temperature-37.)*7.27)/kT); if (scaling_factor<1) scaling_factor=1; pf_params->pf_scale = scaling_factor; } scale[0] = 1.; scale[1] = 1./scaling_factor; expMLbase[0] = 1; expMLbase[1] = pf_params->expMLbase/scaling_factor; for (i=2; i<=length; i++) { scale[i] = scale[i/2]*scale[i-(i/2)]; expMLbase[i] = pow(pf_params->expMLbase, (double)i) * scale[i]; } } PRIVATE void printpbar(FLT_OR_DBL **prb,int winSize, int i, int n) { int j; int howoften=0; /* how many samples do we have for this pair */ int pairdist; for (j=i+TURN; j10e-200) { int type=ptype[start-1][j+1]; int type_2=rtype[(unsigned char)ptype[start][j]]; tmp=qb[start][j]/qb[start-1][(j+1)]*exp_E_IntLoop(0, 0, type, type_2, S1[start], S1[j], S1[start-1], S1[j+1], pf_params) * scale[2]; temp[count].i=start; temp[count].j=j; temp[count++].p=tmp; } } /* write it to list of deppps */ for (i=0; pl[i].i!=0; i++); pl=(plist *)vrna_realloc(pl,(i+count+1)*sizeof(plist)); for (j=0; jexpMLclosing; QBE=(double *) vrna_alloc((MAX2(ulength,MAXLOOP)+2)*sizeof(double)); /* first, we will */ /* for k<=ulength, pU[k][k]=0, because no bp can enclose it */ if (pUoutput&&k+ulength<=n) pU[k+ulength]=(double *)vrna_alloc((ulength+2)*sizeof(double)); /*compute pu[k+ulength][ulength] */ for (i5=MAX2(k+ulength-winSize+1,1);i5<=k;i5++) { for (j3=k+ulength+1; j3<=MIN2(n,i5+winSize-1); j3++) { /* if (k>400) { printf("i%d j%d ",i5,j3); fflush(stdout); } */ if (ptype[i5][j3]!=0) {/**/ /* (.. >-----|..........) i5 j j+ulength j3 */ /*Multiloops*/ temp = (i5k+ulength) temp += qm2[k+ulength+1][j3-1] * expMLbase[k+ulength-i5]; /* (..|-----|{}{}) */ if((i5k+ulength)) temp += qm[i5+1][k] * qm[k+ulength+1][j3-1] * expMLbase[ulength]; /* ({}|-----|{}) */ /* add dangles, multloopclosing etc. */ temp *= exp_E_MLstem(rtype[(unsigned char)ptype[i5][j3]], S1[j3-1], S1[i5+1], pf_params) * scale[2] * expMLclosing; /*add hairpins*/ temp += exp_E_Hairpin(j3-i5-1, ptype[i5][j3], S1[i5+1], S1[j3-1], sequence+i5-1, pf_params) * scale[j3-i5+1]; /*add outer probability*/ temp *= pR[i5][j3]; pU[k+ulength][ulength] += temp; } } } /* code doubling to avoid if within loop */ #if 0 /*initialization for interior loops, it is not recomended to have verysmall ulengths!!*/ if (ulength((l5-k5-1)+(j3-l3-1) k-winSize+ulength0) /* oder so halt */ for (l3=MAX2(i5+TURN+1,j3-MAXLOOP-1); l3<=k; l3++){ for (k5=i5+1; k5<=MIN2(l3-TURN-1,MAXLOOP+i5+l3+2-j3); k5++){ if (ptype[k5][l3]) { temp+= qb[k5][l3]*expLoopEnergy(k5-i5-1, j3-l3-1, outype, rtype[ptype[k5][l3]], S1[i5+1], S1[j3-1], S1[k5-1], S1[l3+1]); } } } temp*=pR[i5][j3]; pU[k+ulength][ulength]+= temp; } } /* kl bp is 3' */ /* k+ulength-MAXLOOP<=i5<=k k+ulength+1+TURN0) /* oder so halt */ for (k5=k+ulength+1; k5=ulength; len--) temp+=QI3[k][len]; for (;len>0; len--) { temp += QI3[k][len]; QBE[len] += temp; } #endif temp=0.; for (len=winSize; len>=MAX2(ulength,MAXLOOP); len--) temp+=QI5[k][len]; for (;len>0; len--) { temp += QI5[k][len]; QBE[len] += temp; /* replace QBE with QI */ } /* Add Hairpinenergy to QBE */ temp=0.; for(obp = MIN2(n, k + winSize - 1); obp > k + ulength; obp--) if(ptype[k][obp]) temp += pR[k][obp] * exp_E_Hairpin(obp-k-1, ptype[k][obp], S1[k+1], S1[obp-1], sequence+k-1, pf_params) * scale[obp-k+1]; for(obp = MIN2(n, MIN2(k + winSize - 1, k + ulength)); obp > k + 1; obp--){ if (ptype[k][obp]) temp += pR[k][obp] * exp_E_Hairpin(obp-k-1, ptype[k][obp], S1[k+1], S1[obp-1], sequence+k-1, pf_params) * scale[obp-k+1]; QBE[obp-k-1] += temp; /* add hairpins to QBE (all in one array) */ } /* doubling the code to get the if out of the loop */ /* Add up Multiloopterms qmb[l][m]+=prml[m]*dang; q2l[l][m]+=(prml[m]-prm_l[m])*dang; */ temp=0.; for(len = winSize; len >= ulength; len--) temp += q2l[k][len] * expMLbase[len]; for( ; len > 0; len--){ temp += q2l[k][len] * expMLbase[len]; QBE[len] += temp; /* add (()()____) type cont. to I3 */ } for(len = 1; len < ulength; len++){ for(obp = k + len + TURN; obp <= MIN2(n, k + winSize - 1); obp++){ /* add (()___()) */ QBE[len] += qmb[k][obp-k-1] * qm[k+len+1/*2*/][obp-1] * expMLbase[len]; } } for (len=1; len=winSize)&&(k>=ulength)) { pU[k][winSize]=scale[winSize]/q[k-winSize+1][k]; } /* now the not enclosed by any base pair terms for whatever it is we do not need anymore... ... which should be e.g; k, again */ for(startu = MIN2(ulength, k); startu > 0; startu--){ temp=0.; for(i5 = MAX2(1, k - winSize + 2); i5 <= MIN2(k - startu, n - winSize + 1); i5++){ temp += q[i5][k - startu] * q[k + 1][i5 + winSize - 1] * scale[startu]/q[i5][i5 + winSize - 1]; } /* the 2 Cases where the borders are on the edge of the interval */ if((k >= winSize) && (startu + 1 <= winSize)) temp += q[k - winSize + 1][k - startu]*scale[startu]/q[k - winSize + 1][k]; if((k <= n - winSize+ startu) && (k - startu >= 0) && (k < n) && (startu + 1 <= winSize)) temp += q[k + 1][k - startu + winSize] * scale[startu] / q[k - startu + 1][k - startu + winSize]; /* Divide by number of possible windows */ pU[k][startu] += temp; { int leftmost, rightmost; leftmost = MAX2(1, k - winSize + 1); rightmost = MIN2(n - winSize + 1, k - startu + 1); pU[k][startu] /= (rightmost - leftmost + 1); } } free(QBE); return; } PRIVATE void putoutpU(double **pUx, int k, int ulength, FILE *fp) { /*put out unpaireds for k, and free pU[k], make sure we don't need pU[k] any more!!*/ /*could use that for hairpins, also!*/ int i; fprintf(fp,"%d\t",k); for (i=1; i<=MIN2(ulength,k); i++) { fprintf(fp,"%.5g\t",pUx[k][i]); } fprintf(fp,"\n"); free(pUx[k]); } PRIVATE void putoutpU_splitup(double **pUx, int k, int ulength, FILE *fp, char ident) { /*put out unpaireds for k, and free pU[k], make sure we don't need pU[k] any more!!*/ /*could use that for hairpins, also!*/ int i; fprintf(fp,"%d\t",k); for (i=1; i<=MIN2(ulength,k); i++) { fprintf(fp,"%.5g\t",pUx[k][i]); } fprintf(fp,"\t%c\n",ident); free(pUx[k]); } PUBLIC void putoutpU_prob(double **pU,int length, int ulength, FILE *fp, int energies) { putoutpU_prob_par(pU, length, ulength, fp, energies, pf_params); } PUBLIC void putoutpU_prob_par(double **pU,int length, int ulength, FILE *fp, int energies, vrna_exp_param_t *parameters){ /*put out unpaireds */ int i,k; double kT = parameters->kT/1000.0; double temp; if (energies) fprintf(fp,"#opening energies\n #i$\tl="); else fprintf(fp,"#unpaired probabilities\n #i$\tl="); for (i=1; i<=ulength; i++) { fprintf(fp,"%d\t", i); } fprintf(fp,"\n"); for (k=1; k<=length; k++){ fprintf(fp,"%d\t",k); for (i=1; i<=ulength; i++) { if (i>k) { fprintf(fp,"NA\t"); continue; } if (energies) temp=-log(pU[k][i])*kT; else temp=pU[k][i]; fprintf(fp,"%.7g\t",temp); } fprintf(fp,"\n"); free(pU[k]); } fflush(fp); } PUBLIC void putoutpU_prob_bin(double **pU,int length, int ulength, FILE *fp, int energies) { putoutpU_prob_bin_par(pU, length, ulength, fp, energies, pf_params); } PUBLIC void putoutpU_prob_bin_par(double **pU,int length, int ulength, FILE *fp, int energies, vrna_exp_param_t *parameters) { /*put out unpaireds */ int i,k; double kT= parameters->kT/1000.0; int *p; p = (int*) vrna_alloc(sizeof(int)*1); /* write first line */ p[0]=ulength; /* u length */ fwrite(p,sizeof(int),1,fp); p[0]=length; /* seq length */ fwrite(p,sizeof(int),1,fp); for (k=3; k<=(length+20); k++){ /* all the other lines are set to 1000000 because we are at ulength=0 */ p[0]=1000000; fwrite(p,sizeof(int),1,fp); } /* data */ for (i=1; i<=ulength; i++) { for (k=1; k<=11; k++){/* write first ten entries to 1000000 */ p[0]=1000000; fwrite(p,sizeof(int),1,fp); } for (k=1; k<=length; k++){/* write data now */ if (i>k) { p[0]=1000000; /* check if u > pos */ fwrite(p,sizeof(int),1,fp); continue; } else{ p[0]= (int) rint(100 *(-log(pU[k][i])*kT)); fwrite(p,sizeof(int),1,fp); } } for (k=1; k<=9; k++){/* finish by writing the last 10 entries */ p[0]=1000000; fwrite(p,sizeof(int),1,fp); } } /* free pU array; */ for (k=1; k<=length; k++){ free(pU[k]); } free(p); fflush(fp); } /* Here: Space for questions... */ PRIVATE void compute_pU_splitup(int k, int ulength, double **pU, double **pUO, double **pUH, double **pUI, double **pUM, int winSize,int n, char *sequence) { /* here, we try to add a function computing all unpaired probabilities starting at some i, going down to $unpaired, to be unpaired, i.e. a list with entries from 1 to unpaired for every i, with the probability of a stretch of length x, starting at i-x+1, to be unpaired */ int startu; int i5; int j3, len, obp; double temp; double *QBE; double *QBI; double *QBM; double *QBH; FLT_OR_DBL expMLclosing = pf_params->expMLclosing; QBE=(double *) vrna_alloc((MAX2(ulength,MAXLOOP)+2)*sizeof(double)); QBM=(double *) vrna_alloc((MAX2(ulength,MAXLOOP)+2)*sizeof(double)); QBI=(double *) vrna_alloc((MAX2(ulength,MAXLOOP)+2)*sizeof(double)); QBH=(double *) vrna_alloc((MAX2(ulength,MAXLOOP)+2)*sizeof(double)); /* first, we will */ /* for k<=ulength, pU[k][k]=0, because no bp can enclose it */ if (pUoutput&&k+ulength<=n) pU[k+ulength]=(double *)vrna_alloc((ulength+2)*sizeof(double)); /*compute pu[k+ulength][ulength] */ for (i5=MAX2(k+ulength-winSize+1,1);i5<=k;i5++) { for (j3=k+ulength+1; j3<=MIN2(n,i5+winSize-1); j3++) { /* if (k>400) { printf("i%d j%d ",i5,j3); fflush(stdout); } */ if (ptype[i5][j3]!=0) {/**/ /* (.. >-----|..........) i5 j j+ulength j3 */ /*Multiloops*/ temp = (i5k+ulength) temp += qm2[k+ulength+1][j3-1] * expMLbase[k+ulength-i5]; /* (..|-----|{}{}) */ if((i5k+ulength)) temp += qm[i5+1][k] * qm[k+ulength+1][j3-1] * expMLbase[ulength]; /* ({}|-----|{}) */ /* add dangles, multloopclosing etc. */ temp *= exp_E_MLstem(rtype[(unsigned char)ptype[i5][j3]], S1[j3-1], S1[i5+1], pf_params) * scale[2] * expMLclosing; /*add hairpins*/ temp += exp_E_Hairpin(j3-i5-1, ptype[i5][j3], S1[i5+1], S1[j3-1], sequence+i5-1, pf_params) * scale[j3-i5+1]; /*add outer probability*/ temp *= pR[i5][j3]; pU[k+ulength][ulength] += temp; } } } /* code doubling to avoid if within loop */ temp=0.; for (len=winSize; len>=MAX2(ulength,MAXLOOP); len--) temp+=QI5[k][len]; for (;len>0; len--) { temp += QI5[k][len]; QBI[len] += temp; QBE[len] += temp; /* replace QBE with QI */ } /* Add Hairpinenergy to QBE */ temp=0.; for(obp = MIN2(n, k + winSize - 1); obp > k + ulength; obp--) if(ptype[k][obp]) temp += pR[k][obp] * exp_E_Hairpin(obp-k-1, ptype[k][obp], S1[k+1], S1[obp-1], sequence+k-1, pf_params) * scale[obp-k+1]; for(obp = MIN2(n, MIN2(k + winSize - 1, k + ulength)); obp > k + 1; obp--){ if (ptype[k][obp]) temp += pR[k][obp] * exp_E_Hairpin(obp-k-1, ptype[k][obp], S1[k+1], S1[obp-1], sequence+k-1, pf_params) * scale[obp-k+1]; QBH[obp-k-1] += temp; QBE[obp-k-1] += temp; /* add hairpins to QBE (all in one array) */ } /* doubling the code to get the if out of the loop */ /* Add up Multiloopterms qmb[l][m]+=prml[m]*dang; q2l[l][m]+=(prml[m]-prm_l[m])*dang; */ temp=0.; for(len = winSize; len >= ulength; len--) temp += q2l[k][len] * expMLbase[len]; for( ; len > 0; len--){ temp += q2l[k][len] * expMLbase[len]; QBM[len] += temp; QBE[len] += temp; /* add (()()____) type cont. to I3 */ } for(len = 1; len < ulength; len++){ for(obp = k + len + TURN; obp <= MIN2(n, k + winSize - 1); obp++){ /* add (()___()) */ QBM[len] += qmb[k][obp-k-1] * qm[k+len+1/*2*/][obp-1] * expMLbase[len]; QBE[len] += qmb[k][obp-k-1] * qm[k+len+1/*2*/][obp-1] * expMLbase[len]; } } for (len=1; len=winSize)&&(k>=ulength)) { pUO[k][winSize]=scale[winSize]/q[k-winSize+1][k]; } /*open chain*/ if ((ulength>=winSize)&&(k>=ulength)) { pU[k][winSize]=scale[winSize]/q[k-winSize+1][k]; } /* now the not enclosed by any base pair terms for whatever it is we do not need anymore... ... which should be e.g; k, again */ for(startu = MIN2(ulength, k); startu > 0; startu--){ temp=0.; for(i5 = MAX2(1, k - winSize + 2); i5 <= MIN2(k - startu, n - winSize + 1); i5++){ temp += q[i5][k - startu] * q[k + 1][i5 + winSize - 1] * scale[startu]/q[i5][i5 + winSize - 1]; } /* the 2 Cases where the borders are on the edge of the interval */ if((k >= winSize) && (startu + 1 <= winSize)) temp += q[k - winSize + 1][k - startu]*scale[startu]/q[k - winSize + 1][k]; if((k <= n - winSize+ startu) && (k - startu >= 0) && (k < n) && (startu + 1 <= winSize)) temp += q[k + 1][k - startu + winSize] * scale[startu] / q[k - startu + 1][k - startu + winSize]; /* Divide by number of possible windows */ pU[k][startu] += temp; pUO[k][startu] += temp; { int leftmost, rightmost; leftmost = MAX2(1, k - winSize + 1); rightmost = MIN2(n - winSize + 1, k - startu + 1); pU[k][startu] /= (rightmost - leftmost + 1); /*Do we want to make a distinction between those?*/ pUH[k][startu] /= (rightmost - leftmost + 1); pUO[k][startu] /= (rightmost - leftmost + 1); pUI[k][startu] /= (rightmost - leftmost + 1); pUM[k][startu] /= (rightmost - leftmost + 1); } } free(QBE); free(QBI); free(QBH); free(QBM); return; } PUBLIC void putoutpU_prob_splitup(double **pU, double **pUO, double **pUH, double **pUI, double **pUM, int length, int ulength, FILE *fp, int energies) { /*put out unpaireds */ int i,k; double kT= (temperature+K0)*GASCONST/1000.0; double temp; if (energies) fprintf(fp,"#opening energies\n #i$\tl="); else fprintf(fp,"#unpaired probabilities\n #i$\tl="); fprintf(fp,"Total\n"); for (i=1; i<=ulength; i++) { fprintf(fp,"%d\t", i); } fprintf(fp,"\n"); for (k=1; k<=length; k++){ fprintf(fp,"%d\t",k); for (i=1; i<=ulength; i++) { if (i>k) { fprintf(fp,"NA\t"); continue; } if (energies) temp=-log(pU[k][i])*kT; else temp=pU[k][i]; fprintf(fp,"%.7g\t",temp); } fprintf(fp,"\tT\n"); free(pU[k]); } fprintf(fp,"\n###################################################################\nHairpin\n"); for (i=1; i<=ulength; i++) { fprintf(fp,"%d\t", i); } fprintf(fp,"\n"); for (k=1; k<=length; k++){ fprintf(fp,"%d\t",k); for (i=1; i<=ulength; i++) { if (i>k) { fprintf(fp,"NA\t"); continue; } if (energies) temp=-log(pUH[k][i])*kT; else temp=pUH[k][i]; fprintf(fp,"%.7g\t",temp); } fprintf(fp,"\tH\n"); free(pUH[k]); } fprintf(fp,"\n###################################################################\nInterior\n"); for (i=1; i<=ulength; i++) { fprintf(fp,"%d\t", i); } fprintf(fp,"\n"); for (k=1; k<=length; k++){ fprintf(fp,"%d\t",k); for (i=1; i<=ulength; i++) { if (i>k) { fprintf(fp,"NA\t"); continue; } if (energies) temp=-log(pUI[k][i])*kT; else temp=pUI[k][i]; fprintf(fp,"%.7g\t",temp); } fprintf(fp,"\tI\n"); free(pUI[k]); } fprintf(fp,"\n###################################################################\nMultiloop\n"); for (i=1; i<=ulength; i++) { fprintf(fp,"%d\t", i); } fprintf(fp,"\n"); for (k=1; k<=length; k++){ fprintf(fp,"%d\t",k); for (i=1; i<=ulength; i++) { if (i>k) { fprintf(fp,"NA\t"); continue; } if (energies) temp=-log(pUM[k][i])*kT; else temp=pUM[k][i]; fprintf(fp,"%.7g\t",temp); } fprintf(fp,"\tM\n"); free(pUM[k]); } fprintf(fp,"\n###################################################################\nExterior\n"); for (i=1; i<=ulength; i++) { fprintf(fp,"%d\t", i); } fprintf(fp,"\t E\n"); for (k=1; k<=length; k++){ fprintf(fp,"%d\t",k); for (i=1; i<=ulength; i++) { if (i>k) { fprintf(fp,"NA\t"); continue; } if (energies) temp=-log(pUO[k][i])*kT; else temp=pUO[k][i]; fprintf(fp,"%.7g\t",temp); } fprintf(fp,"\n"); free(pU[k]); } fflush(fp); } /*###########################################*/ /*# deprecated functions below #*/ /*###########################################*/ PUBLIC void init_pf_foldLP(int length){ /* DO NOTHING */}