/* search for sequences that fold into a given target structure c Ivo Hofacker Vienna RNA package */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #define TDIST 0 /* use tree distance */ #define PF 1 /* include support for partiton function */ #include #include #include #include #include #include #if PF #include "ViennaRNA/part_func.h" #endif #include "ViennaRNA/fold.h" #if TDIST #include "ViennaRNA/dist_vars.h" #include "ViennaRNA/treedist.h" #include "ViennaRNA/RNAstruct.h" #endif #include "ViennaRNA/utils.h" #include "ViennaRNA/fold_vars.h" #include "ViennaRNA/pair_mat.h" PRIVATE double adaptive_walk(char *start, const char *target); PRIVATE void shuffle(int *list, int len); PRIVATE void make_start(char* start, const char *structure); PRIVATE void make_ptable(const char *structure, int *table); PRIVATE void make_pairset(void); PRIVATE double mfe_cost(const char *, char*, const char *); PRIVATE double pf_cost(const char *, char *, const char *); PRIVATE char *aux_struct(const char* structure ); /* for backward compatibility, make sure symbolset can hold 20 characters */ PRIVATE char default_alpha[21] = "AUGC"; PUBLIC char *symbolset = default_alpha; PUBLIC int give_up = 0; PUBLIC float final_cost = 0; /* when to stop inverse_pf_fold */ PUBLIC int inv_verbose=0; /* print out substructure on which inverse_fold() fails */ PRIVATE char pairset[2*MAXALPHA+1]; PRIVATE int base, npairs; PRIVATE int nc2; /*-------------------------------------------------------------------------*/ PRIVATE int fold_type; #if TDIST PRIVATE Tree *T0; #endif PRIVATE double cost2; PRIVATE double adaptive_walk(char *start, const char *target) { #ifdef DUMMY printf("%s\n%s %c\n", start, target, backtrack_type ); return 0.; #endif int i,j,p,tt,w1,w2, n_pos, len, flag; long walk_len; char *string, *string2, *cstring, *structure, *struct2; int *mut_pos_list, mut_sym_list[MAXALPHA+1], mut_pair_list[2*MAXALPHA+1]; int *w1_list, *w2_list, mut_position, symbol, bp; int *target_table, *test_table; char cont; double cost, current_cost, ccost2; double (*cost_function)(const char *, char *, const char *); len = strlen(start); if (strlen(target)!=len) { vrna_message_error("%s\n%s\nadaptive_walk: start and target have unequal length", start, target); } string = (char *) vrna_alloc(sizeof(char)*(len+1)); cstring = (char *) vrna_alloc(sizeof(char)*(len+1)); string2 = (char *) vrna_alloc(sizeof(char)*(len+1)); structure = (char *) vrna_alloc(sizeof(char)*(len+1)); struct2 = (char *) vrna_alloc(sizeof(char)*(len+1)); mut_pos_list = (int *) vrna_alloc(sizeof(int)*len); w1_list = (int *) vrna_alloc(sizeof(int)*len); w2_list = (int *) vrna_alloc(sizeof(int)*len); target_table = (int *) vrna_alloc(sizeof(int)*len); test_table = (int *) vrna_alloc(sizeof(int)*len); make_ptable(target, target_table); for (i=0; i0) do { cont=0; if (fold_type==0) { /* min free energy fold */ make_ptable(structure, test_table); for (j=w1=w2=flag=0; j0)) if ((target_table[j-1]1) if (w2_list[w2-2]==w2_list[w2-1]) w2--; flag = 1; } else { if (flag==1) if ((tt0) cont=1; break; } } if ((current_cost>0)&&(cont==0)&&(string2[0])) { /* no mutation that decreased cost was found, but the the sequence in string2 decreases cost2 while keeping cost constant */ strcpy(cstring, string2); strcpy(structure, struct2); nc2++; cont=1; } } while (cont); for (i=0; i0)&&(give_up)) goto adios PUBLIC float inverse_fold(char *start, char *structure) { int i, j, jj, len, o; int *pt; char *string, *wstring, *wstruct, *aux; double dist=0; nc2 = j = o = fold_type = 0; len = strlen(structure); if (strlen(start)!=len) { vrna_message_error("%s\n%s\ninverse_fold: start and structure have unequal length", start, structure); } string = (char *) vrna_alloc(len+1); wstring = (char *) vrna_alloc(len+1); wstruct = (char *) vrna_alloc(len+1); pt = (int *) vrna_alloc(sizeof(int)*(len+2)); pt[len] = len+1; aux = aux_struct(structure); strcpy(string, start); make_pairset(); make_start(string, structure); make_ptable(structure, pt); while (j0) && structure[--i]!='('); if (structure[i]=='.') { /* no pair found -> open chain */ WALK(0,len-1); } if (aux[i]!='[') { i--; j++;} while (pt[j]==i) { backtrack_type='C'; if (aux[i]!='[') { while (aux[--i]!='['); while (aux[++j]!=']'); /* WALK(i,j); */ } WALK(i,j); o--; jj = j; i--; while (aux[++j]=='.'); while ((i>=0)&&(aux[i]=='.')) i--; if (pt[j]!=i) { backtrack_type = (o==0)? 'F' : 'M'; if (j-jj>8) { WALK((i+1),(jj)); } WALK((i+1), (j-1)); while ((i>=0) &&(aux[i]==']')) { i=pt[i]-1; while ((i>=0)&&(aux[i]=='.')) i--; WALK((i+1), (j-1)); } } } } adios: backtrack_type='F'; if ((dist>0)&&(inv_verbose)) printf("%s\n%s\n", wstring, wstruct); /*if ((dist==0)||(give_up==0))*/ strcpy(start, string); free(wstring); free(wstruct); free(string); free(aux); free(pt); /* if (dist>0) printf("%3d \n", nc2); */ return dist; } /*-------------------------------------------------------------------------*/ PUBLIC float inverse_pf_fold(char *start, char *target) { double dist; int dang; dang=dangles; if (dangles!=0) dangles=2; update_fold_params(); /* make sure there is a valid pair matrix */ make_pairset(); make_start(start, target); fold_type=1; do_backtrack = 0; dist = adaptive_walk(start, target); dangles=dang; return (dist+final_cost); } /*-------------------------------------------------------------------------*/ PRIVATE void make_start(char* start, const char *structure) { int i,j,k,l,r,length; int *table, *S, sym[MAXALPHA], ss; length=strlen(start); table = (int *) vrna_alloc(sizeof(int)*length); S = (int *) vrna_alloc(sizeof(int)*length); make_ptable(structure, table); for (i=0; i