/* ################################### # convert energy parameter files # # from ViennaRNAPackage 1.8.4 to # # 2.0 format # # # # Ronny Lorenz # ################################### */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #include #include #include "ViennaRNA/utils.h" #include "ViennaRNA/fold_vars.h" #include "ViennaRNA/read_epars.h" #include "ViennaRNA/pair_mat.h" #include "1.8.4_epars.h" #include "1.8.4_intloops.h" #include "ViennaRNA/convert_epars.h" enum parset_184 {UNKNOWN_184= -1, QUIT_184, S_184, SH_184, HP_184, B_184, IL_184, MMI_184, MMH_184, MMM_184, MM_H_184, DE5_184, DE3_184, DE5_H_184, DE3_H_184, ML_184, TL_184, TRI_184, TE_184, NIN_184, MISC_184, INT11_184, INT11_H_184, INT21_184, INT21_H_184, INT22_184, INT22_H_184}; PRIVATE unsigned int read_old_parameter_file(FILE *ifile, int skip_header); PRIVATE void write_new_parameter_file(FILE *ofile, unsigned int options); PRIVATE void rd_stacks(int stack[NBPAIRS+1][NBPAIRS+1], FILE *fp); PRIVATE void rd_loop(int looparray[31], FILE *fp); PRIVATE void rd_mismatch(int mismatch[NBPAIRS+1][5][5], FILE *fp); PRIVATE void rd_int11(int int11[NBPAIRS+1][NBPAIRS+1][5][5], FILE *fp); PRIVATE void rd_int21(int int21[NBPAIRS+1][NBPAIRS+1][5][5][5], FILE *fp); PRIVATE void rd_int22(int int22[NBPAIRS+1][NBPAIRS+1][5][5][5][5], FILE *fp); PRIVATE void rd_dangle(int dangles[NBPAIRS+1][5], FILE *fp); PRIVATE void rd_MLparams(FILE *fp); PRIVATE void rd_misc(FILE *fp); PRIVATE void rd_ninio(FILE *fp); PRIVATE void rd_Tetra_loop(FILE *fp); PRIVATE void rd_Tri_loop(FILE *fp); PRIVATE void check_symmetry(void); PRIVATE enum parset_184 gettype_184(char ident[]); PRIVATE char *get_array1(int *arr, int size, FILE *fp); PRIVATE void ignore_comment(char *line); PRIVATE void display_array(int *p, int size, int line, FILE *fp); PUBLIC void convert_parameter_file(const char *iname, const char *oname, unsigned int options){ FILE *ifile, *ofile; unsigned int old_options = 0; int skip_input_header = 0; if(options & VRNA_CONVERT_OUTPUT_DUMP){ if(oname == NULL) oname = iname; skip_input_header = 1; } else{ if(iname == NULL){ ifile = stdin; skip_input_header = 1; } else if(!(ifile=fopen(iname,"r"))){ vrna_message_warning("convert_epars: can't open file %s", iname); return; } /* read old (1.8.4 format) parameter file */ old_options = read_old_parameter_file(ifile, skip_input_header); if(ifile != stdin) fclose(ifile); check_symmetry(); } if(options & VRNA_CONVERT_OUTPUT_VANILLA) options = old_options; if(oname == NULL) ofile = stdout; else if(!(ofile=fopen(oname,"a+"))){ vrna_message_warning("convert_epars: can't open file %s for writing", oname); return; } write_new_parameter_file(ofile, options); if(ofile != stdout) fclose(ofile); } /*------------------------------------------------------------*/ PRIVATE unsigned int read_old_parameter_file(FILE *ifile, int skip_header){ char *line, ident[32]; enum parset_184 type; int r, last; unsigned int read_successfully = 0; if (!(line = vrna_read_line(ifile))) { vrna_message_warning("convert_epars: can't read input parameter file"); return 0; } if(!skip_header){ if (strncmp(line,"## RNAfold parameter file",25)!=0){ vrna_message_warning("convert_epars: Missing header line in input parameter file.\n" "May be this file has incorrect format?"); free(line); return 0; } free(line); line = vrna_read_line(ifile); } last = 0; do{ r = sscanf(line, "# %31s", ident); if (r==1){ type = gettype_184(ident); switch (type){ case QUIT_184: if(ifile == stdin){ vrna_message_info(stderr, "press ENTER to continue..."); fflush(stderr); } last = 1; break; case SH_184: rd_stacks(enthalpies_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_STACK; break; case S_184: rd_stacks(stack37_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_STACK; break; case HP_184: rd_loop(hairpin37_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_HP; break; case B_184: rd_loop(bulge37_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_BULGE; break; case IL_184: rd_loop(internal_loop37_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_INT; break; case MMH_184: rd_mismatch(mismatchH37_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_MM_HP; break; case MMI_184: rd_mismatch(mismatchI37_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_MM_INT |VRNA_CONVERT_OUTPUT_MM_INT_1N /* since 1:n-interior loop mismatches are treated seperately in 2.0 */ |VRNA_CONVERT_OUTPUT_MM_INT_23; /* since 2:3-interior loop mismatches are treated seperately in 2.0 */ break; case MMM_184: rd_mismatch(mismatchM37_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_MM_MULTI; break; case MM_H_184: rd_mismatch(mism_H_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_MM_HP /* since hairpin mismatches are treated seperately in 2.0 */ |VRNA_CONVERT_OUTPUT_MM_INT /* since interior loop mismatches are treated seperately in 2.0 */ |VRNA_CONVERT_OUTPUT_MM_INT_1N /* since 1:n-interior loop mismatches are treated seperately in 2.0 */ |VRNA_CONVERT_OUTPUT_MM_INT_23 /* since 2:3-interior loop mismatches are treated seperately in 2.0 */ |VRNA_CONVERT_OUTPUT_MM_MULTI; /* since multi loop mismatches are treated seperately in 2.0 */ break; case INT11_184: rd_int11(int11_37_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_INT_11; break; case INT11_H_184: rd_int11(int11_H_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_INT_11; break; case INT21_184: rd_int21(int21_37_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_INT_21; break; case INT21_H_184: rd_int21(int21_H_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_INT_21; break; case INT22_184: rd_int22(int22_37_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_INT_22; break; case INT22_H_184: rd_int22(int22_H_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_INT_22; break; case DE5_184: rd_dangle(dangle5_37_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_DANGLE5 |VRNA_CONVERT_OUTPUT_MM_MULTI /* since multi loop mismatches were treated as dangle contribution */ |VRNA_CONVERT_OUTPUT_MM_EXT; /* since exterior loop mismatches were treated as dangle contribution */ break; case DE5_H_184: rd_dangle(dangle5_H_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_DANGLE5 |VRNA_CONVERT_OUTPUT_MM_MULTI /* since multi loop mismatches were treated as dangle contribution */ |VRNA_CONVERT_OUTPUT_MM_EXT; /* since exterior loop mismatches were treated as dangle contribution */ break; case DE3_184: rd_dangle(dangle3_37_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_DANGLE3 |VRNA_CONVERT_OUTPUT_MM_MULTI /* since multi loop mismatches were treated as dangle contribution */ |VRNA_CONVERT_OUTPUT_MM_EXT; /* since exterior loop mismatches were treated as dangle contribution */ break; case DE3_H_184: rd_dangle(dangle3_H_184, ifile); read_successfully |= VRNA_CONVERT_OUTPUT_DANGLE3 |VRNA_CONVERT_OUTPUT_MM_MULTI /* since multi loop mismatches were treated as dangle contribution */ |VRNA_CONVERT_OUTPUT_MM_EXT; /* since exterior loop mismatches were treated as dangle contribution */ break; case ML_184: rd_MLparams(ifile); read_successfully |= VRNA_CONVERT_OUTPUT_ML |VRNA_CONVERT_OUTPUT_MISC; /* since TerminalAU went to "misc" section */ break; case NIN_184: rd_ninio(ifile); read_successfully |= VRNA_CONVERT_OUTPUT_NINIO; break; case TL_184: rd_Tetra_loop(ifile); read_successfully |= VRNA_CONVERT_OUTPUT_SPECIAL_HP; break; case TRI_184: rd_Tri_loop(ifile); read_successfully |= VRNA_CONVERT_OUTPUT_SPECIAL_HP; break; case MISC_184: rd_misc(ifile); read_successfully |= VRNA_CONVERT_OUTPUT_MISC; break; default: /* do nothing but complain */ vrna_message_warning("convert_parameter_file: Unknown field identifier in `%s'", line); } } /* else ignore line */ free(line); } while((line=vrna_read_line(ifile)) && !last); return read_successfully; } PRIVATE void display_array(int *p, int size, int nl, FILE *fp){ int i; for (i=1; i<=size; i++, p++) { switch(*p){ case INF: fprintf(fp," INF"); break; case -INF: fprintf(fp," -INf"); break; case DEF: fprintf(fp," DEF"); break; default: fprintf(fp,"%6d", *p); break; } if ((i%nl)==0) fprintf(fp,"\n"); } if (size%nl) fprintf(fp,"\n"); return; } PRIVATE char *get_array1(int *arr, int size, FILE *fp){ int i, p, pos, pp, r, last; char *line, buf[16]; i = last = 0; while( i 2) en += TerminalAU_184; fprintf(ofile,"\t%.5s %6d %6d\n", Triloops_184+c*6, Triloop_E37_184[c] + en, base_dH); } } /* since the old hairpin loop function treated the tabulated tetraloop energy as bonus * and the new one takes this tabulated energy as a total energy, we have to compute some * things now... */ fprintf(ofile,"\n# %s\n", settype(TL)); { int base_en = hairpin37_184[4]; int base_dH = TETRA_ENTH37_184; for (c=0; c< (int)strlen(Tetraloops_184)/7; c++){ char bla[6]; int en = base_en; int dH = base_dH; strncpy(bla, Tetraloops_184+c*7, 6); short si = (short)encode_char(toupper(bla[1])); short sj = (short)encode_char(toupper(bla[4])); int type = pair[(short)encode_char(toupper(bla[0]))][(short)encode_char(toupper(bla[5]))]; en += mismatchH37_184[type][si][sj]; dH += mism_H_184[type][si][sj]; fprintf(ofile,"\t%.6s %6d %6d\n", Tetraloops_184+c*7, en + TETRA_ENERGY37_184[c], dH); } } fprintf(ofile,"\n# %s\n", settype(HEX)); { fprintf(ofile, "\n"); } } fprintf(ofile, "\n# %s\n", settype(QUIT)); } PRIVATE void check_symmetry(void) { int i,j,k,l; for (i=0; i<=NBPAIRS; i++) for (j=0; j<=NBPAIRS; j++) if (stack37_184[i][j] != stack37_184[j][i]) vrna_message_warning("stacking energies not symmetric"); for (i=0; i<=NBPAIRS; i++) for (j=0; j<=NBPAIRS; j++) if (enthalpies_184[i][j] != enthalpies_184[j][i]) vrna_message_warning("stacking enthalpies not symmetric"); /* interior 1x1 loops */ for (i=0; i<=NBPAIRS; i++) for (j=0; j<=NBPAIRS; j++) for (k=0; k<5; k++) for (l=0; l<5; l++) if (int11_37_184[i][j][k][l] != int11_37_184[j][i][l][k]) vrna_message_warning("int11 energies not symmetric"); for (i=0; i<=NBPAIRS; i++) for (j=0; j<=NBPAIRS; j++) for (k=0; k<5; k++) for (l=0; l<5; l++) if (int11_H_184[i][j][k][l] != int11_H_184[j][i][l][k]) vrna_message_warning("int11 enthalpies not symmetric"); /* interior 2x2 loops */ for (i=0; i<=NBPAIRS; i++) for (j=0; j<=NBPAIRS; j++) for (k=0; k<5; k++) for (l=0; l<5; l++) { int m,n; for (m=0; m<5; m++) for (n=0; n<5; n++) if (int22_37_184[i][j][k][l][m][n] != int22_37_184[j][i][m][n][k][l]) vrna_message_warning("int22 energies not symmetric"); } for (i=0; i<=NBPAIRS; i++) for (j=0; j<=NBPAIRS; j++) for (k=0; k<5; k++) for (l=0; l<5; l++) { int m,n; for (m=0; m<5; m++) for (n=0; n<5; n++) if (int22_H_184[i][j][k][l][m][n] != int22_H_184[j][i][m][n][k][l]) vrna_message_warning("int22 enthalpies not symmetric: %d %d %d %d %d %d", i,j,k,l,m,n); } }