/* * bibtexin.c * * Copyright (c) Chris Putnam 2003-2017 * * Program and source code released under the GPL version 2 * */ #include #include #include #include #include "is_ws.h" #include "str.h" #include "utf8.h" #include "str_conv.h" #include "fields.h" #include "slist.h" #include "name.h" #include "title.h" #include "url.h" #include "reftypes.h" #include "bibformats.h" #include "generic.h" static slist find = { 0, 0, 0, NULL }; static slist replace = { 0, 0, 0, NULL }; extern variants bibtex_all[]; extern int bibtex_nall; /***************************************************** PUBLIC: void bibtexin_initparams() *****************************************************/ static int bibtexin_convertf( fields *bibin, fields *info, int reftype, param *p ); static int bibtexin_processf( fields *bibin, char *data, char *filename, long nref, param *p ); static int bibtexin_cleanf( bibl *bin, param *p ); static int bibtexin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset ); static int bibtexin_typef( fields *bibin, char *filename, int nrefs, param *p ); void bibtexin_initparams( param *p, const char *progname ) { p->readformat = BIBL_BIBTEXIN; p->charsetin = BIBL_CHARSET_DEFAULT; p->charsetin_src = BIBL_SRC_DEFAULT; p->latexin = 1; p->xmlin = 0; p->utf8in = 0; p->nosplittitle = 0; p->verbose = 0; p->addcount = 0; p->output_raw = 0; p->readf = bibtexin_readf; p->processf = bibtexin_processf; p->cleanf = bibtexin_cleanf; p->typef = bibtexin_typef; p->convertf = bibtexin_convertf; p->all = bibtex_all; p->nall = bibtex_nall; slist_init( &(p->asis) ); slist_init( &(p->corps) ); if ( !progname ) p->progname = NULL; else p->progname = strdup( progname ); } /***************************************************** PUBLIC: int bibtexin_readf() *****************************************************/ /* * readf can "read too far", so we store this information in line, thus * the next new text is in line, either from having read too far or * from 
the next chunk obtained via str_fget() * * return 1 on success, 0 on error/end-of-file * */ static int readmore( FILE *fp, char *buf, int bufsize, int *bufpos, str *line ) { if ( line->len ) return 1; else return str_fget( fp, buf, bufsize, bufpos, line ); } /* * readf() * * returns zero if cannot get reference and hit end of-file * returns 1 if last reference in file, 2 if reference within file */ static int bibtexin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset ) { int haveref = 0; char *p; *fcharset = CHARSET_UNKNOWN; while ( haveref!=2 && readmore( fp, buf, bufsize, bufpos, line ) ) { if ( line->len == 0 ) continue; /* blank line */ p = &(line->data[0]); /* Recognize UTF8 BOM */ if ( line->len > 2 && (unsigned char)(p[0])==0xEF && (unsigned char)(p[1])==0xBB && (unsigned char)(p[2])==0xBF ) { *fcharset = CHARSET_UNICODE; p += 3; } p = skip_ws( p ); if ( *p == '%' ) { /* commented out line */ str_empty( line ); continue; } if ( *p == '@' ) haveref++; if ( haveref && haveref<2 ) { str_strcatc( reference, p ); str_addchar( reference, '\n' ); str_empty( line ); } else if ( !haveref ) str_empty( line ); } return haveref; } /***************************************************** PUBLIC: int bibtexin_processf() *****************************************************/ static char* process_bibtextype( char *p, str *type ) { str tmp; str_init( &tmp ); if ( *p=='@' ) p++; p = str_cpytodelim( &tmp, p, "{( \t\r\n", 0 ); p = skip_ws( p ); if ( *p=='{' || *p=='(' ) p++; p = skip_ws( p ); if ( str_has_value( &tmp ) ) str_strcpy( type, &tmp ); else str_empty( type ); str_free( &tmp ); return p; } static char* process_bibtexid( char *p, str *id ) { char *start_p = p; str tmp; str_init( &tmp ); p = str_cpytodelim( &tmp, p, ",", 1 ); if ( str_has_value( &tmp ) ) { if ( strchr( tmp.data, '=' ) ) { /* Endnote writes bibtex files w/o fields, try to * distinguish via presence of an equal sign.... 
if * it's there, assume that it's a tag/data pair instead * and roll back. */ p = start_p; str_empty( id ); } else { str_strcpy( id, &tmp ); } } else { str_empty( id ); } str_free( &tmp ); return skip_ws( p ); } static char * bibtex_tag( char *p, str *tag ) { p = str_cpytodelim( tag, skip_ws( p ), "= \t\r\n", 0 ); if ( str_memerr( tag ) ) return NULL; return skip_ws( p ); } static char * bibtex_data( char *p, fields *bibin, slist *tokens, long nref, param *pm ) { unsigned int nbracket = 0, nquotes = 0; char *startp = p; str tok, *t; str_init( &tok ); while ( p && *p ) { if ( !nquotes && !nbracket ) { if ( *p==',' || *p=='=' || *p=='}' || *p==')' ) goto out; } if ( *p=='\"' && nbracket==0 && ( p==startp || *(p-1)!='\\' ) ) { nquotes = !nquotes; str_addchar( &tok, *p ); if ( !nquotes ) { if ( str_memerr( &tok ) ) { p=NULL; goto out; } t = slist_add( tokens, &tok ); if ( !t ) { p=NULL; goto out0; } str_empty( &tok ); } } else if ( *p=='#' && !nquotes && !nbracket ) { if ( str_has_value( &tok ) ) { if ( str_memerr( &tok ) ) { p=NULL; goto out; } t = slist_add( tokens, &tok ); if ( !t ) { p=NULL; goto out0; } } str_strcpyc( &tok, "#" ); t = slist_add( tokens, &tok ); if ( !t ) { p=NULL; goto out0; } str_empty( &tok ); } else if ( *p=='{' && !nquotes && ( p==startp || *(p-1)!='\\' ) ) { nbracket++; str_addchar( &tok, *p ); } else if ( *p=='}' && !nquotes && ( p==startp || *(p-1)!='\\' ) ) { nbracket--; str_addchar( &tok, *p ); if ( nbracket==0 ) { if ( str_memerr( &tok ) ) { p=NULL; goto out; } t = slist_add( tokens, &tok ); if ( !t ) { p=NULL; goto out; } str_empty( &tok ); } } else if ( !is_ws( *p ) || nquotes || nbracket ) { if ( !is_ws( *p ) ) str_addchar( &tok, *p ); else { if ( tok.len!=0 && *p!='\n' && *p!='\r' ) str_addchar( &tok, *p ); else if ( tok.len!=0 && (*p=='\n' || *p=='\r')) { str_addchar( &tok, ' ' ); while ( is_ws( *(p+1) ) ) p++; } } } else if ( is_ws( *p ) ) { if ( tok.len ) { if ( str_memerr( &tok ) ) { p=NULL; goto out; } t = slist_add( tokens, 
&tok ); if ( !t ) { p=NULL; goto out; } str_empty( &tok ); } } p++; } out: if ( nbracket!=0 ) { fprintf( stderr, "%s: Mismatch in number of brackets in reference %ld.\n", pm->progname, nref ); } if ( nquotes!=0 ) { fprintf( stderr, "%s: Mismatch in number of quotes in reference %ld.\n", pm->progname, nref ); } if ( str_has_value( &tok ) ) { if ( str_memerr( &tok ) ) { p = NULL; goto out; } t = slist_add( tokens, &tok ); if ( !t ) p = NULL; } out0: str_free( &tok ); return p; } /* replace_strings() * * do string replacement -- only if unprotected by quotation marks or curly brackets */ static void replace_strings( slist *tokens, fields *bibin, param *pm ) { int i, n, ok; char *q; str *s; i = 0; while ( i < tokens->n ) { s = slist_str( tokens, i ); if ( !strcmp( s->data, "#" ) ) { } else if ( s->data[0]!='\"' && s->data[0]!='{' ) { n = slist_find( &find, s ); if ( n!=-1 ) { str_strcpy( s, slist_str( &replace, n ) ); } else { q = s->data; ok = 1; while ( *q && ok ) { if ( !isdigit( *q ) ) ok = 0; q++; } if ( !ok ) { fprintf( stderr, "%s: Warning: Non-numeric " "BibTeX elements should be in quotations or " "curly brackets in reference.\n", pm->progname ); } } } i++; } } static int string_concatenate( slist *tokens, fields *bibin, long nref, param *pm ) { int i, status; str *s, *t; i = 0; while ( i < tokens->n ) { s = slist_str( tokens, i ); if ( !strcmp( s->data, "#" ) ) { if ( i==0 || i==tokens->n-1 ) { fprintf( stderr, "%s: Warning: Stray string concatenation " "('#' character) in reference %ld\n", pm->progname, nref ); status = slist_remove( tokens, i ); if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR; continue; } s = slist_str( tokens, i-1 ); if ( s->data[0]!='\"' && s->data[s->len-1]!='\"' ) fprintf( stderr, "%s: Warning: String concentation should " "be used in context of quotations marks in reference %ld\n", pm->progname, nref ); t = slist_str( tokens, i+1 ); if ( t->data[0]!='\"' && t->data[s->len-1]!='\"' ) fprintf( stderr, "%s: Warning: String concentation 
should " "be used in context of quotations marks in reference %ld\n", pm->progname, nref ); if ( ( s->data[s->len-1]=='\"' && t->data[0]=='\"') || (s->data[s->len-1]=='}' && t->data[0]=='{') ) { str_trimend( s, 1 ); str_trimbegin( t, 1 ); str_strcat( s, t ); } else { str_strcat( s, t ); } status = slist_remove( tokens, i ); if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR; status = slist_remove( tokens, i ); if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR; } else i++; } return BIBL_OK; } /* return NULL on memory error */ static char * process_bibtexline( char *p, str *tag, str *data, uchar stripquotes, fields *bibin, long nref, param *pm ) { int i, status; slist tokens; str *s; str_empty( data ); p = bibtex_tag( p, tag ); if ( p==NULL || str_is_empty( tag ) ) return p; slist_init( &tokens ); if ( *p=='=' ) { p = bibtex_data( p+1, bibin, &tokens, nref, pm ); if ( p==NULL ) goto out; } replace_strings( &tokens, bibin, pm ); status = string_concatenate( &tokens, bibin, nref, pm ); if ( status!=BIBL_OK ) { p = NULL; goto out; } for ( i=0; idata[0]=='\"' && s->data[s->len-1]=='\"' ) || ( s->data[0]=='{' && s->data[s->len-1]=='}' ) ) { str_trimbegin( s, 1 ); str_trimend( s, 1 ); } str_strcat( data, slist_str( &tokens, i ) ); } out: slist_free( &tokens ); return p; } /* process_cite() * */ static int process_cite( fields *bibin, char *p, char *filename, long nref, param *pm ) { int fstatus, status = BIBL_OK; str tag, data; strs_init( &tag, &data, NULL ); p = process_bibtextype( p, &data ); if ( str_has_value( &data ) ) { fstatus = fields_add( bibin, "INTERNAL_TYPE", str_cstr( &data ), 0 ); if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; } } p = process_bibtexid( p, &data ); if ( str_has_value( &data ) ) { fstatus = fields_add( bibin, "REFNUM", str_cstr( &data ), 0 ); if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; } } while ( *p ) { p = process_bibtexline( p, &tag, &data, 1, bibin, nref, pm ); if ( p==NULL ) { status = BIBL_ERR_MEMERR; goto 
out; } /* no anonymous or empty fields allowed */ if ( str_has_value( &tag ) && str_has_value( &data ) ) { fstatus = fields_add( bibin, str_cstr( &tag ), str_cstr( &data ), 0 ); if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; } } strs_empty( &tag, &data, NULL ); } out: strs_free( &tag, &data, NULL ); return status; } /* process_string() * * Handle lines like: * * '@STRING{TL = {Tetrahedron Lett.}}' * * p should point to just after '@STRING' * * In BibTeX, if a string is defined several times, the last one is kept. * */ static int process_string( char *p, long nref, param *pm ) { int n, status = BIBL_OK; str s1, s2, *t; strs_init( &s1, &s2, NULL ); while ( *p && *p!='{' && *p!='(' ) p++; if ( *p=='{' || *p=='(' ) p++; p = process_bibtexline( skip_ws( p ), &s1, &s2, 0, NULL, nref, pm ); if ( p==NULL ) { status = BIBL_ERR_MEMERR; goto out; } if ( str_has_value( &s2 ) ) { str_findreplace( &s2, "\\ ", " " ); } if ( str_has_value( &s1 ) ) { n = slist_find( &find, &s1 ); if ( n==-1 ) { t = slist_add( &find, &s1 ); if ( t==NULL ) { status = BIBL_ERR_MEMERR; goto out; } if ( str_has_value( &s2 ) ) t = slist_add( &replace, &s2 ); else t = slist_addc( &replace, "" ); if ( t==NULL ) { status = BIBL_ERR_MEMERR; goto out; } } else { if ( str_has_value( &s2 ) ) t = slist_set( &replace, n, &s2 ); else t = slist_setc( &replace, n, "" ); if ( t==NULL ) { status = BIBL_ERR_MEMERR; goto out; } } } out: strs_free( &s1, &s2, NULL ); return status; } /* bibtexin_processf() * * Handle '@STRING', '@reftype', and ignore '@COMMENT' */ static int bibtexin_processf( fields *bibin, char *data, char *filename, long nref, param *p ) { if ( !strncasecmp( data, "@STRING", 7 ) ) { process_string( data+7, nref, p ); return 0; } else if ( !strncasecmp( data, "@COMMENT", 8 ) ) { /* Not sure if these are real Bibtex, but not references */ return 0; } else { process_cite( bibin, data, filename, nref, p ); return 1; } } /***************************************************** PUBLIC: void 
bibtexin_cleanf() *****************************************************/ static int bibtex_protected( str *data ) { if ( data->data[0]=='{' && data->data[data->len-1]=='}' ) return 1; if ( data->data[0]=='\"' && data->data[data->len-1]=='\"' ) return 1; return 0; } static int bibtex_split( slist *tokens, str *s ) { int i, n = s->len, nbrackets = 0, status = BIBL_OK; str tok, *t; str_init( &tok ); for ( i=0; idata[i]=='{' && ( i==0 || s->data[i-1]!='\\' ) ) { nbrackets++; str_addchar( &tok, '{' ); } else if ( s->data[i]=='}' && ( i==0 || s->data[i-1]!='\\' ) ) { nbrackets--; str_addchar( &tok, '}' ); } else if ( !is_ws( s->data[i] ) || nbrackets ) { str_addchar( &tok, s->data[i] ); } else if ( is_ws( s->data[i] ) ) { if ( str_has_value( &tok ) ) { t = slist_add( tokens, &tok ); if ( !t ) { status = BIBL_ERR_MEMERR; goto out; } } str_empty( &tok ); } } if ( str_has_value( &tok ) ) { t = slist_add( tokens, &tok ); if ( !t ) { status = BIBL_ERR_MEMERR; goto out; } } for ( i=0; in; ++i ) { str_trimstartingws( slist_str( tokens, i ) ); str_trimendingws( slist_str( tokens, i ) ); } out: str_free( &tok ); return status; } static int bibtex_addtitleurl( fields *info, str *in ) { int fstatus, status = BIBL_OK; str s; char *p; str_init( &s ); /* ...skip past "\href{" and copy to "}" */ p = str_cpytodelim( &s, in->data + 6, "}", 1 ); if ( str_memerr( &s ) ) { status = BIBL_ERR_MEMERR; goto out; } /* ...add to URL */ fstatus = fields_add( info, "URL", s.data, 0 ); if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; } /* ...return deleted fragment to str in */ p = str_cpytodelim( &s, p, "", 0 ); if ( str_memerr( &s ) ) { status = BIBL_ERR_MEMERR; goto out; } str_swapstrings( &s, in ); out: str_free( &s ); return status; } static int is_url_tag( str *tag ) { if ( str_has_value( tag ) ) { if ( !strcasecmp( str_cstr( tag ), "url" ) ) return 1; } return 0; } static int is_name_tag( str *tag ) { if ( str_has_value( tag ) ) { if ( !strcasecmp( str_cstr( tag ), "author" ) ) 
return 1; if ( !strcasecmp( str_cstr( tag ), "editor" ) ) return 1; } return 0; } static void bibtex_process_tilde( str *s ) { char *p, *q; int n = 0; p = q = s->data; if ( !p ) return; while ( *p ) { if ( *p=='~' ) { *q = ' '; } else if ( *p=='\\' && *(p+1)=='~' ) { n++; p++; *q = '~'; } else { *q = *p; } p++; q++; } *q = '\0'; s->len -= n; } static void bibtex_process_bracket( str *s ) { char *p, *q; int n = 0; p = q = s->data; if ( !p ) return; while ( *p ) { if ( *p=='\\' && ( *(p+1)=='{' || *(p+1)=='}' ) ) { n++; p++; *q = *p; q++; } else if ( *p=='{' || *p=='}' ) { n++; } else { *q = *p; q++; } p++; } *q = '\0'; s->len -= n; } static void bibtex_cleantoken( str *s ) { /* 'textcomp' annotations */ str_findreplace( s, "\\textit", "" ); str_findreplace( s, "\\textbf", "" ); str_findreplace( s, "\\textsl", "" ); str_findreplace( s, "\\textsc", "" ); str_findreplace( s, "\\textsf", "" ); str_findreplace( s, "\\texttt", "" ); str_findreplace( s, "\\textsubscript", "" ); str_findreplace( s, "\\textsuperscript", "" ); str_findreplace( s, "\\emph", "" ); str_findreplace( s, "\\url", "" ); str_findreplace( s, "\\mbox", "" ); /* Other text annotations */ str_findreplace( s, "\\it ", "" ); str_findreplace( s, "\\em ", "" ); str_findreplace( s, "\\%", "%" ); str_findreplace( s, "\\$", "$" ); while ( str_findreplace( s, " ", " " ) ) {} /* 'textcomp' annotations that we don't want to substitute on output*/ str_findreplace( s, "\\textdollar", "$" ); str_findreplace( s, "\\textunderscore", "_" ); bibtex_process_bracket( s ); bibtex_process_tilde( s ); } static int bibtex_cleandata( str *tag, str *s, fields *info, param *p ) { int i, status; slist tokens; str *tok; if ( str_is_empty( s ) ) return BIBL_OK; /* protect url from undergoing any parsing */ if ( is_url_tag( tag ) ) return BIBL_OK; slist_init( &tokens ); status = bibtex_split( &tokens, s ); if ( status!=BIBL_OK ) goto out; for ( i=0; idata,"\\href{", 6)) { bibtex_addtitleurl( info, tok ); } } if ( p->latexin && 
!is_name_tag( tag ) && !is_url_tag( tag ) ) bibtex_cleantoken( tok ); } str_empty( s ); for ( i=0; i0 ) str_addchar( s, ' ' ); str_strcat( s, tok ); } out: slist_free( &tokens ); return status; } static int bibtexin_cleanref( fields *bibin, param *p ) { int i, n, status; str *t, *d; n = fields_num( bibin ); for ( i=0; inrefs; ++i ) { n = fields_find( bin->ref[i], "refnum", LEVEL_ANY ); if ( n==-1 ) continue; if ( !strcmp( bin->ref[i]->data[n].data, citekey ) ) return i; } return -1; } static void bibtexin_nocrossref( bibl *bin, long i, int n, param *p ) { int n1 = fields_find( bin->ref[i], "REFNUM", LEVEL_ANY ); if ( p->progname ) fprintf( stderr, "%s: ", p->progname ); fprintf( stderr, "Cannot find cross-reference '%s'", bin->ref[i]->data[n].data ); if ( n1!=-1 ) fprintf( stderr, " for reference '%s'\n", bin->ref[i]->data[n1].data ); fprintf( stderr, "\n" ); } static int bibtexin_crossref_oneref( fields *bibref, fields *bibcross ) { int j, n, nl, ntype, fstatus, status = BIBL_OK; char *type, *nt, *nv; ntype = fields_find( bibref, "INTERNAL_TYPE", LEVEL_ANY ); type = ( char * ) fields_value( bibref, ntype, FIELDS_CHRP_NOUSE ); n = fields_num( bibcross ); for ( j=0; jnrefs; ++i ) { bibref = bin->ref[i]; n = fields_find( bibref, "CROSSREF", LEVEL_ANY ); if ( n==-1 ) continue; fields_setused( bibref, n ); ncross = bibtexin_findref( bin, (char*) fields_value( bibref, n, FIELDS_CHRP ) ); if ( ncross==-1 ) { bibtexin_nocrossref( bin, i, n, p ); continue; } bibcross = bin->ref[ncross]; status = bibtexin_crossref_oneref( bibref, bibcross ); if ( status!=BIBL_OK ) goto out; } out: return status; } static int bibtexin_cleanf( bibl *bin, param *p ) { int status = BIBL_OK; long i; for ( i=0; inrefs; ++i ) status = bibtexin_cleanref( bin->ref[i], p ); bibtexin_crossref( bin, p ); return status; } /***************************************************** PUBLIC: int bibtexin_typef() *****************************************************/ static int bibtexin_typef( fields *bibin, 
char *filename, int nrefs, param *p ) { int ntypename, nrefname, is_default; char *refname = "", *typename = ""; ntypename = fields_find( bibin, "INTERNAL_TYPE", LEVEL_MAIN ); nrefname = fields_find( bibin, "REFNUM", LEVEL_MAIN ); if ( nrefname!=-1 ) refname = fields_value( bibin, nrefname, FIELDS_CHRP_NOUSE ); if ( ntypename!=-1 ) typename = fields_value( bibin, ntypename, FIELDS_CHRP_NOUSE ); return get_reftype( typename, nrefs, p->progname, p->all, p->nall, refname, &is_default, REFTYPE_CHATTY ); } /***************************************************** PUBLIC: int bibtexin_convertf(), returns BIBL_OK or BIBL_ERR_MEMERR *****************************************************/ static int bibtex_matches_list( fields *bibout, char *tag, char *suffix, str *data, int level, slist *names, int *match ) { int i, fstatus, status = BIBL_OK; str newtag; *match = 0; if ( names->n==0 ) return status; str_init( &newtag ); for ( i=0; in; ++i ) { if ( strcmp( str_cstr( data ), slist_cstr( names, i ) ) ) continue; str_initstrc( &newtag, tag ); str_strcatc( &newtag, suffix ); fstatus = fields_add( bibout, str_cstr( &newtag ), str_cstr( data ), level ); if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; } *match = 1; goto out; } out: str_free( &newtag ); return status; } /**** bibtexin_btorg ****/ /* * BibTeX uses 'organization' in lieu of publisher if that field is missing. 
* Otherwise output as
 *
 * The organization
 *
 * organizer of meeting
 *
 *
 */
/*
 * Adds invalue to bibout as "PUBLISHER" when bibin has no "publisher"
 * field at any level, otherwise as "ORGANIZER:CORP".
 * Returns BIBL_OK or BIBL_ERR_MEMERR.
 */
static int
bibtexin_btorg( fields *bibin, int m, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
{
	int n, fstatus;
	n = fields_find( bibin, "publisher", LEVEL_ANY );
	if ( n==-1 )
		fstatus = fields_add( bibout, "PUBLISHER", str_cstr( invalue ), level );
	else
		fstatus = fields_add( bibout, "ORGANIZER:CORP", str_cstr( invalue ), level );
	if ( fstatus==FIELDS_OK ) return BIBL_OK;
	else return BIBL_ERR_MEMERR;
}

/**** bibtexin_btsente() ****/

/*
 * sentelink = {file://localhost/full/path/to/file.pdf,Sente,PDF}
 *
 * Sente is an academic reference manager for MacOSX and Apple iPad.
 */
/*
 * Copies the leading part of invalue, delimited by ',', into a
 * temporary string, trims trailing whitespace and, if anything is left,
 * adds it to bibout as "FILEATTACH".
 * Returns BIBL_OK or BIBL_ERR_MEMERR.
 */
static int
bibtexin_btsente( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
{
	int fstatus, status = BIBL_OK;
	str link;

	str_init( &link );
	str_cpytodelim( &link, skip_ws( invalue->data ), ",", 0 );
	str_trimendingws( &link );
	if ( str_memerr( &link ) ) status = BIBL_ERR_MEMERR;

	if ( status==BIBL_OK && link.len ) {
		fstatus = fields_add( bibout, "FILEATTACH", str_cstr( &link ), level );
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
	}

	str_free( &link );
	return status;
}

/**** bibtexin_linkedfile() ****/

/* Returns the number of ':' characters in the NUL-terminated string p. */
static int
count_colons( char *p )
{
	int n = 0;
	while ( *p ) {
		if ( *p==':' ) n++;
		p++;
	}
	return n;
}

/* Returns the index of the first ':' in p, or strlen(p) if there is none. */
static int
first_colon( char *p )
{
	int n = 0;
	while ( p[n] && p[n]!=':' ) n++;
	return n;
}

/*
 * Returns the index of the last ':' in p, scanning backwards from the
 * end. The scan stops at index 0 without testing it, so 0 is returned
 * both for a colon at position 0 and for no colon at all; an empty
 * string yields -1.
 */
static int
last_colon( char *p )
{
	int n = strlen( p ) - 1;
	while ( n>0 && p[n]!=':' ) n--;
	return n;
}

/*
 * file={Description:/full/path/to/file.pdf:PDF}
 */
/*
 * NOTE(review): text has been lost from inside the copy loop below up
 * to the middle of what appears to be process_eprint_with_prefix()
 * (called from bibtexin_eprint() with bibout, a prefix string, the
 * value and a level). The rest of bibtexin_linkedfile(), any function
 * defined between the two (bibtexin_howpublished is referenced by
 * bibtexin_convertf) and the head of process_eprint_with_prefix() are
 * missing. The fragment is kept verbatim and must be restored from the
 * upstream file.
 */
static int
bibtexin_linkedfile( fields *bibin, int m, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
{
	int fstatus, status = BIBL_OK;
	char *p = invalue->data;
	int i, n, n1, n2;
	str link;

	n = count_colons( p );
	if ( n > 1 ) {
		/* A DOS file can contain a colon ":C:/....pdf:PDF" */
		/* Extract after 1st and up to last colons */
		n1 = first_colon( p ) + 1;
		n2 = last_colon( p );
		str_init( &link );
		for ( i=n1; idata, level );
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
	}
	/* each recognised prefix selects the output tag for the eprint value */
	else if ( !strcmp( prefix, "jstor" ) ) {
		fstatus = fields_add( bibout, "JSTOR", value->data, level );
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
	}
	else if ( !strcmp( prefix, "medline" ) ) {
		fstatus = fields_add( bibout, "MEDLINE", value->data, level );
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
	}
	else if ( !strcmp( prefix, "pubmed" ) ) {
		fstatus = fields_add( bibout, "PMID", value->data, level );
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
	}
	/* ...if this is unknown prefix, merge prefix & eprint */
	else {
		str_init( &merge );
		str_mergestrs( &merge, prefix, ":", value->data, NULL );
		fstatus = fields_add( bibout, "URL", merge.data, level );
		if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR;
		str_free( &merge );
	}

	return status;
}

/*
 * Adds the eprint value to bibout as "URL".
 * Returns BIBL_OK or BIBL_ERR_MEMERR.
 */
static int
process_eprint_without_prefix( fields *bibout, str *value, int level )
{
	int fstatus;

	/* ...no archivePrefix, need to handle just 'eprint' tag */
	fstatus = fields_add( bibout, "URL", value->data, level );

	if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR;
	else return BIBL_OK;
}

/*
 * Converts an eprint field. Looks in bibin, at the same level, for
 * "ARCHIVEPREFIX" and then "EPRINTTYPE"; if either exists its value is
 * passed on as the prefix, otherwise the value is handled without one.
 */
static int
bibtexin_eprint( fields *bibin, int m, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
{
	char *prefix;
	int n;

	/* ...do we have an archivePrefix too? */
	n = fields_find( bibin, "ARCHIVEPREFIX", level );
	if ( n==-1 ) n = fields_find( bibin, "EPRINTTYPE", level );
	if ( n!=-1 ) {
		prefix = fields_value( bibin, n, FIELDS_CHRP );
		return process_eprint_with_prefix( bibout, prefix, invalue, level );
	}

	/* ...no we don't */
	return process_eprint_without_prefix( bibout, invalue, level );
}

/**** bibtexin_keyword() ****/

/* Split keywords="" with semicolons.
* Commas are also frequently used, but will break
 * entries like:
 *     keywords="Microscopy, Confocal"
 * Returns BIBL_OK or BIBL_ERR_MEMERR
 */
/*
 * Each pass copies one ';'-delimited piece of invalue (leading
 * whitespace skipped, trailing whitespace trimmed) and adds it to
 * bibout as "KEYWORD" when it is not empty.
 */
static int
bibtexin_keyword( fields *bibin, int m, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
{
	int fstatus, status = BIBL_OK;
	str keyword;
	char *p;

	p = invalue->data;
	str_init( &keyword );

	while ( *p ) {
		p = str_cpytodelim( &keyword, skip_ws( p ), ";", 1 );
		str_trimendingws( &keyword );
		if ( str_memerr( &keyword ) ) {
			status = BIBL_ERR_MEMERR;
			goto out;
		}
		if ( keyword.len ) {
			fstatus = fields_add( bibout, "KEYWORD", keyword.data, level );
			if ( fstatus!=FIELDS_OK ) {
				status = BIBL_ERR_MEMERR;
				goto out;
			}
		}
	}
out:
	str_free( &keyword );
	return status;
}

/*
 * bibtex_names( bibout, newtag, field, level);
 *
 * split names in author list separated by and's (use '|' character)
 * and add names
 *
 * returns BIBL_OK on success, BIBL_ERR_MEMERR on memory error
 */
/*
 * NOTE(review): text has been lost from the loop header in
 * bibtexin_person() below up to the middle of what appears to be
 * bibtexin_title() (referenced by bibtexin_convertf). The rest of
 * bibtexin_person() and the head of that function are missing. The
 * fragment is kept verbatim and must be restored from the upstream
 * file.
 */
static int
bibtexin_person( fields *bibin, int m, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
{
	int begin, end, ok, n, etal, i, status, match;
	slist tokens;

	/* If we match the asis or corps list add and bail. */
	status = bibtex_matches_list( bibout, outtag, ":ASIS", invalue, level, &(pm->asis), &match );
	if ( match==1 || status!=BIBL_OK ) return status;
	status = bibtex_matches_list( bibout, outtag, ":CORP", invalue, level, &(pm->corps), &match );
	if ( match==1 || status!=BIBL_OK ) return status;

	slist_init( &tokens );

	/* NOTE(review): the status returned by bibtex_split() is not checked here */
	bibtex_split( &tokens, invalue );
	for ( i=0; idata ) ) level=LEVEL_MAIN;
	ok = title_process( bibout, "TITLE", invalue->data, level, pm->nosplittitle );
	if ( ok ) return BIBL_OK;
	else return BIBL_ERR_MEMERR;
}

/*
 * When p->verbose is set, reports on stderr that 'tag' has no
 * conversion entry. "INTERNAL_TYPE" is never reported.
 */
static void
bibtexin_notag( param *p, char *tag )
{
	if ( p->verbose && strcmp( tag, "INTERNAL_TYPE" ) ) {
		if ( p->progname ) fprintf( stderr, "%s: ", p->progname );
		fprintf( stderr, "Cannot find tag '%s'\n", tag );
	}
}

/*
 * Converts the fields of bibin into bibout by calling, for each field,
 * the handler in convertfns[] selected by its 'process' code. Returns
 * the first non-BIBL_OK handler status; when p->verbose is set and all
 * went well, bibout is reported on stderr.
 *
 * The table uses the GNU range designator "[ 0 ... N ]" to default
 * every slot to generic_null before the specific handlers are set.
 *
 * NOTE(review): the loop header and the start of the loop body (where
 * intag, invalue, process, level and outtag are set) have been lost.
 * The fragment is kept verbatim and must be restored from the upstream
 * file.
 */
static int
bibtexin_convertf( fields *bibin, fields *bibout, int reftype, param *p )
{
	static int (*convertfns[NUM_REFTYPES])(fields *, int, str *, str *, int, param *, char *, fields *) = {
		[ 0 ... NUM_REFTYPES-1 ] = generic_null,
		[ SIMPLE       ] = generic_simple,
		[ TITLE        ] = bibtexin_title,
		[ PERSON       ] = bibtexin_person,
		[ PAGES        ] = generic_pages,
		[ KEYWORD      ] = bibtexin_keyword,
		[ EPRINT       ] = bibtexin_eprint,
		[ HOWPUBLISHED ] = bibtexin_howpublished,
		[ LINKEDFILE   ] = bibtexin_linkedfile,
		[ NOTES        ] = generic_notes,
		[ BT_SENTE     ] = bibtexin_btsente,
		[ BT_ORG       ] = bibtexin_btorg,
		[ URL          ] = generic_url
	};

	int process, level, i, nfields, status = BIBL_OK;
	str *intag, *invalue;
	char *outtag;

	nfields = fields_num( bibin );
	for ( i=0; iall, p->nall, &process, &level, &outtag ) ) {
			/* no conversion entry for this tag: optionally report it and move on */
			bibtexin_notag( p, str_cstr( intag ) );
			continue;
		}

		status = convertfns[ process ] ( bibin, i, intag, invalue, level, p, outtag, bibout );
		if ( status!=BIBL_OK ) return status;
	}

	if ( status==BIBL_OK && p->verbose ) fields_report( bibout, stderr );

	return status;
}