/* * endin.c * * Copyright (c) Chris Putnam 2003-2018 * * Program and source code released under the GPL version 2 * */ #include #include #include #include #include "is_ws.h" #include "str.h" #include "str_conv.h" #include "fields.h" #include "url.h" #include "reftypes.h" #include "bibformats.h" #include "generic.h" extern variants end_all[]; extern int end_nall; /***************************************************** PUBLIC: void endin_initparams() *****************************************************/ static int endin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset ); static int endin_processf( fields *endin, char *p, char *filename, long nref, param *pm ); int endin_typef( fields *endin, char *filename, int nrefs, param *p ); int endin_convertf( fields *endin, fields *info, int reftype, param *p ); int endin_cleanf( bibl *bin, param *p ); void endin_initparams( param *p, const char *progname ) { p->readformat = BIBL_ENDNOTEIN; p->charsetin = BIBL_CHARSET_DEFAULT; p->charsetin_src = BIBL_SRC_DEFAULT; p->latexin = 0; p->xmlin = 0; p->utf8in = 0; p->nosplittitle = 0; p->verbose = 0; p->addcount = 0; p->output_raw = 0; p->readf = endin_readf; p->processf = endin_processf; p->cleanf = endin_cleanf; p->typef = endin_typef; p->convertf = endin_convertf; p->all = end_all; p->nall = end_nall; slist_init( &(p->asis) ); slist_init( &(p->corps) ); if ( !progname ) p->progname = NULL; else p->progname = strdup( progname ); } /***************************************************** PUBLIC: int endin_readf() *****************************************************/ /* Endnote tag definition: character 1 = '%' character 2 = alphabetic character or digit (or other characters) character 3 = space (ansi 32) */ static int endin_istag( char *buf ) { const char others[]="!@#$^&*()+=?[~>"; if ( buf[0]!='%' ) return 0; if ( buf[2]!=' ' ) return 0; if ( isalpha( (unsigned char)buf[1] ) ) return 1; if ( isdigit( (unsigned char)buf[1] ) ) return 1; if ( strchr( others, buf[1] ) ) return 1; return 0; } static int readmore( FILE *fp, char *buf, int bufsize, int *bufpos, str *line ) { if ( line->len ) return 1; else return str_fget( fp, buf, bufsize, bufpos, line ); } static int endin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset ) { int haveref = 0, inref = 0; unsigned char *up; char *p; *fcharset = CHARSET_UNKNOWN; while ( !haveref && readmore( fp, buf, bufsize, bufpos, line ) ) { if ( !line->data ) continue; p = &(line->data[0]); /* Skip Unicode header information */ /* = ef bb bf */ up = (unsigned char* ) p; if ( line->len > 2 && up[0]==0xEF && up[1]==0xBB && up[2]==0xBF ) { *fcharset = CHARSET_UNICODE; p += 3; } if ( !*p ) { if ( inref ) haveref = 1; /* blank line separates */ else continue; /* blank line to ignore */ } /* Each reference starts with a tag && ends with a blank line */ if ( endin_istag( p ) ) { if ( reference->len ) str_addchar( reference, '\n' ); str_strcatc( reference, p ); inref = 1; } else if ( inref && p ) { str_addchar( reference, '\n' ); str_strcatc( reference, p ); } str_empty( line ); } if ( reference->len ) haveref = 1; return haveref; } /***************************************************** PUBLIC: int endin_processf() *****************************************************/ static char* process_endline( str *tag, str *data, char *p ) { int i; i = 0; while ( i<2 && *p ) { str_addchar( tag, *p++); i++; } while ( *p==' ' || *p=='\t' ) p++; while ( *p && *p!='\r' && *p!='\n' ) str_addchar( data, *p++ ); str_trimendingws( data ); while ( *p=='\r' || *p=='\n' ) p++; return p; } static char * process_endline2( str *tag, str *data, char *p ) { while ( *p==' ' || *p=='\t' ) p++; while ( *p && *p!='\r' && *p!='\n' ) str_addchar( data, *p++ ); str_trimendingws( data ); while ( *p=='\r' || *p=='\n' ) p++; return p; } static int endin_processf( fields *endin, char *p, char *filename, long nref, param *pm ) { str tag, data; int status, n; strs_init( &tag, &data, NULL ); while ( *p ) { strs_empty( &tag, &data, NULL ); if ( endin_istag( p ) ) { p = process_endline( &tag, &data, p ); if ( str_is_empty( &data ) ) continue; status = fields_add( endin, str_cstr( &tag ), str_cstr( &data ), 0 ); if ( status!=FIELDS_OK ) return 0; } else { p = process_endline2( &tag, &data, p ); /* endnote puts %K only on 1st line of keywords */ n = fields_num( endin ); if ( n>0 && str_has_value( &data ) ) { if ( !strncmp( endin->tag[n-1].data, "%K", 2 ) ) { status = fields_add( endin, "%K", str_cstr( &data ), 0 ); if ( status!=FIELDS_OK ) return 0; } else { str_addchar( &(endin->data[n-1]), ' ' ); str_strcat( &(endin->data[n-1]), &data ); } } } } strs_free( &tag, &data, NULL ); return 1; } /***************************************************** PUBLIC: int endin_typef() *****************************************************/ /* Endnote defaults if no %0 tag * * if %J & %V - journal article * if %B - book section * if %R & !%T - report * if %I & !%B & !%J & !%R - book * if !%B & !%J & !%R & !%I - journal article */ int endin_typef( fields *endin, char *filename, int nrefs, param *p ) { int ntypename, nrefname, is_default, nj, nv, nb, nr, nt, ni; char *refname = "", *typename=""; ntypename = fields_find( endin, "%0", LEVEL_MAIN ); nrefname = fields_find( endin, "%F", LEVEL_MAIN ); if ( nrefname!=-1 ) refname = fields_value( endin, nrefname, FIELDS_CHRP_NOUSE ); if ( ntypename!=-1 ) typename = fields_value( endin, ntypename, FIELDS_CHRP_NOUSE ); else { nj = fields_find( endin, "%J", 0 ); nv = fields_find( endin, "%V", 0 ); nb = fields_find( endin, "%B", 0 ); nr = fields_find( endin, "%R", 0 ); nt = fields_find( endin, "%T", 0 ); ni = fields_find( endin, "%I", 0 ); if ( nj!=-1 && nv!=-1 ) { typename = "Journal Article"; } else if ( nb!=-1 ) { typename = "Book Section"; } else if ( nr!=-1 && nt==-1 ) { typename = "Report"; } else if ( ni!=-1 && nb==-1 && nj==-1 && nr==-1 ) { typename = "Book"; } else if ( nb==-1 && nj==-1 && nr==-1 && ni==-1 ) { typename = "Journal Article"; } } return get_reftype( typename, nrefs, p->progname, p->all, p->nall, refname, &is_default, REFTYPE_CHATTY ); } /***************************************************** PUBLIC: void endin_cleanf() *****************************************************/ /* Wiley puts multiple authors separated by commas on the %A lines. * We can detect this by finding the terminal comma in the value * from the tag/value pair. * * "%A" "Author A. X. Last, Author N. B. Next," */ static int is_wiley_author( fields *endin, int n ) { str *t, *v; t = fields_tag( endin, n, FIELDS_STRP_NOUSE ); if ( str_is_empty( t ) || strcmp( str_cstr( t ), "%A" ) ) return 0; v = fields_value( endin, n, FIELDS_STRP_NOUSE ); if ( str_is_empty( v ) ) return 0; if ( v->data[v->len-1]!=',' ) return 0; return 1; } static int add_wiley_author( fields *endin, char *intag, str *instring, int inlevel, str *name, int authornum ) { int fstatus; /* if first author, just replace the data string in the field */ if ( authornum==0 ) { str_strcpy( instring, name ); if ( str_memerr( instring ) ) return BIBL_ERR_MEMERR; } /* otherwise, append the author */ else { fstatus = fields_add( endin, intag, str_cstr( name ), inlevel ); if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR; } return BIBL_OK; } static int cleanup_wiley_author( fields *endin, int n ) { int status=BIBL_OK, inlevel, authornum = 0; str *instring, copy, name; char *p, *intag; strs_init( ©, &name, NULL ); intag = fields_tag ( endin, n, FIELDS_CHRP_NOUSE ); instring = fields_value( endin, n, FIELDS_STRP_NOUSE ); inlevel = fields_level( endin, n ); str_strcpy( ©, instring ); p = str_cstr( © ); while ( *p ) { if ( *p==',' ) { if ( str_memerr( &name ) ) { status = BIBL_ERR_MEMERR; goto out; } status = add_wiley_author( endin, intag, instring, inlevel, &name, authornum ); if ( status!=BIBL_OK ) goto out; str_empty( &name ); authornum++; p++; while ( is_ws( *p ) ) p++; } else { str_addchar( &name, *p ); p++; } } if ( str_has_value( &name ) ) status = add_wiley_author( endin, intag, instring, inlevel, &name, authornum ); out: strs_free( ©, &name, NULL ); return status; } static int endin_cleanref( fields *endin ) { int i, n, status; n = fields_num( endin ); for ( i=0; inrefs; ++i ) endin_cleanref( bin->ref[i] ); return BIBL_OK; } /***************************************************** PUBLIC: int endin_convertf(), returns BIBL_OK or BIBL_ERR_MEMERR *****************************************************/ /* month_convert() * convert month name to number in format MM, e.g. "January" -> "01" * if converted, return 1 * otherwise return 0 */ static int month_convert( char *in, char *out ) { char *month1[12]={ "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December" }; char *month2[12]={ "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; int i, found = -1; for ( i=0; i<12 && found==-1; ++i ) { if ( !strcasecmp( in, month1[i] ) ) found = i; if ( !strcasecmp( in, month2[i] ) ) found = i; } if ( found==-1 ) return 0; if ( found > 8 ) sprintf( out, "%d", found+1 ); else sprintf( out, "0%d", found+1 ); return 1; } static int endin_date( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout ) { char *tags[3][2] = { { "DATE:YEAR", "PARTDATE:YEAR" }, { "DATE:MONTH", "PARTDATE:MONTH" }, { "DATE:DAY", "PARTDATE:DAY" } }; char *p = invalue->data; char month[10], *m; int part, status; str date; str_init( &date ); if ( !strncasecmp( outtag, "PART", 4 ) ) part = 1; else part = 0; /* %D YEAR */ if ( !strcasecmp( intag->data, "%D" ) ) { str_cpytodelim( &date, skip_ws( p ), "", 0 ); if ( str_memerr( &date ) ) return BIBL_ERR_MEMERR; if ( str_has_value( &date ) ) { status = fields_add( bibout, tags[0][part], date.data, level ); if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR; } } /* %8 MONTH DAY, YEAR */ /* %8 MONTH, YEAR */ /* %8 MONTH YEAR */ else if ( !strcasecmp( intag->data, "%8" ) ) { /* ...get month */ p = str_cpytodelim( &date, skip_ws( p ), " ,\n", 0 ); if ( str_memerr( &date ) ) return BIBL_ERR_MEMERR; if ( str_has_value( &date ) ) { if ( month_convert( date.data, month ) ) m = month; else m = str_cstr( &date ); status = fields_add( bibout, tags[1][part], m, level ); if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR; } p = skip_ws( p ); if ( *p==',' ) p++; /* ...get days */ p = str_cpytodelim( &date, skip_ws( p ), ",\n", 0 ); if ( str_memerr( &date ) ) return BIBL_ERR_MEMERR; if ( date.len>0 && date.len<3 ) { status = fields_add( bibout, tags[2][part], date.data, level ); if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR; } else if ( date.len==4 ) { status = fields_add( bibout, tags[0][part], date.data, level ); if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR; } p = skip_ws( p ); if ( *p==',' ) p++; /* ...get year */ (void) str_cpytodelim( &date, skip_ws( p ), " \t\n\r", 0 ); if ( str_memerr( &date ) ) return BIBL_ERR_MEMERR; if ( str_has_value( &date ) ) { status = fields_add( bibout, tags[0][part], date.data, level ); if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR; } } str_free( &date ); return BIBL_OK; } static int endin_type( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout ) { lookups types[] = { { "GENERIC", "ARTICLE" }, { "BOOK", "BOOK" }, { "MANUSCRIPT", "MANUSCRIPT" }, { "CONFERENCE PROCEEDINGS", "INPROCEEDINGS"}, { "REPORT", "REPORT" }, { "COMPUTER PROGRAM", "BOOK" }, { "AUDIOVISUAL MATERIAL", "AUDIOVISUAL" }, { "ARTWORK", "BOOK" }, { "PATENT", "BOOK" }, { "BILL", "BILL" }, { "CASE", "CASE" }, { "JOURNAL ARTICLE", "ARTICLE" }, { "MAGAZINE ARTICLE", "ARTICLE" }, { "BOOK SECTION", "INBOOK" }, { "EDITED BOOK", "BOOK" }, { "NEWSPAPER ARTICLE", "NEWSARTICLE" }, { "THESIS", "PHDTHESIS" }, { "PERSONAL COMMUNICATION", "COMMUNICATION" }, { "ELECTRONIC SOURCE", "TEXT" }, { "FILM OR BROADCAST", "AUDIOVISUAL" }, { "MAP", "MAP" }, { "HEARING", "HEARING" }, { "STATUTE", "STATUTE" }, { "CHART OR TABLE", "CHART" }, { "WEB PAGE", "WEBPAGE" }, }; int ntypes = sizeof( types ) / sizeof( lookups ); int i, status, found=0; for ( i=0; idata ) ) { found = 1; status = fields_add( bibout, "INTERNAL_TYPE", types[i].newstr, level ); if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR; } } if ( !found ) { fprintf( stderr, "Did not identify reference type '%s'\n", invalue->data ); fprintf( stderr, "Defaulting to journal article type\n"); status = fields_add( bibout, "INTERNAL_TYPE", types[0].newstr, level ); if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR; } return BIBL_OK; } static void endin_notag( param *p, char *tag, char *data ) { if ( p->verbose ) { if ( p->progname ) fprintf( stderr, "%s: ", p->progname ); fprintf( stderr, "Cannot find tag '%s'='%s'\n", tag, data ); } } int endin_convertf( fields *bibin, fields *bibout, int reftype, param *p ) { static int (*convertfns[NUM_REFTYPES])(fields *, int, str *, str *, int, param *, char *, fields *) = { [ 0 ... NUM_REFTYPES-1 ] = generic_null, [ SIMPLE ] = generic_simple, [ TITLE ] = generic_title, [ PERSON ] = generic_person, [ SERIALNO ] = generic_serialno, [ PAGES ] = generic_pages, [ NOTES ] = generic_notes, [ URL ] = generic_url, [ GENRE ] = generic_genre, [ TYPE ] = endin_type, [ DATE ] = endin_date, }; int i, level, process, nfields, fstatus, status = BIBL_OK; char *outtag; str *intag, *invalue; nfields = fields_num( bibin ); for ( i=0; idata[0]!='%' ) { fstatus = fields_add( bibout, str_cstr( intag ), str_cstr( invalue ), bibin->level[i] ); if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR; continue; } if ( !translate_oldtag( str_cstr( intag ), reftype, p->all, p->nall, &process, &level, &outtag ) ) { endin_notag( p, str_cstr( intag ), str_cstr( invalue ) ); continue; } fields_setused( bibin, i ); status = convertfns[ process ]( bibin, i, intag, invalue, level, p, outtag, bibout ); if ( status!=BIBL_OK ) return status; } return status; }