/* * risin.c * * Copyright (c) Chris Putnam 2003-2013 * * Source code released under the GPL version 2 * */ #include #include #include #include #include "newstr.h" #include "newstr_conv.h" #include "fields.h" #include "name.h" #include "title.h" #include "serialno.h" #include "reftypes.h" #include "doi.h" #include "risin.h" void risin_initparams( param *p, const char *progname ) { p->readformat = BIBL_RISIN; p->charsetin = BIBL_CHARSET_DEFAULT; p->charsetin_src = BIBL_SRC_DEFAULT; p->latexin = 0; p->xmlin = 0; p->utf8in = 0; p->nosplittitle = 0; p->verbose = 0; p->addcount = 0; p->output_raw = 0; p->readf = risin_readf; p->processf = risin_processf; p->cleanf = NULL; p->typef = risin_typef; p->convertf = risin_convertf; p->all = ris_all; p->nall = ris_nall; list_init( &(p->asis) ); list_init( &(p->corps) ); if ( !progname ) p->progname = NULL; else p->progname = strdup( progname ); } /* RIS definition of a tag is strict: character 1 = uppercase alphabetic character character 2 = uppercase alphabetic character or digit character 3 = space (ansi 32) character 4 = space (ansi 32) character 5 = dash (ansi 45) character 6 = space (ansi 32) */ static int risin_istag( char *buf ) { if (! (buf[0]>='A' && buf[0]<='Z') ) return 0; if (! (((buf[1]>='A' && buf[1]<='Z'))||(buf[1]>='0'&&buf[1]<='9')) ) return 0; if (buf[2]!=' ') return 0; if (buf[3]!=' ') return 0; if (buf[4]!='-') return 0; if (buf[5]!=' ') return 0; return 1; } static int readmore( FILE *fp, char *buf, int bufsize, int *bufpos, newstr *line ) { if ( line->len ) return 1; else return newstr_fget( fp, buf, bufsize, bufpos, line ); } int risin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, newstr *line, newstr *reference, int *fcharset ) { int haveref = 0, inref = 0, readtoofar = 0; unsigned char *up; char *p; *fcharset = CHARSET_UNKNOWN; while ( !haveref && readmore( fp, buf, bufsize, bufpos, line ) ) { if ( !line->data || line->len==0 ) continue; p = &( line->data[0] ); /* Recognize UTF8 BOM */ up = (unsigned char * ) p; if ( line->len > 2 && up[0]==0xEF && up[1]==0xBB && up[2]==0xBF ) { *fcharset = CHARSET_UNICODE; p += 3; } /* Each reference starts with 'TY - ' && * ends with 'ER - ' */ if ( strncmp(p,"TY - ",6)==0 ) { if ( !inref ) { inref = 1; } else { /* we've read too far.... */ readtoofar = 1; inref = 0; } } if ( risin_istag( p ) ) { if ( !inref ) { fprintf(stderr,"Warning. Tagged line not " "in properly started reference.\n"); fprintf(stderr,"Ignored: '%s'\n", p ); } else if ( !strncmp(p,"ER -",5) ) { inref = 0; } else { newstr_addchar( reference, '\n' ); newstr_strcat( reference, p ); } } /* not a tag, but we'll append to last values ...*/ else if ( inref && strncmp(p,"ER -",5)) { newstr_addchar( reference, '\n' ); newstr_strcat( reference, p ); } if ( !inref && reference->len ) haveref = 1; if ( !readtoofar ) newstr_empty( line ); } if ( inref ) haveref = 1; return haveref; } static char* process_line2( newstr *tag, newstr *data, char *p ) { while ( *p==' ' || *p=='\t' ) p++; while ( *p && *p!='\r' && *p!='\n' ) newstr_addchar( data, *p++ ); while ( *p=='\r' || *p=='\n' ) p++; return p; } static char* process_line( newstr *tag, newstr *data, char *p ) { int i = 0; while ( i<6 && *p ) { if ( i<2 ) newstr_addchar( tag, *p ); p++; i++; } while ( *p==' ' || *p=='\t' ) p++; while ( *p && *p!='\r' && *p!='\n' ) newstr_addchar( data, *p++ ); newstr_trimendingws( data ); while ( *p=='\n' || *p=='\r' ) p++; return p; } int risin_processf( fields *risin, char *p, char *filename, long nref ) { newstr tag, data; int n; newstrs_init( &tag, &data, NULL ); while ( *p ) { if ( risin_istag( p ) ) { p = process_line( &tag, &data, p ); /* no anonymous fields allowed */ if ( tag.len ) fields_add( risin, tag.data, data.data, 0 ); } else { p = process_line2( &tag, &data, p ); n = fields_num( risin ); if ( data.len && n>0 ) { newstr *od; od = fields_value( risin, n-1, FIELDS_STRP ); newstr_addchar( od, ' ' ); newstr_strcat( od, data.data ); } } newstrs_empty( &tag, &data, NULL ); } newstrs_free( &tag, &data, NULL ); return 1; } /* oxfordjournals hide the DOI in the NOTES N1 field */ static int risin_addnotes( fields *f, char *tag, newstr *s, int level ) { int doi = is_doi( s->data ); if ( doi!=-1 ) return fields_add( f, "DOI", &(s->data[doi]), level ); else return fields_add( f, tag, s->data, level ); } static int is_uri_file_scheme( char *p ) { if ( !strncmp( p, "file:", 5 ) ) return 5; return 0; } static int is_uri_remote_scheme( char *p ) { char *scheme[] = { "http:", "ftp:", "git:", "gopher:" }; int i, len, nschemes = sizeof( scheme ) / sizeof( scheme[0] ); for ( i=0; idata ); if ( n ) { /* skip past "file:" and store only actual path */ p = s->data + n; return fields_add( f, tag, p, level ); } /* if URL is http:, ftp:, etc. store as a URL */ n = is_uri_remote_scheme( s->data ); if ( n ) { return fields_add( f, "URL", s->data, level ); } /* badly formed, RIS wants URI, but store value anyway */ return fields_add( f, tag, s->data, level ); } /* scopus puts DOI in the DO or DI tag, but it needs cleaning */ static int risin_adddoi( fields *f, char *tag, newstr *s, int level ) { int doi = is_doi( s->data ); if ( doi!=-1 ) return fields_add( f, "DOI", &(s->data[doi]), level ); else return 1; } static int risin_adddate( fields *f, char *tag, newstr *d, int level ) { char *p = d->data; newstr date; int part = ( !strncasecmp( tag, "PART", 4 ) ), ok; newstr_init( &date ); while ( *p && *p!='/' ) newstr_addchar( &date, *p++ ); if ( *p=='/' ) p++; if ( date.len>0 ) { if ( part ) ok = fields_add( f, "PARTYEAR", date.data, level ); else ok = fields_add( f, "YEAR", date.data, level ); if ( !ok ) return 0; } newstr_empty( &date ); while ( *p && *p!='/' ) newstr_addchar( &date, *p++ ); if ( *p=='/' ) p++; if ( date.len>0 ) { if ( part ) ok = fields_add( f, "PARTMONTH", date.data, level ); else ok = fields_add( f, "MONTH", date.data, level ); if ( !ok ) return 0; } newstr_empty( &date ); while ( *p && *p!='/' ) newstr_addchar( &date, *p++ ); if ( *p=='/' ) p++; if ( date.len>0 ) { if ( part ) ok = fields_add( f, "PARTDAY", date.data, level ); else ok = fields_add( f, "DAY", date.data, level ); if ( !ok ) return 0; } newstr_empty( &date ); while ( *p ) newstr_addchar( &date, *p++ ); if ( date.len>0 ) { if ( part ) ok = fields_add( f, "PARTDATEOTHER", date.data,level); else ok = fields_add( f, "DATEOTHER", date.data, level ); if ( !ok ) return 0; } newstr_free( &date ); return 1; } int risin_typef( fields *risin, char *filename, int nref, param *p, variants *all, int nall ) { char *refnum = ""; int n, reftype, nreftype; n = fields_find( risin, "TY", 0 ); nreftype = fields_find( risin, "ID", 0 ); if ( nreftype!=-1 ) refnum = risin[n].data->data; if ( n!=-1 ) reftype = get_reftype( (risin[n].data)->data, nref, p->progname, all, nall, refnum ); else reftype = get_reftype( "", nref, p->progname, all, nall, refnum ); /*default */ return reftype; } static void risin_report_notag( param *p, char *tag ) { if ( p->verbose && strcmp( tag, "TY" ) ) { if ( p->progname ) fprintf( stderr, "%s: ", p->progname ); fprintf( stderr, "Did not identify RIS tag '%s'\n", tag ); } } int risin_convertf( fields *risin, fields *f, int reftype, param *p, variants *all, int nall ) { int process, level, i, n, nfields, ok; char *outtag, *tag, *value; newstr *t, *d; nfields = fields_num( risin ); for ( i=0; idata, reftype, all, nall, &process, &level, &outtag ); if ( n==-1 ) { risin_report_notag( p, t->data ); continue; } if ( process==ALWAYS ) continue; /* add in core code */ d = fields_value( risin, i, FIELDS_STRP ); switch ( process ) { case SIMPLE: ok = fields_add( f, outtag, d->data, level ); break; case PERSON: ok = name_add( f, outtag, d->data, level, &(p->asis), &(p->corps) ); break; case TITLE: ok = title_process( f, outtag, d->data, level, p->nosplittitle ); break; case SERIALNO: ok = addsn( f, d->data, level ); break; case DATE: ok = risin_adddate( f, outtag, d, level ); break; case NOTES: ok = risin_addnotes( f, outtag, d, level ); break; case DOI: ok = risin_adddoi( f, outtag, d, level ); break; case LINKEDFILE: ok = risin_addfile( f, outtag, d, level ); break; default: ok = 1; break; } if ( !ok ) return BIBL_ERR_MEMERR; } /* look for thesis-type hint */ if ( !strcasecmp( all[reftype].type, "THES" ) ) { for ( i=0; i