/* * modsin.c * * Copyright (c) Chris Putnam 2004-2018 * * Source code released under the GPL version 2 * */ #include #include #include #include #include "is_ws.h" #include "str.h" #include "str_conv.h" #include "xml.h" #include "xml_encoding.h" #include "fields.h" #include "name.h" #include "reftypes.h" #include "modstypes.h" #include "bu_auth.h" #include "marc_auth.h" #include "url.h" #include "iso639_1.h" #include "iso639_2.h" #include "iso639_3.h" #include "bibutils.h" #include "bibformats.h" static int modsin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset ); static int modsin_processf( fields *medin, char *data, char *filename, long nref, param *p ); /***************************************************** PUBLIC: void modsin_initparams() *****************************************************/ void modsin_initparams( param *p, const char *progname ) { p->readformat = BIBL_MODSIN; p->format_opts = 0; p->charsetin = BIBL_CHARSET_UNICODE; p->charsetin_src = BIBL_SRC_DEFAULT; p->latexin = 0; p->utf8in = 1; p->xmlin = 1; p->nosplittitle = 0; p->verbose = 0; p->addcount = 0; p->singlerefperfile = 0; p->output_raw = BIBL_RAW_WITHMAKEREFID | BIBL_RAW_WITHCHARCONVERT; p->readf = modsin_readf; p->processf = modsin_processf; p->cleanf = NULL; p->typef = NULL; p->convertf = NULL; p->all = NULL; p->nall = 0; slist_init( &(p->asis) ); slist_init( &(p->corps) ); if ( !progname ) p->progname = NULL; else p->progname = strdup( progname ); } /***************************************************** PUBLIC: int modsin_processf() *****************************************************/ static char modsns[]="mods"; static int modsin_detailr( xml *node, str *value ) { int status = BIBL_OK; if ( xml_has_value( node ) ) { if ( value->len ) str_addchar( value, ' ' ); str_strcat( value, xml_value( node ) ); if ( str_memerr( value ) ) return BIBL_ERR_MEMERR; } if ( node->down ) { status = modsin_detailr( node->down, value ); if ( status!=BIBL_OK ) return status; } if ( node->next ) status = modsin_detailr( node->next, value ); return status; } static int modsin_detail( xml *node, fields *info, int level ) { str type, value, *tp; int fstatus, status = BIBL_OK; if ( node->down ) { strs_init( &type, &value, NULL ); tp = xml_attribute( node, "type" ); if ( tp ) { str_strcpy( &type, tp ); str_toupper( &type ); if ( str_memerr( &type ) ) goto out; } status = modsin_detailr( node->down, &value ); if ( status!=BIBL_OK ) goto out; if ( type.data && !strcasecmp( type.data, "PAGE" ) ) { fstatus = fields_add( info, "PAGES:START", value.data, level ); } else { fstatus = fields_add( info, type.data, value.data, level ); } if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR; out: strs_free( &type, &value, NULL ); } return status; } static int modsin_date( xml *node, fields *info, int level, int part ) { int fstatus, status = BIBL_OK; char *tag, *p; str s; str_init( &s ); p = xml_value_cstr( node ); if ( p ) { p = str_cpytodelim( &s, skip_ws( p ), "-", 1 ); if ( str_memerr( &s ) ) { status = BIBL_ERR_MEMERR; goto out; } if ( str_has_value( &s ) ) { tag = ( part ) ? "PARTDATE:YEAR" : "DATE:YEAR"; fstatus = fields_add( info, tag, str_cstr( &s ), level ); if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; } } p = str_cpytodelim( &s, skip_ws( p ), "-", 1 ); if ( str_memerr( &s ) ) { status = BIBL_ERR_MEMERR; goto out; } if ( str_has_value( &s ) ) { tag = ( part ) ? "PARTDATE:MONTH" : "DATE:MONTH"; fstatus = fields_add( info, tag, str_cstr( &s ), level ); if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; } } (void) str_cpytodelim( &s, skip_ws( p ), "", 0 ); if ( str_memerr( &s ) ) { status = BIBL_ERR_MEMERR; goto out; } if ( str_has_value( &s ) ) { tag = ( part ) ? "PARTDATE:DAY" : "DATE:DAY"; fstatus = fields_add( info, tag, str_cstr( &s ), level ); if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; } } } out: str_free( &s ); return status; } static int modsin_pager( xml *node, str *sp, str *ep, str *tp, str *lp ) { int status = BIBL_OK; if ( xml_tag_matches_has_value( node, "start" ) ) { str_strcpy( sp, xml_value( node ) ); if ( str_memerr( sp ) ) return BIBL_ERR_MEMERR; } else if ( xml_tag_matches_has_value( node, "end" ) ) { str_strcpy( ep, xml_value( node ) ); if ( str_memerr( ep ) ) return BIBL_ERR_MEMERR; } else if ( xml_tag_matches_has_value( node, "total" ) ) { str_strcpy( tp, xml_value( node ) ); if ( str_memerr( tp ) ) return BIBL_ERR_MEMERR; } else if ( xml_tag_matches_has_value( node, "list" ) ) { str_strcpy( lp, xml_value( node ) ); if ( str_memerr( lp ) ) return BIBL_ERR_MEMERR; } if ( node->down ) { status = modsin_pager( node->down, sp, ep, tp, lp ); if ( status!=BIBL_OK ) return status; } if ( node->next ) status = modsin_pager( node->next, sp, ep, tp, lp ); return status; } static int modsin_page( xml *node, fields *info, int level ) { int fstatus, status = BIBL_OK; str sp, ep, tp, lp; xml *dnode = node->down; if ( !dnode ) return BIBL_OK; strs_init( &sp, &ep, &tp, &lp, NULL ); status = modsin_pager( dnode, &sp, &ep, &tp, &lp ); if ( status!=BIBL_OK ) goto out; if ( str_has_value( &sp ) || str_has_value( &ep ) ) { if ( str_has_value( &sp ) ) { fstatus = fields_add( info, "PAGES:START", str_cstr( &sp ), level ); if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; } } if ( str_has_value( &ep ) ) { fstatus = fields_add( info, "PAGES:STOP", str_cstr( &ep ), level ); if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; } } } else if ( str_has_value( &lp ) ) { fstatus = fields_add( info, "PAGES:START", str_cstr( &lp ), level ); if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; } } if ( str_has_value( &tp ) ) { fstatus = fields_add( info, "PAGES:TOTAL", str_cstr( &tp ), level ); if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; } } out: strs_free( &sp, &ep, &tp, &lp, NULL ); return status; } static int modsin_titler( xml *node, str *title, str *subtitle ) { int status = BIBL_OK; if ( xml_tag_matches_has_value( node, "title" ) ) { if ( str_has_value( title ) ) str_strcatc( title, " : " ); str_strcat( title, xml_value( node ) ); if ( str_memerr( title ) ) return BIBL_ERR_MEMERR; } else if ( xml_tag_matches_has_value( node, "subTitle" ) ) { str_strcat( subtitle, xml_value( node ) ); if ( str_memerr( subtitle ) ) return BIBL_ERR_MEMERR; } if ( node->down ) { status = modsin_titler( node->down, title, subtitle ); if ( status!=BIBL_OK ) return status; } if ( node->next ) status = modsin_titler( node->next, title, subtitle ); return status; } static int modsin_title( xml *node, fields *info, int level ) { char *titletag[2][2] = { { "TITLE", "SHORTTITLE" }, { "SUBTITLE", "SHORTSUBTITLE" }, }; int fstatus, status = BIBL_OK; str title, subtitle; xml *dnode; int abbr; dnode = node->down; if ( !dnode ) return status; strs_init( &title, &subtitle, NULL ); abbr = xml_tag_has_attribute( node, "titleInfo", "type", "abbreviated" ); status = modsin_titler( dnode, &title, &subtitle ); if ( status!=BIBL_OK ) goto out; if ( str_has_value( &title ) ) { fstatus = fields_add( info, titletag[0][abbr], str_cstr( &title ), level ); if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; } } if ( str_has_value( &subtitle ) ) { fstatus = fields_add( info, titletag[1][abbr], str_cstr( &subtitle ), level ); if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; } } out: strs_free( &title, &subtitle, NULL ); return status; } /* modsin_marcrole_convert() * * Map MARC-authority roles for people or organizations associated * with a reference to internal roles. * * Take input strings with roles separated by '|' characters, e.g. * "author" or "author|creator" or "edt" or "editor|edt". */ static int modsin_marcrole_convert( str *s, char *suffix, str *out ) { int i, sstatus, status = BIBL_OK; slist tokens; char *p; slist_init( &tokens ); /* ...default to author on an empty string */ if ( str_is_empty( s ) ) { str_strcpyc( out, "AUTHOR" ); } else { sstatus = slist_tokenize( &tokens, s, "|", 1 ); if ( sstatus!=SLIST_OK ) { status = BIBL_ERR_MEMERR; goto done; } /* ...take first match */ for ( i=0; ilen ) str_addchar( role, '|' ); str_strcat( role, xml_value( node ) ); if ( str_memerr( role ) ) return BIBL_ERR_MEMERR; } if ( node->down ) { status = modsin_asis_corp_r( node->down, name, role ); if ( status!=BIBL_OK ) return status; } if ( node->next ) status = modsin_asis_corp_r( node->next, name, role ); return status; } static int modsin_asis_corp( xml *node, fields *info, int level, char *suffix ) { int fstatus, status = BIBL_OK; str name, roles, role_out; xml *dnode = node->down; if ( dnode ) { strs_init( &name, &roles, &role_out, NULL ); status = modsin_asis_corp_r( dnode, &name, &roles ); if ( status!=BIBL_OK ) goto out; status = modsin_marcrole_convert( &roles, suffix, &role_out ); if ( status!=BIBL_OK ) goto out; fstatus = fields_add( info, str_cstr( &role_out ), str_cstr( &name ), level ); if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR; out: strs_free( &name, &roles, &role_out, NULL ); } return status; } static int modsin_roler( xml *node, str *roles ) { if ( xml_has_value( node ) ) { if ( roles->len ) str_addchar( roles, '|' ); str_strcat( roles, xml_value( node ) ); } if ( str_memerr( roles ) ) return BIBL_ERR_MEMERR; else return BIBL_OK; } static int modsin_personr( xml *node, str *familyname, str *givenname, str *suffix ) { int status = BIBL_OK; if ( !xml_has_value( node ) ) return status; if ( xml_tag_has_attribute( node, "namePart", "type", "family" ) ) { if ( str_has_value( familyname ) ) str_addchar( familyname, ' ' ); str_strcat( familyname, xml_value( node ) ); if ( str_memerr( familyname ) ) status = BIBL_ERR_MEMERR; } else if ( xml_tag_has_attribute( node, "namePart", "type", "suffix" ) || xml_tag_has_attribute( node, "namePart", "type", "termsOfAddress" ) ) { if ( str_has_value( suffix ) ) str_addchar( suffix, ' ' ); str_strcat( suffix, xml_value( node ) ); if ( str_memerr( suffix ) ) status = BIBL_ERR_MEMERR; } else if ( xml_tag_has_attribute( node, "namePart", "type", "date" ) ) { /* no nothing */ } else { if ( str_has_value( givenname ) ) str_addchar( givenname, '|' ); str_strcat( givenname, xml_value( node ) ); if ( str_memerr( givenname ) ) status = BIBL_ERR_MEMERR; } return status; } static int modsin_person( xml *node, fields *info, int level ) { str familyname, givenname, name, suffix, roles, role_out; int fstatus, status = BIBL_OK; xml *dnode, *rnode; dnode = node->down; if ( !dnode ) return status; strs_init( &name, &familyname, &givenname, &suffix, &roles, &role_out, NULL ); while ( dnode ) { if ( xml_tag_matches( dnode, "namePart" ) ) { status = modsin_personr( dnode, &familyname, &givenname, &suffix ); if ( status!=BIBL_OK ) goto out; } else if ( xml_tag_matches( dnode, "role" ) ) { rnode = dnode->down; while ( rnode ) { if ( xml_tag_matches( rnode, "roleTerm" ) ) { status = modsin_roler( rnode, &roles ); if ( status!=BIBL_OK ) goto out; } rnode = rnode->next; } } dnode = dnode->next; } /* * Handle: * Noah A. * Smith * without mangling the order of "Noah A." */ if ( str_has_value( &familyname ) ) { str_strcpy( &name, &familyname ); if ( givenname.len ) { str_addchar( &name, '|' ); str_strcat( &name, &givenname ); } } /* * Handle: * Noah A. Smith * with name order mangling. */ else { if ( str_has_value( &givenname ) ) name_parse( &name, &givenname, NULL, NULL ); } if ( str_has_value( &suffix ) ) { str_strcatc( &name, "||" ); str_strcat( &name, &suffix ); } if ( str_memerr( &name ) ) { status=BIBL_ERR_MEMERR; goto out; } status = modsin_marcrole_convert( &roles, NULL, &role_out ); if ( status!=BIBL_OK ) goto out; fstatus = fields_add_can_dup( info, str_cstr( &role_out ), str_cstr( &name ), level ); if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR; out: strs_free( &name, &familyname, &givenname, &suffix, &roles, &role_out, NULL ); return status; } static int modsin_placeterm_text( xml *node, fields *info, int level, int school ) { char address_tag[] = "ADDRESS"; char school_tag[] = "SCHOOL"; char *tag; int fstatus; tag = ( school ) ? school_tag : address_tag; fstatus = fields_add( info, tag, xml_value_cstr( node ), level ); if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR; return BIBL_OK; } static int modsin_placeterm_code( xml *node, fields *info, int level ) { int fstatus, status = BIBL_OK; str s, *auth; str_init( &s ); auth = xml_attribute( node, "authority" ); if ( auth && auth->len ) { str_strcpy( &s, auth ); str_addchar( &s, '|' ); } str_strcat( &s, xml_value( node ) ); if ( str_memerr( &s ) ) { status = BIBL_ERR_MEMERR; goto out; } fstatus = fields_add( info, "CODEDADDRESS", str_cstr( &s ), level ); if ( fstatus!=FIELDS_OK ) status = BIBL_ERR_MEMERR; out: str_free( &s ); return status; } static int modsin_placeterm( xml *node, fields *info, int level, int school ) { int status = BIBL_OK; str *type; type = xml_attribute( node, "type" ); if ( str_has_value( type ) ) { if ( !strcmp( str_cstr( type ), "text" ) ) status = modsin_placeterm_text( node, info, level, school ); else if ( !strcmp( str_cstr( type ), "code" ) ) status = modsin_placeterm_code( node, info, level ); } return status; } static int modsin_placer( xml *node, fields *info, int level, int school ) { int status = BIBL_OK; if ( xml_tag_has_attribute( node, "place", "type", "school" ) ) { school = 1; } else if ( xml_tag_matches( node, "placeTerm" ) ) { status = modsin_placeterm( node, info, level, school ); } if ( node->down ) { status = modsin_placer( node->down, info, level, school ); if ( status!=BIBL_OK ) return status; } if ( node->next ) status = modsin_placer( node->next, info, level, school ); return status; } static int modsin_origininfor( xml *node, fields *info, int level, str *pub, str *add, str *addc, str *ed, str *iss ) { int status = BIBL_OK; if ( xml_tag_matches( node, "dateIssued" ) ) { status = modsin_date( node, info, level, 0 ); } else if ( xml_tag_matches( node, "place" ) ) { status = modsin_placer( node, info, level, 0 ); } else if ( xml_tag_matches_has_value( node, "publisher" ) ) { str_strcat( pub, xml_value( node ) ); if ( str_memerr( pub ) ) return BIBL_ERR_MEMERR; } else if ( xml_tag_matches_has_value( node, "edition" ) ) { str_strcat( ed, xml_value( node ) ); if( str_memerr( ed ) ) return BIBL_ERR_MEMERR; } else if ( xml_tag_matches_has_value( node, "issuance" ) ) { str_strcat( iss, xml_value( node ) ); if ( str_memerr( iss ) ) return BIBL_ERR_MEMERR; } if ( status!=BIBL_OK ) return status; if ( node->down ) { status = modsin_origininfor( node->down, info, level, pub, add, addc, ed, iss ); if ( status!=BIBL_OK ) return status; } if ( node->next ) status = modsin_origininfor( node->next, info, level, pub, add, addc, ed, iss ); return status; } static int modsin_origininfo( xml *node, fields *info, int level ) { str publisher, address, addcode, edition, issuance; int fstatus, status = BIBL_OK; if ( node->down ) { strs_init( &publisher, &address, &addcode, &edition, &issuance, NULL ); status = modsin_origininfor( node->down, info, level, &publisher, &address, &addcode, &edition, &issuance ); if ( status!=BIBL_OK ) goto out; if ( str_has_value( &publisher ) ) { fstatus = fields_add( info, "PUBLISHER", str_cstr( &publisher ), level ); if ( fstatus!=FIELDS_OK ) { status=BIBL_ERR_MEMERR; goto out; } } if ( str_has_value( &address ) ) { fstatus = fields_add( info, "ADDRESS", str_cstr( &address ), level ); if ( fstatus!=FIELDS_OK ) { status=BIBL_ERR_MEMERR; goto out; } } if ( str_has_value( &addcode ) ) { fstatus = fields_add( info, "CODEDADDRESS", str_cstr( &addcode ), level ); if ( fstatus!=FIELDS_OK ) { status=BIBL_ERR_MEMERR; goto out; } } if ( str_has_value( &edition ) ) { fstatus = fields_add( info, "EDITION", str_cstr( &edition ), level ); if ( fstatus!=FIELDS_OK ) { status=BIBL_ERR_MEMERR; goto out; } } if ( str_has_value( &issuance ) ) { fstatus = fields_add( info, "ISSUANCE", str_cstr( &issuance ), level ); if ( fstatus!=FIELDS_OK ) { status=BIBL_ERR_MEMERR; goto out; } } out: strs_free( &publisher, &address, &addcode, &edition, &issuance, NULL ); } return status; } static int modsin_subjectr( xml *node, fields *info, int level ) { int fstatus, status = BIBL_OK; if ( xml_tag_has_attribute( node, "topic", "class", "primary" ) && xml_has_value( node ) ) { fstatus = fields_add( info, "EPRINTCLASS", xml_value_cstr( node ), level ); if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR; } else if ( xml_tag_matches_has_value( node, "topic" ) ) { fstatus = fields_add( info, "KEYWORD", xml_value_cstr( node ), level ); if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR; } else if ( xml_tag_matches_has_value( node, "geographic" ) ) { fstatus = fields_add( info, "KEYWORD", xml_value_cstr( node ), level ); if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR; } if ( node->down ) { status = modsin_subjectr( node->down, info, level ); if ( status!=BIBL_OK ) return status; } if ( node->next ) status = modsin_subjectr( node->next, info, level ); return status; } static int modsin_subject( xml *node, fields *info, int level ) { int status = BIBL_OK; if ( node->down ) status = modsin_subjectr( node->down, info, level ); return status; } static int modsin_id1( xml *node, fields *info, int level ) { int fstatus; str *ns; ns = xml_attribute( node, "ID" ); if ( str_has_value( ns ) ) { fstatus = fields_add( info, "REFNUM", str_cstr( ns ), level ); if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR; } return BIBL_OK; } /* modsin_genre() * * MARC authority terms tagged with "GENRE:MARC" * bibutils authority terms tagged with "GENRE:BIBUTILS" * unknown terms tagged with "GENRE:UNKNOWN" */ static int modsin_genre( xml *node, fields *info, int level ) { int fstatus; char *d; if ( !xml_has_value( node ) ) return BIBL_OK; d = xml_value_cstr( node ); /* ...handle special genres in KTH DivA */ if ( !strcmp( d, "conferenceProceedings" ) || !strcmp( d, "conferencePaper" ) ) d = "conference publication"; else if ( !strcmp( d, "artisticOutput" ) || !strcmp( d, "other" ) ) d = "miscellaneous"; else if ( !strcmp( d, "studentThesis" ) ) d = "thesis"; else if ( !strcmp( d, "monographDoctoralThesis" ) ) d = "Ph.D. thesis"; else if ( !strcmp( d, "comprehensiveDoctoralThesis" ) ) d = "Ph.D. thesis"; else if ( !strcmp( d, "monographLicentiateThesis" ) ) d = "Licentiate thesis"; else if ( !strcmp( d, "comprehensiveLicentiateThesis" ) ) d = "Licentiate thesis"; if ( is_marc_genre( d ) ) fstatus = fields_add( info, "GENRE:MARC", d, level ); else if ( is_bu_genre( d ) ) fstatus = fields_add( info, "GENRE:BIBUTILS", d, level ); else fstatus = fields_add( info, "GENRE:UNKNOWN", d, level ); if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR; else return BIBL_OK; } /* in MODS version 3.5 * .... * ... * xxx = rfc3066 * xxx = iso639-2b * xxx = iso639-3 * xxx = rfc4646 * xxx = rfc5646 */ static int modsin_languager( xml *node, fields *info, int level ) { int fstatus, status = BIBL_OK; char *d = NULL; if ( xml_tag_matches( node, "languageTerm" ) ) { if ( xml_has_value( node ) ) { if ( xml_has_attribute( node, "type", "code" ) ) { if ( xml_has_attribute( node, "authority", "iso639-1" ) ) d = iso639_1_from_code( xml_value_cstr( node ) ); else if ( xml_has_attribute( node, "authority", "iso639-2b" ) ) d = iso639_2_from_code( xml_value_cstr( node ) ); else if ( xml_has_attribute( node, "authority", "iso639-3" )) d = iso639_3_from_code( xml_value_cstr( node ) ); } if ( !d ) d = xml_value_cstr( node ); fstatus = fields_add( info, "LANGUAGE", d, level ); if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR; } } if ( node->next ) status = modsin_languager( node->next, info, level ); return status; } static int modsin_language( xml *node, fields *info, int level ) { int fstatus, status = BIBL_OK; /* Old versions of MODS had English */ if ( xml_has_value( node ) ) { fstatus = fields_add( info, "LANGUAGE", xml_value_cstr( node ), level ); if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR; } /* New versions of MODS have English */ if ( node->down ) status = modsin_languager( node->down, info, level ); return status; } static int modsin_simple( xml *node, fields *info, char *tag, int level ) { int fstatus; if ( xml_has_value( node ) ) { fstatus = fields_add( info, tag, xml_value_cstr( node ), level ); if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR; } return BIBL_OK; } static int modsin_locationr( xml *node, fields *info, int level ) { int fstatus, status = BIBL_OK; char *url = "URL"; char *fileattach = "FILEATTACH"; char *tag=NULL; if ( xml_tag_matches( node, "url" ) ) { if ( xml_has_attribute( node, "access", "raw object" ) ) tag = fileattach; else tag = url; } else if ( xml_tag_matches( node, "physicalLocation" ) ) { if ( xml_has_attribute( node, "type", "school" ) ) tag = "SCHOOL"; else tag = "LOCATION"; } if ( tag == url ) { status = urls_split_and_add( xml_value_cstr( node ), info, level ); if ( status!=BIBL_OK ) return status; } else if ( tag ) { fstatus = fields_add( info, tag, xml_value_cstr( node ), level ); if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR; } if ( node->down ) { status = modsin_locationr( node->down, info, level ); if ( status!=BIBL_OK ) return status; } if ( node->next ) status = modsin_locationr( node->next, info, level ); return status; } static int modsin_location( xml *node, fields *info, int level ) { int status = BIBL_OK; if ( node->down ) status = modsin_locationr( node->down, info, level ); return status; } static int modsin_descriptionr( xml *node, str *s ) { int status = BIBL_OK; if ( xml_tag_matches( node, "extent" ) || xml_tag_matches( node, "note" ) ) { str_strcpy( s, &(node->value) ); if ( str_memerr( s ) ) return BIBL_ERR_MEMERR; } if ( node->down ) { status = modsin_descriptionr( node->down, s ); if ( status!=BIBL_OK ) return status; } if ( node->next ) status = modsin_descriptionr( node->next, s ); return status; } static int modsin_description( xml *node, fields *info, int level ) { int fstatus, status = BIBL_OK; str s; str_init( &s ); if ( node->down ) { status = modsin_descriptionr( node->down, &s ); if ( status!=BIBL_OK ) goto out; } else { if ( node->value.len > 0 ) str_strcpy( &s, &(node->value) ); if ( str_memerr( &s ) ) { status = BIBL_ERR_MEMERR; goto out; } } if ( str_has_value( &s ) ) { fstatus = fields_add( info, "DESCRIPTION", str_cstr( &s ), level ); if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; } } out: str_free( &s ); return status; } static int modsin_partr( xml *node, fields *info, int level ) { int status = BIBL_OK; if ( xml_tag_matches( node, "detail" ) ) status = modsin_detail( node, info, level ); else if ( xml_tag_has_attribute( node, "extent", "unit", "page" ) ) status = modsin_page( node, info, level ); else if ( xml_tag_has_attribute( node, "extent", "unit", "pages" ) ) status = modsin_page( node, info, level ); else if ( xml_tag_matches( node, "date" ) ) status = modsin_date( node, info, level, 1 ); if ( status!=BIBL_OK ) return status; if ( node->next ) status = modsin_partr( node->next, info, level ); return status; } static int modsin_part( xml *node, fields *info, int level ) { if ( node->down ) return modsin_partr( node->down, info, level ); return BIBL_OK; } /* Q3 .A65 */ static int modsin_classification( xml *node, fields *info, int level ) { int fstatus, status = BIBL_OK; char *tag; if ( xml_has_value( node ) ) { if ( xml_tag_has_attribute( node, "classification", "authority", "lcc" ) ) tag = "LCC"; else tag = "CLASSIFICATION"; fstatus = fields_add( info, tag, xml_value_cstr( node ), level ); if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR; } if ( node->down ) status = modsin_classification( node->down, info, level ); return status; } static int modsin_recordinfo( xml *node, fields *info, int level ) { int fstatus; xml *curr; /* extract recordIdentifier */ curr = node; while ( curr ) { if ( xml_tag_matches_has_value( curr, "recordIdentifier" ) ) { fstatus = fields_add( info, "REFNUM", xml_value_cstr( curr ), level ); if ( fstatus!=FIELDS_OK ) return BIBL_ERR_MEMERR; } curr = curr->next; } return BIBL_OK; } static int modsin_identifier( xml *node, fields *info, int level ) { convert ids[] = { { "citekey", "REFNUM", 0, 0 }, { "issn", "ISSN", 0, 0 }, { "coden", "CODEN", 0, 0 }, { "isbn", "ISBN", 0, 0 }, { "doi", "DOI", 0, 0 }, { "url", "URL", 0, 0 }, { "uri", "URL", 0, 0 }, { "pmid", "PMID", 0, 0 }, { "pubmed", "PMID", 0, 0 }, { "medline", "MEDLINE", 0, 0 }, { "pmc", "PMC", 0, 0 }, { "arXiv", "ARXIV", 0, 0 }, { "MRnumber", "MRNUMBER", 0, 0 }, { "pii", "PII", 0, 0 }, { "isi", "ISIREFNUM", 0, 0 }, { "serial number", "SERIALNUMBER",0, 0 }, { "accessnum", "ACCESSNUM", 0, 0 }, { "jstor", "JSTOR", 0, 0 }, }; int i, fstatus, n = sizeof( ids ) / sizeof( ids[0] ); if ( node->value.len==0 ) return BIBL_OK; for ( i=0; idown ) status = modsin_recordinfo( node->down, info, level ); else if ( xml_tag_matches( node, "part" ) ) modsin_part( node, info, level ); else if ( xml_tag_matches( node, "identifier" ) ) status = modsin_identifier( node, info, level ); else if ( xml_tag_matches( node, "originInfo" ) ) status = modsin_origininfo( node, info, level ); else if ( xml_tag_matches( node, "language" ) ) status = modsin_language( node, info, level ); else if ( xml_tag_matches( node, "genre" ) ) status = modsin_genre( node, info, level ); else if ( xml_tag_matches( node, "date" ) ) status = modsin_date( node, info, level, 0 ); else if ( xml_tag_matches( node, "subject" ) ) status = modsin_subject( node, info, level ); else if ( xml_tag_matches( node, "classification" ) ) status = modsin_classification( node, info, level ); else if ( xml_tag_matches( node, "location" ) ) status = modsin_location( node, info, level ); else if ( xml_tag_matches( node, "physicalDescription" ) ) status = modsin_description( node, info, level ); else if ( xml_tag_has_attribute( node, "relatedItem", "type", "host" ) || xml_tag_has_attribute( node, "relatedItem", "type", "series" ) ) { if ( node->down ) status = modsin_mods( node->down, info, level+1 ); } else if ( xml_tag_has_attribute( node, "relatedItem", "type", "original" ) ) { if ( node->down ) status = modsin_mods( node->down, info, LEVEL_ORIG ); } if ( status!=BIBL_OK ) return status; } if ( node->next ) status = modsin_mods( node->next, info, level ); return status; } static int modsin_assembleref( xml *node, fields *info ) { int status = BIBL_OK; if ( xml_tag_matches( node, "mods" ) ) { status = modsin_id1( node, info, 0 ); if ( status!=BIBL_OK ) return status; if ( node->down ) { status = modsin_mods( node->down, info, 0 ); if ( status!=BIBL_OK ) return status; } } else if ( node->down ) { status = modsin_assembleref( node->down, info ); if ( status!=BIBL_OK ) return status; } if ( node->next ) status = modsin_assembleref( node->next, info ); return status; } static int modsin_processf( fields *modsin, char *data, char *filename, long nref, param *p ) { int status; xml top; xml_init( &top ); xml_parse( data, &top ); status = modsin_assembleref( &top, modsin ); xml_free( &top ); if ( status==BIBL_OK ) return 1; else return 0; } /***************************************************** PUBLIC: int modsin_readf() *****************************************************/ static char * modsin_startptr( char *p ) { char *startptr; startptr = xml_find_start( p, "mods:mods" ); if ( startptr ) { /* set namespace if found */ xml_pns = modsns; } else { startptr = xml_find_start( p, "mods" ); if ( startptr ) xml_pns = NULL; } return startptr; } static char * modsin_endptr( char *p ) { return xml_find_end( p, "mods" ); } static int modsin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset ) { str tmp; int m, file_charset = CHARSET_UNKNOWN; char *startptr = NULL, *endptr = NULL; str_init( &tmp ); do { if ( line->data ) str_strcat( &tmp, line ); if ( str_has_value( &tmp ) ) { m = xml_getencoding( &tmp ); if ( m!=CHARSET_UNKNOWN ) file_charset = m; startptr = modsin_startptr( tmp.data ); endptr = modsin_endptr( tmp.data ); } else startptr = endptr = NULL; str_empty( line ); if ( startptr && endptr ) { str_segcpy( reference, startptr, endptr ); str_strcpyc( line, endptr ); } } while ( !endptr && str_fget( fp, buf, bufsize, bufpos, line ) ); str_free( &tmp ); *fcharset = file_charset; return ( reference->len > 0 ); }