/* * xml.c * * Copyright (c) Chris Putnam 2004-2009 * * Source code released under the GPL * */ #include #include #include #include "is_ws.h" #include "strsearch.h" #include "newstr.h" #include "xml.h" char *xml_pns = NULL; static xml_attrib * xmlattrib_new( void ) { xml_attrib *a = (xml_attrib *) malloc( sizeof( xml_attrib ) ); if ( a ) { list_init( &(a->attrib) ); list_init( &(a->value) ); } return a; } static void xmlattrib_add( xml_attrib *a, char *attrib, char *value ) { if ( attrib ) list_add( &(a->attrib), attrib ); else list_add( &(a->attrib), "" ); if ( value ) list_add( &(a->value), value ); else list_add( &(a->value), "" ); } static void xmlattrib_free( xml_attrib *a ) { list_free( &(a->attrib) ); list_free( &(a->value ) ); } static xml * xml_new( void ) { xml *x = ( xml * ) malloc( sizeof( xml ) ); if ( x ) xml_init( x ); return x; } void xml_free( xml *x ) { if ( x->tag ) { newstr_free( x->tag ); free( x->tag ); } if ( x->value ) { newstr_free( x->value ); free( x->value ); } if ( x->a ) { xmlattrib_free( x->a ); free( x->a ); } if ( x->down ) xml_free( x->down ); if ( x->next ) xml_free( x->next ); } void xml_init( xml *x ) { x->tag = newstr_new(); x->value = newstr_new(); x->a = NULL; x->down = NULL; x->next = NULL; if ( !(x->tag) || !(x->value) ) { fprintf(stderr,"xml_init: memory error.\n"); exit( EXIT_FAILURE ); } } enum { XML_DESCRIPTOR, XML_COMMENT, XML_OPEN, XML_CLOSE, XML_OPENCLOSE }; static int xml_terminator( char *p, int *type ) { if ( *p=='>' ) { return 1; } else if ( *p=='/' && *(p+1)=='>' ) { if ( *type==XML_OPENCLOSE ) return 1; else if ( *type==XML_OPEN ) { *type = XML_OPENCLOSE; return 1; } } else if ( *p=='?' && *(p+1)=='>' && *type==XML_DESCRIPTOR ) { return 1; } else if ( *p=='!' && *(p+1)=='>' && *type==XML_COMMENT ) { return 1; } return 0; } static char * xml_processattrib( char *p, xml_attrib **ap, int *type ) { xml_attrib *a = NULL; char quote_character = '\"'; int inquotes = 0; newstr aname, aval; newstr_init( &aname ); newstr_init( &aval ); while ( *p && !xml_terminator(p,type) ) { /* get attribute name */ while ( *p==' ' || *p=='\t' ) p++; while ( *p && !strchr( "= \t", *p ) && !xml_terminator(p,type)){ newstr_addchar( &aname, *p ); p++; } while ( *p==' ' || *p=='\t' ) p++; if ( *p=='=' ) p++; /* get attribute value */ while ( *p==' ' || *p=='\t' ) p++; if ( *p=='\"' || *p=='\'' ) { if ( *p=='\'' ) quote_character = *p; inquotes=1; p++; } while ( *p && ((!xml_terminator(p,type) && !strchr("= \t", *p ))||inquotes)){ if ( *p==quote_character ) inquotes=0; else newstr_addchar( &aval, *p ); p++; } if ( aname.len ) { if ( !a ) a = xmlattrib_new(); xmlattrib_add( a, aname.data, aval.data ); } newstr_empty( &aname ); newstr_empty( &aval ); } newstr_free( &aname ); newstr_free( &aval ); *ap = a; return p; } /* * xml_processtag * * XML_COMMENT * XML_DESCRIPTOR * XML_OPEN * XML_CLOSE * XML_OPENCLOSE */ static char * xml_processtag( char *p, newstr *tag, xml_attrib **attrib, int *type ) { *attrib = NULL; if ( *p=='<' ) p++; if ( *p=='!' ) { while ( *p && *p!='>' ) newstr_addchar( tag, *p++ ); *type = XML_COMMENT; } else if ( *p=='?' ) { *type = XML_DESCRIPTOR; p++; /* skip '?' */ while ( *p && !strchr( " \t", *p ) && !xml_terminator(p,type) ) newstr_addchar( tag, *p++ ); if ( *p==' ' || *p=='\t' ) p = xml_processattrib( p, attrib, type ); } else if ( *p=='/' ) { while ( *p && !strchr( " \t", *p ) && !xml_terminator(p,type) ) newstr_addchar( tag, *p++ ); *type = XML_CLOSE; if ( *p==' ' || *p=='\t' ) p = xml_processattrib( p, attrib, type ); } else { *type = XML_OPEN; while ( *p && !strchr( " \t", *p ) && !xml_terminator(p,type) ) newstr_addchar( tag, *p++ ); if ( *p==' ' || *p=='\t' ) p = xml_processattrib( p, attrib, type ); } while ( *p && *p!='>' ) p++; if ( *p=='>' ) p++; return p; } static void xml_appendnode( xml *onode, xml *nnode ) { if ( !onode->down ) onode->down = nnode; else { xml *p = onode->down; while ( p->next ) p = p->next; p->next = nnode; } } char * xml_tree( char *p, xml *onode ) { newstr tag; xml_attrib *attrib; int type, is_style = 0; newstr_init( &tag ); while ( *p ) { /* retain white space for