#ifndef CMARK_CMARK_EXTENSION_API_H #define CMARK_CMARK_EXTENSION_API_H #ifdef __cplusplus extern "C" { #endif #include #include #include /** * ## Extension Support * * While the "core" of libcmark is strictly compliant with the * specification, an API is provided for extension writers to * hook into the parsing process. * * It should be noted that the cmark_node API already offers * room for customization, with methods offered to traverse and * modify the AST, and even define custom blocks. * When the desired customization is achievable in an error-proof * way using that API, it should be the preferred method. * * The following API requires a more in-depth understanding * of libcmark's parsing strategy, which is exposed * [here](http://spec.commonmark.org/0.24/#appendix-a-parsing-strategy). * * It should be used when "a posteriori" modification of the AST * proves to be too difficult / impossible to implement correctly. * * It can also serve as an intermediary step before extending * the specification, as an extension implemented using this API * will be trivially integrated in the core if it proves to be * desirable. */ typedef struct cmark_plugin cmark_plugin; /** A syntax extension that can be attached to a cmark_parser * with cmark_parser_attach_syntax_extension(). * * Extension writers should assign functions matching * the signature of the following 'virtual methods' to * implement new functionality. * * Their calling order and expected behaviour match the procedure outlined * at : * * During step 1, cmark will call the function provided through * 'cmark_syntax_extension_set_match_block_func' when it * iterates over an open block created by this extension, * to determine whether it could contain the new line. * If no function was provided, cmark will close the block. * * During step 2, if and only if the new line doesn't match any * of the standard syntax rules, cmark will call the function * provided through 'cmark_syntax_extension_set_open_block_func' * to let the extension determine whether that new line matches * one of its syntax rules. * It is the responsibility of the parser to create and add the * new block with cmark_parser_make_block and cmark_parser_add_child. * If no function was provided is NULL, the extension will have * no effect at all on the final block structure of the AST. * * #### Inline parsing phase hooks * * For each character provided by the extension through * 'cmark_syntax_extension_set_special_inline_chars', * the function provided by the extension through * 'cmark_syntax_extension_set_match_inline_func' * will get called, it is the responsibility of the extension * to scan the characters located at the current inline parsing offset * with the cmark_inline_parser API. * * Depending on the type of the extension, it can either: * * * Scan forward, determine that the syntax matches and return * a newly-created inline node with the appropriate type. * This is the technique that would be used if inline code * (with backticks) was implemented as an extension. * * Scan only the character(s) that its syntax rules require * for opening and closing nodes, push a delimiter on the * delimiter stack, and return a simple text node with its * contents set to the character(s) consumed. * This is the technique that would be used if emphasis * inlines were implemented as an extension. * * When an extension has pushed delimiters on the stack, * the function provided through * 'cmark_syntax_extension_set_inline_from_delim_func' * will get called in a latter phase, * when the inline parser has matched opener and closer delimiters * created by the extension together. * * It is then the responsibility of the extension to modify * and populate the opener inline text node, and to remove * the necessary delimiters from the delimiter stack. * * Finally, the extension should return NULL if its scan didn't * match its syntax rules. * * The extension can store whatever private data it might need * with 'cmark_syntax_extension_set_private', * and optionally define a free function for this data. */ typedef struct cmark_syntax_extension cmark_syntax_extension; typedef struct subject cmark_inline_parser; /** Exposed raw for now */ typedef struct delimiter { struct delimiter *previous; struct delimiter *next; cmark_node *inl_text; bufsize_t length; unsigned char delim_char; int can_open; int can_close; } delimiter; /** * ### Plugin API. * * Extensions should be distributed as dynamic libraries, * with a single exported function named after the distributed * filename. * * When discovering extensions (see cmark_init), cmark will * try to load a symbol named "init_{{filename}}" in all the * dynamic libraries it encounters. * * For example, given a dynamic library named myextension.so * (or myextension.dll), cmark will try to load the symbol * named "init_myextension". This means that the filename * must lend itself to forming a valid C identifier, with * the notable exception of dashes, which will be translated * to underscores, which means cmark will look for a function * named "init_my_extension" if it encounters a dynamic library * named "my-extension.so". * * See the 'cmark_plugin_init_func' typedef for the exact prototype * this function should follow. * * For now the extensibility of cmark is not complete, as * it only offers API to hook into the block parsing phase * (). * * See 'cmark_plugin_register_syntax_extension' for more information. */ /** The prototype plugins' init function should follow. */ typedef int (*cmark_plugin_init_func)(cmark_plugin *plugin); /** Register a syntax 'extension' with the 'plugin', it will be made * available as an extension and, if attached to a cmark_parser * with 'cmark_parser_attach_syntax_extension', it will contribute * to the block parsing process. * * See the documentation for 'cmark_syntax_extension' for information * on how to implement one. * * This function will typically be called from the init function * of external modules. * * This takes ownership of 'extension', one should not call * 'cmark_syntax_extension_free' on a registered extension. */ CMARK_EXPORT int cmark_plugin_register_syntax_extension(cmark_plugin *plugin, cmark_syntax_extension *extension); /** This will search for the syntax extension named 'name' among the * registered syntax extensions. * * It can then be attached to a cmark_parser * with the cmark_parser_attach_syntax_extension method. */ CMARK_EXPORT cmark_syntax_extension *cmark_find_syntax_extension(const char *name); /** Should create and add a new open block to 'parent_container' if * 'input' matches a syntax rule for that block type. It is allowed * to modify the type of 'parent_container'. * * Should return the newly created block if there is one, or * 'parent_container' if its type was modified, or NULL. */ typedef cmark_node * (*cmark_open_block_func) (cmark_syntax_extension *extension, int indented, cmark_parser *parser, cmark_node *parent_container, unsigned char *input, int len); typedef cmark_node *(*cmark_match_inline_func)(cmark_syntax_extension *extension, cmark_parser *parser, cmark_node *parent, unsigned char character, cmark_inline_parser *inline_parser); typedef delimiter *(*cmark_inline_from_delim_func)(cmark_syntax_extension *extension, cmark_parser *parser, cmark_inline_parser *inline_parser, delimiter *opener, delimiter *closer); /** Should return 'true' if 'input' can be contained in 'container', * 'false' otherwise. */ typedef int (*cmark_match_block_func) (cmark_syntax_extension *extension, cmark_parser *parser, unsigned char *input, int len, cmark_node *container); typedef const char *(*cmark_get_type_string_func) (cmark_syntax_extension *extension, cmark_node *node); typedef int (*cmark_can_contain_func) (cmark_syntax_extension *extension, cmark_node *node, cmark_node_type child); typedef int (*cmark_contains_inlines_func) (cmark_syntax_extension *extension, cmark_node *node); typedef void (*cmark_common_render_func) (cmark_syntax_extension *extension, cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options); typedef int (*cmark_commonmark_escape_func) (cmark_syntax_extension *extension, cmark_node *node, int c); typedef void (*cmark_html_render_func) (cmark_syntax_extension *extension, cmark_html_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options); typedef int (*cmark_html_filter_func) (cmark_syntax_extension *extension, const unsigned char *tag, size_t tag_len); typedef cmark_node *(*cmark_postprocess_func) (cmark_syntax_extension *extension, cmark_parser *parser, cmark_node *root); typedef int (*cmark_ispunct_func) (char c); typedef void (*cmark_opaque_free_func) (cmark_syntax_extension *extension, cmark_mem *mem, cmark_node *node); /** Free a cmark_syntax_extension. */ CMARK_EXPORT void cmark_syntax_extension_free (cmark_mem *mem, cmark_syntax_extension *extension); /** Return a newly-constructed cmark_syntax_extension, named 'name'. */ CMARK_EXPORT cmark_syntax_extension *cmark_syntax_extension_new (const char *name); CMARK_EXPORT cmark_node_type cmark_syntax_extension_add_node(int is_inline); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_syntax_extension_set_open_block_func(cmark_syntax_extension *extension, cmark_open_block_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_syntax_extension_set_match_block_func(cmark_syntax_extension *extension, cmark_match_block_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_syntax_extension_set_match_inline_func(cmark_syntax_extension *extension, cmark_match_inline_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_syntax_extension_set_inline_from_delim_func(cmark_syntax_extension *extension, cmark_inline_from_delim_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_syntax_extension_set_special_inline_chars(cmark_syntax_extension *extension, cmark_llist *special_chars); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_syntax_extension_set_get_type_string_func(cmark_syntax_extension *extension, cmark_get_type_string_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_syntax_extension_set_can_contain_func(cmark_syntax_extension *extension, cmark_can_contain_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_syntax_extension_set_contains_inlines_func(cmark_syntax_extension *extension, cmark_contains_inlines_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_syntax_extension_set_commonmark_render_func(cmark_syntax_extension *extension, cmark_common_render_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_syntax_extension_set_plaintext_render_func(cmark_syntax_extension *extension, cmark_common_render_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_syntax_extension_set_latex_render_func(cmark_syntax_extension *extension, cmark_common_render_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_syntax_extension_set_man_render_func(cmark_syntax_extension *extension, cmark_common_render_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_syntax_extension_set_html_render_func(cmark_syntax_extension *extension, cmark_html_render_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_syntax_extension_set_html_filter_func(cmark_syntax_extension *extension, cmark_html_filter_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_syntax_extension_set_commonmark_escape_func(cmark_syntax_extension *extension, cmark_commonmark_escape_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_syntax_extension_set_private(cmark_syntax_extension *extension, void *priv, cmark_free_func free_func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void *cmark_syntax_extension_get_private(cmark_syntax_extension *extension); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_syntax_extension_set_postprocess_func(cmark_syntax_extension *extension, cmark_postprocess_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_syntax_extension_set_opaque_free_func(cmark_syntax_extension *extension, cmark_opaque_free_func func); /** See the documentation for 'cmark_syntax_extension' */ CMARK_EXPORT void cmark_parser_set_backslash_ispunct_func(cmark_parser *parser, cmark_ispunct_func func); /** Return the index of the line currently being parsed, starting with 1. */ CMARK_EXPORT int cmark_parser_get_line_number(cmark_parser *parser); /** Return the offset in bytes in the line being processed. * * Example: * * ### foo * * Here, offset will first be 0, then 5 (the index of the 'f' character). */ CMARK_EXPORT int cmark_parser_get_offset(cmark_parser *parser); /** * Return the offset in 'columns' in the line being processed. * * This value may differ from the value returned by * cmark_parser_get_offset() in that it accounts for tabs, * and as such should not be used as an index in the current line's * buffer. * * Example: * * cmark_parser_advance_offset() can be called to advance the * offset by a number of columns, instead of a number of bytes. * * In that case, if offset falls "in the middle" of a tab * character, 'column' and offset will differ. * * ``` * foo \t bar * ^ ^^ * offset (0) 20 * ``` * * If cmark_parser_advance_offset is called here with 'columns' * set to 'true' and 'offset' set to 22, cmark_parser_get_offset() * will return 20, whereas cmark_parser_get_column() will return * 22. * * Additionally, as tabs expand to the next multiple of 4 column, * cmark_parser_has_partially_consumed_tab() will now return * 'true'. */ CMARK_EXPORT int cmark_parser_get_column(cmark_parser *parser); /** Return the absolute index in bytes of the first nonspace * character coming after the offset as returned by * cmark_parser_get_offset() in the line currently being processed. * * Example: * * ``` * foo bar baz \n * ^ ^ ^ * 0 offset (16) first_nonspace (28) * ``` */ CMARK_EXPORT int cmark_parser_get_first_nonspace(cmark_parser *parser); /** Return the absolute index of the first nonspace column coming after 'offset' * in the line currently being processed, counting tabs as multiple * columns as appropriate. * * See the documentation for cmark_parser_get_first_nonspace() and * cmark_parser_get_column() for more information. */ CMARK_EXPORT int cmark_parser_get_first_nonspace_column(cmark_parser *parser); /** Return the difference between the values returned by * cmark_parser_get_first_nonspace_column() and * cmark_parser_get_column(). * * This is not a byte offset, as it can count one tab as multiple * characters. */ CMARK_EXPORT int cmark_parser_get_indent(cmark_parser *parser); /** Return 'true' if the line currently being processed has been entirely * consumed, 'false' otherwise. * * Example: * * ``` * foo bar baz \n * ^ * offset * ``` * * This function will return 'false' here. * * ``` * foo bar baz \n * ^ * offset * ``` * This function will still return 'false'. * * ``` * foo bar baz \n * ^ * offset * ``` * * At this point, this function will now return 'true'. */ CMARK_EXPORT int cmark_parser_is_blank(cmark_parser *parser); /** Return 'true' if the value returned by cmark_parser_get_offset() * is 'inside' an expanded tab. * * See the documentation for cmark_parser_get_column() for more * information. */ CMARK_EXPORT int cmark_parser_has_partially_consumed_tab(cmark_parser *parser); /** Return the length in bytes of the previously processed line, excluding potential * newline (\n) and carriage return (\r) trailing characters. */ CMARK_EXPORT int cmark_parser_get_last_line_length(cmark_parser *parser); /** Add a child to 'parent' during the parsing process. * * If 'parent' isn't the kind of node that can accept this child, * this function will back up till it hits a node that can, closing * blocks as appropriate. */ CMARK_EXPORT cmark_node*cmark_parser_add_child(cmark_parser *parser, cmark_node *parent, cmark_node_type block_type, int start_column); /** Advance the 'offset' of the parser in the current line. * * See the documentation of cmark_parser_get_offset() and * cmark_parser_get_column() for more information. */ CMARK_EXPORT void cmark_parser_advance_offset(cmark_parser *parser, const char *input, int count, int columns); CMARK_EXPORT void cmark_parser_feed_reentrant(cmark_parser *parser, const char *buffer, size_t len); /** Attach the syntax 'extension' to the 'parser', to provide extra syntax * rules. * See the documentation for cmark_syntax_extension for more information. * * Returns 'true' if the 'extension' was successfully attached, * 'false' otherwise. */ CMARK_EXPORT int cmark_parser_attach_syntax_extension(cmark_parser *parser, cmark_syntax_extension *extension); /** Change the type of 'node'. * * Return 0 if the type could be changed, 1 otherwise. */ CMARK_EXPORT int cmark_node_set_type(cmark_node *node, cmark_node_type type); /** Return the string content for all types of 'node'. * The pointer stays valid as long as 'node' isn't freed. */ CMARK_EXPORT const char *cmark_node_get_string_content(cmark_node *node); /** Set the string 'content' for all types of 'node'. * Copies 'content'. */ CMARK_EXPORT int cmark_node_set_string_content(cmark_node *node, const char *content); /** Get the syntax extension responsible for the creation of 'node'. * Return NULL if 'node' was created because it matched standard syntax rules. */ CMARK_EXPORT cmark_syntax_extension *cmark_node_get_syntax_extension(cmark_node *node); /** Set the syntax extension responsible for creating 'node'. */ CMARK_EXPORT int cmark_node_set_syntax_extension(cmark_node *node, cmark_syntax_extension *extension); /** * ## Inline syntax extension helpers * * The inline parsing process is described in detail at * */ /** Should return 'true' if the predicate matches 'c', 'false' otherwise */ typedef int (*cmark_inline_predicate)(int c); /** Advance the current inline parsing offset */ CMARK_EXPORT void cmark_inline_parser_advance_offset(cmark_inline_parser *parser); /** Get the current inline parsing offset */ CMARK_EXPORT int cmark_inline_parser_get_offset(cmark_inline_parser *parser); /** Set the offset in bytes in the chunk being processed by the given inline parser. */ CMARK_EXPORT void cmark_inline_parser_set_offset(cmark_inline_parser *parser, int offset); /** Gets the cmark_chunk being operated on by the given inline parser. * Use cmark_inline_parser_get_offset to get our current position in the chunk. */ CMARK_EXPORT cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser); /** Returns 1 if the inline parser is currently in a bracket; pass 1 for 'image' * if you want to know about an image-type bracket, 0 for link-type. */ CMARK_EXPORT int cmark_inline_parser_in_bracket(cmark_inline_parser *parser, int image); /** Remove the last n characters from the last child of the given node. * This only works where all n characters are in the single last child, and the last * child is CMARK_NODE_TEXT. */ CMARK_EXPORT void cmark_node_unput(cmark_node *node, int n); /** Get the character located at the current inline parsing offset */ CMARK_EXPORT unsigned char cmark_inline_parser_peek_char(cmark_inline_parser *parser); /** Get the character located 'pos' bytes in the current line. */ CMARK_EXPORT unsigned char cmark_inline_parser_peek_at(cmark_inline_parser *parser, int pos); /** Whether the inline parser has reached the end of the current line */ CMARK_EXPORT int cmark_inline_parser_is_eof(cmark_inline_parser *parser); /** Get the characters located after the current inline parsing offset * while 'pred' matches. Free after usage. */ CMARK_EXPORT char *cmark_inline_parser_take_while(cmark_inline_parser *parser, cmark_inline_predicate pred); /** Push a delimiter on the delimiter stack. * See < for * more information on the parameters */ CMARK_EXPORT void cmark_inline_parser_push_delimiter(cmark_inline_parser *parser, unsigned char c, int can_open, int can_close, cmark_node *inl_text); /** Remove 'delim' from the delimiter stack */ CMARK_EXPORT void cmark_inline_parser_remove_delimiter(cmark_inline_parser *parser, delimiter *delim); CMARK_EXPORT delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser); CMARK_EXPORT int cmark_inline_parser_get_line(cmark_inline_parser *parser); CMARK_EXPORT int cmark_inline_parser_get_column(cmark_inline_parser *parser); /** Convenience function to scan a given delimiter. * * 'left_flanking' and 'right_flanking' will be set to true if they * respectively precede and follow a non-space, non-punctuation * character. * * Additionally, 'punct_before' and 'punct_after' will respectively be set * if the preceding or following character is a punctuation character. * * Note that 'left_flanking' and 'right_flanking' can both be 'true'. * * Returns the number of delimiters encountered, in the limit * of 'max_delims', and advances the inline parsing offset. */ CMARK_EXPORT int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser, int max_delims, unsigned char c, int *left_flanking, int *right_flanking, int *punct_before, int *punct_after); CMARK_EXPORT void cmark_manage_extensions_special_characters(cmark_parser *parser, bool add); CMARK_EXPORT cmark_llist *cmark_parser_get_syntax_extensions(cmark_parser *parser); CMARK_EXPORT void cmark_arena_push(void); CMARK_EXPORT int cmark_arena_pop(void); #ifdef __cplusplus } #endif #endif