lucy-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nwelln...@apache.org
Subject [05/12] lucy-clownfish git commit: Update CommonMark source code
Date Sun, 18 Jan 2015 18:44:41 GMT
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/inlines.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/inlines.c b/compiler/modules/CommonMark/src/inlines.c
index 08a934b..2487f63 100644
--- a/compiler/modules/CommonMark/src/inlines.c
+++ b/compiler/modules/CommonMark/src/inlines.c
@@ -1,8 +1,8 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
-#include <ctype.h>
 
+#include "cmark_ctype.h"
 #include "config.h"
 #include "node.h"
 #include "parser.h"
@@ -16,7 +16,7 @@
 
 // Macros for creating various kinds of simple.
 #define make_str(s) make_literal(CMARK_NODE_TEXT, s)
-#define make_code(s) make_literal(CMARK_NODE_INLINE_CODE, s)
+#define make_code(s) make_literal(CMARK_NODE_CODE, s)
 #define make_raw_html(s) make_literal(CMARK_NODE_INLINE_HTML, s)
 #define make_linebreak() make_simple(CMARK_NODE_LINEBREAK)
 #define make_softbreak() make_simple(CMARK_NODE_SOFTBREAK)
@@ -31,10 +31,11 @@ typedef struct delimiter {
 	int position;
 	bool can_open;
 	bool can_close;
+	bool active;
 } delimiter;
 
 typedef struct {
-	chunk input;
+	cmark_chunk input;
 	int pos;
 	cmark_reference_map *refmap;
 	delimiter *last_delim;
@@ -45,24 +46,24 @@ S_insert_emph(subject *subj, delimiter *opener, delimiter *closer);
 
 static int parse_inline(subject* subj, cmark_node * parent);
 
-static void subject_from_buf(subject *e, strbuf *buffer,
-			     cmark_reference_map *refmap);
+static void subject_from_buf(subject *e, cmark_strbuf *buffer,
+                             cmark_reference_map *refmap);
 static int subject_find_special_char(subject *subj);
 
-static unsigned char *cmark_clean_autolink(chunk *url, int is_email)
+static unsigned char *cmark_clean_autolink(cmark_chunk *url, int is_email)
 {
-	strbuf buf = GH_BUF_INIT;
+	cmark_strbuf buf = GH_BUF_INIT;
 
-	chunk_trim(url);
+	cmark_chunk_trim(url);
 
 	if (url->len == 0)
 		return NULL;
 
 	if (is_email)
-		strbuf_puts(&buf, "mailto:");
+		cmark_strbuf_puts(&buf, "mailto:");
 
 	houdini_unescape_html_f(&buf, url->data, url->len);
-	return strbuf_detach(&buf);
+	return cmark_strbuf_detach(&buf);
 }
 
 static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsigned char *title)
@@ -71,11 +72,11 @@ static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsig
 	if(e != NULL) {
 		e->type = CMARK_NODE_LINK;
 		e->first_child   = label;
-                e->last_child    = label;
+		e->last_child    = label;
 		e->as.link.url   = url;
 		e->as.link.title = title;
 		e->next = NULL;
-                label->parent = e;
+		label->parent = e;
 	}
 	return e;
 }
@@ -93,14 +94,14 @@ static inline cmark_node* make_literal(cmark_node_type t, cmark_chunk s)
 		e->type = t;
 		e->as.literal = s;
 		e->next = NULL;
-                e->prev = NULL;
-                e->parent = NULL;
-                e->first_child = NULL;
-                e->last_child = NULL;
-                // These fields aren't used for inlines:
-                e->start_line = 0;
-                e->start_column = 0;
-                e->end_line = 0;
+		e->prev = NULL;
+		e->parent = NULL;
+		e->first_child = NULL;
+		e->last_child = NULL;
+		// These fields aren't used for inlines:
+		e->start_line = 0;
+		e->start_column = 0;
+		e->end_line = 0;
 	}
 	return e;
 }
@@ -112,14 +113,14 @@ static inline cmark_node* make_simple(cmark_node_type t)
 	if(e != NULL) {
 		e->type = t;
 		e->next = NULL;
-                e->prev = NULL;
-                e->parent = NULL;
-                e->first_child = NULL;
-                e->last_child = NULL;
-                // These fields aren't used for inlines:
-                e->start_line = 0;
-                e->start_column = 0;
-                e->end_line = 0;
+		e->prev = NULL;
+		e->parent = NULL;
+		e->first_child = NULL;
+		e->last_child = NULL;
+		// These fields aren't used for inlines:
+		e->start_line = 0;
+		e->start_column = 0;
+		e->end_line = 0;
 	}
 	return e;
 }
@@ -139,8 +140,8 @@ static unsigned char *bufdup(const unsigned char *buf)
 	return new_buf;
 }
 
-static void subject_from_buf(subject *e, strbuf *buffer,
-			     cmark_reference_map *refmap)
+static void subject_from_buf(subject *e, cmark_strbuf *buffer,
+                             cmark_reference_map *refmap)
 {
 	e->input.data = buffer->ptr;
 	e->input.len = buffer->size;
@@ -149,7 +150,7 @@ static void subject_from_buf(subject *e, strbuf *buffer,
 	e->refmap = refmap;
 	e->last_delim = NULL;
 
-	chunk_rtrim(&e->input);
+	cmark_chunk_rtrim(&e->input);
 }
 
 static inline int isbacktick(int c)
@@ -177,7 +178,7 @@ static inline int is_eof(subject* subj)
 #define advance(subj) (subj)->pos += 1
 
 // Take characters while a predicate holds, and return a string.
-static inline chunk take_while(subject* subj, int (*f)(int))
+static inline cmark_chunk take_while(subject* subj, int (*f)(int))
 {
 	unsigned char c;
 	int startpos = subj->pos;
@@ -188,7 +189,7 @@ static inline chunk take_while(subject* subj, int (*f)(int))
 		len++;
 	}
 
-	return chunk_dup(&subj->input, startpos, len);
+	return cmark_chunk_dup(&subj->input, startpos, len);
 }
 
 // Try to process a backtick code span that began with a
@@ -211,7 +212,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
 		advance(subj);
 		numticks++;
 	}
-	if (numticks != openticklength){
+	if (numticks != openticklength) {
 		return(scan_to_closing_backticks(subj, openticklength));
 	}
 	return (subj->pos);
@@ -221,7 +222,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
 // Assumes that the subject has a backtick at the current position.
 static cmark_node* handle_backticks(subject *subj)
 {
-	chunk openticks = take_while(subj, isbacktick);
+	cmark_chunk openticks = take_while(subj, isbacktick);
 	int startpos = subj->pos;
 	int endpos = scan_to_closing_backticks(subj, openticks.len);
 
@@ -229,34 +230,66 @@ static cmark_node* handle_backticks(subject *subj)
 		subj->pos = startpos; // rewind
 		return make_str(openticks);
 	} else {
-		strbuf buf = GH_BUF_INIT;
+		cmark_strbuf buf = GH_BUF_INIT;
 
-		strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
-		strbuf_trim(&buf);
-		strbuf_normalize_whitespace(&buf);
+		cmark_strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
+		cmark_strbuf_trim(&buf);
+		cmark_strbuf_normalize_whitespace(&buf);
 
-		return make_code(chunk_buf_detach(&buf));
+		return make_code(cmark_chunk_buf_detach(&buf));
 	}
 }
 
 // Scan ***, **, or * and return number scanned, or 0.
 // Advances position.
-static int scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
+static int
+scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
 {
 	int numdelims = 0;
-	unsigned char char_before, char_after;
+	int before_char_pos;
+	int32_t after_char = 0; // code point following the delimiter run
+	int32_t before_char = 0; // code point preceding the delimiter run
+	int len;
+
+	if (subj->pos == 0) { // at the start of input, pretend a newline (10) precedes the run
+		before_char = 10;
+	} else {
+		before_char_pos = subj->pos - 1;
+		// walk back to the beginning of the UTF-8 sequence:
+		while (peek_at(subj, before_char_pos) >> 6 == 2 &&
+		       before_char_pos > 0) {
+			before_char_pos -= 1;
+		}
+		len = utf8proc_iterate(subj->input.data + before_char_pos,
+		                       subj->pos - before_char_pos, &before_char);
+		if (len == -1) { // decoding failed: fall back to newline
+			before_char = 10;
+		}
+	}
 
-	char_before = subj->pos == 0 ? '\n' : peek_at(subj, subj->pos - 1);
 	while (peek_char(subj) == c) {
 		numdelims++;
 		advance(subj);
 	}
-	char_after = peek_char(subj);
-	*can_open = numdelims > 0 && !isspace(char_after);
-	*can_close = numdelims > 0 && !isspace(char_before);
+
+	len = utf8proc_iterate(subj->input.data + subj->pos,
+	                       subj->input.len - subj->pos, &after_char);
+	if (len == -1) { // decoding failed (or nothing left to decode): fall back to newline
+		after_char = 10;
+	}
+	*can_open = numdelims > 0 && !utf8proc_is_space(after_char) && // opener: not followed by space ...
+	            !(utf8proc_is_punctuation(after_char) && // ... and not (followed by punctuation
+	              !utf8proc_is_space(before_char) && // while preceded by something that is
+	              !utf8proc_is_punctuation(before_char)); // neither space nor punctuation)
+	*can_close = numdelims > 0 && !utf8proc_is_space(before_char) && // closer: mirror image of the opener rule
+	             !(utf8proc_is_punctuation(before_char) &&
+	               !utf8proc_is_space(after_char) &&
+	               !utf8proc_is_punctuation(after_char));
 	if (c == '_') {
-		*can_open = *can_open && !isalnum(char_before);
-		*can_close = *can_close && !isalnum(char_after);
+		*can_open = *can_open && !(before_char < 128 && // '_' cannot open right after an ASCII alphanumeric
+		                           cmark_isalnum((char)before_char));
+		*can_close = *can_close && !(after_char < 128 && // range-check the value that is cast below (was before_char)
+		                             cmark_isalnum((char)after_char));
 	}
 	return numdelims;
 }
@@ -293,10 +326,10 @@ static void remove_delimiter(subject *subj, delimiter *delim)
 }
 
 static void push_delimiter(subject *subj, unsigned char c, bool can_open,
-			   bool can_close, cmark_node *inl_text)
+                           bool can_close, cmark_node *inl_text)
 {
 	delimiter *delim =
-		(delimiter*)malloc(sizeof(delimiter));
+	    (delimiter*)malloc(sizeof(delimiter));
 	if (delim == NULL) {
 		return;
 	}
@@ -310,6 +343,7 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open,
 		delim->previous->next = delim;
 	}
 	delim->position = subj->pos;
+	delim->active = true;
 	subj->last_delim = delim;
 }
 
@@ -323,7 +357,7 @@ static cmark_node* handle_strong_emph(subject* subj, unsigned char c)
 
 	numdelims = scan_delims(subj, c, &can_open, &can_close);
 
-	inl_text = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims));
+	inl_text = make_str(cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims));
 
 	if (can_open || can_close) {
 		push_delimiter(subj, c, can_open, can_close, inl_text);
@@ -384,7 +418,7 @@ S_insert_emph(subject *subj, delimiter *opener, delimiter *closer)
 	// calculate the actual number of characters used from this closer
 	if (closer_num_chars < 3 || opener_num_chars < 3) {
 		use_delims = closer_num_chars <= opener_num_chars ?
-			closer_num_chars : opener_num_chars;
+		             closer_num_chars : opener_num_chars;
 	} else { // closer and opener both have >= 3 characters
 		use_delims = closer_num_chars % 2 == 0 ? 2 : 1;
 	}
@@ -409,13 +443,12 @@ S_insert_emph(subject *subj, delimiter *opener, delimiter *closer)
 	// if opener has 0 characters, remove it and its associated inline
 	if (opener_num_chars == 0) {
 		// replace empty opener inline with emph
-		chunk_free(&(opener_inl->as.literal));
+		cmark_chunk_free(&(opener_inl->as.literal));
 		emph = opener_inl;
 		emph->type = use_delims == 1 ? NODE_EMPH : NODE_STRONG;
 		// remove opener from list
 		remove_delimiter(subj, opener);
-	}
-	else {
+	} else {
 		// create new emph or strong, and splice it in to our inlines
 		// between the opener and closer
 		emph = use_delims == 1 ? make_emph() : make_strong();
@@ -455,14 +488,14 @@ static cmark_node* handle_backslash(subject *subj)
 {
 	advance(subj);
 	unsigned char nextchar = peek_char(subj);
-	if (ispunct(nextchar)) {  // only ascii symbols and newline can be escaped
+	if (cmark_ispunct(nextchar)) {  // only ascii symbols and newline can be escaped
 		advance(subj);
-		return make_str(chunk_dup(&subj->input, subj->pos - 1, 1));
+		return make_str(cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
 	} else if (nextchar == '\n') {
 		advance(subj);
 		return make_linebreak();
 	} else {
-		return make_str(chunk_literal("\\"));
+		return make_str(cmark_chunk_literal("\\"));
 	}
 }
 
@@ -470,31 +503,31 @@ static cmark_node* handle_backslash(subject *subj)
 // Assumes the subject has an '&' character at the current position.
 static cmark_node* handle_entity(subject* subj)
 {
-	strbuf ent = GH_BUF_INIT;
+	cmark_strbuf ent = GH_BUF_INIT;
 	size_t len;
 
 	advance(subj);
 
 	len = houdini_unescape_ent(&ent,
-				   subj->input.data + subj->pos,
-				   subj->input.len - subj->pos
-				   );
+	                           subj->input.data + subj->pos,
+	                           subj->input.len - subj->pos
+	                          );
 
 	if (len == 0)
-		return make_str(chunk_literal("&"));
+		return make_str(cmark_chunk_literal("&"));
 
 	subj->pos += len;
-	return make_str(chunk_buf_detach(&ent));
+	return make_str(cmark_chunk_buf_detach(&ent));
 }
 
 // Like make_str, but parses entities.
 // Returns an inline sequence consisting of str and entity elements.
-static cmark_node *make_str_with_entities(chunk *content)
+static cmark_node *make_str_with_entities(cmark_chunk *content)
 {
-	strbuf unescaped = GH_BUF_INIT;
+	cmark_strbuf unescaped = GH_BUF_INIT;
 
 	if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) {
-		return make_str(chunk_buf_detach(&unescaped));
+		return make_str(cmark_chunk_buf_detach(&unescaped));
 	} else {
 		return make_str(*content);
 	}
@@ -502,11 +535,11 @@ static cmark_node *make_str_with_entities(chunk *content)
 
 // Clean a URL: remove surrounding whitespace and surrounding <>,
 // and remove \ that escape punctuation.
-unsigned char *cmark_clean_url(chunk *url)
+unsigned char *cmark_clean_url(cmark_chunk *url)
 {
-	strbuf buf = GH_BUF_INIT;
+	cmark_strbuf buf = GH_BUF_INIT;
 
-	chunk_trim(url);
+	cmark_chunk_trim(url);
 
 	if (url->len == 0)
 		return NULL;
@@ -517,32 +550,32 @@ unsigned char *cmark_clean_url(chunk *url)
 		houdini_unescape_html_f(&buf, url->data, url->len);
 	}
 
-	strbuf_unescape(&buf);
-	return strbuf_detach(&buf);
+	cmark_strbuf_unescape(&buf);
+	return cmark_strbuf_detach(&buf);
 }
 
-unsigned char *cmark_clean_title(chunk *title)
+unsigned char *cmark_clean_title(cmark_chunk *title)
 {
-       strbuf buf = GH_BUF_INIT;
-       unsigned char first, last;
+	cmark_strbuf buf = GH_BUF_INIT;
+	unsigned char first, last;
 
-       if (title->len == 0)
-               return NULL;
+	if (title->len == 0)
+		return NULL;
 
-       first = title->data[0];
-       last = title->data[title->len - 1];
+	first = title->data[0];
+	last = title->data[title->len - 1];
 
-       // remove surrounding quotes if any:
-       if ((first == '\'' && last == '\'') ||
-           (first == '(' && last == ')') ||
-           (first == '"' && last == '"')) {
-               houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
-       } else {
-               houdini_unescape_html_f(&buf, title->data, title->len);
-       }
+	// remove surrounding quotes if any:
+	if ((first == '\'' && last == '\'') ||
+	    (first == '(' && last == ')') ||
+	    (first == '"' && last == '"')) {
+		houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
+	} else {
+		houdini_unescape_html_f(&buf, title->data, title->len);
+	}
 
-       strbuf_unescape(&buf);
-       return strbuf_detach(&buf);
+	cmark_strbuf_unescape(&buf);
+	return cmark_strbuf_detach(&buf);
 }
 
 // Parse an autolink or HTML tag.
@@ -550,51 +583,51 @@ unsigned char *cmark_clean_title(chunk *title)
 static cmark_node* handle_pointy_brace(subject* subj)
 {
 	int matchlen = 0;
-	chunk contents;
+	cmark_chunk contents;
 
 	advance(subj);  // advance past first <
 
 	// first try to match a URL autolink
 	matchlen = scan_autolink_uri(&subj->input, subj->pos);
 	if (matchlen > 0) {
-		contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
+		contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
 		subj->pos += matchlen;
 
 		return make_autolink(
-				     make_str_with_entities(&contents),
-				     contents, 0
-				     );
+		           make_str_with_entities(&contents),
+		           contents, 0
+		       );
 	}
 
 	// next try to match an email autolink
 	matchlen = scan_autolink_email(&subj->input, subj->pos);
 	if (matchlen > 0) {
-		contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
+		contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
 		subj->pos += matchlen;
 
 		return make_autolink(
-				     make_str_with_entities(&contents),
-				     contents, 1
-				     );
+		           make_str_with_entities(&contents),
+		           contents, 1
+		       );
 	}
 
 	// finally, try to match an html tag
 	matchlen = scan_html_tag(&subj->input, subj->pos);
 	if (matchlen > 0) {
-		contents = chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
+		contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
 		subj->pos += matchlen;
 		return make_raw_html(contents);
 	}
 
 	// if nothing matches, just return the opening <:
-	return make_str(chunk_literal("<"));
+	return make_str(cmark_chunk_literal("<"));
 }
 
 // Parse a link label.  Returns 1 if successful.
 // Note:  unescaped brackets are not allowed in labels.
 // The label begins with `[` and ends with the first `]` character
 // encountered.  Backticks in labels do not start code spans.
-static int link_label(subject* subj, chunk *raw_label)
+static int link_label(subject* subj, cmark_chunk *raw_label)
 {
 	int startpos = subj->pos;
 	int length = 0;
@@ -611,7 +644,7 @@ static int link_label(subject* subj, chunk *raw_label)
 		if (c == '\\') {
 			advance(subj);
 			length++;
-			if (ispunct(peek_char(subj))) {
+			if (cmark_ispunct(peek_char(subj))) {
 				advance(subj);
 				length++;
 			}
@@ -625,12 +658,12 @@ static int link_label(subject* subj, chunk *raw_label)
 	}
 
 	if (c == ']') { // match found
-		*raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
+		*raw_label = cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
 		advance(subj);  // advance past ]
 		return 1;
 	}
 
- noMatch:
+noMatch:
 	subj->pos = startpos; // rewind
 	return 0;
 
@@ -645,13 +678,12 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
 	int sps;
 	cmark_reference *ref;
 	bool is_image = false;
-	chunk urlchunk, titlechunk;
+	cmark_chunk url_chunk, title_chunk;
 	unsigned char *url, *title;
 	delimiter *opener;
-	delimiter *tmp_delim;
 	cmark_node *link_text;
 	cmark_node *inl;
-	chunk raw_label;
+	cmark_chunk raw_label;
 	int found_label;
 
 	advance(subj);  // advance past ]
@@ -667,7 +699,13 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
 	}
 
 	if (opener == NULL) {
-		return make_str(chunk_literal("]"));
+		return make_str(cmark_chunk_literal("]"));
+	}
+
+	if (!opener->active) {
+		// take delimiter off stack
+		remove_delimiter(subj, opener);
+		return make_str(cmark_chunk_literal("]"));
 	}
 
 	// If we got here, we matched a potential link/image text.
@@ -688,19 +726,19 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
 
 		// ensure there are spaces btw url and title
 		endtitle = (starttitle == endurl) ? starttitle :
-			starttitle + scan_link_title(&subj->input, starttitle);
+		           starttitle + scan_link_title(&subj->input, starttitle);
 
 		endall = endtitle + scan_spacechars(&subj->input, endtitle);
 
 		if (peek_at(subj, endall) == ')') {
 			subj->pos = endall + 1;
 
-			urlchunk = chunk_dup(&subj->input, starturl, endurl - starturl);
-			titlechunk = chunk_dup(&subj->input, starttitle, endtitle - starttitle);
-			url = cmark_clean_url(&urlchunk);
-			title = cmark_clean_title(&titlechunk);
-			chunk_free(&urlchunk);
-			chunk_free(&titlechunk);
+			url_chunk = cmark_chunk_dup(&subj->input, starturl, endurl - starturl);
+			title_chunk = cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
+			url = cmark_clean_url(&url_chunk);
+			title = cmark_clean_title(&title_chunk);
+			cmark_chunk_free(&url_chunk);
+			cmark_chunk_free(&title_chunk);
 			goto match;
 
 		} else {
@@ -711,12 +749,12 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
 	// Next, look for a following [link label] that matches in refmap.
 	// skip spaces
 	subj->pos = subj->pos + scan_spacechars(&subj->input, subj->pos);
-	raw_label = chunk_literal("");
+	raw_label = cmark_chunk_literal("");
 	found_label = link_label(subj, &raw_label);
 	if (!found_label || raw_label.len == 0) {
-		chunk_free(&raw_label);
-		raw_label = chunk_dup(&subj->input, opener->position,
-				      initial_pos - opener->position - 1);
+		cmark_chunk_free(&raw_label);
+		raw_label = cmark_chunk_dup(&subj->input, opener->position,
+		                            initial_pos - opener->position - 1);
 	}
 
 	if (!found_label) {
@@ -726,7 +764,7 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
 	}
 
 	ref = cmark_reference_lookup(subj->refmap, &raw_label);
-	chunk_free(&raw_label);
+	cmark_chunk_free(&raw_label);
 
 	if (ref != NULL) { // found
 		url = bufdup(ref->url);
@@ -740,12 +778,12 @@ noMatch:
 	// If we fall through to here, it means we didn't match a link:
 	remove_delimiter(subj, opener);  // remove this opener from delimiter list
 	subj->pos = initial_pos;
-	return make_str(chunk_literal("]"));
+	return make_str(cmark_chunk_literal("]"));
 
 match:
 	inl = opener->inl_text;
 	inl->type = is_image ? NODE_IMAGE : NODE_LINK;
-	chunk_free(&inl->as.literal);
+	cmark_chunk_free(&inl->as.literal);
 	inl->first_child = link_text;
 	process_emphasis(subj, opener->previous);
 	inl->as.link.url   = url;
@@ -763,17 +801,20 @@ match:
 	parent->last_child = inl;
 
 	// process_emphasis will remove this delimiter and all later ones.
-	// Now, if we have a link, we also want to remove earlier link
-        // delimiters. (This code can be removed if we decide to allow links
+	// Now, if we have a link, we also want to deactivate earlier link
+	// delimiters. (This code can be removed if we decide to allow links
 	// inside links.)
 	if (!is_image) {
 		opener = subj->last_delim;
 		while (opener != NULL) {
-			tmp_delim = opener->previous;
 			if (opener->delim_char == '[') {
-				remove_delimiter(subj, opener);
+				if (!opener->active) {
+					break;
+				} else {
+					opener->active = false;
+				}
 			}
-			opener = tmp_delim;
+			opener = opener->previous;
 		}
 	}
 
@@ -819,7 +860,8 @@ static int subject_find_special_char(subject *subj)
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	};
 
 	int n = subj->pos + 1;
 
@@ -837,14 +879,14 @@ static int subject_find_special_char(subject *subj)
 static int parse_inline(subject* subj, cmark_node * parent)
 {
 	cmark_node* new_inl = NULL;
-	chunk contents;
+	cmark_chunk contents;
 	unsigned char c;
 	int endpos;
 	c = peek_char(subj);
 	if (c == 0) {
 		return 0;
 	}
-	switch(c){
+	switch(c) {
 	case '\n':
 		new_inl = handle_newline(subj);
 		break;
@@ -866,7 +908,7 @@ static int parse_inline(subject* subj, cmark_node * parent)
 		break;
 	case '[':
 		advance(subj);
-		new_inl = make_str(chunk_literal("["));
+		new_inl = make_str(cmark_chunk_literal("["));
 		push_delimiter(subj, '[', true, false, new_inl);
 		break;
 	case ']':
@@ -876,20 +918,20 @@ static int parse_inline(subject* subj, cmark_node * parent)
 		advance(subj);
 		if (peek_char(subj) == '[') {
 			advance(subj);
-			new_inl = make_str(chunk_literal("!["));
+			new_inl = make_str(cmark_chunk_literal("!["));
 			push_delimiter(subj, '!', false, true, new_inl);
 		} else {
-			new_inl = make_str(chunk_literal("!"));
+			new_inl = make_str(cmark_chunk_literal("!"));
 		}
 		break;
 	default:
 		endpos = subject_find_special_char(subj);
-		contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
+		contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
 		subj->pos = endpos;
 
 		// if we're at a newline, strip trailing spaces.
 		if (peek_char(subj) == '\n') {
-			chunk_rtrim(&contents);
+			cmark_chunk_rtrim(&contents);
 		}
 
 		new_inl = make_str(contents);
@@ -918,7 +960,7 @@ static void spnl(subject* subj)
 	bool seen_newline = false;
 	while (peek_char(subj) == ' ' ||
 	       (!seen_newline &&
-		(seen_newline = peek_char(subj) == '\n'))) {
+	        (seen_newline = peek_char(subj) == '\n'))) {
 		advance(subj);
 	}
 }
@@ -927,13 +969,13 @@ static void spnl(subject* subj)
 // Modify refmap if a reference is encountered.
 // Return 0 if no reference found, otherwise position of subject
 // after reference is parsed.
-int cmark_parse_reference_inline(strbuf *input, cmark_reference_map *refmap)
+int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap)
 {
 	subject subj;
 
-	chunk lab;
-	chunk url;
-	chunk title;
+	cmark_chunk lab;
+	cmark_chunk url;
+	cmark_chunk title;
 
 	int matchlen = 0;
 	int beforetitle;
@@ -955,7 +997,7 @@ int cmark_parse_reference_inline(strbuf *input, cmark_reference_map *refmap)
 	spnl(&subj);
 	matchlen = scan_link_url(&subj.input, subj.pos);
 	if (matchlen) {
-		url = chunk_dup(&subj.input, subj.pos, matchlen);
+		url = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
 		subj.pos += matchlen;
 	} else {
 		return 0;
@@ -966,11 +1008,11 @@ int cmark_parse_reference_inline(strbuf *input, cmark_reference_map *refmap)
 	spnl(&subj);
 	matchlen = scan_link_title(&subj.input, subj.pos);
 	if (matchlen) {
-		title = chunk_dup(&subj.input, subj.pos, matchlen);
+		title = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
 		subj.pos += matchlen;
 	} else {
 		subj.pos = beforetitle;
-		title = chunk_literal("");
+		title = cmark_chunk_literal("");
 	}
 	// parse final spaces and newline:
 	while (peek_char(&subj) == ' ') {

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/iterator.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/iterator.c b/compiler/modules/CommonMark/src/iterator.c
new file mode 100644
index 0000000..4daec2d
--- /dev/null
+++ b/compiler/modules/CommonMark/src/iterator.c
@@ -0,0 +1,140 @@
+#include <assert.h>
+#include <stdlib.h>
+
+#include "config.h"
+#include "node.h"
+#include "cmark.h"
+#include "iterator.h"
+
+static const int S_leaf_mask = // one bit per node type that S_is_leaf reports as a leaf
+    (1 << CMARK_NODE_HTML)        |
+    (1 << CMARK_NODE_HRULE)       |
+    (1 << CMARK_NODE_CODE_BLOCK)  |
+    (1 << CMARK_NODE_TEXT)        |
+    (1 << CMARK_NODE_SOFTBREAK)   |
+    (1 << CMARK_NODE_LINEBREAK)   |
+    (1 << CMARK_NODE_CODE)        |
+    (1 << CMARK_NODE_INLINE_HTML);
+
+cmark_iter*
+cmark_iter_new(cmark_node *root) // returns a malloc'd iterator for root, or NULL if root is NULL or malloc fails
+{
+	if (root == NULL) {
+		return NULL;
+	}
+	cmark_iter *iter = (cmark_iter*)malloc(sizeof(cmark_iter));
+	if (iter == NULL) {
+		return NULL;
+	}
+	iter->root         = root;
+	iter->cur.ev_type  = CMARK_EVENT_NONE; // nothing has been delivered yet
+	iter->cur.node     = NULL;
+	iter->next.ev_type = CMARK_EVENT_ENTER; // the first cmark_iter_next call reports ENTER on root
+	iter->next.node    = root;
+	return iter;
+}
+
+void
+cmark_iter_free(cmark_iter *iter) // releases the iterator struct only; no node is touched
+{
+	free(iter);
+}
+
+static bool
+S_is_leaf(cmark_node *node) // true when node->type has its bit set in S_leaf_mask
+{
+	return (1 << node->type) & S_leaf_mask; // nonzero result converts to true
+}
+
+cmark_event_type
+cmark_iter_next(cmark_iter *iter) // delivers the pending event (returned and stored in iter->cur), then precomputes the following one
+{
+	cmark_event_type  ev_type = iter->next.ev_type;
+	cmark_node       *node    = iter->next.node;
+
+	iter->cur.ev_type = ev_type;
+	iter->cur.node    = node;
+
+	if (ev_type == CMARK_EVENT_DONE) { // finished: iter->next is left as is, so further calls return DONE again
+		return ev_type;
+	}
+
+	/* roll forward to next item, setting both fields */
+	if (ev_type == CMARK_EVENT_ENTER && !S_is_leaf(node)) { // entering a non-leaf: descend, or exit at once if it has no children
+		if (node->first_child == NULL) {
+			/* stay on this node but exit */
+			iter->next.ev_type = CMARK_EVENT_EXIT;
+		} else {
+			iter->next.ev_type = CMARK_EVENT_ENTER;
+			iter->next.node    = node->first_child;
+		}
+	} else if (node == iter->root) { // reached after EXIT of root, or after ENTER of a root that is a leaf
+		/* don't move past root */
+		iter->next.ev_type = CMARK_EVENT_DONE;
+		iter->next.node    = NULL;
+	} else if (node->next) { // leaf entered or non-leaf exited: go on to the next sibling
+		iter->next.ev_type = CMARK_EVENT_ENTER;
+		iter->next.node    = node->next;
+	} else if (node->parent) { // no further sibling: report EXIT on the parent
+		iter->next.ev_type = CMARK_EVENT_EXIT;
+		iter->next.node    = node->parent;
+	} else { // a non-root node with neither next sibling nor parent
+		assert(false);
+		iter->next.ev_type = CMARK_EVENT_DONE;
+		iter->next.node    = NULL;
+	}
+
+	return ev_type;
+}
+
+void
+cmark_iter_reset(cmark_iter *iter, cmark_node *current, // repositions iter so that (current, event_type) is its current event
+                 cmark_event_type event_type)
+{
+	iter->next.ev_type = event_type;
+	iter->next.node    = current;
+	cmark_iter_next(iter); // step once: moves the pair into iter->cur and computes the follow-up event
+}
+
+cmark_node*
+cmark_iter_get_node(cmark_iter *iter) // pure accessor: node of the current event; does not advance the iterator
+{
+	return iter->cur.node;
+}
+
+cmark_event_type
+cmark_iter_get_event_type(cmark_iter *iter) // pure accessor: type of the current event; does not advance the iterator
+{
+	return iter->cur.ev_type;
+}
+
+
+void cmark_consolidate_text_nodes(cmark_node *root) // merges each run of adjacent TEXT siblings below root into the first node of the run
+{
+	cmark_iter *iter = cmark_iter_new(root); // NULL when root is NULL or allocation fails
+	cmark_strbuf buf = GH_BUF_INIT;
+	cmark_event_type ev_type;
+	cmark_node *cur, *tmp, *next;
+
+	while (iter != NULL && (ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
+		cur = cmark_iter_get_node(iter);
+		if (ev_type == CMARK_EVENT_ENTER &&
+		    cur->type == CMARK_NODE_TEXT &&
+		    cur->next &&
+		    cur->next->type == CMARK_NODE_TEXT) {
+			cmark_strbuf_clear(&buf);
+			cmark_strbuf_puts(&buf, cmark_node_get_literal(cur));
+			tmp = cur->next;
+			while (tmp && tmp->type == CMARK_NODE_TEXT) {
+				cmark_iter_next(iter); // step past tmp before it is freed, so the iterator never holds a freed node
+				cmark_strbuf_puts(&buf, cmark_node_get_literal(tmp));
+				next = tmp->next; // saved before tmp is freed
+				cmark_node_free(tmp);
+				tmp = next;
+			}
+			cmark_node_set_literal(cur, (char *)cmark_strbuf_detach(&buf)); // NOTE(review): confirm set_literal takes ownership of the detached buffer (leak if it copies)
+		}
+	}
+
+	cmark_iter_free(iter); // free(NULL) is a no-op, so a NULL iter is fine here
+}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/iterator.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/iterator.h b/compiler/modules/CommonMark/src/iterator.h
new file mode 100644
index 0000000..027b10b
--- /dev/null
+++ b/compiler/modules/CommonMark/src/iterator.h
@@ -0,0 +1,25 @@
+#ifndef CMARK_ITERATOR_H
+#define CMARK_ITERATOR_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "cmark.h"
+
+typedef struct { // one iteration step: an event type paired with the node it refers to
+	cmark_event_type  ev_type;
+	cmark_node       *node;
+} cmark_iter_state;
+
+struct cmark_iter {
+	cmark_node       *root; // node the iterator was created for
+	cmark_iter_state  cur;  // most recently delivered step
+	cmark_iter_state  next; // step queued for delivery
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/man.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/man.c b/compiler/modules/CommonMark/src/man.c
new file mode 100644
index 0000000..2c8a3a5
--- /dev/null
+++ b/compiler/modules/CommonMark/src/man.c
@@ -0,0 +1,249 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "config.h"
+#include "cmark.h"
+#include "node.h"
+#include "buffer.h"
+
+// Functions to convert cmark_nodes to groff man strings.
+
+static void escape_man(cmark_strbuf *dest, const unsigned char *source, int length)
+{
+	int i;
+	unsigned char c;
+	// Append source[0..length) to dest, escaped for groff man output.
+	for (i = 0; i < length; i++) {
+		c = source[i];
+		// '.' or '\'' opening ANY line (not just the buffer) would start a control line.
+		if ((c == '.' || c == '\'') && (i == 0 || source[i - 1] == '\n')) {
+			cmark_strbuf_puts(dest, "\\&"); // zero-width guard, then the char itself
+			cmark_strbuf_putc(dest, c);
+		} else if (c == '-') {
+			cmark_strbuf_puts(dest, "\\-");
+		} else if (c == '\\') {
+			cmark_strbuf_puts(dest, "\\e");
+		} else {
+			cmark_strbuf_putc(dest, c);
+		}
+	}
+}
+
+static inline void cr(cmark_strbuf *man) // end the current output line, if one is open
+{
+	if (man->size == 0 || man->ptr[man->size - 1] == '\n') return;
+	cmark_strbuf_putc(man, '\n');
+}
+
+struct render_state {
+	cmark_strbuf* man;  // output buffer that S_render_node appends to
+	cmark_node *plain;  // set on entering an image; while non-NULL, nodes render as plain text
+};
+
+static int
+S_render_node(cmark_node *node, cmark_event_type ev_type,
+              struct render_state *state)
+{
+	cmark_node *tmp;
+	cmark_strbuf *man = state->man;
+	int list_number;
+	bool entering = (ev_type == CMARK_EVENT_ENTER);
+	// S_render_node: append groff man markup for one (node, event) pair to state->man.
+	if (state->plain == node) { // back at original node
+		state->plain = NULL;
+	}
+
+	if (state->plain != NULL) { // inside an image: literals and breaks only, no markup
+		switch(node->type) {
+		case CMARK_NODE_TEXT:
+		case CMARK_NODE_CODE:
+			escape_man(man, node->as.literal.data,
+			           node->as.literal.len);
+			break;
+
+		case CMARK_NODE_LINEBREAK:
+		case CMARK_NODE_SOFTBREAK:
+			cmark_strbuf_putc(man, ' ');
+			break;
+
+		default:
+			break;
+		}
+		return 1;
+	}
+
+	switch (node->type) {
+	case CMARK_NODE_DOCUMENT:
+		break;
+
+	case CMARK_NODE_BLOCK_QUOTE:
+		if (entering) {
+			cr(man);
+			cmark_strbuf_puts(man, ".RS");
+			cr(man);
+		} else {
+			cr(man);
+			cmark_strbuf_puts(man, ".RE");
+			cr(man);
+		}
+		break;
+
+	case CMARK_NODE_LIST:
+		break;
+
+	case CMARK_NODE_ITEM:
+		if (entering) {
+			cr(man);
+			cmark_strbuf_puts(man, ".IP ");
+			if (cmark_node_get_list_type(node->parent) ==
+			    CMARK_BULLET_LIST) {
+				cmark_strbuf_puts(man, "\\[bu] 2");
+			} else {
+				list_number = cmark_node_get_list_start(node->parent);
+				tmp = node;
+				while (tmp->prev) { // item number = list start + count of preceding siblings
+					tmp = tmp->prev;
+					list_number += 1;
+				}
+				cmark_strbuf_printf(man, "\"%d.\" 4", list_number);
+			}
+			cr(man);
+		} else {
+			cr(man);
+		}
+		break;
+
+	case CMARK_NODE_HEADER:
+		if (entering) {
+			cr(man);
+			cmark_strbuf_puts(man,
+			                  cmark_node_get_header_level(node) == 1 ?
+			                  ".SH" : ".SS");
+			cr(man);
+		} else {
+			cr(man);
+		}
+		break;
+
+	case CMARK_NODE_CODE_BLOCK:
+		cr(man);
+		cmark_strbuf_puts(man, ".IP\n.nf\n\\f[C]\n");
+		escape_man(man, node->as.code.literal.data,
+		           node->as.code.literal.len);
+		cr(man);
+		cmark_strbuf_puts(man, "\\f[]\n.fi");
+		cr(man);
+		break;
+
+	case CMARK_NODE_HTML:
+		break;
+
+	case CMARK_NODE_HRULE:
+		cr(man);
+		cmark_strbuf_puts(man, ".PP\n  *  *  *  *  *");
+		cr(man);
+		break;
+
+	case CMARK_NODE_PARAGRAPH:
+		if (entering) {
+			// no blank line if first paragraph in list:
+			if (node->parent &&
+			    node->parent->type == CMARK_NODE_ITEM &&
+			    node->prev == NULL) {
+				// no blank line or .PP
+			} else {
+				cr(man);
+				cmark_strbuf_puts(man, ".PP\n");
+			}
+		} else {
+			cr(man);
+		}
+		break;
+
+	case CMARK_NODE_TEXT:
+		escape_man(man, node->as.literal.data,
+		           node->as.literal.len);
+		break;
+
+	case CMARK_NODE_LINEBREAK:
+		cmark_strbuf_puts(man, ".PD 0\n.P\n.PD");
+		cr(man);
+		break;
+
+	case CMARK_NODE_SOFTBREAK:
+		cmark_strbuf_putc(man, '\n');
+		break;
+
+	case CMARK_NODE_CODE:
+		cmark_strbuf_puts(man, "\\f[C]");
+		escape_man(man, node->as.literal.data, node->as.literal.len);
+		cmark_strbuf_puts(man, "\\f[]");
+		break;
+
+	case CMARK_NODE_INLINE_HTML:
+		break;
+
+	case CMARK_NODE_STRONG:
+		if (entering) {
+			cmark_strbuf_puts(man, "\\f[B]");
+		} else {
+			cmark_strbuf_puts(man, "\\f[]");
+		}
+		break;
+
+	case CMARK_NODE_EMPH:
+		if (entering) {
+			cmark_strbuf_puts(man, "\\f[I]");
+		} else {
+			cmark_strbuf_puts(man, "\\f[]");
+		}
+		break;
+
+	case CMARK_NODE_LINK:
+		if (!entering) { // the URL is appended once the link's children have been rendered
+			cmark_strbuf_printf(man, " (%s)",
+			                    cmark_node_get_url(node));
+		}
+		break;
+
+	case CMARK_NODE_IMAGE:
+		if (entering) {
+			cmark_strbuf_puts(man, "[IMAGE: ");
+			state->plain = node; // switch to plain-text mode until this node is seen again
+		} else {
+			cmark_strbuf_puts(man, "]");
+		}
+		break;
+
+	default:
+		assert(false); // node type not handled by this renderer
+		break;
+	}
+
+	// every path through this function returns 1
+	return 1;
+}
+
+// Render the tree under root as groff man source; the result is detached from the buffer.
+char *cmark_render_man(cmark_node *root, long options)
+{
+	cmark_strbuf out = GH_BUF_INIT;
+	struct render_state rs = { &out, NULL };
+	cmark_iter *walker = cmark_iter_new(root);
+	cmark_event_type event;
+	char *rendered;
+
+	(void)options; // parameter is accepted but not consulted here
+
+	for (event = cmark_iter_next(walker); event != CMARK_EVENT_DONE;
+	     event = cmark_iter_next(walker)) {
+		S_render_node(cmark_iter_get_node(walker), event, &rs);
+	}
+
+	rendered = (char *)cmark_strbuf_detach(&out);
+	cmark_iter_free(walker);
+	cmark_strbuf_free(&out);
+	return rendered;
+}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/node.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/node.c b/compiler/modules/CommonMark/src/node.c
index 243c3e6..3785a27 100644
--- a/compiler/modules/CommonMark/src/node.c
+++ b/compiler/modules/CommonMark/src/node.c
@@ -8,7 +8,8 @@ static void
 S_node_unlink(cmark_node *node);
 
 cmark_node*
-cmark_node_new(cmark_node_type type) {
+cmark_node_new(cmark_node_type type)
+{
 	cmark_node *node = (cmark_node *)calloc(1, sizeof(*node));
 	node->type = type;
 
@@ -38,14 +39,16 @@ void S_free_nodes(cmark_node *e)
 {
 	cmark_node *next;
 	while (e != NULL) {
-		strbuf_free(&e->string_content);
-		switch (e->type){
+		cmark_strbuf_free(&e->string_content);
+		switch (e->type) {
 		case NODE_CODE_BLOCK:
-			strbuf_free(&e->as.code.info);
+			cmark_chunk_free(&e->as.code.info);
+			cmark_chunk_free(&e->as.code.literal);
 			break;
 		case NODE_TEXT:
 		case NODE_INLINE_HTML:
-		case NODE_INLINE_CODE:
+		case NODE_CODE:
+		case NODE_HTML:
 			cmark_chunk_free(&e->as.literal);
 			break;
 		case NODE_LINK:
@@ -68,7 +71,8 @@ void S_free_nodes(cmark_node *e)
 }
 
 void
-cmark_node_free(cmark_node *node) {
+cmark_node_free(cmark_node *node)
+{
 	S_node_unlink(node);
 	node->next = NULL;
 	S_free_nodes(node);
@@ -84,34 +88,52 @@ cmark_node_get_type(cmark_node *node)
 	}
 }
 
-static const char*
-S_type_string(cmark_node *node)
+const char*
+cmark_node_get_type_string(cmark_node *node)
 {
 	if (node == NULL) {
 		return "NONE";
 	}
 
 	switch (node->type) {
-	case CMARK_NODE_NONE:          return "NONE";
-	case CMARK_NODE_DOCUMENT:      return "DOCUMENT";
-	case CMARK_NODE_BLOCK_QUOTE:   return "BLOCK_QUOTE";
-	case CMARK_NODE_LIST:          return "LIST";
-	case CMARK_NODE_LIST_ITEM:     return "LIST_ITEM";
-	case CMARK_NODE_CODE_BLOCK:    return "CODE_BLOCK";
-	case CMARK_NODE_HTML:          return "HTML";
-	case CMARK_NODE_PARAGRAPH:     return "PARAGRAPH";
-	case CMARK_NODE_HEADER:	       return "HEADER";
-	case CMARK_NODE_HRULE:         return "HRULE";
-	case CMARK_NODE_REFERENCE_DEF: return "REFERENCE_DEF";
-	case CMARK_NODE_TEXT:          return "TEXT";
-	case CMARK_NODE_SOFTBREAK:     return "SOFTBREAK";
-	case CMARK_NODE_LINEBREAK:     return "LINEBREAK";
-	case CMARK_NODE_INLINE_CODE:   return "INLINE_CODE";
-	case CMARK_NODE_INLINE_HTML:   return "INLINE_HTML";
-	case CMARK_NODE_EMPH:          return "EMPH";
-	case CMARK_NODE_STRONG:        return "STRONG";
-	case CMARK_NODE_LINK:          return "LINK";
-	case CMARK_NODE_IMAGE:         return "IMAGE";
+	case CMARK_NODE_NONE:
+		return "none";
+	case CMARK_NODE_DOCUMENT:
+		return "document";
+	case CMARK_NODE_BLOCK_QUOTE:
+		return "block_quote";
+	case CMARK_NODE_LIST:
+		return "list";
+	case CMARK_NODE_ITEM:
+		return "item";
+	case CMARK_NODE_CODE_BLOCK:
+		return "code_block";
+	case CMARK_NODE_HTML:
+		return "html";
+	case CMARK_NODE_PARAGRAPH:
+		return "paragraph";
+	case CMARK_NODE_HEADER:
+		return "header";
+	case CMARK_NODE_HRULE:
+		return "hrule";
+	case CMARK_NODE_TEXT:
+		return "text";
+	case CMARK_NODE_SOFTBREAK:
+		return "softbreak";
+	case CMARK_NODE_LINEBREAK:
+		return "linebreak";
+	case CMARK_NODE_CODE:
+		return "code";
+	case CMARK_NODE_INLINE_HTML:
+		return "inline_html";
+	case CMARK_NODE_EMPH:
+		return "emph";
+	case CMARK_NODE_STRONG:
+		return "strong";
+	case CMARK_NODE_LINK:
+		return "link";
+	case CMARK_NODE_IMAGE:
+		return "image";
 	}
 
 	return "<unknown>";
@@ -168,7 +190,8 @@ cmark_node_last_child(cmark_node *node)
 }
 
 static char*
-S_strdup(const char *str) {
+S_strdup(const char *str)
+{
 	size_t size = strlen(str) + 1;
 	char *dup = (char *)malloc(size);
 	memcpy(dup, str, size);
@@ -176,21 +199,22 @@ S_strdup(const char *str) {
 }
 
 const char*
-cmark_node_get_string_content(cmark_node *node) {
+cmark_node_get_literal(cmark_node *node)
+{
 	if (node == NULL) {
 		return NULL;
 	}
 
 	switch (node->type) {
-	case NODE_CODE_BLOCK:
 	case NODE_HTML:
-		return cmark_strbuf_cstr(&node->string_content);
-
 	case NODE_TEXT:
 	case NODE_INLINE_HTML:
-	case NODE_INLINE_CODE:
+	case NODE_CODE:
 		return cmark_chunk_to_cstr(&node->as.literal);
 
+	case NODE_CODE_BLOCK:
+		return cmark_chunk_to_cstr(&node->as.code.literal);
+
 	default:
 		break;
 	}
@@ -199,23 +223,24 @@ cmark_node_get_string_content(cmark_node *node) {
 }
 
 int
-cmark_node_set_string_content(cmark_node *node, const char *content) {
+cmark_node_set_literal(cmark_node *node, const char *content)
+{
 	if (node == NULL) {
 		return 0;
 	}
 
 	switch (node->type) {
-	case NODE_CODE_BLOCK:
 	case NODE_HTML:
-		cmark_strbuf_sets(&node->string_content, content);
-		return 1;
-
 	case NODE_TEXT:
 	case NODE_INLINE_HTML:
-	case NODE_INLINE_CODE:
+	case NODE_CODE:
 		cmark_chunk_set_cstr(&node->as.literal, content);
 		return 1;
 
+	case NODE_CODE_BLOCK:
+		cmark_chunk_set_cstr(&node->as.code.literal, content);
+		return 1;
+
 	default:
 		break;
 	}
@@ -224,7 +249,8 @@ cmark_node_set_string_content(cmark_node *node, const char *content) {
 }
 
 int
-cmark_node_get_header_level(cmark_node *node) {
+cmark_node_get_header_level(cmark_node *node)
+{
 	if (node == NULL) {
 		return 0;
 	}
@@ -241,7 +267,8 @@ cmark_node_get_header_level(cmark_node *node) {
 }
 
 int
-cmark_node_set_header_level(cmark_node *node, int level) {
+cmark_node_set_header_level(cmark_node *node, int level)
+{
 	if (node == NULL || level < 1 || level > 6) {
 		return 0;
 	}
@@ -259,21 +286,22 @@ cmark_node_set_header_level(cmark_node *node, int level) {
 }
 
 cmark_list_type
-cmark_node_get_list_type(cmark_node *node) {
+cmark_node_get_list_type(cmark_node *node)
+{
 	if (node == NULL) {
 		return CMARK_NO_LIST;
 	}
 
 	if (node->type == CMARK_NODE_LIST) {
 		return node->as.list.list_type;
-	}
-	else {
+	} else {
 		return CMARK_NO_LIST;
 	}
 }
 
 int
-cmark_node_set_list_type(cmark_node *node, cmark_list_type type) {
+cmark_node_set_list_type(cmark_node *node, cmark_list_type type)
+{
 	if (!(type == CMARK_BULLET_LIST || type == CMARK_ORDERED_LIST)) {
 		return 0;
 	}
@@ -285,28 +313,61 @@ cmark_node_set_list_type(cmark_node *node, cmark_list_type type) {
 	if (node->type == CMARK_NODE_LIST) {
 		node->as.list.list_type = type;
 		return 1;
+	} else {
+		return 0;
+	}
+}
+
+cmark_delim_type
+cmark_node_get_list_delim(cmark_node *node)
+{
+	if (node == NULL) {
+		return CMARK_NO_DELIM;
+	}
+
+	if (node->type == CMARK_NODE_LIST) {
+		return node->as.list.delimiter;
+	} else {
+		return CMARK_NO_DELIM;
 	}
-	else {
+}
+
+int
+cmark_node_set_list_delim(cmark_node *node, cmark_delim_type delim)
+{
+	if (!(delim == CMARK_PERIOD_DELIM || delim == CMARK_PAREN_DELIM)) {
+		return 0;
+	}
+
+	if (node == NULL) {
+		return 0;
+	}
+
+	if (node->type == CMARK_NODE_LIST) {
+		node->as.list.delimiter = delim;
+		return 1;
+	} else {
 		return 0;
 	}
 }
 
 int
-cmark_node_get_list_start(cmark_node *node) {
+cmark_node_get_list_start(cmark_node *node)
+{
 	if (node == NULL) {
 		return 0;
 	}
 
 	if (node->type == CMARK_NODE_LIST) {
 		return node->as.list.start;
-	}
-	else {
+	} else {
 		return 0;
 	}
 }
 
 int
-cmark_node_set_list_start(cmark_node *node, int start) {
+cmark_node_set_list_start(cmark_node *node, int start)
+{
 	if (node == NULL || start < 0) {
 		return 0;
 	}
@@ -314,28 +375,28 @@ cmark_node_set_list_start(cmark_node *node, int start) {
 	if (node->type == CMARK_NODE_LIST) {
 		node->as.list.start = start;
 		return 1;
-	}
-	else {
+	} else {
 		return 0;
 	}
 }
 
 int
-cmark_node_get_list_tight(cmark_node *node) {
+cmark_node_get_list_tight(cmark_node *node)
+{
 	if (node == NULL) {
 		return 0;
 	}
 
 	if (node->type == CMARK_NODE_LIST) {
 		return node->as.list.tight;
-	}
-	else {
+	} else {
 		return 0;
 	}
 }
 
 int
-cmark_node_set_list_tight(cmark_node *node, int tight) {
+cmark_node_set_list_tight(cmark_node *node, int tight)
+{
 	if (node == NULL) {
 		return 0;
 	}
@@ -343,43 +404,43 @@ cmark_node_set_list_tight(cmark_node *node, int tight) {
 	if (node->type == CMARK_NODE_LIST) {
 		node->as.list.tight = tight;
 		return 1;
-	}
-	else {
+	} else {
 		return 0;
 	}
 }
 
 const char*
-cmark_node_get_fence_info(cmark_node *node) {
+cmark_node_get_fence_info(cmark_node *node)
+{
 	if (node == NULL) {
 		return NULL;
 	}
 
 	if (node->type == NODE_CODE_BLOCK) {
-		return cmark_strbuf_cstr(&node->as.code.info);
-	}
-	else {
+		return cmark_chunk_to_cstr(&node->as.code.info);
+	} else {
 		return NULL;
 	}
 }
 
 int
-cmark_node_set_fence_info(cmark_node *node, const char *info) {
+cmark_node_set_fence_info(cmark_node *node, const char *info)
+{
 	if (node == NULL) {
 		return 0;
 	}
 
 	if (node->type == NODE_CODE_BLOCK) {
-		cmark_strbuf_sets(&node->as.code.info, info);
+		cmark_chunk_set_cstr(&node->as.code.info, info);
 		return 1;
-	}
-	else {
+	} else {
 		return 0;
 	}
 }
 
 const char*
-cmark_node_get_url(cmark_node *node) {
+cmark_node_get_url(cmark_node *node)
+{
 	if (node == NULL) {
 		return NULL;
 	}
@@ -396,7 +457,8 @@ cmark_node_get_url(cmark_node *node) {
 }
 
 int
-cmark_node_set_url(cmark_node *node, const char *url) {
+cmark_node_set_url(cmark_node *node, const char *url)
+{
 	if (node == NULL) {
 		return 0;
 	}
@@ -415,7 +477,8 @@ cmark_node_set_url(cmark_node *node, const char *url) {
 }
 
 const char*
-cmark_node_get_title(cmark_node *node) {
+cmark_node_get_title(cmark_node *node)
+{
 	if (node == NULL) {
 		return NULL;
 	}
@@ -432,7 +495,8 @@ cmark_node_get_title(cmark_node *node) {
 }
 
 int
-cmark_node_set_title(cmark_node *node, const char *title) {
+cmark_node_set_title(cmark_node *node, const char *title)
+{
 	if (node == NULL) {
 		return 0;
 	}
@@ -451,7 +515,8 @@ cmark_node_set_title(cmark_node *node, const char *title) {
 }
 
 int
-cmark_node_get_start_line(cmark_node *node) {
+cmark_node_get_start_line(cmark_node *node)
+{
 	if (node == NULL) {
 		return 0;
 	}
@@ -459,7 +524,8 @@ cmark_node_get_start_line(cmark_node *node) {
 }
 
 int
-cmark_node_get_start_column(cmark_node *node) {
+cmark_node_get_start_column(cmark_node *node)
+{
 	if (node == NULL) {
 		return 0;
 	}
@@ -467,15 +533,26 @@ cmark_node_get_start_column(cmark_node *node) {
 }
 
 int
-cmark_node_get_end_line(cmark_node *node) {
+cmark_node_get_end_line(cmark_node *node)
+{
 	if (node == NULL) {
 		return 0;
 	}
 	return node->end_line;
 }
 
+int
+cmark_node_get_end_column(cmark_node *node)
+{
+	if (node == NULL) {
+		return 0; // NULL node: report 0 instead of dereferencing
+	}
+	return node->end_column;
+}
+
 static inline bool
-S_is_block(cmark_node *node) {
+S_is_block(cmark_node *node)
+{
 	if (node == NULL) {
 		return false;
 	}
@@ -484,7 +561,8 @@ S_is_block(cmark_node *node) {
 }
 
 static inline bool
-S_is_inline(cmark_node *node) {
+S_is_inline(cmark_node *node)
+{
 	if (node == NULL) {
 		return false;
 	}
@@ -517,12 +595,12 @@ S_can_contain(cmark_node *node, cmark_node *child)
 	switch (node->type) {
 	case CMARK_NODE_DOCUMENT:
 	case CMARK_NODE_BLOCK_QUOTE:
-	case CMARK_NODE_LIST_ITEM:
+	case CMARK_NODE_ITEM:
 		return S_is_block(child)
-		       && child->type != CMARK_NODE_LIST_ITEM;
+		       && child->type != CMARK_NODE_ITEM;
 
 	case CMARK_NODE_LIST:
-		return child->type == CMARK_NODE_LIST_ITEM;
+		return child->type == CMARK_NODE_ITEM;
 
 	case CMARK_NODE_PARAGRAPH:
 	case CMARK_NODE_HEADER:
@@ -567,7 +645,8 @@ S_node_unlink(cmark_node *node)
 }
 
 void
-cmark_node_unlink(cmark_node *node) {
+cmark_node_unlink(cmark_node *node)
+{
 	S_node_unlink(node);
 
 	node->next   = NULL;
@@ -664,8 +743,7 @@ cmark_node_prepend_child(cmark_node *node, cmark_node *child)
 
 	if (old_first_child) {
 		old_first_child->prev = child;
-	}
-	else {
+	} else {
 		// Also set last_child if node previously had no children.
 		node->last_child = child;
 	}
@@ -691,8 +769,7 @@ cmark_node_append_child(cmark_node *node, cmark_node *child)
 
 	if (old_last_child) {
 		old_last_child->next = child;
-	}
-	else {
+	} else {
 		// Also set first_child if node previously had no children.
 		node->first_child = child;
 	}
@@ -707,7 +784,8 @@ S_print_error(FILE *out, cmark_node *node, const char *elem)
 		return;
 	}
 	fprintf(out, "Invalid '%s' in node type %s at %d:%d\n", elem,
-		S_type_string(node), node->start_line, node->start_column);
+	        cmark_node_get_type_string(node), node->start_line,
+	        node->start_column);
 }
 
 int
@@ -737,7 +815,7 @@ cmark_node_check(cmark_node *node, FILE *out)
 			continue;
 		}
 
-	next_sibling:
+next_sibling:
 		if (cur == node) {
 			break;
 		}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/node.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/node.h b/compiler/modules/CommonMark/src/node.h
index b842ed8..c0c43d3 100644
--- a/compiler/modules/CommonMark/src/node.h
+++ b/compiler/modules/CommonMark/src/node.h
@@ -26,7 +26,8 @@ typedef struct {
 	int               fence_length;
 	int               fence_offset;
 	unsigned char     fence_char;
-	cmark_strbuf      info;
+	cmark_chunk       info;
+	cmark_chunk       literal;
 } cmark_code;
 
 typedef struct {
@@ -51,6 +52,7 @@ struct cmark_node {
 	int start_line;
 	int start_column;
 	int end_line;
+	int end_column;
 	bool open;
 	bool last_line_blank;
 
@@ -73,4 +75,3 @@ cmark_node_check(cmark_node *node, FILE *out);
 #endif
 
 #endif
-

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/parser.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/parser.h b/compiler/modules/CommonMark/src/parser.h
index 9d65b67..3c8def9 100644
--- a/compiler/modules/CommonMark/src/parser.h
+++ b/compiler/modules/CommonMark/src/parser.h
@@ -17,6 +17,7 @@ struct cmark_parser {
 	struct cmark_node* current;
 	int line_number;
 	cmark_strbuf *curline;
+	int last_line_length;
 	cmark_strbuf *linebuf;
 };
 

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/print.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/print.c b/compiler/modules/CommonMark/src/print.c
deleted file mode 100644
index d2dfe8c..0000000
--- a/compiler/modules/CommonMark/src/print.c
+++ /dev/null
@@ -1,169 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include "cmark.h"
-#include "buffer.h"
-#include "node.h"
-
-#define INDENT 2
-
-static void print_str(strbuf* buffer, const unsigned char *s, int len)
-{
-	int i;
-
-	if (len < 0)
-		len = strlen((char *)s);
-
-	strbuf_putc(buffer, '"');
-	for (i = 0; i < len; ++i) {
-		unsigned char c = s[i];
-
-		switch (c) {
-		case '\n':
-			strbuf_printf(buffer, "\\n");
-			break;
-		case '"':
-			strbuf_printf(buffer, "\\\"");
-			break;
-		case '\\':
-			strbuf_printf(buffer, "\\\\");
-			break;
-		default:
-			strbuf_putc(buffer, (int)c);
-		}
-	}
-	strbuf_putc(buffer, '"');
-}
-
-// Prettyprint an inline list, for debugging.
-static void render_nodes(strbuf* buffer, cmark_node* node, int indent)
-{
-	int i;
-	cmark_list *data;
-
-	while(node != NULL) {
-		for (i=0; i < indent; i++) {
-			strbuf_putc(buffer, ' ');
-		}
-		switch(node->type) {
-		case NODE_DOCUMENT:
-			break;
-		case NODE_BLOCK_QUOTE:
-			strbuf_printf(buffer, "block_quote\n");
-			break;
-		case NODE_LIST_ITEM:
-			strbuf_printf(buffer, "list_item\n");
-			break;
-		case NODE_LIST:
-			data = &(node->as.list);
-			if (data->list_type == CMARK_ORDERED_LIST) {
-				strbuf_printf(buffer, "list (type=ordered tight=%s start=%d delim=%s)\n",
-				       (data->tight ? "true" : "false"),
-				       data->start,
-				       (data->delimiter == CMARK_PAREN_DELIM ? "parens" : "period"));
-			} else {
-				strbuf_printf(buffer, "list (type=bullet tight=%s bullet_char=%c)\n",
-				       (data->tight ? "true" : "false"),
-				       data->bullet_char);
-			}
-			break;
-		case NODE_HEADER:
-			strbuf_printf(buffer, "header (level=%d)\n", node->as.header.level);
-			break;
-		case NODE_PARAGRAPH:
-			strbuf_printf(buffer, "paragraph\n");
-			break;
-		case NODE_HRULE:
-			strbuf_printf(buffer, "hrule\n");
-			break;
-		case NODE_CODE_BLOCK:
-			strbuf_printf(buffer, "code_block info=");
-			print_str(buffer, node->as.code.info.ptr, -1);
-			strbuf_putc(buffer, ' ');
-			print_str(buffer, node->string_content.ptr, -1);
-			strbuf_putc(buffer, '\n');
-			break;
-		case NODE_HTML:
-			strbuf_printf(buffer, "html ");
-			print_str(buffer, node->string_content.ptr, -1);
-			strbuf_putc(buffer, '\n');
-			break;
-		case NODE_REFERENCE_DEF:
-			// skip
-			// strbuf_printf(buffer, "reference_def\n");
-			break;
-		case NODE_TEXT:
-			strbuf_printf(buffer, "text ");
-			print_str(buffer, node->as.literal.data, node->as.literal.len);
-			strbuf_putc(buffer, '\n');
-			break;
-		case NODE_LINEBREAK:
-			strbuf_printf(buffer, "linebreak\n");
-			break;
-		case NODE_SOFTBREAK:
-			strbuf_printf(buffer, "softbreak\n");
-			break;
-		case NODE_INLINE_CODE:
-			strbuf_printf(buffer, "code ");
-			print_str(buffer, node->as.literal.data, node->as.literal.len);
-			strbuf_putc(buffer, '\n');
-			break;
-		case NODE_INLINE_HTML:
-			strbuf_printf(buffer, "inline_html ");
-			print_str(buffer, node->as.literal.data, node->as.literal.len);
-			strbuf_putc(buffer, '\n');
-			break;
-		case NODE_LINK:
-		case NODE_IMAGE:
-			strbuf_printf(buffer, "%s url=", node->type == NODE_LINK ? "link" : "image");
-
-			if (node->as.link.url)
-				print_str(buffer, node->as.link.url, -1);
-
-			if (node->as.link.title) {
-				strbuf_printf(buffer, " title=");
-				print_str(buffer, node->as.link.title, -1);
-			}
-			strbuf_putc(buffer, '\n');
-			break;
-		case NODE_STRONG:
-			strbuf_printf(buffer, "strong\n");
-			break;
-		case NODE_EMPH:
-			strbuf_printf(buffer, "emph\n");
-			break;
-		default:
-			break;
-		}
-		if (node->first_child) { // render children if any
-			indent += INDENT;
-			node = node->first_child;
-		} else if (node->next) { // otherwise render next sibling
-			node = node->next;
-		} else {
-			node = node->parent;  // back up to parent
-			while (node) {
-				indent -= INDENT;
-				if (node->next) {
-					node = node->next;
-					break;
-				} else {
-					node = node->parent;
-				}
-				if (!node) {
-					break;
-				}
-			}
-		}
-	}
-}
-
-char *cmark_render_ast(cmark_node *root)
-{
-	char* result;
-	strbuf buffer = GH_BUF_INIT;
-	render_nodes(&buffer, root, -2);
-	result = (char *)strbuf_detach(&buffer);
-	strbuf_free(&buffer);
-	return result;
-}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/references.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/references.c b/compiler/modules/CommonMark/src/references.c
index 2b1d0a7..37bf4cb 100644
--- a/compiler/modules/CommonMark/src/references.c
+++ b/compiler/modules/CommonMark/src/references.c
@@ -30,9 +30,9 @@ static void reference_free(cmark_reference *ref)
 // remove leading/trailing whitespace, case fold
 // Return NULL if the reference name is actually empty (i.e. composed
 // solely from whitespace)
-static unsigned char *normalize_reference(chunk *ref)
+static unsigned char *normalize_reference(cmark_chunk *ref)
 {
-	strbuf normalized = GH_BUF_INIT;
+	cmark_strbuf normalized = GH_BUF_INIT;
 	unsigned char *result;
 
 	if(ref == NULL)
@@ -42,10 +42,10 @@ static unsigned char *normalize_reference(chunk *ref)
 		return NULL;
 
 	utf8proc_case_fold(&normalized, ref->data, ref->len);
-	strbuf_trim(&normalized);
-	strbuf_normalize_whitespace(&normalized);
+	cmark_strbuf_trim(&normalized);
+	cmark_strbuf_normalize_whitespace(&normalized);
 
-	result = strbuf_detach(&normalized);
+	result = cmark_strbuf_detach(&normalized);
 	assert(result);
 
 	if (result[0] == '\0') {
@@ -73,8 +73,8 @@ static void add_reference(cmark_reference_map *map, cmark_reference* ref)
 	map->table[ref->hash % REFMAP_SIZE] = ref;
 }
 
-void cmark_reference_create(cmark_reference_map *map, chunk *label, chunk *url,
-			    chunk *title)
+void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, cmark_chunk *url,
+                            cmark_chunk *title)
 {
 	cmark_reference *ref;
 	unsigned char *reflabel = normalize_reference(label);
@@ -97,7 +97,7 @@ void cmark_reference_create(cmark_reference_map *map, chunk *label, chunk *url,
 
 // Returns reference if refmap contains a reference with matching
 // label, otherwise NULL.
-cmark_reference* cmark_reference_lookup(cmark_reference_map *map, chunk *label)
+cmark_reference* cmark_reference_lookup(cmark_reference_map *map, cmark_chunk *label)
 {
 	cmark_reference *ref = NULL;
 	unsigned char *norm;


Mime
View raw message