lucy-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nwelln...@apache.org
Subject [03/12] lucy-clownfish git commit: Update CommonMark source code
Date Sun, 18 Jan 2015 18:44:39 GMT
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/scanners.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/scanners.h b/compiler/modules/CommonMark/src/scanners.h
index aa5c00d..f360505 100644
--- a/compiler/modules/CommonMark/src/scanners.h
+++ b/compiler/modules/CommonMark/src/scanners.h
@@ -5,7 +5,7 @@
 extern "C" {
 #endif
 
-int _scan_at(int (*scanner)(const unsigned char *), chunk *c, int offset);
+int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset);
 int _scan_autolink_uri(const unsigned char *p);
 int _scan_autolink_email(const unsigned char *p);
 int _scan_html_tag(const unsigned char *p);

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/scanners.re
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/scanners.re b/compiler/modules/CommonMark/src/scanners.re
index 5e74873..d83efde 100644
--- a/compiler/modules/CommonMark/src/scanners.re
+++ b/compiler/modules/CommonMark/src/scanners.re
@@ -2,7 +2,7 @@
 #include "chunk.h"
 #include "scanners.h"
 
-int _scan_at(int (*scanner)(const unsigned char *), chunk *c, int offset)
+int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset)
 {
 	int res;
 	unsigned char *ptr = (unsigned char *)c->data;
@@ -49,7 +49,7 @@ int _scan_at(int (*scanner)(const unsigned char *), chunk *c, int offset)
   opentag = tagname attribute* spacechar* [/]? [>];
   closetag = [/] tagname spacechar* [>];
 
-  htmlcomment = "!--" ([^-\x00]+ | [-][^-\x00]+)* "-->";
+  htmlcomment = "!---->" | ("!--" ([-]? [^\x00>-]) ([-]? [^\x00-])* "-->");
 
   processinginstruction = "?" ([^?>\x00]+ | [?][^>\x00] | [>])* "?>";
 
@@ -216,7 +216,8 @@ int _scan_close_code_fence(const unsigned char *p)
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  ([`]{3,} | [~]{3,}) / spacechar* [\n] { return (p - start); }
+  [`]{3,} / [ \t]*[\n] { return (p - start); }
+  [~]{3,} / [ \t]*[\n] { return (p - start); }
   .? { return 0; }
 */
 }

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/utf8.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/utf8.c b/compiler/modules/CommonMark/src/utf8.c
index e4ea8e2..d77c5d1 100644
--- a/compiler/modules/CommonMark/src/utf8.c
+++ b/compiler/modules/CommonMark/src/utf8.c
@@ -2,6 +2,7 @@
 #include <stdint.h>
 #include <assert.h>
 
+#include "cmark_ctype.h"
 #include "utf8.h"
 
 static const int8_t utf8proc_utf8class[256] = {
@@ -20,12 +21,13 @@ static const int8_t utf8proc_utf8class[256] = {
 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-	4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 };
+	4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0
+};
 
-static void encode_unknown(strbuf *buf)
+static void encode_unknown(cmark_strbuf *buf)
 {
 	static const uint8_t repl[] = {239, 191, 189};
-	strbuf_put(buf, repl, 3);
+	cmark_strbuf_put(buf, repl, 3);
 }
 
 static int utf8proc_charlen(const uint8_t *str, int str_len)
@@ -81,8 +83,7 @@ static int utf8proc_valid(const uint8_t *str, int str_len)
 				// Overlong
 				return -length;
 			}
-		}
-		else if (str[0] == 0xED) {
+		} else if (str[0] == 0xED) {
 			if (str[1] >= 0xA0) {
 				// Surrogate
 				return -length;
@@ -96,8 +97,7 @@ static int utf8proc_valid(const uint8_t *str, int str_len)
 				// Overlong
 				return -length;
 			}
-		}
-		else if (str[0] >= 0xF4) {
+		} else if (str[0] >= 0xF4) {
 			if (str[0] > 0xF4 || str[1] >= 0x90) {
 				// Above 0x10FFFF
 				return -length;
@@ -109,7 +109,7 @@ static int utf8proc_valid(const uint8_t *str, int str_len)
 	return length;
 }
 
-void utf8proc_detab(strbuf *ob, const uint8_t *line, size_t size)
+void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, size_t size)
 {
 	static const uint8_t whitespace[] = "    ";
 
@@ -120,25 +120,26 @@ void utf8proc_detab(strbuf *ob, const uint8_t *line, size_t size)
 
 		while (i < size && line[i] != '\t' && line[i] != '\0'
 		       && line[i] < 0x80) {
-			i++; tab++;
+			i++;
+			tab++;
 		}
 
 		if (i > org)
-			strbuf_put(ob, line + org, i - org);
+			cmark_strbuf_put(ob, line + org, i - org);
 
 		if (i >= size)
 			break;
 
 		if (line[i] == '\t') {
 			int numspaces = 4 - (tab % 4);
-			strbuf_put(ob, whitespace, numspaces);
+			cmark_strbuf_put(ob, whitespace, numspaces);
 			i += 1;
 			tab += numspaces;
 		} else {
 			int charlen = utf8proc_valid(line + i, size - i);
 
 			if (charlen >= 0) {
-				strbuf_put(ob, line + i, charlen);
+				cmark_strbuf_put(ob, line + i, charlen);
 			} else {
 				encode_unknown(ob);
 				charlen = -charlen;
@@ -170,13 +171,13 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
 		break;
 	case 3:
 		uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) <<  6)
-			+ (str[2] & 0x3F);
+		     + (str[2] & 0x3F);
 		if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) ||
 		    (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1;
 		break;
 	case 4:
 		uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12)
-			+ ((str[2] & 0x3F) <<  6) + (str[3] & 0x3F);
+		     + ((str[2] & 0x3F) <<  6) + (str[3] & 0x3F);
 		if (uc < 0x10000 || uc >= 0x110000) uc = -1;
 		break;
 	}
@@ -188,7 +189,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
 	return length;
 }
 
-void utf8proc_encode_char(int32_t uc, strbuf *buf)
+void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf)
 {
 	uint8_t dst[4];
 	int len = 0;
@@ -224,10 +225,10 @@ void utf8proc_encode_char(int32_t uc, strbuf *buf)
 		return;
 	}
 
-	strbuf_put(buf, dst, len);
+	cmark_strbuf_put(buf, dst, len);
 }
 
-void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len)
+void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len)
 {
 	int32_t c;
 
@@ -253,210 +254,200 @@ void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len)
 int utf8proc_is_space(int32_t uc)
 {
 	return (uc == 9 ||
-		uc == 10 ||
-		uc == 12 ||
-		uc == 13 ||
-		uc == 32 ||
-		uc == 160 ||
-		uc == 5760 ||
-		(uc >= 8192 && uc <= 8202) ||
-		uc == 8239 ||
-		uc == 8287 ||
-		uc == 12288);
+	        uc == 10 ||
+	        uc == 12 ||
+	        uc == 13 ||
+	        uc == 32 ||
+	        uc == 160 ||
+	        uc == 5760 ||
+	        (uc >= 8192 && uc <= 8202) ||
+	        uc == 8239 ||
+	        uc == 8287 ||
+	        uc == 12288);
 }
 
 // matches anything in the P[cdefios] classes.
 int utf8proc_is_punctuation(int32_t uc)
 {
-	return ((uc >= 33 && uc <= 35) ||
-		(uc >= 37 && uc <= 42) ||
-		(uc >= 44 && uc <= 47) ||
-		uc == 58 ||
-		uc == 59 ||
-		uc == 63 ||
-		uc == 64 ||
-		(uc >= 91 && uc <= 93) ||
-		uc == 95 ||
-		uc == 123 ||
-		uc == 125 ||
-		uc == 161 ||
-		uc == 167 ||
-		uc == 171 ||
-		uc == 182 ||
-		uc == 183 ||
-		uc == 187 ||
-		uc == 191 ||
-		uc == 894 ||
-		uc == 903 ||
-		(uc >= 1370 && uc <= 1375) ||
-		uc == 1417 ||
-		uc == 1418 ||
-		uc == 1470 ||
-		uc == 1472 ||
-		uc == 1475 ||
-		uc == 1478 ||
-		uc == 1523 ||
-		uc == 1524 ||
-		uc == 1545 ||
-		uc == 1546 ||
-		uc == 1548 ||
-		uc == 1549 ||
-		uc == 1563 ||
-		uc == 1566 ||
-		uc == 1567 ||
-		(uc >= 1642 && uc <= 1645) ||
-		uc == 1748 ||
-		(uc >= 1792 && uc <= 1805) ||
-		(uc >= 2039 && uc <= 2041) ||
-		(uc >= 2096 && uc <= 2110) ||
-		uc == 2142 ||
-		uc == 2404 ||
-		uc == 2405 ||
-		uc == 2416 ||
-		uc == 2800 ||
-		uc == 3572 ||
-		uc == 3663 ||
-		uc == 3674 ||
-		uc == 3675 ||
-		(uc >= 3844 && uc <= 3858) ||
-		uc == 3860 ||
-		(uc >= 3898 && uc <= 3901) ||
-		uc == 3973 ||
-		(uc >= 4048 && uc <= 4052) ||
-		uc == 4057 ||
-		uc == 4058 ||
-		(uc >= 4170 && uc <= 4175) ||
-		uc == 4347 ||
-		(uc >= 4960 && uc <= 4968) ||
-		uc == 5120 ||
-		uc == 5741 ||
-		uc == 5742 ||
-		uc == 5787 ||
-		uc == 5788 ||
-		(uc >= 5867 && uc <= 5869) ||
-		uc == 5941 ||
-		uc == 5942 ||
-		(uc >= 6100 && uc <= 6102) ||
-		(uc >= 6104 && uc <= 6106) ||
-		(uc >= 6144 && uc <= 6154) ||
-		uc == 6468 ||
-		uc == 6469 ||
-		uc == 6686 ||
-		uc == 6687 ||
-		(uc >= 6816 && uc <= 6822) ||
-		(uc >= 6824 && uc <= 6829) ||
-		(uc >= 7002 && uc <= 7008) ||
-		(uc >= 7164 && uc <= 7167) ||
-		(uc >= 7227 && uc <= 7231) ||
-		uc == 7294 ||
-		uc == 7295 ||
-		(uc >= 7360 && uc <= 7367) ||
-		uc == 7379 ||
-		(uc >= 8208 && uc <= 8231) ||
-		(uc >= 8240 && uc <= 8259) ||
-		(uc >= 8261 && uc <= 8273) ||
-		(uc >= 8275 && uc <= 8286) ||
-		uc == 8317 ||
-		uc == 8318 ||
-		uc == 8333 ||
-		uc == 8334 ||
-		(uc >= 8968 && uc <= 8971) ||
-		uc == 9001 ||
-		uc == 9002 ||
-		(uc >= 10088 && uc <= 10101) ||
-		uc == 10181 ||
-		uc == 10182 ||
-		(uc >= 10214 && uc <= 10223) ||
-		(uc >= 10627 && uc <= 10648) ||
-		(uc >= 10712 && uc <= 10715) ||
-		uc == 10748 ||
-		uc == 10749 ||
-		(uc >= 11513 && uc <= 11516) ||
-		uc == 11518 ||
-		uc == 11519 ||
-		uc == 11632 ||
-		(uc >= 11776 && uc <= 11822) ||
-		(uc >= 11824 && uc <= 11842) ||
-		(uc >= 12289 && uc <= 12291) ||
-		(uc >= 12296 && uc <= 12305) ||
-		(uc >= 12308 && uc <= 12319) ||
-		uc == 12336 ||
-		uc == 12349 ||
-		uc == 12448 ||
-		uc == 12539 ||
-		uc == 42238 ||
-		uc == 42239 ||
-		(uc >= 42509 && uc <= 42511) ||
-		uc == 42611 ||
-		uc == 42622 ||
-		(uc >= 42738 && uc <= 42743) ||
-		(uc >= 43124 && uc <= 43127) ||
-		uc == 43214 ||
-		uc == 43215 ||
-		(uc >= 43256 && uc <= 43258) ||
-		uc == 43310 ||
-		uc == 43311 ||
-		uc == 43359 ||
-		(uc >= 43457 && uc <= 43469) ||
-		uc == 43486 ||
-		uc == 43487 ||
-		(uc >= 43612 && uc <= 43615) ||
-		uc == 43742 ||
-		uc == 43743 ||
-		uc == 43760 ||
-		uc == 43761 ||
-		uc == 44011 ||
-		uc == 64830 ||
-		uc == 64831 ||
-		(uc >= 65040 && uc <= 65049) ||
-		(uc >= 65072 && uc <= 65106) ||
-		(uc >= 65108 && uc <= 65121) ||
-		uc == 65123 ||
-		uc == 65128 ||
-		uc == 65130 ||
-		uc == 65131 ||
-		(uc >= 65281 && uc <= 65283) ||
-		(uc >= 65285 && uc <= 65290) ||
-		(uc >= 65292 && uc <= 65295) ||
-		uc == 65306 ||
-		uc == 65307 ||
-		uc == 65311 ||
-		uc == 65312 ||
-		(uc >= 65339 && uc <= 65341) ||
-		uc == 65343 ||
-		uc == 65371 ||
-		uc == 65373 ||
-		(uc >= 65375 && uc <= 65381) ||
-		(uc >= 65792 && uc <= 65794) ||
-		uc == 66463 ||
-		uc == 66512 ||
-		uc == 66927 ||
-		uc == 67671 ||
-		uc == 67871 ||
-		uc == 67903 ||
-		(uc >= 68176 && uc <= 68184) ||
-		uc == 68223 ||
-		(uc >= 68336 && uc <= 68342) ||
-		(uc >= 68409 && uc <= 68415) ||
-		(uc >= 68505 && uc <= 68508) ||
-		(uc >= 69703 && uc <= 69709) ||
-		uc == 69819 ||
-		uc == 69820 ||
-		(uc >= 69822 && uc <= 69825) ||
-		(uc >= 69952 && uc <= 69955) ||
-		uc == 70004 ||
-		uc == 70005 ||
-		(uc >= 70085 && uc <= 70088) ||
-		uc == 70093 ||
-		(uc >= 70200 && uc <= 70205) ||
-		uc == 70854 ||
-		(uc >= 71105 && uc <= 71113) ||
-		(uc >= 71233 && uc <= 71235) ||
-		(uc >= 74864 && uc <= 74868) ||
-		uc == 92782 ||
-		uc == 92783 ||
-		uc == 92917 ||
-		(uc >= 92983 && uc <= 92987) ||
-		uc == 92996 ||
-		uc == 113823);
+	return ((uc < 128 && cmark_ispunct((char)uc)) ||
+	        uc == 161 ||
+	        uc == 167 ||
+	        uc == 171 ||
+	        uc == 182 ||
+	        uc == 183 ||
+	        uc == 187 ||
+	        uc == 191 ||
+	        uc == 894 ||
+	        uc == 903 ||
+	        (uc >= 1370 && uc <= 1375) ||
+	        uc == 1417 ||
+	        uc == 1418 ||
+	        uc == 1470 ||
+	        uc == 1472 ||
+	        uc == 1475 ||
+	        uc == 1478 ||
+	        uc == 1523 ||
+	        uc == 1524 ||
+	        uc == 1545 ||
+	        uc == 1546 ||
+	        uc == 1548 ||
+	        uc == 1549 ||
+	        uc == 1563 ||
+	        uc == 1566 ||
+	        uc == 1567 ||
+	        (uc >= 1642 && uc <= 1645) ||
+	        uc == 1748 ||
+	        (uc >= 1792 && uc <= 1805) ||
+	        (uc >= 2039 && uc <= 2041) ||
+	        (uc >= 2096 && uc <= 2110) ||
+	        uc == 2142 ||
+	        uc == 2404 ||
+	        uc == 2405 ||
+	        uc == 2416 ||
+	        uc == 2800 ||
+	        uc == 3572 ||
+	        uc == 3663 ||
+	        uc == 3674 ||
+	        uc == 3675 ||
+	        (uc >= 3844 && uc <= 3858) ||
+	        uc == 3860 ||
+	        (uc >= 3898 && uc <= 3901) ||
+	        uc == 3973 ||
+	        (uc >= 4048 && uc <= 4052) ||
+	        uc == 4057 ||
+	        uc == 4058 ||
+	        (uc >= 4170 && uc <= 4175) ||
+	        uc == 4347 ||
+	        (uc >= 4960 && uc <= 4968) ||
+	        uc == 5120 ||
+	        uc == 5741 ||
+	        uc == 5742 ||
+	        uc == 5787 ||
+	        uc == 5788 ||
+	        (uc >= 5867 && uc <= 5869) ||
+	        uc == 5941 ||
+	        uc == 5942 ||
+	        (uc >= 6100 && uc <= 6102) ||
+	        (uc >= 6104 && uc <= 6106) ||
+	        (uc >= 6144 && uc <= 6154) ||
+	        uc == 6468 ||
+	        uc == 6469 ||
+	        uc == 6686 ||
+	        uc == 6687 ||
+	        (uc >= 6816 && uc <= 6822) ||
+	        (uc >= 6824 && uc <= 6829) ||
+	        (uc >= 7002 && uc <= 7008) ||
+	        (uc >= 7164 && uc <= 7167) ||
+	        (uc >= 7227 && uc <= 7231) ||
+	        uc == 7294 ||
+	        uc == 7295 ||
+	        (uc >= 7360 && uc <= 7367) ||
+	        uc == 7379 ||
+	        (uc >= 8208 && uc <= 8231) ||
+	        (uc >= 8240 && uc <= 8259) ||
+	        (uc >= 8261 && uc <= 8273) ||
+	        (uc >= 8275 && uc <= 8286) ||
+	        uc == 8317 ||
+	        uc == 8318 ||
+	        uc == 8333 ||
+	        uc == 8334 ||
+	        (uc >= 8968 && uc <= 8971) ||
+	        uc == 9001 ||
+	        uc == 9002 ||
+	        (uc >= 10088 && uc <= 10101) ||
+	        uc == 10181 ||
+	        uc == 10182 ||
+	        (uc >= 10214 && uc <= 10223) ||
+	        (uc >= 10627 && uc <= 10648) ||
+	        (uc >= 10712 && uc <= 10715) ||
+	        uc == 10748 ||
+	        uc == 10749 ||
+	        (uc >= 11513 && uc <= 11516) ||
+	        uc == 11518 ||
+	        uc == 11519 ||
+	        uc == 11632 ||
+	        (uc >= 11776 && uc <= 11822) ||
+	        (uc >= 11824 && uc <= 11842) ||
+	        (uc >= 12289 && uc <= 12291) ||
+	        (uc >= 12296 && uc <= 12305) ||
+	        (uc >= 12308 && uc <= 12319) ||
+	        uc == 12336 ||
+	        uc == 12349 ||
+	        uc == 12448 ||
+	        uc == 12539 ||
+	        uc == 42238 ||
+	        uc == 42239 ||
+	        (uc >= 42509 && uc <= 42511) ||
+	        uc == 42611 ||
+	        uc == 42622 ||
+	        (uc >= 42738 && uc <= 42743) ||
+	        (uc >= 43124 && uc <= 43127) ||
+	        uc == 43214 ||
+	        uc == 43215 ||
+	        (uc >= 43256 && uc <= 43258) ||
+	        uc == 43310 ||
+	        uc == 43311 ||
+	        uc == 43359 ||
+	        (uc >= 43457 && uc <= 43469) ||
+	        uc == 43486 ||
+	        uc == 43487 ||
+	        (uc >= 43612 && uc <= 43615) ||
+	        uc == 43742 ||
+	        uc == 43743 ||
+	        uc == 43760 ||
+	        uc == 43761 ||
+	        uc == 44011 ||
+	        uc == 64830 ||
+	        uc == 64831 ||
+	        (uc >= 65040 && uc <= 65049) ||
+	        (uc >= 65072 && uc <= 65106) ||
+	        (uc >= 65108 && uc <= 65121) ||
+	        uc == 65123 ||
+	        uc == 65128 ||
+	        uc == 65130 ||
+	        uc == 65131 ||
+	        (uc >= 65281 && uc <= 65283) ||
+	        (uc >= 65285 && uc <= 65290) ||
+	        (uc >= 65292 && uc <= 65295) ||
+	        uc == 65306 ||
+	        uc == 65307 ||
+	        uc == 65311 ||
+	        uc == 65312 ||
+	        (uc >= 65339 && uc <= 65341) ||
+	        uc == 65343 ||
+	        uc == 65371 ||
+	        uc == 65373 ||
+	        (uc >= 65375 && uc <= 65381) ||
+	        (uc >= 65792 && uc <= 65794) ||
+	        uc == 66463 ||
+	        uc == 66512 ||
+	        uc == 66927 ||
+	        uc == 67671 ||
+	        uc == 67871 ||
+	        uc == 67903 ||
+	        (uc >= 68176 && uc <= 68184) ||
+	        uc == 68223 ||
+	        (uc >= 68336 && uc <= 68342) ||
+	        (uc >= 68409 && uc <= 68415) ||
+	        (uc >= 68505 && uc <= 68508) ||
+	        (uc >= 69703 && uc <= 69709) ||
+	        uc == 69819 ||
+	        uc == 69820 ||
+	        (uc >= 69822 && uc <= 69825) ||
+	        (uc >= 69952 && uc <= 69955) ||
+	        uc == 70004 ||
+	        uc == 70005 ||
+	        (uc >= 70085 && uc <= 70088) ||
+	        uc == 70093 ||
+	        (uc >= 70200 && uc <= 70205) ||
+	        uc == 70854 ||
+	        (uc >= 71105 && uc <= 71113) ||
+	        (uc >= 71233 && uc <= 71235) ||
+	        (uc >= 74864 && uc <= 74868) ||
+	        uc == 92782 ||
+	        uc == 92783 ||
+	        uc == 92917 ||
+	        (uc >= 92983 && uc <= 92987) ||
+	        uc == 92996 ||
+	        uc == 113823);
 }

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/xml.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/xml.c b/compiler/modules/CommonMark/src/xml.c
new file mode 100644
index 0000000..f630aba
--- /dev/null
+++ b/compiler/modules/CommonMark/src/xml.c
@@ -0,0 +1,175 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "config.h"
+#include "cmark.h"
+#include "node.h"
+#include "buffer.h"
+#include "houdini.h"
+
+// Functions to convert cmark_nodes to XML strings.
+
+static void escape_xml(cmark_strbuf *dest, const unsigned char *source, int length)
+{
+	if (source != NULL) {
+		if (length < 0)
+			length = strlen((char *)source);
+
+		houdini_escape_html0(dest, source, (size_t)length, 0);
+	}
+}
+
+struct render_state {
+	cmark_strbuf* xml;
+	int indent;
+};
+
+static inline void indent(struct render_state *state)
+{
+	int i;
+	for (i = 0; i < state->indent; i++) {
+		cmark_strbuf_putc(state->xml, ' ');
+	}
+}
+
+static int
+S_render_node(cmark_node *node, cmark_event_type ev_type,
+              struct render_state *state, long options)
+{
+	cmark_strbuf *xml = state->xml;
+	bool literal = false;
+	cmark_delim_type delim;
+	bool entering = (ev_type == CMARK_EVENT_ENTER);
+
+	if (entering) {
+		indent(state);
+		cmark_strbuf_printf(xml, "<%s",
+		                    cmark_node_get_type_string(node));
+
+		if (options & CMARK_OPT_SOURCEPOS && node->start_line != 0) {
+			cmark_strbuf_printf(xml, " sourcepos=\"%d:%d-%d:%d\"",
+			                    node->start_line,
+			                    node->start_column,
+			                    node->end_line,
+			                    node->end_column);
+		}
+
+		literal = false;
+
+		switch (node->type) {
+		case CMARK_NODE_TEXT:
+		case CMARK_NODE_CODE:
+		case CMARK_NODE_HTML:
+		case CMARK_NODE_INLINE_HTML:
+			cmark_strbuf_puts(xml, ">");
+			escape_xml(xml, node->as.literal.data,
+			           node->as.literal.len);
+			cmark_strbuf_puts(xml, "</");
+			cmark_strbuf_puts(xml,
+			                  cmark_node_get_type_string(node));
+			literal = true;
+			break;
+		case CMARK_NODE_LIST:
+			switch (cmark_node_get_list_type(node)) {
+			case CMARK_ORDERED_LIST:
+				cmark_strbuf_puts(xml, " type=\"ordered\"");
+				cmark_strbuf_printf(xml, " start=\"%d\"",
+				                    cmark_node_get_list_start(node));
+				delim = cmark_node_get_list_delim(node);
+				if (delim == CMARK_PAREN_DELIM) {
+					cmark_strbuf_puts(xml,
+					                  " delim=\"paren\"");
+				} else if (delim == CMARK_PERIOD_DELIM) {
+					cmark_strbuf_puts(xml,
+					                  " delim=\"period\"");
+				}
+				break;
+			case CMARK_BULLET_LIST:
+				cmark_strbuf_puts(xml, " type=\"bullet\"");
+				break;
+			default:
+				break;
+			}
+			cmark_strbuf_printf(xml, " tight=\"%s\"",
+			                    (cmark_node_get_list_tight(node) ?
+			                     "true" : "false"));
+			break;
+		case CMARK_NODE_HEADER:
+			cmark_strbuf_printf(xml, " level=\"%d\"",
+			                    node->as.header.level);
+			break;
+		case CMARK_NODE_CODE_BLOCK:
+			if (node->as.code.info.len > 0) {
+				cmark_strbuf_puts(xml, " info=\"");
+				escape_xml(xml, node->as.code.info.data,
+				           node->as.code.info.len);
+				cmark_strbuf_putc(xml, '"');
+			}
+			cmark_strbuf_puts(xml, ">");
+			escape_xml(xml, node->as.code.literal.data,
+			           node->as.code.literal.len);
+			cmark_strbuf_puts(xml, "</");
+			cmark_strbuf_puts(xml,
+			                  cmark_node_get_type_string(node));
+			literal = true;
+			break;
+		case CMARK_NODE_LINK:
+		case CMARK_NODE_IMAGE:
+			cmark_strbuf_puts(xml, " destination=\"");
+			escape_xml(xml, node->as.link.url, -1);
+			cmark_strbuf_putc(xml, '"');
+			cmark_strbuf_puts(xml, " title=\"");
+			escape_xml(xml, node->as.link.title, -1);
+			cmark_strbuf_putc(xml, '"');
+			break;
+		default:
+			break;
+		}
+		if (node->first_child) {
+			state->indent += 2;
+		} else if (!literal) {
+			cmark_strbuf_puts(xml, " /");
+		}
+		cmark_strbuf_puts(xml, ">\n");
+
+
+	} else if (node->first_child) {
+		state->indent -= 2;
+		indent(state);
+		cmark_strbuf_printf(xml, "</%s>\n",
+		                    cmark_node_get_type_string(node));
+	}
+
+	return 1;
+}
+
+char *cmark_render_xml(cmark_node *root, long options)
+{
+	char *result;
+	cmark_strbuf xml = GH_BUF_INIT;
+	cmark_event_type ev_type;
+	cmark_node *cur;
+	struct render_state state = { &xml, 0 };
+
+	if (options & CMARK_OPT_NORMALIZE) {
+		cmark_consolidate_text_nodes(root);
+	}
+
+	cmark_iter *iter = cmark_iter_new(root);
+
+	cmark_strbuf_puts(state.xml,
+	                  "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
+	cmark_strbuf_puts(state.xml,
+	                  "<!DOCTYPE CommonMark SYSTEM \"CommonMark.dtd\">\n");
+	while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
+		cur = cmark_iter_get_node(iter);
+		S_render_node(cur, ev_type, &state, options);
+	}
+	result = (char *)cmark_strbuf_detach(&xml);
+
+	cmark_iter_free(iter);
+	cmark_strbuf_free(&xml);
+	return result;
+}


Mime
View raw message