Merge pull request #97 from uranusjr/fix_89-markup-escape

Fix markup characters escaping
2014-08-13 00:26:32 -05:00 · 2014-08-13 00:26:32 -05:00 · 6590101008
commit 6590101008
parent e2d34bba00 efc8974c73
3 changed files with 137 additions and 22 deletions
--- a/src/document.c
+++ b/src/document.c
@ -465,27 +465,40 @@ parse_inline(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t si
 	}
 }
 /* is_escaped • reports whether the special char at data[loc] is escaped by '\\';
  * returns 1 when it is escaped and 0 when it is not */
 static int
 is_escaped(uint8_t *data, size_t loc)
 {
 	size_t backslashes = 0;
 	/* measure the unbroken run of backslashes ending right before data[loc] */
 	while (backslashes < loc && data[loc - 1 - backslashes] == '\\')
 		backslashes++;
 	/* in an even-length run the backslashes pair up and escape each other;
 	 * an odd-length run leaves one over, which escapes data[loc] */
 	return (int)(backslashes & 1);
 }
 /* find_emph_char • looks for the next emph char, skipping other constructs */
 static size_t
 find_emph_char(uint8_t *data, size_t size, uint8_t c)
 {
-	size_t i = 1;
+	size_t i = 0;
 	while (i < size) {
-		while (i < size && data[i] != c && data[i] != '[')
+		while (i < size && data[i] != c && data[i] != '[' && data[i] != '`')
 			i++;
 		if (i == size)
 			return 0;
 		if (data[i] == c)
 			return i;
 		/* not counting escaped chars */
-		if (i && data[i - 1] == '\\') {
+		if (is_escaped(data, i)) {
 			i++; continue;
 		}
 		if (data[i] == c)
 			return i;
 		/* skipping a codespan */
 		if (data[i] == '`') {
 			size_t span_nb = 0, bt;
 			size_t tmp_i = 0;
@ -506,6 +519,7 @@ find_emph_char(uint8_t *data, size_t size, uint8_t c)
 				i++;
 			}
 			/* not a well-formed codespan; use found matching emph char */
 			if (i >= size) return tmp_i;
 		}
 		/* skipping a link */
@ -685,7 +699,7 @@ char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t o
 	if (size > 2 && data[1] != c) {
 		/* spacing cannot follow an opening emphasis;
-		 * strikethrough only takes two characters '~~' */
+		 * strikethrough and highlight only take two characters ('~~' and '==') */
 		if (c == '~' || c == '=' || _isspace(data[1]) || (ret = parse_emph1(ob, doc, data + 1, size - 1, c)) == 0)
 			return 0;
@ -778,15 +792,16 @@ char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offs
 		nq++;
 	/* finding the next delimiter */
-	i = 0;
+	end = nq;
-	for (end = nq; end < size && i < nq; end++) {
+	while (1) {
-		if (data[end] == '"') i++;
+		i = end;
-		else i = 0;
+		end += find_emph_char(data + end, size - end, '"');
 		if (end == i) return 0;		/* no matching delimiter */
 		i = end;
 		while (end < size && data[end] == '"' && end - i < nq) end++;
 		if (end - i >= nq) break;
 	}
 	if (i < nq && end >= size)
 		return 0; /* no matching delimiter */
 	/* trimming outside spaces */
 	f_begin = nq;
 	while (f_begin < end && data[f_begin] == ' ')
@ -817,7 +832,7 @@ char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offs
 static size_t
 char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
 {
-	static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~";
+	static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~=\"";
 	hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
 	if (size > 1) {
@ -987,7 +1002,7 @@ char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offse
 		if (data[i] == '\n')
 			text_has_nl = 1;
-		else if (data[i - 1] == '\\')
+		else if (is_escaped(data, i))
 			continue;
 		else if (data[i] == '[')
@ -1251,10 +1266,8 @@ char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_
 		return 0;
 	if (data[1] == '(') {
-		sup_start = sup_len = 2;
+		sup_start = 2;
-
+		sup_len = find_emph_char(data + 2, size - 2, ')') + 2;
 		while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
 			sup_len++;
 		if (sup_len == size)
 			return 0;
@ -2177,8 +2190,8 @@ parse_table_row(
 		cell_start = i;
-		while (i < size && data[i] != '|')
+		size_t len = find_emph_char(data + i, size - i, '|');
-			i++;
+		i += len ? len : size - i;
 		cell_end = i - 1;
--- a/test/MarkdownTest_1.0.3/Tests/Escape
+++ b/test/MarkdownTest_1.0.3/Tests/Escape
@ -0,0 +1,51 @@
 <p>==Highlight==</p>
 <p>~~Strikethrough~~</p>
 <p>_Underscore_</p>
 <p>_<em>Underscore_</em></p>
 <p><em>_Underscore</em>_</p>
 <p>_<em>Underscore</em>_</p>
 <p><em>_Underscore_</em></p>
 <p>*Asterisk*</p>
 <p>*<em>Asterisk*</em></p>
 <p>*<em>Asterisk</em>*</p>
 <p><em>*Asterisk*</em></p>
 <p><em>*Asterisk</em>*</p>
 <p>[Bracket]</p>
 <p>(Parenthesis)</p>
 <p>&lt;Chevron&gt;</p>
 <p>Super^script</p>
 <p>`Backtick`</p>
 <p>&quot;Quote&quot;</p>
 <p><strong>Foo\</strong></p>
 <p><em>Foo\*</em></p>
 <p><strong>Foo\\Bar\</strong></p>
 <p>*Foo\Bar\*</p>
 <p><a href="http://example.com">Foo]</a></p>
 <p><a href="http://example.com">Foo\</a></p>
 <p><a href="http://example.com">Foo\]</a></p>
 <p><a href="http://example.com">Foo\\</a></p>
--- a/test/MarkdownTest_1.0.3/Tests/Escape
+++ b/test/MarkdownTest_1.0.3/Tests/Escape
@ -0,0 +1,51 @@
 \==Highlight\==
 \~~Strikethrough\~~
 \_Underscore\_
 \__Underscore\__
 _\_Underscore_\_
 \__Underscore_\_
 _\_Underscore\__
 \*Asterisk\*
 \**Asterisk\**
 \**Asterisk*\*
 *\*Asterisk\**
 *\*Asterisk*\*
 \[Bracket\]
 \(Parenthesis\)
 \<Chevron\>
 Super\^script
 \`Backtick\`
 \"Quote\"
 **Foo\\**
 *Foo\\\**
 **Foo\\\Bar\\**
 *Foo\\Bar\\\*
 [Foo\]](http://example.com)
 [Foo\\](http://example.com)
 [Foo\\\]](http://example.com)
 [Foo\\\\](http://example.com)