Merge pull request #97 from uranusjr/fix_89-markup-escape

Fix markup characters escaping
2014-08-13 00:26:32 -05:00 · 2014-08-13 00:26:32 -05:00 · 6590101008
commit 6590101008
parent e2d34bba00 efc8974c73
3 changed files with 137 additions and 22 deletions
--- a/src/document.c
+++ b/src/document.c
@ -465,27 +465,40 @@ parse_inline(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t si
 	}
 }

+/* is_escaped • returns nonzero when data[loc] is preceded by an odd number of backslashes, i.e. is escaped */
+static int
+is_escaped(uint8_t *data, size_t loc)
+{
+	size_t i = loc;
+	while (i >= 1 && data[i - 1] == '\\')
+		i--;
+
+	/* (loc - i) is the length of the backslash run right before data[loc]; an odd count escapes it */
+	return (loc - i) % 2;
+}
+
 /* find_emph_char • looks for the next emph uint8_t, skipping other constructs */
 static size_t
 find_emph_char(uint8_t *data, size_t size, uint8_t c)
 {
-	size_t i = 1;
+	size_t i = 0;

 	while (i < size) {
-		while (i < size && data[i] != c && data[i] != '[')
+		while (i < size && data[i] != c && data[i] != '[' && data[i] != '`')
 			i++;

 		if (i == size)
 			return 0;

-		if (data[i] == c)
-			return i;
-
 		/* not counting escaped chars */
-		if (i && data[i - 1] == '\\') {
+		if (is_escaped(data, i)) {
 			i++; continue;
 		}

+		if (data[i] == c)
+			return i;
+
+		/* skipping a codespan */
 		if (data[i] == '`') {
 			size_t span_nb = 0, bt;
 			size_t tmp_i = 0;
@ -506,6 +519,7 @@ find_emph_char(uint8_t *data, size_t size, uint8_t c)
 				i++;
 			}

+			/* not a well-formed codespan; use found matching emph char */
 			if (i >= size) return tmp_i;
 		}
 		/* skipping a link */
@ -685,7 +699,7 @@ char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t o

 	if (size > 2 && data[1] != c) {
 		/* spacing cannot follow an opening emphasis;
-		 * strikethrough only takes two characters '~~' */
+		 * strikethrough and highlight only take two characters ('~~' and '==') */
 		if (c == '~' || c == '=' || _isspace(data[1]) || (ret = parse_emph1(ob, doc, data + 1, size - 1, c)) == 0)
 			return 0;

@ -778,15 +792,16 @@ char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offs
 		nq++;

 	/* finding the next delimiter */
-	i = 0;
-	for (end = nq; end < size && i < nq; end++) {
-		if (data[end] == '"') i++;
-		else i = 0;
+	end = nq;
+	while (1) {
+		i = end;
+		end += find_emph_char(data + end, size - end, '"');
+		if (end == i) return 0;		/* no matching delimiter */
+		i = end;
+		while (end < size && data[end] == '"' && end - i < nq) end++;
+		if (end - i >= nq) break;
 	}

-	if (i < nq && end >= size)
-		return 0; /* no matching delimiter */
-
 	/* trimming outside spaces */
 	f_begin = nq;
 	while (f_begin < end && data[f_begin] == ' ')
@ -817,7 +832,7 @@ char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offs
 static size_t
 char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
 {
-	static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~";
+	static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~=\"";
 	hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };

 	if (size > 1) {
@ -987,7 +1002,7 @@ char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offse
 		if (data[i] == '\n')
 			text_has_nl = 1;

-		else if (data[i - 1] == '\\')
+		else if (is_escaped(data, i))
 			continue;

 		else if (data[i] == '[')
@ -1251,10 +1266,8 @@ char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_
 		return 0;

 	if (data[1] == '(') {
-		sup_start = sup_len = 2;
-
-		while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
-			sup_len++;
+		sup_start = 2;
+		sup_len = find_emph_char(data + 2, size - 2, ')') + 2;

 		if (sup_len == size)
 			return 0;
@ -2177,8 +2190,8 @@ parse_table_row(

 		cell_start = i;

-		while (i < size && data[i] != '|')
-			i++;
+		size_t len = find_emph_char(data + i, size - i, '|');
+		i += len ? len : size - i;

 		cell_end = i - 1;

--- a/test/MarkdownTest_1.0.3/Tests/Escape
+++ b/test/MarkdownTest_1.0.3/Tests/Escape
@ -0,0 +1,51 @@
+<p>==Highlight==</p>
+
+<p>~~Strikethrough~~</p>
+
+<p>_Underscore_</p>
+
+<p>_<em>Underscore_</em></p>
+
+<p><em>_Underscore</em>_</p>
+
+<p>_<em>Underscore</em>_</p>
+
+<p><em>_Underscore_</em></p>
+
+<p>*Asterisk*</p>
+
+<p>*<em>Asterisk*</em></p>
+
+<p>*<em>Asterisk</em>*</p>
+
+<p><em>*Asterisk*</em></p>
+
+<p><em>*Asterisk</em>*</p>
+
+<p>[Bracket]</p>
+
+<p>(Parenthesis)</p>
+
+<p>&lt;Chevron&gt;</p>
+
+<p>Super^script</p>
+
+<p>`Backtick`</p>
+
+<p>&quot;Quote&quot;</p>
+
+<p><strong>Foo\</strong></p>
+
+<p><em>Foo\*</em></p>
+
+<p><strong>Foo\\Bar\</strong></p>
+
+<p>*Foo\Bar\*</p>
+
+<p><a href="http://example.com">Foo]</a></p>
+
+<p><a href="http://example.com">Foo\</a></p>
+
+<p><a href="http://example.com">Foo\]</a></p>
+
+<p><a href="http://example.com">Foo\\</a></p>
--- a/test/MarkdownTest_1.0.3/Tests/Escape
+++ b/test/MarkdownTest_1.0.3/Tests/Escape
@ -0,0 +1,51 @@
+\==Highlight\==
+
+\~~Strikethrough\~~
+
+\_Underscore\_
+
+\__Underscore\__
+
+_\_Underscore_\_
+
+\__Underscore_\_
+
+_\_Underscore\__
+
+\*Asterisk\*
+
+\**Asterisk\**
+
+\**Asterisk*\*
+
+*\*Asterisk\**
+
+*\*Asterisk*\*
+
+\[Bracket\]
+
+\(Parenthesis\)
+
+\<Chevron\>
+
+Super\^script
+
+\`Backtick\`
+
+\"Quote\"
+
+**Foo\\**
+
+*Foo\\\**
+
+**Foo\\\Bar\\**
+
+*Foo\\Bar\\\*
+
+[Foo\]](http://example.com)
+
+[Foo\\](http://example.com)
+
+[Foo\\\]](http://example.com)
+
+[Foo\\\\](http://example.com)