Merge pull request #97 from uranusjr/fix_89-markup-escape

Fix markup characters escaping
This commit is contained in:
Devin Torres 2014-08-13 00:26:32 -05:00
commit 6590101008
3 changed files with 137 additions and 22 deletions

View file

@ -465,27 +465,40 @@ parse_inline(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t si
} }
} }
/* is_escaped • tells whether the special char at data[loc] is escaped, i.e. preceded by an odd-length run of '\\' */
static int
is_escaped(uint8_t *data, size_t loc)
{
	size_t backslashes = 0;

	/* measure the contiguous run of backslashes ending right before data[loc];
	 * the bound `backslashes < loc` keeps the index from going below data[0] */
	for (; backslashes < loc && data[loc - backslashes - 1] == '\\'; backslashes++)
		;

	/* backslashes pair up as literal "\\"; an odd run leaves one unpaired,
	 * and that one escapes data[loc] */
	return (int)(backslashes % 2);
}
/* find_emph_char • looks for the next emph uint8_t, skipping other constructs */ /* find_emph_char • looks for the next emph uint8_t, skipping other constructs */
static size_t static size_t
find_emph_char(uint8_t *data, size_t size, uint8_t c) find_emph_char(uint8_t *data, size_t size, uint8_t c)
{ {
size_t i = 1; size_t i = 0;
while (i < size) { while (i < size) {
while (i < size && data[i] != c && data[i] != '[') while (i < size && data[i] != c && data[i] != '[' && data[i] != '`')
i++; i++;
if (i == size) if (i == size)
return 0; return 0;
if (data[i] == c)
return i;
/* not counting escaped chars */ /* not counting escaped chars */
if (i && data[i - 1] == '\\') { if (is_escaped(data, i)) {
i++; continue; i++; continue;
} }
if (data[i] == c)
return i;
/* skipping a codespan */
if (data[i] == '`') { if (data[i] == '`') {
size_t span_nb = 0, bt; size_t span_nb = 0, bt;
size_t tmp_i = 0; size_t tmp_i = 0;
@ -506,6 +519,7 @@ find_emph_char(uint8_t *data, size_t size, uint8_t c)
i++; i++;
} }
/* not a well-formed codespan; use found matching emph char */
if (i >= size) return tmp_i; if (i >= size) return tmp_i;
} }
/* skipping a link */ /* skipping a link */
@ -685,7 +699,7 @@ char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t o
if (size > 2 && data[1] != c) { if (size > 2 && data[1] != c) {
/* spacing cannot follow an opening emphasis; /* spacing cannot follow an opening emphasis;
* strikethrough only takes two characters '~~' */ * strikethrough and highlight only takes two characters '~~' */
if (c == '~' || c == '=' || _isspace(data[1]) || (ret = parse_emph1(ob, doc, data + 1, size - 1, c)) == 0) if (c == '~' || c == '=' || _isspace(data[1]) || (ret = parse_emph1(ob, doc, data + 1, size - 1, c)) == 0)
return 0; return 0;
@ -778,15 +792,16 @@ char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offs
nq++; nq++;
/* finding the next delimiter */ /* finding the next delimiter */
i = 0; end = nq;
for (end = nq; end < size && i < nq; end++) { while (1) {
if (data[end] == '"') i++; i = end;
else i = 0; end += find_emph_char(data + end, size - end, '"');
if (end == i) return 0; /* no matching delimiter */
i = end;
while (end < size && data[end] == '"' && end - i < nq) end++;
if (end - i >= nq) break;
} }
if (i < nq && end >= size)
return 0; /* no matching delimiter */
/* trimming outside spaces */ /* trimming outside spaces */
f_begin = nq; f_begin = nq;
while (f_begin < end && data[f_begin] == ' ') while (f_begin < end && data[f_begin] == ' ')
@ -817,7 +832,7 @@ char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offs
static size_t static size_t
char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
{ {
static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~"; static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~=\"";
hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL }; hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
if (size > 1) { if (size > 1) {
@ -987,7 +1002,7 @@ char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offse
if (data[i] == '\n') if (data[i] == '\n')
text_has_nl = 1; text_has_nl = 1;
else if (data[i - 1] == '\\') else if (is_escaped(data, i))
continue; continue;
else if (data[i] == '[') else if (data[i] == '[')
@ -1251,10 +1266,8 @@ char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_
return 0; return 0;
if (data[1] == '(') { if (data[1] == '(') {
sup_start = sup_len = 2; sup_start = 2;
sup_len = find_emph_char(data + 2, size - 2, ')') + 2;
while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
sup_len++;
if (sup_len == size) if (sup_len == size)
return 0; return 0;
@ -2177,8 +2190,8 @@ parse_table_row(
cell_start = i; cell_start = i;
while (i < size && data[i] != '|') size_t len = find_emph_char(data + i, size - i, '|');
i++; i += len ? len : size - i;
cell_end = i - 1; cell_end = i - 1;

View file

@ -0,0 +1,51 @@
<p>==Highlight==</p>
<p>~~Strikethrough~~</p>
<p>_Underscore_</p>
<p>_<em>Underscore_</em></p>
<p><em>_Underscore</em>_</p>
<p>_<em>Underscore</em>_</p>
<p><em>_Underscore_</em></p>
<p>*Asterisk*</p>
<p>*<em>Asterisk*</em></p>
<p>*<em>Asterisk</em>*</p>
<p><em>*Asterisk*</em></p>
<p><em>*Asterisk</em>*</p>
<p>[Bracket]</p>
<p>(Parenthesis)</p>
<p>&lt;Chevron&gt;</p>
<p>Super^script</p>
<p>`Backtick`</p>
<p>&quot;Quote&quot;</p>
<p><strong>Foo\</strong></p>
<p><em>Foo\*</em></p>
<p><strong>Foo\\Bar\</strong></p>
<p>*Foo\Bar\*</p>
<p><a href="http://example.com">Foo]</a></p>
<p><a href="http://example.com">Foo\</a></p>
<p><a href="http://example.com">Foo\]</a></p>
<p><a href="http://example.com">Foo\\</a></p>

View file

@ -0,0 +1,51 @@
\==Highlight\==
\~~Strikethrough\~~
\_Underscore\_
\__Underscore\__
_\_Underscore_\_
\__Underscore_\_
_\_Underscore\__
\*Asterisk\*
\**Asterisk\**
\**Asterisk*\*
*\*Asterisk\**
*\*Asterisk*\*
\[Bracket\]
\(Parenthesis\)
\<Chevron\>
Super\^script
\`Backtick\`
\"Quote\"
**Foo\\**
*Foo\\\**
**Foo\\\Bar\\**
*Foo\\Bar\\\*
[Foo\]](http://example.com)
[Foo\\](http://example.com)
[Foo\\\]](http://example.com)
[Foo\\\\](http://example.com)