From c5f4c35541ac9163d9503363f3a29eab8b97bda9 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Sat, 9 Aug 2014 20:35:02 +0800 Subject: [PATCH 01/16] Add = and " to escapable character list --- src/document.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/document.c b/src/document.c index 298a44b..b910f83 100644 --- a/src/document.c +++ b/src/document.c @@ -817,7 +817,7 @@ char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offs static size_t char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) { - static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~"; + static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~=\""; hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL }; if (size > 1) { From c8544810924d00c36f0a3d7671b9402fa796fd92 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Sat, 9 Aug 2014 20:45:32 +0800 Subject: [PATCH 02/16] Fix incorrect escaping logic in find_emph_char --- src/document.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/document.c b/src/document.c index b910f83..eecbb84 100644 --- a/src/document.c +++ b/src/document.c @@ -478,14 +478,14 @@ find_emph_char(uint8_t *data, size_t size, uint8_t c) if (i == size) return 0; - if (data[i] == c) - return i; - /* not counting escaped chars */ if (i && data[i - 1] == '\\') { i++; continue; } + if (data[i] == c) + return i; + if (data[i] == '`') { size_t span_nb = 0, bt; size_t tmp_i = 0; From 2301e4caa2ad89a9c442db9f49b58eb710fe837f Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Sun, 10 Aug 2014 02:03:54 +0800 Subject: [PATCH 03/16] Better logic to handle backslashes is_escaped actually counts backslashes instead of looking back one character to determine whether a character is escaped. This handles inputs like *Foo\\* correctly (as `

Foo\

`; would be `

Foo\*

` previously). --- src/document.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/document.c b/src/document.c index eecbb84..7e1858d 100644 --- a/src/document.c +++ b/src/document.c @@ -465,6 +465,20 @@ parse_inline(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t si } } +/* is_escaped • returns whether special char at data[loc] is escaped by '\\' */ +static int +is_escaped(uint8_t *data, size_t loc) +{ + size_t i = loc; + while (i >= 1 && data[i - 1] == '\\') + i--; + + /* odd numbers of backslashes escapes data[loc] */ + if ((loc - i) % 2) + return 1; + return 0; +} + /* find_emph_char • looks for the next emph uint8_t, skipping other constructs */ static size_t find_emph_char(uint8_t *data, size_t size, uint8_t c) @@ -479,7 +493,7 @@ find_emph_char(uint8_t *data, size_t size, uint8_t c) return 0; /* not counting escaped chars */ - if (i && data[i - 1] == '\\') { + if (is_escaped(data, i)) { i++; continue; } From 4a2c96236981717489034429f24e234f0412f793 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Sun, 10 Aug 2014 02:16:34 +0800 Subject: [PATCH 04/16] Add test cases for escape characters --- .../Tests/Escape cahracter.html | 43 +++++++++++++++++++ .../Tests/Escape character.text | 43 +++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 test/MarkdownTest_1.0.3/Tests/Escape cahracter.html create mode 100644 test/MarkdownTest_1.0.3/Tests/Escape character.text diff --git a/test/MarkdownTest_1.0.3/Tests/Escape cahracter.html b/test/MarkdownTest_1.0.3/Tests/Escape cahracter.html new file mode 100644 index 0000000..68864e6 --- /dev/null +++ b/test/MarkdownTest_1.0.3/Tests/Escape cahracter.html @@ -0,0 +1,43 @@ +

==Highlight==

+ +

~~Strikethrough~~

+ +

_Underscore_

+ +

_Underscore_

+ +

_Underscore_

+ +

_Underscore_

+ +

_Underscore_

+ +

*Asterisk*

+ +

*Asterisk*

+ +

*Asterisk*

+ +

*Asterisk*

+ +

*Asterisk*

+ +

[Bracket]

+ +

(Parenthesis)

+ +

<Chevron>

+ +

Super^script

+ +

`Backtick`

+ +

"Quote"

+ +

Foo\

+ +

Foo\*

+ +

Foo\\Bar\

+ +

*Foo\Bar\*

\ No newline at end of file diff --git a/test/MarkdownTest_1.0.3/Tests/Escape character.text b/test/MarkdownTest_1.0.3/Tests/Escape character.text new file mode 100644 index 0000000..566cb85 --- /dev/null +++ b/test/MarkdownTest_1.0.3/Tests/Escape character.text @@ -0,0 +1,43 @@ +\==Highlight\== + +\~~Strikethrough\~~ + +\_Underscore\_ + +\__Underscore\__ + +_\_Underscore_\_ + +\__Underscore_\_ + +_\_Underscore\__ + +\*Asterisk\* + +\**Asterisk\** + +\**Asterisk*\* + +*\*Asterisk\** + +*\*Asterisk*\* + +\[Bracket\] + +\(Parenthesis\) + +\<Chevron\> + +Super\^script + +\`Backtick\` + +\"Quote\" + +**Foo\\** + +*Foo\\\** + +**Foo\\\Bar\\** + +*Foo\\Bar\\\* \ No newline at end of file From 0c766248c9e913952009d171aa3b53924737706b Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Sun, 10 Aug 2014 02:43:59 +0800 Subject: [PATCH 05/16] Fix incorrect file name --- .../Tests/{Escape cahracter.html => Escape character.html} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/MarkdownTest_1.0.3/Tests/{Escape cahracter.html => Escape character.html} (100%) diff --git a/test/MarkdownTest_1.0.3/Tests/Escape cahracter.html b/test/MarkdownTest_1.0.3/Tests/Escape character.html similarity index 100% rename from test/MarkdownTest_1.0.3/Tests/Escape cahracter.html rename to test/MarkdownTest_1.0.3/Tests/Escape character.html From 64a4267429d38f818a4b00f1a72f17da65761707 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Sun, 10 Aug 2014 02:48:35 +0800 Subject: [PATCH 06/16] Fix escaping in char_link and add tests Same logic as 2301e4c --- src/document.c | 2 +- test/MarkdownTest_1.0.3/Tests/Escape character.html | 10 +++++++++- test/MarkdownTest_1.0.3/Tests/Escape character.text | 10 +++++++++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/document.c b/src/document.c index 7e1858d..bce36e5 100644 --- a/src/document.c +++ b/src/document.c @@ -1001,7 +1001,7 @@ char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offse if (data[i] == 
'\n') text_has_nl = 1; - else if (data[i - 1] == '\\') + else if (is_escaped(data, i)) continue; else if (data[i] == '[') diff --git a/test/MarkdownTest_1.0.3/Tests/Escape character.html b/test/MarkdownTest_1.0.3/Tests/Escape character.html index 68864e6..a6863ba 100644 --- a/test/MarkdownTest_1.0.3/Tests/Escape character.html +++ b/test/MarkdownTest_1.0.3/Tests/Escape character.html @@ -40,4 +40,12 @@

Foo\\Bar\

-

*Foo\Bar\*

\ No newline at end of file +

*Foo\Bar\*

+ +

Foo]

+ +

Foo\

+ +

Foo\]

+ +

Foo\\

diff --git a/test/MarkdownTest_1.0.3/Tests/Escape character.text b/test/MarkdownTest_1.0.3/Tests/Escape character.text index 566cb85..3eab090 100644 --- a/test/MarkdownTest_1.0.3/Tests/Escape character.text +++ b/test/MarkdownTest_1.0.3/Tests/Escape character.text @@ -40,4 +40,12 @@ Super\^script **Foo\\\Bar\\** -*Foo\\Bar\\\* \ No newline at end of file +*Foo\\Bar\\\* + +[Foo\]](http://example.com) + +[Foo\\](http://example.com) + +[Foo\\\]](http://example.com) + +[Foo\\\\](http://example.com) From 753e4d892c0ca8ad41b84d676bdafe55843832aa Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Sun, 10 Aug 2014 03:10:52 +0800 Subject: [PATCH 07/16] Fix superscript group escaping --- src/document.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/document.c b/src/document.c index bce36e5..7927abf 100644 --- a/src/document.c +++ b/src/document.c @@ -1267,8 +1267,12 @@ char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_ if (data[1] == '(') { sup_start = sup_len = 2; - while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\') + do { + while (sup_len < size && data[sup_len] != ')') + sup_len++; sup_len++; + } while (is_escaped(data, sup_len - 1)); + sup_len--; if (sup_len == size) return 0; From e3cbd4be4ee5c2126074102d64b68dcd3be72206 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Sun, 10 Aug 2014 23:11:51 +0800 Subject: [PATCH 08/16] Make return statement of is_escaped more succinct --- src/document.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/document.c b/src/document.c index 7927abf..ebdd979 100644 --- a/src/document.c +++ b/src/document.c @@ -474,9 +474,7 @@ is_escaped(uint8_t *data, size_t loc) i--; /* odd numbers of backslashes escapes data[loc] */ - if ((loc - i) % 2) - return 1; - return 0; + return (loc - i) % 2; } /* find_emph_char • looks for the next emph uint8_t, skipping other constructs */ From 0cd4dbd41cbe9c06d60cedcd62134123ac545698 Mon Sep 17 
00:00:00 2001 From: Tzu-ping Chung Date: Mon, 11 Aug 2014 00:30:33 +0800 Subject: [PATCH 09/16] Need to handle backticks in find_emph_char --- src/document.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/document.c b/src/document.c index ebdd979..0fb4843 100644 --- a/src/document.c +++ b/src/document.c @@ -484,7 +484,7 @@ find_emph_char(uint8_t *data, size_t size, uint8_t c) size_t i = 1; while (i < size) { - while (i < size && data[i] != c && data[i] != '[') + while (i < size && data[i] != c && data[i] != '[' && data[i] != '`') i++; if (i == size) From 79010cca84777a51ef15c4b39f3696d65506d63d Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Mon, 11 Aug 2014 00:52:06 +0800 Subject: [PATCH 10/16] Add comments --- src/document.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/document.c b/src/document.c index 0fb4843..8973b30 100644 --- a/src/document.c +++ b/src/document.c @@ -498,6 +498,7 @@ find_emph_char(uint8_t *data, size_t size, uint8_t c) if (data[i] == c) return i; + /* skipping a codespan */ if (data[i] == '`') { size_t span_nb = 0, bt; size_t tmp_i = 0; @@ -518,6 +519,7 @@ find_emph_char(uint8_t *data, size_t size, uint8_t c) i++; } + /* not a well-formed codespan; use found matching emph char */ if (i >= size) return tmp_i; } /* skipping a link */ From 81d5932b679af8cdfec0e759ca02cdd01ea91c4f Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Mon, 11 Aug 2014 00:53:42 +0800 Subject: [PATCH 11/16] Reusing find_emph_char logic in char_superscript --- src/document.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/document.c b/src/document.c index 8973b30..8caa2ac 100644 --- a/src/document.c +++ b/src/document.c @@ -1265,14 +1265,8 @@ char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_ return 0; if (data[1] == '(') { - sup_start = sup_len = 2; - - do { - while (sup_len < size && data[sup_len] != ')') - sup_len++; - sup_len++; - } while (is_escaped(data, sup_len - 
1)); - sup_len--; + sup_start = 2; + sup_len = find_emph_char(data + 2, size - 2, ')') + 2; if (sup_len == size) return 0; From 60d63a1920b6954b479c05d153c79af6abf09669 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Mon, 11 Aug 2014 00:57:53 +0800 Subject: [PATCH 12/16] Make comments in char_emphasis up-to-date --- src/document.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/document.c b/src/document.c index 8caa2ac..68905aa 100644 --- a/src/document.c +++ b/src/document.c @@ -699,7 +699,7 @@ char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t o if (size > 2 && data[1] != c) { /* spacing cannot follow an opening emphasis; - * strikethrough only takes two characters '~~' */ + * strikethrough and highlight only takes two characters '~~' */ if (c == '~' || c == '=' || _isspace(data[1]) || (ret = parse_emph1(ob, doc, data + 1, size - 1, c)) == 0) return 0; From 48a19b31bfa7a4485d23a93afcd3abc42594f7ed Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Mon, 11 Aug 2014 03:06:25 +0800 Subject: [PATCH 13/16] Input of find_emph_char should INCLUDE opening This fixes super^(`)`) --- src/document.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/document.c b/src/document.c index 68905aa..0da921c 100644 --- a/src/document.c +++ b/src/document.c @@ -1266,7 +1266,7 @@ char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_ if (data[1] == '(') { sup_start = 2; - sup_len = find_emph_char(data + 2, size - 2, ')') + 2; + sup_len = find_emph_char(data + 1, size - 1, ')') + 1; if (sup_len == size) return 0; From e7bcbe3fa62165d3d6c6f4fe619ecfb38122293a Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Mon, 11 Aug 2014 03:24:54 +0800 Subject: [PATCH 14/16] Use find_emph_char for table column parsing Fix #77 --- src/document.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/document.c b/src/document.c index 0da921c..36f0f79 100644 --- a/src/document.c +++ 
b/src/document.c @@ -2189,8 +2189,9 @@ parse_table_row( cell_start = i; - while (i < size && data[i] != '|') - i++; + i--; + size_t len = find_emph_char(data + i, size - i, '|'); + i += len ? len : size - i; cell_end = i - 1; From 492e3d14de6d2048e36c247decfba6ba79624318 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Mon, 11 Aug 2014 03:38:09 +0800 Subject: [PATCH 15/16] find_emph_char should start from the beginning This fixes bugs for edge cases of pattern: opening backtick ending [any] backtick [any] ending Examples: *`**`* and __`__`__ Range handling in 48a19b3 and e7bcbe3 is also changed to match the correct behavior. --- src/document.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/document.c b/src/document.c index 36f0f79..3f75cb0 100644 --- a/src/document.c +++ b/src/document.c @@ -481,7 +481,7 @@ is_escaped(uint8_t *data, size_t loc) static size_t find_emph_char(uint8_t *data, size_t size, uint8_t c) { - size_t i = 1; + size_t i = 0; while (i < size) { while (i < size && data[i] != c && data[i] != '[' && data[i] != '`') @@ -1266,7 +1266,7 @@ char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_ if (data[1] == '(') { sup_start = 2; - sup_len = find_emph_char(data + 1, size - 1, ')') + 1; + sup_len = find_emph_char(data + 2, size - 2, ')') + 2; if (sup_len == size) return 0; @@ -2189,7 +2189,6 @@ parse_table_row( cell_start = i; - i--; size_t len = find_emph_char(data + i, size - i, '|'); i += len ? 
len : size - i; From efc8974c737dd2920aa7b359a2f60fecbd397465 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Tue, 12 Aug 2014 22:39:27 +0800 Subject: [PATCH 16/16] Use find_emph_char to find delims in char_quote --- src/document.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/document.c b/src/document.c index 3f75cb0..40c96cd 100644 --- a/src/document.c +++ b/src/document.c @@ -792,15 +792,16 @@ char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offs nq++; /* finding the next delimiter */ - i = 0; - for (end = nq; end < size && i < nq; end++) { - if (data[end] == '"') i++; - else i = 0; + end = nq; + while (1) { + i = end; + end += find_emph_char(data + end, size - end, '"'); + if (end == i) return 0; /* no matching delimiter */ + i = end; + while (end < size && data[end] == '"' && end - i < nq) end++; + if (end - i >= nq) break; } - if (i < nq && end >= size) - return 0; /* no matching delimiter */ - /* trimming outside spaces */ f_begin = nq; while (f_begin < end && data[f_begin] == ' ')