Fix SmartyPants handling of single quotes
This commit is contained in:
parent
92c9d20b0a
commit
8d30051774
2 changed files with 57 additions and 7 deletions
|
@ -83,6 +83,26 @@ word_boundary(uint8_t c)
|
|||
return c == 0 || isspace(c) || ispunct(c);
|
||||
}
|
||||
|
||||
// If 'text' begins with any kind of single quote (e.g. "'" or "'" etc.),
|
||||
// returns the length of the sequence of characters that makes up the single-
|
||||
// quote. Otherwise, returns zero.
|
||||
static size_t
|
||||
squote_len(const uint8_t *text, size_t size)
|
||||
{
|
||||
static char* single_quote_list[] = { "'", "'", "'", "'", NULL };
|
||||
char** p;
|
||||
|
||||
for (p = single_quote_list; *p; ++p) {
|
||||
size_t len = strlen(*p);
|
||||
if (size >= len && memcmp(text, *p, len) == 0) {
|
||||
return len;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Converts " or ' at very beginning or end of a word to left or right quote
|
||||
static int
|
||||
smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
|
||||
{
|
||||
|
@ -100,23 +120,33 @@ smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uin
|
|||
return 1;
|
||||
}
|
||||
|
||||
// Converts ' to left or right single quote; but the initial ' might be in
|
||||
// different forms, e.g. ' or ' or '.
|
||||
// 'squote_text' points to the original single quote, and 'squote_size' is its length.
|
||||
// 'text' points at the last character of the single-quote, e.g. ' or ;
|
||||
static size_t
|
||||
smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
smartypants_squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size,
|
||||
const uint8_t *squote_text, size_t squote_size)
|
||||
{
|
||||
if (size >= 2) {
|
||||
uint8_t t1 = tolower(text[1]);
|
||||
size_t next_squote_len = squote_len(text+1, size-1);
|
||||
|
||||
if (t1 == '\'') {
|
||||
if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
|
||||
return 1;
|
||||
// convert '' to “ or ”
|
||||
if (next_squote_len > 0) {
|
||||
uint8_t next_char = (size > 1+next_squote_len) ? text[1+next_squote_len] : 0;
|
||||
if (smartypants_quotes(ob, previous_char, next_char, 'd', &smrt->in_dquote))
|
||||
return next_squote_len;
|
||||
}
|
||||
|
||||
// Tom's, isn't, I'm, I'd
|
||||
if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
|
||||
(size == 3 || word_boundary(text[2]))) {
|
||||
BUFPUTSL(ob, "’");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// you're, you'll, you've
|
||||
if (size >= 3) {
|
||||
uint8_t t2 = tolower(text[2]);
|
||||
|
||||
|
@ -133,10 +163,18 @@ smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t pr
|
|||
if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote))
|
||||
return 0;
|
||||
|
||||
bufputc(ob, text[0]);
|
||||
bufput(ob, squote_text, squote_size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Converts ' to left or right single quote.
|
||||
static size_t
|
||||
smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
return smartypants_squote(ob, smrt, previous_char, text, size, text, 1);
|
||||
}
|
||||
|
||||
// Converts (c), (r), (tm)
|
||||
static size_t
|
||||
smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
|
@ -164,6 +202,7 @@ smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t pr
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Converts "--" to em-dash, etc.
|
||||
static size_t
|
||||
smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
|
@ -181,6 +220,7 @@ smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t prev
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Converts " etc.
|
||||
static size_t
|
||||
smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
|
@ -189,6 +229,11 @@ smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previ
|
|||
return 5;
|
||||
}
|
||||
|
||||
int len = squote_len(text, size);
|
||||
if (len > 0) {
|
||||
return (len-1) + smartypants_squote(ob, smrt, previous_char, text+(len-1), size-(len-1), text, len);
|
||||
}
|
||||
|
||||
if (size >= 4 && memcmp(text, "�", 4) == 0)
|
||||
return 3;
|
||||
|
||||
|
@ -196,6 +241,7 @@ smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previ
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Converts "..." to ellipsis
|
||||
static size_t
|
||||
smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
|
@ -213,6 +259,7 @@ smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t pr
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Converts `` to opening double quote
|
||||
static size_t
|
||||
smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
|
@ -221,9 +268,11 @@ smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t
|
|||
return 1;
|
||||
}
|
||||
|
||||
bufputc(ob, text[0]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Converts 1/2, 1/4, 3/4
|
||||
static size_t
|
||||
smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
|
@ -256,6 +305,7 @@ smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t pr
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Converts " to left or right double quote
|
||||
static size_t
|
||||
smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
|
|
|
@ -44,7 +44,7 @@ struct buf {
|
|||
size_t unit; /* reallocation unit size (0 = read-only buffer) */
|
||||
};
|
||||
|
||||
/* CONST_BUF: global buffer from a string litteral */
|
||||
/* CONST_BUF: global buffer from a string literal */
|
||||
#define BUF_STATIC(string) \
|
||||
{ (uint8_t *)string, sizeof string -1, sizeof string, 0, 0 }
|
||||
|
||||
|
@ -52,7 +52,7 @@ struct buf {
|
|||
#define BUF_VOLATILE(strname) \
|
||||
{ (uint8_t *)strname, strlen(strname), 0, 0, 0 }
|
||||
|
||||
/* BUFPUTSL: optimized bufputs of a string litteral */
|
||||
/* BUFPUTSL: optimized bufputs of a string literal */
|
||||
#define BUFPUTSL(output, literal) \
|
||||
bufput(output, literal, sizeof literal - 1)
|
||||
|
||||
|
|
Loading…
Reference in a new issue