Fix SmartyPants handling of single quotes

This commit is contained in:
Mike Morearty 2013-09-18 18:17:42 -05:00 committed by Devin Torres
parent 92c9d20b0a
commit 8d30051774
2 changed files with 57 additions and 7 deletions

View file

@ -83,6 +83,26 @@ word_boundary(uint8_t c)
return c == 0 || isspace(c) || ispunct(c);
}
// If 'text' begins with any kind of single quote -- the literal "'" or one of
// its HTML entity spellings ("&#39;", "&#x27;", "&apos;") -- returns the number
// of bytes that make up that quote. Otherwise, returns zero.
//
// 'size' is the number of readable bytes at 'text'; a candidate spelling is
// only compared when it fits entirely within those bytes.
static size_t
squote_len(const uint8_t *text, size_t size)
{
	// Each entry must be a distinct spelling: the entity forms are the ones
	// that start with '&' and are longer than one byte.
	static const char *const single_quote_list[] = { "'", "&#39;", "&#x27;", "&apos;", NULL };
	const char *const *p;

	for (p = single_quote_list; *p; ++p) {
		size_t len = strlen(*p);
		// Check the length first so memcmp never reads past 'size' bytes.
		if (size >= len && memcmp(text, *p, len) == 0) {
			return len;
		}
	}
	return 0;
}
// Converts " or ' at very beginning or end of a word to left or right quote
static int
smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
{
@ -100,23 +120,33 @@ smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uin
return 1;
}
// Converts ' to left or right single quote; but the initial ' might be in
// different forms, e.g. ' or &#39; or &apos;.
// 'squote_text' points to the original single quote, and 'squote_size' is its length.
// 'text' points at the last character of the single-quote, e.g. ' or ;
static size_t
smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
smartypants_squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size,
const uint8_t *squote_text, size_t squote_size)
{
if (size >= 2) {
uint8_t t1 = tolower(text[1]);
size_t next_squote_len = squote_len(text+1, size-1);
if (t1 == '\'') {
if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
return 1;
// convert '' to “ or ”
if (next_squote_len > 0) {
uint8_t next_char = (size > 1+next_squote_len) ? text[1+next_squote_len] : 0;
if (smartypants_quotes(ob, previous_char, next_char, 'd', &smrt->in_dquote))
return next_squote_len;
}
// Tom's, isn't, I'm, I'd
if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
(size == 3 || word_boundary(text[2]))) {
BUFPUTSL(ob, "’");
return 0;
}
// you're, you'll, you've
if (size >= 3) {
uint8_t t2 = tolower(text[2]);
@ -133,10 +163,18 @@ smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t pr
if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote))
return 0;
bufputc(ob, text[0]);
bufput(ob, squote_text, squote_size);
return 0;
}
// Callback for a literal ' character: converts it to a left or right single
// quote by handing off to smartypants_squote. The quote here is the single
// byte at 'text', so 'text' doubles as the original quote text.
static size_t
smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	const uint8_t *quote_start = text;
	const size_t quote_bytes = 1;

	return smartypants_squote(ob, smrt, previous_char, text, size, quote_start, quote_bytes);
}
// Converts (c), (r), (tm)
static size_t
smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
@ -164,6 +202,7 @@ smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t pr
return 0;
}
// Converts "--" to em-dash, etc.
static size_t
smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
@ -181,6 +220,7 @@ smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t prev
return 0;
}
// Converts &quot; etc.
static size_t
smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
@ -189,6 +229,11 @@ smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previ
return 5;
}
int len = squote_len(text, size);
if (len > 0) {
return (len-1) + smartypants_squote(ob, smrt, previous_char, text+(len-1), size-(len-1), text, len);
}
if (size >= 4 && memcmp(text, "�", 4) == 0)
return 3;
@ -196,6 +241,7 @@ smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previ
return 0;
}
// Converts "..." to ellipsis
static size_t
smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
@ -213,6 +259,7 @@ smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t pr
return 0;
}
// Converts `` to opening double quote
static size_t
smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
@ -221,9 +268,11 @@ smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t
return 1;
}
bufputc(ob, text[0]);
return 0;
}
// Converts 1/2, 1/4, 3/4
static size_t
smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
@ -256,6 +305,7 @@ smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t pr
return 0;
}
// Converts " to left or right double quote
static size_t
smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{

View file

@ -44,7 +44,7 @@ struct buf {
size_t unit; /* reallocation unit size (0 = read-only buffer) */
};
/* CONST_BUF: global buffer from a string litteral */
/* CONST_BUF: global buffer from a string literal */
#define BUF_STATIC(string) \
{ (uint8_t *)string, sizeof string -1, sizeof string, 0, 0 }
@ -52,7 +52,7 @@ struct buf {
#define BUF_VOLATILE(strname) \
{ (uint8_t *)strname, strlen(strname), 0, 0, 0 }
/* BUFPUTSL: optimized bufputs of a string litteral */
/* BUFPUTSL: optimized bufputs of a string literal */
#define BUFPUTSL(output, literal) \
bufput(output, literal, sizeof literal - 1)