439 lines
12 KiB
C
439 lines
12 KiB
C
/*
 * Copyright (c) 2011, Vicent Marti
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
|
|
|
|
#include "buffer.h"
|
|
#include "html.h"
|
|
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <ctype.h>
|
|
|
|
#if defined(_WIN32)
|
|
#define snprintf _snprintf
|
|
#endif
|
|
|
|
// Quote-tracking state carried across callbacks during one pass over the text.
struct smartypants_data {
	int in_squote;	// non-zero while a single quote has been opened and not yet closed
	int in_dquote;	// non-zero while a double quote has been opened and not yet closed
};
|
|
|
|
// Forward declarations of the per-character callbacks, listed in the order of
// their action codes in the dispatch table. All of them share one signature:
// (output buffer, quote state, byte preceding the trigger, pointer to the
// trigger byte, bytes remaining from the trigger).
static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
|
|
|
|
static size_t (*smartypants_cb_ptrs[])
|
|
(struct buf *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) =
|
|
{
|
|
NULL, /* 0 */
|
|
smartypants_cb__dash, /* 1 */
|
|
smartypants_cb__parens, /* 2 */
|
|
smartypants_cb__squote, /* 3 */
|
|
smartypants_cb__dquote, /* 4 */
|
|
smartypants_cb__amp, /* 5 */
|
|
smartypants_cb__period, /* 6 */
|
|
smartypants_cb__number, /* 7 */
|
|
smartypants_cb__ltag, /* 8 */
|
|
smartypants_cb__backtick, /* 9 */
|
|
smartypants_cb__escape, /* 10 */
|
|
};
|
|
|
|
// Maps every possible byte value to an action code (an index into the
// callback dispatch table). Bytes not listed here map to 0, i.e. they are
// copied through without any processing.
static const uint8_t smartypants_cb_chars[256] = {
	['"']  = 4,
	['&']  = 5,
	['\''] = 3,
	['(']  = 2,
	['-']  = 1,
	['.']  = 6,
	['1']  = 7,
	['3']  = 7,
	['<']  = 8,
	['\\'] = 10,
	['`']  = 9,
};
|
|
|
|
// Returns 1 when 'c' can delimit a word: the zero byte (used by callers to
// mean "no character here"), whitespace, or punctuation. Returns 0 otherwise.
static inline int
word_boundary(uint8_t c)
{
	if (c == 0)
		return 1;

	if (isspace(c))
		return 1;

	return ispunct(c) != 0;
}
|
|
|
|
// If 'text' begins with any spelling of a single quote -- a bare ' or one of
// the character references &#39;, &#x27; or &apos; -- returns the number of
// bytes that spelling occupies. Otherwise, returns zero.
//
// 'size' is the number of readable bytes at 'text'; a spelling is only
// matched when it fits entirely within that range. Matching is exact
// (case-sensitive).
static size_t
squote_len(const uint8_t *text, size_t size)
{
	// The entity spellings all start with '&', which is how the '&' callback
	// can recognise an escaped single quote through this function.
	static const char *const single_quote_list[] = { "'", "&#39;", "&#x27;", "&apos;", NULL };
	const char *const *p;

	for (p = single_quote_list; *p; ++p) {
		size_t len = strlen(*p);

		if (size >= len && memcmp(text, *p, len) == 0)
			return len;
	}

	return 0;
}
|
|
|
|
// Emits a left or right typographic quote entity for a quote character that
// sits at the very beginning or end of a word.
//
// 'quote' selects the entity family ('s' gives &lsquo;/&rsquo;, 'd' gives
// &ldquo;/&rdquo;). '*is_open' records whether a quote of that family is
// currently open: an opening quote must be preceded by a word boundary, a
// closing quote must be followed by one.
//
// Returns 1 after writing the entity and toggling '*is_open'; returns 0,
// writing nothing, when the quote is not at a suitable boundary.
static int
smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
{
	char entity[8];
	const int closing = *is_open;

	if (closing) {
		if (!word_boundary(next_char))
			return 0;
	} else {
		if (!word_boundary(previous_char))
			return 0;
	}

	snprintf(entity, sizeof(entity), "&%c%cquo;", closing ? 'r' : 'l', quote);
	*is_open = !closing;
	bufputs(ob, entity);
	return 1;
}
|
|
|
|
// Converts ' to left or right single quote; but the initial ' might be in
|
|
// different forms, e.g. ' or ' or '.
|
|
// 'squote_text' points to the original single quote, and 'squote_size' is its length.
|
|
// 'text' points at the last character of the single-quote, e.g. ' or ;
|
|
static size_t
|
|
smartypants_squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size,
|
|
const uint8_t *squote_text, size_t squote_size)
|
|
{
|
|
if (size >= 2) {
|
|
uint8_t t1 = tolower(text[1]);
|
|
size_t next_squote_len = squote_len(text+1, size-1);
|
|
|
|
// convert '' to “ or ”
|
|
if (next_squote_len > 0) {
|
|
uint8_t next_char = (size > 1+next_squote_len) ? text[1+next_squote_len] : 0;
|
|
if (smartypants_quotes(ob, previous_char, next_char, 'd', &smrt->in_dquote))
|
|
return next_squote_len;
|
|
}
|
|
|
|
// Tom's, isn't, I'm, I'd
|
|
if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
|
|
(size == 3 || word_boundary(text[2]))) {
|
|
BUFPUTSL(ob, "’");
|
|
return 0;
|
|
}
|
|
|
|
// you're, you'll, you've
|
|
if (size >= 3) {
|
|
uint8_t t2 = tolower(text[2]);
|
|
|
|
if (((t1 == 'r' && t2 == 'e') ||
|
|
(t1 == 'l' && t2 == 'l') ||
|
|
(t1 == 'v' && t2 == 'e')) &&
|
|
(size == 4 || word_boundary(text[3]))) {
|
|
BUFPUTSL(ob, "’");
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote))
|
|
return 0;
|
|
|
|
bufput(ob, squote_text, squote_size);
|
|
return 0;
|
|
}
|
|
|
|
// Callback for a bare ' character: hands it to smartypants_squote(), with the
// quote "as written" being the single byte at 'text'.
static size_t
smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	const size_t written_len = 1;	// a bare ' occupies exactly one byte

	return smartypants_squote(ob, smrt, previous_char, text, size, text, written_len);
}
|
|
|
|
// Callback for '(': converts (c), (r) and (tm) to the &copy;, &reg; and
// &trade; entities. The 'c', 'r' and 't'/'m' letters are matched
// case-insensitively. Any other '(' is copied through unchanged.
//
// Returns the number of bytes consumed beyond the opening parenthesis.
static size_t
smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3) {
		uint8_t t1 = tolower(text[1]);
		uint8_t t2 = tolower(text[2]);

		if (t1 == 'c' && t2 == ')') {
			BUFPUTSL(ob, "&copy;");
			return 2;
		}

		if (t1 == 'r' && t2 == ')') {
			BUFPUTSL(ob, "&reg;");
			return 2;
		}

		if (size >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')') {
			BUFPUTSL(ob, "&trade;");
			return 3;
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
|
|
|
|
// Callback for '-': converts "---" to the &mdash; entity and "--" to the
// &ndash; entity. A lone dash is copied through unchanged.
//
// Returns the number of bytes consumed beyond the first dash.
static size_t
smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	// The three-dash form must be tested first, since it also starts with "--".
	if (size >= 3 && text[1] == '-' && text[2] == '-') {
		BUFPUTSL(ob, "&mdash;");
		return 2;
	}

	if (size >= 2 && text[1] == '-') {
		BUFPUTSL(ob, "&ndash;");
		return 1;
	}

	bufputc(ob, text[0]);
	return 0;
}
|
|
|
|
// Converts " etc.
|
|
static size_t
|
|
smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
|
{
|
|
if (size >= 6 && memcmp(text, """, 6) == 0) {
|
|
if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote))
|
|
return 5;
|
|
}
|
|
|
|
int len = squote_len(text, size);
|
|
if (len > 0) {
|
|
return (len-1) + smartypants_squote(ob, smrt, previous_char, text+(len-1), size-(len-1), text, len);
|
|
}
|
|
|
|
if (size >= 4 && memcmp(text, "�", 4) == 0)
|
|
return 3;
|
|
|
|
bufputc(ob, '&');
|
|
return 0;
|
|
}
|
|
|
|
// Callback for '.': converts "..." and ". . ." to the &hellip; entity.
// Any other period is copied through unchanged.
//
// Returns the number of bytes consumed beyond the first period.
static size_t
smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3 && text[1] == '.' && text[2] == '.') {
		BUFPUTSL(ob, "&hellip;");
		return 2;
	}

	if (size >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.') {
		BUFPUTSL(ob, "&hellip;");
		return 4;
	}

	bufputc(ob, text[0]);
	return 0;
}
|
|
|
|
// Converts `` to opening double quote
|
|
static size_t
|
|
smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
|
{
|
|
if (size >= 2 && text[1] == '`') {
|
|
if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
|
|
return 1;
|
|
}
|
|
|
|
bufputc(ob, text[0]);
|
|
return 0;
|
|
}
|
|
|
|
// Callback for '1' and '3': converts the fractions 1/2, 1/4 and 3/4 to the
// &frac12;, &frac14; and &frac34; entities. The fraction must start at a word
// boundary and must be followed by a word boundary or the end of input;
// "1/4th" and "3/4ths" are accepted as well (only the fraction is replaced,
// the suffix is left for normal processing). Any other digit is copied
// through unchanged.
//
// Returns the number of bytes consumed beyond the first digit.
static size_t
smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (word_boundary(previous_char) && size >= 3) {
		if (text[0] == '1' && text[1] == '/' && text[2] == '2') {
			if (size == 3 || word_boundary(text[3])) {
				BUFPUTSL(ob, "&frac12;");
				return 2;
			}
		}

		if (text[0] == '1' && text[1] == '/' && text[2] == '4') {
			if (size == 3 || word_boundary(text[3]) ||
				(size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) {
				BUFPUTSL(ob, "&frac14;");
				return 2;
			}
		}

		if (text[0] == '3' && text[1] == '/' && text[2] == '4') {
			if (size == 3 || word_boundary(text[3]) ||
				(size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) {
				BUFPUTSL(ob, "&frac34;");
				return 2;
			}
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
|
|
|
|
// Converts " to left or right double quote
|
|
static size_t
|
|
smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
|
{
|
|
if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote))
|
|
BUFPUTSL(ob, """);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static size_t
|
|
smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
|
{
|
|
static const char *skip_tags[] = {
|
|
"pre", "code", "var", "samp", "kbd", "math", "script", "style"
|
|
};
|
|
static const size_t skip_tags_count = 8;
|
|
|
|
size_t tag, i = 0;
|
|
|
|
while (i < size && text[i] != '>')
|
|
i++;
|
|
|
|
for (tag = 0; tag < skip_tags_count; ++tag) {
|
|
if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN)
|
|
break;
|
|
}
|
|
|
|
if (tag < skip_tags_count) {
|
|
for (;;) {
|
|
while (i < size && text[i] != '<')
|
|
i++;
|
|
|
|
if (i == size)
|
|
break;
|
|
|
|
if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE)
|
|
break;
|
|
|
|
i++;
|
|
}
|
|
|
|
while (i < size && text[i] != '>')
|
|
i++;
|
|
}
|
|
|
|
bufput(ob, text, i + 1);
|
|
return i;
|
|
}
|
|
|
|
// Callback for '\': a backslash followed by one of \ " ' . - ` writes that
// character literally, so it is not processed by the other callbacks, and
// consumes it. In every other case, including a backslash that is the last
// byte of the input, the backslash itself is copied through.
//
// Returns 1 when an escaped character was consumed, 0 otherwise.
static size_t
smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 2) {
		switch (text[1]) {
		case '\\':
		case '"':
		case '\'':
		case '.':
		case '-':
		case '`':
			bufputc(ob, text[1]);
			return 1;

		default:
			break;
		}
	}

	// Not an escape sequence: keep the backslash rather than dropping it.
	bufputc(ob, '\\');
	return 0;
}
|
|
|
|
// Disabled (never compiled) table form of the substitutions that the
// callbacks in this file implement by hand. Each entry lists the trigger
// byte, the pattern, the replacement entity and the number of extra bytes to
// skip.
#if 0
static struct {
	uint8_t c0;
	const uint8_t *pattern;
	const uint8_t *entity;
	int skip;
} smartypants_subs[] = {
	{ '\'', "'s>",      "&rsquo;",  0 },
	{ '\'', "'t>",      "&rsquo;",  0 },
	{ '\'', "'re>",     "&rsquo;",  0 },
	{ '\'', "'ll>",     "&rsquo;",  0 },
	{ '\'', "'ve>",     "&rsquo;",  0 },
	{ '\'', "'m>",      "&rsquo;",  0 },
	{ '\'', "'d>",      "&rsquo;",  0 },
	{ '-',  "--",       "&mdash;",  1 },
	{ '-',  "<->",      "&ndash;",  0 },
	{ '.',  "...",      "&hellip;", 2 },
	{ '.',  ". . .",    "&hellip;", 4 },
	{ '(',  "(c)",      "&copy;",   2 },
	{ '(',  "(r)",      "&reg;",    2 },
	{ '(',  "(tm)",     "&trade;",  3 },
	{ '3',  "<3/4>",    "&frac34;", 2 },
	{ '3',  "<3/4ths>", "&frac34;", 2 },
	{ '1',  "<1/2>",    "&frac12;", 2 },
	{ '1',  "<1/4>",    "&frac14;", 2 },
	{ '1',  "<1/4th>",  "&frac14;", 2 },
	{ '&',  "&#0;",     0,          3 },
};
#endif
|
|
|
|
void
|
|
sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size)
|
|
{
|
|
size_t i;
|
|
struct smartypants_data smrt = {0, 0};
|
|
|
|
if (!text)
|
|
return;
|
|
|
|
bufgrow(ob, size);
|
|
|
|
for (i = 0; i < size; ++i) {
|
|
size_t org;
|
|
uint8_t action = 0;
|
|
|
|
org = i;
|
|
while (i < size && (action = smartypants_cb_chars[text[i]]) == 0)
|
|
i++;
|
|
|
|
if (i > org)
|
|
bufput(ob, text + org, i - org);
|
|
|
|
if (i < size) {
|
|
i += smartypants_cb_ptrs[(int)action]
|
|
(ob, &smrt, i ? text[i - 1] : 0, text + i, size - i);
|
|
}
|
|
}
|
|
}
|
|
|
|
|