Squashed commits for rewrite

This commit is contained in:
Xavier Mendez 2014-11-03 23:02:45 +01:00
parent a88343c593
commit f1ad70fef4
87 changed files with 24993 additions and 9862 deletions

18
.gitignore vendored
View file

@ -1,9 +1,13 @@
*.o
*.obj
*.exe
*.dll
*.exp
*.lib
hoedown
hoedown.dll
hoedown.exp
hoedown.lib
smartypants
libhoedown.so*
libhoedown.*
Makefile
CMakeFiles
CMakeCache.txt
cmake_install.cmake
install_manifest.txt
compile_commands.json

3
.gitmodules vendored Normal file
View file

@ -0,0 +1,3 @@
[submodule "stmd"]
path = stmd
url = https://github.com/jgm/stmd

View file

@ -4,6 +4,4 @@ compiler:
- gcc
perl:
- "5.12"
before_install:
- sudo apt-get install -qq tidy
script: make test
script: cmake . && make test

62
CMakeLists.txt Normal file
View file

@ -0,0 +1,62 @@
# NOTE(review): 2.6 is kept because the `test` target name and the LOCATION
# lookup further down rely on the old policy defaults (CMP0037 / CMP0026).
# Recent CMake releases reject such an old minimum; raise it together with
# those two spots, not on its own.
cmake_minimum_required (VERSION 2.6)

# Every source in this project is C, so do not require a C++ compiler.
project (Hoedown C)

# User-facing switches; override with -D<NAME>=ON|OFF at configure time.
option (BUILD_SHARED_LIBS "Whether to build Hoedown as a shared library" ON)
option (BUILD_CLI "Whether to build the hoedown executable" ON)

# Compiler flags attached to each Hoedown target through COMPILE_FLAGS.
set (FLAGS "-std=c99 -Wall -Wextra -Wno-unused-parameter")
# Pregenerated files
#
# These targets are not part of the default build; invoke them explicitly
# (e.g. `make _html_blocks.c`) to regenerate the perfect-hash tables with
# gperf. Input and output paths are relative to the source tree, so the
# commands run from there; otherwise an out-of-source build would look for
# data/ and src/ inside the build directory. gperf writes the file itself
# (--output-file) so no shell redirection is needed.
add_custom_target(_html_blocks.c
  gperf -L ANSI-C -N hoedown_find_block_tag -7 -c -C -E -S 1 --ignore-case -m100
        --output-file=src/_html_blocks.c data/html_blocks.gperf
  WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
  VERBATIM
)
add_custom_target(_autolink_schemes.c
  gperf -L ANSI-C -N hoedown_find_autolink_scheme -l -c -C -E -S 1 --ignore-case -m100
        --output-file=src/_autolink_schemes.c data/autolink_schemes.gperf
  WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
  VERBATIM
)
# Library
#
# Built shared or static depending on BUILD_SHARED_LIBS. The target is called
# `libhoedown` so it does not clash with the `hoedown` executable; OUTPUT_NAME
# gives the produced file the conventional name (libhoedown.so, hoedown.dll...).
add_library(libhoedown
  src/autolink.c
  src/buffer.c
  src/document.c
  src/escape.c
  src/html.c
  src/pool.c
  src/version.c
  src/_html_blocks.c
  src/_autolink_schemes.c
)
set_target_properties(libhoedown PROPERTIES
  # Quoted so an empty or overridden FLAGS cannot shift the property list.
  COMPILE_FLAGS "${FLAGS}"
  OUTPUT_NAME hoedown
  SOVERSION 4
  VERSION 4.0.0
)

# Shared objects and archives go to lib/; on DLL platforms the runtime part
# (the .dll) belongs next to the executables in bin/.
install(TARGETS libhoedown
  RUNTIME DESTINATION bin
  LIBRARY DESTINATION lib
  ARCHIVE DESTINATION lib
)

# Public headers only: files starting with an underscore are skipped.
install(DIRECTORY src/ DESTINATION include/hoedown
  FILES_MATCHING PATTERN "*.h"
  PATTERN "_*" EXCLUDE
)
# Executables
#
# The command-line tool and the spec test target, both optional (BUILD_CLI).
if (BUILD_CLI)
  add_executable(hoedown bin/hoedown.c bin/noop.c)
  set_target_properties(hoedown PROPERTIES COMPILE_FLAGS "${FLAGS}")
  include_directories(${PROJECT_SOURCE_DIR}/src)

  target_link_libraries(hoedown libhoedown)
  # bin/hoedown.c calls clock_gettime(), which lives in librt on some
  # systems. Not every platform ships librt, so link it only where found.
  find_library(HOEDOWN_RT_LIBRARY rt)
  if (HOEDOWN_RT_LIBRARY)
    target_link_libraries(hoedown ${HOEDOWN_RT_LIBRARY})
  endif ()

  install(TARGETS hoedown DESTINATION bin)

  # `make test`: run the spec suite from the stmd submodule against the
  # freshly built binary. The script and spec paths are relative to the
  # source tree, hence the explicit working directory.
  # NOTE(review): the LOCATION property is deprecated in newer CMake; switch
  # to $<TARGET_FILE:hoedown> once the minimum required version allows it.
  get_target_property(HOEDOWN_LOCATION hoedown LOCATION)
  add_custom_target(test
    perl stmd/runtests.pl stmd/spec.txt ${HOEDOWN_LOCATION}
    WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
    VERBATIM
  )
  # Target-level ordering: build the executable before running the tests.
  add_dependencies(test hoedown)
endif ()

View file

@ -1,68 +0,0 @@
# Build rules for the Hoedown library, the `hoedown` and `smartypants`
# command-line tools, and the test runners.

# Flags used for every object file; src/ is added to the include path.
CFLAGS = -g -O3 -ansi -pedantic -Wall -Wextra -Wno-unused-parameter -Isrc
# Request position-independent code everywhere except on Windows.
ifneq ($(OS),Windows_NT)
CFLAGS += -fPIC
endif
# Object files that make up the library (objects, despite the _SRC name).
HOEDOWN_SRC=\
src/autolink.o \
src/buffer.o \
src/document.o \
src/escape.o \
src/html.o \
src/html_blocks.o \
src/html_smartypants.o \
src/stack.o \
src/version.o
.PHONY: all test test-pl clean
# Default goal: shared library plus both executables.
all: libhoedown.so hoedown smartypants
# Libraries
# libhoedown.so is a symbolic link to the versioned shared object.
libhoedown.so: libhoedown.so.1
ln -f -s $^ $@
libhoedown.so.1: $(HOEDOWN_SRC)
$(CC) -shared $^ $(LDFLAGS) -o $@
# Static archive; not part of `all`, build it explicitly.
libhoedown.a: $(HOEDOWN_SRC)
$(AR) rcs libhoedown.a $^
# Executables
# Both tools link the library objects directly rather than the shared library.
hoedown: bin/hoedown.o $(HOEDOWN_SRC)
$(CC) $^ $(LDFLAGS) -o $@
smartypants: bin/smartypants.o $(HOEDOWN_SRC)
$(CC) $^ $(LDFLAGS) -o $@
# Perfect hashing
# Regenerates the block-tag lookup table with gperf.
src/html_blocks.c: html_block_names.gperf
gperf -L ANSI-C -N hoedown_find_block_tag -c -C -E -S 1 --ignore-case -m100 $^ > $@
# Testing
# `test` runs the Python runner; `test-pl` runs MarkdownTest 1.0.3 with --tidy.
test: hoedown
python test/runner.py
test-pl: hoedown
perl test/MarkdownTest_1.0.3/MarkdownTest.pl \
--script=./hoedown --testdir=test/MarkdownTest_1.0.3/Tests --tidy
# Housekeeping
clean:
$(RM) src/*.o bin/*.o
$(RM) libhoedown.so libhoedown.so.1 libhoedown.a
$(RM) hoedown smartypants hoedown.exe smartypants.exe
# Generic object compilations
%.o: %.c
$(CC) $(CFLAGS) -c -o $@ $<
# html_blocks.c gets one extra warning disabled (presumably because of the
# gperf-generated code -- confirm before changing).
src/html_blocks.o: src/html_blocks.c
$(CC) $(CFLAGS) -Wno-static-in-inline -c -o $@ $<

View file

@ -1,36 +0,0 @@
# Build rules for the `cl` compiler: hoedown.dll plus the hoedown.exe and
# smartypants.exe tools.

CC = cl
# Flags for every object; src is added to the include path.
CFLAGS = /O2 /sdl /Isrc /D_CRT_SECURE_NO_WARNINGS
# Object files that make up the library (objects, despite the _SRC name).
HOEDOWN_SRC = \
src\autolink.obj \
src\buffer.obj \
src\document.obj \
src\escape.obj \
src\html.obj \
src\html_blocks.obj \
src\html_smartypants.obj \
src\stack.obj \
src\version.obj
# Default goal: the DLL and both executables.
all: hoedown.dll hoedown.exe smartypants.exe
# The DLL is linked together with hoedown.def.
hoedown.dll: $(HOEDOWN_SRC) hoedown.def
$(CC) $(HOEDOWN_SRC) hoedown.def /link /DLL $(LDFLAGS) /out:$@
# Both tools link the library objects directly rather than the DLL.
hoedown.exe: bin\hoedown.obj $(HOEDOWN_SRC)
$(CC) bin\hoedown.obj $(HOEDOWN_SRC) /link $(LDFLAGS) /out:$@
smartypants.exe: bin\smartypants.obj $(HOEDOWN_SRC)
$(CC) bin\smartypants.obj $(HOEDOWN_SRC) /link $(LDFLAGS) /out:$@
# Housekeeping
# Note: bin\*.obj files are not removed by this rule.
clean:
del $(HOEDOWN_SRC)
del hoedown.dll hoedown.exp hoedown.lib
del hoedown.exe smartypants.exe
# Generic object compilations
.c.obj:
$(CC) $(CFLAGS) /c $< /Fo$@

View file

@ -10,99 +10,90 @@
#define count_of(arr) (sizeof(arr)/sizeof(0[arr]))
int
parseint(const char *string, long *result)
{
char *end;
errno = 0;
*result = strtol(string, &end, 10);
return !(*end || errno);
/*
 * Parse `string` as a base-10 integer.
 *
 * The converted value is stored in `*result` (strtol semantics: leading
 * whitespace and an optional sign are accepted).
 *
 * Returns 1 when the whole string was a valid number that fits in a long,
 * 0 otherwise: no digits at all, trailing characters, or out of range.
 */
static int parseint(const char *string, long *result) {
    char *end;

    errno = 0;
    *result = strtol(string, &end, 10);

    /* end == string means no conversion happened (e.g. an empty string).
     * strtol may report that without setting errno, so test it explicitly
     * instead of silently accepting "" as the number 0. */
    return end != string && *end == '\0' && errno == 0;
}
const char *
strprefix(const char *str, const char *prefix)
{
while (*prefix) {
if (!(*str && *str == *prefix)) return 0;
prefix++; str++;
}
return str;
/*
 * Check whether `str` begins with `prefix`.
 *
 * Returns a pointer to the first character of `str` after the prefix, or
 * NULL when `str` does not start with it. An empty prefix always matches
 * and yields `str` itself.
 */
static const char *strprefix(const char *str, const char *prefix) {
    for (; *prefix != '\0'; prefix++, str++) {
        /* Also covers `str` ending early: its NUL differs from *prefix. */
        if (*str != *prefix)
            return NULL;
    }
    return str;
}
void
print_option(char short_opt, const char *long_opt, const char *description)
{
if (short_opt)
printf(" -%c, ", short_opt);
else
printf(" ");
static void print_option(char short_opt, const char *long_opt, const char *description) {
if (short_opt)
printf(" -%c, ", short_opt);
else
printf(" ");
printf("--%-13s %s\n", long_opt, description);
printf("--%-15s %s\n", long_opt, description);
}
void
print_version()
{
printf("Built with Hoedown " HOEDOWN_VERSION ".\n");
static void print_version() {
printf("Built with Hoedown " HOEDOWN_VERSION ".\n");
}
int
parse_options(
int argc, char **argv,
int(*parse_short_option)(char opt, char *next, void *opaque),
int(*parse_long_option)(char *opt, char *next, void *opaque),
int(*parse_argument)(int argn, char *arg, int is_forced, void *opaque),
void *opaque)
{
int result;
int i = 1, regular_args = 0;
static int parse_options(
int argc, char **argv,
int(*parse_short_option)(char opt, char *next, void *opaque),
int(*parse_long_option)(char *opt, char *next, void *opaque),
int(*parse_argument)(int argn, char *arg, int is_forced, void *opaque),
void *opaque
){
int result;
int i = 1, regular_args = 0;
/* Parse options mixed with arguments */
while (i < argc) {
char *arg = argv[i];
/* Parse options mixed with arguments */
while (i < argc) {
char *arg = argv[i];
if (arg[0] == '-' && arg[1]) {
char *next_arg = (i+1 < argc) ? argv[i+1] : NULL;
if (arg[0] == '-' && arg[1]) {
char *next_arg = (i+1 < argc) ? argv[i+1] : NULL;
if (arg[1] == '-' && !arg[2]) {
/* '--' signals end of options */
i++;
break;
}
if (arg[1] == '-' && !arg[2]) {
/* '--' signals end of options */
i++;
break;
}
if (arg[1] == '-') {
/* Long option */
result = parse_long_option(arg + 2, next_arg, opaque);
if (!result) return 0;
i += result;
} else {
/* Sequence of short options */
size_t pos;
for (pos = 1; arg[pos]; pos++) {
char *next = (arg[pos+1]) ? arg + pos+1 : next_arg;
result = parse_short_option(arg[pos], next, opaque);
if (!result) return 0;
if (result == 2) {
i++;
break;
}
}
i++;
}
} else {
/* Argument */
result = parse_argument(regular_args++, arg, 0, opaque);
if (!result) return 0;
i++;
}
}
if (arg[1] == '-') {
/* Long option */
result = parse_long_option(arg + 2, next_arg, opaque);
if (!result) return 0;
i += result;
} else {
/* Sequence of short options */
size_t pos;
for (pos = 1; arg[pos]; pos++) {
char *next = (arg[pos+1]) ? arg + pos+1 : next_arg;
result = parse_short_option(arg[pos], next, opaque);
if (!result) return 0;
if (result == 2) {
i++;
break;
}
}
i++;
}
} else {
/* Argument */
result = parse_argument(regular_args++, arg, 0, opaque);
if (!result) return 0;
i++;
}
}
/* Parse rest as forced arguments */
while (i < argc) {
result = parse_argument(regular_args++, argv[i], 1, opaque);
if (!result) return 0;
i++;
}
/* Parse rest as forced arguments */
while (i < argc) {
result = parse_argument(regular_args++, argv[i], 1, opaque);
if (!result) return 0;
i++;
}
return 1;
return 1;
}

View file

@ -1,67 +1,93 @@
#if __STDC_VERSION__ >= 199901L
#define _XOPEN_SOURCE 600
#else
#define _XOPEN_SOURCE 500
#endif /* __STDC_VERSION__ */
#include "document.h"
#include "html.h"
#include "common.h"
/*#include <time.h>*/
#include "noop.h"
#include <time.h>
/* FEATURES INFO / DEFAULTS */
enum renderer_type {
RENDERER_HTML,
RENDERER_HTML_TOC
RENDERER_HTML,
RENDERER_NOOP
};
struct extension_category_info {
unsigned int flags;
const char *option_name;
const char *label;
/* A base preset: a named feature set that replaces the current selection
 * when its option is given on the command line. */
struct preset_info {
/* Feature set installed by the preset. */
hoedown_features flags;
/* Long option name, without the leading "--". */
const char *option_name;
/* Short tag appended in brackets to the help text of each feature the
 * preset includes. */
const char *label;
/* Help text shown for the preset option. */
const char *description;
};
struct extension_info {
unsigned int flag;
const char *option_name;
const char *description;
/* A group of features that can be switched on or off together. */
struct feature_category_info {
/* Mask covering every feature that belongs to the category. */
hoedown_features flags;
/* Category name as used in options, after the category or negative prefix. */
const char *option_name;
/* Heading printed above the category in the help output. */
const char *label;
};
struct html_flag_info {
unsigned int flag;
const char *option_name;
const char *description;
/* A single feature that can be toggled from the command line. */
struct feature_info {
/* Flag that the option sets, or clears when negated. */
hoedown_features flag;
/* Long option name, without the leading "--". */
const char *option_name;
/* Help text shown for the option. */
const char *description;
};
static struct extension_category_info categories_info[] = {
{HOEDOWN_EXT_BLOCK, "block", "Block extensions"},
{HOEDOWN_EXT_SPAN, "span", "Span extensions"},
{HOEDOWN_EXT_FLAGS, "flags", "Other flags"},
{HOEDOWN_EXT_NEGATIVE, "negative", "Negative flags"},
/* Base presets. The first entry is the default: main() initialises the
 * feature set from presets_info[0], so keep CommonMark first. */
static struct preset_info presets_info[] = {
{HOEDOWN_FT_COMMONMARK, "stmd", "CommonMark", "Parse standard CommonMark. The default."},
{HOEDOWN_FT_MARKDOWN, "markdown", "Markdown", "Parse classic Markdown."},
{0, "none", "None", "Start with all flags disabled."},
};
static struct extension_info extensions_info[] = {
{HOEDOWN_EXT_TABLES, "tables", "Parse PHP-Markdown style tables."},
{HOEDOWN_EXT_FENCED_CODE, "fenced-code", "Parse fenced code blocks."},
{HOEDOWN_EXT_FOOTNOTES, "footnotes", "Parse footnotes."},
{HOEDOWN_EXT_AUTOLINK, "autolink", "Automatically turn safe URLs into links."},
{HOEDOWN_EXT_STRIKETHROUGH, "strikethrough", "Parse ~~stikethrough~~ spans."},
{HOEDOWN_EXT_UNDERLINE, "underline", "Parse _underline_ instead of emphasis."},
{HOEDOWN_EXT_HIGHLIGHT, "highlight", "Parse ==highlight== spans."},
{HOEDOWN_EXT_QUOTE, "quote", "Render \"quotes\" as <q>quotes</q>."},
{HOEDOWN_EXT_SUPERSCRIPT, "superscript", "Parse super^script."},
{HOEDOWN_EXT_MATH, "math", "Parse TeX $$math$$ syntax, Kramdown style."},
{HOEDOWN_EXT_NO_INTRA_EMPHASIS, "disable-intra-emphasis", "Disable emphasis_between_words."},
{HOEDOWN_EXT_SPACE_HEADERS, "space-headers", "Require a space after '#' in headers."},
{HOEDOWN_EXT_MATH_EXPLICIT, "math-explicit", "Instead of guessing by context, parse $inline math$ and $$always block math$$ (requires --math)."},
{HOEDOWN_EXT_DISABLE_INDENTED_CODE, "disable-indented-code", "Don't parse indented code blocks."},
/* Feature categories. Each one becomes a help section and can be enabled
 * as a whole with the category prefix or disabled with the negative prefix. */
static struct feature_category_info categories_info[] = {
{HOEDOWN_FT_BLOCK, "block", "Block construct features"},
{HOEDOWN_FT_INLINE, "inline", "Inline construct features"},
{HOEDOWN_FT_OTHER, "other", "Other features"},
{HOEDOWN_FT_FLAGS, "flags", "Feature flags"},
};
static struct html_flag_info html_flags_info[] = {
{HOEDOWN_HTML_SKIP_HTML, "skip-html", "Strip all HTML tags."},
{HOEDOWN_HTML_ESCAPE, "escape", "Escape all HTML."},
{HOEDOWN_HTML_HARD_WRAP, "hard-wrap", "Render each linebreak as <br>."},
{HOEDOWN_HTML_USE_XHTML, "xhtml", "Render XHTML."},
/* Every feature that can be toggled individually: the flag, its long option
 * name and the help text. The help output lists an entry under each category
 * whose mask contains its flag. */
static struct feature_info features_info[] = {
    {HOEDOWN_FT_DIRECTIVE, "directive", "Parse generic directive syntax."},
    {HOEDOWN_FT_INDENTED_CODE_BLOCK, "indented-code-block", "Parse indented code blocks."},
    {HOEDOWN_FT_FENCED_CODE_BLOCK, "fenced-code-block", "Parse fenced code blocks."},
    {HOEDOWN_FT_HORIZONTAL_RULE, "horizontal-rule", "Parse horizontal rules."},
    {HOEDOWN_FT_ATX_HEADER, "atx-header", "Parse ATX headers."},
    {HOEDOWN_FT_SETEXT_HEADER, "setext-header", "Parse Setext headers."},
    {HOEDOWN_FT_LIST, "list", "Parse bullet and numbered lists."},
    {HOEDOWN_FT_QUOTE_BLOCK, "quote-block", "Parse email-style quote blocks."},
    {HOEDOWN_FT_HTML_BLOCK, "html-block", "Parse raw HTML blocks."},
    {HOEDOWN_FT_TABLE, "table", "Parse Kramdown-style tables."},

    {HOEDOWN_FT_ROLE, "role", "Parse generic role syntax."},
    {HOEDOWN_FT_ESCAPE, "escape", "Parse backslash escapes."},
    {HOEDOWN_FT_HARD_LINEBREAK, "hard-linebreak", "Parse backslash linebreaks."},
    {HOEDOWN_FT_LINEBREAK, "linebreak", "Parse two-space linebreaks."},
    {HOEDOWN_FT_SOFT_LINEBREAK, "soft-linebreak", "Parse newlines as linebreaks."},
    {HOEDOWN_FT_URI_AUTOLINK, "uri-autolink", "Parse URI autolinks between angle brackets."},
    {HOEDOWN_FT_EMAIL_AUTOLINK, "email-autolink", "Parse email autolinks between angle brackets."},
    {HOEDOWN_FT_HTML, "html", "Parse inline HTML."},
    {HOEDOWN_FT_ENTITY, "entity", "Parse HTML entities."},
    {HOEDOWN_FT_CODE_SPAN, "code-span", "Parse code spans between backticks."},
    {HOEDOWN_FT_EMPHASIS, "emphasis", "Parse emphasis and strong emphasis."},
    {HOEDOWN_FT_LINK, "link", "Parse links and link references."},
    {HOEDOWN_FT_MATH, "math", "Parse Kramdown-style math spans."},
    {HOEDOWN_FT_SUPERSCRIPT, "superscript", "Parse superscript spans."},
    {HOEDOWN_FT_QUOTE, "quote", "Parse quote-delimited spans."},
    {HOEDOWN_FT_STRIKETHROUGH, "strikethrough", "Parse strikethrough spans."},
    {HOEDOWN_FT_HIGHLIGHT, "highlight", "Parse highlight spans."},

    {HOEDOWN_FT_FOOTNOTE, "footnote", "Parse footnotes and footnote references."},

    {HOEDOWN_FT_PREPROCESS, "preprocess", "Preprocess the input. Hoedown can give bad results if input is not preprocessed."},
    {HOEDOWN_FT_LINK_IMAGE, "link-image", "Allow links preceded by a bang to be rendered as images."},
    {HOEDOWN_FT_INTRA_EMPHASIS, "intra-emphasis", "Requires --emphasis. Allow underscores between words to be parsed as emphasis."},
    {HOEDOWN_FT_MATH_EXPLICIT, "math-explicit", "Requires --math. Enables parsing $...$ as inline math and restricts $$...$$ to block math."},
};
static const char *category_prefix = "all-";
@ -74,386 +100,383 @@ static const char *negative_prefix = "no-";
/* PRINT HELP */
void
print_help(const char *basename)
{
size_t i;
size_t e;
static void print_feature_option(struct feature_info *feature) {
size_t i;
/* usage */
printf("Usage: %s [OPTION]... [FILE]\n\n", basename);
/* prepare the description */
char *desc = malloc(strlen(feature->description)+40);
memcpy(desc, feature->description, strlen(feature->description)+1);
/* description */
printf("Process the Markdown in FILE (or standard input) and render it to standard output, using the Hoedown library. "
"Parsing and rendering can be customized through the options below. The default is to parse pure markdown and output HTML.\n\n");
/* add tags to the description */
for (i = 0; i < count_of(presets_info); i++)
if (feature->flag & presets_info[i].flags) {
strcat(desc, " [");
strcat(desc, presets_info[i].label);
strcat(desc, "]");
}
/* main options */
printf("Main options:\n");
print_option('n', "max-nesting=N", "Maximum level of block nesting parsed. Default is " str(DEF_MAX_NESTING) ".");
print_option('t', "toc-level=N", "Maximum level for headers included in the TOC. Zero disables TOC (the default).");
print_option( 0, "html", "Render (X)HTML. The default.");
print_option( 0, "html-toc", "Render the Table of Contents in (X)HTML.");
print_option('T', "time", "Show time spent in rendering.");
print_option('i', "input-unit=N", "Reading block size. Default is " str(DEF_IUNIT) ".");
print_option('o', "output-unit=N", "Writing block size. Default is " str(DEF_OUNIT) ".");
print_option('h', "help", "Print this help text.");
print_option('v', "version", "Print Hoedown version.");
printf("\n");
/* print feature option */
print_option( 0, feature->option_name, desc);
}
/* extensions */
for (i = 0; i < count_of(categories_info); i++) {
struct extension_category_info *category = categories_info+i;
printf("%s (--%s%s):\n", category->label, category_prefix, category->option_name);
for (e = 0; e < count_of(extensions_info); e++) {
struct extension_info *extension = extensions_info+e;
if (extension->flag & category->flags) {
print_option( 0, extension->option_name, extension->description);
}
}
printf("\n");
}
static void print_help(const char *basename) {
size_t i, e;
/* html-specific */
printf("HTML-specific options:\n");
for (i = 0; i < count_of(html_flags_info); i++) {
struct html_flag_info *html_flag = html_flags_info+i;
print_option( 0, html_flag->option_name, html_flag->description);
}
printf("\n");
/* usage */
printf("Usage: %s [OPTION]... [FILE]\n\n", basename);
/* ending */
printf("Flags and extensions can be negated by prepending 'no' to them, as in '--no-tables', '--no-span' or '--no-escape'. "
"Options are processed in order, so in case of contradictory options the last specified stands.\n\n");
/* description */
printf("Process the Markdown in FILE (or standard input) and render it to standard output, using the Hoedown library. "
"Parsing and rendering can be customized through the options below. The default is to parse pure CommonMark and output HTML.\n\n");
printf("When FILE is '-', read standard input. If no FILE was given, read standard input. Use '--' to signal end of option parsing. "
"Exit status is 0 if no errors occurred, 1 with option parsing errors, 4 with memory allocation errors or 5 with I/O errors.\n\n");
/* main options */
printf("Main options:\n");
print_option('n', "max-nesting=N", "Maximum level of block nesting parsed. Default is " str(DEF_MAX_NESTING) ".");
print_option( 0, "html", "Render (X)HTML. The default.");
print_option( 0, "noop", "Use a special renderer whose callbacks do nothing.");
print_option('T', "time", "Show time spent in rendering.");
print_option('i', "input-unit=N", "Reading block size. Default is " str(DEF_IUNIT) ".");
print_option('o', "output-unit=N", "Writing block size. Default is " str(DEF_OUNIT) ".");
print_option( 0, "block", "Parse block-level Markdown. The default.");
print_option( 0, "inline", "Parse inline-level Markdown.");
print_option('h', "help", "Print this help text.");
print_option('v', "version", "Print Hoedown version.");
printf("\n");
/* base presets */
printf("Base presets:\n");
for (i = 0; i < count_of(presets_info); i++) {
struct preset_info *preset = &presets_info[i];
print_option( 0, preset->option_name, preset->description);
}
printf("\n");
/* features */
for (i = 0; i < count_of(categories_info); i++) {
struct feature_category_info *category = &categories_info[i];
printf("%s (--%s%s):\n", category->label, category_prefix, category->option_name);
for (e = 0; e < count_of(features_info); e++) {
struct feature_info *feature = &features_info[e];
if (feature->flag & category->flags)
print_feature_option(feature);
}
printf("\n");
}
/* ending */
printf("Feature flags can be negated by prepending 'no' to them, as in '--no-table', '--no-flags' or '--no-escape'. "
"Base presets override any flag given previously. Options are processed in order, so in case of contradictory options the last specified stands.\n\n");
printf("When FILE is '-', read standard input. If no FILE was given, read standard input. Use '--' to signal end of option parsing. "
"Exit status is 0 if no errors occured, 1 with option parsing errors, 4 with memory allocation errors or 5 with I/O errors.\n\n");
}
/* OPTION PARSING */
struct option_data {
char *basename;
int done;
char *basename;
int done;
/* time reporting */
int show_time;
/* time reporting */
int show_time;
/* I/O */
size_t iunit;
size_t ounit;
const char *filename;
/* I/O */
size_t iunit;
size_t ounit;
const char *filename;
/* renderer */
enum renderer_type renderer;
int toc_level;
hoedown_html_flags html_flags;
/* renderer */
enum renderer_type renderer;
/* parsing */
hoedown_extensions extensions;
size_t max_nesting;
/* parsing */
int is_inline;
hoedown_features features;
size_t max_nesting;
};
int
parse_short_option(char opt, char *next, void *opaque)
{
struct option_data *data = opaque;
long int num;
int isNum = next ? parseint(next, &num) : 0;
static int parse_short_option(char opt, char *next, void *opaque) {
struct option_data *data = opaque;
long int num;
int isNum = next ? parseint(next, &num) : 0;
if (opt == 'h') {
print_help(data->basename);
data->done = 1;
return 0;
}
if (opt == 'h') {
print_help(data->basename);
data->done = 1;
return 0;
}
if (opt == 'v') {
print_version();
data->done = 1;
return 0;
}
if (opt == 'v') {
print_version();
data->done = 1;
return 0;
}
if (opt == 'T') {
data->show_time = 1;
return 1;
}
if (opt == 'T') {
data->show_time = 1;
return 1;
}
/* options requiring value */
/* FIXME: add validation */
/* options requiring value */
/* FIXME: add validation */
if (opt == 'n' && isNum) {
data->max_nesting = num;
return 2;
}
if (opt == 'n' && isNum) {
data->max_nesting = num;
return 2;
}
if (opt == 't' && isNum) {
data->toc_level = num;
return 2;
}
if (opt == 'i' && isNum) {
data->iunit = num;
return 2;
}
if (opt == 'i' && isNum) {
data->iunit = num;
return 2;
}
if (opt == 'o' && isNum) {
data->ounit = num;
return 2;
}
if (opt == 'o' && isNum) {
data->ounit = num;
return 2;
}
fprintf(stderr, "Wrong option '-%c' found.\n", opt);
return 0;
fprintf(stderr, "Wrong option '-%c' found.\n", opt);
return 0;
}
int
parse_category_option(char *opt, struct option_data *data)
{
size_t i;
const char *name = strprefix(opt, category_prefix);
if (!name) return 0;
static int parse_preset_option(char *opt, struct option_data *data) {
size_t i;
for (i = 0; i < count_of(categories_info); i++) {
struct extension_category_info *category = &categories_info[i];
if (strcmp(name, category->option_name)==0) {
data->extensions |= category->flags;
return 1;
}
}
for (i = 0; i < count_of(presets_info); i++) {
struct preset_info *preset = &presets_info[i];
if (strcmp(opt, preset->option_name)==0) {
data->features = preset->flags;
return 1;
}
}
return 0;
return 0;
}
int
parse_flag_option(char *opt, struct option_data *data)
{
size_t i;
static int parse_category_option(char *opt, struct option_data *data) {
size_t i;
const char *name = strprefix(opt, category_prefix);
if (!name) return 0;
for (i = 0; i < count_of(extensions_info); i++) {
struct extension_info *extension = &extensions_info[i];
if (strcmp(opt, extension->option_name)==0) {
data->extensions |= extension->flag;
return 1;
}
}
for (i = 0; i < count_of(categories_info); i++) {
struct feature_category_info *category = &categories_info[i];
if (strcmp(name, category->option_name)==0) {
data->features |= category->flags;
return 1;
}
}
for (i = 0; i < count_of(html_flags_info); i++) {
struct html_flag_info *html_flag = &html_flags_info[i];
if (strcmp(opt, html_flag->option_name)==0) {
data->html_flags |= html_flag->flag;
return 1;
}
}
return 0;
return 0;
}
int
parse_negative_option(char *opt, struct option_data *data)
{
size_t i;
const char *name = strprefix(opt, negative_prefix);
if (!name) return 0;
static int parse_flag_option(char *opt, struct option_data *data) {
size_t i;
for (i = 0; i < count_of(categories_info); i++) {
struct extension_category_info *category = &categories_info[i];
if (strcmp(name, category->option_name)==0) {
data->extensions &= ~(category->flags);
return 1;
}
}
for (i = 0; i < count_of(features_info); i++) {
struct feature_info *feature = &features_info[i];
if (strcmp(opt, feature->option_name)==0) {
data->features |= feature->flag;
return 1;
}
}
for (i = 0; i < count_of(extensions_info); i++) {
struct extension_info *extension = &extensions_info[i];
if (strcmp(name, extension->option_name)==0) {
data->extensions &= ~(extension->flag);
return 1;
}
}
for (i = 0; i < count_of(html_flags_info); i++) {
struct html_flag_info *html_flag = &html_flags_info[i];
if (strcmp(name, html_flag->option_name)==0) {
data->html_flags &= ~(html_flag->flag);
return 1;
}
}
return 0;
return 0;
}
int
parse_long_option(char *opt, char *next, void *opaque)
{
struct option_data *data = opaque;
long int num;
int isNum = next ? parseint(next, &num) : 0;
static int parse_negative_option(char *opt, struct option_data *data) {
size_t i;
const char *name = strprefix(opt, negative_prefix);
if (!name) return 0;
if (strcmp(opt, "help")==0) {
print_help(data->basename);
data->done = 1;
return 0;
}
for (i = 0; i < count_of(categories_info); i++) {
struct feature_category_info *category = &categories_info[i];
if (strcmp(name, category->option_name)==0) {
data->features &= ~(category->flags);
return 1;
}
}
if (strcmp(opt, "version")==0) {
print_version();
data->done = 1;
return 0;
}
for (i = 0; i < count_of(features_info); i++) {
struct feature_info *feature = &features_info[i];
if (strcmp(name, feature->option_name)==0) {
data->features &= ~(feature->flag);
return 1;
}
}
if (strcmp(opt, "time")==0) {
data->show_time = 1;
return 1;
}
/* FIXME: validation */
if (strcmp(opt, "max-nesting")==0 && isNum) {
data->max_nesting = num;
return 2;
}
if (strcmp(opt, "toc-level")==0 && isNum) {
data->toc_level = num;
return 2;
}
if (strcmp(opt, "input-unit")==0 && isNum) {
data->iunit = num;
return 2;
}
if (strcmp(opt, "output-unit")==0 && isNum) {
data->ounit = num;
return 2;
}
if (strcmp(opt, "html")==0) {
data->renderer = RENDERER_HTML;
return 1;
}
if (strcmp(opt, "html-toc")==0) {
data->renderer = RENDERER_HTML_TOC;
return 1;
}
if (parse_category_option(opt, data) || parse_flag_option(opt, data) || parse_negative_option(opt, data))
return 1;
fprintf(stderr, "Wrong option '--%s' found.\n", opt);
return 0;
return 0;
}
int
parse_argument(int argn, char *arg, int is_forced, void *opaque)
{
struct option_data *data = opaque;
static int parse_long_option(char *opt, char *next, void *opaque) {
struct option_data *data = opaque;
long int num;
int isNum = next ? parseint(next, &num) : 0;
if (argn == 0) {
/* Input file */
if (strcmp(arg, "-")!=0 || is_forced) data->filename = arg;
return 1;
}
if (strcmp(opt, "help")==0) {
print_help(data->basename);
data->done = 1;
return 0;
}
fprintf(stderr, "Too many arguments.\n");
return 0;
if (strcmp(opt, "version")==0) {
print_version();
data->done = 1;
return 0;
}
if (strcmp(opt, "time")==0) {
data->show_time = 1;
return 1;
}
if (strcmp(opt, "block")==0) {
data->is_inline = 0;
return 1;
}
if (strcmp(opt, "inline")==0) {
data->is_inline = 1;
return 1;
}
/* FIXME: validation */
if (strcmp(opt, "max-nesting")==0 && isNum) {
data->max_nesting = num;
return 2;
}
if (strcmp(opt, "input-unit")==0 && isNum) {
data->iunit = num;
return 2;
}
if (strcmp(opt, "output-unit")==0 && isNum) {
data->ounit = num;
return 2;
}
if (strcmp(opt, "html")==0) {
data->renderer = RENDERER_HTML;
return 1;
}
if (strcmp(opt, "noop")==0) {
data->renderer = RENDERER_NOOP;
return 1;
}
if (parse_preset_option(opt, data) || parse_category_option(opt, data) || parse_flag_option(opt, data) || parse_negative_option(opt, data))
return 1;
fprintf(stderr, "Wrong option '--%s' found.\n", opt);
return 0;
}
/*
 * Callback for positional (non-option) arguments.
 *
 * Only the first positional argument (argn == 0) is accepted; it names the
 * input file. A lone "-" leaves the filename unset unless the argument was
 * forced, i.e. it appeared after "--".
 *
 * Returns 1 when the argument was consumed, 0 (after printing an error to
 * stderr) when there are too many arguments.
 */
static int parse_argument(int argn, char *arg, int is_forced, void *opaque) {
    struct option_data *options = opaque;

    if (argn != 0) {
        fprintf(stderr, "Too many arguments.\n");
        return 0;
    }

    /* Input file */
    if (is_forced || strcmp(arg, "-") != 0)
        options->filename = arg;

    return 1;
}
/* MAIN LOGIC */
int
main(int argc, char **argv)
{
struct option_data data;
/*struct timespec start, end;*/
FILE *file = stdin;
hoedown_buffer *ib, *ob;
hoedown_renderer *renderer = NULL;
void (*renderer_free)(hoedown_renderer *) = NULL;
hoedown_document *document;
int main(int argc, char **argv) {
struct option_data data;
struct timespec start, end;
FILE *file = stdin;
hoedown_buffer *ib, *ob;
hoedown_renderer *renderer = NULL;
void (*renderer_free)(hoedown_renderer *) = NULL;
hoedown_document *document;
/* Parse options */
data.basename = argv[0];
data.done = 0;
data.show_time = 0;
data.iunit = DEF_IUNIT;
data.ounit = DEF_OUNIT;
data.filename = NULL;
data.renderer = RENDERER_HTML;
data.toc_level = 0;
data.html_flags = 0;
data.extensions = 0;
data.max_nesting = DEF_MAX_NESTING;
/* Parse options */
data.basename = argv[0];
data.done = 0;
data.show_time = 0;
data.iunit = DEF_IUNIT;
data.ounit = DEF_OUNIT;
data.filename = NULL;
data.renderer = RENDERER_HTML;
data.is_inline = 0;
data.max_nesting = DEF_MAX_NESTING;
data.features = presets_info[0].flags;
argc = parse_options(argc, argv, parse_short_option, parse_long_option, parse_argument, &data);
if (data.done) return 0;
if (!argc) return 1;
argc = parse_options(argc, argv, parse_short_option, parse_long_option, parse_argument, &data);
if (data.done) return 0;
if (!argc) return 1;
/* Open input file, if needed */
if (data.filename) {
file = fopen(data.filename, "r");
if (!file) {
fprintf(stderr, "Unable to open input file \"%s\": %s\n", data.filename, strerror(errno));
return 5;
}
}
/* Open input file, if needed */
if (data.filename) {
file = fopen(data.filename, "r");
if (!file) {
fprintf(stderr, "Unable to open input file \"%s\": %s\n", data.filename, strerror(errno));
return 5;
}
}
/* Read everything */
ib = hoedown_buffer_new(data.iunit);
/* Read everything */
ib = hoedown_buffer_new(data.iunit);
while (!feof(file)) {
if (ferror(file)) {
fprintf(stderr, "I/O errors found while reading input.\n");
return 5;
}
hoedown_buffer_grow(ib, ib->size + data.iunit);
ib->size += fread(ib->data + ib->size, 1, data.iunit, file);
}
while (!feof(file)) {
if (ferror(file)) {
fprintf(stderr, "I/O errors found while reading input.\n");
return 5;
}
hoedown_buffer_grow(ib, ib->size + data.iunit);
ib->size += fread(ib->data + ib->size, 1, data.iunit, file);
}
if (file != stdin) fclose(file);
if (file != stdin) fclose(file);
/* Create the renderer */
switch (data.renderer) {
case RENDERER_HTML:
renderer = hoedown_html_renderer_new(data.html_flags, data.toc_level);
renderer_free = hoedown_html_renderer_free;
break;
case RENDERER_HTML_TOC:
renderer = hoedown_html_toc_renderer_new(data.toc_level);
renderer_free = hoedown_html_renderer_free;
break;
};
/* Create the renderer */
switch (data.renderer) {
case RENDERER_HTML:
renderer = hoedown_html_renderer_new();
renderer_free = hoedown_html_renderer_free;
break;
case RENDERER_NOOP:
renderer = hoedown_noop_renderer_new();
renderer_free = hoedown_noop_renderer_free;
break;
};
/* Perform Markdown rendering */
ob = hoedown_buffer_new(data.ounit);
document = hoedown_document_new(renderer, data.extensions, data.max_nesting);
/* Perform Markdown rendering */
ob = hoedown_buffer_new(data.ounit);
document = hoedown_document_new(renderer, data.features, data.max_nesting);
/*clock_gettime(CLOCK_MONOTONIC, &start);*/
hoedown_document_render(document, ob, ib->data, ib->size);
/*clock_gettime(CLOCK_MONOTONIC, &end);*/
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
hoedown_document_render(document, ob, ib->data, ib->size, data.is_inline);
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);
/* Write the result to stdout */
(void)fwrite(ob->data, 1, ob->size, stdout);
/* Write the result to stdout */
(void)fwrite(ob->data, 1, ob->size, stdout);
/* Show rendering time */
if (data.show_time) {
/*TODO: enable this
long long elapsed = (end.tv_sec - start.tv_sec)*1e9 + (end.tv_nsec - start.tv_nsec);
if (elapsed < 1e9)
fprintf(stderr, "Time spent on rendering: %.2f ms.\n", ((double)elapsed)/1e6);
else
fprintf(stderr, "Time spent on rendering: %.3f s.\n", ((double)elapsed)/1e9);
*/
}
/* Show rendering time */
if (data.show_time) {
long long elapsed = (end.tv_sec - start.tv_sec)*1e9 + (end.tv_nsec - start.tv_nsec);
if (elapsed < 1e9)
fprintf(stderr, "Time spent on rendering: %.2f ms.\n", ((double)elapsed)/1e6);
else
fprintf(stderr, "Time spent on rendering: %.3f s.\n", ((double)elapsed)/1e9);
}
/* Cleanup */
hoedown_buffer_free(ib);
hoedown_buffer_free(ob);
/* Cleanup */
hoedown_buffer_free(ib);
hoedown_buffer_free(ob);
hoedown_document_free(document);
renderer_free(renderer);
hoedown_document_free(document);
renderer_free(renderer);
if (ferror(stdout)) {
fprintf(stderr, "I/O errors found while writing output.\n");
return 5;
}
if (ferror(stdout)) {
fprintf(stderr, "I/O errors found while writing output.\n");
return 5;
}
return 0;
return 0;
}

82
bin/noop.c Normal file
View file

@ -0,0 +1,82 @@
#include "noop.h"
#include <string.h>
/* Block constructs */
/* Each block-level callback below accepts its arguments and does nothing. */

static void rndr_paragraph(void *target, void *content, int is_tight,
                           const hoedown_renderer_data *data)
{
}

static void rndr_indented_code_block(void *target, const hoedown_buffer *code,
                                     const hoedown_renderer_data *data)
{
}

static void rndr_fenced_code_block(void *target, const hoedown_buffer *code,
                                   const hoedown_buffer *info,
                                   const hoedown_renderer_data *data)
{
}

static void rndr_horizontal_rule(void *target,
                                 const hoedown_renderer_data *data)
{
}

static void rndr_atx_header(void *target, void *content, size_t level,
                            const hoedown_renderer_data *data)
{
}

static void rndr_setext_header(void *target, void *content, int is_double,
                               const hoedown_renderer_data *data)
{
}

static void rndr_list(void *target, void *content, int is_ordered,
                      int is_tight, int start,
                      const hoedown_renderer_data *data)
{
}

static void rndr_list_item(void *target, void *content, int is_ordered,
                           int is_tight, const hoedown_renderer_data *data)
{
}

static void rndr_quote_block(void *target, void *content,
                             const hoedown_renderer_data *data)
{
}

static void rndr_html_block(void *target, const hoedown_buffer *html,
                            const hoedown_renderer_data *data)
{
}
/* Inline constructs: each callback accepts its arguments and does nothing. */

static void rndr_string(void *target, const hoedown_buffer *text, const hoedown_renderer_data *data) {
    /* no-op */
}

static void rndr_escape(void *target, uint8_t character, const hoedown_renderer_data *data) {
    /* no-op */
}

static void rndr_hard_linebreak(void *target, const hoedown_renderer_data *data) {
    /* no-op */
}

static void rndr_linebreak(void *target, const hoedown_renderer_data *data) {
    /* no-op */
}

static void rndr_uri_autolink(void *target, const hoedown_buffer *uri, const hoedown_renderer_data *data) {
    /* no-op */
}

static void rndr_email_autolink(void *target, const hoedown_buffer *email, const hoedown_renderer_data *data) {
    /* no-op */
}

static void rndr_html(void *target, const hoedown_buffer *html, const hoedown_renderer_data *data) {
    /* no-op */
}

static void rndr_entity(void *target, const hoedown_buffer *character, const hoedown_renderer_data *data) {
    /* no-op */
}

static void rndr_code_span(void *target, const hoedown_buffer *code, const hoedown_renderer_data *data) {
    /* no-op */
}

static void rndr_emphasis(void *target, void *content, size_t level, const hoedown_renderer_data *data) {
    /* no-op */
}

static void rndr_link(void *target, void *content, const hoedown_buffer *dest, const hoedown_buffer *title, int is_image, const hoedown_renderer_data *data) {
    /* no-op */
}
/* Global callbacks: object_get always yields NULL, the others do nothing. */

static void *object_get(int is_inline, const hoedown_renderer_data *data) {
    /* this renderer never hands out an object */
    return NULL;
}

static void object_merge(void *target, void *content, int is_inline, const hoedown_renderer_data *data) {
    /* no-op */
}

static void object_pop(void *target, int is_inline, const hoedown_renderer_data *data) {
    /* no-op */
}

static void render_start(void *output, int is_inline, const hoedown_renderer_data *data) {
    /* no-op */
}

static void render_end(void *output, void *target, int is_inline, const hoedown_renderer_data *data) {
    /* no-op */
}
/* Exported API */
/* hoedown_noop_renderer_new: allocate a renderer whose callbacks all do nothing.
 *
 * Returns a heap-allocated hoedown_renderer that is a copy of a static
 * template. Release it with hoedown_noop_renderer_free().
 *
 * The parameter list is spelled (void) so that this is a real prototype:
 * in C an empty list "()" leaves the parameters unspecified and disables
 * argument checking at call sites.
 */
hoedown_renderer *hoedown_noop_renderer_new(void) {
    /* The initializers are positional, so their order must follow the member
     * order of hoedown_renderer. The leading NULL fills the first member. */
    static const hoedown_renderer temp = {
        NULL,

        rndr_paragraph,
        rndr_indented_code_block,
        rndr_fenced_code_block,
        rndr_horizontal_rule,
        rndr_atx_header,
        rndr_setext_header,
        rndr_list,
        rndr_list_item,
        rndr_quote_block,
        rndr_html_block,

        rndr_string,
        rndr_escape,
        rndr_hard_linebreak,
        rndr_linebreak,
        rndr_uri_autolink,
        rndr_email_autolink,
        rndr_html,
        rndr_entity,
        rndr_code_span,
        rndr_emphasis,
        rndr_link,

        object_get,
        object_merge,
        object_pop,
        render_start,
        render_end,
    };

    /* NOTE(review): the result is used unchecked; this assumes hoedown_malloc
     * never returns NULL (e.g. aborts on failure) -- confirm. */
    hoedown_renderer *rndr = hoedown_malloc(sizeof(hoedown_renderer));
    memcpy(rndr, &temp, sizeof(hoedown_renderer));
    return rndr;
}
/* hoedown_noop_renderer_free: release a renderer obtained from
 * hoedown_noop_renderer_new(). The pointer is handed straight to free(). */
void hoedown_noop_renderer_free(hoedown_renderer *rndr) {
free(rndr);
}

28
bin/noop.h Normal file
View file

@ -0,0 +1,28 @@
/* noop.h - dumb no-op renderer */
#ifndef HOEDOWN_NOOP_H
#define HOEDOWN_NOOP_H

#include "document.h"

#ifdef __cplusplus
extern "C" {
#endif

/*************
 * FUNCTIONS *
 *************/

/* hoedown_noop_renderer_new: allocate a new no-op renderer.
 * Declared with (void) so the declaration is a prototype in C; an empty
 * parameter list would leave the arguments unchecked. */
hoedown_renderer *hoedown_noop_renderer_new(void) __attribute__((malloc));

/* hoedown_noop_renderer_free: deallocate a no-op renderer */
void hoedown_noop_renderer_free(hoedown_renderer *rndr);

#ifdef __cplusplus
}
#endif

#endif /** HOEDOWN_NOOP_H **/

View file

@ -1,228 +0,0 @@
#include "html.h"
#include "common.h"
/*#include <time.h>*/
/* FEATURES INFO / DEFAULTS */
#define DEF_IUNIT 1024
#define DEF_OUNIT 64
/* PRINT HELP */
/* Write the command-line help text to standard output.
 * `basename` is inserted as the program name in the usage line. */
void
print_help(const char *basename)
{
    /* usage */
    fprintf(stdout, "Usage: %s [OPTION]... [FILE]\n\n", basename);

    /* description */
    fputs("Apply SmartyPants smart punctuation to the HTML in FILE (or standard input), and output the resulting HTML to standard output.\n\n", stdout);

    /* main options */
    fputs("Main options:\n", stdout);
    print_option('T', "time", "Show time spent in SmartyPants processing.");
    print_option('i', "input-unit=N", "Reading block size. Default is " str(DEF_IUNIT) ".");
    print_option('o', "output-unit=N", "Writing block size. Default is " str(DEF_OUNIT) ".");
    print_option('h', "help", "Print this help text.");
    print_option('v', "version", "Print Hoedown version.");
    fputs("\n", stdout);

    /* ending */
    fputs("Options are processed in order, so in case of contradictory options the last specified stands.\n\n", stdout);
    fputs("When FILE is '-', read standard input. If no FILE was given, read standard input. Use '--' to signal end of option parsing. "
          "Exit status is 0 if no errors occurred, 1 with option parsing errors, 4 with memory allocation errors or 5 with I/O errors.\n\n", stdout);
}
/* OPTION PARSING */
/* Settings gathered from the command line. The option callbacks fill this
 * in through their `opaque` pointer and main() reads the result. */
struct option_data {
char *basename; /* program name (argv[0]); shown in the usage line */
int done; /* set to 1 once help or version was printed; main() then exits with 0 */
/* time reporting */
int show_time; /* set to 1 by -T / --time */
/* I/O */
size_t iunit; /* unit of the input buffer and size of each fread() in main() */
size_t ounit; /* unit of the output buffer */
const char *filename; /* input path; while NULL, main() reads stdin */
};
/* parse_short_option: handle one single-letter option.
 *
 * opt    - the option letter, without the leading '-'
 * next   - the following command-line word, or NULL; only used as the
 *          numeric value of -i / -o
 * opaque - the struct option_data being filled in
 *
 * Returns 0 when parsing should not continue (help/version printed, or the
 * option was rejected), 1 for a plain flag, 2 when `next` was used as the
 * option's value. NOTE(review): presumably parse_options treats these as
 * "words consumed" -- confirm against its definition.
 */
int
parse_short_option(char opt, char *next, void *opaque)
{
    struct option_data *data = opaque;
    long int num;
    int isNum = next ? parseint(next, &num) : 0;
    /* Block sizes end up in size_t buffer units: a negative value would wrap
     * to a huge size and zero trips the `buf->unit` assertion in the buffer
     * code, so only strictly positive numbers are accepted. */
    int isSize = isNum && num > 0;

    if (opt == 'h') {
        print_help(data->basename);
        data->done = 1;
        return 0;
    }

    if (opt == 'v') {
        print_version();
        data->done = 1;
        return 0;
    }

    if (opt == 'T') {
        data->show_time = 1;
        return 1;
    }

    /* options requiring a (positive) value */
    if (opt == 'i' && isSize) {
        data->iunit = (size_t)num;
        return 2;
    }

    if (opt == 'o' && isSize) {
        data->ounit = (size_t)num;
        return 2;
    }

    /* unknown letter, or a missing / non-numeric / non-positive value */
    fprintf(stderr, "Wrong option '-%c' found.\n", opt);
    return 0;
}
/* parse_long_option: handle one "--name" option.
 *
 * opt    - the option name, without the leading "--"
 * next   - the following command-line word, or NULL; only used as the
 *          numeric value of input-unit / output-unit
 * opaque - the struct option_data being filled in
 *
 * Returns 0 when parsing should not continue (help/version printed, or the
 * option was rejected), 1 for a plain flag, 2 when `next` was used as the
 * option's value. NOTE(review): presumably parse_options treats these as
 * "words consumed" -- confirm against its definition.
 */
int
parse_long_option(char *opt, char *next, void *opaque)
{
    struct option_data *data = opaque;
    long int num;
    int isNum = next ? parseint(next, &num) : 0;
    /* Block sizes end up in size_t buffer units: a negative value would wrap
     * to a huge size and zero trips the `buf->unit` assertion in the buffer
     * code, so only strictly positive numbers are accepted. */
    int isSize = isNum && num > 0;

    if (strcmp(opt, "help")==0) {
        print_help(data->basename);
        data->done = 1;
        return 0;
    }

    if (strcmp(opt, "version")==0) {
        print_version();
        data->done = 1;
        return 0;
    }

    if (strcmp(opt, "time")==0) {
        data->show_time = 1;
        return 1;
    }

    /* options requiring a (positive) value */
    if (strcmp(opt, "input-unit")==0 && isSize) {
        data->iunit = (size_t)num;
        return 2;
    }

    if (strcmp(opt, "output-unit")==0 && isSize) {
        data->ounit = (size_t)num;
        return 2;
    }

    /* unknown name, or a missing / non-numeric / non-positive value */
    fprintf(stderr, "Wrong option '--%s' found.\n", opt);
    return 0;
}
/* parse_argument: handle a positional (non-option) argument.
 *
 * Only the first positional argument (argn == 0) is accepted; it names the
 * input file. A bare "-" leaves the filename untouched unless `is_forced`
 * is set. Returns 1 when the argument was accepted, 0 after reporting
 * "Too many arguments." for any further one.
 */
int
parse_argument(int argn, char *arg, int is_forced, void *opaque)
{
    struct option_data *opts = opaque;
    int is_plain_dash;

    if (argn != 0) {
        fprintf(stderr, "Too many arguments.\n");
        return 0;
    }

    /* Input file */
    is_plain_dash = !is_forced && strcmp(arg, "-") == 0;
    if (!is_plain_dash)
        opts->filename = arg;

    return 1;
}
/* MAIN LOGIC */
/* Entry point: read FILE (or stdin), run SmartyPants over it and write the
 * result to stdout.
 *
 * Exit status: 0 on success or after printing help/version, 1 when option
 * parsing failed, 5 on I/O errors (unreadable input, read or write failure).
 */
int
main(int argc, char **argv)
{
    struct option_data data;
    /*struct timespec start, end;*/
    FILE *file = stdin;
    hoedown_buffer *ib, *ob;

    /* Parse options */
    data.basename = argv[0];
    data.done = 0;
    data.show_time = 0;
    data.iunit = DEF_IUNIT;
    data.ounit = DEF_OUNIT;
    data.filename = NULL;

    argc = parse_options(argc, argv, parse_short_option, parse_long_option, parse_argument, &data);
    if (data.done) return 0;
    if (!argc) return 1;

    /* Open input file, if needed */
    if (data.filename) {
        file = fopen(data.filename, "r");
        if (!file) {
            fprintf(stderr, "Unable to open input file \"%s\": %s\n", data.filename, strerror(errno));
            return 5;
        }
    }

    /* Read everything */
    ib = hoedown_buffer_new(data.iunit);

    while (!feof(file)) {
        if (ferror(file)) {
            fprintf(stderr, "I/O errors found while reading input.\n");
            /* release what was acquired so far before bailing out */
            hoedown_buffer_free(ib);
            if (file != stdin) fclose(file);
            return 5;
        }
        /* make room for one more block, then append whatever fread delivers */
        hoedown_buffer_grow(ib, ib->size + data.iunit);
        ib->size += fread(ib->data + ib->size, 1, data.iunit, file);
    }

    if (file != stdin) fclose(file);

    /* Perform SmartyPants processing */
    ob = hoedown_buffer_new(data.ounit);

    /*clock_gettime(CLOCK_MONOTONIC, &start);*/
    hoedown_html_smartypants(ob, ib->data, ib->size);
    /*clock_gettime(CLOCK_MONOTONIC, &end);*/

    /* Write the result to stdout; failures are picked up by the check below */
    (void)fwrite(ob->data, 1, ob->size, stdout);

    /* Show rendering time */
    if (data.show_time) {
        /*TODO: enable this
        long long elapsed = (end.tv_sec - start.tv_sec)*1e9 + (end.tv_nsec - start.tv_nsec);
        if (elapsed < 1e9)
            fprintf(stderr, "Time spent on rendering: %.2f ms.\n", ((double)elapsed)/1e6);
        else
            fprintf(stderr, "Time spent on rendering: %.3f s.\n", ((double)elapsed)/1e9);
        */
    }

    /* Cleanup */
    hoedown_buffer_free(ib);
    hoedown_buffer_free(ob);

    /* stdout is buffered, so a failed write may only surface when the buffer
     * is flushed. Flush explicitly here; otherwise the failure would happen
     * silently at exit and the status would wrongly be 0. */
    if (fflush(stdout) != 0 || ferror(stdout)) {
        fprintf(stderr, "I/O errors found while writing output.\n");
        return 5;
    }

    return 0;
}

166
data/autolink_schemes.gperf Normal file
View file

@ -0,0 +1,166 @@
# Keep this updated with CommonMark spec
#
coap
doi
javascript
aaa
aaas
about
acap
cap
cid
crid
data
dav
dict
dns
file
ftp
geo
go
gopher
h323
http
https
iax
icap
im
imap
info
ipp
iris
iris.beep
iris.xpc
iris.xpcs
iris.lwz
ldap
mailto
mid
msrp
msrps
mtqp
mupdate
news
nfs
ni
nih
nntp
opaquelocktoken
pop
pres
rtsp
service
session
shttp
sieve
sip
sips
sms
snmp
soap.beep
soap.beeps
tag
tel
telnet
tftp
thismessage
tn3270
tip
tv
urn
vemmi
ws
wss
xcon
xcon-userid
xmlrpc.beep
xmlrpc.beeps
xmpp
z39.50r
z39.50s
adiumxtra
afp
afs
aim
apt
attachment
aw
beshare
bitcoin
bolo
callto
chrome
chrome-extension
com-eventbrite-attendee
content
cvs
dlna-playsingle
dlna-playcontainer
dtn
dvb
ed2k
facetime
feed
finger
fish
gg
git
gizmoproject
gtalk
hcp
icon
ipn
irc
irc6
ircs
itms
jar
jms
keyparc
lastfm
ldaps
magnet
maps
market
message
mms
ms-help
msnim
mumble
mvn
notes
oid
palm
paparazzi
platform
proxy
psyc
query
res
resource
rmi
rsync
rtmp
secondlife
sftp
sgn
skype
smb
soldat
spotify
ssh
steam
svn
teamspeak
things
udp
unreal
ut2004
ventrilo
view-source
webcal
wtai
wyciwyg
xfire
xri
ymsgr

52
data/html_blocks.gperf Normal file
View file

@ -0,0 +1,52 @@
# Keep this updated with CommonMark spec
#
article
header
aside
hgroup
blockquote
hr
iframe
body
li
map
button
object
canvas
ol
caption
output
col
p
colgroup
pre
dd
progress
div
section
dl
table
td
dt
tbody
embed
textarea
fieldset
tfoot
figcaption
th
figure
thead
footer
tr
form
ul
h1
h2
h3
h4
h5
h6
video
script
style

View file

@ -1,40 +0,0 @@
LIBRARY HOEDOWN
EXPORTS
hoedown_autolink_is_safe
hoedown_autolink__www
hoedown_autolink__email
hoedown_autolink__url
hoedown_buffer_init
hoedown_buffer_new
hoedown_buffer_reset
hoedown_buffer_grow
hoedown_buffer_put
hoedown_buffer_puts
hoedown_buffer_putc
hoedown_buffer_set
hoedown_buffer_sets
hoedown_buffer_eq
hoedown_buffer_eqs
hoedown_buffer_prefix
hoedown_buffer_slurp
hoedown_buffer_cstr
hoedown_buffer_printf
hoedown_buffer_free
hoedown_document_new
hoedown_document_render
hoedown_document_render_inline
hoedown_document_free
hoedown_escape_href
hoedown_escape_html
hoedown_html_smartypants
hoedown_html_is_tag
hoedown_html_renderer_new
hoedown_html_toc_renderer_new
hoedown_html_renderer_free
hoedown_stack_init
hoedown_stack_uninit
hoedown_stack_grow
hoedown_stack_push
hoedown_stack_pop
hoedown_stack_top
hoedown_version

View file

@ -1,24 +0,0 @@
p
dl
h1
h2
h3
h4
h5
h6
ol
ul
del
div
ins
pre
form
math
style
table
figure
iframe
script
fieldset
noscript
blockquote

1319
src/_autolink_schemes.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,5 @@
/* ANSI-C code produced by gperf version 3.0.3 */
/* Command-line: gperf -L ANSI-C -N hoedown_find_block_tag -c -C -E -S 1 --ignore-case -m100 html_block_names.gperf */
/* ANSI-C code produced by gperf version 3.0.4 */
/* Command-line: gperf -L ANSI-C -N hoedown_find_block_tag -7 -c -C -E -S 1 --ignore-case -m100 data/html_blocks.gperf */
/* Computed positions: -k'1-2' */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
@ -29,7 +29,7 @@
#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
#endif
/* maximum key range = 24, duplicates = 0 */
/* maximum key range = 52, duplicates = 0 */
#ifndef GPERF_DOWNCASE
#define GPERF_DOWNCASE 1
@ -88,34 +88,21 @@ hash (register const char *str, register unsigned int len)
{
static const unsigned char asso_values[] =
{
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
22, 21, 19, 18, 16, 0, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 1, 25, 0, 25,
1, 0, 0, 13, 0, 25, 25, 11, 2, 1,
0, 25, 25, 5, 0, 2, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 1, 25,
0, 25, 1, 0, 0, 13, 0, 25, 25, 11,
2, 1, 0, 25, 25, 5, 0, 2, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25
53, 53, 53, 53, 53, 53, 53, 53, 53, 53,
53, 53, 53, 53, 53, 53, 53, 53, 53, 53,
53, 53, 53, 53, 53, 53, 53, 53, 53, 53,
53, 53, 53, 53, 53, 53, 53, 53, 53, 53,
53, 53, 53, 53, 53, 53, 53, 53, 53, 53,
25, 24, 21, 20, 19, 18, 53, 53, 53, 53,
53, 53, 53, 53, 53, 26, 8, 2, 20, 30,
0, 22, 22, 23, 13, 53, 26, 0, 5, 29,
0, 53, 53, 0, 7, 25, 11, 53, 53, 53,
53, 53, 53, 53, 53, 53, 53, 26, 8, 2,
20, 30, 0, 22, 22, 23, 13, 53, 26, 0,
5, 29, 0, 53, 53, 0, 7, 25, 11, 53,
53, 53, 53, 53, 53, 53, 53, 53, 53
};
register int hval = (int)len;
register int hval = len;
switch (hval)
{
@ -131,7 +118,7 @@ hash (register const char *str, register unsigned int len)
#ifdef __GNUC__
__inline
#ifdef __GNUC_STDC_INLINE__
#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
@ -140,11 +127,11 @@ hoedown_find_block_tag (register const char *str, register unsigned int len)
{
enum
{
TOTAL_KEYWORDS = 24,
TOTAL_KEYWORDS = 50,
MIN_WORD_LENGTH = 1,
MAX_WORD_LENGTH = 10,
MIN_HASH_VALUE = 1,
MAX_HASH_VALUE = 24
MAX_HASH_VALUE = 52
};
if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
@ -160,75 +147,153 @@ hoedown_find_block_tag (register const char *str, register unsigned int len)
case 0:
resword = "p";
goto compare;
case 1:
resword = "h6";
goto compare;
case 2:
resword = "div";
goto compare;
case 3:
resword = "del";
goto compare;
case 4:
resword = "form";
goto compare;
case 5:
resword = "table";
goto compare;
case 6:
resword = "figure";
goto compare;
case 7:
resword = "pre";
goto compare;
case 3:
resword = "form";
goto compare;
case 4:
resword = "col";
goto compare;
case 5:
resword = "footer";
goto compare;
case 6:
resword = "section";
goto compare;
case 7:
resword = "progress";
goto compare;
case 8:
resword = "fieldset";
resword = "tr";
goto compare;
case 9:
resword = "noscript";
resword = "colgroup";
goto compare;
case 10:
resword = "script";
resword = "map";
goto compare;
case 11:
resword = "style";
goto compare;
case 12:
resword = "dl";
resword = "body";
goto compare;
case 13:
resword = "ol";
resword = "tbody";
goto compare;
case 14:
resword = "ul";
resword = "textarea";
goto compare;
case 15:
resword = "math";
resword = "canvas";
goto compare;
case 16:
resword = "ins";
resword = "caption";
goto compare;
case 17:
resword = "h5";
goto compare;
case 18:
resword = "iframe";
goto compare;
case 19:
resword = "h4";
goto compare;
case 20:
resword = "h3";
goto compare;
case 21:
resword = "blockquote";
goto compare;
case 18:
resword = "figure";
goto compare;
case 19:
resword = "table";
goto compare;
case 20:
resword = "fieldset";
goto compare;
case 21:
resword = "dl";
goto compare;
case 22:
resword = "h2";
resword = "figcaption";
goto compare;
case 23:
resword = "hr";
goto compare;
case 24:
resword = "button";
goto compare;
case 25:
resword = "script";
goto compare;
case 26:
resword = "ul";
goto compare;
case 27:
resword = "header";
goto compare;
case 28:
resword = "video";
goto compare;
case 29:
resword = "style";
goto compare;
case 30:
resword = "ol";
goto compare;
case 31:
resword = "th";
goto compare;
case 32:
resword = "article";
goto compare;
case 33:
resword = "tfoot";
goto compare;
case 34:
resword = "thead";
goto compare;
case 35:
resword = "div";
goto compare;
case 36:
resword = "object";
goto compare;
case 37:
resword = "aside";
goto compare;
case 38:
resword = "td";
goto compare;
case 39:
resword = "embed";
goto compare;
case 40:
resword = "li";
goto compare;
case 41:
resword = "h6";
goto compare;
case 42:
resword = "h5";
goto compare;
case 43:
resword = "h4";
goto compare;
case 44:
resword = "h3";
goto compare;
case 45:
resword = "output";
goto compare;
case 46:
resword = "dt";
goto compare;
case 47:
resword = "h2";
goto compare;
case 48:
resword = "h1";
goto compare;
case 49:
resword = "hgroup";
goto compare;
case 50:
resword = "iframe";
goto compare;
case 51:
resword = "dd";
goto compare;
}
return 0;
compare:

19287
src/_html_entities.h Normal file

File diff suppressed because it is too large Load diff

View file

@ -8,274 +8,264 @@
#ifndef _MSC_VER
#include <strings.h>
#else
#define strncasecmp _strnicmp
#define strncasecmp _strnicmp
#endif
int
hoedown_autolink_is_safe(const uint8_t *data, size_t size)
{
static const size_t valid_uris_count = 6;
static const char *valid_uris[] = {
"http://", "https://", "/", "#", "ftp://", "mailto:"
};
static const size_t valid_uris_size[] = { 7, 8, 1, 1, 6, 7 };
size_t i;
int hoedown_autolink_is_safe(const uint8_t *data, size_t size) {
static const size_t valid_uris_count = 6;
static const char *valid_uris[] = {
"http://", "https://", "/", "#", "ftp://", "mailto:"
};
static const size_t valid_uris_size[] = { 7, 8, 1, 1, 6, 7 };
for (i = 0; i < valid_uris_count; ++i) {
size_t len = valid_uris_size[i];
for (size_t i = 0; i < valid_uris_count; ++i) {
size_t len = valid_uris_size[i];
if (size > len &&
strncasecmp((char *)data, valid_uris[i], len) == 0 &&
isalnum(data[len]))
return 1;
}
if (size > len &&
strncasecmp((char *)data, valid_uris[i], len) == 0 &&
isalnum(data[len]))
return 1;
}
return 0;
return 0;
}
static size_t
autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
{
uint8_t cclose, copen = 0;
size_t i;
static size_t autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size) {
uint8_t cclose, copen = 0;
size_t i;
for (i = 0; i < link_end; ++i)
if (data[i] == '<') {
link_end = i;
break;
}
for (i = 0; i < link_end; ++i)
if (data[i] == '<') {
link_end = i;
break;
}
while (link_end > 0) {
if (strchr("?!.,:", data[link_end - 1]) != NULL)
link_end--;
while (link_end > 0) {
if (strchr("?!.,:", data[link_end - 1]) != NULL)
link_end--;
else if (data[link_end - 1] == ';') {
size_t new_end = link_end - 2;
else if (data[link_end - 1] == ';') {
size_t new_end = link_end - 2;
while (new_end > 0 && isalpha(data[new_end]))
new_end--;
while (new_end > 0 && isalpha(data[new_end]))
new_end--;
if (new_end < link_end - 2 && data[new_end] == '&')
link_end = new_end;
else
link_end--;
}
else break;
}
if (new_end < link_end - 2 && data[new_end] == '&')
link_end = new_end;
else
link_end--;
}
else break;
}
if (link_end == 0)
return 0;
if (link_end == 0)
return 0;
cclose = data[link_end - 1];
cclose = data[link_end - 1];
switch (cclose) {
case '"': copen = '"'; break;
case '\'': copen = '\''; break;
case ')': copen = '('; break;
case ']': copen = '['; break;
case '}': copen = '{'; break;
}
switch (cclose) {
case '"': copen = '"'; break;
case '\'': copen = '\''; break;
case ')': copen = '('; break;
case ']': copen = '['; break;
case '}': copen = '{'; break;
}
if (copen != 0) {
size_t closing = 0;
size_t opening = 0;
size_t i = 0;
if (copen != 0) {
size_t closing = 0;
size_t opening = 0;
size_t i = 0;
/* Try to close the final punctuation sign in this same line;
* if we managed to close it outside of the URL, that means that it's
* not part of the URL. If it closes inside the URL, that means it
* is part of the URL.
*
* Examples:
*
* foo http://www.pokemon.com/Pikachu_(Electric) bar
* => http://www.pokemon.com/Pikachu_(Electric)
*
* foo (http://www.pokemon.com/Pikachu_(Electric)) bar
* => http://www.pokemon.com/Pikachu_(Electric)
*
* foo http://www.pokemon.com/Pikachu_(Electric)) bar
* => http://www.pokemon.com/Pikachu_(Electric))
*
* (foo http://www.pokemon.com/Pikachu_(Electric)) bar
* => foo http://www.pokemon.com/Pikachu_(Electric)
*/
/* Try to close the final punctuation sign in this same line;
* if we managed to close it outside of the URL, that means that it's
* not part of the URL. If it closes inside the URL, that means it
* is part of the URL.
*
* Examples:
*
* foo http://www.pokemon.com/Pikachu_(Electric) bar
* => http://www.pokemon.com/Pikachu_(Electric)
*
* foo (http://www.pokemon.com/Pikachu_(Electric)) bar
* => http://www.pokemon.com/Pikachu_(Electric)
*
* foo http://www.pokemon.com/Pikachu_(Electric)) bar
* => http://www.pokemon.com/Pikachu_(Electric))
*
* (foo http://www.pokemon.com/Pikachu_(Electric)) bar
* => foo http://www.pokemon.com/Pikachu_(Electric)
*/
while (i < link_end) {
if (data[i] == copen)
opening++;
else if (data[i] == cclose)
closing++;
while (i < link_end) {
if (data[i] == copen)
opening++;
else if (data[i] == cclose)
closing++;
i++;
}
i++;
}
if (closing != opening)
link_end--;
}
if (closing != opening)
link_end--;
}
return link_end;
return link_end;
}
static size_t
check_domain(uint8_t *data, size_t size, int allow_short)
{
size_t i, np = 0;
static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
size_t i, np = 0;
if (!isalnum(data[0]))
return 0;
if (!isalnum(data[0]))
return 0;
for (i = 1; i < size - 1; ++i) {
if (strchr(".:", data[i]) != NULL) np++;
else if (!isalnum(data[i]) && data[i] != '-') break;
}
for (i = 1; i < size - 1; ++i) {
if (strchr(".:", data[i]) != NULL) np++;
else if (!isalnum(data[i]) && data[i] != '-') break;
}
if (allow_short) {
/* We don't need a valid domain in the strict sense (with
* at least one dot); so just make sure it's composed of valid
* domain characters and return the length of the valid
* sequence. */
return i;
} else {
/* a valid domain needs to have at least a dot.
* that's as far as we get */
return np ? i : 0;
}
if (allow_short) {
/* We don't need a valid domain in the strict sense (with
* at least one dot); so just make sure it's composed of valid
* domain characters and return the length of the valid
* sequence. */
return i;
} else {
/* a valid domain needs to have at least a dot.
* that's as far as we get */
return np ? i : 0;
}
}
size_t
hoedown_autolink__www(
size_t *rewind_p,
hoedown_buffer *link,
uint8_t *data,
size_t max_rewind,
size_t size,
unsigned int flags)
{
size_t link_end;
size_t hoedown_autolink__www(
size_t *rewind_p,
hoedown_buffer *link,
uint8_t *data,
size_t max_rewind,
size_t size,
unsigned int flags
) {
size_t link_end;
if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1]))
return 0;
if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1]))
return 0;
if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
return 0;
if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
return 0;
link_end = check_domain(data, size, 0);
link_end = check_domain(data, size, 0);
if (link_end == 0)
return 0;
if (link_end == 0)
return 0;
while (link_end < size && !isspace(data[link_end]))
link_end++;
while (link_end < size && !isspace(data[link_end]))
link_end++;
link_end = autolink_delim(data, link_end, max_rewind, size);
link_end = autolink_delim(data, link_end, max_rewind, size);
if (link_end == 0)
return 0;
if (link_end == 0)
return 0;
hoedown_buffer_put(link, data, link_end);
*rewind_p = 0;
hoedown_buffer_put(link, data, link_end);
*rewind_p = 0;
return (int)link_end;
return (int)link_end;
}
size_t
hoedown_autolink__email(
size_t *rewind_p,
hoedown_buffer *link,
uint8_t *data,
size_t max_rewind,
size_t size,
unsigned int flags)
{
size_t link_end, rewind;
int nb = 0, np = 0;
size_t hoedown_autolink__email(
size_t *rewind_p,
hoedown_buffer *link,
uint8_t *data,
size_t max_rewind,
size_t size,
unsigned int flags
) {
size_t link_end, rewind;
int nb = 0, np = 0;
for (rewind = 0; rewind < max_rewind; ++rewind) {
uint8_t c = data[-1 - rewind];
for (rewind = 0; rewind < max_rewind; ++rewind) {
uint8_t c = data[-1 - rewind];
if (isalnum(c))
continue;
if (isalnum(c))
continue;
if (strchr(".+-_", c) != NULL)
continue;
if (strchr(".+-_", c) != NULL)
continue;
break;
}
break;
}
if (rewind == 0)
return 0;
if (rewind == 0)
return 0;
for (link_end = 0; link_end < size; ++link_end) {
uint8_t c = data[link_end];
for (link_end = 0; link_end < size; ++link_end) {
uint8_t c = data[link_end];
if (isalnum(c))
continue;
if (isalnum(c))
continue;
if (c == '@')
nb++;
else if (c == '.' && link_end < size - 1)
np++;
else if (c != '-' && c != '_')
break;
}
if (c == '@')
nb++;
else if (c == '.' && link_end < size - 1)
np++;
else if (c != '-' && c != '_')
break;
}
if (link_end < 2 || nb != 1 || np == 0 ||
!isalpha(data[link_end - 1]))
return 0;
if (link_end < 2 || nb != 1 || np == 0 ||
!isalpha(data[link_end - 1]))
return 0;
link_end = autolink_delim(data, link_end, max_rewind, size);
link_end = autolink_delim(data, link_end, max_rewind, size);
if (link_end == 0)
return 0;
if (link_end == 0)
return 0;
hoedown_buffer_put(link, data - rewind, link_end + rewind);
*rewind_p = rewind;
hoedown_buffer_put(link, data - rewind, link_end + rewind);
*rewind_p = rewind;
return link_end;
return link_end;
}
size_t
hoedown_autolink__url(
size_t *rewind_p,
hoedown_buffer *link,
uint8_t *data,
size_t max_rewind,
size_t size,
unsigned int flags)
{
size_t link_end, rewind = 0, domain_len;
size_t hoedown_autolink__url(
size_t *rewind_p,
hoedown_buffer *link,
uint8_t *data,
size_t max_rewind,
size_t size,
unsigned int flags
) {
size_t link_end, rewind = 0, domain_len;
if (size < 4 || data[1] != '/' || data[2] != '/')
return 0;
if (size < 4 || data[1] != '/' || data[2] != '/')
return 0;
while (rewind < max_rewind && isalpha(data[-1 - rewind]))
rewind++;
while (rewind < max_rewind && isalpha(data[-1 - rewind]))
rewind++;
if (!hoedown_autolink_is_safe(data - rewind, size + rewind))
return 0;
if (!hoedown_autolink_is_safe(data - rewind, size + rewind))
return 0;
link_end = strlen("://");
link_end = strlen("://");
domain_len = check_domain(
data + link_end,
size - link_end,
flags & HOEDOWN_AUTOLINK_SHORT_DOMAINS);
domain_len = check_domain(
data + link_end,
size - link_end,
flags & HOEDOWN_AUTOLINK_SHORT_DOMAINS);
if (domain_len == 0)
return 0;
if (domain_len == 0)
return 0;
link_end += domain_len;
while (link_end < size && !isspace(data[link_end]))
link_end++;
link_end += domain_len;
while (link_end < size && !isspace(data[link_end]))
link_end++;
link_end = autolink_delim(data, link_end, max_rewind, size);
link_end = autolink_delim(data, link_end, max_rewind, size);
if (link_end == 0)
return 0;
if (link_end == 0)
return 0;
hoedown_buffer_put(link, data - rewind, link_end + rewind);
*rewind_p = rewind;
hoedown_buffer_put(link, data - rewind, link_end + rewind);
*rewind_p = rewind;
return link_end;
return link_end;
}

View file

@ -15,7 +15,7 @@ extern "C" {
*************/
typedef enum hoedown_autolink_flags {
HOEDOWN_AUTOLINK_SHORT_DOMAINS = (1 << 0)
HOEDOWN_AUTOLINK_SHORT_DOMAINS = (1 << 0)
} hoedown_autolink_flags;
@ -27,16 +27,34 @@ typedef enum hoedown_autolink_flags {
int hoedown_autolink_is_safe(const uint8_t *data, size_t size);
/* hoedown_autolink__www: search for the next www link in data */
size_t hoedown_autolink__www(size_t *rewind_p, hoedown_buffer *link,
uint8_t *data, size_t offset, size_t size, hoedown_autolink_flags flags);
size_t hoedown_autolink__www(
size_t *rewind_p,
hoedown_buffer *link,
uint8_t *data,
size_t offset,
size_t size,
hoedown_autolink_flags flags
);
/* hoedown_autolink__email: search for the next email in data */
size_t hoedown_autolink__email(size_t *rewind_p, hoedown_buffer *link,
uint8_t *data, size_t offset, size_t size, hoedown_autolink_flags flags);
size_t hoedown_autolink__email(
size_t *rewind_p,
hoedown_buffer *link,
uint8_t *data,
size_t offset,
size_t size,
hoedown_autolink_flags flags
);
/* hoedown_autolink__url: search for the next URL in data */
size_t hoedown_autolink__url(size_t *rewind_p, hoedown_buffer *link,
uint8_t *data, size_t offset, size_t size, hoedown_autolink_flags flags);
size_t hoedown_autolink__url(
size_t *rewind_p,
hoedown_buffer *link,
uint8_t *data,
size_t offset,
size_t size,
hoedown_autolink_flags flags
);
#ifdef __cplusplus

View file

@ -5,251 +5,211 @@
#include <string.h>
#include <assert.h>
void *
hoedown_malloc(size_t size)
{
void *ret = malloc(size);
void hoedown_buffer_init(
hoedown_buffer *buf,
size_t unit,
hoedown_realloc_callback data_realloc,
hoedown_free_callback data_free
) {
assert(buf);
if (!ret) {
fprintf(stderr, "Allocation failed.\n");
abort();
}
return ret;
buf->data = NULL;
buf->size = buf->asize = 0;
buf->unit = unit;
buf->data_realloc = data_realloc;
buf->data_free = data_free;
}
void *
hoedown_calloc(size_t nmemb, size_t size)
{
void *ret = calloc(nmemb, size);
if (!ret) {
fprintf(stderr, "Allocation failed.\n");
abort();
}
return ret;
void hoedown_buffer_uninit(hoedown_buffer *buf) {
buf->data_free(buf->data);
}
void *
hoedown_realloc(void *ptr, size_t size)
{
void *ret = realloc(ptr, size);
if (!ret) {
fprintf(stderr, "Allocation failed.\n");
abort();
}
return ret;
hoedown_buffer *hoedown_buffer_new(size_t unit) {
hoedown_buffer *ret = hoedown_malloc(sizeof (hoedown_buffer));
hoedown_buffer_init(ret, unit, hoedown_realloc, free);
return ret;
}
void
hoedown_buffer_init(
hoedown_buffer *buf,
size_t unit,
hoedown_realloc_callback data_realloc,
hoedown_free_callback data_free,
hoedown_free_callback buffer_free)
{
assert(buf);
buf->data = NULL;
buf->size = buf->asize = 0;
buf->unit = unit;
buf->data_realloc = data_realloc;
buf->data_free = data_free;
buf->buffer_free = buffer_free;
void hoedown_buffer_free(hoedown_buffer *buf) {
if (!buf) return;
hoedown_buffer_uninit(buf);
free(buf);
}
hoedown_buffer *
hoedown_buffer_new(size_t unit)
{
hoedown_buffer *ret = hoedown_malloc(sizeof (hoedown_buffer));
hoedown_buffer_init(ret, unit, hoedown_realloc, free, free);
return ret;
void hoedown_buffer_reset(hoedown_buffer *buf) {
assert(buf && buf->unit);
buf->data_free(buf->data);
buf->data = NULL;
buf->size = buf->asize = 0;
}
void
hoedown_buffer_free(hoedown_buffer *buf)
{
if (!buf) return;
void hoedown_buffer_grow(hoedown_buffer *buf, size_t neosz) {
size_t neoasz;
assert(buf && buf->unit);
buf->data_free(buf->data);
if (buf->asize >= neosz)
return;
if (buf->buffer_free)
buf->buffer_free(buf);
neoasz = buf->asize + buf->unit;
while (neoasz < neosz)
neoasz += buf->unit;
buf->data = buf->data_realloc(buf->data, neoasz);
buf->asize = neoasz;
}
void
hoedown_buffer_reset(hoedown_buffer *buf)
{
assert(buf && buf->unit);
void hoedown_buffer_put(hoedown_buffer *buf, const uint8_t *data, size_t size) {
assert(buf && buf->unit);
buf->data_free(buf->data);
buf->data = NULL;
buf->size = buf->asize = 0;
if (buf->size + size > buf->asize)
hoedown_buffer_grow(buf, buf->size + size);
memcpy(buf->data + buf->size, data, size);
buf->size += size;
}
void
hoedown_buffer_grow(hoedown_buffer *buf, size_t neosz)
{
size_t neoasz;
assert(buf && buf->unit);
if (buf->asize >= neosz)
return;
neoasz = buf->asize + buf->unit;
while (neoasz < neosz)
neoasz += buf->unit;
buf->data = buf->data_realloc(buf->data, neoasz);
buf->asize = neoasz;
void hoedown_buffer_puts(hoedown_buffer *buf, const char *str) {
hoedown_buffer_put(buf, (const uint8_t *)str, strlen(str));
}
void
hoedown_buffer_put(hoedown_buffer *buf, const uint8_t *data, size_t size)
{
assert(buf && buf->unit);
void hoedown_buffer_putc(hoedown_buffer *buf, uint8_t c) {
assert(buf && buf->unit);
if (buf->size + size > buf->asize)
hoedown_buffer_grow(buf, buf->size + size);
if (buf->size >= buf->asize)
hoedown_buffer_grow(buf, buf->size + 1);
memcpy(buf->data + buf->size, data, size);
buf->size += size;
buf->data[buf->size] = c;
buf->size += 1;
}
void
hoedown_buffer_puts(hoedown_buffer *buf, const char *str)
{
hoedown_buffer_put(buf, (const uint8_t *)str, strlen(str));
/* hoedown_buffer_set: replace the buffer's contents with `size` bytes
 * copied from `data`, growing the allocation first when it is too small. */
void hoedown_buffer_set(hoedown_buffer *buf, const uint8_t *data, size_t size) {
  assert(buf && buf->unit);

  if (buf->asize < size)
    hoedown_buffer_grow(buf, size);

  memcpy(buf->data, data, size);
  buf->size = size;
}
void
hoedown_buffer_putc(hoedown_buffer *buf, uint8_t c)
{
assert(buf && buf->unit);
if (buf->size >= buf->asize)
hoedown_buffer_grow(buf, buf->size + 1);
buf->data[buf->size] = c;
buf->size += 1;
/* hoedown_buffer_sets: replace the buffer's contents with a NUL-terminated
 * string (the terminator itself is not copied). */
void hoedown_buffer_sets(hoedown_buffer *buf, const char *str) {
  const size_t length = strlen(str);

  hoedown_buffer_set(buf, (const uint8_t *)str, length);
}
void
hoedown_buffer_set(hoedown_buffer *buf, const uint8_t *data, size_t size)
{
assert(buf && buf->unit);
if (size > buf->asize)
hoedown_buffer_grow(buf, size);
memcpy(buf->data, data, size);
buf->size = size;
/* hoedown_buffer_eq: return 1 when the buffer holds exactly the `size`
 * bytes found at `data`, 0 otherwise. Lengths are compared first, so the
 * byte comparison only runs when they agree. */
int hoedown_buffer_eq(const hoedown_buffer *buf, const uint8_t *data, size_t size) {
  return buf->size == size && memcmp(buf->data, data, size) == 0;
}
void
hoedown_buffer_sets(hoedown_buffer *buf, const char *str)
{
hoedown_buffer_set(buf, (const uint8_t *)str, strlen(str));
/* hoedown_buffer_eqs: return 1 when the buffer's contents equal the
 * NUL-terminated string `str` (terminator excluded), 0 otherwise. */
int hoedown_buffer_eqs(const hoedown_buffer *buf, const char *str) {
  const size_t length = strlen(str);

  return hoedown_buffer_eq(buf, (const uint8_t *)str, length);
}
int
hoedown_buffer_eq(const hoedown_buffer *buf, const uint8_t *data, size_t size)
{
if (buf->size != size) return 0;
return memcmp(buf->data, data, size) == 0;
/*
 * hoedown_buffer_prefix: check whether the buffer starts with a C string.
 *
 * Returns 0 when every byte of `prefix` matches the beginning of the
 * buffer. Otherwise returns a non-zero value: the difference between the
 * first pair of mismatching bytes, or -1 when the buffer ends before the
 * prefix does.
 */
int hoedown_buffer_prefix(const hoedown_buffer *buf, const char *prefix) {
  size_t i;

  assert(buf && buf->unit);

  for (i = 0; i < buf->size; ++i) {
    /* Reached the end of the prefix without a mismatch: it matches. */
    if (prefix[i] == 0)
      return 0;

    if (buf->data[i] != prefix[i])
      return buf->data[i] - prefix[i];
  }

  /* The buffer ran out. That is only a match if the prefix ran out at the
   * same position; a buffer shorter than the prefix cannot start with it. */
  if (prefix[i] != 0)
    return -1;

  return 0;
}
int
hoedown_buffer_eqs(const hoedown_buffer *buf, const char *str)
{
return hoedown_buffer_eq(buf, (const uint8_t *)str, strlen(str));
/* hoedown_buffer_slurp: drop the first `size` bytes of the buffer, moving
 * the remaining bytes to the front. Dropping at least as many bytes as the
 * buffer holds simply empties it. */
void hoedown_buffer_slurp(hoedown_buffer *buf, size_t size) {
  assert(buf && buf->unit);

  if (size < buf->size) {
    const size_t remaining = buf->size - size;

    memmove(buf->data, buf->data + size, remaining);
    buf->size = remaining;
  } else {
    buf->size = 0;
  }
}
int
hoedown_buffer_prefix(const hoedown_buffer *buf, const char *prefix)
{
size_t i;
const char *hoedown_buffer_cstr(hoedown_buffer *buf) {
assert(buf && buf->unit);
assert(buf && buf->unit);
if (buf->size < buf->asize && buf->data[buf->size] == 0)
return (char *)buf->data;
for (i = 0; i < buf->size; ++i) {
if (prefix[i] == 0)
return 0;
hoedown_buffer_grow(buf, buf->size + 1);
buf->data[buf->size] = 0;
if (buf->data[i] != prefix[i])
return buf->data[i] - prefix[i];
}
return 0;
return (char *)buf->data;
}
void
hoedown_buffer_slurp(hoedown_buffer *buf, size_t size)
{
assert(buf && buf->unit);
void hoedown_buffer_printf(hoedown_buffer *buf, const char *fmt, ...) {
va_list ap;
int n;
if (size >= buf->size) {
buf->size = 0;
return;
}
assert(buf && buf->unit);
buf->size -= size;
memmove(buf->data, buf->data + size, buf->size);
}
if (buf->size >= buf->asize)
hoedown_buffer_grow(buf, buf->size + 1);
const char *
hoedown_buffer_cstr(hoedown_buffer *buf)
{
assert(buf && buf->unit);
va_start(ap, fmt);
n = vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
va_end(ap);
if (buf->size < buf->asize && buf->data[buf->size] == 0)
return (char *)buf->data;
hoedown_buffer_grow(buf, buf->size + 1);
buf->data[buf->size] = 0;
return (char *)buf->data;
}
void
hoedown_buffer_printf(hoedown_buffer *buf, const char *fmt, ...)
{
va_list ap;
int n;
assert(buf && buf->unit);
if (buf->size >= buf->asize)
hoedown_buffer_grow(buf, buf->size + 1);
va_start(ap, fmt);
n = vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
va_end(ap);
if (n < 0) {
if (n < 0) {
#ifndef _MSC_VER
return;
return;
#else
va_start(ap, fmt);
n = _vscprintf(fmt, ap);
va_end(ap);
va_start(ap, fmt);
n = _vscprintf(fmt, ap);
va_end(ap);
#endif
}
}
if ((size_t)n >= buf->asize - buf->size) {
hoedown_buffer_grow(buf, buf->size + n + 1);
if ((size_t)n >= buf->asize - buf->size) {
hoedown_buffer_grow(buf, buf->size + n + 1);
va_start(ap, fmt);
n = vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
va_end(ap);
}
va_start(ap, fmt);
n = vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
va_end(ap);
}
if (n < 0)
return;
if (n < 0)
return;
buf->size += n;
buf->size += n;
}
/*
 * hoedown_buffer_put_utf8: append the UTF-8 encoding of code point `c`.
 *
 * Values below 0x80 are written as a single byte; larger values use the
 * 2-, 3- or 4-byte forms. Surrogates (0xD800..0xDFFF) and values of
 * 0x110000 and above are written as U+FFFD (bytes EF BF BD) instead.
 */
void hoedown_buffer_put_utf8(hoedown_buffer *ob, unsigned int c) {
  unsigned char bytes[4];

  if (c < 0x80) {
    hoedown_buffer_putc(ob, c);
    return;
  }

  if (c < 0x800) {
    bytes[0] = 0xC0 | (c >> 6);
    bytes[1] = 0x80 | (c & 0x3F);
    hoedown_buffer_put(ob, bytes, 2);
    return;
  }

  /* The unsigned subtraction wraps for c < 0xd800, so the first test is
   * true only for the surrogate range. */
  if (c - 0xd800u < 0x800 || c >= 0x110000) {
    HOEDOWN_BUFPUTSL(ob, "\xef\xbf\xbd");
    return;
  }

  if (c < 0x10000) {
    bytes[0] = 0xE0 | (c >> 12);
    bytes[1] = 0x80 | ((c >> 6) & 0x3F);
    bytes[2] = 0x80 | (c & 0x3F);
    hoedown_buffer_put(ob, bytes, 3);
    return;
  }

  bytes[0] = 0xF0 | (c >> 18);
  bytes[1] = 0x80 | ((c >> 12) & 0x3F);
  bytes[2] = 0x80 | ((c >> 6) & 0x3F);
  bytes[3] = 0x80 | (c & 0x3F);
  hoedown_buffer_put(ob, bytes, 4);
}

View file

@ -27,18 +27,27 @@ extern "C" {
typedef void *(*hoedown_realloc_callback)(void *, size_t);
typedef void (*hoedown_free_callback)(void *);
struct hoedown_buffer {
uint8_t *data; /* actual character data */
size_t size; /* size of the string */
size_t asize; /* allocated size (0 = volatile buffer) */
size_t unit; /* reallocation unit size (0 = read-only buffer) */
typedef struct hoedown_buffer {
uint8_t *data; /* actual character data */
size_t size; /* size of the string */
size_t asize; /* allocated size (0 = volatile buffer) */
size_t unit; /* reallocation unit size (0 = read-only buffer) */
hoedown_realloc_callback data_realloc;
hoedown_free_callback data_free;
hoedown_free_callback buffer_free;
};
hoedown_realloc_callback data_realloc;
hoedown_free_callback data_free;
} hoedown_buffer;
typedef struct hoedown_buffer hoedown_buffer;
/* malloc / realloc / calloc wrappers
 *
 * HOEDOWN_ALLOC_WRAPPER(sig, call) declares and defines a static inline
 * function named hoedown_<sig>, where `sig` is the function name together
 * with its parameter list (for example: malloc(size_t size)). The function
 * evaluates `call`; if the result is NULL it prints "Allocation failed." to
 * stderr and calls abort(), otherwise it returns the pointer. */
#define HOEDOWN_ALLOC_WRAPPER(sig, call) \
static inline void *hoedown_##sig __attribute__ ((malloc)); \
static inline void *hoedown_##sig { \
void *ret = call; \
if (!ret) { \
fprintf(stderr, "Allocation failed.\n"); \
abort(); \
} \
return ret; \
}
/*************
@ -46,19 +55,21 @@ typedef struct hoedown_buffer hoedown_buffer;
*************/
/* allocation wrappers */
void *hoedown_malloc(size_t size) __attribute__ ((malloc));
void *hoedown_calloc(size_t nmemb, size_t size) __attribute__ ((malloc));
void *hoedown_realloc(void *ptr, size_t size) __attribute__ ((malloc));
HOEDOWN_ALLOC_WRAPPER(malloc(size_t size), malloc(size));
HOEDOWN_ALLOC_WRAPPER(calloc(size_t nmemb, size_t size), calloc(nmemb, size));
HOEDOWN_ALLOC_WRAPPER(realloc(void *ptr, size_t size), realloc(ptr, size));
/* hoedown_buffer_init: initialize a buffer with custom allocators */
void hoedown_buffer_init(
hoedown_buffer *buffer,
size_t unit,
hoedown_realloc_callback data_realloc,
hoedown_free_callback data_free,
hoedown_free_callback buffer_free
hoedown_buffer *buffer,
size_t unit,
hoedown_realloc_callback data_realloc,
hoedown_free_callback data_free
);
/* hoedown_buffer_uninit: uninitialize an existing buffer */
void hoedown_buffer_uninit(hoedown_buffer *buf);
/* hoedown_buffer_new: allocate a new buffer */
hoedown_buffer *hoedown_buffer_new(size_t unit) __attribute__ ((malloc));
@ -101,21 +112,24 @@ const char *hoedown_buffer_cstr(hoedown_buffer *buf);
/* hoedown_buffer_printf: formatted printing to a buffer */
void hoedown_buffer_printf(hoedown_buffer *buf, const char *fmt, ...) __attribute__ ((format (printf, 2, 3)));
/* hoedown_buffer_put_utf8: put a Unicode character encoded as UTF-8 */
void hoedown_buffer_put_utf8(hoedown_buffer *buf, unsigned int codepoint);
/* hoedown_buffer_free: free the buffer */
void hoedown_buffer_free(hoedown_buffer *buf);
/* HOEDOWN_BUFPUTSL: optimized hoedown_buffer_puts of a string literal */
#define HOEDOWN_BUFPUTSL(output, literal) \
hoedown_buffer_put(output, (const uint8_t *)literal, sizeof(literal) - 1)
hoedown_buffer_put(output, (const uint8_t *)literal, sizeof(literal) - 1)
/* HOEDOWN_BUFSETSL: optimized hoedown_buffer_sets of a string literal */
#define HOEDOWN_BUFSETSL(output, literal) \
hoedown_buffer_set(output, (const uint8_t *)literal, sizeof(literal) - 1)
hoedown_buffer_set(output, (const uint8_t *)literal, sizeof(literal) - 1)
/* HOEDOWN_BUFEQSL: optimized hoedown_buffer_eqs of a string literal */
#define HOEDOWN_BUFEQSL(output, literal) \
hoedown_buffer_eq(output, (const uint8_t *)literal, sizeof(literal) - 1)
hoedown_buffer_eq(output, (const uint8_t *)literal, sizeof(literal) - 1)
#ifdef __cplusplus

File diff suppressed because it is too large Load diff

View file

@ -15,153 +15,232 @@ extern "C" {
* CONSTANTS *
*************/
typedef enum hoedown_extensions {
/* block-level extensions */
HOEDOWN_EXT_TABLES = (1 << 0),
HOEDOWN_EXT_FENCED_CODE = (1 << 1),
HOEDOWN_EXT_FOOTNOTES = (1 << 2),
typedef enum hoedown_features {
/* Block constructs */
HOEDOWN_FT_DIRECTIVE = (1 << 0),
HOEDOWN_FT_INDENTED_CODE_BLOCK = (1 << 1),
HOEDOWN_FT_FENCED_CODE_BLOCK = (1 << 2),
HOEDOWN_FT_HORIZONTAL_RULE = (1 << 3),
HOEDOWN_FT_ATX_HEADER = (1 << 4),
HOEDOWN_FT_SETEXT_HEADER = (1 << 5),
HOEDOWN_FT_LIST = (1 << 8),
HOEDOWN_FT_QUOTE_BLOCK = (1 << 31),
HOEDOWN_FT_HTML_BLOCK = (1 << 6),
HOEDOWN_FT_TABLE = (1 << 7),
/* span-level extensions */
HOEDOWN_EXT_AUTOLINK = (1 << 3),
HOEDOWN_EXT_STRIKETHROUGH = (1 << 4),
HOEDOWN_EXT_UNDERLINE = (1 << 5),
HOEDOWN_EXT_HIGHLIGHT = (1 << 6),
HOEDOWN_EXT_QUOTE = (1 << 7),
HOEDOWN_EXT_SUPERSCRIPT = (1 << 8),
HOEDOWN_EXT_MATH = (1 << 9),
/* Inline constructs */
HOEDOWN_FT_ROLE = (1 << 9),
HOEDOWN_FT_ESCAPE = (1 << 10),
HOEDOWN_FT_HARD_LINEBREAK = (1 << 11),
HOEDOWN_FT_LINEBREAK = (1 << 12),
HOEDOWN_FT_SOFT_LINEBREAK = (1 << 13),
HOEDOWN_FT_URI_AUTOLINK = (1 << 14),
HOEDOWN_FT_EMAIL_AUTOLINK = (1 << 15),
HOEDOWN_FT_HTML = (1 << 16),
HOEDOWN_FT_ENTITY = (1 << 17),
HOEDOWN_FT_CODE_SPAN = (1 << 18),
HOEDOWN_FT_EMPHASIS = (1 << 19),
HOEDOWN_FT_LINK = (1 << 20),
HOEDOWN_FT_MATH = (1 << 22),
HOEDOWN_FT_SUPERSCRIPT = (1 << 23),
HOEDOWN_FT_QUOTE = (1 << 24),
HOEDOWN_FT_STRIKETHROUGH = (1 << 25),
HOEDOWN_FT_HIGHLIGHT = (1 << 26),
HOEDOWN_FT_FOOTNOTE = (1 << 27),
/* other flags */
HOEDOWN_EXT_NO_INTRA_EMPHASIS = (1 << 11),
HOEDOWN_EXT_SPACE_HEADERS = (1 << 12),
HOEDOWN_EXT_MATH_EXPLICIT = (1 << 13),
/* Other features */
HOEDOWN_FT_PREPROCESS = (1 << 28),
/* negative flags */
HOEDOWN_EXT_DISABLE_INDENTED_CODE = (1 << 14)
} hoedown_extensions;
/* Flags */
HOEDOWN_FT_LINK_IMAGE = (1 << 21),
HOEDOWN_FT_INTRA_EMPHASIS = (1 << 29),
HOEDOWN_FT_MATH_EXPLICIT = (1 << 30),
} hoedown_features;
#define HOEDOWN_EXT_BLOCK (\
HOEDOWN_EXT_TABLES |\
HOEDOWN_EXT_FENCED_CODE |\
HOEDOWN_EXT_FOOTNOTES )
#define HOEDOWN_EXT_SPAN (\
HOEDOWN_EXT_AUTOLINK |\
HOEDOWN_EXT_STRIKETHROUGH |\
HOEDOWN_EXT_UNDERLINE |\
HOEDOWN_EXT_HIGHLIGHT |\
HOEDOWN_EXT_QUOTE |\
HOEDOWN_EXT_SUPERSCRIPT |\
HOEDOWN_EXT_MATH )
/* HOEDOWN_FT_BLOCK: mask of all the block construct feature bits.
 * The trailing 0 only exists so that every member line can end in "|". */
#define HOEDOWN_FT_BLOCK (\
HOEDOWN_FT_DIRECTIVE |\
HOEDOWN_FT_INDENTED_CODE_BLOCK |\
HOEDOWN_FT_FENCED_CODE_BLOCK |\
HOEDOWN_FT_HORIZONTAL_RULE |\
HOEDOWN_FT_ATX_HEADER |\
HOEDOWN_FT_SETEXT_HEADER |\
HOEDOWN_FT_LIST |\
HOEDOWN_FT_QUOTE_BLOCK |\
HOEDOWN_FT_HTML_BLOCK |\
HOEDOWN_FT_TABLE |\
0)
#define HOEDOWN_EXT_FLAGS (\
HOEDOWN_EXT_NO_INTRA_EMPHASIS |\
HOEDOWN_EXT_SPACE_HEADERS |\
HOEDOWN_EXT_MATH_EXPLICIT )
/* HOEDOWN_FT_INLINE: mask of all the inline construct feature bits.
 * The trailing 0 only exists so that every member line can end in "|". */
#define HOEDOWN_FT_INLINE (\
HOEDOWN_FT_ROLE |\
HOEDOWN_FT_ESCAPE |\
HOEDOWN_FT_HARD_LINEBREAK |\
HOEDOWN_FT_LINEBREAK |\
HOEDOWN_FT_SOFT_LINEBREAK |\
HOEDOWN_FT_URI_AUTOLINK |\
HOEDOWN_FT_EMAIL_AUTOLINK |\
HOEDOWN_FT_HTML |\
HOEDOWN_FT_ENTITY |\
HOEDOWN_FT_CODE_SPAN |\
HOEDOWN_FT_EMPHASIS |\
HOEDOWN_FT_LINK |\
HOEDOWN_FT_MATH |\
HOEDOWN_FT_SUPERSCRIPT |\
HOEDOWN_FT_QUOTE |\
HOEDOWN_FT_STRIKETHROUGH |\
HOEDOWN_FT_HIGHLIGHT |\
HOEDOWN_FT_FOOTNOTE |\
0)
#define HOEDOWN_EXT_NEGATIVE (\
HOEDOWN_EXT_DISABLE_INDENTED_CODE )
/* HOEDOWN_FT_OTHER: mask of the feature bits that are neither block nor
 * inline constructs; currently only HOEDOWN_FT_PREPROCESS. */
#define HOEDOWN_FT_OTHER (\
HOEDOWN_FT_PREPROCESS |\
0)
/* hoedown_list_flags: bit flags passed to the `list` and `listitem`
 * renderer callbacks. */
typedef enum hoedown_list_flags {
HOEDOWN_LIST_ORDERED = (1 << 0),
HOEDOWN_LI_BLOCK = (1 << 1) /* <li> containing block data */
} hoedown_list_flags;
/* HOEDOWN_FT_FLAGS: mask of the bits listed under "Flags" in
 * hoedown_features. */
#define HOEDOWN_FT_FLAGS (\
HOEDOWN_FT_LINK_IMAGE |\
HOEDOWN_FT_INTRA_EMPHASIS |\
HOEDOWN_FT_MATH_EXPLICIT |\
0)
/* hoedown_table_flags: flags passed to the `table_cell` renderer callback.
 * The two low bits hold the alignment (extract them with
 * HOEDOWN_TABLE_ALIGNMASK); HOEDOWN_TABLE_HEADER is a separate bit. */
typedef enum hoedown_table_flags {
HOEDOWN_TABLE_ALIGN_LEFT = 1,
HOEDOWN_TABLE_ALIGN_RIGHT = 2,
HOEDOWN_TABLE_ALIGN_CENTER = 3, /* same bits as LEFT | RIGHT */
HOEDOWN_TABLE_ALIGNMASK = 3,
HOEDOWN_TABLE_HEADER = 4
} hoedown_table_flags;
/* hoedown_autolink_type: kind of autolink passed to the `autolink`
 * renderer callback. */
typedef enum hoedown_autolink_type {
HOEDOWN_AUTOLINK_NONE, /* used internally when it is not an autolink */
HOEDOWN_AUTOLINK_NORMAL, /* normal http/https/ftp/mailto/etc. link */
HOEDOWN_AUTOLINK_EMAIL /* e-mail link without an explicit mailto: */
} hoedown_autolink_type;
/***********
* PRESETS *
***********/
/* HOEDOWN_FT_COMMONMARK: feature preset. The groups separated by the empty
 * continuation lines are, in order: block constructs, inline constructs,
 * other features, and flags. */
#define HOEDOWN_FT_COMMONMARK (\
HOEDOWN_FT_INDENTED_CODE_BLOCK |\
HOEDOWN_FT_FENCED_CODE_BLOCK |\
HOEDOWN_FT_HORIZONTAL_RULE |\
HOEDOWN_FT_ATX_HEADER |\
HOEDOWN_FT_SETEXT_HEADER |\
HOEDOWN_FT_LIST |\
HOEDOWN_FT_QUOTE_BLOCK |\
HOEDOWN_FT_HTML_BLOCK |\
\
HOEDOWN_FT_ESCAPE |\
HOEDOWN_FT_HARD_LINEBREAK |\
HOEDOWN_FT_LINEBREAK |\
HOEDOWN_FT_URI_AUTOLINK |\
HOEDOWN_FT_EMAIL_AUTOLINK |\
HOEDOWN_FT_HTML |\
HOEDOWN_FT_ENTITY |\
HOEDOWN_FT_CODE_SPAN |\
HOEDOWN_FT_EMPHASIS |\
HOEDOWN_FT_LINK |\
\
HOEDOWN_FT_PREPROCESS |\
\
HOEDOWN_FT_LINK_IMAGE |\
0)
/* HOEDOWN_FT_MARKDOWN: feature preset. Compared with HOEDOWN_FT_COMMONMARK
 * it leaves out HOEDOWN_FT_FENCED_CODE_BLOCK and HOEDOWN_FT_HARD_LINEBREAK
 * and additionally sets HOEDOWN_FT_INTRA_EMPHASIS. */
#define HOEDOWN_FT_MARKDOWN (\
HOEDOWN_FT_INDENTED_CODE_BLOCK |\
HOEDOWN_FT_HORIZONTAL_RULE |\
HOEDOWN_FT_ATX_HEADER |\
HOEDOWN_FT_SETEXT_HEADER |\
HOEDOWN_FT_LIST |\
HOEDOWN_FT_QUOTE_BLOCK |\
HOEDOWN_FT_HTML_BLOCK |\
\
HOEDOWN_FT_ESCAPE |\
HOEDOWN_FT_LINEBREAK |\
HOEDOWN_FT_URI_AUTOLINK |\
HOEDOWN_FT_EMAIL_AUTOLINK |\
HOEDOWN_FT_HTML |\
HOEDOWN_FT_ENTITY |\
HOEDOWN_FT_CODE_SPAN |\
HOEDOWN_FT_EMPHASIS |\
HOEDOWN_FT_LINK |\
\
HOEDOWN_FT_PREPROCESS |\
\
HOEDOWN_FT_LINK_IMAGE |\
HOEDOWN_FT_INTRA_EMPHASIS |\
0)
/*********
* TYPES *
*********/
struct hoedown_document;
typedef struct hoedown_document hoedown_document;
struct hoedown_renderer_data {
void *opaque;
};
typedef struct hoedown_renderer_data hoedown_renderer_data;
typedef struct hoedown_internal hoedown_internal;
/* hoedown_renderer - functions for rendering parsed data */
struct hoedown_renderer {
/* state object */
void *opaque;
typedef struct hoedown_renderer_data {
void *opaque;
void *output;
hoedown_internal *doc;
} hoedown_renderer_data;
/* block level callbacks - NULL skips the block */
void (*blockcode)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_buffer *lang, const hoedown_renderer_data *data);
void (*blockquote)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*header)(hoedown_buffer *ob, const hoedown_buffer *content, int level, const hoedown_renderer_data *data);
void (*hrule)(hoedown_buffer *ob, const hoedown_renderer_data *data);
void (*list)(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_list_flags flags, const hoedown_renderer_data *data);
void (*listitem)(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_list_flags flags, const hoedown_renderer_data *data);
void (*paragraph)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table_header)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table_body)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table_row)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table_cell)(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_table_flags flags, const hoedown_renderer_data *data);
void (*footnotes)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*footnote_def)(hoedown_buffer *ob, const hoedown_buffer *content, unsigned int num, const hoedown_renderer_data *data);
void (*blockhtml)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
typedef struct hoedown_renderer {
void *opaque;
/* span level callbacks - NULL or return 0 prints the span verbatim */
int (*autolink)(hoedown_buffer *ob, const hoedown_buffer *link, hoedown_autolink_type type, const hoedown_renderer_data *data);
int (*codespan)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
int (*double_emphasis)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*emphasis)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*underline)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*highlight)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*quote)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*image)(hoedown_buffer *ob, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_buffer *alt, const hoedown_renderer_data *data);
int (*linebreak)(hoedown_buffer *ob, const hoedown_renderer_data *data);
int (*link)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_renderer_data *data);
int (*triple_emphasis)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*strikethrough)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*superscript)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*footnote_ref)(hoedown_buffer *ob, unsigned int num, const hoedown_renderer_data *data);
int (*math)(hoedown_buffer *ob, const hoedown_buffer *text, int displaymode, const hoedown_renderer_data *data);
int (*raw_html)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
/* Block constructs */
void (*paragraph)(void *target, void *content, int is_tight, const hoedown_renderer_data *data);
void (*indented_code_block)(void *target, const hoedown_buffer *code, const hoedown_renderer_data *data);
void (*fenced_code_block)(void *target, const hoedown_buffer *code, const hoedown_buffer *info, const hoedown_renderer_data *data);
void (*horizontal_rule)(void *target, const hoedown_renderer_data *data);
void (*atx_header)(void *target, void *content, size_t level, const hoedown_renderer_data *data);
void (*setext_header)(void *target, void *content, int is_double, const hoedown_renderer_data *data);
void (*list)(void *target, void *content, int is_ordered, int is_tight, int start, const hoedown_renderer_data *data);
void (*list_item)(void *target, void *content, int is_ordered, int is_tight, const hoedown_renderer_data *data);
void (*quote_block)(void *target, void *content, const hoedown_renderer_data *data);
void (*html_block)(void *target, const hoedown_buffer *html, const hoedown_renderer_data *data);
/* low level callbacks - NULL copies input directly into the output */
void (*entity)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
void (*normal_text)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
/* Inline constructs */
void (*string)(void *target, const hoedown_buffer *text, const hoedown_renderer_data *data);
void (*escape)(void *target, uint8_t character, const hoedown_renderer_data *data);
void (*hard_linebreak)(void *target, const hoedown_renderer_data *data);
void (*linebreak)(void *target, const hoedown_renderer_data *data);
void (*uri_autolink)(void *target, const hoedown_buffer *uri, const hoedown_renderer_data *data);
void (*email_autolink)(void *target, const hoedown_buffer *email, const hoedown_renderer_data *data);
void (*html)(void *target, const hoedown_buffer *html, const hoedown_renderer_data *data);
void (*entity)(void *target, const hoedown_buffer *character, const hoedown_renderer_data *data);
void (*code_span)(void *target, const hoedown_buffer *code, const hoedown_renderer_data *data);
void (*emphasis)(void *target, void *content, size_t level, const hoedown_renderer_data *data);
void (*link)(void *target, void *content, const hoedown_buffer *dest, const hoedown_buffer *title, int is_image, const hoedown_renderer_data *data);
/* miscellaneous callbacks */
void (*doc_header)(hoedown_buffer *ob, int inline_render, const hoedown_renderer_data *data);
void (*doc_footer)(hoedown_buffer *ob, int inline_render, const hoedown_renderer_data *data);
};
typedef struct hoedown_renderer hoedown_renderer;
/* Global callbacks */
void *(*object_get)(int is_inline, const hoedown_renderer_data *data);
void (*object_merge)(void *target, void *content, int is_inline, const hoedown_renderer_data *data);
void (*object_pop)(void *target, int is_inline, const hoedown_renderer_data *data);
void (*render_start)(void *output, int is_inline, const hoedown_renderer_data *data);
void (*render_end)(void *output, void *target, int is_inline, const hoedown_renderer_data *data);
} hoedown_renderer;
/*************
* FUNCTIONS *
*************/
/* hoedown_document_new: allocate a new document processor instance */
/* hoedown_find_block_tag: lookup if an HTML tag name is a block */
const char *hoedown_find_block_tag(const char *str, unsigned int len);
/* hoedown_find_autolink_scheme: lookup if a scheme is well-known */
const char *hoedown_find_autolink_scheme(const char *str, unsigned int len);
/* hoedown_preprocess: preprocess input for markdown rendering */
void hoedown_preprocess(hoedown_buffer *ob, const uint8_t *data, size_t size);
/* hoedown_document_new: allocate a new document processor */
hoedown_document *hoedown_document_new(
const hoedown_renderer *renderer,
hoedown_extensions extensions,
size_t max_nesting
) __attribute__ ((malloc));
hoedown_renderer *renderer,
hoedown_features features,
size_t max_nesting
) __attribute__((malloc));
/* hoedown_document_render: render regular Markdown using the document processor */
void hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size);
/* hoedown_document_render: render markdown with a document processor */
void hoedown_document_render(
hoedown_document *doc,
void *output,
const uint8_t *data, size_t size,
int is_inline
);
/* hoedown_document_render_inline: render inline Markdown using the document processor */
void hoedown_document_render_inline(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size);
/* hoedown_document_free: deallocate a document processor instance */
/* hoedown_document_free: deallocate a document processor */
void hoedown_document_free(hoedown_document *doc);

View file

@ -8,19 +8,24 @@
#define likely(x) __builtin_expect((x),1)
#define unlikely(x) __builtin_expect((x),0)
/* _isxdigit: true iff c is an ASCII hexadecimal digit. memchr over exactly
 * the 22 digit characters is used rather than strchr, because strchr also
 * matches the string's terminating NUL and would report c == 0 as a digit. */
#define _isxdigit(c) (memchr("0123456789ABCDEFabcdef", (c), 22) != NULL)
#define _isdigit(c) ((c) >= '0' && (c) <= '9')
#include "_html_entities.h"
/*
* The following characters will not be escaped:
*
* -_.+!*'(),%#@?=;:/,+&$ alphanum
* -_.+!*'(),%#@?=;:/,+&$ alphanum
*
* Note that this character set is the addition of:
*
* - The characters which are safe to be in an URL
* - The characters which are *not* safe to be in
* an URL because they are RESERVED characters.
* - The characters which are safe to be in an URL
* - The characters which are *not* safe to be in
* an URL because they are RESERVED characters.
*
* We assume (lazily) that any RESERVED char that
* We assume (lazily) that any RESERVED char that
* appears inside an URL is actually meant to
* have its native function (i.e. as an URL
* component/separator) and hence needs no escaping.
@ -35,84 +40,82 @@
*
*/
static const uint8_t HREF_SAFE[UINT8_MAX+1] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
void
hoedown_escape_href(hoedown_buffer *ob, const uint8_t *data, size_t size)
{
static const char hex_chars[] = "0123456789ABCDEF";
size_t i = 0, mark;
char hex_str[3];
void hoedown_escape_href(hoedown_buffer *ob, const uint8_t *data, size_t size) {
static const char hex_chars[] = "0123456789ABCDEF";
size_t i = 0, mark;
char hex_str[3];
hex_str[0] = '%';
hex_str[0] = '%';
while (i < size) {
mark = i;
while (i < size && HREF_SAFE[data[i]]) i++;
while (i < size) {
mark = i;
while (i < size && HREF_SAFE[data[i]]) i++;
/* Optimization for cases where there's nothing to escape */
if (mark == 0 && i >= size) {
hoedown_buffer_put(ob, data, size);
return;
}
/* Optimization for cases where there's nothing to escape */
if (mark == 0 && i >= size) {
hoedown_buffer_put(ob, data, size);
return;
}
if (likely(i > mark)) {
hoedown_buffer_put(ob, data + mark, i - mark);
}
if (likely(i > mark)) {
hoedown_buffer_put(ob, data + mark, i - mark);
}
/* escaping */
if (i >= size)
break;
/* escaping */
if (i >= size)
break;
switch (data[i]) {
/* amp appears all the time in URLs, but needs
* HTML-entity escaping to be inside an href */
case '&':
HOEDOWN_BUFPUTSL(ob, "&amp;");
break;
switch (data[i]) {
/* amp appears all the time in URLs, but needs
* HTML-entity escaping to be inside an href */
case '&':
HOEDOWN_BUFPUTSL(ob, "&amp;");
break;
/* the single quote is a valid URL character
* according to the standard; it needs HTML
* entity escaping too */
case '\'':
HOEDOWN_BUFPUTSL(ob, "&#x27;");
break;
/* the single quote is a valid URL character
* according to the standard; it needs HTML
* entity escaping too */
case '\'':
HOEDOWN_BUFPUTSL(ob, "&#x27;");
break;
/* the space can be escaped to %20 or a plus
* sign. we're going with the generic escape
* for now. the plus thing is more commonly seen
* when building GET strings */
/* the space can be escaped to %20 or a plus
* sign. we're going with the generic escape
* for now. the plus thing is more commonly seen
* when building GET strings */
#if 0
case ' ':
hoedown_buffer_putc(ob, '+');
break;
case ' ':
hoedown_buffer_putc(ob, '+');
break;
#endif
/* every other character goes with a %XX escaping */
default:
hex_str[1] = hex_chars[(data[i] >> 4) & 0xF];
hex_str[2] = hex_chars[data[i] & 0xF];
hoedown_buffer_put(ob, (uint8_t *)hex_str, 3);
}
/* every other character goes with a %XX escaping */
default:
hex_str[1] = hex_chars[(data[i] >> 4) & 0xF];
hex_str[2] = hex_chars[data[i] & 0xF];
hoedown_buffer_put(ob, (uint8_t *)hex_str, 3);
}
i++;
}
i++;
}
}
@ -128,22 +131,22 @@ hoedown_escape_href(hoedown_buffer *ob, const uint8_t *data, size_t size)
*
*/
static const uint8_t HTML_ESCAPE_TABLE[UINT8_MAX+1] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
static const char *HTML_ESCAPES[] = {
@ -151,38 +154,110 @@ static const char *HTML_ESCAPES[] = {
"&quot;",
"&amp;",
"&#39;",
"&#47;",
"&lt;",
"&gt;"
};
void
hoedown_escape_html(hoedown_buffer *ob, const uint8_t *data, size_t size, int secure)
{
size_t i = 0, mark;
void hoedown_escape_html(hoedown_buffer *ob, const uint8_t *data, size_t size) {
size_t i = 0, mark;
while (1) {
mark = i;
while (i < size && HTML_ESCAPE_TABLE[data[i]] == 0) i++;
while (1) {
mark = i;
while (i < size && HTML_ESCAPE_TABLE[data[i]] == 0) i++;
/* Optimization for cases where there's nothing to escape */
if (mark == 0 && i >= size) {
hoedown_buffer_put(ob, data, size);
return;
}
/* Optimization for cases where there's nothing to escape */
if (mark == 0 && i >= size) {
hoedown_buffer_put(ob, data, size);
return;
}
if (likely(i > mark))
hoedown_buffer_put(ob, data + mark, i - mark);
if (likely(i > mark))
hoedown_buffer_put(ob, data + mark, i - mark);
if (i >= size) break;
if (i >= size) break;
/* The forward slash is only escaped in secure mode */
if (!secure && data[i] == '/') {
hoedown_buffer_putc(ob, '/');
} else {
hoedown_buffer_puts(ob, HTML_ESCAPES[HTML_ESCAPE_TABLE[data[i]]]);
}
i++;
}
hoedown_buffer_puts(ob, HTML_ESCAPES[HTML_ESCAPE_TABLE[data[i]]]);
i++;
}
}
/* hoedown_escape_character: write one character to `ob`, HTML-escaped if
 * needed. Only a single-byte input can have an entry in HTML_ESCAPE_TABLE;
 * anything else is copied through unchanged. */
void hoedown_escape_character(hoedown_buffer *ob, const uint8_t *data, size_t size) {
  /* Index into HTML_ESCAPES, or 0 when no escaping applies. */
  const uint8_t slot = (size == 1) ? HTML_ESCAPE_TABLE[data[0]] : 0;

  if (slot != 0)
    hoedown_buffer_puts(ob, HTML_ESCAPES[slot]);
  else
    hoedown_buffer_put(ob, data, size);
}
/*
 * hoedown_unescape_entity: decode a single HTML entity and append the
 * result to `ob`.
 *
 * `data` is the text following the '&' (hoedown_unescape_html calls this
 * with the pointer advanced past it). Two forms are recognized:
 *
 *   - numeric references: "#" + decimal digits + ";" or
 *     "#x" / "#X" + hexadecimal digits + ";", written out as UTF-8 via
 *     hoedown_buffer_put_utf8;
 *   - named entities, looked up with find_entity.
 *
 * Returns the number of bytes consumed from `data`, including the closing
 * ';', or 0 if no entity was recognized (in which case nothing is written).
 */
size_t hoedown_unescape_entity(hoedown_buffer *ob, const uint8_t *data, size_t size) {
  size_t i = 0;

  /* The shortest numeric reference is "#" + one digit + ";" (3 bytes). */
  if (size >= 3 && data[0] == '#') {
    /* Unsigned and clamped below, so long digit runs cannot overflow. */
    unsigned int codepoint = 0;

    if (_isdigit(data[1])) {
      for (i = 1; i < size && _isdigit(data[i]); ++i) {
        codepoint = (codepoint * 10) + (data[i] - '0');
        /* Clamp to the first out-of-range value; hoedown_buffer_put_utf8
         * writes U+FFFD for it. */
        if (codepoint > 0x10FFFF) codepoint = 0x110000;
      }
    }

    else if (data[1] == 'x' || data[1] == 'X') {
      /* The explicit NUL test keeps a zero byte from being taken as a digit. */
      for (i = 2; i < size && data[i] != 0 && _isxdigit(data[i]); ++i) {
        /* (c | 32) folds to lower case; % 39 - 9 maps '0'..'9' to 0..9 and
         * 'a'..'f' to 10..15. */
        codepoint = (codepoint * 16) + ((data[i] | 32) % 39 - 9);
        if (codepoint > 0x10FFFF) codepoint = 0x110000;
      }
      /* "#x" with no digits is not a reference. */
      if (i == 2) return 0;
    }

    if (i < size && data[i] == ';') {
      hoedown_buffer_put_utf8(ob, codepoint);
      return i + 1;
    }
  }

  else {
    /* No entity name is longer than MAX_WORD_LENGTH, so don't scan further. */
    if (size > MAX_WORD_LENGTH)
      size = MAX_WORD_LENGTH;

    for (i = MIN_WORD_LENGTH; i < size; ++i) {
      if (data[i] == ' ')
        break;

      if (data[i] == ';') {
        const struct html_entity *entity = find_entity((const char *)data, i);

        if (entity != NULL) {
          hoedown_buffer_put(ob, entity->utf8, entity->size);
          return i + 1;
        }

        break;
      }
    }
  }

  return 0;
}
/* hoedown_unescape_html: copy `data` to `ob`, replacing every entity that
 * hoedown_unescape_entity() can decode with its decoded text. An '&' that
 * does not start a decodable entity is copied through literally. */
void hoedown_unescape_html(hoedown_buffer *ob, const uint8_t *data, size_t size) {
    size_t pos = 0;      /* scan cursor */
    size_t pending = 0;  /* first input byte not yet written to ob */

    for (;;) {
        size_t consumed;

        /* Advance to the next '&' or to the end of the input */
        while (pos < size && data[pos] != '&')
            pos++;

        if (pos >= size) {
            if (pending == 0) {
                /* Nothing was rewritten: emit the whole input in one go */
                hoedown_buffer_put(ob, data, size);
            } else if (likely(pos > pending)) {
                hoedown_buffer_put(ob, data + pending, pos - pending);
            }
            return;
        }

        /* Flush the literal text that precedes the '&' */
        if (likely(pos > pending))
            hoedown_buffer_put(ob, data + pending, pos - pending);

        /* Keep the '&' pending until we know an entity was really decoded */
        pending = pos;
        consumed = hoedown_unescape_entity(ob, data + pos + 1, size - pos - 1);
        pos += 1 + consumed;

        if (consumed > 0)
            pending = pos;
    }
}

View file

@ -18,7 +18,17 @@ extern "C" {
void hoedown_escape_href(hoedown_buffer *ob, const uint8_t *data, size_t size);
/* hoedown_escape_html: escape HTML */
void hoedown_escape_html(hoedown_buffer *ob, const uint8_t *data, size_t size, int secure);
void hoedown_escape_html(hoedown_buffer *ob, const uint8_t *data, size_t size);
/* hoedown_unescape_html: unescape HTML */
void hoedown_unescape_html(hoedown_buffer *ob, const uint8_t *data, size_t size);
/* hoedown_unescape_entity: unescape a single HTML entity */
size_t hoedown_unescape_entity(hoedown_buffer *ob, const uint8_t *data, size_t size);
/* hoedown_escape_character: escape a single UTF-8 character in HTML (if needed) */
/* DO NOT USE unless you know it's a single character */
void hoedown_escape_character(hoedown_buffer *ob, const uint8_t *data, size_t size);
#ifdef __cplusplus

1018
src/html.c

File diff suppressed because it is too large Load diff

View file

@ -5,6 +5,7 @@
#include "document.h"
#include "buffer.h"
#include "pool.h"
#ifdef __cplusplus
extern "C" {
@ -15,66 +16,25 @@ extern "C" {
* CONSTANTS *
*************/
typedef enum hoedown_html_flags {
HOEDOWN_HTML_SKIP_HTML = (1 << 0),
HOEDOWN_HTML_ESCAPE = (1 << 1),
HOEDOWN_HTML_HARD_WRAP = (1 << 2),
HOEDOWN_HTML_USE_XHTML = (1 << 3)
} hoedown_html_flags;
typedef enum hoedown_html_tag {
HOEDOWN_HTML_TAG_NONE = 0,
HOEDOWN_HTML_TAG_OPEN,
HOEDOWN_HTML_TAG_CLOSE
} hoedown_html_tag;
//TODO
/*********
* TYPES *
*********/
struct hoedown_html_renderer_state {
void *opaque;
struct {
int header_count;
int current_level;
int level_offset;
int nesting_level;
} toc_data;
hoedown_html_flags flags;
/* extra callbacks */
void (*link_attributes)(hoedown_buffer *ob, const hoedown_buffer *url, const hoedown_renderer_data *data);
};
typedef struct hoedown_html_renderer_state hoedown_html_renderer_state;
//TODO
/*************
* FUNCTIONS *
*************/
/* hoedown_html_smartypants: process an HTML snippet using SmartyPants for smart punctuation */
void hoedown_html_smartypants(hoedown_buffer *ob, const uint8_t *data, size_t size);
/* hoedown_html_is_tag: checks if data starts with a specific tag, returns the tag type or NONE */
hoedown_html_tag hoedown_html_is_tag(const uint8_t *data, size_t size, const char *tagname);
/* hoedown_html_renderer_new: allocates a regular HTML renderer */
hoedown_renderer *hoedown_html_renderer_new(
hoedown_html_flags render_flags,
int nesting_level
) __attribute__ ((malloc));
/* hoedown_html_toc_renderer_new: like hoedown_html_renderer_new, but the returned renderer produces the Table of Contents */
hoedown_renderer *hoedown_html_toc_renderer_new(
int nesting_level
) __attribute__ ((malloc));
/* hoedown_html_renderer_new: allocate a new HTML renderer */
hoedown_renderer *hoedown_html_renderer_new() __attribute__((malloc));
/* hoedown_html_renderer_free: deallocate an HTML renderer */
void hoedown_html_renderer_free(hoedown_renderer *renderer);
void hoedown_html_renderer_free(hoedown_renderer *rndr);
#ifdef __cplusplus

View file

@ -1,425 +0,0 @@
#include "html.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#ifdef _MSC_VER
#define snprintf _snprintf
#endif
/* Quote-tracking state threaded through every SmartyPants callback. */
struct smartypants_data {
/* non-zero while a single quote has been opened and not yet closed */
int in_squote;
/* non-zero while a double quote has been opened and not yet closed */
int in_dquote;
};
static size_t smartypants_cb__ltag(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__dquote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__amp(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__period(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__number(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__dash(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__parens(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__squote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__backtick(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__escape(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t (*smartypants_cb_ptrs[])
(hoedown_buffer *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) =
{
NULL, /* 0 */
smartypants_cb__dash, /* 1 */
smartypants_cb__parens, /* 2 */
smartypants_cb__squote, /* 3 */
smartypants_cb__dquote, /* 4 */
smartypants_cb__amp, /* 5 */
smartypants_cb__period, /* 6 */
smartypants_cb__number, /* 7 */
smartypants_cb__ltag, /* 8 */
smartypants_cb__backtick, /* 9 */
smartypants_cb__escape, /* 10 */
};
static const uint8_t smartypants_cb_chars[UINT8_MAX+1] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 4, 0, 0, 0, 5, 3, 2, 0, 0, 0, 0, 1, 6, 0,
0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0,
9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/* Returns 1 when `c` can delimit a word: NUL (used by callers to mean
 * "no neighbouring character"), whitespace or punctuation; 0 otherwise. */
static int
word_boundary(uint8_t c)
{
    if (c == 0)
        return 1;

    return isspace(c) || ispunct(c);
}
/*
 * Recognises a single quote at the start of 'text' in any of its spellings
 * (a literal "'", "&#39;", "&#x27;" or "&apos;").
 * Returns the byte length of the matched spelling, or zero when 'text' does
 * not begin with one.
 */
static size_t
squote_len(const uint8_t *text, size_t size)
{
    static const char *const spellings[] = { "'", "&#39;", "&#x27;", "&apos;" };
    size_t k;

    for (k = 0; k < sizeof(spellings) / sizeof(spellings[0]); k++) {
        size_t n = strlen(spellings[k]);

        if (n <= size && memcmp(text, spellings[k], n) == 0)
            return n;
    }

    return 0;
}
/*
 * Emits a curly-quote entity for a quote found at a word edge and flips
 * '*is_open'. 'quote' is the entity letter ('s' or 'd'). An opening quote
 * must follow a word boundary; a closing quote must precede one.
 * Returns 1 when an entity was written, 0 when nothing was written.
 */
static int
smartypants_quotes(hoedown_buffer *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
{
    int closing = (*is_open != 0);
    uint8_t neighbour = closing ? next_char : previous_char;
    char entity[8];

    if (!word_boundary(neighbour))
        return 0;

    snprintf(entity, sizeof(entity), "&%c%cquo;", closing ? 'r' : 'l', quote);
    *is_open = !closing;
    hoedown_buffer_puts(ob, entity);
    return 1;
}
/*
 * Converts ' to a left or right single quote; the quote itself may have been
 * written in several forms (', &apos;, &#39; or &#x27;).
 *
 * 'text' points at the LAST character of the quote (the ' itself, or the ';'
 * of an entity) and 'size' counts the bytes available from there.
 * 'squote_text' / 'squote_size' describe the original spelling, which is
 * copied to the output verbatim when no conversion applies.
 *
 * Returns the number of extra input bytes consumed beyond text[0].
 */
static size_t
smartypants_squote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size,
                   const uint8_t *squote_text, size_t squote_size)
{
    if (size >= 2) {
        uint8_t t1 = tolower(text[1]);
        size_t next_squote_len = squote_len(text+1, size-1);

        /* convert '' to &ldquo; or &rdquo; */
        if (next_squote_len > 0) {
            uint8_t next_char = (size > 1+next_squote_len) ? text[1+next_squote_len] : 0;
            if (smartypants_quotes(ob, previous_char, next_char, 'd', &smrt->in_dquote))
                return next_squote_len;
        }

        /* Tom's, isn't, I'm, I'd
         * text[2] exists only when size > 2, so end-of-input is size == 2. */
        if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
            (size == 2 || word_boundary(text[2]))) {
            HOEDOWN_BUFPUTSL(ob, "&rsquo;");
            return 0;
        }

        /* you're, you'll, you've
         * text[3] exists only when size > 3, so end-of-input is size == 3. */
        if (size >= 3) {
            uint8_t t2 = tolower(text[2]);

            if (((t1 == 'r' && t2 == 'e') ||
                (t1 == 'l' && t2 == 'l') ||
                (t1 == 'v' && t2 == 'e')) &&
                (size == 3 || word_boundary(text[3]))) {
                HOEDOWN_BUFPUTSL(ob, "&rsquo;");
                return 0;
            }
        }
    }

    /* text[1] may only be read when at least two bytes are available */
    if (smartypants_quotes(ob, previous_char, size > 1 ? text[1] : 0, 's', &smrt->in_squote))
        return 0;

    /* No conversion applied: keep the quote exactly as it was written */
    hoedown_buffer_put(ob, squote_text, squote_size);
    return 0;
}
/* Callback for a literal ': hands over to smartypants_squote, passing the
 * one-byte apostrophe itself as the original spelling. */
static size_t
smartypants_cb__squote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    const uint8_t *spelling = text;
    const size_t spelling_len = 1;

    return smartypants_squote(ob, smrt, previous_char, text, size, spelling, spelling_len);
}
/* Callback for '(': turns (c), (r) and (tm) into their entities, matching the
 * letters case-insensitively. Any other '(' is copied through. Returns the
 * number of extra input bytes consumed. */
static size_t
smartypants_cb__parens(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    if (size >= 3) {
        uint8_t first = tolower(text[1]);
        uint8_t second = tolower(text[2]);

        if (second == ')') {
            /* one-letter symbols */
            if (first == 'c') {
                HOEDOWN_BUFPUTSL(ob, "&copy;");
                return 2;
            }

            if (first == 'r') {
                HOEDOWN_BUFPUTSL(ob, "&reg;");
                return 2;
            }
        } else if (first == 't' && second == 'm' && size >= 4 && text[3] == ')') {
            HOEDOWN_BUFPUTSL(ob, "&trade;");
            return 3;
        }
    }

    hoedown_buffer_putc(ob, text[0]);
    return 0;
}
/* Callback for '-': "---" becomes an em-dash, "--" an en-dash, and a lone
 * dash is copied through. Returns the number of extra input bytes consumed. */
static size_t
smartypants_cb__dash(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    if (size >= 2 && text[1] == '-') {
        if (size >= 3 && text[2] == '-') {
            HOEDOWN_BUFPUTSL(ob, "&mdash;");
            return 2;
        }

        HOEDOWN_BUFPUTSL(ob, "&ndash;");
        return 1;
    }

    hoedown_buffer_putc(ob, text[0]);
    return 0;
}
/* Callback for '&': handles entity spellings of quotes.
 *  - "&quot;" is offered to the double-quote converter;
 *  - any entity spelling of a single quote goes through smartypants_squote;
 *  - "&#0;" is swallowed without output;
 *  - otherwise only the '&' is emitted and the rest is left to the caller.
 * Returns the number of extra input bytes consumed. */
static size_t
smartypants_cb__amp(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    size_t quote_len;

    if (size >= 6 && memcmp(text, "&quot;", 6) == 0) {
        uint8_t following = (size >= 7) ? text[6] : 0;

        if (smartypants_quotes(ob, previous_char, following, 'd', &smrt->in_dquote))
            return 5;
    }

    quote_len = squote_len(text, size);
    if (quote_len > 0) {
        /* smartypants_squote expects to start on the quote's last byte */
        size_t last = quote_len - 1;

        return last + smartypants_squote(ob, smrt, previous_char, text + last, size - last, text, quote_len);
    }

    if (size >= 4 && memcmp(text, "&#0;", 4) == 0)
        return 3;

    hoedown_buffer_putc(ob, '&');
    return 0;
}
/* Callback for '.': both "..." and ". . ." become an ellipsis entity; any
 * other period is copied through. Returns the extra input bytes consumed. */
static size_t
smartypants_cb__period(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    size_t extra = 0;

    if (size >= 3 && text[1] == '.' && text[2] == '.')
        extra = 2;
    else if (size >= 5 && memcmp(text + 1, " . .", 4) == 0)
        extra = 4;

    if (extra == 0) {
        hoedown_buffer_putc(ob, text[0]);
        return 0;
    }

    HOEDOWN_BUFPUTSL(ob, "&hellip;");
    return extra;
}
/* Callback for '`': a doubled backtick is offered to the double-quote
 * converter; otherwise the single backtick is copied through. */
static size_t
smartypants_cb__backtick(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    int doubled = (size >= 2 && text[1] == '`');

    if (doubled) {
        uint8_t following = (size >= 3) ? text[2] : 0;

        if (smartypants_quotes(ob, previous_char, following, 'd', &smrt->in_dquote))
            return 1;
    }

    hoedown_buffer_putc(ob, text[0]);
    return 0;
}
/* Callback for the digits '1' and '3': converts 1/2, 1/4 (also "1/4th") and
 * 3/4 (also "3/4ths") into fraction entities when the fraction starts after a
 * word boundary. Anything else emits the digit unchanged. */
static size_t
smartypants_cb__number(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    if (size >= 3 && word_boundary(previous_char) && text[1] == '/') {
        /* The fraction is followed by end-of-input or a boundary character */
        int ends_here = (size == 3) || word_boundary(text[3]);

        if (text[0] == '1' && text[2] == '2') {
            if (ends_here) {
                HOEDOWN_BUFPUTSL(ob, "&frac12;");
                return 2;
            }
        } else if (text[0] == '1' && text[2] == '4') {
            if (ends_here ||
                (size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) {
                HOEDOWN_BUFPUTSL(ob, "&frac14;");
                return 2;
            }
        } else if (text[0] == '3' && text[2] == '4') {
            if (ends_here ||
                (size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) {
                HOEDOWN_BUFPUTSL(ob, "&frac34;");
                return 2;
            }
        }
    }

    hoedown_buffer_putc(ob, text[0]);
    return 0;
}
/* Callback for a literal ": converts it to a left or right double-quote
 * entity, or emits &quot; when the quote is not at a word edge.
 * Always returns 0 (no extra input consumed). */
static size_t
smartypants_cb__dquote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    /* text[1] exists only when at least two bytes remain; a quote that is the
     * last byte of the input has no following character. */
    uint8_t next_char = (size > 1) ? text[1] : 0;

    if (!smartypants_quotes(ob, previous_char, next_char, 'd', &smrt->in_dquote))
        HOEDOWN_BUFPUTSL(ob, "&quot;");

    return 0;
}
/* Callback for '<': copies an HTML tag to the output untouched. When the tag
 * opens one of the elements in skip_tags, everything up to and including the
 * matching closing tag is copied verbatim as well, so no smart punctuation is
 * applied inside it. Returns the number of extra input bytes consumed. */
static size_t
smartypants_cb__ltag(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    static const char *skip_tags[] = {
        "pre", "code", "var", "samp", "kbd", "math", "script", "style"
    };
    static const size_t skip_tags_count = sizeof(skip_tags) / sizeof(skip_tags[0]);

    size_t tag, i = 0;

    /* Find the '>' that ends the tag starting at text[0] */
    while (i < size && text[i] != '>')
        i++;

    for (tag = 0; tag < skip_tags_count; ++tag) {
        if (hoedown_html_is_tag(text, size, skip_tags[tag]) == HOEDOWN_HTML_TAG_OPEN)
            break;
    }

    if (tag < skip_tags_count) {
        /* Scan forward for the closing tag of the same element */
        for (;;) {
            while (i < size && text[i] != '<')
                i++;

            if (i == size)
                break;

            if (hoedown_html_is_tag(text + i, size - i, skip_tags[tag]) == HOEDOWN_HTML_TAG_CLOSE)
                break;

            i++;
        }

        while (i < size && text[i] != '>')
            i++;
    }

    /* Unterminated tag: no '>' was found, so i == size and there is no byte at
     * text[i] to include. Copy only what actually exists. */
    if (i >= size) {
        hoedown_buffer_put(ob, text, size);
        return size;
    }

    hoedown_buffer_put(ob, text, i + 1);
    return i;
}
/* Callback for '\': a backslash followed by one of \ " ' . - ` emits that
 * character literally (bypassing its SmartyPants conversion) and consumes it.
 * In every other case the backslash itself is emitted.
 * Returns the number of extra input bytes consumed. */
static size_t
smartypants_cb__escape(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
    /* A backslash at the very end of the input escapes nothing; it must still
     * be written, otherwise it silently disappears from the output. */
    if (size < 2) {
        hoedown_buffer_putc(ob, '\\');
        return 0;
    }

    switch (text[1]) {
    case '\\':
    case '"':
    case '\'':
    case '.':
    case '-':
    case '`':
        hoedown_buffer_putc(ob, text[1]);
        return 1;

    default:
        hoedown_buffer_putc(ob, '\\');
        return 0;
    }
}
#if 0
static struct {
uint8_t c0;
const uint8_t *pattern;
const uint8_t *entity;
int skip;
} smartypants_subs[] = {
{ '\'', "'s>", "&rsquo;", 0 },
{ '\'', "'t>", "&rsquo;", 0 },
{ '\'', "'re>", "&rsquo;", 0 },
{ '\'', "'ll>", "&rsquo;", 0 },
{ '\'', "'ve>", "&rsquo;", 0 },
{ '\'', "'m>", "&rsquo;", 0 },
{ '\'', "'d>", "&rsquo;", 0 },
{ '-', "--", "&mdash;", 1 },
{ '-', "<->", "&ndash;", 0 },
{ '.', "...", "&hellip;", 2 },
{ '.', ". . .", "&hellip;", 4 },
{ '(', "(c)", "&copy;", 2 },
{ '(', "(r)", "&reg;", 2 },
{ '(', "(tm)", "&trade;", 3 },
{ '3', "<3/4>", "&frac34;", 2 },
{ '3', "<3/4ths>", "&frac34;", 2 },
{ '1', "<1/2>", "&frac12;", 2 },
{ '1', "<1/4>", "&frac14;", 2 },
{ '1', "<1/4th>", "&frac14;", 2 },
{ '&', "&#0;", 0, 3 },
};
#endif
/* hoedown_html_smartypants: copy `text` to `ob`, applying SmartyPants
 * punctuation. Bytes with no entry in smartypants_cb_chars are copied in
 * runs; every other byte is dispatched to its callback, which writes the
 * replacement and reports how many extra bytes it consumed.
 * A NULL `text` is a no-op. */
void
hoedown_html_smartypants(hoedown_buffer *ob, const uint8_t *text, size_t size)
{
    struct smartypants_data smrt = {0, 0};
    size_t pos = 0;

    if (!text)
        return;

    hoedown_buffer_grow(ob, size);

    while (pos < size) {
        size_t run_start = pos;
        uint8_t action = 0;
        uint8_t previous;

        /* Skip over plain bytes until one needs a callback */
        while (pos < size && (action = smartypants_cb_chars[text[pos]]) == 0)
            pos++;

        if (pos > run_start)
            hoedown_buffer_put(ob, text + run_start, pos - run_start);

        if (pos >= size)
            break;

        previous = pos ? text[pos - 1] : 0;
        pos += smartypants_cb_ptrs[(int)action]
            (ob, &smrt, previous, text + pos, size - pos);

        /* step past the byte that triggered the callback */
        pos++;
    }
}

63
src/pool.c Normal file
View file

@ -0,0 +1,63 @@
#include "pool.h"
#define likely(x) __builtin_expect((x),1)
#define unlikely(x) __builtin_expect((x),0)
/* hoedown_pool_init: set up an empty pool.
 * `initial_size` is the number of item slots to allocate up front (0 selects
 * a default of 8). `new_function` / `free_function` create and destroy pooled
 * objects and both receive `opaque`. No objects are created here. */
void hoedown_pool_init(
    hoedown_pool *pool,
    size_t initial_size,
    void *(*new_function)(void *opaque),
    void (*free_function)(void *item, void *opaque),
    void *opaque
) {
    size_t slots = initial_size ? initial_size : 8;

    pool->item = hoedown_malloc(sizeof(void *) * slots);
    pool->asize = slots;
    pool->isize = 0;
    pool->size = 0;

    pool->opaque = opaque;
    pool->new_function = new_function;
    pool->free_function = free_function;
}
/* hoedown_pool_get: reserve and return the next object of the pool.
 * An object created earlier is reused when one is available; otherwise a new
 * one is created with new_function, doubling the slot array first if full. */
void *hoedown_pool_get(hoedown_pool *pool) {
    /* Fast path: an already-initialized object is waiting to be reused */
    if (likely(pool->size < pool->isize))
        return pool->item[pool->size++];

    /* Make sure there's a free slot for the new object */
    if (unlikely(pool->isize >= pool->asize)) {
        pool->asize *= 2;
        pool->item = hoedown_realloc(pool->item, sizeof(void *) * pool->asize);
    }

    /* Initialize a new object and hand it out */
    pool->item[pool->isize] = pool->new_function(pool->opaque);
    pool->isize += 1;

    return pool->item[pool->size++];
}
/* hoedown_pool_pop: release the most recently reserved object. The object is
 * kept in the pool for reuse; only the reserved count is lowered.
 * No check is made here that anything is currently reserved. */
void hoedown_pool_pop(hoedown_pool *pool) {
    pool->size -= 1;
}
/* hoedown_pool_uninit: destroy every object the pool ever created (reserved
 * or not) with free_function, then release the slot array itself. */
void hoedown_pool_uninit(hoedown_pool *pool) {
    size_t idx = 0;

    while (idx < pool->isize) {
        pool->free_function(pool->item[idx], pool->opaque);
        idx++;
    }

    free(pool->item);
}
/* Pool of hoedown_buffer objects */

/* Pool constructor: `opaque` carries the buffer unit size packed in a pointer
 * (see hoedown_buffer_pool_init). */
static void *buffer_new(void *opaque) {
    size_t unit = (size_t)opaque;

    return hoedown_buffer_new(unit);
}
static void buffer_free(void *item, void *opaque) {
hoedown_buffer_free(item);
}
/* hoedown_buffer_pool_init: initialize `pool` as a pool of hoedown_buffer
 * objects. `unit` is forwarded to hoedown_buffer_new for every buffer the
 * pool creates; it travels through the pool's opaque pointer. */
void hoedown_buffer_pool_init(hoedown_pool *pool, size_t initial_size, size_t unit) {
    void *packed_unit = (void *)unit;

    hoedown_pool_init(pool, initial_size, buffer_new, buffer_free, packed_unit);
}

60
src/pool.h Normal file
View file

@ -0,0 +1,60 @@
/* pool.h - stack-based pool of reusable objects */
#ifndef HOEDOWN_POOL_H
#define HOEDOWN_POOL_H
#include "buffer.h"
#ifdef __cplusplus
extern "C" {
#endif
/*********
* TYPES *
*********/
/* Stack-like pool of lazily created, reusable objects. */
typedef struct hoedown_pool {
/* array of object pointers; the first `isize` entries are valid */
void **item;
/* number of objects currently reserved (handed out by hoedown_pool_get) */
size_t size;
/* number of objects created so far with new_function */
size_t isize;
/* number of slots allocated in `item` */
size_t asize;
/* creates a new object when no initialized one is free */
void *(*new_function)(void *opaque);
/* destroys one object; called for each created object on uninit */
void (*free_function)(void *item, void *opaque);
/* user value passed to new_function and free_function */
void *opaque;
} hoedown_pool;
/*************
* FUNCTIONS *
*************/
/* hoedown_pool_init: initialize a new pool of objects */
void hoedown_pool_init(
hoedown_pool *pool,
size_t initial_size,
void *(*new_function)(void *opaque),
void (*free_function)(void *item, void *opaque),
void *opaque
);
/* hoedown_pool_get: reserve and return the next object from the pool */
void *hoedown_pool_get(hoedown_pool *pool);
/* hoedown_pool_pop: unreserve the last reserved object from the pool */
void hoedown_pool_pop(hoedown_pool *pool);
/* hoedown_pool_uninit: uninitialize a pool of objects */
void hoedown_pool_uninit(hoedown_pool *pool);
/* hoedown_buffer_pool_init: convenience method to create a pool of buffers */
void hoedown_buffer_pool_init(hoedown_pool *pool, size_t initial_size, size_t unit);
#ifdef __cplusplus
}
#endif
#endif /** HOEDOWN_POOL_H **/

View file

@ -1,79 +0,0 @@
#include "stack.h"
#include "buffer.h"
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/* hoedown_stack_init: start with an empty stack and allocate room for
 * `initial_size` items (8 when `initial_size` is 0). */
void
hoedown_stack_init(hoedown_stack *st, size_t initial_size)
{
    assert(st);

    st->size = 0;
    st->asize = 0;
    st->item = NULL;

    hoedown_stack_grow(st, initial_size ? initial_size : 8);
}
/* hoedown_stack_uninit: release the item array. The items themselves are not
 * touched, and the struct fields are left as they were. */
void
hoedown_stack_uninit(hoedown_stack *st)
{
    void **items;

    assert(st);

    items = st->item;
    free(items);
}
/* hoedown_stack_grow: enlarge the item array to hold `neosz` items. Does
 * nothing when the stack is already at least that large. New slots are
 * zero-filled. */
void
hoedown_stack_grow(hoedown_stack *st, size_t neosz)
{
    size_t old_capacity;

    assert(st);

    old_capacity = st->asize;
    if (neosz <= old_capacity)
        return;

    st->item = hoedown_realloc(st->item, neosz * sizeof(void *));

    /* Clear only the newly added tail */
    memset(st->item + old_capacity, 0x0, (neosz - old_capacity) * sizeof(void *));
    st->asize = neosz;

    if (neosz < st->size)
        st->size = neosz;
}
/* hoedown_stack_push: put `item` on top of the stack, asking for twice the
 * current item count when the array is full. */
void
hoedown_stack_push(hoedown_stack *st, void *item)
{
    assert(st);

    if (st->asize <= st->size)
        hoedown_stack_grow(st, 2 * st->size);

    st->item[st->size] = item;
    st->size += 1;
}
/* hoedown_stack_pop: remove and return the top item, or NULL when the stack
 * is empty. */
void *
hoedown_stack_pop(hoedown_stack *st)
{
    assert(st);

    if (st->size == 0)
        return NULL;

    st->size -= 1;
    return st->item[st->size];
}
/* hoedown_stack_top: return the top item without removing it, or NULL when
 * the stack is empty. */
void *
hoedown_stack_top(const hoedown_stack *st)
{
    assert(st);

    return st->size ? st->item[st->size - 1] : NULL;
}

View file

@ -1,52 +0,0 @@
/* stack.h - simple stacking */
#ifndef HOEDOWN_STACK_H
#define HOEDOWN_STACK_H
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
/*********
* TYPES *
*********/
/* Growable LIFO stack of untyped pointers. */
struct hoedown_stack {
/* array of stored item pointers */
void **item;
/* number of items currently on the stack */
size_t size;
/* number of slots allocated in `item` */
size_t asize;
};
typedef struct hoedown_stack hoedown_stack;
/*************
* FUNCTIONS *
*************/
/* hoedown_stack_init: initialize a stack */
void hoedown_stack_init(hoedown_stack *st, size_t initial_size);
/* hoedown_stack_uninit: free internal data of the stack */
void hoedown_stack_uninit(hoedown_stack *st);
/* hoedown_stack_grow: increase the allocated size to the given value */
void hoedown_stack_grow(hoedown_stack *st, size_t neosz);
/* hoedown_stack_push: push an item to the top of the stack */
void hoedown_stack_push(hoedown_stack *st, void *item);
/* hoedown_stack_pop: retrieve and remove the item at the top of the stack */
void *hoedown_stack_pop(hoedown_stack *st);
/* hoedown_stack_top: retrieve the item at the top of the stack */
void *hoedown_stack_top(const hoedown_stack *st);
#ifdef __cplusplus
}
#endif
#endif /** HOEDOWN_STACK_H **/

View file

@ -1,9 +1,7 @@
#include "version.h"
void
hoedown_version(int *major, int *minor, int *revision)
{
*major = HOEDOWN_VERSION_MAJOR;
*minor = HOEDOWN_VERSION_MINOR;
*revision = HOEDOWN_VERSION_REVISION;
void hoedown_version(int *major, int *minor, int *revision) {
*major = HOEDOWN_VERSION_MAJOR;
*minor = HOEDOWN_VERSION_MINOR;
*revision = HOEDOWN_VERSION_REVISION;
}

View file

@ -12,8 +12,8 @@ extern "C" {
* CONSTANTS *
*************/
#define HOEDOWN_VERSION "2.0.0"
#define HOEDOWN_VERSION_MAJOR 2
#define HOEDOWN_VERSION "4.0.0-pre"
#define HOEDOWN_VERSION_MAJOR 4
#define HOEDOWN_VERSION_MINOR 0
#define HOEDOWN_VERSION_REVISION 0

1
stmd Submodule

@ -0,0 +1 @@
Subproject commit a5fa2d573185bcc565da89effcfbfdc2967ef939

View file

@ -1,177 +0,0 @@
#!/usr/bin/perl
#
# MarkdownTester -- Run tests for Markdown implementations
#
# Copyright (c) 2004-2005 John Gruber
# <http://daringfireball.net/projects/markdown/>
#
use strict;
use warnings;
use Getopt::Long;
use Benchmark;
our $VERSION = '1.0.2';
# Sat 24 Dec 2005
my $time_start = new Benchmark;
my $test_dir = "Tests";
my $script = "./Markdown.pl";
my $use_tidy = 0;
my ($flag_version);
GetOptions (
"script=s" => \$script,
"testdir=s" => \$test_dir,
"tidy" => \$use_tidy,
"version" => \$flag_version,
);
if($flag_version) {
my $progname = $0;
$progname =~ s{.*/}{};
die "$progname version $VERSION\n";
}
unless (-d $test_dir) { die "'$test_dir' is not a directory.\n"; }
unless (-f $script) { die "$script does not exist.\n"; }
unless (-x $script) { die "$script is not executable.\n"; }
my $tests_passed = 0;
my $tests_failed = 0;
# Main test loop: for every "<name>.text" file in $test_dir, run $script on it
# and compare its output with the contents of the sibling "<name>.html" file.
# Updates $tests_passed / $tests_failed; on failure prints a diff of the two.
TEST:
foreach my $testfile (glob "$test_dir/*.text") {
# The test name is the file name without directory and ".text" suffix
my $testname = $testfile;
$testname =~ s{.*/(.+)\.text$}{$1}i;
print "$testname ... ";
# Look for a corresponding .html file for each .text file:
my $resultfile = $testfile;
$resultfile =~ s{\.text$}{\.html}i;
unless (-f $resultfile) {
print "'$resultfile' does not exist.\n\n";
next TEST;
}
# open(TEST, $testfile) || die("Can't open testfile: $!");
open(RESULT, $resultfile) || die("Can't open resultfile: $!");
# Slurp mode: read the whole expected-result file in one go.
# NOTE: $/ is cleared globally (not localized), so it stays undefined for the
# rest of the script.
undef $/;
# my $t_input = <TEST>;
my $t_result = <RESULT>;
# Run the implementation under test via the shell. The paths are wrapped in
# single quotes only, so a path containing a ' would break the command.
my $t_output = `'$script' '$testfile'`;
# Normalize the output and expected result strings:
$t_result =~ s/\s+\z//; # trim trailing whitespace
$t_output =~ s/\s+\z//; # trim trailing whitespace
if ($use_tidy) {
# Escape the strings, pass them through to CLI tidy tool for tag-level equivalency
$t_result =~ s{'}{'\\''}g; # escape ' chars for shell
$t_output =~ s{'}{'\\''}g;
$t_result = `echo '$t_result' | tidy --show-body-only 1 --quiet 1 --show-warnings 0`;
$t_output = `echo '$t_output' | tidy --show-body-only 1 --quiet 1 --show-warnings 0`;
}
# The comparison is an exact string match on the (normalized) texts
if ($t_output eq $t_result) {
print "OK\n";
$tests_passed++;
}
else {
print "FAILED\n\n";
# This part added by JM to print diffs
# Both texts are written to scratch files "tmp1" (actual) and "tmp2"
# (expected) in the current directory, diffed, and removed again.
open(OUT, '>tmp1') or die $!;
print OUT $t_output or die $!;
open(RES, '>tmp2') or die $!;
print RES $t_result or die $!;
print `diff tmp1 tmp2`;
close RES;
close OUT;
print "\n";
`rm tmp?`;
# End of added part
$tests_failed++;
}
}
print "\n\n";
print "$tests_passed passed; $tests_failed failed.\n";
my $time_end = new Benchmark;
my $time_diff = timediff($time_end, $time_start);
print "Benchmark: ", timestr($time_diff), "\n";
exit 1 if $tests_failed;
__END__
=pod
=head1 NAME
B<MarkdownTest>
=head1 SYNOPSIS
B<MarkdownTest.pl> [ B<--options> ] [ I<file> ... ]
=head1 DESCRIPTION
=head1 OPTIONS
Use "--" to end switch parsing. For example, to open a file named "-z", use:
MarkdownTest.pl -- -z
=over 4
=item B<--script>
Specify the path to the Markdown script to test. Defaults to
"./Markdown.pl". Example:
./MarkdownTest.pl --script ./PHP-Markdown/php-markdown
=item B<--testdir>
Specify the path to a directory containing test data. Defaults to "Tests".
=item B<--tidy>
Flag to turn on using the command line 'tidy' tool to normalize HTML
output before comparing script output to the expected test result.
Assumes that the 'tidy' command is available in your PATH. Defaults to
off.
=back
=head1 BUGS
=head1 VERSION HISTORY
1.0 Mon 13 Dec 2004-2005
1.0.1 Mon 19 Sep 2005
+ Better handling of case when foo.text exists, but foo.html doesn't.
It now prints a message and moves on, rather than dying.
=head1 COPYRIGHT AND LICENSE
Copyright (c) 2004-2005 John Gruber
<http://daringfireball.net/>
All rights reserved.
This is free software; you may redistribute it and/or modify it under
the same terms as Perl itself.
=cut

View file

@ -1,17 +0,0 @@
<p>AT&amp;T has an ampersand in their name.</p>
<p>AT&amp;T is another way to write it.</p>
<p>This &amp; that.</p>
<p>4 &lt; 5.</p>
<p>6 > 5.</p>
<p>Here's a <a href="http://example.com/?foo=1&amp;bar=2">link</a> with an ampersand in the URL.</p>
<p>Here's a link with an amersand in the link text: <a href="http://att.com/" title="AT&amp;T">AT&amp;T</a>.</p>
<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.</p>
<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.</p>

View file

@ -1,21 +0,0 @@
AT&T has an ampersand in their name.
AT&amp;T is another way to write it.
This & that.
4 < 5.
6 > 5.
Here's a [link] [1] with an ampersand in the URL.
Here's a link with an amersand in the link text: [AT&T] [2].
Here's an inline [link](/script?foo=1&bar=2).
Here's an inline [link](</script?foo=1&bar=2>).
[1]: http://example.com/?foo=1&bar=2
[2]: http://att.com/ "AT&T"

View file

@ -1,18 +0,0 @@
<p>Link: <a href="http://example.com/">http://example.com/</a>.</p>
<p>With an ampersand: <a href="http://example.com/?foo=1&amp;bar=2">http://example.com/?foo=1&amp;bar=2</a></p>
<ul>
<li>In a list?</li>
<li><a href="http://example.com/">http://example.com/</a></li>
<li>It should.</li>
</ul>
<blockquote>
<p>Blockquoted: <a href="http://example.com/">http://example.com/</a></p>
</blockquote>
<p>Auto-links should not occur here: <code>&lt;http://example.com/&gt;</code></p>
<pre><code>or here: &lt;http://example.com/&gt;
</code></pre>

View file

@ -1,13 +0,0 @@
Link: <http://example.com/>.
With an ampersand: <http://example.com/?foo=1&bar=2>
* In a list?
* <http://example.com/>
* It should.
> Blockquoted: <http://example.com/>
Auto-links should not occur here: `<http://example.com/>`
or here: <http://example.com/>

View file

@ -1,118 +0,0 @@
<p>These should all get escaped:</p>
<p>Backslash: \</p>
<p>Backtick: `</p>
<p>Asterisk: *</p>
<p>Underscore: _</p>
<p>Left brace: {</p>
<p>Right brace: }</p>
<p>Left bracket: [</p>
<p>Right bracket: ]</p>
<p>Left paren: (</p>
<p>Right paren: )</p>
<p>Greater-than: ></p>
<p>Hash: #</p>
<p>Period: .</p>
<p>Bang: !</p>
<p>Plus: +</p>
<p>Minus: -</p>
<p>These should not, because they occur within a code block:</p>
<pre><code>Backslash: \\
Backtick: \`
Asterisk: \*
Underscore: \_
Left brace: \{
Right brace: \}
Left bracket: \[
Right bracket: \]
Left paren: \(
Right paren: \)
Greater-than: \&gt;
Hash: \#
Period: \.
Bang: \!
Plus: \+
Minus: \-
</code></pre>
<p>Nor should these, which occur in code spans:</p>
<p>Backslash: <code>\\</code></p>
<p>Backtick: <code>\`</code></p>
<p>Asterisk: <code>\*</code></p>
<p>Underscore: <code>\_</code></p>
<p>Left brace: <code>\{</code></p>
<p>Right brace: <code>\}</code></p>
<p>Left bracket: <code>\[</code></p>
<p>Right bracket: <code>\]</code></p>
<p>Left paren: <code>\(</code></p>
<p>Right paren: <code>\)</code></p>
<p>Greater-than: <code>\&gt;</code></p>
<p>Hash: <code>\#</code></p>
<p>Period: <code>\.</code></p>
<p>Bang: <code>\!</code></p>
<p>Plus: <code>\+</code></p>
<p>Minus: <code>\-</code></p>
<p>These should get escaped, even though they're matching pairs for
other Markdown constructs:</p>
<p>*asterisks*</p>
<p>_underscores_</p>
<p>`backticks`</p>
<p>This is a code span with a literal backslash-backtick sequence: <code>\`</code></p>
<p>This is a tag with unescaped backticks <span attr='`ticks`'>bar</span>.</p>
<p>This is a tag with backslashes <span attr='\\backslashes\\'>bar</span>.</p>

View file

@ -1,120 +0,0 @@
These should all get escaped:
Backslash: \\
Backtick: \`
Asterisk: \*
Underscore: \_
Left brace: \{
Right brace: \}
Left bracket: \[
Right bracket: \]
Left paren: \(
Right paren: \)
Greater-than: \>
Hash: \#
Period: \.
Bang: \!
Plus: \+
Minus: \-
These should not, because they occur within a code block:
Backslash: \\
Backtick: \`
Asterisk: \*
Underscore: \_
Left brace: \{
Right brace: \}
Left bracket: \[
Right bracket: \]
Left paren: \(
Right paren: \)
Greater-than: \>
Hash: \#
Period: \.
Bang: \!
Plus: \+
Minus: \-
Nor should these, which occur in code spans:
Backslash: `\\`
Backtick: `` \` ``
Asterisk: `\*`
Underscore: `\_`
Left brace: `\{`
Right brace: `\}`
Left bracket: `\[`
Right bracket: `\]`
Left paren: `\(`
Right paren: `\)`
Greater-than: `\>`
Hash: `\#`
Period: `\.`
Bang: `\!`
Plus: `\+`
Minus: `\-`
These should get escaped, even though they're matching pairs for
other Markdown constructs:
\*asterisks\*
\_underscores\_
\`backticks\`
This is a code span with a literal backslash-backtick sequence: `` \` ``
This is a tag with unescaped backticks <span attr='`ticks`'>bar</span>.
This is a tag with backslashes <span attr='\\backslashes\\'>bar</span>.

View file

@ -1,15 +0,0 @@
<blockquote>
<p>Example:</p>
<pre><code>sub status {
print "working";
}
</code></pre>
<p>Or:</p>
<pre><code>sub status {
return "working";
}
</code></pre>
</blockquote>

View file

@ -1,11 +0,0 @@
> Example:
>
> sub status {
> print "working";
> }
>
> Or:
>
> sub status {
> return "working";
> }

View file

@ -1,18 +0,0 @@
<pre><code>code block on the first line
</code></pre>
<p>Regular text.</p>
<pre><code>code block indented by spaces
</code></pre>
<p>Regular text.</p>
<pre><code>the lines in this block
all contain trailing spaces
</code></pre>
<p>Regular Text.</p>
<pre><code>code block on the last line
</code></pre>

View file

@ -1,14 +0,0 @@
code block on the first line
Regular text.
code block indented by spaces
Regular text.
the lines in this block
all contain trailing spaces
Regular Text.
code block on the last line

View file

@ -1,6 +0,0 @@
<p><code>&lt;test a="</code> content of attribute <code>"&gt;</code></p>
<p>Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span></p>
<p>Here's how you put <code>`backticks`</code> in a code span.</p>

View file

@ -1,6 +0,0 @@
`<test a="` content of attribute `">`
Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span>
Here's how you put `` `backticks` `` in a code span.

View file

@ -1,8 +0,0 @@
<p>In Markdown 1.0.0 and earlier. Version
8. This line turns into a list item.
Because a hard-wrapped line in the
middle of a paragraph looked like a
list item.</p>
<p>Here's one with a bullet.
* criminey.</p>

View file

@ -1,8 +0,0 @@
In Markdown 1.0.0 and earlier. Version
8. This line turns into a list item.
Because a hard-wrapped line in the
middle of a paragraph looked like a
list item.
Here's one with a bullet.
* criminey.

View file

@ -1,71 +0,0 @@
<p>Dashes:</p>
<hr />
<hr />
<hr />
<hr />
<pre><code>---
</code></pre>
<hr />
<hr />
<hr />
<hr />
<pre><code>- - -
</code></pre>
<p>Asterisks:</p>
<hr />
<hr />
<hr />
<hr />
<pre><code>***
</code></pre>
<hr />
<hr />
<hr />
<hr />
<pre><code>* * *
</code></pre>
<p>Underscores:</p>
<hr />
<hr />
<hr />
<hr />
<pre><code>___
</code></pre>
<hr />
<hr />
<hr />
<hr />
<pre><code>_ _ _
</code></pre>

View file

@ -1,67 +0,0 @@
Dashes:
---
---
---
---
---
- - -
- - -
- - -
- - -
- - -
Asterisks:
***
***
***
***
***
* * *
* * *
* * *
* * *
* * *
Underscores:
___
___
___
___
___
_ _ _
_ _ _
_ _ _
_ _ _
_ _ _

View file

@ -1,15 +0,0 @@
<p>Simple block on one line:</p>
<div>foo</div>
<p>And nested without indentation:</p>
<div>
<div>
<div>
foo
</div>
<div style=">"/>
</div>
<div>bar</div>
</div>

View file

@ -1,15 +0,0 @@
Simple block on one line:
<div>foo</div>
And nested without indentation:
<div>
<div>
<div>
foo
</div>
<div style=">"/>
</div>
<div>bar</div>
</div>

View file

@ -1,72 +0,0 @@
<p>Here's a simple block:</p>
<div>
foo
</div>
<p>This should be a code block, though:</p>
<pre><code>&lt;div&gt;
foo
&lt;/div&gt;
</code></pre>
<p>As should this:</p>
<pre><code>&lt;div&gt;foo&lt;/div&gt;
</code></pre>
<p>Now, nested:</p>
<div>
<div>
<div>
foo
</div>
</div>
</div>
<p>This should just be an HTML comment:</p>
<!-- Comment -->
<p>Multiline:</p>
<!--
Blah
Blah
-->
<p>Code block:</p>
<pre><code>&lt;!-- Comment --&gt;
</code></pre>
<p>Just plain comment, with trailing spaces on the line:</p>
<!-- foo -->
<p>Code:</p>
<pre><code>&lt;hr /&gt;
</code></pre>
<p>Hr's:</p>
<hr>
<hr/>
<hr />
<hr>
<hr/>
<hr />
<hr class="foo" id="bar" />
<hr class="foo" id="bar"/>
<hr class="foo" id="bar" >

View file

@ -1,69 +0,0 @@
Here's a simple block:
<div>
foo
</div>
This should be a code block, though:
<div>
foo
</div>
As should this:
<div>foo</div>
Now, nested:
<div>
<div>
<div>
foo
</div>
</div>
</div>
This should just be an HTML comment:
<!-- Comment -->
Multiline:
<!--
Blah
Blah
-->
Code block:
<!-- Comment -->
Just plain comment, with trailing spaces on the line:
<!-- foo -->
Code:
<hr />
Hr's:
<hr>
<hr/>
<hr />
<hr>
<hr/>
<hr />
<hr class="foo" id="bar" />
<hr class="foo" id="bar"/>
<hr class="foo" id="bar" >

View file

@ -1,13 +0,0 @@
<p>Paragraph one.</p>
<!-- This is a simple comment -->
<!--
This is another comment.
-->
<p>Paragraph two.</p>
<!-- one comment block -- -- with two comments -->
<p>The end.</p>

View file

@ -1,13 +0,0 @@
Paragraph one.
<!-- This is a simple comment -->
<!--
This is another comment.
-->
Paragraph two.
<!-- one comment block -- -- with two comments -->
The end.

View file

@ -1,11 +0,0 @@
<p>Just a <a href="/url/">URL</a>.</p>
<p><a href="/url/" title="title">URL and title</a>.</p>
<p><a href="/url/" title="title preceded by two spaces">URL and title</a>.</p>
<p><a href="/url/" title="title preceded by a tab">URL and title</a>.</p>
<p><a href="/url/" title="title has spaces afterward">URL and title</a>.</p>
<p><a href="">Empty</a>.</p>

View file

@ -1,12 +0,0 @@
Just a [URL](/url/).
[URL and title](/url/ "title").
[URL and title](/url/ "title preceded by two spaces").
[URL and title](/url/ "title preceded by a tab").
[URL and title](/url/ "title has spaces afterward" ).
[Empty]().

View file

@ -1,52 +0,0 @@
<p>Foo <a href="/url/" title="Title">bar</a>.</p>
<p>Foo <a href="/url/" title="Title">bar</a>.</p>
<p>Foo <a href="/url/" title="Title">bar</a>.</p>
<p>With <a href="/url/">embedded [brackets]</a>.</p>
<p>Indented <a href="/url">once</a>.</p>
<p>Indented <a href="/url">twice</a>.</p>
<p>Indented <a href="/url">thrice</a>.</p>
<p>Indented [four][] times.</p>
<pre><code>[four]: /url
</code></pre>
<hr />
<p><a href="foo">this</a> should work</p>
<p>So should <a href="foo">this</a>.</p>
<p>And <a href="foo">this</a>.</p>
<p>And <a href="foo">this</a>.</p>
<p>And <a href="foo">this</a>.</p>
<p>But not [that] [].</p>
<p>Nor [that][].</p>
<p>Nor [that].</p>
<p>[Something in brackets like <a href="foo">this</a> should work]</p>
<p>[Same with <a href="foo">this</a>.]</p>
<p>In this case, <a href="/somethingelse/">this</a> points to something else.</p>
<p>Backslashing should suppress [this] and [this].</p>
<hr />
<p>Here's one where the <a href="/url/">link
breaks</a> across lines.</p>
<p>Here's another where the <a href="/url/">link
breaks</a> across lines, but with a line-ending space.</p>

View file

@ -1,71 +0,0 @@
Foo [bar] [1].
Foo [bar][1].
Foo [bar]
[1].
[1]: /url/ "Title"
With [embedded [brackets]] [b].
Indented [once][].
Indented [twice][].
Indented [thrice][].
Indented [four][] times.
[once]: /url
[twice]: /url
[thrice]: /url
[four]: /url
[b]: /url/
* * *
[this] [this] should work
So should [this][this].
And [this] [].
And [this][].
And [this].
But not [that] [].
Nor [that][].
Nor [that].
[Something in brackets like [this][] should work]
[Same with [this].]
In this case, [this](/somethingelse/) points to something else.
Backslashing should suppress \[this] and [this\].
[this]: foo
* * *
Here's one where the [link
breaks] across lines.
Here's another where the [link
breaks] across lines, but with a line-ending space.
[link breaks]: /url/

View file

@ -1,9 +0,0 @@
<p>This is the <a href="/simple">simple case</a>.</p>
<p>This one has a <a href="/foo">line
break</a>.</p>
<p>This one has a <a href="/foo">line
break</a> with a line-ending space.</p>
<p><a href="/that">this</a> and the <a href="/other">other</a></p>

View file

@ -1,20 +0,0 @@
This is the [simple case].
[simple case]: /simple
This one has a [line
break].
This one has a [line
break] with a line-ending space.
[line break]: /foo
[this] [that] and the [other]
[this]: /this
[that]: /that
[other]: /other

View file

@ -1,3 +0,0 @@
<p>Foo <a href="/url/" title="Title with &quot;quotes&quot; inside">bar</a>.</p>
<p>Foo <a href="/url/" title="Title with &quot;quotes&quot; inside">bar</a>.</p>

View file

@ -1,7 +0,0 @@
Foo [bar][].
Foo [bar](/url/ "Title with "quotes" inside").
[bar]: /url/ "Title with "quotes" inside"

View file

@ -1,314 +0,0 @@
<h1>Markdown: Basics</h1>
<ul id="ProjectSubmenu">
<li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li>
<li><a class="selected" title="Markdown Basics">Basics</a></li>
<li><a href="/projects/markdown/syntax" title="Markdown Syntax Documentation">Syntax</a></li>
<li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li>
<li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li>
</ul>
<h2>Getting the Gist of Markdown's Formatting Syntax</h2>
<p>This page offers a brief overview of what it's like to use Markdown.
The <a href="/projects/markdown/syntax" title="Markdown Syntax">syntax page</a> provides complete, detailed documentation for
every feature, but Markdown should be very easy to pick up simply by
looking at a few examples of it in action. The examples on this page
are written in a before/after style, showing example syntax and the
HTML output produced by Markdown.</p>
<p>It's also helpful to simply try Markdown out; the <a href="/projects/markdown/dingus" title="Markdown Dingus">Dingus</a> is a
web application that allows you type your own Markdown-formatted text
and translate it to XHTML.</p>
<p><strong>Note:</strong> This document is itself written using Markdown; you
can <a href="/projects/markdown/basics.text">see the source for it by adding '.text' to the URL</a>.</p>
<h2>Paragraphs, Headers, Blockquotes</h2>
<p>A paragraph is simply one or more consecutive lines of text, separated
by one or more blank lines. (A blank line is any line that looks like a
blank line -- a line containing nothing spaces or tabs is considered
blank.) Normal paragraphs should not be intended with spaces or tabs.</p>
<p>Markdown offers two styles of headers: <em>Setext</em> and <em>atx</em>.
Setext-style headers for <code>&lt;h1&gt;</code> and <code>&lt;h2&gt;</code> are created by
"underlining" with equal signs (<code>=</code>) and hyphens (<code>-</code>), respectively.
To create an atx-style header, you put 1-6 hash marks (<code>#</code>) at the
beginning of the line -- the number of hashes equals the resulting
HTML header level.</p>
<p>Blockquotes are indicated using email-style '<code>&gt;</code>' angle brackets.</p>
<p>Markdown:</p>
<pre><code>A First Level Header
====================
A Second Level Header
---------------------
Now is the time for all good men to come to
the aid of their country. This is just a
regular paragraph.
The quick brown fox jumped over the lazy
dog's back.
### Header 3
&gt; This is a blockquote.
&gt;
&gt; This is the second paragraph in the blockquote.
&gt;
&gt; ## This is an H2 in a blockquote
</code></pre>
<p>Output:</p>
<pre><code>&lt;h1&gt;A First Level Header&lt;/h1&gt;
&lt;h2&gt;A Second Level Header&lt;/h2&gt;
&lt;p&gt;Now is the time for all good men to come to
the aid of their country. This is just a
regular paragraph.&lt;/p&gt;
&lt;p&gt;The quick brown fox jumped over the lazy
dog's back.&lt;/p&gt;
&lt;h3&gt;Header 3&lt;/h3&gt;
&lt;blockquote&gt;
&lt;p&gt;This is a blockquote.&lt;/p&gt;
&lt;p&gt;This is the second paragraph in the blockquote.&lt;/p&gt;
&lt;h2&gt;This is an H2 in a blockquote&lt;/h2&gt;
&lt;/blockquote&gt;
</code></pre>
<h3>Phrase Emphasis</h3>
<p>Markdown uses asterisks and underscores to indicate spans of emphasis.</p>
<p>Markdown:</p>
<pre><code>Some of these words *are emphasized*.
Some of these words _are emphasized also_.
Use two asterisks for **strong emphasis**.
Or, if you prefer, __use two underscores instead__.
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;Some of these words &lt;em&gt;are emphasized&lt;/em&gt;.
Some of these words &lt;em&gt;are emphasized also&lt;/em&gt;.&lt;/p&gt;
&lt;p&gt;Use two asterisks for &lt;strong&gt;strong emphasis&lt;/strong&gt;.
Or, if you prefer, &lt;strong&gt;use two underscores instead&lt;/strong&gt;.&lt;/p&gt;
</code></pre>
<h2>Lists</h2>
<p>Unordered (bulleted) lists use asterisks, pluses, and hyphens (<code>*</code>,
<code>+</code>, and <code>-</code>) as list markers. These three markers are
interchangable; this:</p>
<pre><code>* Candy.
* Gum.
* Booze.
</code></pre>
<p>this:</p>
<pre><code>+ Candy.
+ Gum.
+ Booze.
</code></pre>
<p>and this:</p>
<pre><code>- Candy.
- Gum.
- Booze.
</code></pre>
<p>all produce the same output:</p>
<pre><code>&lt;ul&gt;
&lt;li&gt;Candy.&lt;/li&gt;
&lt;li&gt;Gum.&lt;/li&gt;
&lt;li&gt;Booze.&lt;/li&gt;
&lt;/ul&gt;
</code></pre>
<p>Ordered (numbered) lists use regular numbers, followed by periods, as
list markers:</p>
<pre><code>1. Red
2. Green
3. Blue
</code></pre>
<p>Output:</p>
<pre><code>&lt;ol&gt;
&lt;li&gt;Red&lt;/li&gt;
&lt;li&gt;Green&lt;/li&gt;
&lt;li&gt;Blue&lt;/li&gt;
&lt;/ol&gt;
</code></pre>
<p>If you put blank lines between items, you'll get <code>&lt;p&gt;</code> tags for the
list item text. You can create multi-paragraph list items by indenting
the paragraphs by 4 spaces or 1 tab:</p>
<pre><code>* A list item.
With multiple paragraphs.
* Another item in the list.
</code></pre>
<p>Output:</p>
<pre><code>&lt;ul&gt;
&lt;li&gt;&lt;p&gt;A list item.&lt;/p&gt;
&lt;p&gt;With multiple paragraphs.&lt;/p&gt;&lt;/li&gt;
&lt;li&gt;&lt;p&gt;Another item in the list.&lt;/p&gt;&lt;/li&gt;
&lt;/ul&gt;
</code></pre>
<h3>Links</h3>
<p>Markdown supports two styles for creating links: <em>inline</em> and
<em>reference</em>. With both styles, you use square brackets to delimit the
text you want to turn into a link.</p>
<p>Inline-style links use parentheses immediately after the link text.
For example:</p>
<pre><code>This is an [example link](http://example.com/).
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;This is an &lt;a href="http://example.com/"&gt;
example link&lt;/a&gt;.&lt;/p&gt;
</code></pre>
<p>Optionally, you may include a title attribute in the parentheses:</p>
<pre><code>This is an [example link](http://example.com/ "With a Title").
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;This is an &lt;a href="http://example.com/" title="With a Title"&gt;
example link&lt;/a&gt;.&lt;/p&gt;
</code></pre>
<p>Reference-style links allow you to refer to your links by names, which
you define elsewhere in your document:</p>
<pre><code>I get 10 times more traffic from [Google][1] than from
[Yahoo][2] or [MSN][3].
[1]: http://google.com/ "Google"
[2]: http://search.yahoo.com/ "Yahoo Search"
[3]: http://search.msn.com/ "MSN Search"
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;I get 10 times more traffic from &lt;a href="http://google.com/"
title="Google"&gt;Google&lt;/a&gt; than from &lt;a href="http://search.yahoo.com/"
title="Yahoo Search"&gt;Yahoo&lt;/a&gt; or &lt;a href="http://search.msn.com/"
title="MSN Search"&gt;MSN&lt;/a&gt;.&lt;/p&gt;
</code></pre>
<p>The title attribute is optional. Link names may contain letters,
numbers and spaces, but are <em>not</em> case sensitive:</p>
<pre><code>I start my morning with a cup of coffee and
[The New York Times][NY Times].
[ny times]: http://www.nytimes.com/
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;I start my morning with a cup of coffee and
&lt;a href="http://www.nytimes.com/"&gt;The New York Times&lt;/a&gt;.&lt;/p&gt;
</code></pre>
<h3>Images</h3>
<p>Image syntax is very much like link syntax.</p>
<p>Inline (titles are optional):</p>
<pre><code>![alt text](/path/to/img.jpg "Title")
</code></pre>
<p>Reference-style:</p>
<pre><code>![alt text][id]
[id]: /path/to/img.jpg "Title"
</code></pre>
<p>Both of the above examples produce the same output:</p>
<pre><code>&lt;img src="/path/to/img.jpg" alt="alt text" title="Title" /&gt;
</code></pre>
<h3>Code</h3>
<p>In a regular paragraph, you can create code span by wrapping text in
backtick quotes. Any ampersands (<code>&amp;</code>) and angle brackets (<code>&lt;</code> or
<code>&gt;</code>) will automatically be translated into HTML entities. This makes
it easy to use Markdown to write about HTML example code:</p>
<pre><code>I strongly recommend against using any `&lt;blink&gt;` tags.
I wish SmartyPants used named entities like `&amp;mdash;`
instead of decimal-encoded entites like `&amp;#8212;`.
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;I strongly recommend against using any
&lt;code&gt;&amp;lt;blink&amp;gt;&lt;/code&gt; tags.&lt;/p&gt;
&lt;p&gt;I wish SmartyPants used named entities like
&lt;code&gt;&amp;amp;mdash;&lt;/code&gt; instead of decimal-encoded
entites like &lt;code&gt;&amp;amp;#8212;&lt;/code&gt;.&lt;/p&gt;
</code></pre>
<p>To specify an entire block of pre-formatted code, indent every line of
the block by 4 spaces or 1 tab. Just like with code spans, <code>&amp;</code>, <code>&lt;</code>,
and <code>&gt;</code> characters will be escaped automatically.</p>
<p>Markdown:</p>
<pre><code>If you want your page to validate under XHTML 1.0 Strict,
you've got to put paragraph tags in your blockquotes:
&lt;blockquote&gt;
&lt;p&gt;For example.&lt;/p&gt;
&lt;/blockquote&gt;
</code></pre>
<p>Output:</p>
<pre><code>&lt;p&gt;If you want your page to validate under XHTML 1.0 Strict,
you've got to put paragraph tags in your blockquotes:&lt;/p&gt;
&lt;pre&gt;&lt;code&gt;&amp;lt;blockquote&amp;gt;
&amp;lt;p&amp;gt;For example.&amp;lt;/p&amp;gt;
&amp;lt;/blockquote&amp;gt;
&lt;/code&gt;&lt;/pre&gt;
</code></pre>

View file

@ -1,306 +0,0 @@
Markdown: Basics
================
<ul id="ProjectSubmenu">
<li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li>
<li><a class="selected" title="Markdown Basics">Basics</a></li>
<li><a href="/projects/markdown/syntax" title="Markdown Syntax Documentation">Syntax</a></li>
<li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li>
<li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li>
</ul>
Getting the Gist of Markdown's Formatting Syntax
------------------------------------------------
This page offers a brief overview of what it's like to use Markdown.
The [syntax page] [s] provides complete, detailed documentation for
every feature, but Markdown should be very easy to pick up simply by
looking at a few examples of it in action. The examples on this page
are written in a before/after style, showing example syntax and the
HTML output produced by Markdown.
It's also helpful to simply try Markdown out; the [Dingus] [d] is a
web application that allows you type your own Markdown-formatted text
and translate it to XHTML.
**Note:** This document is itself written using Markdown; you
can [see the source for it by adding '.text' to the URL] [src].
[s]: /projects/markdown/syntax "Markdown Syntax"
[d]: /projects/markdown/dingus "Markdown Dingus"
[src]: /projects/markdown/basics.text
## Paragraphs, Headers, Blockquotes ##
A paragraph is simply one or more consecutive lines of text, separated
by one or more blank lines. (A blank line is any line that looks like a
blank line -- a line containing nothing spaces or tabs is considered
blank.) Normal paragraphs should not be intended with spaces or tabs.
Markdown offers two styles of headers: *Setext* and *atx*.
Setext-style headers for `<h1>` and `<h2>` are created by
"underlining" with equal signs (`=`) and hyphens (`-`), respectively.
To create an atx-style header, you put 1-6 hash marks (`#`) at the
beginning of the line -- the number of hashes equals the resulting
HTML header level.
Blockquotes are indicated using email-style '`>`' angle brackets.
Markdown:
A First Level Header
====================
A Second Level Header
---------------------
Now is the time for all good men to come to
the aid of their country. This is just a
regular paragraph.
The quick brown fox jumped over the lazy
dog's back.
### Header 3
> This is a blockquote.
>
> This is the second paragraph in the blockquote.
>
> ## This is an H2 in a blockquote
Output:
<h1>A First Level Header</h1>
<h2>A Second Level Header</h2>
<p>Now is the time for all good men to come to
the aid of their country. This is just a
regular paragraph.</p>
<p>The quick brown fox jumped over the lazy
dog's back.</p>
<h3>Header 3</h3>
<blockquote>
<p>This is a blockquote.</p>
<p>This is the second paragraph in the blockquote.</p>
<h2>This is an H2 in a blockquote</h2>
</blockquote>
### Phrase Emphasis ###
Markdown uses asterisks and underscores to indicate spans of emphasis.
Markdown:
Some of these words *are emphasized*.
Some of these words _are emphasized also_.
Use two asterisks for **strong emphasis**.
Or, if you prefer, __use two underscores instead__.
Output:
<p>Some of these words <em>are emphasized</em>.
Some of these words <em>are emphasized also</em>.</p>
<p>Use two asterisks for <strong>strong emphasis</strong>.
Or, if you prefer, <strong>use two underscores instead</strong>.</p>
## Lists ##
Unordered (bulleted) lists use asterisks, pluses, and hyphens (`*`,
`+`, and `-`) as list markers. These three markers are
interchangable; this:
* Candy.
* Gum.
* Booze.
this:
+ Candy.
+ Gum.
+ Booze.
and this:
- Candy.
- Gum.
- Booze.
all produce the same output:
<ul>
<li>Candy.</li>
<li>Gum.</li>
<li>Booze.</li>
</ul>
Ordered (numbered) lists use regular numbers, followed by periods, as
list markers:
1. Red
2. Green
3. Blue
Output:
<ol>
<li>Red</li>
<li>Green</li>
<li>Blue</li>
</ol>
If you put blank lines between items, you'll get `<p>` tags for the
list item text. You can create multi-paragraph list items by indenting
the paragraphs by 4 spaces or 1 tab:
* A list item.
With multiple paragraphs.
* Another item in the list.
Output:
<ul>
<li><p>A list item.</p>
<p>With multiple paragraphs.</p></li>
<li><p>Another item in the list.</p></li>
</ul>
### Links ###
Markdown supports two styles for creating links: *inline* and
*reference*. With both styles, you use square brackets to delimit the
text you want to turn into a link.
Inline-style links use parentheses immediately after the link text.
For example:
This is an [example link](http://example.com/).
Output:
<p>This is an <a href="http://example.com/">
example link</a>.</p>
Optionally, you may include a title attribute in the parentheses:
This is an [example link](http://example.com/ "With a Title").
Output:
<p>This is an <a href="http://example.com/" title="With a Title">
example link</a>.</p>
Reference-style links allow you to refer to your links by names, which
you define elsewhere in your document:
I get 10 times more traffic from [Google][1] than from
[Yahoo][2] or [MSN][3].
[1]: http://google.com/ "Google"
[2]: http://search.yahoo.com/ "Yahoo Search"
[3]: http://search.msn.com/ "MSN Search"
Output:
<p>I get 10 times more traffic from <a href="http://google.com/"
title="Google">Google</a> than from <a href="http://search.yahoo.com/"
title="Yahoo Search">Yahoo</a> or <a href="http://search.msn.com/"
title="MSN Search">MSN</a>.</p>
The title attribute is optional. Link names may contain letters,
numbers and spaces, but are *not* case sensitive:
I start my morning with a cup of coffee and
[The New York Times][NY Times].
[ny times]: http://www.nytimes.com/
Output:
<p>I start my morning with a cup of coffee and
<a href="http://www.nytimes.com/">The New York Times</a>.</p>
### Images ###
Image syntax is very much like link syntax.
Inline (titles are optional):
![alt text](/path/to/img.jpg "Title")
Reference-style:
![alt text][id]
[id]: /path/to/img.jpg "Title"
Both of the above examples produce the same output:
<img src="/path/to/img.jpg" alt="alt text" title="Title" />
### Code ###
In a regular paragraph, you can create code span by wrapping text in
backtick quotes. Any ampersands (`&`) and angle brackets (`<` or
`>`) will automatically be translated into HTML entities. This makes
it easy to use Markdown to write about HTML example code:
I strongly recommend against using any `<blink>` tags.
I wish SmartyPants used named entities like `&mdash;`
instead of decimal-encoded entites like `&#8212;`.
Output:
<p>I strongly recommend against using any
<code>&lt;blink&gt;</code> tags.</p>
<p>I wish SmartyPants used named entities like
<code>&amp;mdash;</code> instead of decimal-encoded
entites like <code>&amp;#8212;</code>.</p>
To specify an entire block of pre-formatted code, indent every line of
the block by 4 spaces or 1 tab. Just like with code spans, `&`, `<`,
and `>` characters will be escaped automatically.
Markdown:
If you want your page to validate under XHTML 1.0 Strict,
you've got to put paragraph tags in your blockquotes:
<blockquote>
<p>For example.</p>
</blockquote>
Output:
<p>If you want your page to validate under XHTML 1.0 Strict,
you've got to put paragraph tags in your blockquotes:</p>
<pre><code>&lt;blockquote&gt;
&lt;p&gt;For example.&lt;/p&gt;
&lt;/blockquote&gt;
</code></pre>

View file

@ -1,942 +0,0 @@
<h1>Markdown: Syntax</h1>
<ul id="ProjectSubmenu">
<li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li>
<li><a href="/projects/markdown/basics" title="Markdown Basics">Basics</a></li>
<li><a class="selected" title="Markdown Syntax Documentation">Syntax</a></li>
<li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li>
<li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li>
</ul>
<ul>
<li><a href="#overview">Overview</a>
<ul>
<li><a href="#philosophy">Philosophy</a></li>
<li><a href="#html">Inline HTML</a></li>
<li><a href="#autoescape">Automatic Escaping for Special Characters</a></li>
</ul></li>
<li><a href="#block">Block Elements</a>
<ul>
<li><a href="#p">Paragraphs and Line Breaks</a></li>
<li><a href="#header">Headers</a></li>
<li><a href="#blockquote">Blockquotes</a></li>
<li><a href="#list">Lists</a></li>
<li><a href="#precode">Code Blocks</a></li>
<li><a href="#hr">Horizontal Rules</a></li>
</ul></li>
<li><a href="#span">Span Elements</a>
<ul>
<li><a href="#link">Links</a></li>
<li><a href="#em">Emphasis</a></li>
<li><a href="#code">Code</a></li>
<li><a href="#img">Images</a></li>
</ul></li>
<li><a href="#misc">Miscellaneous</a>
<ul>
<li><a href="#backslash">Backslash Escapes</a></li>
<li><a href="#autolink">Automatic Links</a></li>
</ul></li>
</ul>
<p><strong>Note:</strong> This document is itself written using Markdown; you
can <a href="/projects/markdown/syntax.text">see the source for it by adding '.text' to the URL</a>.</p>
<hr />
<h2 id="overview">Overview</h2>
<h3 id="philosophy">Philosophy</h3>
<p>Markdown is intended to be as easy-to-read and easy-to-write as is feasible.</p>
<p>Readability, however, is emphasized above all else. A Markdown-formatted
document should be publishable as-is, as plain text, without looking
like it's been marked up with tags or formatting instructions. While
Markdown's syntax has been influenced by several existing text-to-HTML
filters -- including <a href="http://docutils.sourceforge.net/mirror/setext.html">Setext</a>, <a href="http://www.aaronsw.com/2002/atx/">atx</a>, <a href="http://textism.com/tools/textile/">Textile</a>, <a href="http://docutils.sourceforge.net/rst.html">reStructuredText</a>,
<a href="http://www.triptico.com/software/grutatxt.html">Grutatext</a>, and <a href="http://ettext.taint.org/doc/">EtText</a> -- the single biggest source of
inspiration for Markdown's syntax is the format of plain text email.</p>
<p>To this end, Markdown's syntax is comprised entirely of punctuation
characters, which punctuation characters have been carefully chosen so
as to look like what they mean. E.g., asterisks around a word actually
look like *emphasis*. Markdown lists look like, well, lists. Even
blockquotes look like quoted passages of text, assuming you've ever
used email.</p>
<h3 id="html">Inline HTML</h3>
<p>Markdown's syntax is intended for one purpose: to be used as a
format for <em>writing</em> for the web.</p>
<p>Markdown is not a replacement for HTML, or even close to it. Its
syntax is very small, corresponding only to a very small subset of
HTML tags. The idea is <em>not</em> to create a syntax that makes it easier
to insert HTML tags. In my opinion, HTML tags are already easy to
insert. The idea for Markdown is to make it easy to read, write, and
edit prose. HTML is a <em>publishing</em> format; Markdown is a <em>writing</em>
format. Thus, Markdown's formatting syntax only addresses issues that
can be conveyed in plain text.</p>
<p>For any markup that is not covered by Markdown's syntax, you simply
use HTML itself. There's no need to preface it or delimit it to
indicate that you're switching from Markdown to HTML; you just use
the tags.</p>
<p>The only restrictions are that block-level HTML elements -- e.g. <code>&lt;div&gt;</code>,
<code>&lt;table&gt;</code>, <code>&lt;pre&gt;</code>, <code>&lt;p&gt;</code>, etc. -- must be separated from surrounding
content by blank lines, and the start and end tags of the block should
not be indented with tabs or spaces. Markdown is smart enough not
to add extra (unwanted) <code>&lt;p&gt;</code> tags around HTML block-level tags.</p>
<p>For example, to add an HTML table to a Markdown article:</p>
<pre><code>This is a regular paragraph.
&lt;table&gt;
&lt;tr&gt;
&lt;td&gt;Foo&lt;/td&gt;
&lt;/tr&gt;
&lt;/table&gt;
This is another regular paragraph.
</code></pre>
<p>Note that Markdown formatting syntax is not processed within block-level
HTML tags. E.g., you can't use Markdown-style <code>*emphasis*</code> inside an
HTML block.</p>
<p>Span-level HTML tags -- e.g. <code>&lt;span&gt;</code>, <code>&lt;cite&gt;</code>, or <code>&lt;del&gt;</code> -- can be
used anywhere in a Markdown paragraph, list item, or header. If you
want, you can even use HTML tags instead of Markdown formatting; e.g. if
you'd prefer to use HTML <code>&lt;a&gt;</code> or <code>&lt;img&gt;</code> tags instead of Markdown's
link or image syntax, go right ahead.</p>
<p>Unlike block-level HTML tags, Markdown syntax <em>is</em> processed within
span-level tags.</p>
<h3 id="autoescape">Automatic Escaping for Special Characters</h3>
<p>In HTML, there are two characters that demand special treatment: <code>&lt;</code>
and <code>&amp;</code>. Left angle brackets are used to start tags; ampersands are
used to denote HTML entities. If you want to use them as literal
characters, you must escape them as entities, e.g. <code>&amp;lt;</code>, and
<code>&amp;amp;</code>.</p>
<p>Ampersands in particular are bedeviling for web writers. If you want to
write about 'AT&amp;T', you need to write '<code>AT&amp;amp;T</code>'. You even need to
escape ampersands within URLs. Thus, if you want to link to:</p>
<pre><code>http://images.google.com/images?num=30&amp;q=larry+bird
</code></pre>
<p>you need to encode the URL as:</p>
<pre><code>http://images.google.com/images?num=30&amp;amp;q=larry+bird
</code></pre>
<p>in your anchor tag <code>href</code> attribute. Needless to say, this is easy to
forget, and is probably the single most common source of HTML validation
errors in otherwise well-marked-up web sites.</p>
<p>Markdown allows you to use these characters naturally, taking care of
all the necessary escaping for you. If you use an ampersand as part of
an HTML entity, it remains unchanged; otherwise it will be translated
into <code>&amp;amp;</code>.</p>
<p>So, if you want to include a copyright symbol in your article, you can write:</p>
<pre><code>&amp;copy;
</code></pre>
<p>and Markdown will leave it alone. But if you write:</p>
<pre><code>AT&amp;T
</code></pre>
<p>Markdown will translate it to:</p>
<pre><code>AT&amp;amp;T
</code></pre>
<p>Similarly, because Markdown supports <a href="#html">inline HTML</a>, if you use
angle brackets as delimiters for HTML tags, Markdown will treat them as
such. But if you write:</p>
<pre><code>4 &lt; 5
</code></pre>
<p>Markdown will translate it to:</p>
<pre><code>4 &amp;lt; 5
</code></pre>
<p>However, inside Markdown code spans and blocks, angle brackets and
ampersands are <em>always</em> encoded automatically. This makes it easy to use
Markdown to write about HTML code. (As opposed to raw HTML, which is a
terrible format for writing about HTML syntax, because every single <code>&lt;</code>
and <code>&amp;</code> in your example code needs to be escaped.)</p>
<hr />
<h2 id="block">Block Elements</h2>
<h3 id="p">Paragraphs and Line Breaks</h3>
<p>A paragraph is simply one or more consecutive lines of text, separated
by one or more blank lines. (A blank line is any line that looks like a
blank line -- a line containing nothing but spaces or tabs is considered
blank.) Normal paragraphs should not be indented with spaces or tabs.</p>
<p>The implication of the "one or more consecutive lines of text" rule is
that Markdown supports "hard-wrapped" text paragraphs. This differs
significantly from most other text-to-HTML formatters (including Movable
Type's "Convert Line Breaks" option) which translate every line break
character in a paragraph into a <code>&lt;br /&gt;</code> tag.</p>
<p>When you <em>do</em> want to insert a <code>&lt;br /&gt;</code> break tag using Markdown, you
end a line with two or more spaces, then type return.</p>
<p>Yes, this takes a tad more effort to create a <code>&lt;br /&gt;</code>, but a simplistic
"every line break is a <code>&lt;br /&gt;</code>" rule wouldn't work for Markdown.
Markdown's email-style <a href="#blockquote">blockquoting</a> and multi-paragraph <a href="#list">list items</a>
work best -- and look better -- when you format them with hard breaks.</p>
<h3 id="header">Headers</h3>
<p>Markdown supports two styles of headers, <a href="http://docutils.sourceforge.net/mirror/setext.html">Setext</a> and <a href="http://www.aaronsw.com/2002/atx/">atx</a>.</p>
<p>Setext-style headers are "underlined" using equal signs (for first-level
headers) and dashes (for second-level headers). For example:</p>
<pre><code>This is an H1
=============
This is an H2
-------------
</code></pre>
<p>Any number of underlining <code>=</code>'s or <code>-</code>'s will work.</p>
<p>Atx-style headers use 1-6 hash characters at the start of the line,
corresponding to header levels 1-6. For example:</p>
<pre><code># This is an H1
## This is an H2
###### This is an H6
</code></pre>
<p>Optionally, you may "close" atx-style headers. This is purely
cosmetic -- you can use this if you think it looks better. The
closing hashes don't even need to match the number of hashes
used to open the header. (The number of opening hashes
determines the header level.) :</p>
<pre><code># This is an H1 #
## This is an H2 ##
### This is an H3 ######
</code></pre>
<h3 id="blockquote">Blockquotes</h3>
<p>Markdown uses email-style <code>&gt;</code> characters for blockquoting. If you're
familiar with quoting passages of text in an email message, then you
know how to create a blockquote in Markdown. It looks best if you hard
wrap the text and put a <code>&gt;</code> before every line:</p>
<pre><code>&gt; This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
&gt; consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
&gt; Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
&gt;
&gt; Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
&gt; id sem consectetuer libero luctus adipiscing.
</code></pre>
<p>Markdown allows you to be lazy and only put the <code>&gt;</code> before the first
line of a hard-wrapped paragraph:</p>
<pre><code>&gt; This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
&gt; Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
id sem consectetuer libero luctus adipiscing.
</code></pre>
<p>Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by
adding additional levels of <code>&gt;</code>:</p>
<pre><code>&gt; This is the first level of quoting.
&gt;
&gt; &gt; This is nested blockquote.
&gt;
&gt; Back to the first level.
</code></pre>
<p>Blockquotes can contain other Markdown elements, including headers, lists,
and code blocks:</p>
<pre><code>&gt; ## This is a header.
&gt;
&gt; 1. This is the first list item.
&gt; 2. This is the second list item.
&gt;
&gt; Here's some example code:
&gt;
&gt; return shell_exec("echo $input | $markdown_script");
</code></pre>
<p>Any decent text editor should make email-style quoting easy. For
example, with BBEdit, you can make a selection and choose Increase
Quote Level from the Text menu.</p>
<h3 id="list">Lists</h3>
<p>Markdown supports ordered (numbered) and unordered (bulleted) lists.</p>
<p>Unordered lists use asterisks, pluses, and hyphens -- interchangeably
-- as list markers:</p>
<pre><code>* Red
* Green
* Blue
</code></pre>
<p>is equivalent to:</p>
<pre><code>+ Red
+ Green
+ Blue
</code></pre>
<p>and:</p>
<pre><code>- Red
- Green
- Blue
</code></pre>
<p>Ordered lists use numbers followed by periods:</p>
<pre><code>1. Bird
2. McHale
3. Parish
</code></pre>
<p>It's important to note that the actual numbers you use to mark the
list have no effect on the HTML output Markdown produces. The HTML
Markdown produces from the above list is:</p>
<pre><code>&lt;ol&gt;
&lt;li&gt;Bird&lt;/li&gt;
&lt;li&gt;McHale&lt;/li&gt;
&lt;li&gt;Parish&lt;/li&gt;
&lt;/ol&gt;
</code></pre>
<p>If you instead wrote the list in Markdown like this:</p>
<pre><code>1. Bird
1. McHale
1. Parish
</code></pre>
<p>or even:</p>
<pre><code>3. Bird
1. McHale
8. Parish
</code></pre>
<p>you'd get the exact same HTML output. The point is, if you want to,
you can use ordinal numbers in your ordered Markdown lists, so that
the numbers in your source match the numbers in your published HTML.
But if you want to be lazy, you don't have to.</p>
<p>If you do use lazy list numbering, however, you should still start the
list with the number 1. At some point in the future, Markdown may support
starting ordered lists at an arbitrary number.</p>
<p>List markers typically start at the left margin, but may be indented by
up to three spaces. List markers must be followed by one or more spaces
or a tab.</p>
<p>To make lists look nice, you can wrap items with hanging indents:</p>
<pre><code>* Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
viverra nec, fringilla in, laoreet vitae, risus.
* Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
Suspendisse id sem consectetuer libero luctus adipiscing.
</code></pre>
<p>But if you want to be lazy, you don't have to:</p>
<pre><code>* Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
viverra nec, fringilla in, laoreet vitae, risus.
* Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
Suspendisse id sem consectetuer libero luctus adipiscing.
</code></pre>
<p>If list items are separated by blank lines, Markdown will wrap the
items in <code>&lt;p&gt;</code> tags in the HTML output. For example, this input:</p>
<pre><code>* Bird
* Magic
</code></pre>
<p>will turn into:</p>
<pre><code>&lt;ul&gt;
&lt;li&gt;Bird&lt;/li&gt;
&lt;li&gt;Magic&lt;/li&gt;
&lt;/ul&gt;
</code></pre>
<p>But this:</p>
<pre><code>* Bird

* Magic
</code></pre>
<p>will turn into:</p>
<pre><code>&lt;ul&gt;
&lt;li&gt;&lt;p&gt;Bird&lt;/p&gt;&lt;/li&gt;
&lt;li&gt;&lt;p&gt;Magic&lt;/p&gt;&lt;/li&gt;
&lt;/ul&gt;
</code></pre>
<p>List items may consist of multiple paragraphs. Each subsequent
paragraph in a list item must be indented by either 4 spaces
or one tab:</p>
<pre><code>1. This is a list item with two paragraphs. Lorem ipsum dolor
sit amet, consectetuer adipiscing elit. Aliquam hendrerit
mi posuere lectus.
Vestibulum enim wisi, viverra nec, fringilla in, laoreet
vitae, risus. Donec sit amet nisl. Aliquam semper ipsum
sit amet velit.
2. Suspendisse id sem consectetuer libero luctus adipiscing.
</code></pre>
<p>It looks nice if you indent every line of the subsequent
paragraphs, but here again, Markdown will allow you to be
lazy:</p>
<pre><code>* This is a list item with two paragraphs.
This is the second paragraph in the list item. You're
only required to indent the first line. Lorem ipsum dolor
sit amet, consectetuer adipiscing elit.
* Another item in the same list.
</code></pre>
<p>To put a blockquote within a list item, the blockquote's <code>&gt;</code>
delimiters need to be indented:</p>
<pre><code>* A list item with a blockquote:
&gt; This is a blockquote
&gt; inside a list item.
</code></pre>
<p>To put a code block within a list item, the code block needs
to be indented <em>twice</em> -- 8 spaces or two tabs:</p>
<pre><code>* A list item with a code block:
&lt;code goes here&gt;
</code></pre>
<p>It's worth noting that it's possible to trigger an ordered list by
accident, by writing something like this:</p>
<pre><code>1986. What a great season.
</code></pre>
<p>In other words, a <em>number-period-space</em> sequence at the beginning of a
line. To avoid this, you can backslash-escape the period:</p>
<pre><code>1986\. What a great season.
</code></pre>
<h3 id="precode">Code Blocks</h3>
<p>Pre-formatted code blocks are used for writing about programming or
markup source code. Rather than forming normal paragraphs, the lines
of a code block are interpreted literally. Markdown wraps a code block
in both <code>&lt;pre&gt;</code> and <code>&lt;code&gt;</code> tags.</p>
<p>To produce a code block in Markdown, simply indent every line of the
block by at least 4 spaces or 1 tab. For example, given this input:</p>
<pre><code>This is a normal paragraph:
This is a code block.
</code></pre>
<p>Markdown will generate:</p>
<pre><code>&lt;p&gt;This is a normal paragraph:&lt;/p&gt;
&lt;pre&gt;&lt;code&gt;This is a code block.
&lt;/code&gt;&lt;/pre&gt;
</code></pre>
<p>One level of indentation -- 4 spaces or 1 tab -- is removed from each
line of the code block. For example, this:</p>
<pre><code>Here is an example of AppleScript:
tell application "Foo"
beep
end tell
</code></pre>
<p>will turn into:</p>
<pre><code>&lt;p&gt;Here is an example of AppleScript:&lt;/p&gt;
&lt;pre&gt;&lt;code&gt;tell application "Foo"
beep
end tell
&lt;/code&gt;&lt;/pre&gt;
</code></pre>
<p>A code block continues until it reaches a line that is not indented
(or the end of the article).</p>
<p>Within a code block, ampersands (<code>&amp;</code>) and angle brackets (<code>&lt;</code> and <code>&gt;</code>)
are automatically converted into HTML entities. This makes it very
easy to include example HTML source code using Markdown -- just paste
it and indent it, and Markdown will handle the hassle of encoding the
ampersands and angle brackets. For example, this:</p>
<pre><code> &lt;div class="footer"&gt;
&amp;copy; 2004 Foo Corporation
&lt;/div&gt;
</code></pre>
<p>will turn into:</p>
<pre><code>&lt;pre&gt;&lt;code&gt;&amp;lt;div class="footer"&amp;gt;
&amp;amp;copy; 2004 Foo Corporation
&amp;lt;/div&amp;gt;
&lt;/code&gt;&lt;/pre&gt;
</code></pre>
<p>Regular Markdown syntax is not processed within code blocks. E.g.,
asterisks are just literal asterisks within a code block. This means
it's also easy to use Markdown to write about Markdown's own syntax.</p>
<h3 id="hr">Horizontal Rules</h3>
<p>You can produce a horizontal rule tag (<code>&lt;hr /&gt;</code>) by placing three or
more hyphens, asterisks, or underscores on a line by themselves. If you
wish, you may use spaces between the hyphens or asterisks. Each of the
following lines will produce a horizontal rule:</p>
<pre><code>* * *
***
*****
- - -
---------------------------------------
_ _ _
</code></pre>
<hr />
<h2 id="span">Span Elements</h2>
<h3 id="link">Links</h3>
<p>Markdown supports two styles of links: <em>inline</em> and <em>reference</em>.</p>
<p>In both styles, the link text is delimited by [square brackets].</p>
<p>To create an inline link, use a set of regular parentheses immediately
after the link text's closing square bracket. Inside the parentheses,
put the URL where you want the link to point, along with an <em>optional</em>
title for the link, surrounded in quotes. For example:</p>
<pre><code>This is [an example](http://example.com/ "Title") inline link.
[This link](http://example.net/) has no title attribute.
</code></pre>
<p>Will produce:</p>
<pre><code>&lt;p&gt;This is &lt;a href="http://example.com/" title="Title"&gt;
an example&lt;/a&gt; inline link.&lt;/p&gt;
&lt;p&gt;&lt;a href="http://example.net/"&gt;This link&lt;/a&gt; has no
title attribute.&lt;/p&gt;
</code></pre>
<p>If you're referring to a local resource on the same server, you can
use relative paths:</p>
<pre><code>See my [About](/about/) page for details.
</code></pre>
<p>Reference-style links use a second set of square brackets, inside
which you place a label of your choosing to identify the link:</p>
<pre><code>This is [an example][id] reference-style link.
</code></pre>
<p>You can optionally use a space to separate the sets of brackets:</p>
<pre><code>This is [an example] [id] reference-style link.
</code></pre>
<p>Then, anywhere in the document, you define your link label like this,
on a line by itself:</p>
<pre><code>[id]: http://example.com/ "Optional Title Here"
</code></pre>
<p>That is:</p>
<ul>
<li>Square brackets containing the link identifier (optionally
indented from the left margin using up to three spaces);</li>
<li>followed by a colon;</li>
<li>followed by one or more spaces (or tabs);</li>
<li>followed by the URL for the link;</li>
<li>optionally followed by a title attribute for the link, enclosed
in double or single quotes.</li>
</ul>
<p>The link URL may, optionally, be surrounded by angle brackets:</p>
<pre><code>[id]: &lt;http://example.com/&gt; "Optional Title Here"
</code></pre>
<p>You can put the title attribute on the next line and use extra spaces
or tabs for padding, which tends to look better with longer URLs:</p>
<pre><code>[id]: http://example.com/longish/path/to/resource/here
"Optional Title Here"
</code></pre>
<p>Link definitions are only used for creating links during Markdown
processing, and are stripped from your document in the HTML output.</p>
<p>Link definition names may consist of letters, numbers, spaces, and punctuation -- but they are <em>not</em> case sensitive. E.g. these two links:</p>
<pre><code>[link text][a]
[link text][A]
</code></pre>
<p>are equivalent.</p>
<p>The <em>implicit link name</em> shortcut allows you to omit the name of the
link, in which case the link text itself is used as the name.
Just use an empty set of square brackets -- e.g., to link the word
"Google" to the google.com web site, you could simply write:</p>
<pre><code>[Google][]
</code></pre>
<p>And then define the link:</p>
<pre><code>[Google]: http://google.com/
</code></pre>
<p>Because link names may contain spaces, this shortcut even works for
multiple words in the link text:</p>
<pre><code>Visit [Daring Fireball][] for more information.
</code></pre>
<p>And then define the link:</p>
<pre><code>[Daring Fireball]: http://daringfireball.net/
</code></pre>
<p>Link definitions can be placed anywhere in your Markdown document. I
tend to put them immediately after each paragraph in which they're
used, but if you want, you can put them all at the end of your
document, sort of like footnotes.</p>
<p>Here's an example of reference links in action:</p>
<pre><code>I get 10 times more traffic from [Google] [1] than from
[Yahoo] [2] or [MSN] [3].
[1]: http://google.com/ "Google"
[2]: http://search.yahoo.com/ "Yahoo Search"
[3]: http://search.msn.com/ "MSN Search"
</code></pre>
<p>Using the implicit link name shortcut, you could instead write:</p>
<pre><code>I get 10 times more traffic from [Google][] than from
[Yahoo][] or [MSN][].
[google]: http://google.com/ "Google"
[yahoo]: http://search.yahoo.com/ "Yahoo Search"
[msn]: http://search.msn.com/ "MSN Search"
</code></pre>
<p>Both of the above examples will produce the following HTML output:</p>
<pre><code>&lt;p&gt;I get 10 times more traffic from &lt;a href="http://google.com/"
title="Google"&gt;Google&lt;/a&gt; than from
&lt;a href="http://search.yahoo.com/" title="Yahoo Search"&gt;Yahoo&lt;/a&gt;
or &lt;a href="http://search.msn.com/" title="MSN Search"&gt;MSN&lt;/a&gt;.&lt;/p&gt;
</code></pre>
<p>For comparison, here is the same paragraph written using
Markdown's inline link style:</p>
<pre><code>I get 10 times more traffic from [Google](http://google.com/ "Google")
than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or
[MSN](http://search.msn.com/ "MSN Search").
</code></pre>
<p>The point of reference-style links is not that they're easier to
write. The point is that with reference-style links, your document
source is vastly more readable. Compare the above examples: using
reference-style links, the paragraph itself is only 81 characters
long; with inline-style links, it's 176 characters; and as raw HTML,
it's 234 characters. In the raw HTML, there's more markup than there
is text.</p>
<p>With Markdown's reference-style links, a source document much more
closely resembles the final output, as rendered in a browser. By
allowing you to move the markup-related metadata out of the paragraph,
you can add links without interrupting the narrative flow of your
prose.</p>
<h3 id="em">Emphasis</h3>
<p>Markdown treats asterisks (<code>*</code>) and underscores (<code>_</code>) as indicators of
emphasis. Text wrapped with one <code>*</code> or <code>_</code> will be wrapped with an
HTML <code>&lt;em&gt;</code> tag; double <code>*</code>'s or <code>_</code>'s will be wrapped with an HTML
<code>&lt;strong&gt;</code> tag. E.g., this input:</p>
<pre><code>*single asterisks*
_single underscores_
**double asterisks**
__double underscores__
</code></pre>
<p>will produce:</p>
<pre><code>&lt;em&gt;single asterisks&lt;/em&gt;
&lt;em&gt;single underscores&lt;/em&gt;
&lt;strong&gt;double asterisks&lt;/strong&gt;
&lt;strong&gt;double underscores&lt;/strong&gt;
</code></pre>
<p>You can use whichever style you prefer; the lone restriction is that
the same character must be used to open and close an emphasis span.</p>
<p>Emphasis can be used in the middle of a word:</p>
<pre><code>un*fucking*believable
</code></pre>
<p>But if you surround an <code>*</code> or <code>_</code> with spaces, it'll be treated as a
literal asterisk or underscore.</p>
<p>To produce a literal asterisk or underscore at a position where it
would otherwise be used as an emphasis delimiter, you can backslash
escape it:</p>
<pre><code>\*this text is surrounded by literal asterisks\*
</code></pre>
<h3 id="code">Code</h3>
<p>To indicate a span of code, wrap it with backtick quotes (<code>`</code>).
Unlike a pre-formatted code block, a code span indicates code within a
normal paragraph. For example:</p>
<pre><code>Use the `printf()` function.
</code></pre>
<p>will produce:</p>
<pre><code>&lt;p&gt;Use the &lt;code&gt;printf()&lt;/code&gt; function.&lt;/p&gt;
</code></pre>
<p>To include a literal backtick character within a code span, you can use
multiple backticks as the opening and closing delimiters:</p>
<pre><code>``There is a literal backtick (`) here.``
</code></pre>
<p>which will produce this:</p>
<pre><code>&lt;p&gt;&lt;code&gt;There is a literal backtick (`) here.&lt;/code&gt;&lt;/p&gt;
</code></pre>
<p>The backtick delimiters surrounding a code span may include spaces --
one after the opening, one before the closing. This allows you to place
literal backtick characters at the beginning or end of a code span:</p>
<pre><code>A single backtick in a code span: `` ` ``
A backtick-delimited string in a code span: `` `foo` ``
</code></pre>
<p>will produce:</p>
<pre><code>&lt;p&gt;A single backtick in a code span: &lt;code&gt;`&lt;/code&gt;&lt;/p&gt;
&lt;p&gt;A backtick-delimited string in a code span: &lt;code&gt;`foo`&lt;/code&gt;&lt;/p&gt;
</code></pre>
<p>With a code span, ampersands and angle brackets are encoded as HTML
entities automatically, which makes it easy to include example HTML
tags. Markdown will turn this:</p>
<pre><code>Please don't use any `&lt;blink&gt;` tags.
</code></pre>
<p>into:</p>
<pre><code>&lt;p&gt;Please don't use any &lt;code&gt;&amp;lt;blink&amp;gt;&lt;/code&gt; tags.&lt;/p&gt;
</code></pre>
<p>You can write this:</p>
<pre><code>`&amp;#8212;` is the decimal-encoded equivalent of `&amp;mdash;`.
</code></pre>
<p>to produce:</p>
<pre><code>&lt;p&gt;&lt;code&gt;&amp;amp;#8212;&lt;/code&gt; is the decimal-encoded
equivalent of &lt;code&gt;&amp;amp;mdash;&lt;/code&gt;.&lt;/p&gt;
</code></pre>
<h3 id="img">Images</h3>
<p>Admittedly, it's fairly difficult to devise a "natural" syntax for
placing images into a plain text document format.</p>
<p>Markdown uses an image syntax that is intended to resemble the syntax
for links, allowing for two styles: <em>inline</em> and <em>reference</em>.</p>
<p>Inline image syntax looks like this:</p>
<pre><code>![Alt text](/path/to/img.jpg)
![Alt text](/path/to/img.jpg "Optional title")
</code></pre>
<p>That is:</p>
<ul>
<li>An exclamation mark: <code>!</code>;</li>
<li>followed by a set of square brackets, containing the <code>alt</code>
attribute text for the image;</li>
<li>followed by a set of parentheses, containing the URL or path to
the image, and an optional <code>title</code> attribute enclosed in double
or single quotes.</li>
</ul>
<p>Reference-style image syntax looks like this:</p>
<pre><code>![Alt text][id]
</code></pre>
<p>Where "id" is the name of a defined image reference. Image references
are defined using syntax identical to link references:</p>
<pre><code>[id]: url/to/image "Optional title attribute"
</code></pre>
<p>As of this writing, Markdown has no syntax for specifying the
dimensions of an image; if this is important to you, you can simply
use regular HTML <code>&lt;img&gt;</code> tags.</p>
<hr />
<h2 id="misc">Miscellaneous</h2>
<h3 id="autolink">Automatic Links</h3>
<p>Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:</p>
<pre><code>&lt;http://example.com/&gt;
</code></pre>
<p>Markdown will turn this into:</p>
<pre><code>&lt;a href="http://example.com/"&gt;http://example.com/&lt;/a&gt;
</code></pre>
<p>Automatic links for email addresses work similarly, except that
Markdown will also perform a bit of randomized decimal and hex
entity-encoding to help obscure your address from address-harvesting
spambots. For example, Markdown will turn this:</p>
<pre><code>&lt;address@example.com&gt;
</code></pre>
<p>into something like this:</p>
<pre><code>&lt;a href="&amp;#x6D;&amp;#x61;i&amp;#x6C;&amp;#x74;&amp;#x6F;:&amp;#x61;&amp;#x64;&amp;#x64;&amp;#x72;&amp;#x65;
&amp;#115;&amp;#115;&amp;#64;&amp;#101;&amp;#120;&amp;#x61;&amp;#109;&amp;#x70;&amp;#x6C;e&amp;#x2E;&amp;#99;&amp;#111;
&amp;#109;"&gt;&amp;#x61;&amp;#x64;&amp;#x64;&amp;#x72;&amp;#x65;&amp;#115;&amp;#115;&amp;#64;&amp;#101;&amp;#120;&amp;#x61;
&amp;#109;&amp;#x70;&amp;#x6C;e&amp;#x2E;&amp;#99;&amp;#111;&amp;#109;&lt;/a&gt;
</code></pre>
<p>which will render in a browser as a clickable link to "address@example.com".</p>
<p>(This sort of entity-encoding trick will indeed fool many, if not
most, address-harvesting bots, but it definitely won't fool all of
them. It's better than nothing, but an address published in this way
will probably eventually start receiving spam.)</p>
<h3 id="backslash">Backslash Escapes</h3>
<p>Markdown allows you to use backslash escapes to generate literal
characters which would otherwise have special meaning in Markdown's
formatting syntax. For example, if you wanted to surround a word with
literal asterisks (instead of an HTML <code>&lt;em&gt;</code> tag), you can use backslashes
before the asterisks, like this:</p>
<pre><code>\*literal asterisks\*
</code></pre>
<p>Markdown provides backslash escapes for the following characters:</p>
<pre><code>\ backslash
` backtick
* asterisk
_ underscore
{} curly braces
[] square brackets
() parentheses
# hash mark
+ plus sign
- minus sign (hyphen)
. dot
! exclamation mark
</code></pre>

View file

@ -1,888 +0,0 @@
Markdown: Syntax
================
<ul id="ProjectSubmenu">
<li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li>
<li><a href="/projects/markdown/basics" title="Markdown Basics">Basics</a></li>
<li><a class="selected" title="Markdown Syntax Documentation">Syntax</a></li>
<li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li>
<li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li>
</ul>
* [Overview](#overview)
* [Philosophy](#philosophy)
* [Inline HTML](#html)
* [Automatic Escaping for Special Characters](#autoescape)
* [Block Elements](#block)
* [Paragraphs and Line Breaks](#p)
* [Headers](#header)
* [Blockquotes](#blockquote)
* [Lists](#list)
* [Code Blocks](#precode)
* [Horizontal Rules](#hr)
* [Span Elements](#span)
* [Links](#link)
* [Emphasis](#em)
* [Code](#code)
* [Images](#img)
* [Miscellaneous](#misc)
* [Backslash Escapes](#backslash)
* [Automatic Links](#autolink)
**Note:** This document is itself written using Markdown; you
can [see the source for it by adding '.text' to the URL][src].
[src]: /projects/markdown/syntax.text
* * *
<h2 id="overview">Overview</h2>
<h3 id="philosophy">Philosophy</h3>
Markdown is intended to be as easy-to-read and easy-to-write as is feasible.
Readability, however, is emphasized above all else. A Markdown-formatted
document should be publishable as-is, as plain text, without looking
like it's been marked up with tags or formatting instructions. While
Markdown's syntax has been influenced by several existing text-to-HTML
filters -- including [Setext] [1], [atx] [2], [Textile] [3], [reStructuredText] [4],
[Grutatext] [5], and [EtText] [6] -- the single biggest source of
inspiration for Markdown's syntax is the format of plain text email.
[1]: http://docutils.sourceforge.net/mirror/setext.html
[2]: http://www.aaronsw.com/2002/atx/
[3]: http://textism.com/tools/textile/
[4]: http://docutils.sourceforge.net/rst.html
[5]: http://www.triptico.com/software/grutatxt.html
[6]: http://ettext.taint.org/doc/
To this end, Markdown's syntax is comprised entirely of punctuation
characters, which punctuation characters have been carefully chosen so
as to look like what they mean. E.g., asterisks around a word actually
look like \*emphasis\*. Markdown lists look like, well, lists. Even
blockquotes look like quoted passages of text, assuming you've ever
used email.
<h3 id="html">Inline HTML</h3>
Markdown's syntax is intended for one purpose: to be used as a
format for *writing* for the web.
Markdown is not a replacement for HTML, or even close to it. Its
syntax is very small, corresponding only to a very small subset of
HTML tags. The idea is *not* to create a syntax that makes it easier
to insert HTML tags. In my opinion, HTML tags are already easy to
insert. The idea for Markdown is to make it easy to read, write, and
edit prose. HTML is a *publishing* format; Markdown is a *writing*
format. Thus, Markdown's formatting syntax only addresses issues that
can be conveyed in plain text.
For any markup that is not covered by Markdown's syntax, you simply
use HTML itself. There's no need to preface it or delimit it to
indicate that you're switching from Markdown to HTML; you just use
the tags.
The only restrictions are that block-level HTML elements -- e.g. `<div>`,
`<table>`, `<pre>`, `<p>`, etc. -- must be separated from surrounding
content by blank lines, and the start and end tags of the block should
not be indented with tabs or spaces. Markdown is smart enough not
to add extra (unwanted) `<p>` tags around HTML block-level tags.
For example, to add an HTML table to a Markdown article:
This is a regular paragraph.
<table>
<tr>
<td>Foo</td>
</tr>
</table>
This is another regular paragraph.
Note that Markdown formatting syntax is not processed within block-level
HTML tags. E.g., you can't use Markdown-style `*emphasis*` inside an
HTML block.
Span-level HTML tags -- e.g. `<span>`, `<cite>`, or `<del>` -- can be
used anywhere in a Markdown paragraph, list item, or header. If you
want, you can even use HTML tags instead of Markdown formatting; e.g. if
you'd prefer to use HTML `<a>` or `<img>` tags instead of Markdown's
link or image syntax, go right ahead.
Unlike block-level HTML tags, Markdown syntax *is* processed within
span-level tags.
<h3 id="autoescape">Automatic Escaping for Special Characters</h3>
In HTML, there are two characters that demand special treatment: `<`
and `&`. Left angle brackets are used to start tags; ampersands are
used to denote HTML entities. If you want to use them as literal
characters, you must escape them as entities, e.g. `&lt;`, and
`&amp;`.
Ampersands in particular are bedeviling for web writers. If you want to
write about 'AT&T', you need to write '`AT&amp;T`'. You even need to
escape ampersands within URLs. Thus, if you want to link to:
http://images.google.com/images?num=30&q=larry+bird
you need to encode the URL as:
http://images.google.com/images?num=30&amp;q=larry+bird
in your anchor tag `href` attribute. Needless to say, this is easy to
forget, and is probably the single most common source of HTML validation
errors in otherwise well-marked-up web sites.
Markdown allows you to use these characters naturally, taking care of
all the necessary escaping for you. If you use an ampersand as part of
an HTML entity, it remains unchanged; otherwise it will be translated
into `&amp;`.
So, if you want to include a copyright symbol in your article, you can write:
&copy;
and Markdown will leave it alone. But if you write:
AT&T
Markdown will translate it to:
AT&amp;T
Similarly, because Markdown supports [inline HTML](#html), if you use
angle brackets as delimiters for HTML tags, Markdown will treat them as
such. But if you write:
4 < 5
Markdown will translate it to:
4 &lt; 5
However, inside Markdown code spans and blocks, angle brackets and
ampersands are *always* encoded automatically. This makes it easy to use
Markdown to write about HTML code. (As opposed to raw HTML, which is a
terrible format for writing about HTML syntax, because every single `<`
and `&` in your example code needs to be escaped.)
* * *
<h2 id="block">Block Elements</h2>
<h3 id="p">Paragraphs and Line Breaks</h3>
A paragraph is simply one or more consecutive lines of text, separated
by one or more blank lines. (A blank line is any line that looks like a
blank line -- a line containing nothing but spaces or tabs is considered
blank.) Normal paragraphs should not be intended with spaces or tabs.
The implication of the "one or more consecutive lines of text" rule is
that Markdown supports "hard-wrapped" text paragraphs. This differs
significantly from most other text-to-HTML formatters (including Movable
Type's "Convert Line Breaks" option) which translate every line break
character in a paragraph into a `<br />` tag.
When you *do* want to insert a `<br />` break tag using Markdown, you
end a line with two or more spaces, then type return.
Yes, this takes a tad more effort to create a `<br />`, but a simplistic
"every line break is a `<br />`" rule wouldn't work for Markdown.
Markdown's email-style [blockquoting][bq] and multi-paragraph [list items][l]
work best -- and look better -- when you format them with hard breaks.
[bq]: #blockquote
[l]: #list
<h3 id="header">Headers</h3>
Markdown supports two styles of headers, [Setext] [1] and [atx] [2].
Setext-style headers are "underlined" using equal signs (for first-level
headers) and dashes (for second-level headers). For example:
This is an H1
=============
This is an H2
-------------
Any number of underlining `=`'s or `-`'s will work.
Atx-style headers use 1-6 hash characters at the start of the line,
corresponding to header levels 1-6. For example:
# This is an H1
## This is an H2
###### This is an H6
Optionally, you may "close" atx-style headers. This is purely
cosmetic -- you can use this if you think it looks better. The
closing hashes don't even need to match the number of hashes
used to open the header. (The number of opening hashes
determines the header level.) :
# This is an H1 #
## This is an H2 ##
### This is an H3 ######
<h3 id="blockquote">Blockquotes</h3>
Markdown uses email-style `>` characters for blockquoting. If you're
familiar with quoting passages of text in an email message, then you
know how to create a blockquote in Markdown. It looks best if you hard
wrap the text and put a `>` before every line:
> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
> consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
> Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
>
> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
> id sem consectetuer libero luctus adipiscing.
Markdown allows you to be lazy and only put the `>` before the first
line of a hard-wrapped paragraph:
> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
id sem consectetuer libero luctus adipiscing.
Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by
adding additional levels of `>`:
> This is the first level of quoting.
>
> > This is nested blockquote.
>
> Back to the first level.
Blockquotes can contain other Markdown elements, including headers, lists,
and code blocks:
> ## This is a header.
>
> 1. This is the first list item.
> 2. This is the second list item.
>
> Here's some example code:
>
> return shell_exec("echo $input | $markdown_script");
Any decent text editor should make email-style quoting easy. For
example, with BBEdit, you can make a selection and choose Increase
Quote Level from the Text menu.
<h3 id="list">Lists</h3>
Markdown supports ordered (numbered) and unordered (bulleted) lists.
Unordered lists use asterisks, pluses, and hyphens -- interchangably
-- as list markers:
* Red
* Green
* Blue
is equivalent to:
+ Red
+ Green
+ Blue
and:
- Red
- Green
- Blue
Ordered lists use numbers followed by periods:
1. Bird
2. McHale
3. Parish
It's important to note that the actual numbers you use to mark the
list have no effect on the HTML output Markdown produces. The HTML
Markdown produces from the above list is:
<ol>
<li>Bird</li>
<li>McHale</li>
<li>Parish</li>
</ol>
If you instead wrote the list in Markdown like this:
1. Bird
1. McHale
1. Parish
or even:
3. Bird
1. McHale
8. Parish
you'd get the exact same HTML output. The point is, if you want to,
you can use ordinal numbers in your ordered Markdown lists, so that
the numbers in your source match the numbers in your published HTML.
But if you want to be lazy, you don't have to.
If you do use lazy list numbering, however, you should still start the
list with the number 1. At some point in the future, Markdown may support
starting ordered lists at an arbitrary number.
List markers typically start at the left margin, but may be indented by
up to three spaces. List markers must be followed by one or more spaces
or a tab.
To make lists look nice, you can wrap items with hanging indents:
* Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
viverra nec, fringilla in, laoreet vitae, risus.
* Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
Suspendisse id sem consectetuer libero luctus adipiscing.
But if you want to be lazy, you don't have to:
* Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
viverra nec, fringilla in, laoreet vitae, risus.
* Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
Suspendisse id sem consectetuer libero luctus adipiscing.
If list items are separated by blank lines, Markdown will wrap the
items in `<p>` tags in the HTML output. For example, this input:
* Bird
* Magic
will turn into:
<ul>
<li>Bird</li>
<li>Magic</li>
</ul>
But this:
* Bird
* Magic
will turn into:
<ul>
<li><p>Bird</p></li>
<li><p>Magic</p></li>
</ul>
List items may consist of multiple paragraphs. Each subsequent
paragraph in a list item must be intended by either 4 spaces
or one tab:
1. This is a list item with two paragraphs. Lorem ipsum dolor
sit amet, consectetuer adipiscing elit. Aliquam hendrerit
mi posuere lectus.
Vestibulum enim wisi, viverra nec, fringilla in, laoreet
vitae, risus. Donec sit amet nisl. Aliquam semper ipsum
sit amet velit.
2. Suspendisse id sem consectetuer libero luctus adipiscing.
It looks nice if you indent every line of the subsequent
paragraphs, but here again, Markdown will allow you to be
lazy:
* This is a list item with two paragraphs.
This is the second paragraph in the list item. You're
only required to indent the first line. Lorem ipsum dolor
sit amet, consectetuer adipiscing elit.
* Another item in the same list.
To put a blockquote within a list item, the blockquote's `>`
delimiters need to be indented:
* A list item with a blockquote:
> This is a blockquote
> inside a list item.
To put a code block within a list item, the code block needs
to be indented *twice* -- 8 spaces or two tabs:
* A list item with a code block:
<code goes here>
It's worth noting that it's possible to trigger an ordered list by
accident, by writing something like this:
1986. What a great season.
In other words, a *number-period-space* sequence at the beginning of a
line. To avoid this, you can backslash-escape the period:
1986\. What a great season.
<h3 id="precode">Code Blocks</h3>
Pre-formatted code blocks are used for writing about programming or
markup source code. Rather than forming normal paragraphs, the lines
of a code block are interpreted literally. Markdown wraps a code block
in both `<pre>` and `<code>` tags.
To produce a code block in Markdown, simply indent every line of the
block by at least 4 spaces or 1 tab. For example, given this input:
This is a normal paragraph:
This is a code block.
Markdown will generate:
<p>This is a normal paragraph:</p>
<pre><code>This is a code block.
</code></pre>
One level of indentation -- 4 spaces or 1 tab -- is removed from each
line of the code block. For example, this:
Here is an example of AppleScript:
tell application "Foo"
beep
end tell
will turn into:
<p>Here is an example of AppleScript:</p>
<pre><code>tell application "Foo"
beep
end tell
</code></pre>
A code block continues until it reaches a line that is not indented
(or the end of the article).
Within a code block, ampersands (`&`) and angle brackets (`<` and `>`)
are automatically converted into HTML entities. This makes it very
easy to include example HTML source code using Markdown -- just paste
it and indent it, and Markdown will handle the hassle of encoding the
ampersands and angle brackets. For example, this:
<div class="footer">
&copy; 2004 Foo Corporation
</div>
will turn into:
<pre><code>&lt;div class="footer"&gt;
&amp;copy; 2004 Foo Corporation
&lt;/div&gt;
</code></pre>
Regular Markdown syntax is not processed within code blocks. E.g.,
asterisks are just literal asterisks within a code block. This means
it's also easy to use Markdown to write about Markdown's own syntax.
<h3 id="hr">Horizontal Rules</h3>
You can produce a horizontal rule tag (`<hr />`) by placing three or
more hyphens, asterisks, or underscores on a line by themselves. If you
wish, you may use spaces between the hyphens or asterisks. Each of the
following lines will produce a horizontal rule:
* * *
***
*****
- - -
---------------------------------------
_ _ _
* * *
<h2 id="span">Span Elements</h2>
<h3 id="link">Links</h3>
Markdown supports two style of links: *inline* and *reference*.
In both styles, the link text is delimited by [square brackets].
To create an inline link, use a set of regular parentheses immediately
after the link text's closing square bracket. Inside the parentheses,
put the URL where you want the link to point, along with an *optional*
title for the link, surrounded in quotes. For example:
This is [an example](http://example.com/ "Title") inline link.
[This link](http://example.net/) has no title attribute.
Will produce:
<p>This is <a href="http://example.com/" title="Title">
an example</a> inline link.</p>
<p><a href="http://example.net/">This link</a> has no
title attribute.</p>
If you're referring to a local resource on the same server, you can
use relative paths:
See my [About](/about/) page for details.
Reference-style links use a second set of square brackets, inside
which you place a label of your choosing to identify the link:
This is [an example][id] reference-style link.
You can optionally use a space to separate the sets of brackets:
This is [an example] [id] reference-style link.
Then, anywhere in the document, you define your link label like this,
on a line by itself:
[id]: http://example.com/ "Optional Title Here"
That is:
* Square brackets containing the link identifier (optionally
indented from the left margin using up to three spaces);
* followed by a colon;
* followed by one or more spaces (or tabs);
* followed by the URL for the link;
* optionally followed by a title attribute for the link, enclosed
in double or single quotes.
The link URL may, optionally, be surrounded by angle brackets:
[id]: <http://example.com/> "Optional Title Here"
You can put the title attribute on the next line and use extra spaces
or tabs for padding, which tends to look better with longer URLs:
[id]: http://example.com/longish/path/to/resource/here
"Optional Title Here"
Link definitions are only used for creating links during Markdown
processing, and are stripped from your document in the HTML output.
Link definition names may constist of letters, numbers, spaces, and punctuation -- but they are *not* case sensitive. E.g. these two links:
[link text][a]
[link text][A]
are equivalent.
The *implicit link name* shortcut allows you to omit the name of the
link, in which case the link text itself is used as the name.
Just use an empty set of square brackets -- e.g., to link the word
"Google" to the google.com web site, you could simply write:
[Google][]
And then define the link:
[Google]: http://google.com/
Because link names may contain spaces, this shortcut even works for
multiple words in the link text:
Visit [Daring Fireball][] for more information.
And then define the link:
[Daring Fireball]: http://daringfireball.net/
Link definitions can be placed anywhere in your Markdown document. I
tend to put them immediately after each paragraph in which they're
used, but if you want, you can put them all at the end of your
document, sort of like footnotes.
Here's an example of reference links in action:
I get 10 times more traffic from [Google] [1] than from
[Yahoo] [2] or [MSN] [3].
[1]: http://google.com/ "Google"
[2]: http://search.yahoo.com/ "Yahoo Search"
[3]: http://search.msn.com/ "MSN Search"
Using the implicit link name shortcut, you could instead write:
I get 10 times more traffic from [Google][] than from
[Yahoo][] or [MSN][].
[google]: http://google.com/ "Google"
[yahoo]: http://search.yahoo.com/ "Yahoo Search"
[msn]: http://search.msn.com/ "MSN Search"
Both of the above examples will produce the following HTML output:
<p>I get 10 times more traffic from <a href="http://google.com/"
title="Google">Google</a> than from
<a href="http://search.yahoo.com/" title="Yahoo Search">Yahoo</a>
or <a href="http://search.msn.com/" title="MSN Search">MSN</a>.</p>
For comparison, here is the same paragraph written using
Markdown's inline link style:
I get 10 times more traffic from [Google](http://google.com/ "Google")
than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or
[MSN](http://search.msn.com/ "MSN Search").
The point of reference-style links is not that they're easier to
write. The point is that with reference-style links, your document
source is vastly more readable. Compare the above examples: using
reference-style links, the paragraph itself is only 81 characters
long; with inline-style links, it's 176 characters; and as raw HTML,
it's 234 characters. In the raw HTML, there's more markup than there
is text.
With Markdown's reference-style links, a source document much more
closely resembles the final output, as rendered in a browser. By
allowing you to move the markup-related metadata out of the paragraph,
you can add links without interrupting the narrative flow of your
prose.
<h3 id="em">Emphasis</h3>
Markdown treats asterisks (`*`) and underscores (`_`) as indicators of
emphasis. Text wrapped with one `*` or `_` will be wrapped with an
HTML `<em>` tag; double `*`'s or `_`'s will be wrapped with an HTML
`<strong>` tag. E.g., this input:
*single asterisks*
_single underscores_
**double asterisks**
__double underscores__
will produce:
<em>single asterisks</em>
<em>single underscores</em>
<strong>double asterisks</strong>
<strong>double underscores</strong>
You can use whichever style you prefer; the lone restriction is that
the same character must be used to open and close an emphasis span.
Emphasis can be used in the middle of a word:
un*fucking*believable
But if you surround an `*` or `_` with spaces, it'll be treated as a
literal asterisk or underscore.
To produce a literal asterisk or underscore at a position where it
would otherwise be used as an emphasis delimiter, you can backslash
escape it:
\*this text is surrounded by literal asterisks\*
<h3 id="code">Code</h3>
To indicate a span of code, wrap it with backtick quotes (`` ` ``).
Unlike a pre-formatted code block, a code span indicates code within a
normal paragraph. For example:
Use the `printf()` function.
will produce:
<p>Use the <code>printf()</code> function.</p>
To include a literal backtick character within a code span, you can use
multiple backticks as the opening and closing delimiters:
``There is a literal backtick (`) here.``
which will produce this:
<p><code>There is a literal backtick (`) here.</code></p>
The backtick delimiters surrounding a code span may include spaces --
one after the opening, one before the closing. This allows you to place
literal backtick characters at the beginning or end of a code span:
A single backtick in a code span: `` ` ``
A backtick-delimited string in a code span: `` `foo` ``
will produce:
<p>A single backtick in a code span: <code>`</code></p>
<p>A backtick-delimited string in a code span: <code>`foo`</code></p>
With a code span, ampersands and angle brackets are encoded as HTML
entities automatically, which makes it easy to include example HTML
tags. Markdown will turn this:
Please don't use any `<blink>` tags.
into:
<p>Please don't use any <code>&lt;blink&gt;</code> tags.</p>
You can write this:
`&#8212;` is the decimal-encoded equivalent of `&mdash;`.
to produce:
<p><code>&amp;#8212;</code> is the decimal-encoded
equivalent of <code>&amp;mdash;</code>.</p>
<h3 id="img">Images</h3>
Admittedly, it's fairly difficult to devise a "natural" syntax for
placing images into a plain text document format.
Markdown uses an image syntax that is intended to resemble the syntax
for links, allowing for two styles: *inline* and *reference*.
Inline image syntax looks like this:
![Alt text](/path/to/img.jpg)
![Alt text](/path/to/img.jpg "Optional title")
That is:
* An exclamation mark: `!`;
* followed by a set of square brackets, containing the `alt`
attribute text for the image;
* followed by a set of parentheses, containing the URL or path to
the image, and an optional `title` attribute enclosed in double
or single quotes.
Reference-style image syntax looks like this:
![Alt text][id]
Where "id" is the name of a defined image reference. Image references
are defined using syntax identical to link references:
[id]: url/to/image "Optional title attribute"
As of this writing, Markdown has no syntax for specifying the
dimensions of an image; if this is important to you, you can simply
use regular HTML `<img>` tags.
* * *
<h2 id="misc">Miscellaneous</h2>
<h3 id="autolink">Automatic Links</h3>
Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:
<http://example.com/>
Markdown will turn this into:
<a href="http://example.com/">http://example.com/</a>
Automatic links for email addresses work similarly, except that
Markdown will also perform a bit of randomized decimal and hex
entity-encoding to help obscure your address from address-harvesting
spambots. For example, Markdown will turn this:
<address@example.com>
into something like this:
<a href="&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:&#x61;&#x64;&#x64;&#x72;&#x65;
&#115;&#115;&#64;&#101;&#120;&#x61;&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;
&#109;">&#x61;&#x64;&#x64;&#x72;&#x65;&#115;&#115;&#64;&#101;&#120;&#x61;
&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;&#109;</a>
which will render in a browser as a clickable link to "address@example.com".
(This sort of entity-encoding trick will indeed fool many, if not
most, address-harvesting bots, but it definitely won't fool all of
them. It's better than nothing, but an address published in this way
will probably eventually start receiving spam.)
<h3 id="backslash">Backslash Escapes</h3>
Markdown allows you to use backslash escapes to generate literal
characters which would otherwise have special meaning in Markdown's
formatting syntax. For example, if you wanted to surround a word with
literal asterisks (instead of an HTML `<em>` tag), you can backslashes
before the asterisks, like this:
\*literal asterisks\*
Markdown provides backslash escapes for the following characters:
\ backslash
` backtick
* asterisk
_ underscore
{} curly braces
[] square brackets
() parentheses
# hash mark
+ plus sign
- minus sign (hyphen)
. dot
! exclamation mark

View file

@ -1,9 +0,0 @@
<blockquote>
<p>foo</p>
<blockquote>
<p>bar</p>
</blockquote>
<p>foo</p>
</blockquote>

View file

@ -1,5 +0,0 @@
> foo
>
> > bar
>
> foo

View file

@ -1,148 +0,0 @@
<h2>Unordered</h2>
<p>Asterisks tight:</p>
<ul>
<li>asterisk 1</li>
<li>asterisk 2</li>
<li>asterisk 3</li>
</ul>
<p>Asterisks loose:</p>
<ul>
<li><p>asterisk 1</p></li>
<li><p>asterisk 2</p></li>
<li><p>asterisk 3</p></li>
</ul>
<hr />
<p>Pluses tight:</p>
<ul>
<li>Plus 1</li>
<li>Plus 2</li>
<li>Plus 3</li>
</ul>
<p>Pluses loose:</p>
<ul>
<li><p>Plus 1</p></li>
<li><p>Plus 2</p></li>
<li><p>Plus 3</p></li>
</ul>
<hr />
<p>Minuses tight:</p>
<ul>
<li>Minus 1</li>
<li>Minus 2</li>
<li>Minus 3</li>
</ul>
<p>Minuses loose:</p>
<ul>
<li><p>Minus 1</p></li>
<li><p>Minus 2</p></li>
<li><p>Minus 3</p></li>
</ul>
<h2>Ordered</h2>
<p>Tight:</p>
<ol>
<li>First</li>
<li>Second</li>
<li>Third</li>
</ol>
<p>and:</p>
<ol>
<li>One</li>
<li>Two</li>
<li>Three</li>
</ol>
<p>Loose using tabs:</p>
<ol>
<li><p>First</p></li>
<li><p>Second</p></li>
<li><p>Third</p></li>
</ol>
<p>and using spaces:</p>
<ol>
<li><p>One</p></li>
<li><p>Two</p></li>
<li><p>Three</p></li>
</ol>
<p>Multiple paragraphs:</p>
<ol>
<li><p>Item 1, graf one.</p>
<p>Item 2. graf two. The quick brown fox jumped over the lazy dog's
back.</p></li>
<li><p>Item 2.</p></li>
<li><p>Item 3.</p></li>
</ol>
<h2>Nested</h2>
<ul>
<li>Tab
<ul>
<li>Tab
<ul>
<li>Tab</li>
</ul></li>
</ul></li>
</ul>
<p>Here's another:</p>
<ol>
<li>First</li>
<li>Second:
<ul>
<li>Fee</li>
<li>Fie</li>
<li>Foe</li>
</ul></li>
<li>Third</li>
</ol>
<p>Same thing but with paragraphs:</p>
<ol>
<li><p>First</p></li>
<li><p>Second:</p>
<ul>
<li>Fee</li>
<li>Fie</li>
<li>Foe</li>
</ul></li>
<li><p>Third</p></li>
</ol>
<p>This was an error in Markdown 1.0.1:</p>
<ul>
<li><p>this</p>
<ul><li>sub</li></ul>
<p>that</p></li>
</ul>

View file

@ -1,131 +0,0 @@
## Unordered
Asterisks tight:
* asterisk 1
* asterisk 2
* asterisk 3
Asterisks loose:
* asterisk 1
* asterisk 2
* asterisk 3
* * *
Pluses tight:
+ Plus 1
+ Plus 2
+ Plus 3
Pluses loose:
+ Plus 1
+ Plus 2
+ Plus 3
* * *
Minuses tight:
- Minus 1
- Minus 2
- Minus 3
Minuses loose:
- Minus 1
- Minus 2
- Minus 3
## Ordered
Tight:
1. First
2. Second
3. Third
and:
1. One
2. Two
3. Three
Loose using tabs:
1. First
2. Second
3. Third
and using spaces:
1. One
2. Two
3. Three
Multiple paragraphs:
1. Item 1, graf one.
Item 2. graf two. The quick brown fox jumped over the lazy dog's
back.
2. Item 2.
3. Item 3.
## Nested
* Tab
* Tab
* Tab
Here's another:
1. First
2. Second:
* Fee
* Fie
* Foe
3. Third
Same thing but with paragraphs:
1. First
2. Second:
* Fee
* Fie
* Foe
3. Third
This was an error in Markdown 1.0.1:
* this
* sub
that

View file

@ -1,7 +0,0 @@
<p><strong><em>This is strong and em.</em></strong></p>
<p>So is <strong><em>this</em></strong> word.</p>
<p><strong><em>This is strong and em.</em></strong></p>
<p>So is <strong><em>this</em></strong> word.</p>

View file

@ -1,7 +0,0 @@
***This is strong and em.***
So is ***this*** word.
___This is strong and em.___
So is ___this___ word.

View file

@ -1,25 +0,0 @@
<ul>
<li><p>this is a list item
indented with tabs</p></li>
<li><p>this is a list item
indented with spaces</p></li>
</ul>
<p>Code:</p>
<pre><code>this code block is indented by one tab
</code></pre>
<p>And:</p>
<pre><code> this code block is indented by two tabs
</code></pre>
<p>And:</p>
<pre><code>+ this is an example list item
indented with tabs
+ this is an example list item
indented with spaces
</code></pre>

View file

@ -1,21 +0,0 @@
+ this is a list item
indented with tabs
+ this is a list item
indented with spaces
Code:
this code block is indented by one tab
And:
this code block is indented by two tabs
And:
+ this is an example list item
indented with tabs
+ this is an example list item
indented with spaces

View file

@ -1,8 +0,0 @@
<blockquote>
<p>A list within a blockquote:</p>
<ul>
<li>asterisk 1</li>
<li>asterisk 2</li>
<li>asterisk 3</li>
</ul>
</blockquote>

View file

@ -1,5 +0,0 @@
> A list within a blockquote:
>
> * asterisk 1
> * asterisk 2
> * asterisk 3

View file

@ -1,51 +0,0 @@
<p>==Highlight==</p>
<p>~~Strikethrough~~</p>
<p>_Underscore_</p>
<p>_<em>Underscore_</em></p>
<p><em>_Underscore</em>_</p>
<p>_<em>Underscore</em>_</p>
<p><em>_Underscore_</em></p>
<p>*Asterisk*</p>
<p>*<em>Asterisk*</em></p>
<p>*<em>Asterisk</em>*</p>
<p><em>*Asterisk*</em></p>
<p><em>*Asterisk</em>*</p>
<p>[Bracket]</p>
<p>(Parenthesis)</p>
<p>&lt;Chevron&gt;</p>
<p>Super^script</p>
<p>`Backtick`</p>
<p>&quot;Quote&quot;</p>
<p><strong>Foo\</strong></p>
<p><em>Foo\*</em></p>
<p><strong>Foo\\Bar\</strong></p>
<p>*Foo\Bar\*</p>
<p><a href="http://example.com">Foo]</a></p>
<p><a href="http://example.com">Foo\</a></p>
<p><a href="http://example.com">Foo\]</a></p>
<p><a href="http://example.com">Foo\\</a></p>

View file

@ -1,51 +0,0 @@
\==Highlight\==
\~~Strikethrough\~~
\_Underscore\_
\__Underscore\__
_\_Underscore_\_
\__Underscore_\_
_\_Underscore\__
\*Asterisk\*
\**Asterisk\**
\**Asterisk*\*
*\*Asterisk\**
*\*Asterisk*\*
\[Bracket\]
\(Parenthesis\)
\<Chevron\>
Super\^script
\`Backtick\`
\"Quote\"
**Foo\\**
*Foo\\\**
**Foo\\\Bar\\**
*Foo\\Bar\\\*
[Foo\]](http://example.com)
[Foo\\](http://example.com)
[Foo\\\]](http://example.com)
[Foo\\\\](http://example.com)

View file

@ -1,31 +0,0 @@
<p>\[
1*2*3 multi-line math
\]</p>
<p>\( 1*2*3 inline-math \)</p>
<p>\[ 1*2*3 math with dollar \]</p>
<p>\[ 1*2*3 \$ \\ \text{dollar with escapes} \]</p>
<p>\( \\ \text{backslash with escapes} \$ 1*2*3 \)</p>
<p>( not <em>really</em> math )</p>
<p>$$ also <em>not</em> math $$</p>
<p>this \(*should* be\) math</p>
<p>this\( *should* also be\) math</p>
<p>and \(this *should* \)too</p>
<p>Something \{ like <em>math</em> but \} is not</p>
<p>Also \(like <em>math</em> but \) is not</p>
<p>\\( should be *math* as well \\\)</p>
<p>This is \( math, and the \\\( inner one \\\) should be \) preserved</p>
<p>\[ did you &lt;em&gt; know &lt;/em&gt; this is math? \]</p>

View file

@ -1,31 +0,0 @@
\\[
1*2*3 multi-line math
\\]
\\( 1*2*3 inline-math \\)
$$ 1*2*3 math with dollar $$
$$ 1*2*3 \$ \\ \text{dollar with escapes} $$
\\( \\ \text{backslash with escapes} \$ 1*2*3 \\)
\( not *really* math \)
\$$ also *not* math \$$
this $$*should* be$$ math
this$$ *should* also be$$ math
and $$this *should* $$too
Something \\{ like *math* but \\} is not
Also \\\(like *math* but \\\) is not
\\\\( should be *math* as well \\\\)
This is \\( math, and the \\\( inner one \\\) should be \\) preserved
$$ did you <em> know </em> this is math? $$

View file

@ -1,101 +0,0 @@
{
"tests": [
{
"input": "MarkdownTest_1.0.3/Tests/Amps and angle encoding.text",
"output": "MarkdownTest_1.0.3/Tests/Amps and angle encoding.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Auto links.text",
"output": "MarkdownTest_1.0.3/Tests/Auto links.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Backslash escapes.text",
"output": "MarkdownTest_1.0.3/Tests/Backslash escapes.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Blockquotes with code blocks.text",
"output": "MarkdownTest_1.0.3/Tests/Blockquotes with code blocks.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Code Blocks.text",
"output": "MarkdownTest_1.0.3/Tests/Code Blocks.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Code Spans.text",
"output": "MarkdownTest_1.0.3/Tests/Code Spans.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Hard-wrapped paragraphs with list-like lines.text",
"output": "MarkdownTest_1.0.3/Tests/Hard-wrapped paragraphs with list-like lines.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Horizontal rules.text",
"output": "MarkdownTest_1.0.3/Tests/Horizontal rules.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Inline HTML (Advanced).text",
"output": "MarkdownTest_1.0.3/Tests/Inline HTML (Advanced).html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Inline HTML (Simple).text",
"output": "MarkdownTest_1.0.3/Tests/Inline HTML (Simple).html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Inline HTML comments.text",
"output": "MarkdownTest_1.0.3/Tests/Inline HTML comments.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Links, inline style.text",
"output": "MarkdownTest_1.0.3/Tests/Links, inline style.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Links, reference style.text",
"output": "MarkdownTest_1.0.3/Tests/Links, reference style.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Links, shortcut references.text",
"output": "MarkdownTest_1.0.3/Tests/Links, shortcut references.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Literal quotes in titles.text",
"output": "MarkdownTest_1.0.3/Tests/Literal quotes in titles.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Markdown Documentation - Basics.text",
"output": "MarkdownTest_1.0.3/Tests/Markdown Documentation - Basics.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Markdown Documentation - Syntax.text",
"output": "MarkdownTest_1.0.3/Tests/Markdown Documentation - Syntax.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Nested blockquotes.text",
"output": "MarkdownTest_1.0.3/Tests/Nested blockquotes.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Ordered and unordered lists.text",
"output": "MarkdownTest_1.0.3/Tests/Ordered and unordered lists.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Strong and em together.text",
"output": "MarkdownTest_1.0.3/Tests/Strong and em together.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Tabs.text",
"output": "MarkdownTest_1.0.3/Tests/Tabs.html"
},
{
"input": "MarkdownTest_1.0.3/Tests/Tidyness.text",
"output": "MarkdownTest_1.0.3/Tests/Tidyness.html"
},
{
"input": "Tests/Escape character.text",
"output": "Tests/Escape character.html"
},
{
"input": "Tests/Math.text",
"output": "Tests/Math.html",
"flags": ["--math"]
}
]
}

View file

@ -1,108 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import difflib
import json
import os
import re
import subprocess
import unittest
# Directory containing this script.  ``__file__`` may be a bare relative name
# (e.g. when launched as ``python runner.py`` from inside this directory), in
# which case ``os.path.dirname`` would return '' and every path derived from
# it would silently resolve against the current working directory instead.
# Anchoring on the absolute path keeps the derived locations stable.
TEST_ROOT = os.path.dirname(os.path.abspath(__file__))

# Parent directory of TEST_ROOT; the ``hoedown`` executable is looked up here.
PROJECT_ROOT = os.path.dirname(TEST_ROOT)

# Command prefix (argv list) used to invoke the program under test.
HOEDOWN = [os.path.abspath(os.path.join(PROJECT_ROOT, 'hoedown'))]

# Command prefix (argv list) for the ``tidy`` invocation applied to both the
# actual and the expected HTML before they are compared.
TIDY = ['tidy', '--show-body-only', '1', '--show-warnings', '0',
        '--quiet', '1']

# JSON manifest listing the test cases.
CONFIG_PATH = os.path.join(TEST_ROOT, 'config.json')

# Matches every non-word character; used to turn an input path into a valid
# Python identifier for the generated test method name.
SLUGIFY_PATTERN = re.compile(r'\W')
def with_metaclass(meta, *bases):
    """Return a throwaway base class that applies ``meta`` on subclassing.

    Adapted from the ``six`` compatibility library
    (https://pythonhosted.org/six/).  Deriving from the returned placeholder
    makes the class statement call ``meta(name, bases, namespace)`` with the
    ``bases`` given here, so the placeholder itself never shows up in the
    final class's bases.
    """
    class metaclass(meta):
        # Invoked when a class statement lists the placeholder as its base:
        # discard the bases seen at that point and build the real class from
        # the ones captured by the enclosing call.
        def __new__(mcs, class_name, _placeholder_bases, namespace):
            return meta(class_name, bases, namespace)

    placeholder = type.__new__(metaclass, 'temporary_class', (), {})
    return placeholder
class TestFailed(AssertionError):
    """Assertion failure carrying a unified diff of expected vs. actual text.

    ``name`` labels the failing case; ``expected`` and ``got`` are the two
    texts to compare (they are split with ``splitlines()`` and handed to
    ``difflib.unified_diff``).  The rendered report is stored in
    ``self.description`` and is what ``str()`` returns.
    """

    def __init__(self, name, expected, got):
        # The inputs are split without their line endings, so request
        # terminator-free control lines too (lineterm='').  With the default
        # terminator the '---', '+++' and '@@' lines carry their own newline
        # and the join below would leave a blank line after each of them.
        diff = difflib.unified_diff(
            expected.splitlines(), got.splitlines(),
            fromfile='Expected', tofile='Got', lineterm='',
        )
        self.description = '{name}\n{diff}'.format(
            name=name, diff='\n'.join(diff),
        )
        # Hand the report to the base class rather than ``self``, so that
        # ``args`` holds a plain string instead of a self-reference.
        super(TestFailed, self).__init__(self.description)

    def __str__(self):
        return self.description
def _test_func(test_case):
    """Run one manifest entry and raise ``TestFailed`` if the output differs.

    ``test_case`` is a mapping with an ``'input'`` path and an ``'output'``
    path (both relative to ``TEST_ROOT``) and an optional ``'flags'`` list of
    extra command-line arguments for hoedown.  The input is rendered with the
    ``HOEDOWN`` command and piped through the ``TIDY`` command; the expected
    file is passed through the same ``TIDY`` command; the two stripped
    results must be equal.  Exit statuses of the child processes are not
    checked.

    Raises:
        TestFailed: when the normalised outputs differ.
    """
    flags = test_case.get('flags') or []
    hoedown_proc = subprocess.Popen(
        HOEDOWN + flags + [os.path.join(TEST_ROOT, test_case['input'])],
        stdout=subprocess.PIPE,
    )
    try:
        # universal_newlines=True yields text rather than bytes, which
        # difflib requires on Python 3 when TestFailed builds its report.
        got_tidy_proc = subprocess.Popen(
            TIDY, stdin=hoedown_proc.stdout, stdout=subprocess.PIPE,
            universal_newlines=True,
        )
        # tidy holds its own copy of the pipe now; drop ours so hoedown gets
        # a broken-pipe error instead of blocking if tidy exits early.
        hoedown_proc.stdout.close()
        # communicate() drains the pipe while waiting.  Calling wait() first
        # can deadlock once the child fills the OS pipe buffer.
        got = got_tidy_proc.communicate()[0].strip()
    finally:
        hoedown_proc.stdout.close()  # no-op when already closed above
        hoedown_proc.wait()

    expected_tidy_proc = subprocess.Popen(
        TIDY + [os.path.join(TEST_ROOT, test_case['output'])],
        stdout=subprocess.PIPE, universal_newlines=True,
    )
    expected = expected_tidy_proc.communicate()[0].strip()

    # Explicit comparison instead of ``assert`` so the check still runs when
    # Python is started with -O.
    if expected != got:
        raise TestFailed(test_case['input'], expected, got)
def _make_test(test_case):
return lambda self: _test_func(test_case)
class MarkdownTestsMeta(type):
    """Metaclass for ``MarkdownTests`` that injects test methods on the fly.

    While the class is being created, the JSON manifest at ``CONFIG_PATH`` is
    loaded and one method is added per entry of its ``'tests'`` list.
    """

    def __new__(meta, name, bases, attrs):
        with open(CONFIG_PATH) as f:
            config = json.load(f)

        for test in config['tests']:
            input_name = test['input']
            # Method name: 'test_' + the lower-cased input path without its
            # extension, with every non-word character replaced by '_'.
            attr_name = 'test_' + SLUGIFY_PATTERN.sub(
                '_', os.path.splitext(input_name)[0].lower(),
            )
            func = _make_test(test)
            func.__doc__ = input_name
            if test.get('skip', False):
                func = unittest.skip(input_name)(func)
            if test.get('fail', False):
                # The decorator is ``expectedFailure``; the previous
                # spelling ``expectsFailure`` does not exist in unittest and
                # raised AttributeError for any entry flagged "fail".
                func = unittest.expectedFailure(func)
            attrs[attr_name] = func

        return type.__new__(meta, name, bases, attrs)
class MarkdownTests(with_metaclass(MarkdownTestsMeta, unittest.TestCase)):
    """Test case whose ``test_*`` methods are supplied by the metaclass."""


# Run the test cases when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()