diff --git a/configure.sh b/configure.sh index 964b1204..9c72a053 100755 --- a/configure.sh +++ b/configure.sh @@ -9,7 +9,6 @@ # ac_help='--enable-amalloc Enable memory allocation debugging --with-tabstops=N Set tabstops to N characters (default is 4) ---with-urlencoded-anchor Use url-encoded chars to multibyte chars in toc links --enable-all-features Turn on all stable optional features --shared Build shared libraries (default is static)' @@ -44,14 +43,13 @@ TARGET=markdown AC_INIT $TARGET -for banned_with in dl fenced-code id-anchor github-tags; do +for banned_with in dl fenced-code id-anchor github-tags urlencoded-anchor; do banned_with_variable_ref=\$WITH_`echo "$banned_with" | $AC_UPPERCASE | tr - _` if [ "`eval echo "$banned_with_variable_ref"`" ]; then AC_FAIL "Invalid option: --with-$banned_with. Please use a runtime flag to configure this feature." fi done -test "$WITH_URLENCODED_ANCHOR" && AC_DEFINE 'WITH_URLENCODED_ANCHOR' 1 test "$DEBIAN_GLITCH" && AC_DEFINE 'DEBIAN_GLITCH' 1 AC_PROG_CC diff --git a/flags.c b/flags.c index 3b12797e..0bd146d8 100644 --- a/flags.c +++ b/flags.c @@ -35,6 +35,7 @@ static struct flagnames flagnames[] = { { MKD_FENCEDCODE, "FENCEDCODE" }, { MKD_IDANCHOR, "IDANCHOR" }, { MKD_GITHUBTAGS, "GITHUBTAGS" }, + { MKD_URLENCODEDANCHOR, "URLENCODEDANCHOR" }, }; #define NR(x) (sizeof x/sizeof x[0]) diff --git a/generate.c b/generate.c index 888eeea8..2bef56eb 100644 --- a/generate.c +++ b/generate.c @@ -1430,7 +1430,7 @@ printheader(Paragraph *pp, MMIOT *f) Qstring(" id=\"", f); mkd_string_to_anchor(T(pp->text->text), S(pp->text->text), - (mkd_sta_function_t)Qchar, f, 1); + (mkd_sta_function_t)Qchar, f, 1, f->flags); Qchar('"', f); } Qchar('>', f); @@ -1439,7 +1439,7 @@ printheader(Paragraph *pp, MMIOT *f) Qstring("text->text), S(pp->text->text), - (mkd_sta_function_t)Qchar, f, 1); + (mkd_sta_function_t)Qchar, f, 1, f->flags); Qstring("\">\n", f); } Qprintf(f, "", pp->hnumber); diff --git a/markdown.1 b/markdown.1 index eded7f49..481a3e1e 100644 --- a/markdown.1 +++ b/markdown.1 @@ -125,6 +125,8 @@ Allow fenced code blocks (not default). Use id= anchors for table-of-contents links instead of (not default). .It Ar githubtags Allow underscore and dash in passed through element names (not default). +.It Ar urlencodedanchor +Use url-encoded chars for multibyte and nonalphanumeric chars rather than dots in toc links. .El .Pp As an example, the option diff --git a/markdown.3 b/markdown.3 index f60fc62a..02286074 100644 --- a/markdown.3 +++ b/markdown.3 @@ -118,6 +118,8 @@ Allow fenced code blocks. Use id= anchors instead of for table-of-contents links. .It Ar MKD_GITHUBTAGS Allow underscore and dash in passed through element names. +.It Ar MKD_URLENCODEDANCHOR +Use url-encoded chars for multibyte and nonalphanumeric chars rather than dots in toc links. .El .Sh RETURN VALUES .Fn markdown diff --git a/markdown.h b/markdown.h index 6cfcc3d2..8c6d7097 100644 --- a/markdown.h +++ b/markdown.h @@ -134,8 +134,9 @@ typedef struct mmiot { #define MKD_FENCEDCODE 0x02000000 #define MKD_IDANCHOR 0x04000000 #define MKD_GITHUBTAGS 0x08000000 -#define IS_LABEL 0x40000000 -#define USER_FLAGS 0x7FFFFFFF +#define MKD_URLENCODEDANCHOR 0x10000000 +#define IS_LABEL 0x20000000 +#define USER_FLAGS 0x3FFFFFFF #define INPUT_MASK (MKD_NOHEADER|MKD_TABSTOP) Callback_data *cb; @@ -195,7 +196,7 @@ extern int mkd_generateline(char *, int, FILE*, DWORD); extern void mkd_basename(Document*, char *); typedef int (*mkd_sta_function_t)(const int,const void*); -extern void mkd_string_to_anchor(char*,int, mkd_sta_function_t, void*, int); +extern void mkd_string_to_anchor(char*,int, mkd_sta_function_t, void*, int, DWORD); extern Document *mkd_in(FILE *, DWORD); extern Document *mkd_string(const char*,int, DWORD); diff --git a/mkdio.c b/mkdio.c index fd6105df..765c659a 100644 --- a/mkdio.c +++ b/mkdio.c @@ -213,37 +213,33 @@ markdown(Document *document, FILE *out, int flags) */ void mkd_string_to_anchor(char *s, int len, mkd_sta_function_t outchar, - void *out, int labelformat) + void *out, int labelformat, + DWORD flags) { -#if WITH_URLENCODED_ANCHOR static const unsigned char hexchars[] = "0123456789abcdef"; -#endif unsigned char c; int i, size; char *line; size = mkd_line(s, len, &line, IS_LABEL); - -#if !WITH_URLENCODED_ANCHOR - if ( labelformat && (size>0) && !isalpha(line[0]) ) + + if ( !(flags & MKD_URLENCODEDANCHOR) + && labelformat + && (size>0) && !isalpha(line[0]) ) (*outchar)('L',out); -#endif for ( i=0; i < size ; i++ ) { c = line[i]; if ( labelformat ) { if ( isalnum(c) || (c == '_') || (c == ':') || (c == '-') || (c == '.' ) ) (*outchar)(c, out); - else -#if WITH_URLENCODED_ANCHOR - { + else if ( flags & MKD_URLENCODEDANCHOR ) { (*outchar)('%', out); (*outchar)(hexchars[c >> 4 & 0xf], out); (*outchar)(hexchars[c & 0xf], out); } -#else + else (*outchar)('.', out); -#endif } else (*outchar)(c,out); diff --git a/mkdio.h.in b/mkdio.h.in index 0899af1f..ffd82fb5 100644 --- a/mkdio.h.in +++ b/mkdio.h.in @@ -111,6 +111,7 @@ void mkd_ref_prefix(MMIOT*, char*); #define MKD_FENCEDCODE 0x02000000 /* enabled fenced code blocks */ #define MKD_IDANCHOR 0x04000000 /* use id= anchors for TOC links */ #define MKD_GITHUBTAGS 0x08000000 /* allow dash and underscore in element names */ +#define MKD_URLENCODEDANCHOR 0x10000000 /* urlencode non-identifier chars instead of replacing with dots */ #define MKD_EMBED MKD_NOLINKS|MKD_NOIMAGE|MKD_TAGTEXT diff --git a/pgm_options.c b/pgm_options.c index 8a66c163..c64515c3 100644 --- a/pgm_options.c +++ b/pgm_options.c @@ -60,6 +60,7 @@ static struct _opt { { "fencedcode", "fenced code blocks", 0, 0, 1, MKD_FENCEDCODE }, { "idanchor", "id= anchors in TOC", 0, 0, 1, MKD_IDANCHOR }, { "githubtags", "permit - and _ in element names", 0, 0, 0, MKD_GITHUBTAGS }, + { "urlencodedanchor", "urlencode special chars in TOC links", 0, 0, 0, MKD_URLENCODEDANCHOR }, } ; #define NR(x) (sizeof x / sizeof x[0]) diff --git a/tests/toc.t b/tests/toc.t index 59f39129..2fc2628c 100644 --- a/tests/toc.t +++ b/tests/toc.t @@ -75,5 +75,14 @@ try '-T -ftoc' 'toc item with non-alpha start' \

1 header

' +# Be sure to save toc.t as UTF-8. +try '-T -ftoc,urlencodedanchor' 'urlencoded multibyte chars' \ +'#It’s an apostrophe' \ +' + +

It’s an apostrophe

' + summary $0 exit $rc diff --git a/toc.c b/toc.c index 8037e360..885b3dfb 100644 --- a/toc.c +++ b/toc.c @@ -62,11 +62,11 @@ mkd_toc(Document *p, char **doc) Csprintf(&res, "%*s
  • hnumber, ""); mkd_string_to_anchor(T(srcp->text->text), S(srcp->text->text), - (mkd_sta_function_t)Csputc, &res,1); + (mkd_sta_function_t)Csputc, &res,1,p->ctx->flags); Csprintf(&res, "\">"); mkd_string_to_anchor(T(srcp->text->text), S(srcp->text->text), - (mkd_sta_function_t)Csputc, &res,0); + (mkd_sta_function_t)Csputc, &res,0,p->ctx->flags); Csprintf(&res, ""); first = 0;