Allow 8-bit characters. This is not really complete, it only marks

characters with the highest bit set as HIGHBIT.  We need to expand
this to support the UTF-8 character set properly.  However, this
solves the problem that the character 0x80 (which is common in UTF-8)
gets masked to 0x00.
Patch submitted by "Huang Yuzhen" <huangyuzhen@bj.tom.com>
This commit is contained in:
Richard Levitte 2002-01-02 11:06:17 +00:00
parent 9d7bb313e3
commit b7342e116b
2 changed files with 123 additions and 82 deletions

View file

@ -71,6 +71,7 @@
#define CONF_COMMENT 128
#define CONF_FCOMMENT 2048
#define CONF_EOF 8
#define CONF_HIGHBIT 4096
#define CONF_ALPHA (CONF_UPPER|CONF_LOWER)
#define CONF_ALPHA_NUMERIC (CONF_ALPHA|CONF_NUMBER|CONF_UNDER)
#define CONF_ALPHA_NUMERIC_PUNCT (CONF_ALPHA|CONF_NUMBER|CONF_UNDER| \
@ -78,68 +79,102 @@
#define KEYTYPES(c) ((unsigned short *)((c)->meth_data))
#ifndef CHARSET_EBCDIC
#define IS_COMMENT(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_COMMENT)
#define IS_FCOMMENT(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_FCOMMENT)
#define IS_EOF(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_EOF)
#define IS_ESC(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_ESC)
#define IS_NUMBER(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_NUMBER)
#define IS_WS(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_WS)
#define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_ALPHA_NUMERIC)
#define IS_COMMENT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_COMMENT)
#define IS_FCOMMENT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_FCOMMENT)
#define IS_EOF(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_EOF)
#define IS_ESC(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_ESC)
#define IS_NUMBER(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_NUMBER)
#define IS_WS(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_WS)
#define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_ALPHA_NUMERIC)
#define IS_ALPHA_NUMERIC_PUNCT(c,a) \
(KEYTYPES(c)[(a)&0x7f]&CONF_ALPHA_NUMERIC_PUNCT)
#define IS_QUOTE(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_QUOTE)
#define IS_DQUOTE(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_DQUOTE)
(KEYTYPES(c)[(a)&0xff]&CONF_ALPHA_NUMERIC_PUNCT)
#define IS_QUOTE(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_QUOTE)
#define IS_DQUOTE(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_DQUOTE)
#define IS_HIGHBIT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_HIGHBIT)
#else /*CHARSET_EBCDIC*/
#define IS_COMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_COMMENT)
#define IS_FCOMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_FCOMMENT)
#define IS_EOF(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_EOF)
#define IS_ESC(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_ESC)
#define IS_NUMBER(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_NUMBER)
#define IS_WS(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_WS)
#define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_ALPHA_NUMERIC)
#define IS_COMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_COMMENT)
#define IS_FCOMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_FCOMMENT)
#define IS_EOF(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_EOF)
#define IS_ESC(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ESC)
#define IS_NUMBER(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_NUMBER)
#define IS_WS(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_WS)
#define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ALPHA_NUMERIC)
#define IS_ALPHA_NUMERIC_PUNCT(c,a) \
(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_ALPHA_NUMERIC_PUNCT)
#define IS_QUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_QUOTE)
#define IS_DQUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_DQUOTE)
(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ALPHA_NUMERIC_PUNCT)
#define IS_QUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_QUOTE)
#define IS_DQUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_DQUOTE)
#define IS_HIGHBIT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_HIGHBIT)
#endif /*CHARSET_EBCDIC*/
static unsigned short CONF_type_default[128]={
0x008,0x000,0x000,0x000,0x000,0x000,0x000,0x000,
0x000,0x010,0x010,0x000,0x000,0x010,0x000,0x000,
0x000,0x000,0x000,0x000,0x000,0x000,0x000,0x000,
0x000,0x000,0x000,0x000,0x000,0x000,0x000,0x000,
0x010,0x200,0x040,0x080,0x000,0x200,0x200,0x040,
0x000,0x000,0x200,0x200,0x200,0x200,0x200,0x200,
0x001,0x001,0x001,0x001,0x001,0x001,0x001,0x001,
0x001,0x001,0x000,0x200,0x000,0x000,0x000,0x200,
0x200,0x002,0x002,0x002,0x002,0x002,0x002,0x002,
0x002,0x002,0x002,0x002,0x002,0x002,0x002,0x002,
0x002,0x002,0x002,0x002,0x002,0x002,0x002,0x002,
0x002,0x002,0x002,0x000,0x020,0x000,0x200,0x100,
0x040,0x004,0x004,0x004,0x004,0x004,0x004,0x004,
0x004,0x004,0x004,0x004,0x004,0x004,0x004,0x004,
0x004,0x004,0x004,0x004,0x004,0x004,0x004,0x004,
0x004,0x004,0x004,0x000,0x200,0x000,0x200,0x000,
static unsigned short CONF_type_default[256]={
0x0008,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0010,0x0010,0x0000,0x0000,0x0010,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0010,0x0200,0x0040,0x0080,0x0000,0x0200,0x0200,0x0040,
0x0000,0x0000,0x0200,0x0200,0x0200,0x0200,0x0200,0x0200,
0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,
0x0001,0x0001,0x0000,0x0200,0x0000,0x0000,0x0000,0x0200,
0x0200,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,
0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,
0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,
0x0002,0x0002,0x0002,0x0000,0x0020,0x0000,0x0200,0x0100,
0x0040,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,
0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,
0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,
0x0004,0x0004,0x0004,0x0000,0x0200,0x0000,0x0200,0x0000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
};
static unsigned short CONF_type_win32[128]={
0x008,0x000,0x000,0x000,0x000,0x000,0x000,0x000,
0x000,0x010,0x010,0x000,0x000,0x010,0x000,0x000,
0x000,0x000,0x000,0x000,0x000,0x000,0x000,0x000,
0x000,0x000,0x000,0x000,0x000,0x000,0x000,0x000,
0x010,0x200,0x400,0x000,0x000,0x200,0x200,0x000,
0x000,0x000,0x200,0x200,0x200,0x200,0x200,0x200,
0x001,0x001,0x001,0x001,0x001,0x001,0x001,0x001,
0x001,0x001,0x000,0xA00,0x000,0x000,0x000,0x200,
0x200,0x002,0x002,0x002,0x002,0x002,0x002,0x002,
0x002,0x002,0x002,0x002,0x002,0x002,0x002,0x002,
0x002,0x002,0x002,0x002,0x002,0x002,0x002,0x002,
0x002,0x002,0x002,0x000,0x000,0x000,0x200,0x100,
0x000,0x004,0x004,0x004,0x004,0x004,0x004,0x004,
0x004,0x004,0x004,0x004,0x004,0x004,0x004,0x004,
0x004,0x004,0x004,0x004,0x004,0x004,0x004,0x004,
0x004,0x004,0x004,0x000,0x200,0x000,0x200,0x000,
static unsigned short CONF_type_win32[256]={
0x0008,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0010,0x0010,0x0000,0x0000,0x0010,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0010,0x0200,0x0400,0x0000,0x0000,0x0200,0x0200,0x0000,
0x0000,0x0000,0x0200,0x0200,0x0200,0x0200,0x0200,0x0200,
0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,
0x0001,0x0001,0x0000,0x0A00,0x0000,0x0000,0x0000,0x0200,
0x0200,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,
0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,
0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,
0x0002,0x0002,0x0002,0x0000,0x0000,0x0000,0x0200,0x0100,
0x0000,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,
0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,
0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,
0x0004,0x0004,0x0004,0x0000,0x0200,0x0000,0x0200,0x0000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
};

View file

@ -12,8 +12,9 @@ $DQUOTE=0x400;
$COMMENT=0x80;
$FCOMMENT=0x800;
$EOF=0x08;
$HIGHBIT=0x1000;
foreach (0 .. 127)
foreach (0 .. 255)
{
$v=0;
$c=sprintf("%c",$_);
@ -27,11 +28,12 @@ foreach (0 .. 127)
$v|=$QUOTE if ($c =~ /['`"]/); # for emacs: "`'}/)
$v|=$COMMENT if ($c =~ /\#/);
$v|=$EOF if ($c =~ /\0/);
$v|=$HIGHBIT if ($c =~/[\x80-\xff]/);
push(@V_def,$v);
}
foreach (0 .. 127)
foreach (0 .. 255)
{
$v=0;
$c=sprintf("%c",$_);
@ -44,6 +46,7 @@ foreach (0 .. 127)
$v|=$DQUOTE if ($c =~ /["]/); # for emacs: "}/)
$v|=$FCOMMENT if ($c =~ /;/);
$v|=$EOF if ($c =~ /\0/);
$v|=$HIGHBIT if ($c =~/[\x80-\xff]/);
push(@V_w32,$v);
}
@ -122,6 +125,7 @@ print <<"EOF";
#define CONF_COMMENT $COMMENT
#define CONF_FCOMMENT $FCOMMENT
#define CONF_EOF $EOF
#define CONF_HIGHBIT $HIGHBIT
#define CONF_ALPHA (CONF_UPPER|CONF_LOWER)
#define CONF_ALPHA_NUMERIC (CONF_ALPHA|CONF_NUMBER|CONF_UNDER)
#define CONF_ALPHA_NUMERIC_PUNCT (CONF_ALPHA|CONF_NUMBER|CONF_UNDER| \\
@ -129,51 +133,53 @@ print <<"EOF";
#define KEYTYPES(c) ((unsigned short *)((c)->meth_data))
#ifndef CHARSET_EBCDIC
#define IS_COMMENT(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_COMMENT)
#define IS_FCOMMENT(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_FCOMMENT)
#define IS_EOF(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_EOF)
#define IS_ESC(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_ESC)
#define IS_NUMBER(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_NUMBER)
#define IS_WS(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_WS)
#define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_ALPHA_NUMERIC)
#define IS_COMMENT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_COMMENT)
#define IS_FCOMMENT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_FCOMMENT)
#define IS_EOF(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_EOF)
#define IS_ESC(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_ESC)
#define IS_NUMBER(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_NUMBER)
#define IS_WS(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_WS)
#define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_ALPHA_NUMERIC)
#define IS_ALPHA_NUMERIC_PUNCT(c,a) \\
(KEYTYPES(c)[(a)&0x7f]&CONF_ALPHA_NUMERIC_PUNCT)
#define IS_QUOTE(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_QUOTE)
#define IS_DQUOTE(c,a) (KEYTYPES(c)[(a)&0x7f]&CONF_DQUOTE)
(KEYTYPES(c)[(a)&0xff]&CONF_ALPHA_NUMERIC_PUNCT)
#define IS_QUOTE(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_QUOTE)
#define IS_DQUOTE(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_DQUOTE)
#define IS_HIGHBIT(c,a) (KEYTYPES(c)[(a)&0xff]&CONF_HIGHBIT)
#else /*CHARSET_EBCDIC*/
#define IS_COMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_COMMENT)
#define IS_FCOMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_FCOMMENT)
#define IS_EOF(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_EOF)
#define IS_ESC(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_ESC)
#define IS_NUMBER(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_NUMBER)
#define IS_WS(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_WS)
#define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_ALPHA_NUMERIC)
#define IS_COMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_COMMENT)
#define IS_FCOMMENT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_FCOMMENT)
#define IS_EOF(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_EOF)
#define IS_ESC(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ESC)
#define IS_NUMBER(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_NUMBER)
#define IS_WS(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_WS)
#define IS_ALPHA_NUMERIC(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ALPHA_NUMERIC)
#define IS_ALPHA_NUMERIC_PUNCT(c,a) \\
(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_ALPHA_NUMERIC_PUNCT)
#define IS_QUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_QUOTE)
#define IS_DQUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_DQUOTE)
(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ALPHA_NUMERIC_PUNCT)
#define IS_QUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_QUOTE)
#define IS_DQUOTE(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_DQUOTE)
#define IS_HIGHBIT(c,a) (KEYTYPES(c)[os_toascii[a]&0xff]&CONF_HIGHBIT)
#endif /*CHARSET_EBCDIC*/
EOF
print "static unsigned short CONF_type_default[128]={";
print "static unsigned short CONF_type_default[256]={";
for ($i=0; $i<128; $i++)
for ($i=0; $i<256; $i++)
{
print "\n\t" if ($i % 8) == 0;
printf "0x%03X,",$V_def[$i];
printf "0x%04X,",$V_def[$i];
}
print "\n\t};\n\n";
print "static unsigned short CONF_type_win32[128]={";
print "static unsigned short CONF_type_win32[256]={";
for ($i=0; $i<128; $i++)
for ($i=0; $i<256; $i++)
{
print "\n\t" if ($i % 8) == 0;
printf "0x%03X,",$V_w32[$i];
printf "0x%04X,",$V_w32[$i];
}
print "\n\t};\n\n";