openssl/util/perl/OpenSSL/ParseC.pm

#! /usr/bin/env perl
# Copyright 2018 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

package OpenSSL::ParseC;

use strict;
use warnings;

use Exporter;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
$VERSION = "0.9";
@ISA = qw(Exporter);
@EXPORT = qw(parse);

# Global handler data
my @preprocessor_conds;         # A list of simple preprocessor conditions,
                                # each item being a list of macros defined
                                # or not defined.

# Handler helpers
sub all_conds {
    return map { ( @$_ ) } @preprocessor_conds;
}

# A list of handlers that will look at a "complete" string and try to
# figure out what to make of it.
# Each handler is a hash with the following keys:
#
# regexp                a regexp to compare the "complete" string with.
# checker               a function that does a more complex comparison.
#                       Use this instead of regexp if that isn't enough.
# massager              massages the "complete" string into an array with
#                       the following elements:
#
#                       [0]     String that needs further processing (this
#                               applies to typedefs of structs), or empty.
#                       [1]     The name of what was found.
#                       [2]     A character that denotes what type of thing
#                               this is: 'F' for function, 'S' for struct,
#                               'T' for typedef, 'M' for macro, 'V' for
#                               variable.
#                       [3]     Return type (only for type 'F' and 'V')
#                       [4]     Value (for type 'M') or signature (for type 'F',
#                               'V', 'T' or 'S')
#                       [5...]  The list of preprocessor conditions this is
#                               found in, as in checks for macro definitions
#                               (stored as the macro's name) or the absence
#                               of definition (stored as the macro's name
#                               prefixed with a '!'
#
#                       If the massager returns an empty list, it means the
#                       "complete" string has side effects but should otherwise
#                       be ignored.
#                       If the massager is undefined, the "complete" string
#                       should be ignored.
my @opensslcpphandlers = (
    ##################################################################
    # OpenSSL CPP specials
    #
    # These are used to convert certain pre-precessor expressions into
    # others that @cpphandlers have a better chance to understand.

    { regexp   => qr/#if OPENSSL_API_COMPAT(\S+)(0x[0-9a-fA-F]{8})L$/,
      massager => sub {
          my $op = $1;
          my $v = hex($2);
          if ($op ne '<' && $op ne '>=') {
              die "Error: unacceptable operator $op: $_[0]\n";
          }
          my ($one, $major, $minor) =
              ( ($v >> 28) & 0xf,
                ($v >> 20) & 0xff,
                ($v >> 12) & 0xff );
          my $t = "DEPRECATEDIN_${one}_${major}_${minor}";
          my $cond = $op eq '<' ? 'ifndef' : 'ifdef';
          return (<<"EOF");
#$cond $t
EOF
      }
   }
);
my @cpphandlers = (
    ##################################################################
    # CPP stuff

    { regexp   => qr/#ifdef ?(.*)/,
      massager => sub {
          my %opts;
          if (ref($_[$#_]) eq "HASH") {
              %opts = %{$_[$#_]};
              pop @_;
          }
          push @preprocessor_conds, [ $1 ];
          print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
              if $opts{debug};
          return ();
      },
    },
    { regexp   => qr/#ifndef ?(.*)/,
      massager => sub {
          my %opts;
          if (ref($_[$#_]) eq "HASH") {
              %opts = %{$_[$#_]};
              pop @_;
          }
          push @preprocessor_conds, [ '!'.$1 ];
          print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
              if $opts{debug};
          return ();
      },
    },
    { regexp   => qr/#if (0|1)/,
      massager => sub {
          my %opts;
          if (ref($_[$#_]) eq "HASH") {
              %opts = %{$_[$#_]};
              pop @_;
          }
          if ($1 eq "1") {
              push @preprocessor_conds, [ "TRUE" ];
          } else {
              push @preprocessor_conds, [ "!TRUE" ];
          }
          print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
              if $opts{debug};
          return ();
      },
    },
    { regexp   => qr/#if ?(.*)/,
      massager => sub {
          my %opts;
          if (ref($_[$#_]) eq "HASH") {
              %opts = %{$_[$#_]};
              pop @_;
          }
          my @results = ();
          my $conds = $1;
          if ($conds =~ m|^defined<<<\(([^\)]*)\)>>>(.*)$|) {
              push @results, $1; # Handle the simple case
              my $rest = $2;
              my $re = qr/^(?:\|\|defined<<<\([^\)]*\)>>>)*$/;
              print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n"
                  if $opts{debug};
              if ($rest =~ m/$re/) {
                  my @rest = split /\|\|/, $rest;
                  shift @rest;
                  foreach (@rest) {
                      m|^defined<<<\(([^\)]*)\)>>>$|;
                      die "Something wrong...$opts{PLACE}" if $1 eq "";
                      push @results, $1;
                  }
              } else {
                  $conds =~ s/<<<|>>>//g;
                  warn "Warning: complicated #if expression(1): $conds$opts{PLACE}"
                      if $opts{warnings};
              }
          } elsif ($conds =~ m|^!defined<<<\(([^\)]*)\)>>>(.*)$|) {
              push @results, '!'.$1; # Handle the simple case
              my $rest = $2;
              my $re = qr/^(?:\&\&!defined<<<\([^\)]*\)>>>)*$/;
              print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n"
                  if $opts{debug};
              if ($rest =~ m/$re/) {
                  my @rest = split /\&\&/, $rest;
                  shift @rest;
                  foreach (@rest) {
                      m|^!defined<<<\(([^\)]*)\)>>>$|;
                      die "Something wrong...$opts{PLACE}" if $1 eq "";
                      push @results, '!'.$1;
                  }
              } else {
                  $conds =~ s/<<<|>>>//g;
                  warn "Warning: complicated #if expression(2): $conds$opts{PLACE}"
                      if $opts{warnings};
              }
          } else {
              $conds =~ s/<<<|>>>//g;
              warn "Warning: complicated #if expression(3): $conds$opts{PLACE}"
                  if $opts{warnings};
          }
          print STDERR "DEBUG[",$opts{debug_type},"]: Added preprocessor conds: '", join("', '", @results), "'\n"
              if $opts{debug};
          push @preprocessor_conds, [ @results ];
          print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
              if $opts{debug};
          return ();
      },
    },
    { regexp   => qr/#elif (.*)/,
      massager => sub {
          my %opts;
          if (ref($_[$#_]) eq "HASH") {
              %opts = %{$_[$#_]};
              pop @_;
          }
          die "An #elif without corresponding condition$opts{PLACE}"
              if !@preprocessor_conds;
          pop @preprocessor_conds;
          print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
              if $opts{debug};
          return (<<"EOF");
#if $1
EOF
      },
    },
    { regexp   => qr/#else/,
      massager => sub {
          my %opts;
          if (ref($_[$#_]) eq "HASH") {
              %opts = %{$_[$#_]};
              pop @_;
          }
          die "An #else without corresponding condition$opts{PLACE}"
              if !@preprocessor_conds;
          # Invert all conditions on the last level
          my $stuff = pop @preprocessor_conds;
          push @preprocessor_conds, [
              map { m|^!(.*)$| ? $1 : '!'.$_ } @$stuff
          ];
          print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
              if $opts{debug};
          return ();
      },
    },
    { regexp   => qr/#endif ?/,
      massager => sub {
          my %opts;
          if (ref($_[$#_]) eq "HASH") {
              %opts = %{$_[$#_]};
              pop @_;
          }
          die "An #endif without corresponding condition$opts{PLACE}"
              if !@preprocessor_conds;
          pop @preprocessor_conds;
          print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
              if $opts{debug};
          return ();
      },
    },
    { regexp   => qr/#define ([[:alpha:]_]\w*)(<<<\(.*?\)>>>)?( (.*))?/,
      massager => sub {
          my $name = $1;
          my $params = $2;
          my $spaceval = $3||"";
          my $val = $4||"";
          return ("",
                  $1, 'M', "", $params ? "$name$params$spaceval" : $val,
                  all_conds()); }
    },
    { regexp   => qr/#.*/,
      massager => sub { return (); }
    },
    );

my @opensslchandlers = (
    ##################################################################
    # OpenSSL C specials
    #
    # They are really preprocessor stuff, but they look like C stuff
    # to this parser.  All of these do replacements, anything else is
    # an error.

    #####
    # Global variable stuff
    { regexp   => qr/OPENSSL_DECLARE_GLOBAL<<<\((.*),(.*)\)>>>;/,
      massager => sub { return (<<"EOF");
#ifndef OPENSSL_EXPORT_VAR_AS_FUNCTION
OPENSSL_EXPORT $1 _shadow_$2;
#else
$1 *_shadow_$2(void);
#endif
EOF
      },
    },

    #####
    # Deprecated stuff, by OpenSSL release.

    # We trick the parser by pretending that the declaration is wrapped in a
    # check if the DEPRECATEDIN macro is defined or not.  Callers of parse()
    # will have to decide what to do with it.
    { regexp   => qr/(DEPRECATEDIN_\d+_\d+_\d+)<<<\((.*)\)>>>/,
      massager => sub { return (<<"EOF");
#ifndef $1
$2;
#endif
EOF
      },
    },

    #####
    # LHASH stuff

    # LHASH_OF(foo) is used as a type, but the chandlers won't take it
    # gracefully, so we expand it here.
    { regexp   => qr/(.*)\bLHASH_OF<<<\((.*?)\)>>>(.*)/,
      massager => sub { return ("$1struct lhash_st_$2$3"); }
    },
    { regexp   => qr/DEFINE_LHASH_OF<<<\((.*)\)>>>/,
      massager => sub {
          return (<<"EOF");
static ossl_inline LHASH_OF($1) * lh_$1_new(unsigned long (*hfn)(const $1 *),
                                            int (*cfn)(const $1 *, const $1 *));
static ossl_inline void lh_$1_free(LHASH_OF($1) *lh);
static ossl_inline $1 *lh_$1_insert(LHASH_OF($1) *lh, $1 *d);
static ossl_inline $1 *lh_$1_delete(LHASH_OF($1) *lh, const $1 *d);
static ossl_inline $1 *lh_$1_retrieve(LHASH_OF($1) *lh, const $1 *d);
static ossl_inline int lh_$1_error(LHASH_OF($1) *lh);
static ossl_inline unsigned long lh_$1_num_items(LHASH_OF($1) *lh);
static ossl_inline void lh_$1_node_stats_bio(const LHASH_OF($1) *lh, BIO *out);
static ossl_inline void lh_$1_node_usage_stats_bio(const LHASH_OF($1) *lh,
                                                   BIO *out);
static ossl_inline void lh_$1_stats_bio(const LHASH_OF($1) *lh, BIO *out);
static ossl_inline unsigned long lh_$1_get_down_load(LHASH_OF($1) *lh);
static ossl_inline void lh_$1_set_down_load(LHASH_OF($1) *lh, unsigned long dl);
static ossl_inline void lh_$1_doall(LHASH_OF($1) *lh, void (*doall)($1 *));
LHASH_OF($1)
EOF
      }
     },

    #####
    # STACK stuff

    # STACK_OF(foo) is used as a type, but the chandlers won't take it
    # gracefully, so we expand it here.
    { regexp   => qr/(.*)\bSTACK_OF<<<\((.*?)\)>>>(.*)/,
      massager => sub { return ("$1struct stack_st_$2$3"); }
    },
#    { regexp   => qr/(.*)\bSTACK_OF\((.*?)\)(.*)/,
#      massager => sub {
#          my $before = $1;
#          my $stack_of = "struct stack_st_$2";
#          my $after = $3;
#          if ($after =~ m|^\w|) { $after = " ".$after; }
#          return ("$before$stack_of$after");
#      }
#    },
    { regexp   => qr/SKM_DEFINE_STACK_OF<<<\((.*),(.*),(.*)\)>>>/,
      massager => sub {
          return (<<"EOF");
STACK_OF($1);
typedef int (*sk_$1_compfunc)(const $3 * const *a, const $3 *const *b);
typedef void (*sk_$1_freefunc)($3 *a);
typedef $3 * (*sk_$1_copyfunc)(const $3 *a);
static ossl_inline int sk_$1_num(const STACK_OF($1) *sk);
static ossl_inline $2 *sk_$1_value(const STACK_OF($1) *sk, int idx);
static ossl_inline STACK_OF($1) *sk_$1_new(sk_$1_compfunc compare);
static ossl_inline STACK_OF($1) *sk_$1_new_null(void);
static ossl_inline STACK_OF($1) *sk_$1_new_reserve(sk_$1_compfunc compare,
                                                   int n);
static ossl_inline int sk_$1_reserve(STACK_OF($1) *sk, int n);
static ossl_inline void sk_$1_free(STACK_OF($1) *sk);
static ossl_inline void sk_$1_zero(STACK_OF($1) *sk);
static ossl_inline $2 *sk_$1_delete(STACK_OF($1) *sk, int i);
static ossl_inline $2 *sk_$1_delete_ptr(STACK_OF($1) *sk, $2 *ptr);
static ossl_inline int sk_$1_push(STACK_OF($1) *sk, $2 *ptr);
static ossl_inline int sk_$1_unshift(STACK_OF($1) *sk, $2 *ptr);
static ossl_inline $2 *sk_$1_pop(STACK_OF($1) *sk);
static ossl_inline $2 *sk_$1_shift(STACK_OF($1) *sk);
static ossl_inline void sk_$1_pop_free(STACK_OF($1) *sk,
                                       sk_$1_freefunc freefunc);
static ossl_inline int sk_$1_insert(STACK_OF($1) *sk, $2 *ptr, int idx);
static ossl_inline $2 *sk_$1_set(STACK_OF($1) *sk, int idx, $2 *ptr);
static ossl_inline int sk_$1_find(STACK_OF($1) *sk, $2 *ptr);
static ossl_inline int sk_$1_find_ex(STACK_OF($1) *sk, $2 *ptr);
static ossl_inline void sk_$1_sort(STACK_OF($1) *sk);
static ossl_inline int sk_$1_is_sorted(const STACK_OF($1) *sk);
static ossl_inline STACK_OF($1) * sk_$1_dup(const STACK_OF($1) *sk);
static ossl_inline STACK_OF($1) *sk_$1_deep_copy(const STACK_OF($1) *sk,
                                                 sk_$1_copyfunc copyfunc,
                                                 sk_$1_freefunc freefunc);
static ossl_inline sk_$1_compfunc sk_$1_set_cmp_func(STACK_OF($1) *sk,
                                                     sk_$1_compfunc compare);
EOF
      }
    },
    { regexp   => qr/DEFINE_SPECIAL_STACK_OF<<<\((.*),(.*)\)>>>/,
      massager => sub { return ("SKM_DEFINE_STACK_OF($1,$2,$2)"); },
    },
    { regexp   => qr/DEFINE_STACK_OF<<<\((.*)\)>>>/,
      massager => sub { return ("SKM_DEFINE_STACK_OF($1,$1,$1)"); },
    },
    { regexp   => qr/DEFINE_SPECIAL_STACK_OF_CONST<<<\((.*),(.*)\)>>>/,
      massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $2,$2)"); },
    },
    { regexp   => qr/DEFINE_STACK_OF_CONST<<<\((.*)\)>>>/,
      massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $1,$1)"); },
    },
    { regexp   => qr/PREDECLARE_STACK_OF<<<\((.*)\)>>>/,
      massager => sub { return ("STACK_OF($1);"); }
    },
    { regexp   => qr/DECLARE_STACK_OF<<<\((.*)\)>>>/,
      massager => sub { return ("STACK_OF($1);"); }
    },
    { regexp   => qr/DECLARE_SPECIAL_STACK_OF<<<\((.*?),(.*?)\)>>>/,
      massager => sub { return ("STACK_OF($1);"); }
     },

    #####
    # ASN1 stuff

    { regexp   => qr/TYPEDEF_D2I_OF<<<\((.*)\)>>>/,
      massager => sub {
          return ("typedef $1 *d2i_of_$1($1 **,const unsigned char **,long)");
      },
    },
    { regexp   => qr/TYPEDEF_I2D_OF<<<\((.*)\)>>>/,
      massager => sub {
          return ("typedef $1 *i2d_of_$1($1 *,unsigned char **)");
      },
    },
    { regexp   => qr/TYPEDEF_D2I2D_OF<<<\((.*)\)>>>/,
      massager => sub {
          return ("TYPEDEF_D2I_OF($1); TYPEDEF_I2D_OF($1)");
      },
    },
    { regexp   => qr/DECLARE_ASN1_ITEM<<<\((.*)\)>>>/,
      massager => sub {
          return (<<"EOF");
#ifndef OPENSSL_EXPORT_VAR_AS_FUNCTION
OPENSSL_EXTERN const ASN1_ITEM *$1_it;
#else
const ASN1_ITEM *$1_it(void);
#endif
EOF
      },
    },
    { regexp   => qr/DECLARE_ASN1_ENCODE_FUNCTIONS<<<\((.*),(.*),(.*)\)>>>/,
      massager => sub {
          return (<<"EOF");
int d2i_$3(void);
int i2d_$3(void);
DECLARE_ASN1_ITEM($2)
EOF
      },
    },
    { regexp   => qr/DECLARE_ASN1_ENCODE_FUNCTIONS_const<<<\((.*),(.*)\)>>>/,
      massager => sub {
          return (<<"EOF");
int d2i_$2(void);
int i2d_$2(void);
DECLARE_ASN1_ITEM($2)
EOF
      },
    },
    { regexp   => qr/DECLARE_ASN1_ALLOC_FUNCTIONS<<<\((.*)\)>>>/,
      massager => sub {
          return (<<"EOF");
int $1_free(void);
int $1_new(void);
EOF
      },
    },
    { regexp   => qr/DECLARE_ASN1_FUNCTIONS_name<<<\((.*),(.*)\)>>>/,
      massager => sub {
          return (<<"EOF");
int d2i_$2(void);
int i2d_$2(void);
int $2_free(void);
int $2_new(void);
DECLARE_ASN1_ITEM($2)
EOF
      },
    },
    { regexp   => qr/DECLARE_ASN1_FUNCTIONS_fname<<<\((.*),(.*),(.*)\)>>>/,
      massager => sub { return (<<"EOF");
int d2i_$3(void);
int i2d_$3(void);
int $3_free(void);
int $3_new(void);
DECLARE_ASN1_ITEM($2)
EOF
      }
    },
    { regexp   => qr/DECLARE_ASN1_FUNCTIONS(?:_const)?<<<\((.*)\)>>>/,
      massager => sub { return (<<"EOF");
int d2i_$1(void);
int i2d_$1(void);
int $1_free(void);
int $1_new(void);
DECLARE_ASN1_ITEM($1)
EOF
      }
    },
    { regexp   => qr/DECLARE_ASN1_NDEF_FUNCTION<<<\((.*)\)>>>/,
      massager => sub {
          return (<<"EOF");
int i2d_$1_NDEF(void);
EOF
      }
    },
    { regexp   => qr/DECLARE_ASN1_PRINT_FUNCTION<<<\((.*)\)>>>/,
      massager => sub {
          return (<<"EOF");
int $1_print_ctx(void);
EOF
      }
    },
    { regexp   => qr/DECLARE_ASN1_PRINT_FUNCTION_name<<<\((.*),(.*)\)>>>/,
      massager => sub {
          return (<<"EOF");
int $2_print_ctx(void);
EOF
      }
    },
    { regexp   => qr/DECLARE_ASN1_SET_OF<<<\((.*)\)>>>/,
      massager => sub { return (); }
    },
    { regexp   => qr/DECLARE_PKCS12_SET_OF<<<\((.*)\)>>>/,
      massager => sub { return (); }
    },
    { regexp   => qr/DECLARE_PEM(?|_rw|_rw_cb|_rw_const)<<<\((.*?),.*\)>>>/,
      massager => sub { return (<<"EOF");
#ifndef OPENSSL_NO_STDIO
int PEM_read_$1(void);
int PEM_write_$1(void);
#endif
int PEM_read_bio_$1(void);
int PEM_write_bio_$1(void);
EOF
      },
    },

    #####
    # PEM stuff
    { regexp   => qr/DECLARE_PEM(?|_write|_write_cb|_write_const)<<<\((.*?),.*\)>>>/,
      massager => sub { return (<<"EOF");
#ifndef OPENSSL_NO_STDIO
int PEM_write_$1(void);
#endif
int PEM_write_bio_$1(void);
EOF
      },
    },
    { regexp   => qr/DECLARE_PEM(?|_read|_read_cb)<<<\((.*?),.*\)>>>/,
      massager => sub { return (<<"EOF");
#ifndef OPENSSL_NO_STDIO
int PEM_read_$1(void);
#endif
int PEM_read_bio_$1(void);
EOF
      },
    },

    # Spurious stuff found in the OpenSSL headers
    # Usually, these are just macros that expand to, well, something
    { regexp   => qr/__NDK_FPABI__/,
      massager => sub { return (); }
    },
    );

my $anoncnt = 0;

my @chandlers = (
    ##################################################################
    # C stuff

    # extern "C" of individual items
    # Note that the main parse function has a special hack for 'extern "C" {'
    # which can't be done in handlers
    # We simply ignore it.
    { regexp   => qr/extern "C" (.*;)/,
      massager => sub { return ($1); },
    },
    # union, struct and enum definitions
    # Because this one might appear a little everywhere within type
    # definitions, we take it out and replace it with just
    # 'union|struct|enum name' while registering it.
    # This makes use of the parser trick to surround the outer braces
    # with <<< and >>>
    { regexp   => qr/(.*)                       # Anything before       ($1)
                     \b                         # word to non-word boundary
                     (union|struct|enum)        # The word used         ($2)
                     (?:\s([[:alpha:]_]\w*))?   # Struct or enum name   ($3)
                     <<<(\{.*?\})>>>            # Struct or enum definition ($4)
                     (.*)                       # Anything after        ($5)
                     ;
                    /x,
      massager => sub {
          my $before = $1;
          my $word = $2;
          my $name = $3
              || sprintf("__anon%03d", ++$anoncnt); # Anonymous struct
          my $definition = $4;
          my $after = $5;
          my $type = $word eq "struct" ? 'S' : 'E';
          if ($before ne "" || $after ne ";") {
              if ($after =~ m|^\w|) { $after = " ".$after; }
              return ("$before$word $name$after;",
                      "$word $name", $type, "", "$word$definition", all_conds());
          }
          # If there was no before nor after, make the return much simple
          return ("", "$word $name", $type, "", "$word$definition", all_conds());
      }
    },
    # Named struct and enum forward declarations
    # We really just ignore them, but we need to parse them or the variable
    # declaration handler further down will think it's a variable declaration.
    { regexp   => qr/^(union|struct|enum) ([[:alpha:]_]\w*);/,
      massager => sub { return (); }
    },
    # Function returning function pointer declaration
    { regexp   => qr/(?:(typedef)\s?)?          # Possible typedef      ($1)
                     ((?:\w|\*|\s)*?)           # Return type           ($2)
                     \s?                        # Possible space
                     <<<\(\*
                     ([[:alpha:]_]\w*)          # Function name         ($3)
                     (\(.*\))                   # Parameters            ($4)
                     \)>>>
                     <<<(\(.*\))>>>             # F.p. parameters       ($5)
                     ;
                    /x,
      massager => sub {
          return ("", $3, 'F', "", "$2(*$4)$5", all_conds())
              if defined $1;
          return ("", $3, 'F', "$2(*)$5", "$2(*$4)$5", all_conds()); }
    },
    # Function pointer declaration, or typedef thereof
    { regexp   => qr/(?:(typedef)\s?)?          # Possible typedef      ($1)
                     ((?:\w|\*|\s)*?)           # Return type           ($2)
                     <<<\(\*([[:alpha:]_]\w*)\)>>> # T.d. or var name   ($3)
                     <<<(\(.*\))>>>             # F.p. parameters       ($4)
                     ;
                    /x,
      massager => sub {
          return ("", $3, 'T', "", "$2(*)$4", all_conds())
              if defined $1;
          return ("", $3, 'V', "$2(*)$4", "$2(*)$4", all_conds());
      },
    },
    # Function declaration, or typedef thereof
    { regexp   => qr/(?:(typedef)\s?)?          # Possible typedef      ($1)
                     ((?:\w|\*|\s)*?)           # Return type           ($2)
                     \s?                        # Possible space
                     ([[:alpha:]_]\w*)          # Function name         ($3)
                     <<<(\(.*\))>>>             # Parameters            ($4)
                     ;
                    /x,
      massager => sub {
          return ("", $3, 'T', "", "$2$4", all_conds())
              if defined $1;
          return ("", $3, 'F', $2, "$2$4", all_conds());
      },
    },
    # Variable declaration, including arrays, or typedef thereof
    { regexp   => qr/(?:(typedef)\s?)?          # Possible typedef      ($1)
                     ((?:\w|\*|\s)*?)           # Type                  ($2)
                     \s?                        # Possible space
                     ([[:alpha:]_]\w*)          # Variable name         ($3)
                     ((?:<<<\[[^\]]*\]>>>)*)    # Possible array declaration ($4)
                     ;
                    /x,
      massager => sub {
          return ("", $3, 'T', "", $2.($4||""), all_conds())
              if defined $1;
          return ("", $3, 'V', $2.($4||""), $2.($4||""), all_conds());
      },
    },
);

# End handlers are almost the same as handlers, except they are run through
# ONCE when the input has been parsed through.  These are used to check for
# remaining stuff, such as an unfinished #ifdef and stuff like that that the
# main parser can't check on its own.
my @endhandlers = (
    { massager => sub {
        my %opts = %{$_[0]};

        die "Unfinished preprocessor conditions levels: ",scalar(@preprocessor_conds),($opts{filename} ? " in file ".$opts{filename}: ""),$opts{PLACE}
            if @preprocessor_conds;
      }
    }
    );

# takes a list of strings that can each contain one or several lines of code
# also takes a hash of options as last argument.
#
# returns a list of hashes with information:
#
#       name            name of the thing
#       type            type, see the massage handler function
#       returntype      return type of functions and variables
#       value           value for macros, signature for functions, variables
#                       and structs
#       conds           preprocessor conditions (array ref)

sub parse {
    my %opts;
    if (ref($_[$#_]) eq "HASH") {
        %opts = %{$_[$#_]};
        pop @_;
    }
    my %state = (
        in_extern_C => 0,       # An exception to parenthesis processing.
        cpp_parens => [],       # A list of ending parens and braces found in
                                # preprocessor directives
        c_parens => [],         # A list of ending parens and braces found in
                                # C statements
        in_string => "",        # empty string when outside a string, otherwise
                                # "'" or '"' depending on the starting quote.
        in_comment => "",       # empty string when outside a comment, otherwise
                                # "/*" or "//" depending on the type of comment
                                # found.  The latter will never be multiline
                                # NOTE: in_string and in_comment will never be
                                # true (in perl semantics) at the same time.
        current_line => 0,
        );
    my @result = ();
    my $normalized_line = "";   # $input_line, but normalized.  In essence, this
                                # means that ALL whitespace is removed unless
                                # it absolutely has to be present, and in that
                                # case, there's only one space.
                                # The cases where a space needs to stay present
                                # are:
                                # 1. between words
                                # 2. between words and number
                                # 3. after the first word of a preprocessor
                                #    directive.
                                # 4. for the #define directive, between the macro
                                #    name/args and its value, so we end up with:
                                #       #define FOO val
                                #       #define BAR(x) something(x)
    my $collected_stmt = "";    # Where we're building up a C line until it's a
                                # complete definition/declaration, as determined
                                # by any handler being capable of matching it.

    # We use $_ shamelessly when looking through @lines.
    # In case we find a \ at the end, we keep filling it up with more lines.
    $_ = undef;

    foreach my $line (@_) {
        # split tries to be smart when a string ends with the thing we split on
        $line .= "\n" unless $line =~ m|\R$|;
        $line .= "#";

        # We use ¦undef¦ as a marker for a new line from the file.
        # Since we convert one line to several and unshift that into @lines,
        # that's the only safe way we have to track the original lines
        my @lines = map { ( undef, $_ ) } split $/, $line;

        # Remember that extra # we added above?  Now we remove it
        pop @lines;
        pop @lines;             # Don't forget the undef

        while (@lines) {
            if (!defined($lines[0])) {
                shift @lines;
                $state{current_line}++;
                if (!defined($_)) {
                    $opts{PLACE} = " at ".$opts{filename}." line ".$state{current_line}."\n";
                    $opts{PLACE2} = $opts{filename}.":".$state{current_line};
                }
                next;
            }

            $_ = "" unless defined $_;
            $_ .= shift @lines;

            if (m|\\$|) {
                $_ = $`;
                next;
            }

            if ($opts{debug}) {
                print STDERR "DEBUG:----------------------------\n";
                print STDERR "DEBUG: \$_      = '$_'\n";
            }

            ##########################################################
            # Now that we have a full line, let's process through it
            while(1) {
                unless ($state{in_comment}) {
                    # Begin with checking if the current $normalized_line
                    # contains a preprocessor directive
                    # This is only done if we're not inside a comment and
                    # if it's a preprocessor directive and it's finished.
                    if ($normalized_line =~ m|^#| && $_ eq "") {
                        print STDERR "DEBUG[OPENSSL CPP]: \$normalized_line = '$normalized_line'\n"
                            if $opts{debug};
                        $opts{debug_type} = "OPENSSL CPP";
                        my @r = ( _run_handlers($normalized_line,
                                                @opensslcpphandlers,
                                                \%opts) );
                        if (shift @r) {
                            # Checking if there are lines to inject.
                            if (@r) {
                                @r = split $/, (pop @r).$_;
                                print STDERR "DEBUG[OPENSSL CPP]: injecting '", join("', '", @r),"'\n"
                                    if $opts{debug} && @r;
                                @lines = ( @r, @lines );

                                $_ = "";
                            }
                        } else {
                            print STDERR "DEBUG[CPP]: \$normalized_line = '$normalized_line'\n"
                                if $opts{debug};
                            $opts{debug_type} = "CPP";
                            my @r = ( _run_handlers($normalized_line,
                                                    @cpphandlers,
                                                    \%opts) );
                            if (shift @r) {
                                if (ref($r[0]) eq "HASH") {
                                    push @result, shift @r;
                                }

                                # Now, check if there are lines to inject.
                                # Really, this should never happen, it IS a
                                # preprocessor directive after all...
                                if (@r) {
                                    @r = split $/, pop @r;
                                    print STDERR "DEBUG[CPP]: injecting '", join("', '", @r),"'\n"
                                    if $opts{debug} && @r;
                                    @lines = ( @r, @lines );
                                    $_ = "";
                                }
                            }
                        }

                        # Note: we simply ignore all directives that no
                        # handler matches
                        $normalized_line = "";
                    }

                    # If the two strings end and start with a character that
                    # shouldn't get concatenated, add a space
                    my $space =
                        ($collected_stmt =~ m/(?:"|')$/
                         || ($collected_stmt =~ m/(?:\w|\d)$/
                             && $normalized_line =~ m/^(?:\w|\d)/)) ? " " : "";

                    # Now, unless we're building up a preprocessor directive or
                    # are in the middle of a string, or the parens et al aren't
                    # balanced up yet, let's try and see if there's a OpenSSL
                    # or C handler that can make sense of what we have so far.
                    if ( $normalized_line !~ m|^#|
                         && ($collected_stmt ne "" || $normalized_line ne "")
                         && ! @{$state{c_parens}}
                         && ! $state{in_string} ) {
                        if ($opts{debug}) {
                            print STDERR "DEBUG[OPENSSL C]: \$collected_stmt  = '$collected_stmt'\n";
                            print STDERR "DEBUG[OPENSSL C]: \$normalized_line = '$normalized_line'\n";
                        }
                        $opts{debug_type} = "OPENSSL C";
                        my @r = ( _run_handlers($collected_stmt
                                                    .$space
                                                    .$normalized_line,
                                                @opensslchandlers,
                                                \%opts) );
                        if (shift @r) {
                            # Checking if there are lines to inject.
                            if (@r) {
                                @r = split $/, (pop @r).$_;
                                print STDERR "DEBUG[OPENSSL]: injecting '", join("', '", @r),"'\n"
                                    if $opts{debug} && @r;
                                @lines = ( @r, @lines );

                                $_ = "";
                            }
                            $normalized_line = "";
                            $collected_stmt = "";
                        } else {
                            if ($opts{debug}) {
                                print STDERR "DEBUG[C]: \$collected_stmt  = '$collected_stmt'\n";
                                print STDERR "DEBUG[C]: \$normalized_line = '$normalized_line'\n";
                            }
                            $opts{debug_type} = "C";
                            my @r = ( _run_handlers($collected_stmt
                                                        .$space
                                                        .$normalized_line,
                                                    @chandlers,
                                                    \%opts) );
                            if (shift @r) {
                                if (ref($r[0]) eq "HASH") {
                                    push @result, shift @r;
                                }

                                # Checking if there are lines to inject.
                                if (@r) {
                                    @r = split $/, (pop @r).$_;
                                    print STDERR "DEBUG[C]: injecting '", join("', '", @r),"'\n"
                                        if $opts{debug} && @r;
                                    @lines = ( @r, @lines );

                                    $_ = "";
                                }
                                $normalized_line = "";
                                $collected_stmt = "";
                            }
                        }
                    }
                    if ($_ eq "") {
                        $collected_stmt .= $space.$normalized_line;
                        $normalized_line = "";
                    }
                }

                if ($_ eq "") {
                    $_ = undef;
                    last;
                }

                # Take care of inside string first.
                if ($state{in_string}) {
                    if (m/ (?:^|(?<!\\))        # Make sure it's not escaped
                           $state{in_string}    # Look for matching quote
                         /x) {
                        $normalized_line .= $`.$&;
                        $state{in_string} = "";
                        $_ = $';
                        next;
                    } else {
                        die "Unfinished string without continuation found$opts{PLACE}\n";
                    }
                }
                # ... or inside comments, whichever happens to apply
                elsif ($state{in_comment}) {

                    # This should never happen
                    die "Something went seriously wrong, multiline //???$opts{PLACE}\n"
                        if ($state{in_comment} eq "//");

                    # A note: comments are simply discarded.

                    if (m/ (?:^|(?<!\\))        # Make sure it's not escaped
                           \*\/                 # Look for C comment end
                         /x) {
                        $state{in_comment} = "";
                        $_ = $';
                        print STDERR "DEBUG: Found end of comment, followed by '$_'\n"
                            if $opts{debug};
                        next;
                    } else {
                        $_ = "";
                        next;
                    }
                }

                # At this point, it's safe to remove leading whites, but
                # we need to be careful with some preprocessor lines
                if (m|^\s+|) {
                    my $rest = $';
                    my $space = "";
                    $space = " "
                        if ($normalized_line =~ m/^
                                                  \#define\s\w(?:\w|\d)*(?:<<<\([^\)]*\)>>>)?
                                                  | \#[a-z]+
                                                  $/x);
                    print STDERR "DEBUG: Processing leading spaces: \$normalized_line = '$normalized_line', \$space = '$space', \$rest = '$rest'\n"
                        if $opts{debug};
                    $_ = $space.$rest;
                }

                my $parens =
                    $normalized_line =~ m|^#| ? 'cpp_parens' : 'c_parens';
                (my $paren_singular = $parens) =~ s|s$||;

                # Now check for specific tokens, and if they are parens,
                # check them against $state{$parens}.  Note that we surround
                # the outermost parens with extra "<<<" and ">>>".  Those
                # are for the benefit of handlers who to need to detect
                # them, and they will be removed from the final output.
                if (m|^[\{\[\(]|) {
                    my $body = $&;
                    $_ = $';
                    if (!@{$state{$parens}}) {
                        if ("$normalized_line$body" =~ m|^extern "C"\{$|) {
                            $state{in_extern_C} = 1;
                            print STDERR "DEBUG: found start of 'extern \"C\"' ($normalized_line$body)\n"
                                if $opts{debug};
                            $normalized_line = "";
                        } else {
                            $normalized_line .= "<<<".$body;
                        }
                    } else {
                        $normalized_line .= $body;
                    }

                    if ($normalized_line ne "") {
                        print STDERR "DEBUG: found $paren_singular start '$body'\n"
                            if $opts{debug};
                        $body =~ tr|\{\[\(|\}\]\)|;
                        print STDERR "DEBUG: pushing $paren_singular end '$body'\n"
                            if $opts{debug};
                        push @{$state{$parens}}, $body;
                    }
                } elsif (m|^[\}\]\)]|) {
                    $_ = $';

                    if (!@{$state{$parens}}
                        && $& eq '}' && $state{in_extern_C}) {
                        print STDERR "DEBUG: found end of 'extern \"C\"'\n"
                            if $opts{debug};
                        $state{in_extern_C} = 0;
                    } else {
                        print STDERR "DEBUG: Trying to match '$&' against '"
                            ,join("', '", @{$state{$parens}})
                            ,"'\n"
                            if $opts{debug};
                        die "Unmatched parentheses$opts{PLACE}\n"
                            unless (@{$state{$parens}}
                                    && pop @{$state{$parens}} eq $&);
                        if (!@{$state{$parens}}) {
                            $normalized_line .= $&.">>>";
                        } else {
                            $normalized_line .= $&;
                        }
                    }
                } elsif (m|^["']|) { # string start
                    my $body = $&;
                    $_ = $';

                    # We want to separate strings from \w and \d with one space.
                    $normalized_line .= " " if $normalized_line =~ m/(\w|\d)$/;
                    $normalized_line .= $body;
                    $state{in_string} = $body;
                } elsif (m|^\/\*|) { # C style comment
                    print STDERR "DEBUG: found start of C style comment\n"
                        if $opts{debug};
                    $state{in_comment} = $&;
                    $_ = $';
                } elsif (m|^\/\/|) { # C++ style comment
                    print STDERR "DEBUG: found C++ style comment\n"
                        if $opts{debug};
                    $_ = "";    # (just discard it entirely)
                } elsif (m/^ (?| (?: 0[xX][[:xdigit:]]+ | 0[bB][01]+ | [0-9]+ )
                                 (?i: U | L | UL | LL | ULL )?
                               | [0-9]+\.[0-9]+(?:[eE][\-\+]\d+)? (?i: F | L)?
                               ) /x) {
                    print STDERR "DEBUG: Processing numbers: \$normalized_line = '$normalized_line', \$& = '$&', \$' = '$''\n"
                        if $opts{debug};
                    $normalized_line .= $&;
                    $_ = $';
                } elsif (m/^[[:alpha:]_]\w*/) {
                    my $body = $&;
                    my $rest = $';
                    my $space = "";

                    # Now, only add a space if it's needed to separate
                    # two \w characters, and we also surround strings with
                    # a space.  In this case, that's if $normalized_line ends
                    # with a \w, \d, " or '.
                    $space = " "
                        if ($normalized_line =~ m/("|')$/
                            || ($normalized_line =~ m/(\w|\d)$/
                                && $body =~ m/^(\w|\d)/));

                    print STDERR "DEBUG: Processing words: \$normalized_line = '$normalized_line', \$space = '$space', \$body = '$body', \$rest = '$rest'\n"
                        if $opts{debug};
                    $normalized_line .= $space.$body;
                    $_ = $rest;
                } elsif (m|^(?:\\)?.|) { # Catch-all
                    $normalized_line .= $&;
                    $_ = $';
                }
            }
        }
    }
    foreach my $handler (@endhandlers) {
        if ($handler->{massager}) {
            $handler->{massager}->(\%opts);
        }
    }
    return @result;
}

# arg1:    line to check
# arg2...: handlers to check
# return undef when no handler matched
sub _run_handlers {
    my %opts;
    if (ref($_[$#_]) eq "HASH") {
        %opts = %{$_[$#_]};
        pop @_;
    }
    my $line = shift;
    my @handlers = @_;

    foreach my $handler (@handlers) {
        if ($handler->{regexp}
            && $line =~ m|^$handler->{regexp}$|) {
            if ($handler->{massager}) {
                if ($opts{debug}) {
                    print STDERR "DEBUG[",$opts{debug_type},"]: Trying to handle '$line'\n";
                    print STDERR "DEBUG[",$opts{debug_type},"]: (matches /\^",$handler->{regexp},"\$/)\n";
                }
                my $saved_line = $line;
                my @massaged =
                    map { s/(<<<|>>>)//g; $_ }
                    $handler->{massager}->($saved_line, \%opts);
                print STDERR "DEBUG[",$opts{debug_type},"]: Got back '"
                    , join("', '", @massaged), "'\n"
                    if $opts{debug};

                # Because we may get back new lines to be
                # injected before whatever else that follows,
                # and the injected stuff might include
                # preprocessor lines, we need to inject them
                # in @lines and set $_ to the empty string to
                # break out from the inner loops
                my $injected_lines = shift @massaged || "";

                if (@massaged) {
                    return (1,
                            {
                                name    => shift @massaged,
                                type    => shift @massaged,
                                returntype => shift @massaged,
                                value   => shift @massaged,
                                conds   => [ @massaged ]
                            },
                            $injected_lines
                        );
                } else {
                    print STDERR "DEBUG[",$opts{debug_type},"]:   (ignore, possible side effects)\n"
                        if $opts{debug} && $injected_lines eq "";
                    return (1, $injected_lines);
                }
            }
            return (1);
        }
    }
    return (0);
}