/*
 * nqs_generic/all-systems/match.c
 * Globbing for NQS
 */

#include <nqs_generic/license.h>
  /* NQS license information */
#include <nqs_generic/debug.h>
  /* debugging support */
#include <nqs_generic/proto.h>
  /* ANSI C debugging support */
#include <stdio.h>
  /* for NULL definition */

/*
 * Minimal boolean support.  Each name is guarded on its own so that an
 * included header which supplies only some of BOOLEAN / TRUE / FALSE
 * still leaves this file with all three available.
 */
#ifndef BOOLEAN
# define BOOLEAN int
#endif
#ifndef TRUE
# define TRUE 1
#endif
#ifndef FALSE
# define FALSE 0
#endif

/* result codes returned by the matcher */
enum
{
  MATCH_VALID   = 1,    /* valid match */
  MATCH_END     = 2,    /* premature end of pattern string */
  MATCH_ABORT   = 3,    /* premature end of text string */
  MATCH_RANGE   = 4,    /* match failure on [..] construct */
  MATCH_LITERAL = 5,    /* match failure on literal match */
  MATCH_PATTERN = 6     /* bad pattern */
};

/* pattern validity codes */
enum
{
  PATTERN_VALID =  0,   /* valid pattern */
  PATTERN_ESC   = -1,   /* literal escape at end of pattern */
  PATTERN_RANGE = -2,   /* malformed range in [..] construct */
  PATTERN_CLOSE = -3,   /* no end bracket in [..] construct */
  PATTERN_EMPTY = -4    /* [..] construct is empty */
};

/* Prototypes of the two helpers that are currently compiled out (#if 0): */
/* static int is_pattern ( char *p ); */
/* static int is_valid_pattern ( char *p, int *error_type ); */
/* Forward declaration: matche() calls this helper, which is defined after it. */
static int matche_after_star ( char *p, char *t );

/*----------------------------------------------------------------------------
*
*  Match the pattern PATTERN against the string TEXT;
*
*       match() returns TRUE if pattern matches, FALSE otherwise.
*       matche() returns MATCH_VALID if pattern matches, or an errorcode
*           as follows otherwise:
*
*            MATCH_PATTERN  - bad pattern
*            MATCH_LITERAL  - match failure on literal mismatch
*            MATCH_RANGE    - match failure on [..] construct
*            MATCH_ABORT    - premature end of text string
*            MATCH_END      - premature end of pattern string
*            MATCH_VALID    - valid match
*
*
*  A match means the entire string TEXT is used up in matching.
*
*  In the pattern string:
*       `*' matches any sequence of characters (zero or more)
*       `?' matches any character
*       [SET] matches any character in the specified set,
*       [!SET] or [^SET] matches any character not in the specified set.
*
*  A set is composed of characters or ranges; a range looks like
*  character hyphen character (as in 0-9 or A-Z).  [0-9a-zA-Z_] is the
*  minimal set of characters allowed in the [..] pattern construct.
*  Other characters are allowed (ie. 8 bit characters) if your system
*  will support them.
*
*  To suppress the special syntactic significance of any of `[]*?!^-\',
*  and match the character exactly, precede it with a `\'.
*
----------------------------------------------------------------------------*/


/*----------------------------------------------------------------------------
*
* Return TRUE if PATTERN has any special wildcard characters
*
----------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------
*
* Return TRUE if PATTERN is a well-formed pattern according to the above
* syntax
*
* error_type is a return code based on the type of pattern error.  Zero is
* returned in error_type if the pattern is a valid one.  error_type return
* values are as follows:
*
*   PATTERN_VALID - pattern is well formed
*   PATTERN_ESC   - pattern has invalid escape ('\' at end of pattern)
*   PATTERN_RANGE - [..] construct has no end of range in a '-' pair (ie [a-])
*   PATTERN_CLOSE - [..] construct has no end bracket (ie [abc-g )
*   PATTERN_EMPTY - [..] construct is empty (ie [])
*
----------------------------------------------------------------------------*/

/*----------------------------------------------------------------------------
*
* Return TRUE if PATTERN has any special wildcard characters
*
----------------------------------------------------------------------------*/
#if 0
static BOOLEAN is_pattern (char *p)
{
  /*
   * Scan p up to its terminating NUL and report TRUE as soon as one of
   * the characters '?', '*', '[' or '\' is seen; FALSE if none occurs.
   */
  ENTER_FUNCTION("nqs_generic/all-systems/is_pattern");
    /* debugging support */
  TEST_ARG(p != NULL, 1);
    /* ensure we are not working with garbage */

  for ( ; *p != '\0'; p++ )
  {
    if ( *p == '?' || *p == '*' || *p == '[' || *p == '\\' )
    {
      EXIT_FUNCTION;
      return TRUE;
    }
  }

  /* reached the end without meeting a special character */
  EXIT_FUNCTION;
  return FALSE;
}

/*----------------------------------------------------------------------------
*
* Return TRUE if PATTERN is a well-formed pattern according to the above
* syntax
*
* error_type is a return code based on the type of pattern error.  Zero is
* returned in error_type if the pattern is a valid one.  error_type return
* values are as follows:
*
*   PATTERN_VALID - pattern is well formed
*   PATTERN_ESC   - pattern has invalid escape ('\' at end of pattern)
*   PATTERN_RANGE - [..] construct has no end of range in a '-' pair (ie [a-])
*   PATTERN_CLOSE - [..] construct has no end bracket (ie [abc-g )
*   PATTERN_EMPTY - [..] construct is empty (ie [])
*
----------------------------------------------------------------------------*/

static BOOLEAN is_valid_pattern (char *p, int *error_type)
{
  /*
   * Walks the pattern p from left to right and stops at the first defect.
   *
   *   p          - NUL-terminated pattern to check
   *   error_type - out: set to PATTERN_VALID once the argument checks have
   *                run, then overwritten with PATTERN_ESC, PATTERN_RANGE,
   *                PATTERN_CLOSE or PATTERN_EMPTY when a defect is found
   *
   * Returns TRUE when the end of the pattern is reached without a defect,
   * FALSE otherwise.
   *
   * NOTE(review): a '!' or '^' directly after '[' gets no special
   * treatment here, so "[!]" passes this check while matche() reports
   * MATCH_PATTERN for it -- confirm whether that difference is intended.
   */
  ENTER_FUNCTION("nqs_generic/all-systems/is_valid_pattern");
    /* debugging info */
  TEST_ARG(p          != NULL, 1);
  TEST_ARG(error_type != NULL, 2);
    /* ensure we are not working with garbage */
  
  /* init error_type */
  *error_type = PATTERN_VALID;
    
  /* loop through pattern to EOS */
  while( *p )
  {
    /* determine pattern type */
    switch( *p )
    {
      /* check literal escape, it cannot be at end of pattern */
     case '\\':
      /* step onto the escaped character; a NUL here means the backslash
         was the last character of the pattern */
      if( !*++p )
      {
        *error_type = PATTERN_ESC;
	EXIT_FUNCTION;
        return FALSE;
      }
      /* skip the escaped character itself */
      p++;
      break;

      /* the [..] construct must be well formed */
     case '[':
      p++;
      /* if the next character is ']' then bad pattern */
      if ( *p == ']' )
      {
        *error_type = PATTERN_EMPTY;
	EXIT_FUNCTION;
        return FALSE;
      }
                
      /* if end of pattern here then bad pattern */
      if ( !*p )
      {
        *error_type = PATTERN_CLOSE;
	EXIT_FUNCTION;
        return FALSE;
      }

      /* loop to end of [..] construct; each pass consumes one set element
         (possibly escaped) and, when a '-' follows it, the range end too */
      while( *p != ']' )
      {
        /* check for literal escape */
        if( *p == '\\' )
	{
          p++;
          /* if end of pattern here then bad pattern; the test looks at the
             escaped character and steps past it in the same expression */
          if ( !*p++ )
	  {
            *error_type = PATTERN_ESC;
	    EXIT_FUNCTION;
            return FALSE;
          }
        }
        else
          p++;

        /* if end of pattern here then bad pattern */
        if ( !*p )
	{
          *error_type = PATTERN_CLOSE;
	  EXIT_FUNCTION;
          return FALSE;
        }

        /* if this a range */
        if( *p == '-' )
	{
          /* we must have an end of range: neither the end of the pattern
             nor the closing bracket may follow the '-' */
          if ( !*++p || *p == ']' )
	  {
            *error_type = PATTERN_RANGE;
	    EXIT_FUNCTION;
            return FALSE;
          }
          else
	  {
            /* check for literal escape */
            if( *p == '\\' )
              p++;

            /* if end of pattern here then bad pattern; otherwise this
               steps past the range end character */
            if ( !*p++ )
	    {
              *error_type = PATTERN_ESC;
	      EXIT_FUNCTION;
              return FALSE;
            }
          }
        }
      }
      /* p still rests on the closing ']' here; the next pass of the outer
         loop steps over it through the default case */
      break;

      /* all other characters are valid pattern elements */
     case '*':
     case '?':
     default:
      p++;                              /* "normal" character */
      break;
    }
  }
  EXIT_FUNCTION;
  return TRUE;
}
#endif /* #if 0 */

/*----------------------------------------------------------------------------
*
*  Match the pattern PATTERN against the string TEXT;
*
*  returns MATCH_VALID if pattern matches, or an errorcode as follows
*  otherwise:
*
*            MATCH_PATTERN  - bad pattern
*            MATCH_LITERAL  - match failure on literal mismatch
*            MATCH_RANGE    - match failure on [..] construct
*            MATCH_ABORT    - premature end of text string
*            MATCH_END      - premature end of pattern string
*            MATCH_VALID    - valid match
*
*
*  A match means the entire string TEXT is used up in matching.
*
*  In the pattern string:
*       `*' matches any sequence of characters (zero or more)
*       `?' matches any character
*       [SET] matches any character in the specified set,
*       [!SET] or [^SET] matches any character not in the specified set.
*
*  A set is composed of characters or ranges; a range looks like
*  character hyphen character (as in 0-9 or A-Z).  [0-9a-zA-Z_] is the
*  minimal set of characters allowed in the [..] pattern construct.
*  Other characters are allowed (ie. 8 bit characters) if your system
*  will support them.
*
*  To suppress the special syntactic significance of any of `[]*?!^-\',
*  and match the character exactly, precede it with a `\'.
*
----------------------------------------------------------------------------*/

int matche (char *p, char *t )
{
  register char range_start, range_end;  /* start and end in range */

  BOOLEAN invert;             /* is this [..] or [!..] */
  BOOLEAN member_match;       /* have I matched the [..] construct? */
  BOOLEAN loop;               /* should I terminate? */

  int iReturn;
    /* temporary copy of the return value */

  /*
   * p is the pattern and t the text, both NUL-terminated.  Returns
   * MATCH_VALID when all of t is matched by p, otherwise the MATCH_* code
   * naming the reason for the failure (MATCH_PATTERN, MATCH_LITERAL,
   * MATCH_RANGE, MATCH_ABORT or MATCH_END).
   */
  ENTER_FUNCTION("nqs_generic/all-systems/matche");
    /* debugging support */
  TEST_ARG(p != NULL, 1);
  TEST_ARG(t != NULL, 2);
    /* ensure we are not working with garbage */

  for ( ; *p; p++, t++ )
  {
    /* if this is the end of the text then this is the end of the match:
     * only a run of '*' (each matching the empty string) may remain */
    if (!*t)
    {
      while ( *p == '*' )
      {
        p++;
      }
      EXIT_FUNCTION;
      return ( *p == '\0' ) ? MATCH_VALID : MATCH_ABORT;
    }

    /* determine and react to pattern type */
    switch ( *p )
    {
      /* single any character match */
     case '?':
      break;

      /* multiple any character match */
     case '*':
      iReturn = matche_after_star (p, t);
      EXIT_FUNCTION;
      return iReturn;

      /* [..] construct, single member/exclusion character match */
     case '[':
      /* move to beginning of range */
      p++;

      /* check if this is a member match or exclusion match */
      invert = FALSE;
      if ( *p == '!' || *p == '^' )
      {
        invert = TRUE;
        p++;
      }

      /* if closing bracket here or at range start then we have a
         malformed pattern */
      if ( *p == ']' )
      {
        EXIT_FUNCTION;
        return MATCH_PATTERN;
      }

      member_match = FALSE;
      loop = TRUE;

      /* scan the set one element at a time until an element matches the
       * text character or the closing bracket is reached */
      while ( loop )
      {
        /* if end of construct then loop is done */
        if ( *p == ']' )
        {
          loop = FALSE;
          continue;
        }

        /* matching a '!', '^', '-', '\' or a ']' */
        if ( *p == '\\' )
        {
          range_start = range_end = *++p;
        }
        else
        {
          range_start = range_end = *p;
        }

        /* if end of pattern then bad pattern (Missing ']') */
        if ( !*p )
        {
          EXIT_FUNCTION;
          return MATCH_PATTERN;
        }

        /* check for range bar */
        if ( *++p == '-' )
        {
          /* get the range end */
          range_end = *++p;

          /* if end of pattern or construct then bad pattern */
          if ( range_end == '\0' || range_end == ']' )
          {
            EXIT_FUNCTION;
            return MATCH_PATTERN;
          }

          /* special character range end */
          if ( range_end == '\\' )
          {
            range_end = *++p;

            /* if end of pattern then we have a bad pattern */
            if ( !range_end )
            {
              EXIT_FUNCTION;
              return MATCH_PATTERN;
            }
          }

          /* move just beyond this range, escaped end or not */
          p++;
        }

        /* if the text character is in range then match found.  A single
         * character is the range with range_start == range_end, so this
         * test runs for every element, not only for 'x-y' ranges.
         * make sure the range letters have the proper
         * relationship to one another before comparison */
        if ( range_start < range_end )
        {
          if ( *t >= range_start && *t <= range_end )
          {
            member_match = TRUE;
            loop = FALSE;
          }
        }
        else
        {
          if ( *t >= range_end && *t <= range_start )
          {
            member_match = TRUE;
            loop = FALSE;
          }
        }
      }

      /* the verdict is taken only once the scan is over:
       * if there was a match in an exclusion set then no match,
       * if there was no match in a member set then no match */
      if ( (invert && member_match) || !(invert || member_match) )
      {
        EXIT_FUNCTION;
        return MATCH_RANGE;
      }

      /* if this is not an exclusion then skip the rest of the [...]
       * construct that already matched. */
      if ( member_match )
      {
        while ( *p != ']' )
        {
          /* bad pattern (Missing ']') */
          if ( !*p )
          {
            EXIT_FUNCTION;
            return MATCH_PATTERN;
          }

          /* skip exact match */
          if ( *p == '\\' )
          {
            p++;

            /* if end of pattern then we have a bad pattern */
            if ( !*p )
            {
              EXIT_FUNCTION;
              return MATCH_PATTERN;
            }
          }

          /* move to next pattern char */
          p++;
        }
      }
      break;

      /* next character is quoted and must match exactly */
     case '\\':
      /* move pattern pointer to quoted char */
      p++;

      /* if end of pattern then we have a bad pattern */
      if ( !*p )
      {
        EXIT_FUNCTION;
        return MATCH_PATTERN;
      }
      /* fall through: the quoted character is compared as a literal */

      /* must match this character exactly */
     default:
      if ( *p != *t )
      {
        EXIT_FUNCTION;
        return MATCH_LITERAL;
      }
    }
  }

  /* pattern used up: any text left over means the pattern fails */
  EXIT_FUNCTION;
  return ( *t != '\0' ) ? MATCH_END : MATCH_VALID;
}

/*----------------------------------------------------------------------------
*
* recursively call matche() with final segment of PATTERN and of TEXT.
*
----------------------------------------------------------------------------*/

static int matche_after_star (char *p, char *t)
{
  register int match = 0;     /* last result obtained from matche() */
  register int nextp;         /* first pattern character after the '*' run */

  /*
   * p points at a '*' in the pattern, t at the text position reached so
   * far.  Tries the remainder of the pattern at successive text positions
   * and returns MATCH_VALID on success, MATCH_ABORT when the text runs
   * out, or MATCH_PATTERN when the pattern is malformed.
   */
  ENTER_FUNCTION("nqs_generic/all-systems/matche_after_star");
    /* debugging info */

  /* pass over existing ? and * in pattern */
  while ( *p == '?' || *p == '*' )
  {
    /* take one char of text for each ? */
    if ( *p == '?' )
    {
      /* if end of text then no match */
      if ( !*t++ )
      {
        EXIT_FUNCTION;
        return MATCH_ABORT;
      }
    }

    /* move to next char in pattern */
    p++;
  }

  /* if end of pattern we have matched regardless of text left */
  if ( !*p )
  {
    EXIT_FUNCTION;
    return MATCH_VALID;
  }

  /* get the next character to match which must be a literal or '[' */
  nextp = *p;
  if ( nextp == '\\' )
  {
    /* an escaped character is matched as the character itself */
    nextp = p[1];

    /* if end of pattern then we have a bad pattern */
    if ( !nextp )
    {
      EXIT_FUNCTION;
      return MATCH_PATTERN;
    }
  }

  /* Continue until we run out of text or definite result seen */
  do
  {
    /* a precondition for matching is that the next character
     * in the pattern match the next character in the text or that
     * the next pattern char is the beginning of a range.  Increment
     * text pointer as we go here */
    if ( nextp == *t || nextp == '[' )
    {
      match = matche(p, t);
    }

    /* if the end of text is reached then no match */
    if ( !*t++ )
    {
      match = MATCH_ABORT;
    }

  } while ( match != MATCH_VALID && match != MATCH_ABORT && match != MATCH_PATTERN );

  /* return result */
  EXIT_FUNCTION;
  return match;
}


/*----------------------------------------------------------------------------
*
* match() is a shell to matche() to return only BOOLEAN values.
*
----------------------------------------------------------------------------*/

int match( char *p, char *t )
{
  int status;
    /* detailed result code obtained from matche() */

  /*
   * Boolean front end to matche(): p is the pattern, t the text.
   * Yields TRUE only when matche() reports MATCH_VALID, FALSE for every
   * other result code.
   */
  ENTER_FUNCTION("nqs_generic/all-systems/match");
    /* debugging support */
  TEST_ARG(p != NULL, 1);
  TEST_ARG(t != NULL, 2);
    /* ensure we are not working with garbage */

  status = matche(p, t);
  EXIT_FUNCTION;

  if ( status != MATCH_VALID )
  {
    return FALSE;
  }
  return TRUE;
}


