/**********************************************************************
//  NEWFIND.C
//
//      Copyright (c) 1992 - Microsoft Corp.
//      All rights reserved.
//      Microsoft Confidential
//
//  TABS = 3
**********************************************************************/

#include "shellprv.h"
#pragma  hdrstop

#pragma warning( disable:4001 )         // Disable new type remark warning

//*********************************************************************
//
//  Miscellaneous constants and macros
//
//*********************************************************************

//*********************************************************************
//  Global data
//*********************************************************************

// BUGBUG (DavePl) This code throws exceptions, and I don't see anyone
//                 trying to catch them (for allocation failures)

//*********************************************************************
//  Intializes all global buffers and variables. aGlobalBufs[] is an
//  array of pointers to the global pointers which are allocated to
//  the sizes in the array aBufLens[]. DummyFirst and nDummyFirst
//  are the first wordss of the block of variables associated with
//  normal and /NOT searches. Each block of variables is initialized
//  by copying the static block starting with InitialSearchData
//  to the repective data areas.
//
//  int InitGrepBufs( void )
//
//  ARGUMENTS:
//      NONE
//  RETURNS:
//      int     - OK if no errors ELSE ERR_NOMEMORY.
//
//*********************************************************************

LPGREPINFO InitGrepBufs( void )
{
    //
    // We first want to allocate a global structure for this Grep instance
    //
    LPGREPINFO lpgi;

    lpgi = (LPGREPINFO)Alloc(SIZEOF(GREPINFO));
    if (lpgi == NULL)
        return(NULL);

    // Our Alloc function initializes the data to zero.

    //lpgi->Flags = 0;
    lpgi->CaseSen = 1;                                // Assume case-sensitivity

    // Initialize Search data
    lpgi->ge.TblEntriesUsed = 1;
    //lpgi->ge.ExprEntriesUsed = 0;
    //lpgi->ge.StrCount = 0;
    //lpgi->ge.TargetLen = 0;
    //lpgi->ge.MaxChar = 0;
    lpgi->ge.MinChar = 0xffff;
    lpgi->ge.ShortStrLen = 0xffff;

    // Initialize Not Search data
    lpgi->geNot.TblEntriesUsed = 1;
    //lpgi->geNotnExprEntriesUsed = 0;
    //lpgi->geNotnStrCount = 0;
    //lpgi->geNotnTargetLen = 0;
    //lpgi->geNotnMaxChar = 0;
    lpgi->geNot.MinChar = 0xffff;
    lpgi->geNot.ShortStrLen = 0xffff;

    //lpgi->addstr = NULL;                         // Initialize function pointers
    //lpgi->find = NULL;

    _fmemset( lpgi->ge.td1, 1, TRTABLEN );         // Set up TD1 for startup
    _fmemset( lpgi->geNot.td1, 1, TRTABLEN );     // Set up /NOT TD1 for startup

    return( lpgi );
}

//*********************************************************************
//  Frees all previously allocated global buffers. aPtrLst is an array
//  of pointers to the all of the global pointers. Only those pointers
//  which are not NULL are freed and then set to NULL.
//
//  Must first free all of the memory blocks allocated with alloc()
//  for the linked lists in both the normal and /NOT search trees.
//
//  int FreeGrepBufs( LPGREPIFIN lpgi )
//
//  ARGUMENTS:
//      NONE
//  RETURNS:
//      int     - OK (0) if successfull else ERR_MEM_CORRUPT
//
//*********************************************************************

int FreeGrepBufs(LPGREPINFO lpgi )
{
    int     i;
    int     x;

    for ( x = 0; x < 2; x++ )
    {
        for ( i = 0; i < lpgi->ge.TblEntriesUsed; i++ )
            freenode( lpgi, lpgi->ge.StringList[ i ] );

        for ( i = 0; i < lpgi->ge.ExprEntriesUsed; i++ )
            if ( !Free(lpgi->ge.ExprStrList[ i ] ))
                return( ERR_MEM_CORRUPT );

        SwapSrchTables(lpgi);
    }

    // Now lets free the actual header struture
    if (!Free(lpgi))
        return( ERR_MEM_CORRUPT );

    return( OK );
}

//*********************************************************************
//  Swaps all variables directly associated with the normal search
//  with those used in the /NOT search.
//
//  void SwapSrchTables( void )
//
//  ARGUMENTS:
//      NONE
//  RETURNS:
//      void
//*********************************************************************

void SwapSrchTables( LPGREPINFO lpgi )
{
    GREPELEMENTS geTemp;

    geTemp = lpgi->ge;
    lpgi->ge = lpgi->geNot;
    lpgi->geNot = geTemp;
}

//*********************************************************************
//  Frees the memory allocated to previously allocated node and frees
//  all nodes attached to the specified node.
//
//  void freenode( register STRINGNODE FAR *pNode )
//
//  ARGUMENTS:
//      pNode   - Pointer to node to free
//  RETURNS:
//      void
//
//*********************************************************************

void freenode(LPGREPINFO lpgi,  register STRINGNODE *pNode )
{
    register STRINGNODE *pTmpNode;          // Pointer to next node in list

    while ( pNode != NULL )                     // While not at end of list
    {
        if ( pNode->s_suf != NULL )             // Free suffix list if not end
            freenode(lpgi, pNode->s_suf );
        else
            lpgi->ge.StrCount--;                // Else decrement string count

        pTmpNode = pNode;                       // Save pointer
        pNode = pNode->s_alt;                   // Move down the list

        if ( pTmpNode != NULL && !Free( pTmpNode ) )
            RaiseException(ERROR_INVALID_BLOCK, 0, 0, 0);
    }
}


//*********************************************************************
//  Allocates memory if this fails we thrown an exception...
//  This is basically lazy programming and should be changed later...
//
//  LPVOID AllocThrow ( long cb )
//
//  ARGUMENTS:
//      cb                  - Number of bytes to allocate
//
//  RETURNS:
//      LPVOID - Ptr to new memory
//*********************************************************************

LPVOID AllocThrow (LPGREPINFO lpgi, long cb)
{
    LPVOID lp;

    lp = Alloc(cb);
    if (lp == NULL)
        RaiseException(ERROR_NOT_ENOUGH_MEMORY, 0, 0, 0);
    return lp;
}


//*********************************************************************
//  Allocates memory for a new node large enough to hold a copy of
//  a caller specifed string and then initializing the new node with
//  the caller supplied string.
//
//  STRINGNODE FAR *newnode( LPSTR String, int StrLen )
//
//  ARGUMENTS:
//      *String             - String to include in new node
//      StrLen              - Length of string argument
//  RETURNS:
//      STRINGNODE* - Ptr to a new node
//*********************************************************************

STRINGNODE FAR* newnode( LPGREPINFO lpgi, LPSTR String, int StrLen )
{
    register STRINGNODE *pNewNode;          // Pointer to new node

                                                        // Allocate string node
    pNewNode = (STRINGNODE *)AllocThrow(
            lpgi,  SIZEOF( STRINGNODE ) + (unsigned)StrLen +
                                              (unsigned)(StrLen & 1) + 1);

    pNewNode->s_alt = NULL;                     // No alternates yet
    pNewNode->s_suf = NULL;                     // No suffixes yet
    pNewNode->s_must = StrLen;                  // Set string length
                                                        // Copy string text into node buffer
    lstrcpynA( s_text( pNewNode ), String, (unsigned)StrLen + 1);
    return( pNewNode );                         // Return pointer to new node
}

//*********************************************************************
//  Updates an existing node with a new string passed by the caller.
//
//  WARNING:
//      The new string must be shorter than the original string for
//      the specified node or the buffer will overflow.
//
//  ARGUMENTS:
//      pNode       - Pointer to node
//      s           - String
//      n           - Length of string
//*********************************************************************

STRINGNODE FAR* reallocnode( register STRINGNODE *pNode, LPSTR pStr, int StrLen )
{
    #ifndef NDEBUG
        Assert( StrLen < pNode->s_must );
    #endif

    pNode->s_must = StrLen;                         // Set new length
                                                        // Copy new text
    hmemcpy( s_text( pNode ), pStr, (unsigned)StrLen );
    return( pNode );                                // Return pointer to original node
}

//*********************************************************************
// maketd1 - add entry for TD1 shift table
//
//   This function fills in the TD1 table for the given
//   search string.  The idea is adapted from Daniel M.
//   Sunday's QuickSearch algorithm as described in an
//   article in the August 1990 issue of "Communications
//   of the ACM".  As described, the algorithm is suitable
//   for single-string searches.    The idea to extend it for
//   multiple search strings is mine and is described below.
//
//       Think of searching for a match as shifting the search
//       pattern p of length n over the source text s until the
//       search pattern is aligned with matching text or until
//       the end of the source text is reached.
//
//       At any point when we find a mismatch, we know
//       we will shift our pattern to the right in the
//       source text at least one position.  Thus,
//       whenever we find a mismatch, we know the character
//       s[n] will figure in our next attempt to match.
//
//       For some character c, TD1[c] is the 1-based index
//       from right to left of the first occurrence of c
//       in p.  Put another way, it is the count of places
//       to shift p to the right on s so that the rightmost
//       c in p is aligned with s[n].  If p does not contain
//       c, then TD1[c] = n + 1, meaning we shift p to align
//       p[0] with s[n + 1] and try our next match there.
//
//       Computing TD1 for a single string is easy:
//
//           _fmemset(TD1,n + 1,SIZEOF TD1);
//           for (i = 0; i < n; ++i) {
//                TD1[p[i]] = n - i;
//           }
//
//       Generalizing this computation to a case where there
//       are multiple strings of differing lengths is trickier.
//       The key is to generate a TD1 that is as conservative
//       as necessary, meaning that no shift value can be larger
//       than one plus the length of the shortest string for
//       which you are looking.  The other key is to realize
//       that you must treat each string as though it were only
//       as long as the shortest string.  This is best illustrated
//       with an example.  Consider the following two strings:
//
//       DYNAMIC PROCEDURE
//       7654321 927614321
//
//       The numbers under each letter indicate the values of the
//       TD1 entries if we computed the array for each string
//       separately.  Taking the union of these two sets, and taking
//       the smallest value where there are conflicts would yield
//       the following TD1:
//
//       DYNAMICPODURE
//       7654321974321
//
//       Note that TD1['P'] equals 9; since n, the length of our
//       shortest string is 7, we know we should not have any
//       shift value larger than 8.  If we clamp our shift values
//       to this value, then we get
//
//       DYNAMICPODURE
//       7654321874321
//
//       Already, this looks fishy, but let's try it out on
//       s = "DYNAMPROCEDURE".  We know we should match on
//       the trailing procedure, but watch:
//
//       DYNAMPROCEDURE
//       ^^^^^^^|
//
//       Since DYNAMPR doesn't match one of our search strings,
//       we look at TD1[s[n]] == TD1['O'] == 7.  Applying this
//       shift, we get
//
//       DYNAMPROCEDURE
//                ^^^^^^^
//
//       As you can see, by shifting 7, we have gone too far, and
//       we miss our match. When computing TD1 for "PROCEDURE",
//       we must take only the first 7 characters, "PROCEDU".
//       Any trailing characters can be ignored (!) since they
//       have no effect on matching the first 7 characters of
//       the string.  Our modified TD1 then becomes
//
//       DYNAMICPODURE
//       7654321752163
//
//       When applied to s, we get TD1[s[n]] == TD1['O'] == 5,
//       leaving us with
//
//       DYNAMPROCEDURE
//              ^^^^^^^
//       which is just where we need to be to match on "PROCEDURE".
//
//   Going to this algorithm has speeded qgrep up on multi-string
//   searches from 20-30%.  The all-C version with this algorithm
//   became as fast or faster than the C+ASM version of the old
//   algorithm.  Thank you, Daniel Sunday, for your inspiration!
//
//   Note: if we are case-insensitive, then we expect the input
//   string to be upper-cased on entry to this routine.
//
//   Pete Stewart, August 14, 1990.
//*********************************************************************

void maketd1( LPGREPINFO lpgi, LPBYTE pch, int cch, int cchstart )
{
    int     ch;                             // Character
    int     i;                              // String index

    if ( (cch += cchstart) > lpgi->ge.ShortStrLen )
        cch = lpgi->ge.ShortStrLen;         // Use smaller count

    for ( i = cchstart; i < cch; i++ )
    {                                       // Examine each char left to right
        ch = (int)((unsigned char)*pch++);          // Get the character
        for (;;)
        {                                   // Loop to set up entries
            if ( ch < lpgi->ge.MinChar )
                lpgi->ge.MinChar = ch;      // Remember if smallest

            if ( ch > lpgi->ge.MaxChar )
                lpgi->ge.MaxChar = ch;      // Remember if largest

            if ( lpgi->ge.ShortStrLen - i < (int)lpgi->ge.td1[ ch ] )
                lpgi->ge.td1[ ch ] = (unsigned char)(lpgi->ge.ShortStrLen - i);

                                            // Set value if smaller than previous
            if (lpgi->CaseSen || !IsCharUpper( (char)ch ) )
                break;                      // Exit loop if done
            ch = LOWORD((DWORD)CharLowerA( (LPSTR)(DWORD)ch ));  // Force to lower case
        }
    }
}


//*********************************************************************
//  static int newstring( LPGREPINFO lpgi, LPBYTE s, int StrLen )
//
//  ARGUMENTS:
//      pStr        - String to add
//      StrLen  - Length of string
//  RETURNS:
//
//*********************************************************************

int newstring( LPGREPINFO lpgi, LPBYTE pStr, int StrLen )
{
    register STRINGNODE *pCurNode;          // Current string
    register STRINGNODE * *ppPrevNode;  // Pointer to previous link
    STRINGNODE          *pNewNode;          // New string
    int                     i;                      // Index
    int                     iNumMatched;        // Count of matched chars in 2 strings
    int                     iRelation;          // Count

    if ( lpgi->ge.ShortStrLen == -1 || StrLen < lpgi->ge.ShortStrLen )
        lpgi->ge.ShortStrLen = StrLen;                   // Remember length of shortest string

    if ( (i = lpgi->ge.TransTable[ *pStr ]) == 0 )   // If no existing list
    {
                                                        //  We have to start a new list
                                                        // Die if too many string lists
        if ( (i = lpgi->ge.TblEntriesUsed++) >= ASCII_LEN )
            RaiseException(ERROR_INVALID_PARAMETER, 0, 0, 0);

        #ifndef NDEBUG
            Assert( lpgi->ge.StringList[ i ] == NULL );  // Assert already initialized
        #endif

        lpgi->ge.TransTable[ *pStr ] = (unsigned char)i;     // Set pointer to new list

                                                        // Set pointer for other case
        if ( !lpgi->CaseSen && IsCharAlpha( *pStr ) )
            lpgi->ge.TransTable[ *pStr ^ '\040' ] = (unsigned char)i;
    }
    else if ( lpgi->ge.StringList[ i ] == NULL )
        return( 0 );

    if ( --StrLen == 0)                             // If 1-byte string
    {
        freenode( lpgi, lpgi->ge.StringList[ i ] ); // Free any existing stuff
        lpgi->ge.StringList[ i ] = NULL;            // No record here
        lpgi->ge.StrCount++;                        // We have a new string
        return( 1 );                                // String added
    }

    pStr++;                                         // Skip first char
    ppPrevNode = lpgi->ge.StringList + i;           // Get pointer to link
    pCurNode = *ppPrevNode;                     // Get pointer to node

    while ( pCurNode != NULL )                  // Loop to traverse match tree
    {
                                                        // Find minimum of string lengths
        i = (StrLen > pCurNode->s_must) ? pCurNode->s_must : StrLen;
                                                        // Compare the strings

        matchstrings( lpgi, (LPSTR )pStr, s_text( pCurNode ),
                          i, &iNumMatched, &iRelation );

        if ( iNumMatched == 0 )                 // If complete mismatch
        {
            if ( iRelation < 0 )                    // Was pStr <  s_text( pCurNode )
                break;                              // Break if insertion point found

            ppPrevNode = &(pCurNode->s_alt); // Get pointer to alternate link
            pCurNode = *ppPrevNode;             // Follow the link
        }

        else if ( i == iNumMatched )                        // Else if strings matched
        {
            if ( i == StrLen )                  // If new is prefix of current
            {
                                                        // Shorten text of node
                pCurNode = reallocnode(pCurNode, s_text( pCurNode ), StrLen);
                *ppPrevNode = pCurNode;
                                                        // If there are suffixes
                if ( pCurNode->s_suf != NULL )
                {
                    freenode( lpgi, pCurNode->s_suf );
                                                        // Suffixes no longer needed
                    pCurNode->s_suf = NULL;
                    lpgi->ge.StrCount++;                     // Account for this string
                }
                return( 1 );                        // String added
            }
            ppPrevNode = &(pCurNode->s_suf); // Get pointer to suffix link

            if ( (pCurNode = *ppPrevNode) == NULL )
                return (0);                         // Done if current is prefix of new

            pStr += i;                              // Skip matched portion
            StrLen -= i;
        }

        else                                            // Else partial match
        {
                //  We must split an existing node.
                //  This is the trickiest case.

            pNewNode = newnode( lpgi, s_text( pCurNode ) + iNumMatched,
                                      pCurNode->s_must - iNumMatched );

                                                        // Unmatched part of current string
            pCurNode = reallocnode( pCurNode, s_text( pCurNode ), iNumMatched );
            *ppPrevNode = pCurNode;

                                                        // Set length to matched portion
            pNewNode->s_suf = pCurNode->s_suf;  // Current string's suffixes

            if ( iRelation < 0 )                    // If new preceded current
            {                                           // First suffix is new string
                pCurNode->s_suf = newnode( lpgi, (LPSTR )pStr + iNumMatched, StrLen - iNumMatched );
                                                        // Alternate is part of current
                pCurNode->s_suf->s_alt = pNewNode;
            }
            else                                        // Else new followed current
            {                                           // Unmatched new string is alternate
                pNewNode->s_alt = newnode( lpgi, (LPSTR )(pStr + iNumMatched), StrLen - iNumMatched );
                pCurNode->s_suf = pNewNode;     // New suffix list
            }
            lpgi->ge.StrCount++;                             // One more string
            return( 1 );                            // String added
        }
    }

                                                        // Set pointer to new node
    *ppPrevNode = newnode( lpgi, (LPSTR )pStr, StrLen );
    (*ppPrevNode)->s_alt = pCurNode;        // Attach alternates
    lpgi->ge.StrCount++;                                     // One more string
    return( 1 );                                    // String added
}

//*********************************************************************
//  void addstring( LPGREPINFO lpgi, register LPSTR pStr, int StrLen )
//
//  ARGUMENTS:
//      pStr        - String to add
//      StrLen  - Length of string
//  RETURNS:
//      void
//*********************************************************************

void addstring( LPGREPINFO lpgi, register LPSTR pStr, int StrLen )
{
    register LPSTR pchChar;                     // Char pointer
    int             EndLine;                    // Match-at-end-of-line flag
    char            szTemp[MAXSTRLEN];          // Used as a scratchpad...
    BOOL            fOemDiff;                   // Is the oem characters different

    EndLine = lpgi->Flags & ENDLINE;            // Initialize flag
    pchChar = lpgi->Target;                     // Initialize pointer
    while ( StrLen-- > 0 )                      // While not at end of string
    {
        switch ( *pchChar = *(pStr++) )         // Switch on character
        {
            case '\\':                          // Escape
                                                // If next character "special"
                if ( StrLen > 0 && !IsCharAlphaNumeric( *pStr ) )
                {
                    StrLen--;                   // Decrement counter
                    *pchChar = *(pStr++);       // Copy next character
                }
                pchChar++;                      // Increment pointer
                break;

            case '$':                           // Special end character
                if ( StrLen == 0 )              // If end of string
                {
                    EndLine = ENDLINE;          // Set flag
                    break;                      // Exit switch
                }
                                                // Drop through
            default:                            // All others
                pchChar++;                      // Increment pointer
                break;
        }
    }
    if ( EndLine )
        *pchChar++ = EOS;                       // Add end character if needed
    lpgi->ge.TargetLen = pchChar - lpgi->Target;// Compute Target string length

    CharToOemBuffA(lpgi->Target, szTemp, lpgi->ge.TargetLen);
    fOemDiff = memcmp(lpgi->Target, szTemp, lpgi->ge.TargetLen) != 0;

    if ( !lpgi->CaseSen )
        CharUpperBuffA( lpgi->Target, lpgi->ge.TargetLen );     // Force to upper case if necessary
                                                // Add the string
    newstring( lpgi, (LPBYTE )lpgi->Target, lpgi->ge.TargetLen );

    // So far we have worked with Ansi characters.  Now lets try converting
    // this to OEM.  If the strings don't match we also add the OEM version
    // as to allow files typically made by Dos Apps to find their strings...

    if (fOemDiff)
    {
        DebugMsg(DM_TRACE, TEXT("Grep add OEM version of string %s"), szTemp);
        newstring( lpgi, (LPBYTE )szTemp, lpgi->ge.TargetLen );
    }


}

//*********************************************************************
//
//  int addstrings( register LPSTR pBuffer, register LPSTR pBufEnd,
//                       LPSTR pSepList, int *IsFirst )
//
//  ARGUMENTS:
//      pBuffer     - String list buffer containing search strings
//      pBufEnd     - End of search string buffer
//      pSepList        - List of search string separators
//      IsFirst     - Unused
//  RETURNS:
//      int         - Always returns 0
//
//*********************************************************************

int addstrings( LPGREPINFO lpgi, register LPSTR pBuffer, register LPSTR pBufEnd,
                     LPSTR pSepList, int *IsFirst )
{
    int StrLen;                                     // String length

    while ( pBuffer < pBufEnd )                 // While buffer not empty
    {
                                                        // Count leading separators
        StrLen = strnspn( pBuffer, pSepList, pBufEnd - pBuffer );

                                                        // Skip leading separators
        if ( (pBuffer += StrLen) >= pBufEnd )
            break;
                                                        // Get length of search string
        StrLen = strncspn( pBuffer, pSepList, pBufEnd - pBuffer );

                                                        // Select search string type
        if ( lpgi->addstr == NULL )
            lpgi->addstr = isexpr( lpgi, pBuffer, StrLen ) ? addexpr : addstring;

                                                        // If no match within string
        if ( lpgi->addstr == addexpr || (lpgi->Flags & (BEGLINE | COLNOS)) ||
              findlist( lpgi, pBuffer, pBuffer + StrLen ) == NULL )
            (*lpgi->addstr)( lpgi, pBuffer, StrLen );       // Add string to list

        pBuffer += StrLen;                      // Skip the string
    }

    return( 0 );
}

//*********************************************************************
//  int enumlist( LPGREPINFO lpgi, register STRINGNODE FAR *pNode, int cchprev )
//
//  ARGUMENTS:
//      pNode       - Pointer to list to dump
//      cchprev - Count of preceding characters
//  RETURNS:
//      int     - Number of strings in pNode
//
//*********************************************************************

int enumlist( LPGREPINFO lpgi, register STRINGNODE *pNode, int cchprev )
{
    int StrCnt;                                     // String count

    StrCnt = 0;                                     // Initialize string count
    while ( pNode != NULL )                     // While not at end of list
    {
                                                        // Make TD1 entries
        maketd1( lpgi, (LPBYTE )s_text( pNode ), (int)pNode->s_must, cchprev );
                                                        // Recurse to do suffixes
        StrCnt += (pNode->s_suf != NULL) ?
                enumlist( lpgi, pNode->s_suf, cchprev + pNode->s_must ) : 1;
        pNode = pNode->s_alt;                   // Do next alternate in list
    }
    return( StrCnt ? StrCnt : 1 );          // Return string count
}

//*********************************************************************
//  int enumstrings( LPGREPINFO lpgi )
//
//  ARGUMENTS:
//      NONE
//  RETURNS:
//      int     - Total number of strings in all StringList nodes
//*********************************************************************

int enumstrings( LPGREPINFO lpgi )
{
    unsigned char   uchChar;                    // Character
    int             i;                          // Index
    int             StrCnt;                     // String count

    StrCnt = 0;                                 // Initialize
    for ( i = 0; i < TRTABLEN; i++ )            // Loop through translation table
    {
        if ( lpgi->CaseSen || !IsCharLower( (unsigned char)i ) )
        {                                       // If case sensitive or not lower
            if (lpgi->ge.TransTable[ i ] == 0)
                continue;                       // Skip null entries

            uchChar = (unsigned char)i;                 // Get character
            maketd1( lpgi, &uchChar, 1, 0);     // Make TD1 entry

                                                        // Enumerate the list
            StrCnt += enumlist( lpgi,
                    lpgi->ge.StringList[ lpgi->ge.TransTable[ i ] ], 1 );
        }
    }
    return( StrCnt );                               // Return string count
}