/*** deblex.c - lexer module for expression evaluator * * Lexer routines for expression evaluator. */ ushort PASCAL ParseOp (char FAR *, token_t FAR *); LOCAL ushort NEAR PASCAL CanonOp (uchar FAR *, ptoken_t); LOCAL ushort NEAR PASCAL ParseIntConst (uchar FAR *, ptoken_t, uint, PLARGE_INTEGER); LOCAL ushort NEAR PASCAL ParseFloatConst (uchar FAR *, ptoken_t); LOCAL ushort NEAR PASCAL ParseIdent (uchar FAR *, ptoken_t, bool_t); LOCAL ushort NEAR PASCAL ParseChar (uchar FAR *, ptoken_t); LOCAL ushort NEAR PASCAL ParseString (uchar FAR *, ptoken_t); LOCAL ushort NEAR PASCAL FakeIdent (uchar FAR *pb, ptoken_t pTok); LOCAL bool_t NEAR PASCAL FInRadix (uchar, uint); struct Op { char str[5]; #ifdef WIN32 } OpStr[] = { #else } _based (_segname("_CODE")) OpStr[] = { #endif {"\003->*"}, {"\003>>="}, {"\003<<="}, {"\002+="}, {"\002-="}, {"\002*="}, {"\002/="}, {"\002%="}, {"\002^="}, {"\002&="}, {"\002|="}, {"\002<<"}, {"\002>>"}, {"\002=="}, {"\002!="}, {"\002<="}, {"\002>="}, {"\002&&"}, {"\002||"}, {"\002++"}, {"\002--"}, {"\002->"}, {"\001+"}, {"\001-"}, {"\001*"}, {"\001/"}, {"\001%"}, {"\001^"}, {"\001&"}, {"\001|"}, {"\001~"}, {"\001!"}, {"\001="}, {"\001<"}, {"\001>"}, {"\001,"}, }; #define OPCNT (sizeof (OpStr)/sizeof (struct Op)) /*** GetDBToken - Fetch next token from expression string * * status = GetDBToken (pbExpr, ptoken, radix, oper) * * Entry pbExpr = far pointer to expression string * ptoken = pointer to token return location * radix = default radix for numeric conversion * oper = previous operator * * Exit *ptoken = token as lexed from input string. If an * error occurred, the token will be of type OP_badtok. * If the token is a constant, its value will be determined and * placed in the token's 'val' field, and its type * (e.g., T_USHORT) will be placed in the token's 'typ' field. * If the previous operator is ., ->, ::, then ~ as the next * character is taken to be part of an identifier * * Returns ERR_NONE if no error * ERR_... if error encountered in parse * * Calls the appropriate routine to lex the input string. Handles: * * foo Identifiers OP_ident * +, -, etc. Operators OP_... * 345 Decimal constants OP_const * 0123 Octal constants OP_const * 0xABCD Hexadecimal constants OP_const * 'a', '\n' Character constants OP_const * "foo" String constants OP_const * L"foo" Wide string constants OP_const * 3.45 Floating point constants OP_const * 0xff handle to symbol follows OP_ident * * The handle to symbol is a hack to make sure that an expression * can be generated from and locked to the handle to symbol that is * passed to EEGetTMFromHSYM by the kernel */ ushort PASCAL GetDBToken (uchar FAR *pbExpr, ptoken_t pTok, uint radix, op_t oper) { uchar c; uchar FAR *pbSave = pbExpr; ushort error; memset (pTok, 0, sizeof (token_t)); pTok->opTok = OP_badtok; pTok->pbTok = (char FAR *)pbExpr; c = *pbExpr; if (c == '~') { switch (oper) { case OP_dot: case OP_pointsto: case OP_uscope: case OP_bscope: case OP_pmember: case OP_dotmember: error = ParseIdent (pbExpr, pTok, TRUE); pTok->cbTok = (uchar)(pTok->pbEnd - pTok->pbTok); return (error); } } if (isdigit (c)) { error = ParseConst (pbExpr, pTok, radix); } else if (((c == 'L') && (pbExpr[1] == '"')) || (c == '"')) { error = ParseString (pbExpr, pTok); } else if ((iscsymf(c)) || (c == '?') || (c == '$') || (c == '@')) { error = ParseIdent (pbExpr, pTok, FALSE); } else if (c == '\'') { error = ParseChar (pbExpr, pTok); } else if (c == '.') { c = *(pbExpr+1); if ( (c == 0) || (c == '+') || (c=='-') || (c==')')) { error = ParseIdent (pbExpr, pTok, FALSE); } else if ( isdigit(c) ) { error = ParseConst (pbExpr, pTok, radix); } else { error = ParseOp (pbExpr, pTok); } } else if (c == 0xff) { error = FakeIdent (pbExpr, pTok); } else { error = ParseOp (pbExpr, pTok); } pTok->cbTok = (uchar)(pTok->pbEnd - pTok->pbTok); // note that caller must compute index of token return (error); } /** ParseConst - Parse an integer or floating point constant string * * error = ParseConst (pb, pTok, radix); * * Entry pb = far pointer to string * pTok = pointer to return token * radix = default radix for numeric conversion * * Exit *pTok initialized for constant * pTok->pbEnd = end of token * * Returns ERR_NONE if no error * ERR_... if error encountered */ ushort PASCAL ParseConst (uchar FAR *pb, ptoken_t pTok, uint radixin) { char FAR *pbSave = pb; uint radix = radixin; bool_t fUSuffix = FALSE; bool_t fLSuffix = FALSE; bool_t fFitsInt = FALSE; bool_t fFitsUint = FALSE; bool_t fFitsLong = FALSE; bool_t fFitsQuad = FALSE; LARGE_INTEGER value; CV_typ_t typ; ushort error; // check beginning of constant for radix specifiers if ((*pb == '0') && (*(pb + 1) != '.')) { pb++; if (toupper (*pb) == 'X') { // Hex constant 0x....... radix = 16; ++pb; } else if (toupper(*pb) == 'T') { // Decimal constant 0t...... radix = 10; ++pb; } else if (toupper(*pb) == 'O') { // Octal constant 0........ radix = 8; ++pb; } else { // No radix override (012 is in current radix) --pb; } } if ((*pb != '.') && FInRadix (*pb, radix)) { // save pointer to string and parse as integer constant if ((error = ParseIntConst (pb, pTok, radix, &value)) != ERR_NONE) { // error parsing as integer constant return (error); } if ((*pTok->pbEnd == '.') || (toupper (*pTok->pbEnd) == 'E') || (toupper (*pTok->pbEnd) == 'F')) { // Back up and reparse string as floating point return (ParseFloatConst (pbSave, pTok)); } } else if (*pb == '.') { return (ParseFloatConst (pbSave, pTok)); } else { return (ERR_SYNTAX); } // Check for the 'u' and 'l' modifiers. pb = pTok->pbEnd; if (toupper(*pb) == 'U') { ++pb; fUSuffix = TRUE; if (toupper(*pb) == 'L') { ++pb; fLSuffix = TRUE; } } else if (toupper(*pb) == 'L') { ++pb; fLSuffix = TRUE; if (toupper(*pb) == 'U') { ++pb; fUSuffix = TRUE; } } // ANSI spec, section 3.1.3.2: // // The type of an integer constant is the first of the corresponding // list in which its value can be represented: // unsuffixed decimal : int, long int, unsigned long int // unsuffixed octal or hex : int, unsigned int, long int, unsigned long int; // suffixed by the letter u or U : unsigned int, unsigned long int // suffixed by the letter l or L : long int, unsigned long int // suffixed by both the letters u or U and l or L: unsigned long int. // // To extend for quad values: // unsuffixed decimal: postpend __int64 // octal or hex: postpend __int64, unsigned __int64 // suffix with u or U: postpend unsigned __int64 // suffix with l or L: postpend __int64, unsigned __int64 // suffix with both: postpend unsigned __int64 // Technically, ANSI doesn't know anything about quads. It's being assumed here // to be either the same or larger than long (where ANSI sees long as the longest). // if ( (value.HighPart == 0) || (value.HighPart == -1L) ) { if (value.LowPart < 0x8000L) { fFitsInt = TRUE; } if (value.LowPart < 0x10000L && value.HighPart == 0 ) { fFitsUint = TRUE; } if (value.LowPart < 0x80000000L) { fFitsLong = TRUE; } if (value.HighPart == 0) { typ = T_UINT4; } else { typ = T_INT4; } } else { if ( (LONG)value.HighPart < 0x80000000L ) { fFitsQuad = TRUE; } typ = T_UINT8; } if ((fUSuffix) && (fLSuffix)) { // it's already the smaller of T_UINT8 or T_UINT4 ; } else if (fUSuffix) { if (fFitsUint) { typ = T_UINT2; } } else if (fLSuffix) { // // might be long int, unsigned long int, // __int64, unsigned __int64 // if (fFitsLong) { typ = T_INT4; } if (fFitsQuad) { typ = T_INT8; } } else { if (fFitsInt) { typ = T_INT2; } else if ((fFitsUint) && (radix != 10)) { typ = T_UINT2; } else if (fFitsLong) { typ = T_INT4; } else if (fFitsQuad) { typ = T_INT8; } } pTok->typ = typ; pTok->opTok = OP_const; pTok->pbEnd = pb; VAL_QUAD (pTok) = value; return (ERR_NONE); } /*** ParseIntConst - Parse an integer constant * * error = ParseIntConst (pb, pTok, radix, pval) * * Entry pb = pointer to pointer to input string * pTok = pointer to token return * radix = radix (8, 10, 16) * pval = pointer to ulong for value of constant * * Exit pTok updated to reflect token * * Returns ERR_NONE if the input string was successfully parsed as an integer * constant with the given radix * ERR_... if error. * * Note This routine runs on any processor with LARGE_INTEGER support. */ LOCAL ushort NEAR PASCAL ParseIntConst (uchar FAR *pb, ptoken_t pTok, uint radix, PLARGE_INTEGER pval) { char c; LARGE_INTEGER li; LARGE_INTEGER maxvalue; ULONG junk; maxvalue.QuadPart = (ULONGLONG)-1 / (ULONGLONG)radix; li.QuadPart = 0; DASSERT(radix == 10 || radix == 8 || radix == 16); for (;;) { c = *pb; if (((radix > 10) && !isxdigit (c)) || ((radix <= 10) && !isdigit (c))) { // Must have reached the end break; } if (!FInRadix(c, radix)) { return (ERR_SYNTAX); } if (li.QuadPart < 0 || li.QuadPart > maxvalue.QuadPart) { // // This is the overflow case // return ERR_CONSTANT; } li.QuadPart = li.QuadPart * radix; if (isdigit (c = *pb)) { li.QuadPart += (c - '0'); } else { li.QuadPart += (toupper(c) - 'A' + 10); } pb++; } *(PLARGE_INTEGER)pval = li; pTok->pbEnd = pb; return (ERR_NONE); } /** ParseFloatConst - Parse a floating-point constant * * fSuccess = ParseFloatConst (pb, pTok); * * Entry pb = pointer to input string * pTok = pointer to parse token structure * * Exit pTok updated to reflect floating point number if one * is found. * * Returns ERR_NONE if no error encountered * ERR_... error */ LOCAL ushort NEAR PASCAL ParseFloatConst (uchar FAR *pb, ptoken_t pTok) { char *pEnd; CV_typ_t typ; _ULDBL12 val; char *pVal = (char *)&val; // check for a single '.' - strtold returns 0 in such a case if (((*pb == '.') && (!isdigit (*(pb + 1)))) || (strlen (pb)) >= 100) { return (ERR_SYNTAX); } // Call library routine to figure out the value. This will also // return a pointer to the first character which is not // part of the value -- this allows us to check for an // 'f' or 'l' suffix character: // // ANSI, Section 3.1.3.1: // // "An unsuffixed floating constant has type double. // If suffixed by the letter f or F, it has type float. // If suffixed by the letter l or L, it has type long double." if ( __strgtold12(&val,&pEnd,pb,1) != 0 ) { return(ERR_SYNTAX); } if (toupper (*pEnd) == 'F') { pEnd++; typ = T_REAL32; _ld12tof(&val,(FLOAT *)&VAL_FLOAT(pTok)); } // MBH - bugbug (FP) // Is the correct handling for us? // #if defined (TARGET_MIPS) || defined( TARGET_i386) || defined(ALPHA) || defined(PPC) else if (toupper(*pEnd) == 'L') { pEnd++; #if defined( TARGET_MIPS) || defined(TARGET_ALPHA) || defined(TARGET_PPC) // // NOTENOTE: v_willhe, MIPS doesn't support long double, but treats // it as a double, so we have to emulate. typ = T_REAL64; _ld12tod(&val,(UDOUBLE *)&VAL_DOUBLE(pTok)); #endif #ifdef TARGET_i386 typ = T_REAL80; _ld12told(&val,(_ULDOUBLE *)&VAL_LDOUBLE(pTok)); #endif } #endif /* defined (TARGET_MIPS) || defined(TARGET_i386) */ else { typ = T_REAL64; _ld12tod(&val,(UDOUBLE *)&VAL_DOUBLE(pTok)); } pTok->opTok = OP_const; pTok->typ = typ; pTok->pbEnd = pEnd; return (ERR_NONE); } /*** FakeIdent - Fake an identifier from handle to symbol * * error = FakeIdent (pb, pTok); * * Entry pb = far pointer to string * pTok = pointer to return token * * Exit *pTok initialized for identifier fro handle to symbol * pTok->pbEnd = end of token (first non-identifier character) * * Returns ERR_NONE * */ LOCAL ushort NEAR PASCAL FakeIdent (uchar FAR *pb, ptoken_t pTok) { pTok->opTok = OP_hsym; pTok->pbEnd = pb + sizeof (char) + sizeof (HSYM); return (ERR_NONE); } /*** ParseIdent - Parse an identifier * * error = ParseIdent (pb, pTok, fTilde); * * Entry pb = far pointer to string * pTok = pointer to return token * fTilde = TRUE if ~ acceptable as first character * * Exit *pTok initialized for identifier * pTok->pbEnd = end of token (first non-identifier character) * * Returns ERR_NONE if no error * ERR_... if error encountered * Also handles the 'sizeof', 'by', 'wo' and 'dw' operators, since these * look like identifiers. * */ LOCAL ushort NEAR PASCAL ParseIdent (uchar FAR *pb, ptoken_t pTok, bool_t fTilde) { int len; if ( *pb == '.' ) { ++pb; pTok->opTok = OP_ident; pTok->pbEnd = pb; } else if ((iscsymf(*pb)) || (*pb == '?') || (*pb == '$') || (*pb == '@') || ((*pb == '~') && (fTilde == TRUE))) { ++pb; while ((iscsym(*pb)) || (*pb == '?') || (*pb == '$') || (*pb == '@')) { ++pb; } pTok->opTok = OP_ident; pTok->pbEnd = pb; } // Check for the 'operator', 'sizeof', 'by', 'wo' and 'dw' operators. if ((len = pTok->pbEnd - pTok->pbTok) == 6) { if (strncmp (pTok->pbTok, "sizeof", 6) == 0) { pTok->opTok = OP_sizeof; } } #if !defined (C_ONLY) else if (len == 8) { if (strncmp (pTok->pbTok, "operator", 8) == 0) { // allow for operator op return (CanonOp (pb, pTok)); } } #endif else if (len == 2) { // Could be 'by', 'wo' or 'dw'... if (_strnicmp (pTok->pbTok, "BY", 2) == 0) { pTok->opTok = OP_by; } else if (_strnicmp (pTok->pbTok, "WO", 2) == 0) { pTok->opTok = OP_wo; } else if (_strnicmp (pTok->pbTok, "DW", 2) == 0) { pTok->opTok = OP_dw; } } return (ERR_NONE); } /** CanonOp - canonicalize operator string * * error = CanonOp (pb, pTok) * * Entry pb = pointer to first character after "operator" * * Exit string rewritten to ripple excess white space to the right * pTok updated to reflect total function name * pb points to '(' of function call * * Returns ERR_NONE if no error * ERR_... if error */ #if !defined (C_ONLY) LOCAL ushort NEAR PASCAL CanonOp (uchar FAR *pb, ptoken_t pTok) { char FAR *pOp = pb; char FAR *pTemp; int i; while (isspace (*pb)) { pb++; } if (*pb == 0) { return (ERR_SYNTAX); } if (isalpha (*pb)) { // process new, delete // process // {[const &| volatile] id [const &| volatile]} // [{\*[const &| volatile]}*[{\&[const &| volatile]}]] // // Note that the current code only processes a single id // new (), delete () and type () will pass. All others will // cause a syntax error later. pTemp = pb; while (isalpha (*pTemp)) { // skip to end of alpha string pTemp++; } *pOp++ = ' '; memmove (pOp, pb, pTemp - pb); pOp += pTemp - pb; pb = pTemp; } else if (*pb == '(') { // process "( )" pb++; while (*pb++ != ')') { if (!isspace (*pb)) { return (ERR_SYNTAX); } } *pOp++ = '('; *pOp++ = ')'; } else if (*pb == '[') { // process "[ ]" pb++; while (*pb++ != ']') { if (!isspace (*pb)) { return (ERR_SYNTAX); } } *pOp++ = '['; *pOp++ = ']'; } else { // process operator strings for ( i = 0; i < OPCNT; i++) { if (strncmp (OpStr[i].str + 1, pb, OpStr[i].str[0]) == 0) { break; } } if (i == OPCNT) { return (ERR_SYNTAX); } memmove (pOp, OpStr[i].str + 1, OpStr[i].str[0]); pOp += OpStr[i].str[0]; pb += OpStr[i].str[0]; } // blank out moved characters pTok->pbEnd = pOp; while (pOp < pb) { *pOp++ = ' '; } // skip to the next token and check to make sure it is a ( // the zero and ) checks are to allow "bp operator +" and // bp (operator +) while (isspace (*pb)) { pb++; } if ((*pb == '(') || (*pb == 0) || (*pb == ')')) { return (ERR_NONE); } else { return (ERR_SYNTAX); } } #endif /** GetEscapedChar - Parse an escaped character * * error = GetEscapedChar (ppb, pVal); * * Entry pb = far pointer to far pointer to string. pb points to * character after the \ * * Exit ppb updated to end of escaped character constant * *pVal = value of escaped character constant * * Returns ERR_NONE if no error * ERR_... if error encountered */ ushort PASCAL GetEscapedChar (char FAR * FAR *pb, ushort FAR *pVal) { char c; uint nval = 0; c = **pb; (*pb)++; switch (c) { case 'n': *pVal = '\n'; break; case 't': *pVal = '\t'; break; case 'b': *pVal = '\b'; break; case 'r': *pVal = '\r'; break; case 'f': *pVal = '\f'; break; case 'v': *pVal = '\v'; break; case 'a': *pVal = '\a'; break; case 'x': if (!FInRadix (**pb, 16)) { return (ERR_SYNTAX); } for (;;) { c = **pb; if (!FInRadix (c, 16)) { break; } nval *= 16; if (isdigit (c)) { nval += c - '0'; } else { nval += toupper(c) - 'A' + 10; } if (nval > 255) { return (ERR_CONSTANT); } (*pb)++; } *pVal = (uchar)nval; break; default: if (FInRadix (c, 8)) { // Octal character constant nval = (c - '0'); for (;;) { c = **pb; if (!isdigit (c)) { break; } if (!FInRadix (c, 8)) { return (ERR_SYNTAX); } nval = nval * 8 + (c - '0'); if (nval > 255) { return (ERR_CONSTANT); } (*pb)++; } *pVal = (uchar)nval; } else { *pVal = c; } break; } return (ERR_NONE); } /** ParseChar - Parse an character constant * * error = ParseChar (pb, pTok); * * Entry pb = far pointer to string * pTok = pointer to return token * * Exit *pTok initialized for character constant * pTok->pbEnd = end of token * * Returns ERR_NONE if no error * ERR_... if error encountered */ LOCAL ushort NEAR PASCAL ParseChar (uchar FAR *pb, ptoken_t pTok) { char c; ushort value; ushort retval; DASSERT(*pb == '\''); ++pb; if ((*pb == '\'') || (*pb == 0)) { return (ERR_SYNTAX); } while ((*pb != '\'') && (*pb != 0)) { if ((c = *pb++) == '\\') { // Escaped character constant if ((retval = GetEscapedChar (&pb, &value)) != ERR_NONE) { return (retval); } } else { value = c; } } if (*pb++ != '\'') { return (ERR_MISSINGSQ); } pTok->opTok = OP_const; VAL_CHAR(pTok) = (char) value; pTok->typ = T_RCHAR; pTok->pbEnd = pb; return (ERR_NONE); } /** ParseString - Parse a string constant "..." or L"..." * * error = ParseString (pb, pTok, fWide); * * Entry pb = far pointer to string * pTok = pointer to return token * * Exit *pTok initialized for string constant * pTok->pbEnd = end of token * * Returns ERR_NONE if no error * ERR_... if error encountered * * Note The string pointer will point to the initial " or L" * and the byte count will include beginning " or L" and the * ending ". The evaluator will have to adjust for the extra * characters and store the proper data. */ LOCAL ushort NEAR PASCAL ParseString (uchar FAR *pb, ptoken_t pTok) { if (*pb =='L') { // skip initial L if L" pb++; } // skip initial " pb++; // search for ending double quote while ((*pb != 0) && (*pb != '"')) { if (*pb == '\\' && *(pb + 1) == '"') { pb++; } pb++; } if (!*pb) { // reached end of string return (ERR_MISSINGDQ); } pTok->opTok = OP_const; pTok->typ = T_PCHAR; pTok->pbEnd = pb + 1; return (ERR_NONE); } /*** FInRadix - Is character appropriate for radix? * * fOK = FInRadix (ch, radix) * * Entry ch = character to check * radix = 8, 10, 16 * * Exit none * * Returns TRUE if character is in radix * FALSE if not. * */ LOCAL bool_t NEAR PASCAL FInRadix (uchar ch, uint radix) { switch (radix) { case 8: if (ch >= '8') { return (FALSE); } // Fall through case 10: return (isdigit(ch)); case 16: return (isxdigit(ch)); default: DASSERT (FALSE); return (FALSE); } }