| 1 |
# ----------------------------------------------------------------------
|
| 2 |
# clex.py
|
| 3 |
#
|
| 4 |
# A lexer for ANSI C.
|
| 5 |
# ----------------------------------------------------------------------
|
| 6 |
|
| 7 |
import lex
|
| 8 |
|
| 9 |
# Reserved words
|
| 10 |
# Token names for the C keywords.  Each name is the upper-case spelling of
# the keyword it stands for.
reserved = (
    'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO',
    'DOUBLE', 'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT',
    'LONG', 'REGISTER', 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC',
    'STRUCT', 'SWITCH', 'TYPEDEF', 'UNION', 'UNSIGNED', 'VOID', 'VOLATILE',
    'WHILE',
)
|
| 16 |
|
| 17 |
# Complete list of token names: the reserved words followed by every
# literal, operator and punctuation token.
tokens = reserved + (
    # Literals (identifier, integer constant, float constant, string constant,
    # char const)
    'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',

    # Increment/decrement (++,--)
    'PLUSPLUS', 'MINUSMINUS',

    # Structure dereference (->)
    'ARROW',

    # Conditional operator (?)
    'CONDOP',

    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis (...)
    'ELLIPSIS',
)
|
| 49 |
|
| 50 |
# Completely ignored characters
|
| 51 |
t_ignore = ' \t\x0c'  # space, tab and form feed (\x0c)
|
| 52 |
|
| 53 |
# Newlines
|
| 54 |
def t_NEWLINE(t):
    r'\n+'
    # The string above is the rule's regular expression (one or more
    # newlines), not descriptive text.  Advance the line counter by the
    # number of newlines matched; nothing is returned, so no token results.
    t.lineno += t.value.count("\n")
|
| 57 |
|
| 58 |
# Operators
|
| 59 |
# Regular expressions for the arithmetic, bitwise, logical and relational
# operators.  Characters that are regex metacharacters (+ * | ^) are escaped.
t_PLUS             = r'\+'
t_MINUS            = r'-'
t_TIMES            = r'\*'
t_DIVIDE           = r'/'
t_MOD              = r'%'
t_OR               = r'\|'
t_AND              = r'&'
t_NOT              = r'~'
t_XOR              = r'\^'
t_LSHIFT           = r'<<'
t_RSHIFT           = r'>>'
t_LOR              = r'\|\|'
t_LAND             = r'&&'
t_LNOT             = r'!'
t_LT               = r'<'
t_GT               = r'>'
t_LE               = r'<='
t_GE               = r'>='
t_EQ               = r'=='
t_NE               = r'!='
|
| 79 |
|
| 80 |
# Assignment operators
|
| 81 |
|
| 82 |
# Regular expressions for plain and compound assignment.
t_EQUALS           = r'='
t_TIMESEQUAL       = r'\*='
t_DIVEQUAL         = r'/='
t_MODEQUAL         = r'%='
t_PLUSEQUAL        = r'\+='
t_MINUSEQUAL       = r'-='
t_LSHIFTEQUAL      = r'<<='
t_RSHIFTEQUAL      = r'>>='
t_ANDEQUAL         = r'&='
t_OREQUAL          = r'\|='
# The caret must be escaped: a bare '^' is the start-of-string anchor, so
# r'^=' would only ever match at the very beginning of the input.
t_XOREQUAL         = r'\^='
|
| 93 |
|
| 94 |
# Increment/decrement
|
| 95 |
# Increment/decrement
t_PLUSPLUS         = r'\+\+'
t_MINUSMINUS       = r'--'

# ->
t_ARROW            = r'->'

# ?
t_CONDOP           = r'\?'

# Delimiters
t_LPAREN           = r'\('
t_RPAREN           = r'\)'
t_LBRACKET         = r'\['
t_RBRACKET         = r'\]'
t_LBRACE           = r'\{'
t_RBRACE           = r'\}'
t_COMMA            = r','
t_PERIOD           = r'\.'
t_SEMI             = r';'
t_COLON            = r':'
t_ELLIPSIS         = r'\.\.\.'
|
| 116 |
|
| 117 |
# Identifiers and reserved words
|
| 118 |
|
| 119 |
# Lookup table from the lower-case spelling of each reserved word to its
# token name, e.g. 'while' -> 'WHILE'.
reserved_map = { }
for r in reserved:
    reserved_map[r.lower()] = r
|
| 122 |
|
| 123 |
def t_ID(t):
    r'[A-Za-z_][\w_]*'
    # The string above is the rule's regular expression.  A matched word
    # that appears in reserved_map gets that keyword's token type; any
    # other word is a plain identifier.
    t.type = reserved_map.get(t.value,"ID")
    return t
|
| 127 |
|
| 128 |
# Integer literal
|
| 129 |
# Decimal digits with an optional unsigned/long suffix.  Regex alternation
# takes the first alternative that matches, so the two-letter suffixes are
# listed before the one-letter ones; otherwise "10UL" would match only "10U".
t_ICONST = r'\d+([uU][lL]|[lL][uU]|[uU]|[lL])?'
|
| 130 |
|
| 131 |
# Floating literal
|
| 132 |
# Either digits '.' digits with an optional exponent, or digits with a
# mandatory exponent; an optional l/L or f/F suffix may follow.  The exponent
# marker accepts both 'e' and 'E'.
# NOTE(review): the blanks around '|' are only harmless if the pattern is
# compiled in verbose mode -- confirm against the lex module in use.
t_FCONST = r'((\d+)(\.\d+)([eE](\+|-)?(\d+))? | (\d+)[eE](\+|-)?(\d+))([lL]|[fF])?'
|
| 133 |
|
| 134 |
# String literal
|
| 135 |
# Double-quoted text: each element is either a character other than
# backslash or newline, or a backslash followed by one character; the
# non-greedy repeat stops at the first closing quote that completes a match.
t_SCONST = r'\"([^\\\n]|(\\.))*?\"'
|
| 136 |
|
| 137 |
# Character constant 'c' or L'c'
|
| 138 |
# Optional 'L' prefix, then single-quoted text built from the same elements
# as a string constant (ordinary characters or backslash escapes).  The
# pattern itself does not limit how many characters sit between the quotes.
t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''
|
| 139 |
|
| 140 |
# Comments
|
| 141 |
def t_comment(t):
    r' /\*(.|\n)*?\*/'
    # The string above is the rule's regular expression: '/*', then the
    # shortest run of characters (newlines included) up to '*/'.
    # NOTE(review): the leading blank in the pattern is only harmless if
    # the pattern is compiled in verbose mode -- confirm.
    # Keep the line counter in step with newlines inside the comment;
    # nothing is returned, so no token results.
    t.lineno += t.value.count('\n')
|
| 144 |
|
| 145 |
# Preprocessor directive (ignored)
|
| 146 |
def t_preprocessor(t):
    r'\#(.)*?\n'
    # The string above is the rule's regular expression: a '#' and
    # everything after it up to and including the next newline.
    # The match always ends with exactly one newline, so the line counter
    # advances by one; nothing is returned, so no token results.
    t.lineno += 1
|
| 149 |
|
| 150 |
def t_error(t):
    # Report the first character of the unmatched input, then skip that one
    # character via the token's skip() method.
    # print is called with a single parenthesised argument so the statement
    # is valid, and prints the same text, on both Python 2 and Python 3.
    print("Illegal character %s" % repr(t.value[0]))
    t.skip(1)
|
| 153 |
|
| 154 |
# Build the lexer object at import time, with lex's optimize flag switched on.
lexer = lex.lex(optimize=1)

if __name__ == "__main__":
    # When executed as a script, hand the lexer to lex.runmain().
    lex.runmain(lexer)
|
| 157 |
|
| 158 |
|
| 159 |
|
| 160 |
|
| 161 |
|