All of lore.kernel.org
 help / color / mirror / Atom feed
From: Michael Roth <mdroth@linux.vnet.ibm.com>
To: qemu-devel@nongnu.org
Cc: kwolf@redhat.com, peter.maydell@linaro.org, aliguori@us.ibm.com,
	blauwirbel@gmail.com, pbonzini@redhat.com, eblake@redhat.com
Subject: [Qemu-devel] [PATCH v3 18/22] qidl: add lexer library (based on QC parser)
Date: Thu,  4 Oct 2012 12:33:37 -0500	[thread overview]
Message-ID: <1349372021-31212-19-git-send-email-mdroth@linux.vnet.ibm.com> (raw)
In-Reply-To: <1349372021-31212-1-git-send-email-mdroth@linux.vnet.ibm.com>

Adds an abstract Lexer class that handles tokenizing via a
peek/pop/peek_line/pop_line interface, along with an implementation for C
based on the lexer from qc.git.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
 scripts/lexer.py |  306 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 306 insertions(+)
 create mode 100644 scripts/lexer.py

diff --git a/scripts/lexer.py b/scripts/lexer.py
new file mode 100644
index 0000000..96c6c1a
--- /dev/null
+++ b/scripts/lexer.py
@@ -0,0 +1,306 @@
+#
+# QEMU Lexer Library
+#
+# Copyright IBM, Corp. 2012
+#
+# Authors:
+#  Anthony Liguori <aliguori@us.ibm.com>
+#  Michael Roth    <mdroth@linux.vnet.ibm.com>
+#
+# This work is licensed under the terms of the GNU GPLv2 or later.
+# See the COPYING file in the top-level directory.
+#
+# The lexer code is based off of:
+#   http://www.lysator.liu.se/c/ANSI-C-grammar-l.html
+
+class Input(object):
+    """Character-level reader over a file-like object, buffered one line
+    at a time.
+
+    fp only needs a readline() method that returns an empty string once
+    the input is exhausted.
+    """
+
+    def __init__(self, fp):
+        self.fp = fp
+        self.line = None
+        self.offset = 0
+        self.is_eof = False
+        self.__fill_buf()
+
+    def __fill_buf(self):
+        """Load the next line when the buffer is empty; latch is_eof when
+        readline() has nothing more to give."""
+        if self.line or self.is_eof:
+            return
+        self.line = self.fp.readline()
+        if not self.line:
+            self.is_eof = True
+
+    def peek(self):
+        """Return the next character without consuming it ("" at EOF)."""
+        return "" if self.is_eof else self.line[self.offset]
+
+    def pop(self):
+        """Consume and return the next character ("" at EOF)."""
+        if self.is_eof:
+            return ""
+        ch = self.line[self.offset]
+        if self.offset + 1 == len(self.line):
+            # That was the last character of the line: move on to the next.
+            self.offset = 0
+            self.line = None
+            self.__fill_buf()
+        else:
+            self.offset += 1
+        return ch
+
+    def peek_line(self):
+        """Return the whole buffered line, including any part of it that
+        has already been consumed through pop()."""
+        return self.line
+
+    def pop_line(self):
+        """Return the whole buffered line and advance to the next one."""
+        line, self.line, self.offset = self.line, None, 0
+        self.__fill_buf()
+        return line
+
+    def eof(self):
+        """Return True once the underlying file has been exhausted."""
+        return self.is_eof
+
+class Lexer(object):
+    """Abstract tokenizer with one token of lookahead.
+
+    Derived classes implement get_token(), returning a (type, value)
+    pair. Tokens whose type is listed in ignored_types are skipped
+    silently by every peek/pop/check method.
+    """
+
+    def __init__(self, input, ignored_types=None):
+        """
+        input: object providing eof(), peek_line() and pop_line()
+               (plus whatever the derived get_token() reads from it).
+        ignored_types: token types to skip; defaults to none.
+        """
+        self.input = input
+        # A fresh list per instance: a shared "[]" default would leak
+        # mutations from one lexer into every other one.
+        if ignored_types is None:
+            ignored_types = []
+        self.ignored_types = ignored_types
+        self.current_type = None
+        self.current_value = None
+
+    def get_token(self):
+        """Return the next (type, value) pair; must be overridden."""
+        # NotImplemented is a comparison sentinel and is not callable;
+        # NotImplementedError is the exception meant for abstract methods.
+        raise NotImplementedError("derived classes must implement this method")
+
+    def __ensure_token(self):
+        """Buffer the next non-ignored token unless one is already held
+        or the input is exhausted."""
+        while self.current_type is None and not self.input.eof():
+            t, v = self.get_token()
+            if t not in self.ignored_types:
+                self.current_type = t
+                self.current_value = v
+
+    def peek(self):
+        """Return the value of the next token without consuming it."""
+        self.__ensure_token()
+        return self.current_value
+
+    def peek_line(self):
+        """Return the input's current line, after making sure a token is
+        buffered."""
+        self.__ensure_token()
+        return self.input.peek_line()
+
+    def peek_type(self):
+        """Return the type of the next token without consuming it."""
+        self.__ensure_token()
+        return self.current_type
+
+    def pop(self):
+        """Consume the next token and return its value (None at EOF)."""
+        self.__ensure_token()
+        v = self.current_value
+        self.current_type = None
+        self.current_value = None
+        return v
+
+    def pop_line(self):
+        """Discard the buffered token and return the input's current line,
+        advancing the input to the next line."""
+        self.__ensure_token()
+        self.current_type = None
+        self.current_value = None
+        return self.input.pop_line()
+
+    def pop_expected(self, type_expected=None, value_expected=None):
+        """Consume the next token, checking its type and optionally its
+        value.
+
+        Raises Exception when the type differs from type_expected, or
+        when value_expected is given and the value differs from it.
+        """
+        self.__ensure_token()
+        if self.current_type != type_expected:
+            raise Exception("expected '%s', got %s %s" %
+                (type_expected, self.current_type, self.current_value))
+        if value_expected is not None:
+            if self.current_value != value_expected:
+                raise Exception("expected '%s', got %s" %
+                    (value_expected, self.current_value))
+        return self.pop()
+
+    def check_token(self, type_expected, value_expected=None):
+        """Return True if the next token has the given type (and value,
+        when value_expected is given); nothing is consumed."""
+        self.__ensure_token()
+        if self.current_type != type_expected:
+            return False
+        if value_expected is not None:
+            if self.current_value != value_expected:
+                return False
+        return True
+
+    def eof(self):
+        """Return True when no further non-ignored token is available."""
+        self.__ensure_token()
+        return self.current_type is None
+
+def in_range(ch, start, end):
+    """Return True when start <= ch <= end (both bounds inclusive)."""
+    return start <= ch <= end
+
+# D			[0-9]
+# L			[a-zA-Z_]
+# H			[a-fA-F0-9]
+# E			[Ee][+-]?{D}+
+# FS			(f|F|l|L)
+# IS			(u|U|l|L)*
+
+def is_D(ch):
+    """D: decimal digit, [0-9]."""
+    return '0' <= ch <= '9'
+
+def is_L(ch):
+    """L: identifier letter, [a-zA-Z_]."""
+    if ch == '_':
+        return True
+    return 'a' <= ch <= 'z' or 'A' <= ch <= 'Z'
+
+def is_H(ch):
+    """H: hexadecimal digit, [a-fA-F0-9]."""
+    if '0' <= ch <= '9':
+        return True
+    return 'a' <= ch <= 'f' or 'A' <= ch <= 'F'
+
+def is_FS(ch):
+    """FS: floating-point suffix character, one of f, F, l, L.
+
+    The length check matters: the empty string is a substring of every
+    string, so a bare "ch in 'fFlL'" would accept the "" that
+    Input.peek() returns at end of input.
+    """
+    return len(ch) == 1 and ch in 'fFlL'
+
+def is_IS(ch):
+    """IS: integer suffix character, one of u, U, l, L.
+
+    The length check matters: the empty string is a substring of every
+    string, so a bare "ch in 'uUlL'" would accept the "" that
+    Input.peek() returns at end of input and a "while is_IS(ch)" loop
+    would then never terminate.
+    """
+    return len(ch) == 1 and ch in 'uUlL'
+
+class CLexer(Lexer):
+    """Lexer for C source text.
+
+    get_token() yields (type, value) pairs whose type is one of:
+      - a keyword from KEYWORDS (type and value are both the keyword)
+      - 'symbol'     any other identifier
+      - 'literal'    character, string and integer constants
+      - 'operator'   punctuators; digraphs are reported as the
+                     punctuator they stand for
+      - 'comment'    // and /* */ comments, delimiters included
+      - 'whitespace' a run of blanks, tabs and newlines
+      - 'directive'  a '#' up to (not including) the end of the line
+      - 'unknown'    any single character not covered above
+
+    Floating-point constants and the '...' punctuator are not
+    recognised as single tokens.
+    """
+
+    KEYWORDS = frozenset([
+        'auto', 'break', 'case', 'const', 'continue',
+        'default', 'do', 'else', 'enum', 'extern',
+        'for', 'goto', 'if', 'register', 'return',
+        'signed', 'sizeof',
+        'static', 'struct', 'typedef', 'union',
+        'unsigned', 'volatile', 'while',
+    ])
+    OPERATOR_CHARS = '.><+-*/%&^|!;{},:=()[]~?'
+    DIGRAPHS = {'<:': '[', ':>': ']', '<%': '{', '%>': '}'}
+    TWO_CHAR_OPERATORS = frozenset([
+        '+=', '-=', '*=', '/=', '%=', '&=', '^=',
+        '|=', '>>', '<<', '++', '--', '->', '&&',
+        '||', '<=', '>=', '==', '!=',
+    ])
+    WHITESPACE = ' \t\v\n\f'
+
+    def __init__(self, input, ignored_types=[]):
+        # Never hand the shared default list (or None) to the base class.
+        if ignored_types is None or ignored_types == []:
+            ignored_types = []
+        super(CLexer, self).__init__(input, ignored_types)
+
+    def __take_while(self, accept):
+        """Consume characters while accept(ch) holds, stopping at end of
+        input, and return them as one string."""
+        chars = []
+        ch = self.input.peek()
+        # peek() returns "" at EOF; test for it explicitly so that no
+        # predicate can keep the loop alive once the input has run out.
+        while ch != '' and accept(ch):
+            chars.append(self.input.pop())
+            ch = self.input.peek()
+        return ''.join(chars)
+
+    def __lex_word(self):
+        """Lex an identifier and classify it as keyword or symbol."""
+        word = self.__take_while(lambda c: is_L(c) or is_D(c))
+        if word in self.KEYWORDS:
+            return (word, word)
+        return ('symbol', word)
+
+    def __lex_quoted(self, quote):
+        """Lex a character or string constant delimited by quote.
+
+        A backslash always takes the following character with it, so an
+        escaped delimiter does not end the constant. An unterminated
+        constant runs to the end of the input.
+        """
+        text = self.input.pop()             # opening delimiter
+        ch = self.input.peek()
+        while ch != '' and ch != quote:
+            text += self.input.pop()
+            if ch == '\\':
+                text += self.input.pop()
+            ch = self.input.peek()
+        text += self.input.pop()            # closing delimiter ("" at EOF)
+        return text
+
+    def __lex_block_comment(self):
+        """Lex the remainder of a /* comment whose opener has already
+        been consumed; an unterminated comment ends at end of input."""
+        text = '/*'
+        while True:
+            text += self.__take_while(lambda c: c != '*')
+            if self.input.peek() == '':
+                return text
+            text += self.input.pop()        # the '*'
+            if self.input.peek() == '/':
+                return text + self.input.pop()
+
+    def __lex_operator(self):
+        """Lex a punctuator, a digraph, or a comment."""
+        first = self.input.pop()
+        pair = first + self.input.peek()
+        # In every two-character case below the second character must be
+        # consumed too, otherwise it would be lexed again as a token of
+        # its own.
+        if pair in self.DIGRAPHS:
+            self.input.pop()
+            return ('operator', self.DIGRAPHS[pair])
+        if pair == '//':
+            self.input.pop()
+            return ('comment',
+                    pair + self.__take_while(lambda c: c != '\n'))
+        if pair == '/*':
+            self.input.pop()
+            return ('comment', self.__lex_block_comment())
+        if pair in self.TWO_CHAR_OPERATORS:
+            self.input.pop()
+            if pair in ('<<', '>>') and self.input.peek() == '=':
+                pair += self.input.pop()    # <<= and >>=
+            return ('operator', pair)
+        return ('operator', first)
+
+    def __lex_number(self):
+        """Lex a decimal, octal or hexadecimal integer constant together
+        with any u/U/l/L suffix characters."""
+        text = self.input.pop()
+        if text == '0' and self.input.peek() in ('x', 'X'):
+            text += self.input.pop()
+            text += self.__take_while(is_H)
+        else:
+            text += self.__take_while(is_D)
+        text += self.__take_while(is_IS)
+        return text
+
+    # used internally, external users should use
+    # CLexer.peek()/peek_type()/pop() instead
+    def get_token(self):
+        """Return the next (type, value) pair, or (None, None) at end of
+        input."""
+        if self.input.eof():
+            return (None, None)
+
+        ch = self.input.peek()
+        if is_L(ch):
+            return self.__lex_word()
+        if ch == "'" or ch == '"':
+            return ('literal', self.__lex_quoted(ch))
+        if ch in self.OPERATOR_CHARS:
+            return self.__lex_operator()
+        if is_D(ch):
+            return ('literal', self.__lex_number())
+        if ch in self.WHITESPACE:
+            return ('whitespace',
+                    self.__take_while(lambda c: c in self.WHITESPACE))
+        if ch == '#':
+            return ('directive',
+                    self.__take_while(lambda c: c != '\n'))
+        # Consume the offending character; leaving it in the input would
+        # make every later call return the same 'unknown' token.
+        self.input.pop()
+        return ('unknown', ch)
-- 
1.7.9.5

  parent reply	other threads:[~2012-10-04 17:34 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-10-04 17:33 [Qemu-devel] [PATCH v3] Add infrastructure for QIDL-based device serialization Michael Roth
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 01/22] qapi: qapi-visit.py -> qapi_visit.py so we can import Michael Roth
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 02/22] qapi: qapi-types.py -> qapi_types.py Michael Roth
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 03/22] qapi: qapi-commands.py -> qapi_commands.py Michael Roth
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 04/22] qapi: qapi_visit.py, make code useable as module Michael Roth
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 05/22] qapi: qapi_visit.py, support arrays and complex qapi definitions Michael Roth
2012-10-05  8:11   ` Paolo Bonzini
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 06/22] qapi: qapi_visit.py, support generating static functions Michael Roth
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 07/22] qapi: qapi_visit.py, support for visiting non-pointer/embedded structs Michael Roth
2012-10-05  8:09   ` Paolo Bonzini
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 08/22] qapi: add visitor interfaces for C arrays Michael Roth
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 09/22] qapi: QmpOutputVisitor, implement array handling Michael Roth
2012-10-05  8:05   ` Paolo Bonzini
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 10/22] qapi: QmpInputVisitor, " Michael Roth
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 11/22] qapi: qapi.py, make json parser more robust Michael Roth
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 12/22] qapi: add open-coded visitor for struct tm types Michael Roth
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 13/22] qom-fuse: force single-threaded mode to avoid QMP races Michael Roth
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 14/22] qom-fuse: workaround for truncated properties > 4096 Michael Roth
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 15/22] module additions for schema registration Michael Roth
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 16/22] qdev: move Property-related declarations to qdev-properties.h Michael Roth
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 17/22] qidl: add documentation Michael Roth
2012-10-04 17:33 ` Michael Roth [this message]
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 19/22] qidl: add C parser (based on QC parser) Michael Roth
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 20/22] qidl: add QAPI-based code generator Michael Roth
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 21/22] qidl: qidl.h, definitions for qidl annotations Michael Roth
2012-10-05  8:14   ` Paolo Bonzini
2012-10-05 14:50     ` Michael Roth
2012-10-05 15:07   ` Paolo Bonzini
2012-10-05 15:41     ` Michael Roth
2012-10-05 15:53       ` Paolo Bonzini
2012-10-05 16:47         ` Michael Roth
2012-10-15 13:37           ` Paolo Bonzini
2012-10-15 15:50             ` Michael Roth
2012-10-04 17:33 ` [Qemu-devel] [PATCH v3 22/22] qidl: unit tests and build infrastructure Michael Roth
2012-10-05  8:24   ` Paolo Bonzini
2012-10-12 21:39     ` Michael Roth
2012-10-13  7:12       ` Paolo Bonzini
2012-10-15  8:52       ` Kevin Wolf
2012-10-15 14:48         ` Michael Roth
2012-10-05  8:26 ` [Qemu-devel] [PATCH v3] Add infrastructure for QIDL-based device serialization Paolo Bonzini
2012-10-05 14:52   ` Michael Roth

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1349372021-31212-19-git-send-email-mdroth@linux.vnet.ibm.com \
    --to=mdroth@linux.vnet.ibm.com \
    --cc=aliguori@us.ibm.com \
    --cc=blauwirbel@gmail.com \
    --cc=eblake@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.