[KVM-AUTOTEST PATCH v2] [RFC] KVM test: A memory efficient kvm_config implementation

* [KVM-AUTOTEST PATCH v2] [RFC] KVM test: A memory efficient kvm_config implementation
@ 2010-02-25 22:11 Michael Goldish
  0 siblings, 0 replies; only message in thread
From: Michael Goldish @ 2010-02-25 22:11 UTC (permalink / raw)
  To: autotest, kvm

This patch:

- Makes kvm_config use less memory during parsing, by storing config data
  compactly in arrays during parsing, and generating the final dicts only when
  requested.
  On my machine this results in 5-10 times less memory being used (depending on
  the size of the final generated list).
  This allows the test configuration to keep expanding without having the
  parser run out of memory.

- Adds config.fork_and_parse(), a function that parses a config file/string in
  a forked process and then terminates the process.  This works around Python's
  policy of keeping allocated memory to itself even after the objects occupying
  the memory have been destroyed.  If the process that does the parsing is the
  same one that runs the tests, less memory will be available to the VMs during
  testing.

- Makes parsing 4-5 times faster as a result of the new internal representation.

Differences from previous version:
- Added config.get_generator() which is similar to get_list() but returns a
  dict generator instead of a list.  This should save some more memory and will
  make tests start sooner.
- Use get_generator() in control.
- Call waitpid() at the end of fork_and_parse().

Signed-off-by: Michael Goldish <mgoldish@redhat.com>
---
 client/tests/kvm/control          |   30 +-
 client/tests/kvm/control.parallel |   21 +-
 client/tests/kvm/kvm_config.py    |  669 +++++++++++++++++++++++--------------
 3 files changed, 440 insertions(+), 280 deletions(-)

diff --git a/client/tests/kvm/control b/client/tests/kvm/control
index 163286e..a3bba94 100644
--- a/client/tests/kvm/control
+++ b/client/tests/kvm/control
@@ -30,34 +30,38 @@ import kvm_utils, kvm_config
 # set English environment (command output might be localized, need to be safe)
 os.environ['LANG'] = 'en_US.UTF-8'
 
-build_cfg_path = os.path.join(kvm_test_dir, "build.cfg")
-build_cfg = kvm_config.config(build_cfg_path)
-# Make any desired changes to the build configuration here. For example:
-#build_cfg.parse_string("""
+str = """
+# This string will be parsed after build.cfg.  Make any desired changes to the
+# build configuration here.  For example:
 #release_tag = 84
-#""")
-if not kvm_utils.run_tests(build_cfg.get_list(), job):
+"""
+build_cfg = kvm_config.config()
+build_cfg_path = os.path.join(kvm_test_dir, "build.cfg")
+build_cfg.fork_and_parse(build_cfg_path, str)
+if not kvm_utils.run_tests(build_cfg.get_generator(), job):
     logging.error("KVM build step failed, exiting.")
     sys.exit(1)
 
-tests_cfg_path = os.path.join(kvm_test_dir, "tests.cfg")
-tests_cfg = kvm_config.config(tests_cfg_path)
-# Make any desired changes to the test configuration here. For example:
-#tests_cfg.parse_string("""
+str = """
+# This string will be parsed after tests.cfg.  Make any desired configuration
+# changes here.  For example:
 #display = sdl
 #install|setup: timeout_multiplier = 3
-#""")
+"""
+tests_cfg = kvm_config.config()
+tests_cfg_path = os.path.join(kvm_test_dir, "tests.cfg")
+tests_cfg.fork_and_parse(tests_cfg_path, str)
 
 pools_cfg_path = os.path.join(kvm_test_dir, "address_pools.cfg")
 tests_cfg.parse_file(pools_cfg_path)
 hostname = os.uname()[1].split(".")[0]
-if tests_cfg.filter("^" + hostname):
+if tests_cfg.count("^" + hostname):
     tests_cfg.parse_string("only ^%s" % hostname)
 else:
     tests_cfg.parse_string("only ^default_host")
 
 # Run the tests
-kvm_utils.run_tests(tests_cfg.get_list(), job)
+kvm_utils.run_tests(tests_cfg.get_generator(), job)
 
 # Generate a nice HTML report inside the job's results dir
 kvm_utils.create_report(kvm_test_dir, job.resultdir)
diff --git a/client/tests/kvm/control.parallel b/client/tests/kvm/control.parallel
index 343f694..07bc6e5 100644
--- a/client/tests/kvm/control.parallel
+++ b/client/tests/kvm/control.parallel
@@ -160,19 +160,22 @@ if not params.get("mode") == "noinstall":
 # ----------------------------------------------------------
 import kvm_config
 
-filename = os.path.join(pwd, "kvm_tests.cfg")
-cfg = kvm_config.config(filename)
-
-# If desirable, make changes to the test configuration here.  For example:
-# cfg.parse_string("install|setup: timeout_multiplier = 2")
-# cfg.parse_string("only fc8_quick")
-# cfg.parse_string("display = sdl")
+str = """
+# This string will be parsed after tests.cfg.  Make any desired changes to the
+# test configuration here.  For example:
+#install|setup: timeout_multiplier = 3
+#only fc8_quick
+#display = sdl
+"""
+cfg = kvm_config.config()
+filename = os.path.join(pwd, "tests.cfg")
+cfg.fork_and_parse(filename, str)
 
-filename = os.path.join(pwd, "kvm_address_pools.cfg")
+filename = os.path.join(pwd, "address_pools.cfg")
 if os.path.exists(filename):
     cfg.parse_file(filename)
     hostname = os.uname()[1].split(".")[0]
-    if cfg.filter("^" + hostname):
+    if cfg.count("^" + hostname):
         cfg.parse_string("only ^%s" % hostname)
     else:
         cfg.parse_string("only ^default_host")
diff --git a/client/tests/kvm/kvm_config.py b/client/tests/kvm/kvm_config.py
index 798ef56..72d7ac6 100755
--- a/client/tests/kvm/kvm_config.py
+++ b/client/tests/kvm/kvm_config.py
@@ -2,10 +2,10 @@
 """
 KVM configuration file utility functions.
 
-@copyright: Red Hat 2008-2009
+@copyright: Red Hat 2008-2010
 """
 
-import logging, re, os, sys, StringIO, optparse
+import logging, re, os, sys, StringIO, optparse, array, cPickle, traceback
 import common
 from autotest_lib.client.common_lib import error
 from autotest_lib.client.common_lib import logging_config, logging_manager
@@ -21,29 +21,29 @@ class config:
     """
     Parse an input file or string that follows the KVM Test Config File format
     and generate a list of dicts that will be later used as configuration
-    parameters by the the KVM tests.
+    parameters by the KVM tests.
 
     @see: http://www.linux-kvm.org/page/KVM-Autotest/Test_Config_File
     """
 
-    def __init__(self, filename=None, debug=False):
+    def __init__(self, filename=None, debug=True):
         """
-        Initialize the list and optionally parse filename.
+        Initialize the list and optionally parse a file.
 
         @param filename: Path of the file that will be taken.
         @param debug: Whether to turn debugging output.
         """
-        self.list = [{"name": "", "shortname": "", "depend": []}]
-        self.debug = debug
+        self.list = [array.array("H", [4, 4, 4, 4])]
+        self.object_cache = []
         self.filename = filename
+        self.debug = debug
         if filename:
             self.parse_file(filename)
 
 
     def parse_file(self, filename):
         """
-        Parse filename, return the resulting list and store it in .list. If
-        filename does not exist, raise an exception.
+        Parse file.  If it doesn't exist, raise an exception.
 
         @param filename: Path of the configuration file.
         """
@@ -53,83 +53,121 @@ class config:
         file = open(filename, "r")
         self.list = self.parse(file, self.list)
         file.close()
-        return self.list
 
 
     def parse_string(self, str):
         """
-        Parse a string, return the resulting list and store it in .list.
+        Parse a string.
 
-        @param str: String that will be parsed.
+        @param str: String to parse.
         """
         file = StringIO.StringIO(str)
         self.list = self.parse(file, self.list)
         file.close()
-        return self.list
 
 
-    def get_list(self):
+    def fork_and_parse(self, filename=None, str=None):
         """
-        Return the list of dictionaries. This should probably be called after
-        parsing something.
-        """
-        return self.list
+        Parse a file and/or a string in a separate process to save memory.
+
+        Python likes to keep memory to itself even after the objects occupying
+        it have been destroyed.  If during a call to parse_file() or
+        parse_string() a lot of memory is used, it can only be freed by
+        terminating the process.  This function works around the problem by
+        doing the parsing in a forked process and then terminating it, freeing
+        any unneeded memory.
 
+        Note: if an exception is raised during parsing, its information will be
+        printed, and the resulting list will be empty.  The exception will not
+        be raised in the process calling this function.
 
-    def match(self, filter, dict):
+        @param filename: Path of file to parse (optional).
+        @param str: String to parse (optional).
+        """
+        pipe_r, pipe_w = os.pipe()
+        pipe_r = os.fdopen(pipe_r, "r")
+        pipe_w = os.fdopen(pipe_w, "w")
+        pid = os.fork()
+        if not pid:
+            # Child process
+            try:
+                if filename:
+                    self.parse_file(filename)
+                if str:
+                    self.parse_string(str)
+            except:
+                traceback.print_exc()
+                self.list = []
+            l = [a.tostring() for a in self.list]
+            cPickle.dump((l, self.object_cache), pipe_w, -1)
+            pipe_w.close()
+            os._exit(0)
+        else:
+            # Parent process
+            (l, self.object_cache) = cPickle.load(pipe_r)
+            pipe_r.close()
+            os.waitpid(pid, 0)
+            self.list = []
+            for s in l:
+                a = array.array("H")
+                a.fromstring(s)
+                self.list.append(a)
+
+
+    def get_generator(self):
         """
-        Return True if dict matches filter.
+        Generate dictionaries from the code parsed so far.  This should
+        probably be called after parsing something.
 
-        @param filter: A regular expression that defines the filter.
-        @param dict: Dictionary that will be inspected.
+        @return: A dict generator.
         """
-        filter = re.compile(r"(\.|^)(%s)(\.|$)" % filter)
-        return bool(filter.search(dict["name"]))
+        for a in self.list:
+            name, shortname, depend, content = _array_get_all(a, self.object_cache)
+            dict = {"name": name, "shortname": shortname, "depend": depend}
+            _apply_content_to_dict(dict, content)
+            yield dict
 
 
-    def filter(self, filter, list=None):
+    def get_list(self):
         """
-        Filter a list of dicts.
+        Generate a list of dictionaries from the code parsed so far.
+        This should probably be called after parsing something.
 
-        @param filter: A regular expression that will be used as a filter.
-        @param list: A list of dictionaries that will be filtered.
+        @return: A list of dicts.
         """
-        if list is None:
-            list = self.list
-        return [dict for dict in list if self.match(filter, dict)]
+        return list(self.get_generator())
 
 
-    def split_and_strip(self, str, sep="="):
+    def count(self, filter=".*"):
         """
-        Split str and strip quotes from the resulting parts.
+        Return the number of dictionaries whose names match filter.
 
-        @param str: String that will be processed
-        @param sep: Separator that will be used to split the string
+        @param filter: A regular expression string.
         """
-        temp = str.split(sep, 1)
-        for i in range(len(temp)):
-            temp[i] = temp[i].strip()
-            if re.findall("^\".*\"$", temp[i]):
-                temp[i] = temp[i].strip("\"")
-            elif re.findall("^\'.*\'$", temp[i]):
-                temp[i] = temp[i].strip("\'")
-        return temp
+        exp = _get_filter_regex(filter)
+        count = 0
+        for a in self.list:
+            name = _array_get_name(a, self.object_cache)
+            if exp.search(name):
+                count += 1
+        return count
 
 
     def get_next_line(self, file):
         """
         Get the next non-empty, non-comment line in a file like object.
 
-        @param file: File like object
+        @param file: File like object.
         @return: If no line is available, return None.
         """
         while True:
             line = file.readline()
-            if line == "": return None
+            if not line:
+                return None
             stripped_line = line.strip()
-            if len(stripped_line) > 0 \
-                    and not stripped_line.startswith('#') \
-                    and not stripped_line.startswith('//'):
+            if (stripped_line
+                and not stripped_line.startswith('#')
+                and not stripped_line.startswith('//')):
                 return line
 
 
@@ -142,34 +180,11 @@ class config:
         """
         pos = file.tell()
         line = self.get_next_line(file)
+        file.seek(pos)
         if not line:
-            file.seek(pos)
             return -1
         line = line.expandtabs()
-        indent = 0
-        while line[indent] == ' ':
-            indent += 1
-        file.seek(pos)
-        return indent
-
-
-    def add_name(self, str, name, append=False):
-        """
-        Add name to str with a separator dot and return the result.
-
-        @param str: String that will be processed
-        @param name: name that will be appended to the string.
-        @return: If append is True, append name to str.
-                Otherwise, pre-pend name to str.
-        """
-        if str == "":
-            return name
-        # Append?
-        elif append:
-            return str + "." + name
-        # Prepend?
-        else:
-            return name + "." + str
+        return len(line) - len(line.lstrip())
 
 
     def parse_variants(self, file, list, subvariants=False, prev_indent=-1):
@@ -178,13 +193,13 @@ class config:
         level lower than or equal to prev_indent is encountered.
 
         @brief: Parse a 'variants' or 'subvariants' block from a file-like
-        object.
-        @param file: File-like object that will be parsed
-        @param list: List of dicts to operate on
+            object.
+        @param file: File-like object that will be parsed.
+        @param list: List of arrays to operate on.
         @param subvariants: If True, parse in 'subvariants' mode;
-        otherwise parse in 'variants' mode
-        @param prev_indent: The indent level of the "parent" block
-        @return: The resulting list of dicts.
+            otherwise parse in 'variants' mode.
+        @param prev_indent: The indent level of the "parent" block.
+        @return: The resulting list of arrays.
         """
         new_list = []
 
@@ -193,46 +208,47 @@ class config:
             if indent <= prev_indent:
                 break
             indented_line = self.get_next_line(file).rstrip()
-            line = indented_line.strip()
+            line = indented_line.lstrip()
 
             # Get name and dependencies
-            temp = line.strip("- ").split(":")
-            name = temp[0]
-            if len(temp) == 1:
-                dep_list = []
-            else:
-                dep_list = temp[1].split()
+            (name, depend) = line.strip("- ").split(":")
 
             # See if name should be added to the 'shortname' field
-            add_to_shortname = True
-            if name.startswith("@"):
-                name = name.strip("@")
-                add_to_shortname = False
-
-            # Make a deep copy of list
-            temp_list = []
-            for dict in list:
-                new_dict = dict.copy()
-                new_dict["depend"] = dict["depend"][:]
-                temp_list.append(new_dict)
+            add_to_shortname = not name.startswith("@")
+            name = name.lstrip("@")
+
+            # Store name and dependencies in cache and get their indices
+            n = self._store_str(name)
+            d = self._store_str(depend)
+
+            # Make a copy of list
+            temp_list = [a[:] for a in list]
 
             if subvariants:
                 # If we're parsing 'subvariants', first modify the list
-                self.__modify_list_subvariants(temp_list, name, dep_list,
-                                               add_to_shortname)
-                temp_list = self.parse(file, temp_list,
-                        restricted=True, prev_indent=indent)
+                if add_to_shortname:
+                    for a in temp_list:
+                        _array_append_to_name_shortname_depend(a, n, d)
+                else:
+                    for a in temp_list:
+                        _array_append_to_name_depend(a, n, d)
+                temp_list = self.parse(file, temp_list, restricted=False,
+                                       prev_indent=indent)
             else:
                 # If we're parsing 'variants', parse before modifying the list
                 if self.debug:
-                    self.__debug_print(indented_line,
-                                       "Entering variant '%s' "
-                                       "(variant inherits %d dicts)" %
-                                       (name, len(list)))
-                temp_list = self.parse(file, temp_list,
-                        restricted=False, prev_indent=indent)
-                self.__modify_list_variants(temp_list, name, dep_list,
-                                            add_to_shortname)
+                    _debug_print(indented_line,
+                                 "Entering variant '%s' "
+                                 "(variant inherits %d dicts)" %
+                                 (name, len(list)))
+                temp_list = self.parse(file, temp_list, restricted=False,
+                                       prev_indent=indent)
+                if add_to_shortname:
+                    for a in temp_list:
+                        _array_prepend_to_name_shortname_depend(a, n, d)
+                else:
+                    for a in temp_list:
+                        _array_prepend_to_name_depend(a, n, d)
 
             new_list += temp_list
 
@@ -246,123 +262,93 @@ class config:
 
         @brief: Parse a file-like object.
         @param file: A file-like object
-        @param list: A list of dicts to operate on (list is modified in
-        place and should not be used after the call)
-        @param restricted: if True, operate in restricted mode
-        (prohibit 'variants')
-        @param prev_indent: the indent level of the "parent" block
-        @return: Return the resulting list of dicts.
+        @param list: A list of arrays to operate on (list is modified in
+            place and should not be used after the call).
+        @param restricted: If True, operate in restricted mode
+            (prohibit 'variants').
+        @param prev_indent: The indent level of the "parent" block.
+        @return: The resulting list of arrays.
         @note: List is destroyed and should not be used after the call.
-        Only the returned list should be used.
+            Only the returned list should be used.
         """
+        current_block = ""
+
         while True:
             indent = self.get_next_line_indent(file)
             if indent <= prev_indent:
+                self._append_content_to_arrays(list, current_block)
                 break
             indented_line = self.get_next_line(file).rstrip()
-            line = indented_line.strip()
-            words = line.split()
+            line = indented_line.lstrip()
 
             len_list = len(list)
 
-            # Look for a known operator in the line
-            operators = ["?+=", "?<=", "?=", "+=", "<=", "="]
-            op_found = None
-            op_pos = len(line)
-            for op in operators:
-                pos = line.find(op)
-                if pos >= 0 and pos < op_pos:
-                    op_found = op
-                    op_pos = pos
-
-            # Found an operator?
-            if op_found:
+            # Parse assignment operators (keep lines in temporary buffer)
+            if "=" in line:
                 if self.debug and not restricted:
-                    self.__debug_print(indented_line,
-                                       "Parsing operator (%d dicts in current "
-                                       "context)" % len_list)
-                (left, value) = self.split_and_strip(line, op_found)
-                filters_and_key = self.split_and_strip(left, ":")
-                filters = filters_and_key[:-1]
-                key = filters_and_key[-1]
-                filtered_list = list
-                for filter in filters:
-                    filtered_list = self.filter(filter, filtered_list)
-                # Apply the operation to the filtered list
-                if op_found == "=":
-                    for dict in filtered_list:
-                        dict[key] = value
-                elif op_found == "+=":
-                    for dict in filtered_list:
-                        dict[key] = dict.get(key, "") + value
-                elif op_found == "<=":
-                    for dict in filtered_list:
-                        dict[key] = value + dict.get(key, "")
-                elif op_found.startswith("?"):
-                    exp = re.compile("^(%s)$" % key)
-                    if op_found == "?=":
-                        for dict in filtered_list:
-                            for key in dict.keys():
-                                if exp.match(key):
-                                    dict[key] = value
-                    elif op_found == "?+=":
-                        for dict in filtered_list:
-                            for key in dict.keys():
-                                if exp.match(key):
-                                    dict[key] = dict.get(key, "") + value
-                    elif op_found == "?<=":
-                        for dict in filtered_list:
-                            for key in dict.keys():
-                                if exp.match(key):
-                                    dict[key] = value + dict.get(key, "")
+                    _debug_print(indented_line,
+                                 "Parsing operator (%d dicts in current "
+                                 "context)" % len_list)
+                current_block += line + "\n"
+                continue
+
+            # Flush the temporary buffer
+            self._append_content_to_arrays(list, current_block)
+            current_block = ""
+
+            words = line.split()
 
             # Parse 'no' and 'only' statements
-            elif words[0] == "no" or words[0] == "only":
+            if words[0] == "no" or words[0] == "only":
                 if len(words) <= 1:
                     continue
-                filters = words[1:]
+                filters = map(_get_filter_regex, words[1:])
                 filtered_list = []
                 if words[0] == "no":
-                    for dict in list:
+                    for a in list:
+                        name = _array_get_name(a, self.object_cache)
                         for filter in filters:
-                            if self.match(filter, dict):
+                            if filter.search(name):
                                 break
                         else:
-                            filtered_list.append(dict)
+                            filtered_list.append(a)
                 if words[0] == "only":
-                    for dict in list:
+                    for a in list:
+                        name = _array_get_name(a, self.object_cache)
                         for filter in filters:
-                            if self.match(filter, dict):
-                                filtered_list.append(dict)
+                            if filter.search(name):
+                                filtered_list.append(a)
                                 break
                 list = filtered_list
                 if self.debug and not restricted:
-                    self.__debug_print(indented_line,
-                                       "Parsing no/only (%d dicts in current "
-                                       "context, %d remain)" %
-                                       (len_list, len(list)))
+                    _debug_print(indented_line,
+                                 "Parsing no/only (%d dicts in current "
+                                 "context, %d remain)" %
+                                 (len_list, len(list)))
+                continue
 
             # Parse 'variants'
-            elif line == "variants:":
+            if line == "variants:":
                 # 'variants' not allowed in restricted mode
                 # (inside an exception or inside subvariants)
                 if restricted:
                     e_msg = "Using variants in this context is not allowed"
                     raise error.AutotestError(e_msg)
                 if self.debug and not restricted:
-                    self.__debug_print(indented_line,
-                                       "Entering variants block (%d dicts in "
-                                       "current context)" % len_list)
+                    _debug_print(indented_line,
+                                 "Entering variants block (%d dicts in "
+                                 "current context)" % len_list)
                 list = self.parse_variants(file, list, subvariants=False,
                                            prev_indent=indent)
+                continue
 
             # Parse 'subvariants' (the block is parsed for each dict
             # separately)
-            elif line == "subvariants:":
+            if line == "subvariants:":
                 if self.debug and not restricted:
-                    self.__debug_print(indented_line,
-                                       "Entering subvariants block (%d dicts in "
-                                       "current context)" % len_list)
+                    _debug_print(indented_line,
+                                 "Entering subvariants block (%d dicts in "
+                                 "current context)" % len_list)
                 new_list = []
                 # Remember current file position
                 pos = file.tell()
@@ -370,7 +356,7 @@ class config:
                 self.parse_variants(file, [], subvariants=True,
                                     prev_indent=indent)
                 # Iterate over the list...
-                for index in range(len(list)):
+                for index in xrange(len(list)):
                     # Revert to initial file position in this 'subvariants'
                     # block
                     file.seek(pos)
@@ -380,14 +366,15 @@ class config:
                                                     subvariants=True,
                                                     prev_indent=indent)
                 list = new_list
+                continue
 
             # Parse 'include' statements
-            elif words[0] == "include":
+            if words[0] == "include":
                 if len(words) <= 1:
                     continue
                 if self.debug and not restricted:
-                    self.__debug_print(indented_line,
-                                       "Entering file %s" % words[1])
+                    _debug_print(indented_line,
+                                 "Entering file %s" % words[1])
                 if self.filename:
                     filename = os.path.join(os.path.dirname(self.filename),
                                             words[1])
@@ -396,22 +383,23 @@ class config:
                         list = self.parse(new_file, list, restricted)
                         new_file.close()
                         if self.debug and not restricted:
-                            self.__debug_print("", "Leaving file %s" % words[1])
+                            _debug_print("", "Leaving file %s" % words[1])
                     else:
                         logging.warning("Cannot include %s -- file not found",
                                         filename)
                 else:
                     logging.warning("Cannot include %s because no file is "
                                     "currently open", words[1])
+                continue
 
             # Parse multi-line exceptions
             # (the block is parsed for each dict separately)
-            elif line.endswith(":"):
+            if line.endswith(":"):
                 if self.debug and not restricted:
-                    self.__debug_print(indented_line,
-                                       "Entering multi-line exception block "
-                                       "(%d dicts in current context outside "
-                                       "exception)" % len_list)
+                    _debug_print(indented_line,
+                                 "Entering multi-line exception block "
+                                 "(%d dicts in current context outside "
+                                 "exception)" % len_list)
                 line = line.strip(":")
                 new_list = []
                 # Remember current file position
@@ -419,8 +407,11 @@ class config:
                 # Read the lines in any case
                 self.parse(file, [], restricted=True, prev_indent=indent)
                 # Iterate over the list...
-                for index in range(len(list)):
-                    if self.match(line, list[index]):
+                exp = _get_filter_regex(line)
+                for index in xrange(len(list)):
+                    name = _array_get_name(list[index], self.object_cache)
+                    if exp.search(name):
+                    #if self.match(line, list[index]):
                         # Revert to initial file position in this
                         # exception block
                         file.seek(pos)
@@ -430,81 +421,243 @@ class config:
                                                restricted=True,
                                                prev_indent=indent)
                     else:
-                        new_list += list[index:index+1]
+                        #new_list += list[index:index+1]
+                        new_list.append(list[index])
                 list = new_list
+                continue
 
         return list
 
 
-    def __debug_print(self, str1, str2=""):
+    def _store_str(self, str):
         """
-        Nicely print two strings and an arrow.
+        Store str in the internal object cache, if it isn't already there, and
+        return its identifying index.
 
-        @param str1: First string
-        @param str2: Second string
+        @param str: String to store.
+        @return: The index of str in the object cache.
         """
-        if str2:
-            str = "%-50s ---> %s" % (str1, str2)
-        else:
-            str = str1
-        logging.debug(str)
+        try:
+            return self.object_cache.index(str)
+        except ValueError:
+            self.object_cache.append(str)
+            return len(self.object_cache) - 1
 
 
-    def __modify_list_variants(self, list, name, dep_list, add_to_shortname):
-        """
-        Make some modifications to list, as part of parsing a 'variants' block.
-
-        @param list: List to be processed
-        @param name: Name to be prepended to the dictionary's 'name' key
-        @param dep_list: List of dependencies to be added to the dictionary's
-                'depend' key
-        @param add_to_shortname: Boolean indicating whether name should be
-                prepended to the dictionary's 'shortname' key as well
+    def _append_content_to_arrays(self, list, content):
         """
-        for dict in list:
-            # Prepend name to the dict's 'name' field
-            dict["name"] = self.add_name(dict["name"], name)
-            # Prepend name to the dict's 'shortname' field
-            if add_to_shortname:
-                dict["shortname"] = self.add_name(dict["shortname"], name)
-            # Prepend name to each of the dict's dependencies
-            for i in range(len(dict["depend"])):
-                dict["depend"][i] = self.add_name(dict["depend"][i], name)
-            # Add new dependencies
-            dict["depend"] += dep_list
-
-
-    def __modify_list_subvariants(self, list, name, dep_list, add_to_shortname):
-        """
-        Make some modifications to list, as part of parsing a 'subvariants'
-        block.
-
-        @param list: List to be processed
-        @param name: Name to be appended to the dictionary's 'name' key
-        @param dep_list: List of dependencies to be added to the dictionary's
-                'depend' key
-        @param add_to_shortname: Boolean indicating whether name should be
-                appended to the dictionary's 'shortname' as well
+        Append content (config code containing assignment operations) to a list
+        of arrays.
+
+        @param list: List of arrays to operate on.
+        @param content: String containing assignment operations.
         """
-        for dict in list:
-            # Add new dependencies
-            for dep in dep_list:
-                dep_name = self.add_name(dict["name"], dep, append=True)
-                dict["depend"].append(dep_name)
-            # Append name to the dict's 'name' field
-            dict["name"] = self.add_name(dict["name"], name, append=True)
-            # Append name to the dict's 'shortname' field
-            if add_to_shortname:
-                dict["shortname"] = self.add_name(dict["shortname"], name,
-                                                  append=True)
+        if content:
+            str_index = self._store_str(content)
+            for a in list:
+                _array_append_to_content(a, str_index)
+
+
+
+# Assignment operators:
+
+def _op_set(dict, key, value):
+    dict[key] = value
+
+
+def _op_append(dict, key, value):
+    dict[key] = dict.get(key, "") + value
+
+
+def _op_prepend(dict, key, value):
+    dict[key] = value + dict.get(key, "")
+
+
+def _op_regex_set(dict, exp, value):
+    exp = re.compile("^(%s)$" % exp)
+    for key in dict:
+        if exp.match(key):
+            dict[key] = value
+
+
+def _op_regex_append(dict, exp, value):
+    exp = re.compile("^(%s)$" % exp)
+    for key in dict:
+        if exp.match(key):
+            dict[key] += value
+
+
+def _op_regex_prepend(dict, exp, value):
+    exp = re.compile("^(%s)$" % exp)
+    for key in dict:
+        if exp.match(key):
+            dict[key] = value + dict[key]
+
+
+ops = {
+    "=": _op_set,
+    "+=": _op_append,
+    "<=": _op_prepend,
+    "?=": _op_regex_set,
+    "?+=": _op_regex_append,
+    "?<=": _op_regex_prepend,
+}
+
+
+def _apply_content_to_dict(dict, content):
+    """
+    Apply the operations in content (config code containing assignment
+    operations) to a dict.
+
+    @param dict: Dictionary to operate on.  Must have 'name' key.
+    @param content: String containing assignment operations.
+    """
+    for line in content.splitlines():
+        op_found = None
+        op_pos = len(line)
+        for op in ops:
+            pos = line.find(op)
+            if pos >= 0 and pos < op_pos:
+                op_found = op
+                op_pos = pos
+        if not op_found:
+            continue
+        (left, value) = map(str.strip, line.split(op_found, 1))
+        if value and ((value[0] == '"' and value[-1] == '"') or
+                      (value[0] == "'" and value[-1] == "'")):
+            value = value[1:-1]
+        filters_and_key = map(str.strip, left.split(":"))
+        filters = filters_and_key[:-1]
+        key = filters_and_key[-1]
+        for filter in filters:
+            exp = _get_filter_regex(filter)
+            if not exp.search(dict["name"]):
+                break
+        else:
+            ops[op_found](dict, key, value)
+
+
+
+def _get_filter_regex(filter):
+    """
+    Return a regex object corresponding to a given filter string.
+
+    All regular expressions given to the parser are passed through this
+    function first.  Its purpose is to make them more specific and better
+    suited to match dictionary names: it forces simple expressions to match
+    only between dots or at the beginning or end of a string.  For example,
+    the filter 'foo' will match 'foo.bar' but not 'foobar'.
+    """
+    return re.compile(r"(\.|^)(%s)(\.|$)" % filter)
+
+
+def _debug_print(str1, str2=""):
+    """
+    Nicely print two strings and an arrow.
+
+    @param str1: First string.
+    @param str2: Second string.
+    """
+    if str2:
+        str = "%-50s ---> %s" % (str1, str2)
+    else:
+        str = str1
+    logging.debug(str)
+
+
+
+# Array structure:
+# ----------------
+# The first 4 elements contain the indices of the 4 segments.
+# a[0] -- Index of beginning of 'name' segment (always 4).
+# a[1] -- Index of beginning of 'shortname' segment.
+# a[2] -- Index of beginning of 'depend' segment.
+# a[3] -- Index of beginning of 'content' segment.
+# The next elements in the array comprise the aforementioned segments:
+# The 'name' segment begins with a[a[0]] and ends with a[a[1]-1].
+# The 'shortname' segment begins with a[a[1]] and ends with a[a[2]-1].
+# The 'depend' segment begins with a[a[2]] and ends with a[a[3]-1].
+# The 'content' segment begins with a[a[3]] and ends at the end of the array.
+
+# The following functions append/prepend to various segments of an array.
+
+def _array_append_to_name_shortname_depend(a, name, depend):
+    a.insert(a[1], name)
+    a.insert(a[2] + 1, name)
+    a.insert(a[3] + 2, depend)
+    a[1] += 1
+    a[2] += 2
+    a[3] += 3
+
+
+def _array_prepend_to_name_shortname_depend(a, name, depend):
+    a[1] += 1
+    a[2] += 2
+    a[3] += 3
+    a.insert(a[0], name)
+    a.insert(a[1], name)
+    a.insert(a[2], depend)
+
+
+def _array_append_to_name_depend(a, name, depend):
+    a.insert(a[1], name)
+    a.insert(a[3] + 1, depend)
+    a[1] += 1
+    a[2] += 1
+    a[3] += 2
+
+
+def _array_prepend_to_name_depend(a, name, depend):
+    a[1] += 1
+    a[2] += 1
+    a[3] += 2
+    a.insert(a[0], name)
+    a.insert(a[2], depend)
+
+
+def _array_append_to_content(a, content):
+    a.append(content)
+
+
+def _array_get_name(a, object_cache):
+    """
+    Return the name of a dictionary represented by a given array.
+
+    @param a: Array representing a dictionary.
+    @param object_cache: A list of strings referenced by elements in the array.
+    """
+    return ".".join(object_cache[i] for i in a[a[0]:a[1]])
+
+
+def _array_get_all(a, object_cache):
+    """
+    Return a 4-tuple containing all the data stored in a given array, in a
+    format that is easy to turn into an actual dictionary.
+
+    @param a: Array representing a dictionary.
+    @param object_cache: A list of strings referenced by elements in the array.
+    @return: A 4-tuple: (name, shortname, depend, content), in which all
+        members are strings except depend which is a list of strings.
+    """
+    name = ".".join(object_cache[i] for i in a[a[0]:a[1]])
+    shortname = ".".join(object_cache[i] for i in a[a[1]:a[2]])
+    content = "".join(object_cache[i] for i in a[a[3]:])
+    depend = []
+    prefix = ""
+    for (n, d) in zip(a[a[0]:a[1]], a[a[2]:a[3]]):
+        for dep in object_cache[d].split():
+            depend.append(prefix + dep)
+        prefix += object_cache[n] + "."
+    return (name, shortname, depend, content)
+
 
 
 if __name__ == "__main__":
     parser = optparse.OptionParser()
     parser.add_option('-f', '--file', dest="filename", action='store',
                       help='path to a config file that will be parsed. '
-                           'If not specified, will parse kvm_tests.cfg '
-                           'located inside the kvm test dir.')
+                           'If not specified, will parse tests.cfg located '
+                           'inside the kvm test dir.')
     parser.add_option('--verbose', dest="debug", action='store_true',
                       help='include debug messages in console output')
 
@@ -518,9 +671,9 @@ if __name__ == "__main__":
     # Here we configure the stand alone program to use the autotest
     # logging system.
     logging_manager.configure_logging(KvmLoggingConfig(), verbose=debug)
-    list = config(filename, debug=debug).get_list()
+    dicts = config(filename, debug=debug).get_generator()
     i = 0
-    for dict in list:
+    for dict in dicts:
         logging.info("Dictionary #%d:", i)
         keys = dict.keys()
         keys.sort()
-- 
1.5.4.1

^ permalink raw reply related	[flat|nested] only message in thread