Skip to content
Snippets Groups Projects
Commit b6ebe15d authored by Richard Cordovano's avatar Richard Cordovano Committed by GitHub
Browse files

Merge pull request #3005 from zhhl/2918_updateScriptAndGold

2918 update script and gold
parents 71c145fd 84813bc5
No related branches found
No related tags found
No related merge requests found
......@@ -6,12 +6,25 @@ List of tags:
image: An image to be ingested
build: the path to the build.xml file
indir: the path to input directory
outdir: the path to output directory
singleUser_outdir: the path to single-user case output directory
global_csv: path to global csv file
golddir: the path to gold directory
singleUser_golddir: the path to single-user case gold directory
timing: can be set to True or False. If enabled, record the timing.
NOTE: Make sure to use windows style for paths!
userCaseType: set this value to do single-user, multi-user or both tests
List of tags for multi-user case:
multiUser_outdir: a path to multi-user case output directory
multiUser_golddir: a path to multi-user case gold directory
dbHost: PostgreSQL database host name
dbPort: PostgreSQL database port number
dbUserName: PostgreSQL database username
dbPassword: PostgreSQL database password
solrHost: Solr server host name
solrPort: Solr server port number
messageServiceHost: ActiveMQ server hostname
messageServicePort: ActiveMQ server port number
NOTE: Make sure to use a UNC path for multiUser_outdir, and Windows-style paths for all other paths!
None of these tags are mandatory; if a tag is not provided, the file will simply be skipped and ignored.
......
......@@ -300,11 +300,14 @@ def _dump_output_db_nonbb(db_file, dump_file):
os.chmod (backup_db_file, 0o777)
conn = sqlite3.connect(backup_db_file)
id_path_table = build_id_table(conn.cursor())
id_files_table = build_id_files_table(conn.cursor())
id_vs_parts_table = build_id_vs_parts_table(conn.cursor())
id_vs_info_table = build_id_vs_info_table(conn.cursor())
id_fs_info_table = build_id_fs_info_table(conn.cursor())
id_objects_table = build_id_objects_table(conn.cursor())
id_artifact_types_table = build_id_artifact_types_table(conn.cursor())
id_obj_path_table = build_id_obj_path_table(id_files_table, id_objects_table, id_artifact_types_table)
conn.text_factory = lambda x: x.decode("utf-8", "ignore")
# Delete the blackboard tables
......@@ -314,7 +317,7 @@ def _dump_output_db_nonbb(db_file, dump_file):
# Write to the database dump
with codecs.open(dump_file, "wb", "utf_8") as db_log:
for line in conn.iterdump():
line = normalize_db_entry(line, id_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table)
line = normalize_db_entry(line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table)
db_log.write('%s\n' % line)
# Now sort the file
......@@ -346,12 +349,12 @@ def _get_tmp_file(base, ext):
class TskDbDiffException(Exception):
    """Raised when a TSK database diff operation fails."""
    pass
def normalize_db_entry(line, table, vs_parts_table, vs_info_table, fs_info_table, objects_table):
def normalize_db_entry(line, files_table, vs_parts_table, vs_info_table, fs_info_table, objects_table):
""" Make testing more consistent and reasonable by doctoring certain db entries.
Args:
line: a String, the line to remove the object id from.
table: a map from object ids to file paths.
files_table: a map from object ids to file paths.
"""
files_index = line.find('INSERT INTO "tsk_files"')
......@@ -361,31 +364,35 @@ def normalize_db_entry(line, table, vs_parts_table, vs_info_table, fs_info_table
layout_index = line.find('INSERT INTO "tsk_file_layout"')
data_source_info_index = line.find('INSERT INTO "data_source_info"')
ingest_job_index = line.find('INSERT INTO "ingest_jobs"')
parens = line[line.find('(') + 1 : line.rfind(')')]
fields_list = parens.replace(" ", "").split(',')
# remove object ID
if (files_index != -1):
obj_id = fields_list[0]
path = table[int(obj_id)]
path = files_table[int(obj_id)]
newLine = ('INSERT INTO "tsk_files" VALUES(' + ', '.join(fields_list[1:]) + ');')
return newLine
# remove object ID
elif (path_index != -1):
obj_id = fields_list[0]
objValue = table[int(obj_id)]
par_obj_id = objects_table[int(obj_id)]
par_obj_value = table[par_obj_id]
par_obj_name = par_obj_value[par_obj_value.rfind('/')+1:]
#check the par_id that we insert to the path name when we create uniqueName
pathValue = re.sub(par_obj_name + '_' + str(par_obj_id), par_obj_name, fields_list[1])
obj_id = int(fields_list[0])
objValue = files_table[obj_id]
# remove the obj_id from ModuleOutput/EmbeddedFileExtractor directory
idx_pre = fields_list[1].find('EmbeddedFileExtractor') + len('EmbeddedFileExtractor')
if idx_pre > -1:
idx_pos = fields_list[1].find('\\', idx_pre + 2)
dir_to_replace = fields_list[1][idx_pre + 1 : idx_pos] # +1 to skip the file seperator
dir_to_replace = dir_to_replace[0:dir_to_replace.rfind('_')]
pathValue = fields_list[1][:idx_pre+1] + dir_to_replace + fields_list[1][idx_pos:]
else:
pathValue = fields_list[1]
newLine = ('INSERT INTO "tsk_files_path" VALUES(' + objValue + ', ' + pathValue + ', ' + ', '.join(fields_list[2:]) + ');')
return newLine
# remove object ID
elif (layout_index != -1):
obj_id = fields_list[0]
path= table[int(obj_id)]
path= files_table[int(obj_id)]
newLine = ('INSERT INTO "tsk_file_layout" VALUES(' + path + ', ' + ', '.join(fields_list[1:]) + ');')
return newLine
# remove object ID
......@@ -403,29 +410,29 @@ def normalize_db_entry(line, table, vs_parts_table, vs_info_table, fs_info_table
except Exception as e:
return line
if obj_id in table.keys():
path = table[obj_id]
if obj_id in files_table.keys():
path = files_table[obj_id]
elif obj_id in vs_parts_table.keys():
path = vs_parts_table[obj_id]
path = vs_parts_table[obj_id]
elif obj_id in vs_info_table.keys():
path = vs_info_table[obj_id]
path = vs_info_table[obj_id]
elif obj_id in fs_info_table.keys():
path = fs_info_table[obj_id]
path = fs_info_table[obj_id]
if parent_id in table.keys():
parent_path = table[parent_id]
if parent_id in files_table.keys():
parent_path = files_table[parent_id]
elif parent_id in vs_parts_table.keys():
parent_path = vs_parts_table[parent_id]
parent_path = vs_parts_table[parent_id]
elif parent_id in vs_info_table.keys():
parent_path = vs_info_table[parent_id]
parent_path = vs_info_table[parent_id]
elif parent_id in fs_info_table.keys():
parent_path = fs_info_table[parent_id]
parent_path = fs_info_table[parent_id]
if path and parent_path:
return newLine + path + ', ' + parent_path + ', ' + ', '.join(fields_list[2:]) + ');'
return newLine + path + ', ' + parent_path + ', ' + ', '.join(fields_list[2:]) + ');'
else:
return line
return line
# remove time-based information, ie Test_6/11/14 -> Test
elif (report_index != -1):
fields_list[1] = "AutopsyTestCase"
......@@ -467,60 +474,95 @@ def getAssociatedArtifactType(db_file, artifact_id):
return "File path: " + info[0] + " Artifact Type: " + info[1]
def build_id_files_table(db_cursor):
    """Build the map of object ids to file paths.

    Args:
        db_cursor: the database cursor

    Returns:
        dict mapping obj_id -> parent_path + name for every row in tsk_files.
    """
    # For each row, key on the object id and store the full file path
    # (parent path + file name) as the value.  str() is applied to each
    # column, so a NULL parent_path becomes the literal string 'None'
    # (matching the original behavior).
    return {row[0]: str(row[1]) + str(row[2])
            for row in db_cursor.execute("SELECT obj_id, parent_path, name FROM tsk_files")}
def build_id_vs_parts_table(db_cursor):
    """Build the map of object ids to vs_parts identifiers.

    Args:
        db_cursor: the database cursor

    Returns:
        dict mapping obj_id -> "<addr>_<start>" for every row in tsk_vs_parts.
    """
    # Key on the object id; the value joins addr and start with '_' so the
    # entry is stable across runs (no object ids leak into the dump).
    return {row[0]: str(row[1]) + '_' + str(row[2])
            for row in db_cursor.execute("SELECT obj_id, addr, start FROM tsk_vs_parts")}
def build_id_vs_info_table(db_cursor):
    """Build the map of object ids to vs_info identifiers.

    Args:
        db_cursor: the database cursor

    Returns:
        dict mapping obj_id -> "<vs_type>_<img_offset>" for every row in tsk_vs_info.
    """
    # Key on the object id; the value joins vs_type and img_offset with '_'
    # so the entry is stable across runs (no object ids leak into the dump).
    return {row[0]: str(row[1]) + '_' + str(row[2])
            for row in db_cursor.execute("SELECT obj_id, vs_type, img_offset FROM tsk_vs_info")}
def build_id_fs_info_table(db_cursor):
    """Build the map of object ids to fs_info identifiers.

    Args:
        db_cursor: the database cursor

    Returns:
        dict mapping obj_id -> "<img_offset>_<fs_type>" for every row in tsk_fs_info.
    """
    # Key on the object id; the value joins img_offset and fs_type with '_'
    # so the entry is stable across runs (no object ids leak into the dump).
    return {row[0]: str(row[1]) + '_' + str(row[2])
            for row in db_cursor.execute("SELECT obj_id, img_offset, fs_type FROM tsk_fs_info")}
def build_id_objects_table(db_cursor):
    """Build the map of object ids to [par_obj_id, type].

    Args:
        db_cursor: the database cursor

    Returns:
        dict mapping obj_id -> [par_obj_id, type] for every row in tsk_objects.
    """
    # Name the columns explicitly instead of SELECT * so the mapping does not
    # silently break if the tsk_objects column order ever changes.
    return {row[0]: [row[1], row[2]]
            for row in db_cursor.execute("SELECT obj_id, par_obj_id, type FROM tsk_objects")}
def build_id_artifact_types_table(db_cursor):
    """Build the map of artifact object ids to artifact type names.

    Args:
        db_cursor: the database cursor

    Returns:
        dict mapping artifact_obj_id -> artifact type name (e.g. 'TSK_WEB_BOOKMARK').
    """
    # Join blackboard_artifacts with blackboard_artifact_types so each
    # artifact's object id maps to its human-readable type name.
    return {row[0]: row[1]
            for row in db_cursor.execute("SELECT blackboard_artifacts.artifact_obj_id, blackboard_artifact_types.type_name FROM blackboard_artifacts INNER JOIN blackboard_artifact_types ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id ")}
def build_id_obj_path_table(files_table, objects_table, artifacts_table):
    """Build the map of object ids to paths, including artifact objects.

    Args:
        files_table: dict of obj_id -> file path (from build_id_files_table)
        objects_table: dict of obj_id -> [par_obj_id, type] (from build_id_objects_table)
        artifacts_table: dict of artifact obj_id -> artifact type name
            (from build_id_artifact_types_table)

    Returns:
        A new dict: a copy of files_table extended with a synthetic path for
        each artifact object (its parent's path + '/' + its artifact type name).
    """
    # Copy files_table so the caller's mapping is not mutated.
    mapping = files_table.copy()
    for obj_id, (par_obj_id, _obj_type) in objects_table.items():
        if obj_id in mapping:
            continue  # already has a real file path
        if obj_id in artifacts_table:
            # Artifact object: synthesize a path from its parent's path plus
            # its own artifact type name.
            mapping[obj_id] = mapping[par_obj_id] + "/" + artifacts_table[obj_id]
        elif par_obj_id not in mapping and par_obj_id in artifacts_table:
            # Parent is an artifact that has not been mapped yet: derive the
            # path from the grandparent.  BUG FIX: the original used the whole
            # [par_obj_id, type] list from objects_table as a dict key
            # (unhashable -> TypeError); index [0] to get the grandparent id.
            grandparent_id = objects_table[par_obj_id][0]
            mapping[obj_id] = mapping[grandparent_id] + "/" + artifacts_table[par_obj_id]
    return mapping
def main():
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment