diff --git a/test/script/config.xml b/test/script/config.xml index 6762cdd5ce7b311d8751c5abbb5457ff5c7413dd..89dfa9790deec09adf40efb6ff4d621785a5b769 100644 --- a/test/script/config.xml +++ b/test/script/config.xml @@ -6,12 +6,25 @@ List of tags: image: An image to be ingested build: the path to the build.xml file indir: the path to input directory -outdir: the path to output directory +singleUser_outdir: the path to single-user case output directory global_csv: path to global csv file -golddir: the path to gold directory +singleUser_golddir: the path to single-user case gold directory timing: can be set to True or False. If enabled, record the timing. - -NOTE: Make sure to use windows style for paths! +userCaseType: set this value to do single-user, multi-user or both tests + +List of tags for multi-user case: +multiUser_outdir: a path to multi-user case output directory +multiUser_golddir: a path to multi-user case gold directory +dbHost: PostgreSQL database host name +dbPort: PostgreSQL database port number +dbUserName: PostgreSQL database username +dbPassword: PostgreSQL database password +solrHost: Solr server host name +solrPort: Solr server port number +messageServiceHost: ActiveMQ server hostname +messageServicePort: ActiveMQ server port number + +NOTE: Make sure to use a UNC path for multiUser_outdir; use windows style for other paths! None of these tags are mandatory, and if nothing is provided the file will be looked over and ignored. 
diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index f55fc0a95d772c51adcbc72767bb2f17ced4f7fc..4f3518f5655d5dbdb941402c3841330662d9c09a 100755 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -300,11 +300,14 @@ def _dump_output_db_nonbb(db_file, dump_file): os.chmod (backup_db_file, 0o777) conn = sqlite3.connect(backup_db_file) - id_path_table = build_id_table(conn.cursor()) + id_files_table = build_id_files_table(conn.cursor()) id_vs_parts_table = build_id_vs_parts_table(conn.cursor()) id_vs_info_table = build_id_vs_info_table(conn.cursor()) id_fs_info_table = build_id_fs_info_table(conn.cursor()) id_objects_table = build_id_objects_table(conn.cursor()) + id_artifact_types_table = build_id_artifact_types_table(conn.cursor()) + id_obj_path_table = build_id_obj_path_table(id_files_table, id_objects_table, id_artifact_types_table) + conn.text_factory = lambda x: x.decode("utf-8", "ignore") # Delete the blackboard tables @@ -314,7 +317,7 @@ def _dump_output_db_nonbb(db_file, dump_file): # Write to the database dump with codecs.open(dump_file, "wb", "utf_8") as db_log: for line in conn.iterdump(): - line = normalize_db_entry(line, id_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table) + line = normalize_db_entry(line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table) db_log.write('%s\n' % line) # Now sort the file @@ -346,12 +349,12 @@ def _get_tmp_file(base, ext): class TskDbDiffException(Exception): pass -def normalize_db_entry(line, table, vs_parts_table, vs_info_table, fs_info_table, objects_table): +def normalize_db_entry(line, files_table, vs_parts_table, vs_info_table, fs_info_table, objects_table): """ Make testing more consistent and reasonable by doctoring certain db entries. Args: line: a String, the line to remove the object id from. - table: a map from object ids to file paths. + files_table: a map from object ids to file paths. 
""" files_index = line.find('INSERT INTO "tsk_files"') @@ -361,31 +364,35 @@ def normalize_db_entry(line, table, vs_parts_table, vs_info_table, fs_info_table layout_index = line.find('INSERT INTO "tsk_file_layout"') data_source_info_index = line.find('INSERT INTO "data_source_info"') ingest_job_index = line.find('INSERT INTO "ingest_jobs"') + parens = line[line.find('(') + 1 : line.rfind(')')] fields_list = parens.replace(" ", "").split(',') # remove object ID if (files_index != -1): obj_id = fields_list[0] - path = table[int(obj_id)] + path = files_table[int(obj_id)] newLine = ('INSERT INTO "tsk_files" VALUES(' + ', '.join(fields_list[1:]) + ');') return newLine # remove object ID elif (path_index != -1): - obj_id = fields_list[0] - objValue = table[int(obj_id)] - par_obj_id = objects_table[int(obj_id)] - par_obj_value = table[par_obj_id] - par_obj_name = par_obj_value[par_obj_value.rfind('/')+1:] - #check the par_id that we insert to the path name when we create uniqueName - pathValue = re.sub(par_obj_name + '_' + str(par_obj_id), par_obj_name, fields_list[1]) - + obj_id = int(fields_list[0]) + objValue = files_table[obj_id] + # remove the obj_id from ModuleOutput/EmbeddedFileExtractor directory + idx_pre = fields_list[1].find('EmbeddedFileExtractor') + len('EmbeddedFileExtractor') + if idx_pre > -1: + idx_pos = fields_list[1].find('\\', idx_pre + 2) + dir_to_replace = fields_list[1][idx_pre + 1 : idx_pos] # +1 to skip the file seperator + dir_to_replace = dir_to_replace[0:dir_to_replace.rfind('_')] + pathValue = fields_list[1][:idx_pre+1] + dir_to_replace + fields_list[1][idx_pos:] + else: + pathValue = fields_list[1] newLine = ('INSERT INTO "tsk_files_path" VALUES(' + objValue + ', ' + pathValue + ', ' + ', '.join(fields_list[2:]) + ');') return newLine # remove object ID elif (layout_index != -1): obj_id = fields_list[0] - path= table[int(obj_id)] + path= files_table[int(obj_id)] newLine = ('INSERT INTO "tsk_file_layout" VALUES(' + path + ', ' + ', 
'.join(fields_list[1:]) + ');') return newLine # remove object ID @@ -403,29 +410,29 @@ def normalize_db_entry(line, table, vs_parts_table, vs_info_table, fs_info_table except Exception as e: return line - if obj_id in table.keys(): - path = table[obj_id] + if obj_id in files_table.keys(): + path = files_table[obj_id] elif obj_id in vs_parts_table.keys(): - path = vs_parts_table[obj_id] + path = vs_parts_table[obj_id] elif obj_id in vs_info_table.keys(): - path = vs_info_table[obj_id] + path = vs_info_table[obj_id] elif obj_id in fs_info_table.keys(): - path = fs_info_table[obj_id] + path = fs_info_table[obj_id] - if parent_id in table.keys(): - parent_path = table[parent_id] + if parent_id in files_table.keys(): + parent_path = files_table[parent_id] elif parent_id in vs_parts_table.keys(): - parent_path = vs_parts_table[parent_id] + parent_path = vs_parts_table[parent_id] elif parent_id in vs_info_table.keys(): - parent_path = vs_info_table[parent_id] + parent_path = vs_info_table[parent_id] elif parent_id in fs_info_table.keys(): - parent_path = fs_info_table[parent_id] + parent_path = fs_info_table[parent_id] if path and parent_path: - return newLine + path + ', ' + parent_path + ', ' + ', '.join(fields_list[2:]) + ');' + return newLine + path + ', ' + parent_path + ', ' + ', '.join(fields_list[2:]) + ');' else: - return line + return line # remove time-based information, ie Test_6/11/14 -> Test elif (report_index != -1): fields_list[1] = "AutopsyTestCase" @@ -467,60 +474,95 @@ def getAssociatedArtifactType(db_file, artifact_id): return "File path: " + info[0] + " Artifact Type: " + info[1] -def build_id_table(artifact_cursor): +def build_id_files_table(db_cursor): """Build the map of object ids to file paths. 
Args: - artifact_cursor: the database cursor + db_cursor: the database cursor """ # for each row in the db, take the object id, parent path, and name, then create a tuple in the dictionary # with the object id as the key and the full file path (parent + name) as the value - mapping = dict([(row[0], str(row[1]) + str(row[2])) for row in artifact_cursor.execute("SELECT obj_id, parent_path, name FROM tsk_files")]) + mapping = dict([(row[0], str(row[1]) + str(row[2])) for row in db_cursor.execute("SELECT obj_id, parent_path, name FROM tsk_files")]) return mapping -def build_id_vs_parts_table(artifact_cursor): +def build_id_vs_parts_table(db_cursor): """Build the map of object ids to vs_parts. Args: - artifact_cursor: the database cursor + db_cursor: the database cursor """ # for each row in the db, take the object id, addr, and start, then create a tuple in the dictionary # with the object id as the key and (addr + start) as the value - mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in artifact_cursor.execute("SELECT obj_id, addr, start FROM tsk_vs_parts")]) + mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in db_cursor.execute("SELECT obj_id, addr, start FROM tsk_vs_parts")]) return mapping -def build_id_vs_info_table(artifact_cursor): +def build_id_vs_info_table(db_cursor): """Build the map of object ids to vs_info. 
Args: - artifact_cursor: the database cursor + db_cursor: the database cursor """ # for each row in the db, take the object id, vs_type, and img_offset, then create a tuple in the dictionary # with the object id as the key and (vs_type + img_offset) as the value - mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in artifact_cursor.execute("SELECT obj_id, vs_type, img_offset FROM tsk_vs_info")]) + mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in db_cursor.execute("SELECT obj_id, vs_type, img_offset FROM tsk_vs_info")]) return mapping -def build_id_fs_info_table(artifact_cursor): +def build_id_fs_info_table(db_cursor): """Build the map of object ids to fs_info. Args: - artifact_cursor: the database cursor + db_cursor: the database cursor """ # for each row in the db, take the object id, img_offset, and fs_type, then create a tuple in the dictionary # with the object id as the key and (img_offset + fs_type) as the value - mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in artifact_cursor.execute("SELECT obj_id, img_offset, fs_type FROM tsk_fs_info")]) + mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in db_cursor.execute("SELECT obj_id, img_offset, fs_type FROM tsk_fs_info")]) return mapping -def build_id_objects_table(artifact_cursor): +def build_id_objects_table(db_cursor): """Build the map of object ids to par_id. 
Args: - artifact_cursor: the database cursor + db_cursor: the database cursor """ # for each row in the db, take the object id, par_obj_id, then create a tuple in the dictionary - # with the object id as the key and par_obj_id as the value - mapping = dict([(row[0], row[1]) for row in artifact_cursor.execute("SELECT obj_id, par_obj_id FROM tsk_objects")]) + # with the object id as the key and par_obj_id, type as the value + mapping = dict([(row[0], [row[1], row[2]]) for row in db_cursor.execute("SELECT * FROM tsk_objects")]) + return mapping + +def build_id_artifact_types_table(db_cursor): + """Build the map of artifact object ids to artifact type names. + + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the artifact object id and artifact type name, then create a tuple in the dictionary + # with the object id as the key and artifact type as the value + mapping = dict([(row[0], row[1]) for row in db_cursor.execute("SELECT blackboard_artifacts.artifact_obj_id, blackboard_artifact_types.type_name FROM blackboard_artifacts INNER JOIN blackboard_artifact_types ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id ")]) + return mapping + + +def build_id_obj_path_table(files_table, objects_table, artifacts_table): + """Build the map of object ids to paths. 
+ + Args: + files_table: obj_id, path + objects_table: obj_id, par_obj_id, type + artifacts_table: obj_id, artifact_type_name + """ + # make a copy of files_table and update it with new data from artifacts_table + mapping = files_table.copy() + for k, v in objects_table.items(): + if k not in mapping.keys(): # If the mapping table doesn't have data for obj_id(k), we use its par_obj_id's path+name plus its artifact_type name as path + if k in artifacts_table.keys(): + par_obj_id = v[0] + path = mapping[par_obj_id] + mapping[k] = path + "/" + artifacts_table[k] + elif v[0] not in mapping.keys(): + if v[0] in artifacts_table.keys(): + par_obj_id = objects_table[v[0]] + path = mapping[par_obj_id] + mapping[k] = path + "/" + artifacts_table[v[0]] return mapping def main():