From 8b8c553bf2c7d46e6cc21ef6af6bd45220d5e68e Mon Sep 17 00:00:00 2001 From: adam-m <amalinowski@basistech.com> Date: Thu, 24 Jan 2013 13:22:42 -0500 Subject: [PATCH] reenable processing of archive files (zip, tar, ...) by tika and string extractor, until after we have implemented the 7zip module --- .../autopsy/keywordsearch/AbstractFileStringExtract.java | 3 ++- .../autopsy/keywordsearch/AbstractFileTikaTextExtract.java | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileStringExtract.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileStringExtract.java index f2f7318a32..72a9d7dadd 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileStringExtract.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileStringExtract.java @@ -56,7 +56,8 @@ class AbstractFileStringExtract implements AbstractFileExtract { static final String[] UNSUPPORTED_EXTENSIONS = { //Archives //Note: archive unpacker module will process these instead - "tar", "jar", "zip", "7z", "gzip", "bzip", "bzip2", "gz", "tgz", "cab", "rar", "arj", "dmg", "iso"}; + //"tar", "jar", "zip", "7z", "gzip", "bzip", "bzip2", "gz", "tgz", "cab", "rar", "arj", "dmg", "iso" + }; //disabled prepending of BOM //static { diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileTikaTextExtract.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileTikaTextExtract.java index ed7d85eb96..e8f6f0ba42 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileTikaTextExtract.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileTikaTextExtract.java @@ -69,6 +69,8 @@ public class AbstractFileTikaTextExtract implements AbstractFileExtract { // TODO: use type detection mechanism instead, and maintain supported MimeTypes, not extensions // supported extensions list from 
http://www.lucidimagination.com/devzone/technical-articles/content-extraction-tika static final String[] SUPPORTED_EXTENSIONS = { + //Archives (to be removed when we have the archive module) + "tar", "jar", "zip", "gzip", "bzip2", "gz", "tgz", "ar", "cpio", //MS Office "doc", "dot", "docx", "docm", "dotx", "dotm", "xls", "xlw", "xlt", "xlsx", "xlsm", "xltx", "xltm", -- GitLab