Skip to content
Snippets Groups Projects
Commit ce2cc562 authored by esaunders's avatar esaunders
Browse files

Added ignore known files functionality to InterestingFiles module.

parent d53cca14
Branches
Tags
No related merge requests found
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "Poco/AutoPtr.h" #include "Poco/AutoPtr.h"
#include "Poco/Path.h" #include "Poco/Path.h"
#include "Poco/File.h" #include "Poco/File.h"
#include "Poco/NumberParser.h"
#include "Poco/DOM/DOMParser.h" #include "Poco/DOM/DOMParser.h"
#include "Poco/DOM/Document.h" #include "Poco/DOM/Document.h"
#include "Poco/DOM/NodeList.h" #include "Poco/DOM/NodeList.h"
...@@ -45,6 +46,7 @@ namespace ...@@ -45,6 +46,7 @@ namespace
const std::string INTERESTING_FILE_SET_ELEMENT_TAG = "INTERESTING_FILE_SET"; const std::string INTERESTING_FILE_SET_ELEMENT_TAG = "INTERESTING_FILE_SET";
const std::string NAME_ATTRIBUTE = "name"; const std::string NAME_ATTRIBUTE = "name";
const std::string DESCRIPTION_ATTRIBUTE_TAG = "description"; const std::string DESCRIPTION_ATTRIBUTE_TAG = "description";
const std::string IGNORE_KNOWN_TAG = "ignoreKnown";
const std::string NAME_ELEMENT_TAG = "NAME"; const std::string NAME_ELEMENT_TAG = "NAME";
const std::string EXTENSION_ELEMENT_TAG = "EXTENSION"; const std::string EXTENSION_ELEMENT_TAG = "EXTENSION";
const std::string PATH_FILTER_ATTRIBUTE = "pathFilter"; const std::string PATH_FILTER_ATTRIBUTE = "pathFilter";
...@@ -54,6 +56,17 @@ namespace ...@@ -54,6 +56,17 @@ namespace
std::string configFilePath; std::string configFilePath;
// The following variables track whether we should ignore known
// files (and what type of known files to ignore) at a global
// level. These settings can be overridden on an individual
// file set.
// Whether we should ignore known files.
bool ignoreKnown = false;
// What type of known files to ignore.
TskImgDB::KNOWN_STATUS knownType = TskImgDB::IMGDB_FILES_KNOWN;
/** /**
* An interesting files set is defined by a set name, a set description, * An interesting files set is defined by a set name, a set description,
* and one or more SQL WHERE clauses that specify what files belong to the * and one or more SQL WHERE clauses that specify what files belong to the
...@@ -61,9 +74,12 @@ namespace ...@@ -61,9 +74,12 @@ namespace
*/ */
struct InterestingFilesSet struct InterestingFilesSet
{ {
InterestingFilesSet() : name(""), description("") {} InterestingFilesSet()
: name(""), description(""), ignoreKnown(false), knownType(TskImgDB::IMGDB_FILES_KNOWN) {}
std::string name; std::string name;
std::string description; std::string description;
bool ignoreKnown;
TskImgDB::KNOWN_STATUS knownType;
vector<std::string> conditions; vector<std::string> conditions;
}; };
...@@ -113,6 +129,25 @@ namespace ...@@ -113,6 +129,25 @@ namespace
Poco::replaceInPlace(stringToChange, "*", "%"); Poco::replaceInPlace(stringToChange, "*", "%");
} }
/**
 * Verifies that the given attribute value is a valid integer value
 * for a known type and converts the value to its corresponding enum.
 *
 * @param attributeValue The text of an 'ignoreKnown' attribute. It must be
 * an integer between TskImgDB::IMGDB_FILES_KNOWN and
 * TskImgDB::IMGDB_FILES_UNKNOWN (inclusive).
 * @return The TskImgDB::KNOWN_STATUS value corresponding to the integer.
 * @throws TskException if the value is not an integer or is out of range.
 */
TskImgDB::KNOWN_STATUS parseKnownType(const std::string& attributeValue)
{
    const std::string MSG_PREFIX(MODULE_NAME + std::string("::parseKnownType : "));

    // tryParse() reports malformed text through its return value, whereas
    // parse() throws Poco::SyntaxException. Using it lets non-numeric and
    // out-of-range input fail the same way, with a TskException.
    int knownType = 0;
    const bool isInteger = Poco::NumberParser::tryParse(attributeValue, knownType);

    if (!isInteger || knownType > TskImgDB::IMGDB_FILES_UNKNOWN || knownType < TskImgDB::IMGDB_FILES_KNOWN)
    {
        std::ostringstream msg;
        msg << MSG_PREFIX << "Invalid value for ignoreKnown: '" << attributeValue << "'.";
        throw TskException(msg.str());
    }

    return static_cast<TskImgDB::KNOWN_STATUS>(knownType);
}
/** /**
* Adds optional file type (file, directory) and path substring filters to * Adds optional file type (file, directory) and path substring filters to
* an SQL WHERE clause for a file search condition. * an SQL WHERE clause for a file search condition.
...@@ -188,13 +223,12 @@ namespace ...@@ -188,13 +223,12 @@ namespace
} }
/** /**
* Creates an SQL WHERE clause for a file query from a file name * Creates an SQL condition to find files based on file name.
* condition.
* *
* @param conditionDefinition A file name condition XML element. * @param conditionDefinition A file name condition XML element.
* @param conditions The WHERE clause is added to this collection. * @return The constructed SQL condition.
*/ */
void compileFileNameSearchCondition(const Poco::XML::Node *conditionDefinition, std::vector<std::string> &conditions) std::string compileFileNameSearchCondition(const Poco::XML::Node *conditionDefinition)
{ {
const std::string MSG_PREFIX = "InterestingFilesModule::compileFileNameSearchCondition : "; const std::string MSG_PREFIX = "InterestingFilesModule::compileFileNameSearchCondition : ";
...@@ -207,29 +241,29 @@ namespace ...@@ -207,29 +241,29 @@ namespace
} }
std::stringstream conditionBuilder; std::stringstream conditionBuilder;
if (hasGlobWildcards(name)) if (hasGlobWildcards(name))
{ {
convertGlobWildcardsToSQLWildcards(name); convertGlobWildcardsToSQLWildcards(name);
conditionBuilder << "WHERE UPPER(name) LIKE UPPER(" << TskServices::Instance().getImgDB().quote(name) << ") ESCAPE '#' "; conditionBuilder << "UPPER(name) LIKE UPPER(" << TskServices::Instance().getImgDB().quote(name) << ") ESCAPE '#' ";
} }
else else
{ {
conditionBuilder << "WHERE UPPER(name) = UPPER(" + TskServices::Instance().getImgDB().quote(name) + ")"; conditionBuilder << "UPPER(name) = UPPER(" + TskServices::Instance().getImgDB().quote(name) + ")";
} }
addPathAndTypeFilterOptions(conditionDefinition, conditionBuilder); addPathAndTypeFilterOptions(conditionDefinition, conditionBuilder);
conditionBuilder << " ORDER BY file_id";
conditions.push_back(conditionBuilder.str()); return conditionBuilder.str();
} }
/** /**
* Creates an SQL WHERE clause for a file query from a file extension * Creates an SQL condition to find files based on extension.
* condition.
* *
* @param conditionDefinition A file extension condition XML element. * @param conditionDefinition A file extension condition XML element.
* @param conditions The WHERE clause is added to this collection. * @returns The SQL condition.
*/ */
void compileExtensionSearchCondition(const Poco::XML::Node *conditionDefinition, std::vector<std::string> &conditions) std::string compileExtensionSearchCondition(const Poco::XML::Node *conditionDefinition)
{ {
const std::string MSG_PREFIX = "InterestingFilesModule::compileExtensionSearchCondition : "; const std::string MSG_PREFIX = "InterestingFilesModule::compileExtensionSearchCondition : ";
...@@ -253,11 +287,11 @@ namespace ...@@ -253,11 +287,11 @@ namespace
// @@@ TODO: In combination with glob wildcards this may create some unexpected matches. // @@@ TODO: In combination with glob wildcards this may create some unexpected matches.
// For example, ".htm*" will become "%.htm%" which will match "file.htm.txt" and the like. // For example, ".htm*" will become "%.htm%" which will match "file.htm.txt" and the like.
std::stringstream conditionBuilder; std::stringstream conditionBuilder;
conditionBuilder << "WHERE UPPER(name) LIKE UPPER('%" << extension << "') ESCAPE '#' "; conditionBuilder << "UPPER(name) LIKE UPPER('%" << extension << "') ESCAPE '#' ";
addPathAndTypeFilterOptions(conditionDefinition, conditionBuilder);
addPathAndTypeFilterOptions(conditionDefinition, conditionBuilder); return conditionBuilder.str();
conditionBuilder << " ORDER BY file_id";
conditions.push_back(conditionBuilder.str());
} }
/** /**
...@@ -314,6 +348,20 @@ namespace ...@@ -314,6 +348,20 @@ namespace
LOGWARN(msg.str()); LOGWARN(msg.str());
} }
} }
else if (attributeName == IGNORE_KNOWN_TAG)
{
if (!attributeValue.empty())
{
fileSet.knownType = parseKnownType(attributeValue);
fileSet.ignoreKnown = true;
}
else
{
std::ostringstream msg;
msg << MSG_PREFIX << "ignored " << INTERESTING_FILE_SET_ELEMENT_TAG << "'" << IGNORE_KNOWN_TAG << "' attribute without a value";
LOGWARN(msg.str());
}
}
else else
{ {
std::ostringstream msg; std::ostringstream msg;
...@@ -358,6 +406,15 @@ namespace ...@@ -358,6 +406,15 @@ namespace
throw TskException(msg.str()); throw TskException(msg.str());
} }
std::string conditionBase;
// If we want to ignore known files either on an individual file set
// or globally we need to join with the file_hashes table.
if (fileSet.ignoreKnown || ignoreKnown)
conditionBase = " JOIN file_hashes ON (files.file_id = file_hashes.file_id) WHERE ";
else
conditionBase = " WHERE ";
// Get the search conditions. // Get the search conditions.
Poco::AutoPtr<Poco::XML::NodeList>conditionDefinitions = fileSetDefinition->childNodes(); Poco::AutoPtr<Poco::XML::NodeList>conditionDefinitions = fileSetDefinition->childNodes();
for (unsigned long i = 0; i < conditionDefinitions->length(); ++i) for (unsigned long i = 0; i < conditionDefinitions->length(); ++i)
...@@ -366,13 +423,17 @@ namespace ...@@ -366,13 +423,17 @@ namespace
if (conditionDefinition->nodeType() == Poco::XML::Node::ELEMENT_NODE) if (conditionDefinition->nodeType() == Poco::XML::Node::ELEMENT_NODE)
{ {
const std::string &conditionType = Poco::XML::fromXMLString(conditionDefinition->nodeName()); const std::string &conditionType = Poco::XML::fromXMLString(conditionDefinition->nodeName());
std::stringstream conditionBuilder;
conditionBuilder << conditionBase;
if (conditionType == NAME_ELEMENT_TAG) if (conditionType == NAME_ELEMENT_TAG)
{ {
compileFileNameSearchCondition(conditionDefinition, fileSet.conditions); conditionBuilder << compileFileNameSearchCondition(conditionDefinition);
} }
else if (conditionType == EXTENSION_ELEMENT_TAG) else if (conditionType == EXTENSION_ELEMENT_TAG)
{ {
compileExtensionSearchCondition(conditionDefinition, fileSet.conditions); conditionBuilder << compileExtensionSearchCondition(conditionDefinition);
} }
else else
{ {
...@@ -380,6 +441,14 @@ namespace ...@@ -380,6 +441,14 @@ namespace
msg << MSG_PREFIX << "unrecognized " << INTERESTING_FILE_SET_ELEMENT_TAG << " child element '" << conditionType << "'"; msg << MSG_PREFIX << "unrecognized " << INTERESTING_FILE_SET_ELEMENT_TAG << " child element '" << conditionType << "'";
throw TskException(msg.str()); throw TskException(msg.str());
} }
if (fileSet.ignoreKnown)
conditionBuilder << " AND file_hashes.known != " << fileSet.knownType;
else if (ignoreKnown)
conditionBuilder << " AND file_hashes.known != " << knownType;
conditionBuilder << " ORDER BY files.file_id";
fileSet.conditions.push_back(conditionBuilder.str());
} }
} }
...@@ -468,6 +537,23 @@ extern "C" ...@@ -468,6 +537,23 @@ extern "C"
{ {
Poco::XML::InputSource inputSource(configStream); Poco::XML::InputSource inputSource(configStream);
Poco::AutoPtr<Poco::XML::Document> configDoc = Poco::XML::DOMParser().parse(&inputSource); Poco::AutoPtr<Poco::XML::Document> configDoc = Poco::XML::DOMParser().parse(&inputSource);
Poco::XML::Element * rootElement = configDoc->documentElement();
if (rootElement == NULL)
{
std::ostringstream msg;
msg << MSG_PREFIX << "Root element of config file is NULL.";
throw TskException(msg.str());
}
const std::string& ignoreKnownValue = Poco::XML::fromXMLString(rootElement->getAttribute(IGNORE_KNOWN_TAG));
if (!ignoreKnownValue.empty())
{
knownType = parseKnownType(ignoreKnownValue);
ignoreKnown = true;
}
Poco::AutoPtr<Poco::XML::NodeList> fileSetDefinitions = configDoc->getElementsByTagName(INTERESTING_FILE_SET_ELEMENT_TAG); Poco::AutoPtr<Poco::XML::NodeList> fileSetDefinitions = configDoc->getElementsByTagName(INTERESTING_FILE_SET_ELEMENT_TAG);
for (unsigned long i = 0; i < fileSetDefinitions->length(); ++i) for (unsigned long i = 0; i < fileSetDefinitions->length(); ++i)
{ {
......
...@@ -36,7 +36,7 @@ The configuration file is an XML document that defines interesting ...@@ -36,7 +36,7 @@ The configuration file is an XML document that defines interesting
file sets in terms of search criteria. Here is a sample: file sets in terms of search criteria. Here is a sample:
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<INTERESTING_FILES> <INTERESTING_FILES ignoreKnown="0">
<INTERESTING_FILE_SET name="HTMLFilesType" description="Files with extension .htm*"> <INTERESTING_FILE_SET name="HTMLFilesType" description="Files with extension .htm*">
<EXTENSION typeFilter="file">.htm*</EXTENSION> <EXTENSION typeFilter="file">.htm*</EXTENSION>
</INTERESTING_FILE_SET> </INTERESTING_FILE_SET>
...@@ -97,7 +97,20 @@ search named "SuspiciousFiles" will find files and directories that end in ...@@ -97,7 +97,20 @@ search named "SuspiciousFiles" will find files and directories that end in
attributes. Matches with this filter must contain the specified string as attributes. Matches with this filter must contain the specified string as
a sub-string of the file or directory path. a sub-string of the file or directory path.
Known files (e.g. files in the NSRL) can be ignored by providing the
'ignoreKnown' attribute either on the top level 'INTERESTING_FILES' element
or on one or more 'INTERESTING_FILE_SET' elements.
The valid values for the 'ignoreKnown' attribute, listed below, are based on
the TskImgDB::KNOWN_STATUS enumeration in TskImgDB.h.
0 - All known files (both good and bad)
1 - Known good files
2 - Known bad (or notable) files
3 - Unknown files
The ability to ignore known files depends on the existence of a hash database
along with hash calculation and lookup modules.
RESULTS RESULTS
The result of the lookup is written to the blackboard as an artifact. The result of the lookup is written to the blackboard as an artifact.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment