diff --git a/Core/src/org/sleuthkit/autopsy/guicomponentutils/AutoCompletion.java b/Core/src/org/sleuthkit/autopsy/guicomponentutils/AutoCompletion.java new file mode 100755 index 0000000000000000000000000000000000000000..4a5c195d7e3bd0c46447080bbbe78f8a9c8c99cf --- /dev/null +++ b/Core/src/org/sleuthkit/autopsy/guicomponentutils/AutoCompletion.java @@ -0,0 +1,228 @@ +package org.sleuthkit.autopsy.guicomponentutils; + +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.awt.event.FocusAdapter; +import java.awt.event.FocusEvent; +import java.awt.event.FocusListener; +import java.awt.event.KeyAdapter; +import java.awt.event.KeyEvent; +import java.awt.event.KeyListener; +import java.beans.PropertyChangeEvent; +import java.beans.PropertyChangeListener; +import javax.swing.ComboBoxEditor; +import javax.swing.ComboBoxModel; +import javax.swing.JComboBox; +import javax.swing.text.AttributeSet; +import javax.swing.text.BadLocationException; +import javax.swing.text.JTextComponent; +import javax.swing.text.PlainDocument; + + +/* + * This code is taken from http://www.orbital-computer.de/JComboBox/source/AutoCompletion.java + * Author: Thomas Bierhance + * This work is hereby released into the Public Domain. 
To view a copy of the + * public domain dedication, visit + * http://creativecommons.org/licenses/publicdomain/ + */ +public class AutoCompletion extends PlainDocument { + + private static final long serialVersionUID = 1L; + + private JComboBox<?> comboBox; + private ComboBoxModel<?> model; + private JTextComponent editor; +// flag to indicate if setSelectedItem has been called +// subsequent calls to remove/insertString should be ignored + private boolean selecting = false; + private boolean hidePopupOnFocusLoss; + private boolean hitBackspace = false; + private boolean hitBackspaceOnSelection; + + private KeyListener editorKeyListener; + private FocusListener editorFocusListener; + + public AutoCompletion(final JComboBox<?> comboBox) { + this.comboBox = comboBox; + model = comboBox.getModel(); + comboBox.addActionListener(new ActionListener() { + @Override + public void actionPerformed(ActionEvent e) { + if (!selecting) { + highlightCompletedText(0); + } + } + }); + comboBox.addPropertyChangeListener(new PropertyChangeListener() { + @Override + public void propertyChange(PropertyChangeEvent e) { + if (e.getPropertyName().equals("editor")) { + configureEditor((ComboBoxEditor) e.getNewValue()); + } + if (e.getPropertyName().equals("model")) { + model = (ComboBoxModel) e.getNewValue(); + } + } + }); + editorKeyListener = new KeyAdapter() { + @Override + public void keyPressed(KeyEvent e) { + if (comboBox.isDisplayable()) { + comboBox.setPopupVisible(true); + } + hitBackspace = false; + switch (e.getKeyCode()) { + // determine if the pressed key is backspace (needed by the remove method) + case KeyEvent.VK_BACK_SPACE: + hitBackspace = true; + hitBackspaceOnSelection = editor.getSelectionStart() != editor.getSelectionEnd(); + break; + // ignore delete key + case KeyEvent.VK_DELETE: + e.consume(); + comboBox.getToolkit().beep(); + break; + } + } + }; + // Bug 5100422 on Java 1.5: Editable JComboBox won't hide popup when tabbing out + hidePopupOnFocusLoss = 
System.getProperty("java.version").startsWith("1.5"); + // Highlight whole text when gaining focus + editorFocusListener = new FocusAdapter() { + @Override + public void focusGained(FocusEvent e) { + highlightCompletedText(0); + } + + @Override + public void focusLost(FocusEvent e) { + // Workaround for Bug 5100422 - Hide Popup on focus loss + if (hidePopupOnFocusLoss) { + comboBox.setPopupVisible(false); + } + } + }; + configureEditor(comboBox.getEditor()); + // Handle initially selected object + Object selected = comboBox.getSelectedItem(); + if (selected != null) { + setText(selected.toString()); + } + highlightCompletedText(0); + } + + public static void enable(JComboBox<?> comboBox) { + // has to be editable + comboBox.setEditable(true); + // change the editor's document + new AutoCompletion(comboBox); + } + + void configureEditor(ComboBoxEditor newEditor) { + if (editor != null) { + editor.removeKeyListener(editorKeyListener); + editor.removeFocusListener(editorFocusListener); + } + + if (newEditor != null) { + editor = (JTextComponent) newEditor.getEditorComponent(); + editor.addKeyListener(editorKeyListener); + editor.addFocusListener(editorFocusListener); + editor.setDocument(this); + } + } + + public void remove(int offs, int len) throws BadLocationException { + // return immediately when selecting an item + if (selecting) { + return; + } + if (hitBackspace) { + // user hit backspace => move the selection backwards + // old item keeps being selected + if (offs > 0) { + if (hitBackspaceOnSelection) { + offs--; + } + } else { + // User hit backspace with the cursor positioned on the start => beep + comboBox.getToolkit().beep(); // when available use: UIManager.getLookAndFeel().provideErrorFeedback(comboBox); + } + highlightCompletedText(offs); + } else { + super.remove(offs, len); + } + } + + @Override + public void insertString(int offs, String str, AttributeSet a) throws BadLocationException { + // return immediately when selecting an item + if 
(selecting) { + return; + } + // insert the string into the document + super.insertString(offs, str, a); + // lookup and select a matching item + Object item = lookupItem(getText(0, getLength())); + if (item != null) { + setSelectedItem(item); + } else { + // keep old item selected if there is no match + item = comboBox.getSelectedItem(); + // imitate no insert (later on offs will be incremented by str.length(): selection won't move forward) + offs = offs - str.length(); + // provide feedback to the user that his input has been received but can not be accepted + comboBox.getToolkit().beep(); // when available use: UIManager.getLookAndFeel().provideErrorFeedback(comboBox); + } + setText(item.toString()); + // select the completed part + highlightCompletedText(offs + str.length()); + } + + private void setText(String text) { + try { + // remove all text and insert the completed string + super.remove(0, getLength()); + super.insertString(0, text, null); + } catch (BadLocationException e) { + throw new RuntimeException(e.toString()); + } + } + + private void highlightCompletedText(int start) { + editor.setCaretPosition(getLength()); + editor.moveCaretPosition(start); + } + + private void setSelectedItem(Object item) { + selecting = true; + model.setSelectedItem(item); + selecting = false; + } + + private Object lookupItem(String pattern) { + Object selectedItem = model.getSelectedItem(); + // only search for a different item if the currently selected does not match + if (selectedItem != null && startsWithIgnoreCase(selectedItem.toString(), pattern)) { + return selectedItem; + } else { + // iterate over all items + for (int i = 0, n = model.getSize(); i < n; i++) { + Object currentItem = model.getElementAt(i); + // current item starts with the pattern? 
+ if (currentItem != null && startsWithIgnoreCase(currentItem.toString(), pattern)) { + return currentItem; + } + } + } + // no item starts with the pattern => return null + return null; + } + +// checks if str1 starts with str2 - ignores case + private boolean startsWithIgnoreCase(String str1, String str2) { + return str1.toUpperCase().startsWith(str2.toUpperCase()); + } + +} diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestJobSettings.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestJobSettings.java index a6b5f897dd1e1649721383a3e63fb093788364ae..a89d0ec6808e79dc8fa329d4bb664114401adbb4 100644 --- a/Core/src/org/sleuthkit/autopsy/ingest/IngestJobSettings.java +++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestJobSettings.java @@ -25,6 +25,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.text.MessageFormat; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -402,7 +403,7 @@ private void load() { } for (String moduleName : disabledModuleNames) { if (!loadedModuleNames.contains(moduleName)) { - missingModuleNames.add(moduleName); + logger.log(Level.WARNING, MessageFormat.format("A module marked as disabled in the ingest job settings, ''{0}'', could not be found.", moduleName)); } } for (String moduleName : missingModuleNames) { diff --git a/Core/src/org/sleuthkit/autopsy/modules/interestingitems/FilesSetRulePanel.java b/Core/src/org/sleuthkit/autopsy/modules/interestingitems/FilesSetRulePanel.java index f3411768206406216a795c194f2af99185485885..ee7a69aee2f6f3dc90b374441e408ede7f4e7ff0 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/interestingitems/FilesSetRulePanel.java +++ b/Core/src/org/sleuthkit/autopsy/modules/interestingitems/FilesSetRulePanel.java @@ -18,6 +18,7 @@ */ package org.sleuthkit.autopsy.modules.interestingitems; +import org.sleuthkit.autopsy.guicomponentutils.AutoCompletion; import java.awt.Color; import java.awt.event.ActionEvent; import 
java.util.Arrays; @@ -76,6 +77,7 @@ final class FilesSetRulePanel extends javax.swing.JPanel { */ FilesSetRulePanel(JButton okButton, JButton cancelButton, PANEL_TYPE panelType) { initComponents(); + AutoCompletion.enable(mimeTypeComboBox); if (panelType == FilesSetDefsPanel.PANEL_TYPE.FILE_INGEST_FILTERS) { //Hide the mimetype settings when this is displaying a FileSet rule instead of a interesting item rule mimeTypeComboBox.setVisible(false); mimeCheck.setVisible(false); @@ -102,6 +104,7 @@ final class FilesSetRulePanel extends javax.swing.JPanel { */ FilesSetRulePanel(FilesSet.Rule rule, JButton okButton, JButton cancelButton, PANEL_TYPE panelType) { initComponents(); + AutoCompletion.enable(mimeTypeComboBox); if (panelType == FilesSetDefsPanel.PANEL_TYPE.FILE_INGEST_FILTERS) { //Hide the mimetype settings when this is displaying a FileSet rule instead of a interesting item rule mimeTypeComboBox.setVisible(false); mimeCheck.setVisible(false); diff --git a/KeywordSearch/ivy.xml b/KeywordSearch/ivy.xml index 7b417a99c73542d8fb2ad8790708b9d84fc54e5d..174c2e22fd1cb57e19a63f91736c5caa88c1ed8a 100644 --- a/KeywordSearch/ivy.xml +++ b/KeywordSearch/ivy.xml @@ -18,7 +18,8 @@ <dependency conf="solr-war->default" org="org.apache.solr" name="solr" rev="4.10.4" transitive="false" /> <!-- the war file for embedded Solr 4 --> <dependency conf="solr-libs->default" name="solr-cell" rev="8.11.2" org="org.apache.solr"/> - + <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core --> + <dependency conf="autopsy->default" org="org.apache.lucene" name="lucene-core" rev="8.11.2"/> <!-- Autopsy --> <dependency conf="autopsy->default" org="org.apache.solr" name="solr-solrj" rev="8.11.2"/> <dependency conf="autopsy->default" org="com.optimaize.languagedetector" name="language-detector" rev="0.6"/> diff --git a/KeywordSearch/nbproject/project.properties b/KeywordSearch/nbproject/project.properties index 
bc6da03ae3528af4cd3c885b5194cc17205dc038..639894fee3de216c50b214ca4da1ed1c9969dff3 100644 --- a/KeywordSearch/nbproject/project.properties +++ b/KeywordSearch/nbproject/project.properties @@ -48,6 +48,7 @@ file.reference.stax2-api-4.2.1.jar=release/modules/ext/stax2-api-4.2.1.jar file.reference.woodstox-core-6.2.4.jar=release/modules/ext/woodstox-core-6.2.4.jar file.reference.zookeeper-3.8.0.jar=release/modules/ext/zookeeper-3.8.0.jar file.reference.zookeeper-jute-3.8.0.jar=release/modules/ext/zookeeper-jute-3.8.0.jar +file.reference.lucene-core-8.11.2.jar=release/modules/ext/lucene-core-8.11.2.jar javac.source=17 javac.compilerargs=-Xlint -Xlint:-serial license.file=../LICENSE-2.0.txt diff --git a/KeywordSearch/nbproject/project.xml b/KeywordSearch/nbproject/project.xml index 9e170fac129b32abc4a25f1f0e74a7c8e0162e0b..b1ec158a2ae07bc30a20e272846b2e73bf33364b 100644 --- a/KeywordSearch/nbproject/project.xml +++ b/KeywordSearch/nbproject/project.xml @@ -434,6 +434,10 @@ <runtime-relative-path>ext/zookeeper-jute-3.8.0.jar</runtime-relative-path> <binary-origin>release/modules/ext/zookeeper-jute-3.8.0.jar</binary-origin> </class-path-extension> + <class-path-extension> + <runtime-relative-path>ext/lucene-core-8.11.2.jar</runtime-relative-path> + <binary-origin>release/modules/ext/lucene-core-8.11.2.jar</binary-origin> + </class-path-extension> </data> </configuration> </project> diff --git a/KeywordSearch/solr/server/solr/configsets/AutopsyConfig/conf/solrconfig.xml b/KeywordSearch/solr/server/solr/configsets/AutopsyConfig/conf/solrconfig.xml index 9fde79cd365a7e1535458692125aa88ce03c1d5e..92ce238a46ee9c670c032c3dac32e279a3526314 100755 --- a/KeywordSearch/solr/server/solr/configsets/AutopsyConfig/conf/solrconfig.xml +++ b/KeywordSearch/solr/server/solr/configsets/AutopsyConfig/conf/solrconfig.xml @@ -301,7 +301,7 @@ <autoCommit> <maxTime>300000</maxTime> <!-- maxDocs>15000</maxDocs --> - <openSearcher>true</openSearcher> + <openSearcher>false</openSearcher> 
</autoCommit> <!-- softAutoCommit is like autoCommit except it causes a diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AdHocSearchPanel.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AdHocSearchPanel.java index c9aa061400ccccaccfc1a3b8115dca3d83fb2a6e..ce6c23f57d40f6efe36c176332c628620c87df0c 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AdHocSearchPanel.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AdHocSearchPanel.java @@ -112,9 +112,9 @@ public void search(boolean saveResults) { } if (filesIndexed == 0) { if (isIngestRunning) { + // ELTODO this message should be dependent on whether Solr indexing is enabled or not KeywordSearchUtil.displayDialog(keywordSearchErrorDialogHeader, NbBundle.getMessage(this.getClass(), - "AbstractKeywordSearchPerformer.search.noFilesInIdxMsg", - KeywordSearchSettings.getUpdateFrequency().getTime()), KeywordSearchUtil.DIALOG_MESSAGE_TYPE.ERROR); + "AbstractKeywordSearchPerformer.search.noFilesInIdxMsg"), KeywordSearchUtil.DIALOG_MESSAGE_TYPE.ERROR); } else { KeywordSearchUtil.displayDialog(keywordSearchErrorDialogHeader, NbBundle.getMessage(this.getClass(), "AbstractKeywordSearchPerformer.search.noFilesIdxdMsg"), KeywordSearchUtil.DIALOG_MESSAGE_TYPE.ERROR); diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties index 745e47d7d854738b5cb5cb18ee421ea4fd4c4755..fd8c6b4a4195bf4bc25448e2ac7d270871a1d1aa 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties @@ -39,8 +39,8 @@ AbstractKeywordSearchPerformer.search.invalidSyntaxHeader=Invalid query statemen AbstractKeywordSearchPerformer.search.searchIngestInProgressTitle=Keyword Search Ingest in Progress AbstractKeywordSearchPerformer.search.ingestInProgressBody=<html>Keyword Search Ingest is currently 
running.<br />Not all files have been indexed and this search might yield incomplete results.<br />Do you want to proceed with this search anyway?</html> AbstractKeywordSearchPerformer.search.emptyKeywordErrorBody=Keyword list is empty, please add at least one keyword to the list -AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>No files are in index yet. <br />Try again later. Index is updated every {0} minutes.</html> -AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>No files were indexed.<br />Re-ingest the image with the Keyword Search Module enabled. </html> +AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>No files are in index yet. <br />If Solr keyword search indexing was enabled, wait for ingest to complete</html> +AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>No files were indexed.<br />Re-ingest the image with the Keyword Search Module and Solr indexing enabled. </html> ExtractedContentViewer.toolTip=Displays extracted text from files and keyword-search results. Requires Keyword Search ingest to be run on a file to activate this viewer. ExtractedContentViewer.getTitle=Indexed Text HighlightedMatchesSource.toString=Search Results @@ -122,7 +122,7 @@ KeywordSearchListsManagementPanel.fileExtensionFilterLbl=Autopsy Keyword List Fi KeywordSearchListsManagementPanel.fileExtensionFilterLb2=Encase Keyword List File (txt) KeywordSearch.listImportFeatureTitle=Keyword List Import KeywordSearchIngestModule.moduleName=Keyword Search -KeywordSearchIngestModule.moduleDescription=Performs file indexing and periodic search using keywords and regular expressions in lists. +KeywordSearchIngestModule.moduleDescription=Performs file indexing and search using selected keyword lists. 
DropdownSearchPanel.keywordTextField.text= KeywordSearchPanel.searchDropButton.text=Keyword Search DropdownSearchPanel.exactRadioButton.text=Exact Match @@ -211,11 +211,6 @@ KeywordSearchGlobalLanguageSettingsPanel.enableUTF8Checkbox.text=Enable UTF8 tex KeywordSearchGlobalLanguageSettingsPanel.ingestSettingsLabel.text=Ingest settings for string extraction from unknown file types (changes effective on next ingest): KeywordSearchGlobalLanguageSettingsPanel.enableUTF16Checkbox.text=Enable UTF16LE and UTF16BE string extraction KeywordSearchGlobalLanguageSettingsPanel.languagesLabel.text=Enabled scripts (languages): -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText=20 mins. (fastest ingest time) -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text=20 minutes (slowest feedback, fastest ingest) -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.toolTipText=10 minutes (faster overall ingest time than default) -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.text=10 minutes (slower feedback, faster ingest) -KeywordSearchGlobalSearchSettingsPanel.frequencyLabel.text=Results update frequency during ingest: KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.toolTipText=Requires Hash Set service to had run previously, or be selected for next ingest. 
KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.text=Do not add files in NSRL (known files) to keyword index during ingest KeywordSearchGlobalSearchSettingsPanel.informationLabel.text=Information @@ -224,11 +219,7 @@ KeywordSearchGlobalSearchSettingsPanel.filesIndexedValue.text=0 KeywordSearchGlobalSearchSettingsPanel.filesIndexedLabel.text=Files in keyword index: KeywordSearchGlobalSearchSettingsPanel.showSnippetsCB.text=Show Keyword Preview in Keyword Search Results (will result in longer search times) KeywordSearchGlobalSearchSettingsPanel.chunksValLabel.text=0 -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.toolTipText=1 minute (overall ingest time will be longest) -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.text_1=1 minute (faster feedback, longest ingest) KeywordSearchGlobalSearchSettingsPanel.chunksLabel.text=Chunks in keyword index: -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.toolTipText=5 minutes (overall ingest time will be longer) -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.text=5 minutes (default) KeywordSearchIngestModule.regExpHitLbl=Reg Ex hit: KeywordSearchIngestModule.kwHitLbl=Keyword hit: KeywordSearchIngestModule.kwHitThLbl=Keyword @@ -254,8 +245,6 @@ KeywordSearchListsManagementPanel.newKeywordListDescription2=Keyword list <{0}> KeywordSearchModuleFactory.getIngestJobSettingsPanel.exception.msg=Expected settings argument to be instanceof KeywordSearchJobSettings KeywordSearchModuleFactory.createFileIngestModule.exception.msg=Expected settings argument to be instanceof KeywordSearchJobSettings SearchRunner.Searcher.done.err.msg=Error performing keyword search -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.toolTipText=Fastest overall, but no results until the end -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.text=No periodic searches SolrConnectionCheck.HostnameOrPort=Invalid hostname and/or port number. SolrConnectionCheck.Hostname=Invalid hostname. 
SolrConnectionCheck.MissingHostname=Missing hostname. @@ -322,3 +311,4 @@ ExtractedContentPanel.pagesLabel.text=Page: KeywordSearchJobSettingsPanel.ocrCheckBox.text=Enable Optical Character Recognition (OCR) KeywordSearchJobSettingsPanel.limitedOcrCheckbox.text=<html>Only process PDFs, MS Office docs and images which are over 100KB in size or extracted from another file (Beta)</html> KeywordSearchJobSettingsPanel.ocrOnlyCheckbox.text=Only index text extracted using OCR +KeywordSearchJobSettingsPanel.solrCheckbox.text=Add text to Solr Index diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties-MERGED b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties-MERGED index 15099026808e4e9a7b23a0e0ddaec1f905f052c0..c05fd15c027c687d84bc8dc81d68830bd0956d94 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties-MERGED +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties-MERGED @@ -15,16 +15,13 @@ ExtractAllTermsReport.error.noOpenCase=No currently open case. ExtractAllTermsReport.export.error=Error During Unique Word Extraction ExtractAllTermsReport.exportComplete=Unique Word Extraction Complete ExtractAllTermsReport.getName.text=Extract Unique Words -# {0} - Number of extracted terms ExtractAllTermsReport.numberExtractedTerms=Extracted {0} terms... ExtractAllTermsReport.search.ingestInProgressBody=<html>Keyword Search Ingest is currently running.<br />Not all files have been indexed and unique word extraction might yield incomplete results.<br />Do you want to proceed with unique word extraction anyway?</html> -# {0} - Keyword search commit frequency -ExtractAllTermsReport.search.noFilesInIdxMsg=No files are in index yet. Try again later. Index is updated every {0} minutes. -ExtractAllTermsReport.search.noFilesInIdxMsg2=No files are in index yet. Try again later +ExtractAllTermsReport.search.noFilesInIdxMsg=No files are in index yet. 
If Solr keyword search indexing and Solr indexing were enabled, wait for ingest to complete. +ExtractAllTermsReport.search.noFilesInIdxMsg2=No files are in index yet. Re-ingest the image with the Keyword Search Module and Solr indexing enabled. ExtractAllTermsReport.search.searchIngestInProgressTitle=Keyword Search Ingest in Progress ExtractAllTermsReport.startExport=Starting Unique Word Extraction ExtractedContentPanel.setMarkup.panelTxt=<span style='font-style:italic'>Loading text... Please wait</span> -# {0} - Content name ExtractedContentPanel.SetMarkup.progress.loading=Loading text for {0} GlobalEditListPanel.editKeyword.title=Edit Keyword GlobalEditListPanel.warning.text=Boundary characters ^ and $ do not match word boundaries. Consider\nreplacing with an explicit list of boundary characters, such as [ \\.,] @@ -91,8 +88,8 @@ AbstractKeywordSearchPerformer.search.invalidSyntaxHeader=Invalid query statemen AbstractKeywordSearchPerformer.search.searchIngestInProgressTitle=Keyword Search Ingest in Progress AbstractKeywordSearchPerformer.search.ingestInProgressBody=<html>Keyword Search Ingest is currently running.<br />Not all files have been indexed and this search might yield incomplete results.<br />Do you want to proceed with this search anyway?</html> AbstractKeywordSearchPerformer.search.emptyKeywordErrorBody=Keyword list is empty, please add at least one keyword to the list -AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>No files are in index yet. <br />Try again later. Index is updated every {0} minutes.</html> -AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>No files were indexed.<br />Re-ingest the image with the Keyword Search Module enabled. </html> +AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>No files are in index yet. 
<br />If Solr keyword search indexing was enabled, wait for ingest to complete</html> +AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>No files were indexed.<br />Re-ingest the image with the Keyword Search Module and Solr indexing enabled. </html> ExtractedContentViewer.toolTip=Displays extracted text from files and keyword-search results. Requires Keyword Search ingest to be run on a file to activate this viewer. ExtractedContentViewer.getTitle=Indexed Text HighlightedMatchesSource.toString=Search Results @@ -176,7 +173,7 @@ KeywordSearchListsManagementPanel.fileExtensionFilterLbl=Autopsy Keyword List Fi KeywordSearchListsManagementPanel.fileExtensionFilterLb2=Encase Keyword List File (txt) KeywordSearch.listImportFeatureTitle=Keyword List Import KeywordSearchIngestModule.moduleName=Keyword Search -KeywordSearchIngestModule.moduleDescription=Performs file indexing and periodic search using keywords and regular expressions in lists. +KeywordSearchIngestModule.moduleDescription=Performs file indexing and search using selected keyword lists. DropdownSearchPanel.keywordTextField.text= KeywordSearchPanel.searchDropButton.text=Keyword Search DropdownSearchPanel.exactRadioButton.text=Exact Match @@ -227,8 +224,6 @@ KeywordSearchSettings.properties_options.text={0}_Options KeywordSearchSettings.propertiesNSRL.text={0}_NSRL KeywordSearchSettings.propertiesScripts.text={0}_Scripts NoOpenCoreException.err.noOpenSorlCore.msg=No currently open Solr core. 
-SearchRunner.query.exception.msg=Error performing query: -# {0} - colelction name Server.deleteCore.exception.msg=Failed to delete Solr colelction {0} Server.exceptionMessage.unableToBackupCollection=Unable to backup Solr collection Server.exceptionMessage.unableToCreateCollection=Unable to create Solr collection @@ -272,11 +267,6 @@ KeywordSearchGlobalLanguageSettingsPanel.enableUTF8Checkbox.text=Enable UTF8 tex KeywordSearchGlobalLanguageSettingsPanel.ingestSettingsLabel.text=Ingest settings for string extraction from unknown file types (changes effective on next ingest): KeywordSearchGlobalLanguageSettingsPanel.enableUTF16Checkbox.text=Enable UTF16LE and UTF16BE string extraction KeywordSearchGlobalLanguageSettingsPanel.languagesLabel.text=Enabled scripts (languages): -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText=20 mins. (fastest ingest time) -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text=20 minutes (slowest feedback, fastest ingest) -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.toolTipText=10 minutes (faster overall ingest time than default) -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.text=10 minutes (slower feedback, faster ingest) -KeywordSearchGlobalSearchSettingsPanel.frequencyLabel.text=Results update frequency during ingest: KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.toolTipText=Requires Hash Set service to had run previously, or be selected for next ingest. 
KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.text=Do not add files in NSRL (known files) to keyword index during ingest KeywordSearchGlobalSearchSettingsPanel.informationLabel.text=Information @@ -285,11 +275,7 @@ KeywordSearchGlobalSearchSettingsPanel.filesIndexedValue.text=0 KeywordSearchGlobalSearchSettingsPanel.filesIndexedLabel.text=Files in keyword index: KeywordSearchGlobalSearchSettingsPanel.showSnippetsCB.text=Show Keyword Preview in Keyword Search Results (will result in longer search times) KeywordSearchGlobalSearchSettingsPanel.chunksValLabel.text=0 -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.toolTipText=1 minute (overall ingest time will be longest) -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.text_1=1 minute (faster feedback, longest ingest) KeywordSearchGlobalSearchSettingsPanel.chunksLabel.text=Chunks in keyword index: -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.toolTipText=5 minutes (overall ingest time will be longer) -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.text=5 minutes (default) KeywordSearchIngestModule.regExpHitLbl=Reg Ex hit: KeywordSearchIngestModule.kwHitLbl=Keyword hit: KeywordSearchIngestModule.kwHitThLbl=Keyword @@ -315,8 +301,6 @@ KeywordSearchListsManagementPanel.newKeywordListDescription2=Keyword list <{0}> KeywordSearchModuleFactory.getIngestJobSettingsPanel.exception.msg=Expected settings argument to be instanceof KeywordSearchJobSettings KeywordSearchModuleFactory.createFileIngestModule.exception.msg=Expected settings argument to be instanceof KeywordSearchJobSettings SearchRunner.Searcher.done.err.msg=Error performing keyword search -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.toolTipText=Fastest overall, but no results until the end -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.text=No periodic searches Server.status.failed.msg=Local Solr server did not respond to status request. 
This may be because the server failed to start or is taking too long to initialize. SolrConnectionCheck.HostnameOrPort=Invalid hostname and/or port number. SolrConnectionCheck.Hostname=Invalid hostname. @@ -404,6 +388,7 @@ ExtractedContentPanel.pagesLabel.text=Page: KeywordSearchJobSettingsPanel.ocrCheckBox.text=Enable Optical Character Recognition (OCR) KeywordSearchJobSettingsPanel.limitedOcrCheckbox.text=<html>Only process PDFs, MS Office docs and images which are over 100KB in size or extracted from another file (Beta)</html> KeywordSearchJobSettingsPanel.ocrOnlyCheckbox.text=Only index text extracted using OCR +KeywordSearchJobSettingsPanel.solrCheckbox.text=Add text to Solr Index TextZoomPanel.zoomInButton.text= TextZoomPanel.zoomOutButton.text= TextZoomPanel.zoomResetButton.text=Reset diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle_ja.properties b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle_ja.properties index 46af934d7c8949d3c41fe8ea0cb7bede5421f775..83e614bc936adff3ae6fd8df262d55a816a5bb8a 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle_ja.properties +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle_ja.properties @@ -7,8 +7,6 @@ AbstractKeywordSearchPerformer.search.dialogErrorHeader=\u30ad\u30fc\u30ef\u30fc AbstractKeywordSearchPerformer.search.emptyKeywordErrorBody=\u30ad\u30fc\u30ef\u30fc\u30c9\u30ea\u30b9\u30c8\u304c\u7a7a(\u672a\u5165\u529b)\u3067\u3059\u3002\u5c11\u306a\u304f\u3068\u30821\u3064\u306e\u30ad\u30fc\u30ef\u30fc\u30c9\u3092\u30ea\u30b9\u30c8\u306b\u8ffd\u52a0\u3057\u3066\u304f\u3060\u3055\u3044\u3002 AbstractKeywordSearchPerformer.search.ingestInProgressBody=<html>\u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u304c\u73fe\u5728\u5b9f\u884c\u4e2d\u3067\u3059\u3002<br 
/>\u3059\u3079\u3066\u306e\u30d5\u30a1\u30a4\u30eb\u304c\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u3055\u308c\u306a\u304b\u3063\u305f\u305f\u3081\u3001\u3053\u306e\u691c\u7d22\u306f\u4e0d\u5b8c\u5168\u306a\u7d50\u679c\u3092\u751f\u6210\u3059\u308b\u53ef\u80fd\u6027\u304c\u3042\u308a\u307e\u3059\u3002<br />\u305d\u308c\u3067\u3082\u3053\u306e\u691c\u7d22\u3092\u7d9a\u884c\u3057\u307e\u3059\u304b?</html> AbstractKeywordSearchPerformer.search.invalidSyntaxHeader=\u7121\u52b9\u306a\u30af\u30a8\u30ea\u30fb\u30b9\u30c6\u30fc\u30c8\u30e1\u30f3\u30c8\u3002 \u5185\u5bb9\u304c\u6b63\u898f\u8868\u73fe\u306e\u5834\u5408\u3001Lucene\u6b63\u898f\u8868\u73fe\u30d1\u30bf\u30fc\u30f3\u306e\u307f\u304c\u30b5\u30dd\u30fc\u30c8\u3055\u308c\u307e\u3059\u3002 POSIX\u6587\u5b57\u30af\u30e9\u30b9\uff08\\ n\u3084\\ w\u306a\u3069\uff09\u306f\u7121\u52b9\u3067\u3059\u3002 -AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>\u30d5\u30a1\u30a4\u30eb\u304c\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002<br />\u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\u30e2\u30b8\u30e5\u30fc\u30eb\u3092\u6709\u52b9\u306b\u3057\u305f\u72b6\u614b\u3067\u30a4\u30e1\u30fc\u30b8\u3092\u518d\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u3057\u3066\u304f\u3060\u3055\u3044\u3002</html> -AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>\u307e\u3060\u30d5\u30a1\u30a4\u30eb\u304c\u7d22\u5f15\u306b\u542b\u307e\u308c\u3066\u3044\u307e\u305b\u3093\u3002<br />\u5f8c\u3067\u3082\u3046\u4e00\u5ea6\u304a\u8a66\u3057\u304f\u3060\u3055\u3044\u3002 \u7d22\u5f15\u306f {0} \u5206\u3054\u3068\u306b\u66f4\u65b0\u3055\u308c\u307e\u3059\u3002</html> AbstractKeywordSearchPerformer.search.searchIngestInProgressTitle=\u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u304c\u9032\u884c\u4e2d\u3067\u3059 AccountsText.creditCardNumber=\u30af\u30ec\u30b8\u30c3\u30c8\u30ab\u30fc\u30c9\u756a\u53f7 
AccountsText.creditCardNumbers=\u30af\u30ec\u30b8\u30c3\u30c8\u30ab\u30fc\u30c9\u756a\u53f7 @@ -55,8 +53,6 @@ ExtractAllTermsReport.exportComplete=\u30e6\u30cb\u30fc\u30af\u306a\u5358\u8a9e\ ExtractAllTermsReport.getName.text=\u30e6\u30cb\u30fc\u30af\u306a\u5358\u8a9e\u3092\u62bd\u51fa\u3059\u308b ExtractAllTermsReport.numberExtractedTerms=\u62bd\u51fa\u3055\u308c\u305f{0}\u7528\u8a9e... ExtractAllTermsReport.search.ingestInProgressBody=<html> \u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\u8aad\u8fbc\u306f\u73fe\u5728\u5b9f\u884c\u4e2d\u3067\u3059\u3002<br/>\u3059\u3079\u3066\u306e\u30d5\u30a1\u30a4\u30eb\u304c\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u306b\u767b\u9332\u3055\u308c\u3066\u3044\u308b\u308f\u3051\u3067\u306f\u306a\u304f\u3001\u30e6\u30cb\u30fc\u30af\u306a\u5358\u8a9e\u3092\u62bd\u51fa\u306f\u4e0d\u5b8c\u5168\u306a\u7d50\u679c\u306b\u306a\u308b\u53ef\u80fd\u6027\u304c\u3042\u308a\u307e\u3059\u3002<br />\u305d\u308c\u3067\u3082\u30e6\u30cb\u30fc\u30af\u306a\u5358\u8a9e\u306e\u62bd\u51fa\u3092\u7d9a\u884c\u3057\u307e\u3059\u304b\uff1f</ html> -ExtractAllTermsReport.search.noFilesInIdxMsg=\u307e\u3060\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u306b\u767b\u9332\u3055\u308c\u3066\u3044\u308b\u30d5\u30a1\u30a4\u30eb\u306f\u3042\u308a\u307e\u305b\u3093\u3002 \u3042\u3068\u3067\u3082\u3046\u4e00\u5ea6\u8a66\u3057\u3066\u307f\u3066\u304f\u3060\u3055\u3044\u3002 \u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u306f{0}\u5206\u3054\u3068\u306b\u66f4\u65b0\u3055\u308c\u307e\u3059\u3002 -ExtractAllTermsReport.search.noFilesInIdxMsg2=\u307e\u3060\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u306b\u767b\u9332\u3055\u308c\u3066\u3044\u308b\u30d5\u30a1\u30a4\u30eb\u306f\u3042\u308a\u307e\u305b\u3093\u3002 \u3042\u3068\u3067\u3082\u3046\u4e00\u5ea6\u8a66\u3057\u3066\u307f\u3066\u304f\u3060\u3055\u3044\u3002 ExtractAllTermsReport.search.searchIngestInProgressTitle=\u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u304c\u9032\u884c\u4e2d\u3067\u3059 
ExtractAllTermsReport.startExport=\u30e6\u30cb\u30fc\u30af\u306a\u5358\u8a9e\u62bd\u51fa\u306e\u958b\u59cb ExtractedContentPanel.SetMarkup.progress.loading={0} \u306e\u30c6\u30ad\u30b9\u30c8\u3092\u8aad\u307f\u8fbc\u3093\u3067\u3044\u307e\u3059 @@ -210,23 +206,12 @@ KeywordSearchGlobalSearchSettingsPanel.customizeComponents.windowsLimitedOCR=\u3 KeywordSearchGlobalSearchSettingsPanel.customizeComponents.windowsOCR=OCR\u6587\u5b57\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\uff08Windows 64\u30d3\u30c3\u30c8\u304c\u5fc5\u8981\uff09 KeywordSearchGlobalSearchSettingsPanel.filesIndexedLabel.text=\u30ad\u30fc\u30ef\u30fc\u30c9\u7d22\u5f15\u5185\u306e\u30d5\u30a1\u30a4\u30eb\: KeywordSearchGlobalSearchSettingsPanel.filesIndexedValue.text=0 -KeywordSearchGlobalSearchSettingsPanel.frequencyLabel.text=\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u4e2d\u306e\u7d50\u679c\u66f4\u65b0\u983b\u5ea6\: KeywordSearchGlobalSearchSettingsPanel.informationLabel.text=\u60c5\u5831 KeywordSearchGlobalSearchSettingsPanel.ingestWarningLabel.text=\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u304c\u9032\u884c\u4e2d\u3067\u3059\u3002\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u304c\u5b8c\u4e86\u3059\u308b\u307e\u3067\u4e00\u90e8\u306e\u8a2d\u5b9a\u3092\u5229\u7528\u3067\u304d\u307e\u305b\u3093\u3002 KeywordSearchGlobalSearchSettingsPanel.settingsLabel.text=\u8a2d\u5b9a KeywordSearchGlobalSearchSettingsPanel.showSnippetsCB.text=\u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\u7d50\u679c\u306b\u30ad\u30fc\u30ef\u30fc\u30c9\u30d7\u30ec\u30d3\u30e5\u30fc\u3092\u8868\u793a(\u691c\u7d22\u6642\u9593\u304c\u9577\u304f\u306a\u308a\u307e\u3059) KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.text=\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u4e2d\u306bNSRL(\u65e2\u77e5\u306e\u30d5\u30a1\u30a4\u30eb)\u306e\u30d5\u30a1\u30a4\u30eb\u3092\u30ad\u30fc\u30ef\u30fc\u30c9\u306b\u8ffd\u52a0\u3057\u306a\u3044\u3067\u304f\u3060\u3055\u3044 
KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.toolTipText=\u30cf\u30c3\u30b7\u30e5\u30bb\u30c3\u30c8\u30b5\u30fc\u30d3\u30b9\u306b\u4ee5\u524d\u306b\u5b9f\u884c\u6e08\u307f\u3067\u3042\u308b\u3053\u3068\u3001\u307e\u305f\u306f\u6b21\u306e\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u306b\u9078\u629e\u3055\u308c\u308b\u3053\u3068\u3092\u8981\u6c42\u3057\u307e\u3059\u3002 -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text=20\u5206(\u6700\u9045\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u3001\u6700\u901f\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8) -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText=20\u5206(\u6700\u901f\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u6642\u9593) -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.text=10\u5206(\u3088\u308a\u9045\u3044\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u3001\u3088\u308a\u901f\u3044\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8) -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.toolTipText=10\u5206(\u30c7\u30d5\u30a9\u30eb\u30c8\u3088\u308a\u3082\u901f\u3044\u7dcf\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u6642\u9593) -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.text=5\u5206(\u30c7\u30d5\u30a9\u30eb\u30c8) -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.toolTipText=5\u5206(\u7dcf\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u6642\u9593\u304c\u9577\u304f\u306a\u308a\u307e\u3059) -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.text_1=1\u5206(\u3088\u308a\u901f\u3044\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u3001\u6700\u9577\u306e\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8) -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.toolTipText=1\u5206(\u7dcf\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u6642\u9593\u306f\u6700\u9577\u306b\u306a\u308a\u307e\u3059) -KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.text=\u5b9a\u671f\u691c\u7d22\u306a\u3057 
-KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.toolTipText=\u5168\u4f53\u3067\u6700\u901f\u3067\u3059\u304c\u3001\u6700\u5f8c\u307e\u3067\u7d50\u679c\u306f\u8868\u793a\u3055\u308c\u307e\u305b\u3093 KeywordSearchGlobalSettingsPanel.Title=\u30ad\u30fc\u30ef\u30fc\u30c9\u4e00\u62ec\u691c\u7d22\u8a2d\u5b9a KeywordSearchIngestModule.doInBackGround.displayName=\u30ad\u30fc\u30ef\u30fc\u30c9\u5b9a\u671f\u691c\u7d22 KeywordSearchIngestModule.doInBackGround.finalizeMsg=\u78ba\u5b9a diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java index 2deb82d2f5bb7cbacf7a0e4db68c3cb9dcef9930..dac7b8d987c59588f3ffe17cc37422159bb46a56 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java @@ -398,6 +398,8 @@ static class Chunk { private final StringBuilder sb; private final int baseChunkSizeChars; private final StringBuilder lowerCasedChunk; + private boolean hasHit = false; + private int chunkId = 0; Chunk(StringBuilder sb, int baseChunkSizeChars, StringBuilder lowerCasedChunk) { this.sb = sb; @@ -420,7 +422,7 @@ public String toString() { * * @return The content of the chunk. 
*/ - public String geLowerCasedChunk() { + public String getLowerCasedChunk() { return lowerCasedChunk.toString(); } @@ -432,5 +434,21 @@ public String geLowerCasedChunk() { int getBaseChunkLength() { return baseChunkSizeChars; } + + boolean hasHit() { + return hasHit; + } + + void setHasHit(boolean b) { + hasHit = b; + } + + void setChunkId(int id) { + chunkId = id; + } + + int getChunkId() { + return chunkId; + } } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/DropdownListSearchPanel.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/DropdownListSearchPanel.java index 7b84080217522196ec89e3e8f6e7a2426d717e4c..f53dc547c56f8ad13cf885fdee9f902dada70efc 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/DropdownListSearchPanel.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/DropdownListSearchPanel.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2011-2018 Basis Technology Corp. + * Copyright 2011-2022 Basis Technology Corp. 
* Contact: carrier <at> sleuthkit <dot> org * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -30,7 +30,6 @@ import java.util.Iterator; import java.util.List; import java.util.Set; -import java.util.logging.Level; import javax.swing.JCheckBox; import javax.swing.JTable; import javax.swing.ListSelectionModel; @@ -143,10 +142,7 @@ public void propertyChange(PropertyChangeEvent evt) { searchAddListener = new ActionListener() { @Override public void actionPerformed(ActionEvent e) { - if (ingestRunning) { - IngestSearchRunner.getInstance().addKeywordListsToAllJobs(listsTableModel.getSelectedLists()); - logger.log(Level.INFO, "Submitted enqueued lists to ingest"); //NON-NLS - } else { + if (!ingestRunning) { searchAction(e); } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractAllTermsReport.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractAllTermsReport.java index 88178b42ea88f20c49730a3b942cb4bd353b936d..584757aa93889efc7a5f7c46358036cd21d0955b 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractAllTermsReport.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractAllTermsReport.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2021 Basis Technology Corp. + * Copyright 2022 Basis Technology Corp. * Contact: carrier <at> sleuthkit <dot> org * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -51,9 +51,8 @@ public String getName() { @NbBundle.Messages({ "ExtractAllTermsReport.error.noOpenCase=No currently open case.", - "# {0} - Keyword search commit frequency", - "ExtractAllTermsReport.search.noFilesInIdxMsg=No files are in index yet. Try again later. Index is updated every {0} minutes.", - "ExtractAllTermsReport.search.noFilesInIdxMsg2=No files are in index yet. Try again later", + "ExtractAllTermsReport.search.noFilesInIdxMsg=No files are in index yet. 
If Solr keyword search indexing and Solr indexing were enabled, wait for ingest to complete.", + "ExtractAllTermsReport.search.noFilesInIdxMsg2=No files are in index yet. Re-ingest the image with the Keyword Search Module and Solr indexing enabled.", "ExtractAllTermsReport.search.searchIngestInProgressTitle=Keyword Search Ingest in Progress", "ExtractAllTermsReport.search.ingestInProgressBody=<html>Keyword Search Ingest is currently running.<br />Not all files have been indexed and unique word extraction might yield incomplete results.<br />Do you want to proceed with unique word extraction anyway?</html>", "ExtractAllTermsReport.startExport=Starting Unique Word Extraction", @@ -83,7 +82,7 @@ public void generateReport(GeneralReportSettings settings, ReportProgressPanel p if (filesIndexed == 0) { if (isIngestRunning) { - progressPanel.complete(ReportProgressPanel.ReportStatus.ERROR, Bundle.ExtractAllTermsReport_search_noFilesInIdxMsg(KeywordSearchSettings.getUpdateFrequency().getTime())); + progressPanel.complete(ReportProgressPanel.ReportStatus.ERROR, Bundle.ExtractAllTermsReport_search_noFilesInIdxMsg()); } else { progressPanel.complete(ReportProgressPanel.ReportStatus.ERROR, Bundle.ExtractAllTermsReport_search_noFilesInIdxMsg2()); } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/IngestSearchRunner.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/IngestSearchRunner.java deleted file mode 100755 index 9cd33a81674d286b2a3e64c3496734706019421e..0000000000000000000000000000000000000000 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/IngestSearchRunner.java +++ /dev/null @@ -1,705 +0,0 @@ -/* - * Autopsy Forensic Browser - * - * Copyright 2014 - 2021 Basis Technology Corp. - * Contact: carrier <at> sleuthkit <dot> org - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.sleuthkit.autopsy.keywordsearch; - -import com.google.common.util.concurrent.ThreadFactoryBuilder; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.concurrent.CancellationException; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; -import java.util.concurrent.ScheduledThreadPoolExecutor; -import static java.util.concurrent.TimeUnit.MILLISECONDS; -import java.util.concurrent.atomic.AtomicLong; -import java.util.logging.Level; -import javax.annotation.concurrent.GuardedBy; -import javax.swing.SwingUtilities; -import javax.swing.SwingWorker; -import org.netbeans.api.progress.ProgressHandle; -import org.openide.util.Cancellable; -import org.openide.util.NbBundle; -import org.openide.util.NbBundle.Messages; -import org.sleuthkit.autopsy.core.RuntimeProperties; -import org.sleuthkit.autopsy.coreutils.Logger; -import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil; -import org.sleuthkit.autopsy.coreutils.StopWatch; -import org.sleuthkit.autopsy.coreutils.ThreadConfined; -import org.sleuthkit.autopsy.ingest.IngestJobContext; -import org.sleuthkit.autopsy.ingest.IngestMessage; -import org.sleuthkit.autopsy.ingest.IngestServices; - -/** - * Performs periodic and final keyword searches for ingest jobs. 
Periodic - * searches are done in background tasks. This represents a careful working - * around of the contract for IngestModule.process(). Final searches are done - * synchronously in the calling thread, as required by the contract for - * IngestModule.shutDown(). - */ -final class IngestSearchRunner { - - private static final Logger logger = Logger.getLogger(IngestSearchRunner.class.getName()); - private static IngestSearchRunner instance = null; - private final IngestServices services = IngestServices.getInstance(); - private Ingester ingester = null; - private long currentUpdateIntervalMs; - private volatile boolean periodicSearchTaskRunning; - private volatile Future<?> periodicSearchTaskHandle; - private final ScheduledThreadPoolExecutor periodicSearchTaskExecutor; - private static final int NUM_SEARCH_SCHEDULING_THREADS = 1; - private static final String SEARCH_SCHEDULER_THREAD_NAME = "periodic-search-scheduling-%d"; - private final Map<Long, SearchJobInfo> jobs = new ConcurrentHashMap<>(); // Ingest job ID to search job info - private final boolean usingNetBeansGUI = RuntimeProperties.runningWithGUI(); - - /* - * Constructs a singleton object that performs periodic and final keyword - * searches for ingest jobs. Periodic searches are done in background tasks. - * This represents a careful working around of the contract for - * IngestModule.process(). Final searches are done synchronously in the - * calling thread, as required by the contract for IngestModule.shutDown(). - */ - private IngestSearchRunner() { - currentUpdateIntervalMs = ((long) KeywordSearchSettings.getUpdateFrequency().getTime()) * 60 * 1000; - ingester = Ingester.getDefault(); - periodicSearchTaskExecutor = new ScheduledThreadPoolExecutor(NUM_SEARCH_SCHEDULING_THREADS, new ThreadFactoryBuilder().setNameFormat(SEARCH_SCHEDULER_THREAD_NAME).build()); - } - - /** - * Gets the ingest search runner singleton. - * - * @return The ingest search runner. 
- */ - public static synchronized IngestSearchRunner getInstance() { - if (instance == null) { - instance = new IngestSearchRunner(); - } - return instance; - } - - /** - * Starts the search job for an ingest job. - * - * @param jobContext The ingest job context. - * @param keywordListNames The names of the keyword search lists for the - * ingest job. - */ - public synchronized void startJob(IngestJobContext jobContext, List<String> keywordListNames) { - long jobId = jobContext.getJobId(); - if (jobs.containsKey(jobId) == false) { - SearchJobInfo jobData = new SearchJobInfo(jobContext, keywordListNames); - jobs.put(jobId, jobData); - } - - /* - * Keep track of the number of keyword search file ingest modules that - * are doing analysis for the ingest job, i.e., that have called this - * method. This is needed by endJob(). - */ - jobs.get(jobId).incrementModuleReferenceCount(); - - /* - * Start a periodic search task in the - */ - if ((jobs.size() > 0) && (periodicSearchTaskRunning == false)) { - currentUpdateIntervalMs = ((long) KeywordSearchSettings.getUpdateFrequency().getTime()) * 60 * 1000; - periodicSearchTaskHandle = periodicSearchTaskExecutor.schedule(new PeriodicSearchTask(), currentUpdateIntervalMs, MILLISECONDS); - periodicSearchTaskRunning = true; - } - } - - /** - * Finishes a search job for an ingest job. - * - * @param jobId The ingest job ID. - */ - public synchronized void endJob(long jobId) { - /* - * Only complete the job if this is the last keyword search file ingest - * module doing annalysis for this job. - */ - SearchJobInfo job; - job = jobs.get(jobId); - if (job == null) { - return; // RJCTODO: SEVERE - } - if (job.decrementModuleReferenceCount() != 0) { - jobs.remove(jobId); - } - - /* - * Commit the index and do the final search. The final search is done in - * the ingest thread that shutDown() on the keyword search file ingest - * module, per the contract of IngestModule.shutDwon(). 
- */ - logger.log(Level.INFO, "Commiting search index before final search for search job {0}", job.getJobId()); //NON-NLS - commit(); - logger.log(Level.INFO, "Starting final search for search job {0}", job.getJobId()); //NON-NLS - doFinalSearch(job); - logger.log(Level.INFO, "Final search for search job {0} completed", job.getJobId()); //NON-NLS - - if (jobs.isEmpty()) { - cancelPeriodicSearchSchedulingTask(); - } - } - - /** - * Stops the search job for an ingest job. - * - * @param jobId The ingest job ID. - */ - public synchronized void stopJob(long jobId) { - logger.log(Level.INFO, "Stopping search job {0}", jobId); //NON-NLS - commit(); - - SearchJobInfo job; - job = jobs.get(jobId); - if (job == null) { - return; - } - - /* - * Request cancellation of the current keyword search, whether it is a - * preiodic search or a final search. - */ - IngestSearchRunner.Searcher currentSearcher = job.getCurrentSearcher(); - if ((currentSearcher != null) && (!currentSearcher.isDone())) { - logger.log(Level.INFO, "Cancelling search job {0}", jobId); //NON-NLS - currentSearcher.cancel(true); - } - - jobs.remove(jobId); - - if (jobs.isEmpty()) { - cancelPeriodicSearchSchedulingTask(); - } - } - - /** - * Adds the given keyword list names to the set of keyword lists to be - * searched by ALL keyword search jobs. This supports adding one or more - * keyword search lists to ingest jobs already in progress. - * - * @param keywordListNames The n ames of the additional keyword lists. - */ - public synchronized void addKeywordListsToAllJobs(List<String> keywordListNames) { - for (String listName : keywordListNames) { - logger.log(Level.INFO, "Adding keyword list {0} to all jobs", listName); //NON-NLS - for (SearchJobInfo j : jobs.values()) { - j.addKeywordListName(listName); - } - } - } - - /** - * Commits the Solr index for the current case and publishes an event - * indicating the current number of indexed items (this is no longer just - * files). 
- */ - private void commit() { - ingester.commit(); - - /* - * Publish an event advertising the number of indexed items. Note that - * this is no longer the number of indexed files, since the text of many - * items in addition to files is indexed. - */ - try { - final int numIndexedFiles = KeywordSearch.getServer().queryNumIndexedFiles(); - KeywordSearch.fireNumIndexedFilesChange(null, numIndexedFiles); - } catch (NoOpenCoreException | KeywordSearchModuleException ex) { - logger.log(Level.SEVERE, "Error executing Solr query for number of indexed files", ex); //NON-NLS - } - } - - /** - * Performs the final keyword search for an ingest job. The search is done - * synchronously, as required by the contract for IngestModule.shutDown(). - * - * @param job The keyword search job info. - */ - private void doFinalSearch(SearchJobInfo job) { - if (!job.getKeywordListNames().isEmpty()) { - try { - /* - * Wait for any periodic searches being done in a SwingWorker - * pool thread to finish. - */ - job.waitForCurrentWorker(); - IngestSearchRunner.Searcher finalSearcher = new IngestSearchRunner.Searcher(job, true); - job.setCurrentSearcher(finalSearcher); - /* - * Do the final search synchronously on the current ingest - * thread, per the contract specified - */ - finalSearcher.doInBackground(); - } catch (InterruptedException | CancellationException ex) { - logger.log(Level.INFO, "Final search for search job {0} interrupted or cancelled", job.getJobId()); //NON-NLS - } catch (Exception ex) { - logger.log(Level.SEVERE, String.format("Final search for search job %d failed", job.getJobId()), ex); //NON-NLS - } - } - } - - /** - * Cancels the current periodic search scheduling task. 
- */ - private synchronized void cancelPeriodicSearchSchedulingTask() { - if (periodicSearchTaskHandle != null) { - logger.log(Level.INFO, "No more search jobs, stopping periodic search scheduling"); //NON-NLS - periodicSearchTaskHandle.cancel(true); - periodicSearchTaskRunning = false; - } - } - - /** - * Task that runs in ScheduledThreadPoolExecutor to periodically start and - * wait for keyword search tasks for each keyword search job in progress. - * The keyword search tasks for individual ingest jobs are implemented as - * SwingWorkers to support legacy APIs. - */ - private final class PeriodicSearchTask implements Runnable { - - @Override - public void run() { - /* - * If there are no more jobs or this task has been cancelled, exit. - */ - if (jobs.isEmpty() || periodicSearchTaskHandle.isCancelled()) { - logger.log(Level.INFO, "Periodic search scheduling task has been cancelled, exiting"); //NON-NLS - periodicSearchTaskRunning = false; - return; - } - - /* - * Commit the Solr index for the current case before doing the - * searches. - */ - commit(); - - /* - * Do a keyword search for each ingest job in progress. When the - * searches are done, recalculate the "hold off" time between - * searches to prevent back-to-back periodic searches and schedule - * the nect periodic search task. 
- */ - final StopWatch stopWatch = new StopWatch(); - stopWatch.start(); - for (Iterator<Entry<Long, SearchJobInfo>> iterator = jobs.entrySet().iterator(); iterator.hasNext();) { - SearchJobInfo job = iterator.next().getValue(); - - if (periodicSearchTaskHandle.isCancelled()) { - logger.log(Level.INFO, "Periodic search scheduling task has been cancelled, exiting"); //NON-NLS - periodicSearchTaskRunning = false; - return; - } - - if (!job.getKeywordListNames().isEmpty() && !job.isWorkerRunning()) { - logger.log(Level.INFO, "Starting periodic search for search job {0}", job.getJobId()); - Searcher searcher = new Searcher(job, false); - job.setCurrentSearcher(searcher); - searcher.execute(); - job.setWorkerRunning(true); - try { - searcher.get(); - } catch (InterruptedException | ExecutionException ex) { - logger.log(Level.SEVERE, String.format("Error performing keyword search for ingest job %d", job.getJobId()), ex); //NON-NLS - services.postMessage(IngestMessage.createErrorMessage( - KeywordSearchModuleFactory.getModuleName(), - NbBundle.getMessage(this.getClass(), "SearchRunner.Searcher.done.err.msg"), ex.getMessage())); - } catch (java.util.concurrent.CancellationException ex) { - logger.log(Level.SEVERE, String.format("Keyword search for ingest job %d cancelled", job.getJobId()), ex); //NON-NLS - } - } - } - stopWatch.stop(); - logger.log(Level.INFO, "Periodic searches for all ingest jobs cumulatively took {0} secs", stopWatch.getElapsedTimeSecs()); //NON-NLS - recalculateUpdateIntervalTime(stopWatch.getElapsedTimeSecs()); // ELDEBUG - periodicSearchTaskHandle = periodicSearchTaskExecutor.schedule(new PeriodicSearchTask(), currentUpdateIntervalMs, MILLISECONDS); - } - - /** - * Sets the time interval between periodic keyword searches to avoid - * running back-to-back searches. If the most recent round of searches - * took longer that 1/4 of the current interval, doubles the interval. 
- * - * @param lastSerchTimeSec The time in seconds used to execute the most - * recent round of keword searches. - */ - private void recalculateUpdateIntervalTime(long lastSerchTimeSec) { - if (lastSerchTimeSec * 1000 < currentUpdateIntervalMs / 4) { - return; - } - currentUpdateIntervalMs *= 2; - logger.log(Level.WARNING, "Last periodic search took {0} sec. Increasing search interval to {1} sec", new Object[]{lastSerchTimeSec, currentUpdateIntervalMs / 1000}); - } - } - - /** - * A data structure to keep track of the keyword lists, current results, and - * search running status for an ingest job. - */ - private class SearchJobInfo { - - private final IngestJobContext jobContext; - private final long jobId; - private final long dataSourceId; - private volatile boolean workerRunning; - @GuardedBy("this") - private final List<String> keywordListNames; - @GuardedBy("this") - private final Map<Keyword, Set<Long>> currentResults; // Keyword to object IDs of items with hits - private IngestSearchRunner.Searcher currentSearcher; - private final AtomicLong moduleReferenceCount = new AtomicLong(0); - private final Object finalSearchLock = new Object(); - - private SearchJobInfo(IngestJobContext jobContext, List<String> keywordListNames) { - this.jobContext = jobContext; - jobId = jobContext.getJobId(); - dataSourceId = jobContext.getDataSource().getId(); - this.keywordListNames = new ArrayList<>(keywordListNames); - currentResults = new HashMap<>(); - workerRunning = false; - currentSearcher = null; - } - - private IngestJobContext getJobContext() { - return jobContext; - } - - private long getJobId() { - return jobId; - } - - private long getDataSourceId() { - return dataSourceId; - } - - private synchronized List<String> getKeywordListNames() { - return new ArrayList<>(keywordListNames); - } - - private synchronized void addKeywordListName(String keywordListName) { - if (!keywordListNames.contains(keywordListName)) { - keywordListNames.add(keywordListName); - } - } - - 
private synchronized Set<Long> currentKeywordResults(Keyword k) { - return currentResults.get(k); - } - - private synchronized void addKeywordResults(Keyword k, Set<Long> resultsIDs) { - currentResults.put(k, resultsIDs); - } - - private boolean isWorkerRunning() { - return workerRunning; - } - - private void setWorkerRunning(boolean flag) { - workerRunning = flag; - } - - private synchronized IngestSearchRunner.Searcher getCurrentSearcher() { - return currentSearcher; - } - - private synchronized void setCurrentSearcher(IngestSearchRunner.Searcher searchRunner) { - currentSearcher = searchRunner; - } - - private void incrementModuleReferenceCount() { - moduleReferenceCount.incrementAndGet(); - } - - private long decrementModuleReferenceCount() { - return moduleReferenceCount.decrementAndGet(); - } - - /** - * Waits for the current search task to complete. - * - * @throws InterruptedException - */ - private void waitForCurrentWorker() throws InterruptedException { - synchronized (finalSearchLock) { - while (workerRunning) { - logger.log(Level.INFO, String.format("Waiting for previous search task for job %d to finish", jobId)); //NON-NLS - finalSearchLock.wait(); - logger.log(Level.INFO, String.format("Notified previous search task for job %d to finish", jobId)); //NON-NLS - } - } - } - - /** - * Signals any threads waiting on the current search task to complete. - */ - private void searchNotify() { - synchronized (finalSearchLock) { - workerRunning = false; - finalSearchLock.notify(); - } - } - } - - /* - * A SwingWorker responsible for searching the Solr index of the current - * case for the keywords for an ingest job. Keyword hit analysis results are - * created and posted to the blackboard and notifications are sent to the - * ingest inbox. 
- */ - private final class Searcher extends SwingWorker<Object, Void> { - - /* - * Searcher has private copies/snapshots of the lists and keywords - */ - private final SearchJobInfo job; - private final List<Keyword> keywords; //keywords to search - private final List<String> keywordListNames; // lists currently being searched - private final List<KeywordList> keywordLists; - private final Map<Keyword, KeywordList> keywordToList; //keyword to list name mapping - @ThreadConfined(type = ThreadConfined.ThreadType.AWT) - private ProgressHandle progressIndicator; - private boolean finalRun = false; - - Searcher(SearchJobInfo job, boolean finalRun) { - this.job = job; - this.finalRun = finalRun; - keywordListNames = job.getKeywordListNames(); - keywords = new ArrayList<>(); - keywordToList = new HashMap<>(); - keywordLists = new ArrayList<>(); - } - - @Override - @Messages("SearchRunner.query.exception.msg=Error performing query:") - protected Object doInBackground() throws Exception { - try { - if (usingNetBeansGUI) { - /* - * If running in the NetBeans thick client application - * version of Autopsy, NetBeans progress handles (i.e., - * progress bars) are used to display search progress in the - * lower right hand corner of the main application window. - * - * A layer of abstraction to allow alternate representations - * of progress could be used here, as it is in other places - * in the application (see implementations and usage of - * org.sleuthkit.autopsy.progress.ProgressIndicator - * interface), to better decouple keyword search from the - * application's presentation layer. - */ - SwingUtilities.invokeAndWait(() -> { - final String displayName = NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.displayName") - + (finalRun ? 
(" - " + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.finalizeMsg")) : ""); - progressIndicator = ProgressHandle.createHandle(displayName, new Cancellable() { - @Override - public boolean cancel() { - if (progressIndicator != null) { - progressIndicator.setDisplayName(displayName + " " + NbBundle.getMessage(this.getClass(), "SearchRunner.doInBackGround.cancelMsg")); - } - logger.log(Level.INFO, "Search cancelled by user"); //NON-NLS - new Thread(() -> { - IngestSearchRunner.Searcher.this.cancel(true); - }).start(); - return true; - } - }); - progressIndicator.start(); - progressIndicator.switchToIndeterminate(); - }); - } - - updateKeywords(); - for (Keyword keyword : keywords) { - if (isCancelled() || job.getJobContext().fileIngestIsCancelled()) { - logger.log(Level.INFO, "Cancellation requested, exiting before new keyword processed: {0}", keyword.getSearchTerm()); //NON-NLS - return null; - } - - KeywordList keywordList = keywordToList.get(keyword); - if (usingNetBeansGUI) { - String searchTermStr = keyword.getSearchTerm(); - if (searchTermStr.length() > 50) { - searchTermStr = searchTermStr.substring(0, 49) + "..."; - } - final String progressMessage = keywordList.getName() + ": " + searchTermStr; - SwingUtilities.invokeLater(() -> { - progressIndicator.progress(progressMessage); - }); - } - - // Filtering - //limit search to currently ingested data sources - //set up a filter with 1 or more image ids OR'ed - KeywordSearchQuery keywordSearchQuery = KeywordSearchUtil.getQueryForKeyword(keyword, keywordList); - KeywordQueryFilter dataSourceFilter = new KeywordQueryFilter(KeywordQueryFilter.FilterType.DATA_SOURCE, job.getDataSourceId()); - keywordSearchQuery.addFilter(dataSourceFilter); - - // Do the actual search - QueryResults queryResults; - try { - queryResults = keywordSearchQuery.performQuery(); - } catch (KeywordSearchModuleException | NoOpenCoreException ex) { - logger.log(Level.SEVERE, "Error performing query: " + 
keyword.getSearchTerm(), ex); //NON-NLS - if (usingNetBeansGUI) { - final String userMessage = Bundle.SearchRunner_query_exception_msg() + keyword.getSearchTerm(); - SwingUtilities.invokeLater(() -> { - MessageNotifyUtil.Notify.error(userMessage, ex.getCause().getMessage()); - }); - } - //no reason to continue with next query if recovery failed - //or wait for recovery to kick in and run again later - //likely case has closed and threads are being interrupted - return null; - } catch (CancellationException e) { - logger.log(Level.INFO, "Cancellation requested, exiting during keyword query: {0}", keyword.getSearchTerm()); //NON-NLS - return null; - } - - // Reduce the results of the query to only those hits we - // have not already seen. - QueryResults newResults = filterResults(queryResults); - - if (!newResults.getKeywords().isEmpty()) { - // Create blackboard artifacts - newResults.process(this, keywordList.getIngestMessages(), true, job.getJobId()); - } - } - } catch (Exception ex) { - logger.log(Level.SEVERE, String.format("Error performing keyword search for ingest job %d", job.getJobId()), ex); //NON-NLS - } finally { - if (progressIndicator != null) { - SwingUtilities.invokeLater(new Runnable() { - @Override - public void run() { - progressIndicator.finish(); - progressIndicator = null; - } - }); - } - // In case a thread is waiting on this worker to be done - job.searchNotify(); - } - - return null; - } - - /** - * Sync-up the updated keywords from the currently used lists in the XML - */ - private void updateKeywords() { - XmlKeywordSearchList loader = XmlKeywordSearchList.getCurrent(); - - keywords.clear(); - keywordToList.clear(); - keywordLists.clear(); - - for (String name : keywordListNames) { - KeywordList list = loader.getList(name); - keywordLists.add(list); - for (Keyword k : list.getKeywords()) { - keywords.add(k); - keywordToList.put(k, list); - } - } - } - - /** - * This method filters out all of the hits found in earlier periodic - * searches 
and returns only the results found by the most recent - * search. - * - * This method will only return hits for objects for which we haven't - * previously seen a hit for the keyword. - * - * @param queryResult The results returned by a keyword search. - * - * @return A unique set of hits found by the most recent search for - * objects that have not previously had a hit. The hits will be - * for the lowest numbered chunk associated with the object. - * - */ - private QueryResults filterResults(QueryResults queryResult) { - - // Create a new (empty) QueryResults object to hold the most recently - // found hits. - QueryResults newResults = new QueryResults(queryResult.getQuery()); - - // For each keyword represented in the results. - for (Keyword keyword : queryResult.getKeywords()) { - // These are all of the hits across all objects for the most recent search. - // This may well include duplicates of hits we've seen in earlier periodic searches. - List<KeywordHit> queryTermResults = queryResult.getResults(keyword); - - // Sort the hits for this keyword so that we are always - // guaranteed to return the hit for the lowest chunk. - Collections.sort(queryTermResults); - - // This will be used to build up the hits we haven't seen before - // for this keyword. - List<KeywordHit> newUniqueHits = new ArrayList<>(); - - // Get the set of object ids seen in the past by this searcher - // for the given keyword. - Set<Long> curTermResults = job.currentKeywordResults(keyword); - if (curTermResults == null) { - // We create a new empty set if we haven't seen results for - // this keyword before. - curTermResults = new HashSet<>(); - } - - // For each hit for this keyword. - for (KeywordHit hit : queryTermResults) { - if (curTermResults.contains(hit.getSolrObjectId())) { - // Skip the hit if we've already seen a hit for - // this keyword in the object. - continue; - } - - // We haven't seen the hit before so add it to list of new - // unique hits. 
- newUniqueHits.add(hit); - - // Add the object id to the results we've seen for this - // keyword. - curTermResults.add(hit.getSolrObjectId()); - } - - // Update the job with the list of objects for which we have - // seen hits for the current keyword. - job.addKeywordResults(keyword, curTermResults); - - // Add the new hits for the current keyword into the results - // to be returned. - newResults.addResult(keyword, newUniqueHits); - } - - return newResults; - } - } - -} diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java index 052a0b0b16d32412b327185750af61d4efb774fb..b1a32769e428b0523bdfde0e44c5019bd4ee061d 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java @@ -19,9 +19,14 @@ package org.sleuthkit.autopsy.keywordsearch; import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.io.Reader; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Optional; import java.util.logging.Level; @@ -29,9 +34,9 @@ import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.common.SolrInputDocument; import org.openide.util.NbBundle; +import org.openide.util.io.ReaderInputStream; import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.coreutils.TimeZoneUtils; -import org.sleuthkit.autopsy.datamodel.ContentUtils; import org.sleuthkit.autopsy.healthmonitor.HealthMonitor; import org.sleuthkit.autopsy.healthmonitor.TimingMetric; import org.sleuthkit.autopsy.ingest.IngestJobContext; @@ -146,10 +151,10 @@ private Map<String, String> getContentFields(SleuthkitVisitableItem item) { * @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException */ // TODO (JIRA-3118): 
Cancelled text indexing does not propagate cancellation to clients - < T extends SleuthkitVisitableItem> boolean indexText(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context) throws Ingester.IngesterException { - boolean doLanguageDetection = true; - return indexText(sourceReader, sourceID, sourceName, source, context, doLanguageDetection); - } +// < T extends SleuthkitVisitableItem> boolean search(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean indexIntoSolr, List<String> keywordListNames) throws Ingester.IngesterException { +// boolean doLanguageDetection = true; +// return search(sourceReader, sourceID, sourceName, source, context, doLanguageDetection, indexIntoSolr, keywordListNames); +// } /** * Read and chunk the source text for indexing in Solr. Does NOT perform @@ -170,11 +175,17 @@ < T extends SleuthkitVisitableItem> boolean indexText(Reader sourceReader, long * @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException */ // TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients - < T extends SleuthkitVisitableItem> boolean indexStrings(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context) throws Ingester.IngesterException { - // Per JIRA-7100, it was determined that language detection on extracted strings can take a really long time. - boolean doLanguageDetection = false; - return indexText(sourceReader, sourceID, sourceName, source, context, doLanguageDetection); - } +// < T extends SleuthkitVisitableItem> boolean searchStrings(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean indexIntoSolr) throws Ingester.IngesterException { +// // Per JIRA-7100, it was determined that language detection on extracted strings can take a really long time. 
+// boolean doLanguageDetection = false; +// return search(sourceReader, sourceID, sourceName, source, context, doLanguageDetection, indexIntoSolr, null); +// } +// +// < T extends SleuthkitVisitableItem> boolean searchStrings(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean indexIntoSolr, List<String> keywordListNames) throws Ingester.IngesterException { +// // Per JIRA-7100, it was determined that language detection on extracted strings can take a really long time. +// boolean doLanguageDetection = false; +// return search(sourceReader, sourceID, sourceName, source, context, doLanguageDetection, indexIntoSolr, keywordListNames); +// } /** * Read and chunk the source text for indexing in Solr. @@ -195,60 +206,159 @@ < T extends SleuthkitVisitableItem> boolean indexStrings(Reader sourceReader, lo * @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException */ // TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients - private < T extends SleuthkitVisitableItem> boolean indexText(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean doLanguageDetection) throws Ingester.IngesterException { + < T extends SleuthkitVisitableItem> void search(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean doLanguageDetection, boolean indexIntoSolr, List<String> keywordListNames) throws Ingester.IngesterException, IOException, TskCoreException, Exception { int numChunks = 0; //unknown until chunking is done - Map<String, String> contentFields = Collections.unmodifiableMap(getContentFields(source)); Optional<Language> language = Optional.empty(); + InlineSearcher searcher = new InlineSearcher(keywordListNames, context); + List<Chunk> activeChunkList = new ArrayList<>(); + boolean fileIndexed = false; + //Get a reader for the content of the given source try (BufferedReader reader = new 
BufferedReader(sourceReader)) { Chunker chunker = new Chunker(reader); + String name = sourceName; + if(!(source instanceof BlackboardArtifact)) { + searcher.searchString(name, sourceID, 0); + } + while (chunker.hasNext()) { - if (context != null && context.fileIngestIsCancelled()) { + if ( context.fileIngestIsCancelled()) { + logger.log(Level.INFO, "File ingest cancelled. Cancelling keyword search indexing of {0}", sourceName); + return; + } + + Chunk chunk = chunker.next(); + chunk.setChunkId(numChunks+1); + + if (doLanguageDetection) { + int size = Math.min(chunk.getBaseChunkLength(), LANGUAGE_DETECTION_STRING_SIZE); + language = languageSpecificContentIndexingHelper.detectLanguageIfNeeded(chunk.toString().substring(0, size)); + + // only do language detection on the first chunk of the document + doLanguageDetection = false; + } + + if(keywordListNames != null) { + boolean hitFoundInChunk = searcher.searchChunk(chunk, sourceID, numChunks); + if(!indexIntoSolr) { + if(!hitFoundInChunk) { + if(!activeChunkList.isEmpty() ) { + if(activeChunkList.get(activeChunkList.size() - 1).hasHit()) { + activeChunkList.add(chunk); + // Write List + for(Chunk c: activeChunkList) { + indexChunk(c, sourceID, sourceName, language, contentFields, chunker.hasNext()); + } + activeChunkList.clear(); + } else { + activeChunkList.clear(); + activeChunkList.add(chunk); + } + } else { + activeChunkList.add(chunk); + } + } else { + fileIndexed = true; + chunk.setHasHit(true); + activeChunkList.add(chunk); + } + } else { + indexChunk(chunk, sourceID, sourceName, language, contentFields, chunker.hasNext()); + } + } + + numChunks++; + + } + + if(activeChunkList.size() > 1 || (activeChunkList.size() == 1 && activeChunkList.get(0).hasHit())) { + for(Chunk c: activeChunkList) { + indexChunk(c, sourceID, sourceName, language, contentFields, true); + } + } + + + if (chunker.hasException()) { + logger.log(Level.WARNING, "Error chunking content from " + sourceID + ": " + sourceName, 
chunker.getException()); + throw chunker.getException(); + } + + } finally { + if (context.fileIngestIsCancelled()) { + return ; + } + + if (fileIndexed) { + Map<String, Object> fields = new HashMap<>(contentFields); + //after all chunks, index just the meta data, including the numChunks, of the parent file + fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks)); + //reset id field to base document id + fields.put(Server.Schema.ID.toString(), Long.toString(sourceID)); + //"parent" docs don't have chunk_size + fields.remove(Server.Schema.CHUNK_SIZE.toString()); + indexChunk(null, null, sourceName, fields); + } + } + } + + < T extends SleuthkitVisitableItem> boolean indexFile(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean doLanguageDetection) throws Ingester.IngesterException { + int numChunks = 0; //unknown until chunking is done + Map<String, String> contentFields = Collections.unmodifiableMap(getContentFields(source)); + Optional<Language> language = Optional.empty(); + //Get a reader for the content of the given source + try (BufferedReader reader = new BufferedReader(sourceReader)) { + Chunker chunker = new Chunker(reader); + while (chunker.hasNext()) { + if ( context.fileIngestIsCancelled()) { logger.log(Level.INFO, "File ingest cancelled. 
Cancelling keyword search indexing of {0}", sourceName); return false; } Chunk chunk = chunker.next(); - Map<String, Object> fields = new HashMap<>(contentFields); - String chunkId = Server.getChunkIdString(sourceID, numChunks + 1); - fields.put(Server.Schema.ID.toString(), chunkId); - fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength())); - + if (doLanguageDetection) { int size = Math.min(chunk.getBaseChunkLength(), LANGUAGE_DETECTION_STRING_SIZE); language = languageSpecificContentIndexingHelper.detectLanguageIfNeeded(chunk.toString().substring(0, size)); - + // only do language detection on the first chunk of the document doLanguageDetection = false; } + + Map<String, Object> fields = new HashMap<>(contentFields); + String chunkId = Server.getChunkIdString(sourceID, numChunks + 1); + fields.put(Server.Schema.ID.toString(), chunkId); + fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength())); + language.ifPresent(lang -> languageSpecificContentIndexingHelper.updateLanguageSpecificFields(fields, chunk, lang)); try { //add the chunk text to Solr index - indexChunk(chunk.toString(), chunk.geLowerCasedChunk(), sourceName, fields); + indexChunk(chunk.toString(), chunk.getLowerCasedChunk(), sourceName, fields); // add mini chunk when there's a language specific field if (chunker.hasNext() && language.isPresent()) { languageSpecificContentIndexingHelper.indexMiniChunk(chunk, sourceName, new HashMap<>(contentFields), chunkId, language.get()); } - numChunks++; + numChunks++; + } catch (Ingester.IngesterException ingEx) { logger.log(Level.WARNING, "Ingester had a problem with extracted string from file '" //NON-NLS + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS throw ingEx; //need to rethrow to signal error and move on - } + } } if (chunker.hasException()) { logger.log(Level.WARNING, "Error chunking content from " + sourceID + ": " + sourceName, chunker.getException()); return false; } + 
} catch (Exception ex) { logger.log(Level.WARNING, "Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS return false; - } finally { - if (context != null && context.fileIngestIsCancelled()) { + } finally { + if (context.fileIngestIsCancelled()) { return false; - } else { + } else { Map<String, Object> fields = new HashMap<>(contentFields); //after all chunks, index just the meta data, including the numChunks, of the parent file fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks)); @@ -259,8 +369,34 @@ private < T extends SleuthkitVisitableItem> boolean indexText(Reader sourceReade indexChunk(null, null, sourceName, fields); } } + + return true; } + + private void indexChunk(Chunk chunk, long sourceID, String sourceName, Optional<Language> language, Map<String, String> contentFields, boolean hasNext) throws IngesterException { + Map<String, Object> fields = new HashMap<>(contentFields); + String chunkId = Server.getChunkIdString(sourceID, chunk.getChunkId()); + fields.put(Server.Schema.ID.toString(), chunkId); + fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength())); + + + language.ifPresent(lang -> languageSpecificContentIndexingHelper.updateLanguageSpecificFields(fields, chunk, lang)); + try { + //add the chunk text to Solr index + indexChunk(chunk.toString(), chunk.getLowerCasedChunk(), sourceName, fields); + // add mini chunk when there's a language specific field + if (hasNext && language.isPresent()) { + languageSpecificContentIndexingHelper.indexMiniChunk(chunk, sourceName, new HashMap<>(contentFields), chunkId, language.get()); + } + + } catch (Ingester.IngesterException ingEx) { + logger.log(Level.WARNING, "Ingester had a problem with extracted string from file '" //NON-NLS + + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS + + throw ingEx; //need to rethrow to signal error and move on + } + } /** * Add one chunk as to the Solr index as a 
separate Solr document. diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/InlineSearcher.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/InlineSearcher.java new file mode 100755 index 0000000000000000000000000000000000000000..b8a8dcdf6164d38b8da1daaa4e94a20f08674457 --- /dev/null +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/InlineSearcher.java @@ -0,0 +1,614 @@ +/* + * Autopsy Forensic Browser + * + * Copyright 2022 Basis Technology Corp. + * Contact: carrier <at> sleuthkit <dot> org + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.sleuthkit.autopsy.keywordsearch; + +import com.twelvemonkeys.lang.StringUtil; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; +import java.util.logging.Level; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.commons.validator.routines.DomainValidator; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.sleuthkit.autopsy.casemodule.Case; +import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException; +import org.sleuthkit.autopsy.coreutils.Logger; +import org.sleuthkit.autopsy.ingest.IngestJobContext; +import org.sleuthkit.autopsy.keywordsearch.Chunker.Chunk; +import static org.sleuthkit.autopsy.keywordsearch.RegexQuery.CREDIT_CARD_NUM_PATTERN; +import org.sleuthkit.datamodel.Blackboard; +import org.sleuthkit.datamodel.BlackboardArtifact; +import org.sleuthkit.datamodel.BlackboardAttribute; +import org.sleuthkit.datamodel.Content; +import org.sleuthkit.datamodel.SleuthkitCase; +import org.sleuthkit.datamodel.TskCoreException; +import org.sleuthkit.datamodel.TskException; + +final class InlineSearcher { + + private final List<KeywordList> keywordList; + private static final int MIN_EMAIL_ADDR_LENGTH = 8; + private static final Logger logger = Logger.getLogger(InlineSearcher.class.getName()); + + private final IngestJobContext context; + + static final Map<Long, List<UniqueKeywordHit>> uniqueHitMap = new ConcurrentHashMap<>(); + + static final Map<Long, Map<Long, Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>>>> uniqueHitMap2 = new ConcurrentHashMap<>(); + + // Uses mostly 
native java and the lucene api to search the a given chuck + // for Keywords. Create unique KeywordHits for any unique hit. + InlineSearcher(List<String> keywordListNames, IngestJobContext context) { + this.keywordList = new ArrayList<>(); + this.context = context; + + if (keywordListNames != null) { + XmlKeywordSearchList loader = XmlKeywordSearchList.getCurrent(); + for (String name : keywordListNames) { + keywordList.add(loader.getList(name)); + } + } + } + + /** + * Search the chunk for the currently selected keywords. + * + * @param chunk + * @param sourceID + * + * @throws TskCoreException + */ + boolean searchChunk(Chunk chunk, long sourceID, int chunkId) throws TskCoreException { + return searchString(chunk.getLowerCasedChunk(), sourceID, chunkId); + } + + /** + * Search a string for the currently selected keywords. + * + * @param text + * @param sourceID + * + * @throws TskCoreException + */ + boolean searchString(String text, long sourceID, int chunkId) throws TskCoreException { + boolean hitFound = false; + Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>> hitByKeyword = getMap(context.getJobId(), sourceID); + for (KeywordList list : keywordList) { + List<Keyword> keywords = list.getKeywords(); + for (Keyword originalKeyword : keywords) { + Map<Keyword, List<UniqueKeywordHit>> hitMap = hitByKeyword.get(originalKeyword); + if (hitMap == null) { + hitMap = new HashMap<>(); + hitByKeyword.put(originalKeyword, hitMap); + } + + List<UniqueKeywordHit> keywordHits = new ArrayList<>(); + if (originalKeyword.searchTermIsLiteral()) { + if (StringUtil.containsIgnoreCase(text, originalKeyword.getSearchTerm())) { + keywordHits.addAll(createKeywordHits(text, originalKeyword, sourceID, chunkId, list.getName())); + } + } else { + String regex = originalKeyword.getSearchTerm(); + + try { + // validate the regex + Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE); + Matcher matcher = pattern.matcher(text); + + if (matcher.find()) { + 
keywordHits.addAll(createKeywordHits(text, originalKeyword, sourceID, chunkId, list.getName())); + } + } catch (IllegalArgumentException ex) { + //TODO What should we do here? Log and continue? + } + } + + if (!keywordHits.isEmpty()) { + hitFound = true; + for (UniqueKeywordHit hit : keywordHits) { + Keyword keywordCopy = new Keyword(hit.getHit(), + originalKeyword.searchTermIsLiteral(), + originalKeyword.searchTermIsWholeWord(), + list.getName(), + originalKeyword.getOriginalTerm()); + + List<UniqueKeywordHit> mapHitList = hitMap.get(keywordCopy); + if (mapHitList == null) { + mapHitList = new ArrayList<>(); + hitMap.put(keywordCopy, mapHitList); + } + + if (!mapHitList.contains(hit)) { + mapHitList.add(hit); + } + } + } + + if (context.fileIngestIsCancelled()) { + return hitFound; + } + } + } + return hitFound; + } + + /** + * This method very similar to RegexQuery createKeywordHits, with the + * knowledge of solr removed. + * + * @param text + * @param originalKeyword + * + * @return A list of KeywordHit objects. 
+ * + * @throws TskCoreException + */ + private List<UniqueKeywordHit> createKeywordHits(String text, Keyword originalKeyword, long sourceID, int chunkId, String keywordListName) throws TskCoreException { + + if (originalKeyword.searchTermIsLiteral() && originalKeyword.searchTermIsWholeWord()) { + try { + return getExactMatchHits(text, originalKeyword, sourceID, chunkId, keywordListName); + } catch (IOException ex) { + throw new TskCoreException("Failed to create exactMatch hits", ex); + } + } + + final HashMap<String, String> keywordsFoundInThisDocument = new HashMap<>(); + + List<UniqueKeywordHit> hits = new ArrayList<>(); + String keywordString = originalKeyword.getSearchTerm(); + + boolean queryStringContainsWildcardSuffix = originalKeyword.getSearchTerm().endsWith(".*"); + + String searchPattern; + if (originalKeyword.searchTermIsLiteral()) { + /** + * For substring searches, the following pattern was arrived at + * through trial and error in an attempt to reproduce the same hits + * we were getting when we were using the TermComponent approach. + * This basically looks for zero of more word characters followed + * optionally by a dot or apostrophe, followed by the quoted + * lowercase substring following by zero or more word characters + * followed optionally by a dot or apostrophe. The reason that the + * dot and apostrophe characters are being handled here is because + * the old code used to find hits in domain names (e.g. hacks.ie) + * and possessives (e.g. hacker's). This obviously works for English + * but is probably not sufficient for other languages. 
+ */ + searchPattern = "[\\w[\\.']]*" + java.util.regex.Pattern.quote(keywordString.toLowerCase()) + "[\\w[\\.']]*"; + + } else { + searchPattern = keywordString; + } + + final java.util.regex.Pattern pattern = java.util.regex.Pattern.compile(searchPattern, Pattern.CASE_INSENSITIVE); + + try { + String content = text; + Matcher hitMatcher = pattern.matcher(content); + int offset = 0; + + while (hitMatcher.find(offset)) { + + String hit = hitMatcher.group().toLowerCase(); + + /** + * No need to continue on if the the string is "" nothing to + * find or do. + */ + if ("".equals(hit)) { + break; + } + + offset = hitMatcher.end(); + final BlackboardAttribute.ATTRIBUTE_TYPE artifactAttributeType = originalKeyword.getArtifactAttributeType(); + + // We attempt to reduce false positives for phone numbers and IP address hits + // by querying Solr for hits delimited by a set of known boundary characters. + // See KeywordSearchList.PHONE_NUMBER_REGEX for an example. + // Because of this the hits may contain an extra character at the beginning or end that + // needs to be chopped off, unless the user has supplied their own wildcard suffix + // as part of the regex. + if (!queryStringContainsWildcardSuffix + && (artifactAttributeType == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PHONE_NUMBER + || artifactAttributeType == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_IP_ADDRESS)) { + if (artifactAttributeType == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PHONE_NUMBER) { + // For phone numbers replace all non numeric characters (except "(") at the start of the hit. + hit = hit.replaceAll("^[^0-9\\(]", ""); + } else { + // Replace all non numeric characters at the start of the hit. + hit = hit.replaceAll("^[^0-9]", ""); + } + // Replace all non numeric at the end of the hit. + hit = hit.replaceAll("[^0-9]$", ""); + + if (offset > 1) { + /* + * NOTE: our IP and phone number regex patterns look for + * boundary characters immediately before and after the + * keyword hit. 
After a match, Java pattern mather + * re-starts at the first character not matched by the + * previous match. This basically requires two boundary + * characters to be present between each pattern match. + * To mitigate this we are resetting the offest one + * character back. + */ + offset--; + } + } + + /** + * Boundary characters are removed from the start and end of the + * hit to normalize the hits. This is being done for substring + * searches only at this point. We don't do it for real regular + * expression searches because the user may have explicitly + * included boundary characters in their regular expression. + */ + if (originalKeyword.searchTermIsLiteral()) { + hit = hit.replaceAll("^" + KeywordSearchList.BOUNDARY_CHARACTERS + "*", ""); + hit = hit.replaceAll(KeywordSearchList.BOUNDARY_CHARACTERS + "*$", ""); + } + + /** + * The use of String interning is an optimization to ensure that + * we reuse the same keyword hit String object across all hits. + * Even though we benefit from G1GC String deduplication, the + * overhead associated with creating a new String object for + * every KeywordHit can be significant when the number of hits + * gets large. + */ + hit = hit.intern(); + + // We will only create one KeywordHit instance per document for + // a given hit. + if (keywordsFoundInThisDocument.containsKey(hit)) { + continue; + } + keywordsFoundInThisDocument.put(hit, hit); + + if (artifactAttributeType == null) { + hits.add(new UniqueKeywordHit(chunkId, sourceID, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit, keywordListName, originalKeyword.searchTermIsWholeWord(), originalKeyword.searchTermIsLiteral(), originalKeyword.getArtifactAttributeType(), originalKeyword.getSearchTerm())); + } else { + switch (artifactAttributeType) { + case TSK_EMAIL: + /* + * Reduce false positives by eliminating email + * address hits that are either too short or are not + * for valid top level domains. 
+ */ + if (hit.length() >= MIN_EMAIL_ADDR_LENGTH + && DomainValidator.getInstance(true).isValidTld(hit.substring(hit.lastIndexOf('.')))) { + hits.add(new UniqueKeywordHit(chunkId, sourceID, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit, keywordListName, originalKeyword.searchTermIsWholeWord(), originalKeyword.searchTermIsLiteral(), originalKeyword.getArtifactAttributeType(), originalKeyword.getSearchTerm())); + } + + break; + case TSK_CARD_NUMBER: + /* + * If searching for credit card account numbers, do + * extra validation on the term and discard it if it + * does not pass. + */ + Matcher ccnMatcher = CREDIT_CARD_NUM_PATTERN.matcher(hit); + + for (int rLength = hit.length(); rLength >= 12; rLength--) { + ccnMatcher.region(0, rLength); + if (ccnMatcher.find()) { + final String group = ccnMatcher.group("ccn"); + if (CreditCardValidator.isValidCCN(group)) { + hits.add(new UniqueKeywordHit(chunkId, sourceID, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit, keywordListName, originalKeyword.searchTermIsWholeWord(), originalKeyword.searchTermIsLiteral(), originalKeyword.getArtifactAttributeType(), originalKeyword.getSearchTerm())); + } + } + } + + break; + default: + hits.add(new UniqueKeywordHit(chunkId, sourceID, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit, keywordListName, originalKeyword.searchTermIsWholeWord(), originalKeyword.searchTermIsLiteral(), originalKeyword.getArtifactAttributeType(), originalKeyword.getSearchTerm())); + break; + } + } + } + + } catch (Throwable error) { + /* + * NOTE: Matcher.find() is known to throw StackOverflowError in rare + * cases (see JIRA-2700). StackOverflowError is an error, not an + * exception, and therefore needs to be caught as a Throwable. When + * this occurs we should re-throw the error as TskCoreException so + * that it is logged by the calling method and move on to the next + * Solr document. 
+ */ + throw new TskCoreException("Failed to create keyword hits for chunk due to " + error.getMessage()); + } + return hits; + } + + /** + * Clean up the memory that is being used for the given job. + * + * @param context + */ + static void cleanup(IngestJobContext context) { + Map<Long, Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>>> jobMap = uniqueHitMap2.get(context.getJobId()); + if (jobMap != null) { + jobMap.clear(); + } + } + + /** + * Generates the artifacts for the found KeywordHits. This method should be + * called once per content object. + * + * @param context + */ + static void makeArtifacts(IngestJobContext context) throws TskException { + + Map<Long, Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>>> jobMap = uniqueHitMap2.get(context.getJobId()); + if (jobMap == null) { + return; + } + + for (Map.Entry<Long, Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>>> mapBySource : jobMap.entrySet()) { + Long sourceId = mapBySource.getKey(); + Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>> mapByKeyword = mapBySource.getValue(); + + for (Map.Entry<Keyword, Map<Keyword, List<UniqueKeywordHit>>> item : mapByKeyword.entrySet()) { + Keyword originalKeyword = item.getKey(); + Map<Keyword, List<UniqueKeywordHit>> map = item.getValue(); + + List<BlackboardArtifact> hitArtifacts = new ArrayList<>(); + if (!map.isEmpty()) { + for (Map.Entry<Keyword, List<UniqueKeywordHit>> entry : map.entrySet()) { + Keyword hitKeyword = entry.getKey(); + List<UniqueKeywordHit> hitList = entry.getValue(); + // Only create one hit for the document. + // The first hit in the list should be the first one that + // was found. 
+ if (!hitList.isEmpty()) { + UniqueKeywordHit hit = hitList.get(0); + SleuthkitCase tskCase = Case.getCurrentCase().getSleuthkitCase(); + Content content = tskCase.getContentById(hit.getContentID()); + BlackboardArtifact artifact = RegexQuery.createKeywordHitArtifact(content, originalKeyword, hitKeyword, hit, hit.getSnippet(), hitKeyword.getListName(), sourceId); + // createKeywordHitArtifact has the potential to return null + // when a CCN account is created. + if (artifact != null) { + hitArtifacts.add(artifact); + + } + + } + } + + if (!hitArtifacts.isEmpty()) { + try { + SleuthkitCase tskCase = Case.getCurrentCaseThrows().getSleuthkitCase(); + Blackboard blackboard = tskCase.getBlackboard(); + + blackboard.postArtifacts(hitArtifacts, "KeywordSearch", context.getJobId()); + hitArtifacts.clear(); + } catch (NoCurrentCaseException | Blackboard.BlackboardException ex) { + logger.log(Level.SEVERE, "Failed to post KWH artifact to blackboard.", ex); //NON-NLS + } + } + + if (context.fileIngestIsCancelled()) { + return; + } + } + } + } + } + + /** + * Searches the chunk for exact matches and creates the appropriate keyword + * hits. 
+ * + * @param text + * @param originalKeyword + * @param sourceID + * + * @return + * + * @throws IOException + */ + public List<UniqueKeywordHit> getExactMatchHits(String text, Keyword originalKeyword, long sourceID, int chunkId, String keywordListName) throws IOException { + final HashMap<String, String> keywordsFoundInThisDocument = new HashMap<>(); + + List<UniqueKeywordHit> hits = new ArrayList<>(); + Analyzer analyzer = new StandardAnalyzer(); + + //Get the tokens of the keyword + List<String> keywordTokens = new ArrayList<>(); + try (TokenStream keywordstream = analyzer.tokenStream("field", originalKeyword.getSearchTerm())) { + CharTermAttribute attr = keywordstream.addAttribute(CharTermAttribute.class); + keywordstream.reset(); + while (keywordstream.incrementToken()) { + keywordTokens.add(attr.toString()); + } + } + + try (TokenStream stream = analyzer.tokenStream("field", text)) { + CharTermAttribute attr = stream.addAttribute(CharTermAttribute.class); + OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); + stream.reset(); + while (stream.incrementToken()) { + if (!attr.toString().equals(keywordTokens.get(0))) { + continue; + } + + int startOffset = offset.startOffset(); + int endOffset = offset.endOffset(); + boolean match = true; + + for (int index = 1; index < keywordTokens.size(); index++) { + if (stream.incrementToken()) { + if (!attr.toString().equals(keywordTokens.get(index))) { + match = false; + break; + } else { + endOffset = offset.endOffset(); + } + } + } + + if (match) { + String hit = text.subSequence(startOffset, endOffset).toString(); + + // We will only create one KeywordHit instance per document for + // a given hit. 
+ if (keywordsFoundInThisDocument.containsKey(hit)) { + continue; + } + keywordsFoundInThisDocument.put(hit, hit); + + hits.add(new UniqueKeywordHit(chunkId, sourceID, KeywordSearchUtil.makeSnippet(text, startOffset, endOffset, hit), hit, keywordListName, originalKeyword.searchTermIsWholeWord(), originalKeyword.searchTermIsLiteral(), originalKeyword.getArtifactAttributeType(), originalKeyword.getOriginalTerm())); + } + } + } + + return hits; + } + + /** + * Get the keyword map for the given job and source. + * + * @param jobId + * @param sourceID + * + * @return + */ + static private Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>> getMap(long jobId, long sourceID) { + Map<Long, Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>>> jobMap = uniqueHitMap2.get(jobId); + if (jobMap == null) { + jobMap = new ConcurrentHashMap<>(); + uniqueHitMap2.put(jobId, jobMap); + } + + Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>> sourceMap = jobMap.get(sourceID); + if (sourceMap == null) { + sourceMap = new ConcurrentHashMap<>(); + jobMap.put(sourceID, sourceMap); + } + + return sourceMap; + } + + // KeywordHit is not unique enough for finding duplicates, this class + // extends the KeywordHit class to make truely unique hits. 
+ static class UniqueKeywordHit extends KeywordHit { + + private final String listName; + private final boolean isLiteral; + private final boolean isWholeWord; + private final BlackboardAttribute.ATTRIBUTE_TYPE artifactAtrributeType; + private final String originalSearchTerm; + + UniqueKeywordHit(int chunkId, long sourceID, String snippet, String hit, String listName, boolean isWholeWord, boolean isLiteral, BlackboardAttribute.ATTRIBUTE_TYPE artifactAtrributeType, String originalSearchTerm) { + super(chunkId, sourceID, snippet, hit); + + this.listName = listName; + this.isWholeWord = isWholeWord; + this.isLiteral = isLiteral; + this.artifactAtrributeType = artifactAtrributeType; + this.originalSearchTerm = originalSearchTerm; + } + + @Override + public int compareTo(KeywordHit other) { + return compare((UniqueKeywordHit) other); + } + + private int compare(UniqueKeywordHit other) { + return Comparator.comparing(UniqueKeywordHit::getSolrObjectId) + .thenComparing(UniqueKeywordHit::getChunkId) + .thenComparing(UniqueKeywordHit::getHit) + .thenComparing(UniqueKeywordHit::getSnippet) + .thenComparing(UniqueKeywordHit::isWholeWord) + .thenComparing(UniqueKeywordHit::isLiteral) + .thenComparing(UniqueKeywordHit::getArtifactAtrributeType) + .thenComparing(UniqueKeywordHit::getOriginalSearchTerm) + .thenComparing(UniqueKeywordHit::getListName) + .compare(this, other); + } + + @Override + public boolean equals(Object obj) { + + if (null == obj) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final UniqueKeywordHit other = (UniqueKeywordHit) obj; + + return getSnippet().equalsIgnoreCase(other.getSnippet()) + && getSolrObjectId().equals(other.getSolrObjectId()) + && getChunkId().equals(other.getChunkId()) + && getHit().equalsIgnoreCase(other.getHit()) + && listName.equalsIgnoreCase(other.getListName()) + && isLiteral == other.isLiteral() + && isWholeWord == other.isWholeWord() + && 
originalSearchTerm.equalsIgnoreCase(other.getOriginalSearchTerm()) + && (artifactAtrributeType != null ? artifactAtrributeType.equals(other.getArtifactAtrributeType()) : true); + } + + @Override + public int hashCode() { + int hash = 3; + hash = 67 * hash + super.hashCode(); + hash = 67 * hash + Objects.hashCode(this.listName); + hash = 67 * hash + (this.isLiteral ? 1 : 0); + hash = 67 * hash + (this.isWholeWord ? 1 : 0); + hash = 67 * hash + Objects.hashCode(this.artifactAtrributeType); + hash = 67 * hash + Objects.hashCode(this.originalSearchTerm); + return hash; + } + + String getListName() { + return listName; + } + + Boolean isLiteral() { + return isLiteral; + } + + Boolean isWholeWord() { + return isWholeWord; + } + + BlackboardAttribute.ATTRIBUTE_TYPE getArtifactAtrributeType() { + return artifactAtrributeType; + } + + String getOriginalSearchTerm() { + return originalSearchTerm; + } + + } +} diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Keyword.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Keyword.java index d7c7d7705fb42dd62b0c199f7b9ea974cafa9234..98479b7b41082a34f8ab38b1cf753f882f39d0cd 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Keyword.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Keyword.java @@ -211,8 +211,10 @@ public boolean equals(Object obj) { public int hashCode() { int hash = 7; hash = 17 * hash + this.searchTerm.hashCode(); + hash = 17 * hash + this.listName.hashCode(); hash = 17 * hash + (this.isLiteral ? 1 : 0); hash = 17 * hash + (this.isWholeWord ? 
1 : 0); + hash = 17 * hash + this.originalTerm.hashCode(); return hash; } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java index c147618558b355a5d0a9f6112a8b6264f3ce0e58..5e39eb8af560b818cf83f2d76467428a4fa417f2 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java @@ -21,10 +21,12 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.util.Comparator; +import java.util.Objects; import java.util.Optional; import org.apache.commons.lang3.StringUtils; import org.sleuthkit.autopsy.casemodule.Case; import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException; +import org.sleuthkit.datamodel.BlackboardAttribute; import org.sleuthkit.datamodel.SleuthkitCase; import org.sleuthkit.datamodel.TskCoreException; @@ -43,7 +45,7 @@ class KeywordHit implements Comparable<KeywordHit> { private final int chunkId; private final String snippet; private final String hit; - + /** * Constructor * @@ -54,7 +56,7 @@ class KeywordHit implements Comparable<KeywordHit> { * For some searches (ie substring, regex) this will be * different than the search term. * - */ + */ KeywordHit(String solrDocumentId, String snippet, String hit) { this.snippet = StringUtils.stripToEmpty(snippet); this.hit = hit; @@ -68,17 +70,30 @@ class KeywordHit implements Comparable<KeywordHit> { * documents. One contains object metadata (chunk #1) and the second and * subsequent documents contain chunks of the text. */ - String[] split = solrDocumentId.split(Server.CHUNK_ID_SEPARATOR); - if (split.length == 1) { - //chunk 0 has only the bare document id without the chunk id. 
- this.solrObjectId = Long.parseLong(solrDocumentId); - this.chunkId = 0; + if(!solrDocumentId.isEmpty()) { + String[] split = solrDocumentId.split(Server.CHUNK_ID_SEPARATOR); + if (split.length == 1) { + //chunk 0 has only the bare document id without the chunk id. + this.solrObjectId = Long.parseLong(solrDocumentId); + this.chunkId = 0; + } else { + this.solrObjectId = Long.parseLong(split[0]); + this.chunkId = Integer.parseInt(split[1]); + } } else { - this.solrObjectId = Long.parseLong(split[0]); - this.chunkId = Integer.parseInt(split[1]); + this.solrObjectId = 0; + this.chunkId = 0; } } + KeywordHit(int chunkId, long sourceID, String snippet, String hit) { + this.snippet = StringUtils.stripToEmpty(snippet); + this.hit = hit; + this.chunkId = chunkId; + this.solrObjectId = sourceID; + } + + String getHit() { return hit; } @@ -87,11 +102,11 @@ String getSolrDocumentId() { return Long.toString(solrObjectId) + Server.CHUNK_ID_SEPARATOR + Long.toString(chunkId); } - long getSolrObjectId() { + Long getSolrObjectId() { return this.solrObjectId; } - int getChunkId() { + Integer getChunkId() { return this.chunkId; } @@ -102,7 +117,7 @@ boolean hasSnippet() { String getSnippet() { return this.snippet; } - + /** * Get the content id associated with the content underlying hit. 
* For hits on files this will be the same as the object id associated @@ -172,20 +187,25 @@ public boolean equals(Object obj) { return false; } final KeywordHit other = (KeywordHit) obj; - return this.compareTo(other) == 0; + return compareTo(other) == 0; } - + @Override public int hashCode() { - int hash = 3; - hash = 41 * hash + (int) this.solrObjectId + this.chunkId; + int hash = 7; + hash = 37 * hash + (int) (this.solrObjectId ^ (this.solrObjectId >>> 32)); + hash = 37 * hash + this.chunkId; + hash = 37 * hash + Objects.hashCode(this.snippet); + hash = 37 * hash + Objects.hashCode(this.hit); return hash; } @Override - public int compareTo(KeywordHit o) { + public int compareTo(KeywordHit other) { return Comparator.comparing(KeywordHit::getSolrObjectId) .thenComparing(KeywordHit::getChunkId) - .compare(this, o); + .thenComparing(KeywordHit::getHit) + .thenComparing(KeywordHit::getSnippet) + .compare(this, other); } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchGlobalSearchSettingsPanel.form b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchGlobalSearchSettingsPanel.form index 4a0cadaefd4c67c9bdea4ac3ea756b9e56353ac6..0f8800efdd0af3e8641397d43222f3c7a9791361 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchGlobalSearchSettingsPanel.form +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchGlobalSearchSettingsPanel.form @@ -1,10 +1,6 @@ <?xml version="1.0" encoding="UTF-8" ?> <Form version="1.5" maxVersion="1.7" type="org.netbeans.modules.form.forminfo.JPanelFormInfo"> - <NonVisualComponents> - <Component class="javax.swing.ButtonGroup" name="timeGroup"> - </Component> - </NonVisualComponents> <AuxValues> <AuxValue name="FormSettings_autoResourcing" type="java.lang.Integer" value="1"/> <AuxValue name="FormSettings_autoSetComponentName" type="java.lang.Boolean" value="false"/> @@ -47,23 +43,13 @@ <EmptySpace type="separate" max="-2" attributes="0"/> <Component 
id="filesIndexedValue" min="-2" max="-2" attributes="0"/> </Group> - <Component id="frequencyLabel" alignment="0" min="-2" max="-2" attributes="0"/> <Group type="102" alignment="0" attributes="0"> <Component id="chunksLabel" linkSize="1" min="-2" max="-2" attributes="0"/> <EmptySpace type="separate" max="-2" attributes="0"/> <Component id="chunksValLabel" min="-2" max="-2" attributes="0"/> </Group> - <Group type="102" alignment="0" attributes="0"> - <EmptySpace min="16" pref="16" max="-2" attributes="0"/> - <Group type="103" groupAlignment="0" attributes="0"> - <Component id="timeRadioButton2" min="-2" max="-2" attributes="0"/> - <Component id="timeRadioButton1" min="-2" max="-2" attributes="0"/> - <Component id="timeRadioButton3" alignment="0" min="-2" max="-2" attributes="0"/> - <Component id="timeRadioButton4" alignment="0" min="-2" max="-2" attributes="0"/> - <Component id="timeRadioButton5" alignment="0" min="-2" max="-2" attributes="0"/> - </Group> - </Group> </Group> + <EmptySpace min="-2" pref="132" max="-2" attributes="0"/> </Group> </Group> <EmptySpace max="32767" attributes="0"/> @@ -90,19 +76,7 @@ <Component id="skipNSRLCheckBox" min="-2" max="-2" attributes="0"/> <EmptySpace max="-2" attributes="0"/> <Component id="showSnippetsCB" min="-2" max="-2" attributes="0"/> - <EmptySpace max="-2" attributes="0"/> - <Component id="frequencyLabel" min="-2" max="-2" attributes="0"/> - <EmptySpace max="-2" attributes="0"/> - <Component id="timeRadioButton1" min="-2" max="-2" attributes="0"/> - <EmptySpace max="-2" attributes="0"/> - <Component id="timeRadioButton2" min="-2" max="-2" attributes="0"/> - <EmptySpace max="-2" attributes="0"/> - <Component id="timeRadioButton3" min="-2" max="-2" attributes="0"/> - <EmptySpace max="-2" attributes="0"/> - <Component id="timeRadioButton4" min="-2" max="-2" attributes="0"/> - <EmptySpace max="-2" attributes="0"/> - <Component id="timeRadioButton5" min="-2" max="-2" attributes="0"/> - <EmptySpace max="-2" attributes="0"/> + 
<EmptySpace type="unrelated" max="-2" attributes="0"/> <Group type="103" groupAlignment="1" attributes="0"> <Component id="informationLabel" min="-2" max="-2" attributes="0"/> <Component id="informationSeparator" min="-2" pref="7" max="-2" attributes="0"/> @@ -119,7 +93,7 @@ </Group> <EmptySpace type="unrelated" max="-2" attributes="0"/> <Component id="ingestWarningLabel" min="-2" max="-2" attributes="0"/> - <EmptySpace max="32767" attributes="0"/> + <EmptySpace pref="151" max="32767" attributes="0"/> </Group> </Group> </DimensionLayout> @@ -184,65 +158,6 @@ </Component> <Component class="javax.swing.JSeparator" name="informationSeparator"> </Component> - <Component class="javax.swing.JLabel" name="frequencyLabel"> - <Properties> - <Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> - <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.frequencyLabel.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> - </Property> - </Properties> - </Component> - <Component class="javax.swing.JRadioButton" name="timeRadioButton1"> - <Properties> - <Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> - <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> - </Property> - <Property name="toolTipText" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> - <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> - </Property> - </Properties> - <Events> - <EventHandler 
event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="timeRadioButton1ActionPerformed"/> - </Events> - </Component> - <Component class="javax.swing.JRadioButton" name="timeRadioButton2"> - <Properties> - <Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> - <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> - </Property> - <Property name="toolTipText" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> - <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.toolTipText" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> - </Property> - </Properties> - <Events> - <EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="timeRadioButton2ActionPerformed"/> - </Events> - </Component> - <Component class="javax.swing.JRadioButton" name="timeRadioButton3"> - <Properties> - <Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> - <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> - </Property> - <Property name="toolTipText" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> - <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.toolTipText" 
replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> - </Property> - </Properties> - <Events> - <EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="timeRadioButton3ActionPerformed"/> - </Events> - </Component> - <Component class="javax.swing.JRadioButton" name="timeRadioButton4"> - <Properties> - <Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> - <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.text_1" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> - </Property> - <Property name="toolTipText" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> - <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.toolTipText" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> - </Property> - </Properties> - <Events> - <EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="timeRadioButton4ActionPerformed"/> - </Events> - </Component> <Component class="javax.swing.JCheckBox" name="showSnippetsCB"> <Properties> <Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> @@ -253,19 +168,6 @@ <EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="showSnippetsCBActionPerformed"/> </Events> </Component> - <Component class="javax.swing.JRadioButton" name="timeRadioButton5"> - <Properties> - <Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> - <ResourceString 
bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> - </Property> - <Property name="toolTipText" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> - <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.toolTipText" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> - </Property> - </Properties> - <Events> - <EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="timeRadioButton5ActionPerformed"/> - </Events> - </Component> <Component class="javax.swing.JLabel" name="ingestWarningLabel"> <Properties> <Property name="icon" type="javax.swing.Icon" editor="org.netbeans.modules.form.editors2.IconEditor"> diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchGlobalSearchSettingsPanel.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchGlobalSearchSettingsPanel.java index ccd1de71dac3c0408c23a03e821589de28022d60..7393e3ea4dec9d720d69b6f0e53261aa1ba29bbc 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchGlobalSearchSettingsPanel.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchGlobalSearchSettingsPanel.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2012-2018 Basis Technology Corp. + * Copyright 2012-2022 Basis Technology Corp. 
* Contact: carrier <at> sleuthkit <dot> org * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -26,9 +26,7 @@ import org.openide.util.NbBundle; import org.sleuthkit.autopsy.corecomponents.OptionsPanel; import org.sleuthkit.autopsy.coreutils.Logger; -import org.sleuthkit.autopsy.coreutils.PlatformUtil; import org.sleuthkit.autopsy.ingest.IngestManager; -import org.sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.UpdateFrequency; /** * General, not per list, keyword search configuration and status display widget @@ -53,31 +51,6 @@ private void activateWidgets() { boolean ingestRunning = IngestManager.getInstance().isIngestRunning(); ingestWarningLabel.setVisible(ingestRunning); skipNSRLCheckBox.setEnabled(!ingestRunning); - setTimeSettingEnabled(!ingestRunning); - - final UpdateFrequency curFreq = KeywordSearchSettings.getUpdateFrequency(); - switch (curFreq) { - case FAST: - timeRadioButton1.setSelected(true); - break; - case AVG: - timeRadioButton2.setSelected(true); - break; - case SLOW: - timeRadioButton3.setSelected(true); - break; - case SLOWEST: - timeRadioButton4.setSelected(true); - break; - case NONE: - timeRadioButton5.setSelected(true); - break; - case DEFAULT: - default: - // default value - timeRadioButton3.setSelected(true); - break; - } } /** @@ -89,7 +62,6 @@ private void activateWidgets() { // <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents private void initComponents() { - timeGroup = new javax.swing.ButtonGroup(); skipNSRLCheckBox = new javax.swing.JCheckBox(); filesIndexedLabel = new javax.swing.JLabel(); filesIndexedValue = new javax.swing.JLabel(); @@ -99,13 +71,7 @@ private void initComponents() { informationLabel = new javax.swing.JLabel(); settingsSeparator = new javax.swing.JSeparator(); informationSeparator = new javax.swing.JSeparator(); - frequencyLabel = new javax.swing.JLabel(); - timeRadioButton1 = new javax.swing.JRadioButton(); - timeRadioButton2 = new 
javax.swing.JRadioButton(); - timeRadioButton3 = new javax.swing.JRadioButton(); - timeRadioButton4 = new javax.swing.JRadioButton(); showSnippetsCB = new javax.swing.JCheckBox(); - timeRadioButton5 = new javax.swing.JRadioButton(); ingestWarningLabel = new javax.swing.JLabel(); skipNSRLCheckBox.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.text")); // NOI18N @@ -128,40 +94,6 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { informationLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.informationLabel.text")); // NOI18N - frequencyLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.frequencyLabel.text")); // NOI18N - - timeRadioButton1.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text")); // NOI18N - timeRadioButton1.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText")); // NOI18N - timeRadioButton1.addActionListener(new java.awt.event.ActionListener() { - public void actionPerformed(java.awt.event.ActionEvent evt) { - timeRadioButton1ActionPerformed(evt); - } - }); - - timeRadioButton2.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.text")); // NOI18N - timeRadioButton2.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.toolTipText")); // NOI18N - timeRadioButton2.addActionListener(new java.awt.event.ActionListener() { - public void 
actionPerformed(java.awt.event.ActionEvent evt) { - timeRadioButton2ActionPerformed(evt); - } - }); - - timeRadioButton3.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.text")); // NOI18N - timeRadioButton3.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.toolTipText")); // NOI18N - timeRadioButton3.addActionListener(new java.awt.event.ActionListener() { - public void actionPerformed(java.awt.event.ActionEvent evt) { - timeRadioButton3ActionPerformed(evt); - } - }); - - timeRadioButton4.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.text_1")); // NOI18N - timeRadioButton4.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.toolTipText")); // NOI18N - timeRadioButton4.addActionListener(new java.awt.event.ActionListener() { - public void actionPerformed(java.awt.event.ActionEvent evt) { - timeRadioButton4ActionPerformed(evt); - } - }); - showSnippetsCB.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.showSnippetsCB.text")); // NOI18N showSnippetsCB.addActionListener(new java.awt.event.ActionListener() { public void actionPerformed(java.awt.event.ActionEvent evt) { @@ -169,14 +101,6 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { } }); - timeRadioButton5.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.text")); // NOI18N - timeRadioButton5.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, 
"KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.toolTipText")); // NOI18N - timeRadioButton5.addActionListener(new java.awt.event.ActionListener() { - public void actionPerformed(java.awt.event.ActionEvent evt) { - timeRadioButton5ActionPerformed(evt); - } - }); - ingestWarningLabel.setIcon(new javax.swing.ImageIcon(getClass().getResource("/org/sleuthkit/autopsy/modules/hashdatabase/warning16.png"))); // NOI18N ingestWarningLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.ingestWarningLabel.text")); // NOI18N @@ -207,19 +131,11 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { .addComponent(filesIndexedLabel) .addGap(18, 18, 18) .addComponent(filesIndexedValue)) - .addComponent(frequencyLabel) .addGroup(layout.createSequentialGroup() .addComponent(chunksLabel) .addGap(18, 18, 18) - .addComponent(chunksValLabel)) - .addGroup(layout.createSequentialGroup() - .addGap(16, 16, 16) - .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) - .addComponent(timeRadioButton2) - .addComponent(timeRadioButton1) - .addComponent(timeRadioButton3) - .addComponent(timeRadioButton4) - .addComponent(timeRadioButton5)))))) + .addComponent(chunksValLabel))) + .addGap(132, 132, 132))) .addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)) .addGroup(layout.createSequentialGroup() .addComponent(settingsLabel) @@ -241,19 +157,7 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { .addComponent(skipNSRLCheckBox) .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) .addComponent(showSnippetsCB) - .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) - .addComponent(frequencyLabel) - .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) - .addComponent(timeRadioButton1) - .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) - .addComponent(timeRadioButton2) - 
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) - .addComponent(timeRadioButton3) - .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) - .addComponent(timeRadioButton4) - .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) - .addComponent(timeRadioButton5) - .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED) .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.TRAILING) .addComponent(informationLabel) .addComponent(informationSeparator, javax.swing.GroupLayout.PREFERRED_SIZE, 7, javax.swing.GroupLayout.PREFERRED_SIZE)) @@ -267,14 +171,10 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { .addComponent(chunksValLabel)) .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED) .addComponent(ingestWarningLabel) - .addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)) + .addContainerGap(151, Short.MAX_VALUE)) ); }// </editor-fold>//GEN-END:initComponents - private void timeRadioButton5ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_timeRadioButton5ActionPerformed - firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null); - }//GEN-LAST:event_timeRadioButton5ActionPerformed - private void skipNSRLCheckBoxActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_skipNSRLCheckBoxActionPerformed firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null); }//GEN-LAST:event_skipNSRLCheckBoxActionPerformed @@ -283,28 +183,11 @@ private void showSnippetsCBActionPerformed(java.awt.event.ActionEvent evt) {//GE firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null); }//GEN-LAST:event_showSnippetsCBActionPerformed - private void timeRadioButton1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_timeRadioButton1ActionPerformed - firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null); - 
}//GEN-LAST:event_timeRadioButton1ActionPerformed - - private void timeRadioButton2ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_timeRadioButton2ActionPerformed - firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null); - }//GEN-LAST:event_timeRadioButton2ActionPerformed - - private void timeRadioButton3ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_timeRadioButton3ActionPerformed - firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null); - }//GEN-LAST:event_timeRadioButton3ActionPerformed - - private void timeRadioButton4ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_timeRadioButton4ActionPerformed - firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null); - }//GEN-LAST:event_timeRadioButton4ActionPerformed - // Variables declaration - do not modify//GEN-BEGIN:variables private javax.swing.JLabel chunksLabel; private javax.swing.JLabel chunksValLabel; private javax.swing.JLabel filesIndexedLabel; private javax.swing.JLabel filesIndexedValue; - private javax.swing.JLabel frequencyLabel; private javax.swing.JLabel informationLabel; private javax.swing.JSeparator informationSeparator; private javax.swing.JLabel ingestWarningLabel; @@ -312,18 +195,11 @@ private void timeRadioButton4ActionPerformed(java.awt.event.ActionEvent evt) {// private javax.swing.JSeparator settingsSeparator; private javax.swing.JCheckBox showSnippetsCB; private javax.swing.JCheckBox skipNSRLCheckBox; - private javax.swing.ButtonGroup timeGroup; - private javax.swing.JRadioButton timeRadioButton1; - private javax.swing.JRadioButton timeRadioButton2; - private javax.swing.JRadioButton timeRadioButton3; - private javax.swing.JRadioButton timeRadioButton4; - private javax.swing.JRadioButton timeRadioButton5; // End of variables declaration//GEN-END:variables @Override public void store() { KeywordSearchSettings.setSkipKnown(skipNSRLCheckBox.isSelected()); - 
KeywordSearchSettings.setUpdateFrequency(getSelectedTimeValue()); KeywordSearchSettings.setShowSnippets(showSnippetsCB.isSelected()); } @@ -332,40 +208,10 @@ public void load() { activateWidgets(); } - private void setTimeSettingEnabled(boolean enabled) { - timeRadioButton1.setEnabled(enabled); - timeRadioButton2.setEnabled(enabled); - timeRadioButton3.setEnabled(enabled); - timeRadioButton4.setEnabled(enabled); - timeRadioButton5.setEnabled(enabled); - frequencyLabel.setEnabled(enabled); - } - - private UpdateFrequency getSelectedTimeValue() { - if (timeRadioButton1.isSelected()) { - return UpdateFrequency.FAST; - } else if (timeRadioButton2.isSelected()) { - return UpdateFrequency.AVG; - } else if (timeRadioButton3.isSelected()) { - return UpdateFrequency.SLOW; - } else if (timeRadioButton4.isSelected()) { - return UpdateFrequency.SLOWEST; - } else if (timeRadioButton5.isSelected()) { - return UpdateFrequency.NONE; - } - return UpdateFrequency.DEFAULT; - } - @NbBundle.Messages({"KeywordSearchGlobalSearchSettingsPanel.customizeComponents.windowsOCR=Enable Optical Character Recognition (OCR) (Requires Windows 64-bit)", "KeywordSearchGlobalSearchSettingsPanel.customizeComponents.windowsLimitedOCR=Only process images which are over 100KB in size or extracted from a document. 
(Beta) (Requires Windows 64-bit)"}) private void customizeComponents() { - timeGroup.add(timeRadioButton1); - timeGroup.add(timeRadioButton2); - timeGroup.add(timeRadioButton3); - timeGroup.add(timeRadioButton4); - timeGroup.add(timeRadioButton5); - this.skipNSRLCheckBox.setSelected(KeywordSearchSettings.getSkipKnown()); try { diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java index 3140916f5ec3fae13685eb7b255e82fbe67bc121..e3f9582fdfe6d07dee331c8b1dda153b08e5849d 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2011-2021 Basis Technology Corp. + * Copyright 2011-2023 Basis Technology Corp. * Contact: carrier <at> sleuthkit <dot> org * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -38,6 +38,7 @@ import java.util.logging.Level; import java.util.stream.Collectors; import org.apache.tika.mime.MimeTypes; +import org.openide.util.Exceptions; import org.openide.util.Lookup; import org.openide.util.NbBundle; import org.openide.util.NbBundle.Messages; @@ -69,6 +70,7 @@ import org.sleuthkit.datamodel.TskCoreException; import org.sleuthkit.datamodel.TskData; import org.sleuthkit.datamodel.TskData.FileKnown; +import org.sleuthkit.datamodel.TskException; /** * An ingest module on a file level Performs indexing of allocated and Solr @@ -149,7 +151,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule { .build(); private static final String IMAGE_MIME_TYPE_PREFIX = "image/"; - + // documents where OCR is performed private static final ImmutableSet<String> OCR_DOCUMENTS = ImmutableSet.of( "application/pdf", @@ -160,7 +162,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule { 
"application/vnd.ms-excel", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ); - + /** * Options for this extractor */ @@ -169,33 +171,13 @@ enum StringsExtractOptions { EXTRACT_UTF8, ///< extract UTF8 text, true/false }; - enum UpdateFrequency { - - FAST(20), - AVG(10), - SLOW(5), - SLOWEST(1), - NONE(Integer.MAX_VALUE), - DEFAULT(5); - private final int time; - - UpdateFrequency(int time) { - this.time = time; - } - - int getTime() { - return time; - } - }; private static final Logger logger = Logger.getLogger(KeywordSearchIngestModule.class.getName()); private final IngestServices services = IngestServices.getInstance(); private Ingester ingester = null; - private Indexer indexer; private FileTypeDetector fileTypeDetector; //only search images from current ingest, not images previously ingested/indexed //accessed read-only by searcher thread - private boolean startedSearching = false; private Lookup stringsExtractionContext; private final KeywordSearchJobSettings settings; private boolean initialized = false; @@ -257,18 +239,21 @@ public void startUp(IngestJobContext context) throws IngestModuleException { initialized = false; jobId = context.getJobId(); - Server server = KeywordSearch.getServer(); - if (server.coreIsOpen() == false) { - throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startUp_noOpenCore_msg()); - } + Server server = null; + if (settings.isIndexToSolrEnabled()) { + server = KeywordSearch.getServer(); + if (server.coreIsOpen() == false) { + throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startUp_noOpenCore_msg()); + } - try { - Index indexInfo = server.getIndexInfo(); - if (!indexInfo.isCompatible(IndexFinder.getCurrentSchemaVersion())) { - throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupException_indexSchemaNotSupported(indexInfo.getSchemaVersion())); + try { + Index indexInfo = server.getIndexInfo(); + if (!indexInfo.isCompatible(IndexFinder.getCurrentSchemaVersion())) 
{ + throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupException_indexSchemaNotSupported(indexInfo.getSchemaVersion())); + } + } catch (NoOpenCoreException ex) { + throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupMessage_failedToGetIndexSchema(), ex); } - } catch (NoOpenCoreException ex) { - throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupMessage_failedToGetIndexSchema(), ex); } try { @@ -307,22 +292,24 @@ public void startUp(IngestJobContext context) throws IngestModuleException { } } else { // for single-user cases need to verify connection to local SOLR service - try { - if (!server.isLocalSolrRunning()) { - throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg())); + // server will be null if indexing is disabled + if (server != null) { + try { + if (!server.isLocalSolrRunning()) { + throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg())); + } + } catch (KeywordSearchModuleException ex) { + //this means Solr is not properly initialized + throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg()), ex); + } + try { + // make an actual query to verify that server is responding + // we had cases where getStatus was OK, but the connection resulted in a 404 + server.queryNumIndexedDocuments(); + } catch (KeywordSearchModuleException | NoOpenCoreException ex) { + throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_exception_errConnToSolr_msg(ex.getMessage()), ex); } - } catch (KeywordSearchModuleException ex) { - //this means Solr is not properly initialized - throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg()), ex); - } - try { - // make an actual query to verify 
that server is responding - // we had cases where getStatus was OK, but the connection resulted in a 404 - server.queryNumIndexedDocuments(); - } catch (KeywordSearchModuleException | NoOpenCoreException ex) { - throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_exception_errConnToSolr_msg(ex.getMessage()), ex); } - // check if this job has any searchable keywords List<KeywordList> keywordLists = XmlKeywordSearchList.getCurrent().getListsL(); boolean hasKeywordsForSearch = false; @@ -347,7 +334,6 @@ public void startUp(IngestJobContext context) throws IngestModuleException { stringsExtractionContext = Lookups.fixed(stringsConfig); - indexer = new Indexer(); initialized = true; } @@ -389,7 +375,7 @@ public ProcessResult process(AbstractFile abstractFile) { if (context.fileIngestIsCancelled()) { return ProcessResult.OK; } - indexer.indexFile(extractorOpt, abstractFile, mimeType, false); + searchFile(extractorOpt, abstractFile, mimeType, false); return ProcessResult.OK; } @@ -397,17 +383,7 @@ public ProcessResult process(AbstractFile abstractFile) { if (context.fileIngestIsCancelled()) { return ProcessResult.OK; } - indexer.indexFile(extractorOpt, abstractFile, mimeType, true); - - // Start searching if it hasn't started already - if (!startedSearching) { - if (context.fileIngestIsCancelled()) { - return ProcessResult.OK; - } - List<String> keywordListNames = settings.getNamesOfEnabledKeyWordLists(); - IngestSearchRunner.getInstance().startJob(context, keywordListNames); - startedSearching = true; - } + searchFile(extractorOpt, abstractFile, mimeType, true); return ProcessResult.OK; } @@ -425,17 +401,22 @@ public void shutDown() { } if (context.fileIngestIsCancelled()) { - logger.log(Level.INFO, "Keyword search ingest module instance {0} stopping search job due to ingest cancellation", instanceNum); //NON-NLS - IngestSearchRunner.getInstance().stopJob(jobId); + logger.log(Level.INFO, "Keyword search ingest module instance {0} stopping due to ingest 
cancellation", instanceNum); //NON-NLS cleanup(); return; } - // Remove from the search list and trigger final commit and final search - IngestSearchRunner.getInstance().endJob(jobId); - // We only need to post the summary msg from the last module per job if (refCounter.decrementAndGet(jobId) == 0) { + + try { + InlineSearcher.makeArtifacts(context); + InlineSearcher.cleanup(context); + Ingester.getDefault().commit(); + } catch (TskException ex) { + logger.log(Level.SEVERE, String.format("Failed to create search ingest artifacts for job %d", context.getJobId()), ex); + } + try { final int numIndexedFiles = KeywordSearch.getServer().queryNumIndexedFiles(); logger.log(Level.INFO, "Indexed files count: {0}", numIndexedFiles); //NON-NLS @@ -462,7 +443,7 @@ private void cleanup() { } /** - * Returns true if file should have OCR performed on it when limited OCR + * Returns true if file should have OCR performed on it when limited OCR * setting is specified. * * @param aFile The abstract file. @@ -475,12 +456,12 @@ private boolean isLimitedOCRFile(AbstractFile aFile, String mimeType) { if (OCR_DOCUMENTS.contains(mimeType)) { return true; } - + if (mimeType.startsWith(IMAGE_MIME_TYPE_PREFIX)) { return aFile.getSize() > LIMITED_OCR_SIZE_MIN - || aFile.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.DERIVED; + || aFile.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.DERIVED; } - + return false; } @@ -562,317 +543,319 @@ private Optional<TextExtractor> getExtractor(AbstractFile abstractFile) { * File indexer, processes and indexes known/allocated files, * unknown/unallocated files and directories accordingly */ - private class Indexer { - - private final Logger logger = Logger.getLogger(Indexer.class.getName()); - - /** - * Extract text with Tika or other text extraction modules (by - * streaming) from the file Divide the file into chunks and index the - * chunks - * - * @param extractorOptional The textExtractor to use with this file or - * empty. 
- * @param aFile file to extract strings from, divide into - * chunks and index - * @param extractedMetadata Map that will be populated with the file's - * metadata. - * - * @return true if the file was text_ingested, false otherwise - * - * @throws IngesterException exception thrown if indexing failed - */ - private boolean extractTextAndIndex(Optional<TextExtractor> extractorOptional, AbstractFile aFile, + /** + * Extract text with Tika or other text extraction modules (by streaming) + * from the file Divide the file into chunks and index the chunks + * + * @param extractorOptional The textExtractor to use with this file or + * empty. + * @param aFile file to extract strings from, divide into chunks + * and index + * @param extractedMetadata Map that will be populated with the file's + * metadata. + * + * @return true if the file was text_ingested, false otherwise + * + * @throws IngesterException exception thrown if indexing failed + */ + private boolean extractTextAndSearch(Optional<TextExtractor> extractorOptional, AbstractFile aFile, Map<String, String> extractedMetadata) throws IngesterException { + try { + if (!extractorOptional.isPresent()) { + return false; + } + //divide into chunks and index + Ingester.getDefault().search(getTikaOrTextExtractor(extractorOptional, aFile, extractedMetadata), aFile.getId(), aFile.getName(), aFile, context, true,settings.isIndexToSolrEnabled(), settings.getNamesOfEnabledKeyWordLists()); + + } catch (TextExtractor.InitReaderException ex) { + return false; + } catch(Exception ex) { + logger.log(Level.WARNING, String.format("Failed to search file %s [id=%d]", + aFile.getName(), aFile.getId()), ex); + return false; + } + + return true; + } + + private Reader getTikaOrTextExtractor(Optional<TextExtractor> extractorOptional, AbstractFile aFile, + Map<String, String> extractedMetadata) throws TextExtractor.InitReaderException { + + TextExtractor extractor = extractorOptional.get(); + Reader fileText = extractor.getReader(); + 
Reader finalReader; try { - if (!extractorOptional.isPresent()) { - return false; + Map<String, String> metadata = extractor.getMetadata(); + if (!metadata.isEmpty()) { + // Creating the metadata artifact here causes occasional problems + // when indexing the text, so we save the metadata map to + // use after this method is complete. + extractedMetadata.putAll(metadata); } - TextExtractor extractor = extractorOptional.get(); - Reader fileText = extractor.getReader(); - Reader finalReader; - try { - Map<String, String> metadata = extractor.getMetadata(); - if (!metadata.isEmpty()) { - // Creating the metadata artifact here causes occasional problems - // when indexing the text, so we save the metadata map to - // use after this method is complete. - extractedMetadata.putAll(metadata); + CharSource formattedMetadata = getMetaDataCharSource(metadata); + //Append the metadata to end of the file text + finalReader = CharSource.concat(new CharSource() { + //Wrap fileText reader for concatenation + @Override + public Reader openStream() throws IOException { + return fileText; } - CharSource formattedMetadata = getMetaDataCharSource(metadata); - //Append the metadata to end of the file text - finalReader = CharSource.concat(new CharSource() { - //Wrap fileText reader for concatenation - @Override - public Reader openStream() throws IOException { - return fileText; - } - }, formattedMetadata).openStream(); - } catch (IOException ex) { - logger.log(Level.WARNING, String.format("Could not format extracted metadata for file %s [id=%d]", - aFile.getName(), aFile.getId()), ex); - //Just send file text. - finalReader = fileText; - } - //divide into chunks and index - return Ingester.getDefault().indexText(finalReader, aFile.getId(), aFile.getName(), aFile, context); - } catch (TextExtractor.InitReaderException ex) { - // Text extractor could not be initialized. No text will be extracted. 
- return false; + }, formattedMetadata).openStream(); + } catch (IOException ex) { + logger.log(Level.WARNING, String.format("Could not format extracted metadata for file %s [id=%d]", + aFile.getName(), aFile.getId()), ex); + //Just send file text. + finalReader = fileText; } - } + //divide into chunks and index + return finalReader; + + } - private void createMetadataArtifact(AbstractFile aFile, Map<String, String> metadata) { + private void createMetadataArtifact(AbstractFile aFile, Map<String, String> metadata) { - String moduleName = KeywordSearchIngestModule.class.getName(); + String moduleName = KeywordSearchIngestModule.class.getName(); - Collection<BlackboardAttribute> attributes = new ArrayList<>(); - Collection<BlackboardArtifact> bbartifacts = new ArrayList<>(); - for (Map.Entry<String, String> entry : metadata.entrySet()) { - if (METADATA_TYPES_MAP.containsKey(entry.getKey())) { - BlackboardAttribute bba = checkAttribute(entry.getKey(), entry.getValue()); - if (bba != null) { - attributes.add(bba); - } + Collection<BlackboardAttribute> attributes = new ArrayList<>(); + Collection<BlackboardArtifact> bbartifacts = new ArrayList<>(); + for (Map.Entry<String, String> entry : metadata.entrySet()) { + if (METADATA_TYPES_MAP.containsKey(entry.getKey())) { + BlackboardAttribute bba = checkAttribute(entry.getKey(), entry.getValue()); + if (bba != null) { + attributes.add(bba); } } - if (!attributes.isEmpty()) { + } + if (!attributes.isEmpty()) { + try { + BlackboardArtifact bbart = aFile.newDataArtifact(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_METADATA), attributes); + bbartifacts.add(bbart); + } catch (TskCoreException ex) { + // Log error and return to continue processing + logger.log(Level.WARNING, String.format("Error creating or adding metadata artifact for file %s.", aFile.getParentPath() + aFile.getName()), ex); //NON-NLS + return; + } + if (!bbartifacts.isEmpty()) { try { - BlackboardArtifact bbart = aFile.newDataArtifact(new 
BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_METADATA), attributes); - bbartifacts.add(bbart); - } catch (TskCoreException ex) { + Case.getCurrentCaseThrows().getSleuthkitCase().getBlackboard().postArtifacts(bbartifacts, moduleName, jobId); + } catch (NoCurrentCaseException | Blackboard.BlackboardException ex) { // Log error and return to continue processing - logger.log(Level.WARNING, String.format("Error creating or adding metadata artifact for file %s.", aFile.getParentPath() + aFile.getName()), ex); //NON-NLS + logger.log(Level.WARNING, String.format("Unable to post blackboard artifacts for file $s.", aFile.getParentPath() + aFile.getName()), ex); //NON-NLS return; } - if (!bbartifacts.isEmpty()) { - try { - Case.getCurrentCaseThrows().getSleuthkitCase().getBlackboard().postArtifacts(bbartifacts, moduleName, jobId); - } catch (NoCurrentCaseException | Blackboard.BlackboardException ex) { - // Log error and return to continue processing - logger.log(Level.WARNING, String.format("Unable to post blackboard artifacts for file $s.", aFile.getParentPath() + aFile.getName()), ex); //NON-NLS - return; - } - } } } + } - private BlackboardAttribute checkAttribute(String key, String value) { - String moduleName = KeywordSearchIngestModule.class.getName(); - if (!value.isEmpty() && value.charAt(0) != ' ') { - if (METADATA_DATE_TYPES.contains(key)) { - SimpleDateFormat metadataDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", US); - Long metadataDateTime = Long.valueOf(0); - try { - String metadataDate = value.replaceAll("T", " ").replaceAll("Z", ""); - Date usedDate = metadataDateFormat.parse(metadataDate); - metadataDateTime = usedDate.getTime() / 1000; - return new BlackboardAttribute(METADATA_TYPES_MAP.get(key), moduleName, metadataDateTime); - } catch (ParseException ex) { - // catching error and displaying date that could not be parsed then will continue on. 
- logger.log(Level.WARNING, String.format("Failed to parse date/time %s for metadata attribute %s.", value, key), ex); //NON-NLS - return null; - } - } else { - return new BlackboardAttribute(METADATA_TYPES_MAP.get(key), moduleName, value); + private BlackboardAttribute checkAttribute(String key, String value) { + String moduleName = KeywordSearchIngestModule.class.getName(); + if (!value.isEmpty() && value.charAt(0) != ' ') { + if (METADATA_DATE_TYPES.contains(key)) { + SimpleDateFormat metadataDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", US); + Long metadataDateTime = Long.valueOf(0); + try { + String metadataDate = value.replaceAll("T", " ").replaceAll("Z", ""); + Date usedDate = metadataDateFormat.parse(metadataDate); + metadataDateTime = usedDate.getTime() / 1000; + return new BlackboardAttribute(METADATA_TYPES_MAP.get(key), moduleName, metadataDateTime); + } catch (ParseException ex) { + // catching error and displaying date that could not be parsed then will continue on. + logger.log(Level.WARNING, String.format("Failed to parse date/time %s for metadata attribute %s.", value, key), ex); //NON-NLS + return null; } + } else { + return new BlackboardAttribute(METADATA_TYPES_MAP.get(key), moduleName, value); } + } + + return null; + + } - return null; + /** + * Pretty print the text extractor metadata. 
+ * + * @param metadata The Metadata map to wrap as a CharSource + * + * @return A CharSource for the given Metadata + */ + @NbBundle.Messages({ + "KeywordSearchIngestModule.metadataTitle=METADATA" + }) + private CharSource getMetaDataCharSource(Map<String, String> metadata) { + return CharSource.wrap(new StringBuilder( + String.format("\n\n------------------------------%s------------------------------\n\n", + Bundle.KeywordSearchIngestModule_metadataTitle())) + .append(metadata.entrySet().stream().sorted(Map.Entry.comparingByKey()) + .map(entry -> entry.getKey() + ": " + entry.getValue()) + .collect(Collectors.joining("\n")) + )); + } + /** + * Extract strings using heuristics from the file and add to index. + * + * @param aFile file to extract strings from, divide into chunks and index + * + * @return true if the file was text_ingested, false otherwise + */ + private boolean extractStringsAndIndex(AbstractFile aFile) { + try { + if (context.fileIngestIsCancelled()) { + return true; + } + Reader extractedTextReader = KeywordSearchUtil.getReader(aFile, stringsExtractionContext); + Ingester.getDefault().search(extractedTextReader, aFile.getId(), aFile.getName(), aFile, KeywordSearchIngestModule.this.context, false, settings.isIndexToSolrEnabled(), settings.getNamesOfEnabledKeyWordLists()); + putIngestStatus(jobId, aFile.getId(), IngestStatus.STRINGS_INGESTED); + } catch (Exception ex) { + logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ex); //NON-NLS + putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); + return false; } + return true; + } + + /** + * Adds the file to the index. Detects file type, calls extractors, etc. + * + * @param extractor The textExtractor to use with this file or empty if + * no extractor found. + * @param aFile File to analyze. + * @param mimeType The file mime type. + * @param indexContent False if only metadata should be text_ingested. 
True + * if content and metadata should be index. + */ + private void searchFile(Optional<TextExtractor> extractor, AbstractFile aFile, String mimeType, boolean indexContent) { + //logger.log(Level.INFO, "Processing AbstractFile: " + abstractFile.getName()); + + TskData.TSK_DB_FILES_TYPE_ENUM aType = aFile.getType(); /** - * Pretty print the text extractor metadata. - * - * @param metadata The Metadata map to wrap as a CharSource - * - * @return A CharSource for the given Metadata + * Extract unicode strings from unallocated and unused blocks and carved + * text files. The reason for performing string extraction on these is + * because they all may contain multiple encodings which can cause text + * to be missed by the more specialized text extractors used below. */ - @NbBundle.Messages({ - "KeywordSearchIngestModule.metadataTitle=METADATA" - }) - private CharSource getMetaDataCharSource(Map<String, String> metadata) { - return CharSource.wrap(new StringBuilder( - String.format("\n\n------------------------------%s------------------------------\n\n", - Bundle.KeywordSearchIngestModule_metadataTitle())) - .append(metadata.entrySet().stream().sorted(Map.Entry.comparingByKey()) - .map(entry -> entry.getKey() + ": " + entry.getValue()) - .collect(Collectors.joining("\n")) - )); + if ((aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) + || aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS)) + || (aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED) && aFile.getNameExtension().equalsIgnoreCase("txt"))) { + if (context.fileIngestIsCancelled()) { + return; + } + extractStringsAndIndex(aFile); + return; } - /** - * Extract strings using heuristics from the file and add to index. 
- * - * @param aFile file to extract strings from, divide into chunks and - * index - * - * @return true if the file was text_ingested, false otherwise - */ - private boolean extractStringsAndIndex(AbstractFile aFile) { + final long size = aFile.getSize(); + //if not to index content, or a dir, or 0 content, index meta data only + + if ((indexContent == false || aFile.isDir() || size == 0)) { try { if (context.fileIngestIsCancelled()) { - return true; - } - TextExtractor stringsExtractor = TextExtractorFactory.getStringsExtractor(aFile, stringsExtractionContext); - Reader extractedTextReader = stringsExtractor.getReader(); - if (Ingester.getDefault().indexStrings(extractedTextReader, aFile.getId(), aFile.getName(), aFile, KeywordSearchIngestModule.this.context)) { - putIngestStatus(jobId, aFile.getId(), IngestStatus.STRINGS_INGESTED); - return true; - } else { - logger.log(Level.WARNING, "Failed to extract strings and ingest, file ''{0}'' (id: {1}).", new Object[]{aFile.getName(), aFile.getId()}); //NON-NLS - putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT); - return false; + return; } - } catch (IngesterException | TextExtractor.InitReaderException ex) { - logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ex); //NON-NLS + ingester.indexMetaDataOnly(aFile); + putIngestStatus(jobId, aFile.getId(), IngestStatus.METADATA_INGESTED); + } catch (IngesterException ex) { putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); - return false; + logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex); //NON-NLS } + return; } - /** - * Adds the file to the index. Detects file type, calls extractors, etc. - * - * @param extractor The textExtractor to use with this file or empty - * if no extractor found. - * @param aFile File to analyze. - * @param mimeType The file mime type. 
- * @param indexContent False if only metadata should be text_ingested. - * True if content and metadata should be index. - */ - private void indexFile(Optional<TextExtractor> extractor, AbstractFile aFile, String mimeType, boolean indexContent) { - //logger.log(Level.INFO, "Processing AbstractFile: " + abstractFile.getName()); - - TskData.TSK_DB_FILES_TYPE_ENUM aType = aFile.getType(); - - /** - * Extract unicode strings from unallocated and unused blocks and - * carved text files. The reason for performing string extraction on - * these is because they all may contain multiple encodings which - * can cause text to be missed by the more specialized text - * extractors used below. - */ - if ((aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) - || aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS)) - || (aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED) && aFile.getNameExtension().equalsIgnoreCase("txt"))) { + if (context.fileIngestIsCancelled()) { + return; + } + + // we skip archive formats that are opened by the archive module. + // @@@ We could have a check here to see if the archive module was enabled though... 
+ if (ARCHIVE_MIME_TYPES.contains(mimeType)) { + try { if (context.fileIngestIsCancelled()) { return; } - extractStringsAndIndex(aFile); - return; + ingester.indexMetaDataOnly(aFile); + putIngestStatus(jobId, aFile.getId(), IngestStatus.METADATA_INGESTED); + } catch (IngesterException ex) { + putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); + logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex); //NON-NLS } + return; + } - final long size = aFile.getSize(); - //if not to index content, or a dir, or 0 content, index meta data only - - if ((indexContent == false || aFile.isDir() || size == 0)) { - try { - if (context.fileIngestIsCancelled()) { - return; - } - ingester.indexMetaDataOnly(aFile); - putIngestStatus(jobId, aFile.getId(), IngestStatus.METADATA_INGESTED); - } catch (IngesterException ex) { - putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); - logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex); //NON-NLS - } - return; - } + boolean wasTextAdded = false; + Map<String, String> extractedMetadata = new HashMap<>(); + //extract text with one of the extractors, divide into chunks and index with Solr + try { + //logger.log(Level.INFO, "indexing: " + aFile.getName()); if (context.fileIngestIsCancelled()) { return; } - - // we skip archive formats that are opened by the archive module. - // @@@ We could have a check here to see if the archive module was enabled though... 
- if (ARCHIVE_MIME_TYPES.contains(mimeType)) { - try { - if (context.fileIngestIsCancelled()) { - return; - } - ingester.indexMetaDataOnly(aFile); - putIngestStatus(jobId, aFile.getId(), IngestStatus.METADATA_INGESTED); - } catch (IngesterException ex) { - putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); - logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex); //NON-NLS - } + if (MimeTypes.OCTET_STREAM.equals(mimeType)) { + extractStringsAndIndex(aFile); return; } - - boolean wasTextAdded = false; - Map<String, String> extractedMetadata = new HashMap<>(); - - //extract text with one of the extractors, divide into chunks and index with Solr - try { - //logger.log(Level.INFO, "indexing: " + aFile.getName()); - if (context.fileIngestIsCancelled()) { - return; - } - if (MimeTypes.OCTET_STREAM.equals(mimeType)) { - extractStringsAndIndex(aFile); - return; - } - if (!extractTextAndIndex(extractor, aFile, extractedMetadata)) { - // Text extractor not found for file. Extract string only. - putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT); - } else { - putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED); - wasTextAdded = true; - } - - } catch (IngesterException e) { - logger.log(Level.INFO, "Could not extract text with Tika, " + aFile.getId() + ", " //NON-NLS - + aFile.getName(), e); - putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); - } catch (Exception e) { - logger.log(Level.WARNING, "Error extracting text with Tika, " + aFile.getId() + ", " //NON-NLS - + aFile.getName(), e); + if (!extractTextAndSearch(extractor, aFile, extractedMetadata)) { + // Text extractor not found for file. Extract string only. 
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT); + } else { + putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED); + wasTextAdded = true; } - if ((wasTextAdded == false) && (aFile.getNameExtension().equalsIgnoreCase("txt") && !(aFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED)))) { - //Carved Files should be the only type of unallocated files capable of a txt extension and - //should be ignored by the TextFileExtractor because they may contain more than one text encoding - wasTextAdded = indexTextFile(aFile); - } + } catch (IngesterException e) { + logger.log(Level.INFO, "Could not extract text with Tika, " + aFile.getId() + ", " //NON-NLS + + aFile.getName(), e); + putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); + } catch (Exception e) { + logger.log(Level.WARNING, "Error extracting text with Tika, " + aFile.getId() + ", " //NON-NLS + + aFile.getName(), e); + putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT); + } - // if it wasn't supported or had an error, default to strings - if (wasTextAdded == false) { - extractStringsAndIndex(aFile); - } + if ((wasTextAdded == false) && (aFile.getNameExtension().equalsIgnoreCase("txt") && !(aFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED)))) { + //Carved Files should be the only type of unallocated files capable of a txt extension and + //should be ignored by the TextFileExtractor because they may contain more than one text encoding + wasTextAdded = searchTextFile(aFile); + } - // Now that the indexing is complete, create the metadata artifact (if applicable). - // It is unclear why calling this from extractTextAndIndex() generates - // errors. 
- if (!extractedMetadata.isEmpty()) { - createMetadataArtifact(aFile, extractedMetadata); - } + // if it wasn't supported or had an error, default to strings + if (wasTextAdded == false) { + extractStringsAndIndex(aFile); } - /** - * Adds the text file to the index given an encoding. Returns true if - * indexing was successful and false otherwise. - * - * @param aFile Text file to analyze - */ - private boolean indexTextFile(AbstractFile aFile) { - try { - TextFileExtractor textFileExtractor = new TextFileExtractor(aFile); - Reader textReader = textFileExtractor.getReader(); - if (textReader == null) { - logger.log(Level.INFO, "Unable to extract with TextFileExtractor, Reader was null for file: {0}", aFile.getName()); - } else if (Ingester.getDefault().indexText(textReader, aFile.getId(), aFile.getName(), aFile, context)) { - textReader.close(); - putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED); - return true; - } - } catch (IngesterException | IOException | TextExtractor.InitReaderException ex) { - logger.log(Level.WARNING, "Unable to index " + aFile.getName(), ex); + // Now that the indexing is complete, create the metadata artifact (if applicable). + // It is unclear why calling this from extractTextAndIndex() generates + // errors. + if (!extractedMetadata.isEmpty()) { + createMetadataArtifact(aFile, extractedMetadata); + } + } + + /** + * Adds the text file to the index given an encoding. Returns true if + * indexing was successful and false otherwise. 
+ * + * @param aFile Text file to analyze + */ + private boolean searchTextFile(AbstractFile aFile) { + try { + TextFileExtractor textFileExtractor = new TextFileExtractor(aFile); + Reader textReader = textFileExtractor.getReader(); + if (textReader == null) { + logger.log(Level.INFO, "Unable to extract with TextFileExtractor, Reader was null for file: {0}", aFile.getName()); + } else { + Ingester.getDefault().search(textReader, aFile.getId(), aFile.getName(), aFile, context, true, settings.isIndexToSolrEnabled(), settings.getNamesOfEnabledKeyWordLists()); + textReader.close(); + putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED); + return true; } - return false; + } catch (Exception ex) { + logger.log(Level.WARNING, "Unable to index " + aFile.getName(), ex); } + return false; } + } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettings.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettings.java index 865024dae865bb1ff02e7ae4027cccdb0ef1e070..133c3b37db729f53d8d1adc71c4ba2e98af151b2 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettings.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettings.java @@ -30,7 +30,7 @@ public final class KeywordSearchJobSettings implements IngestModuleIngestJobSett private static final long serialVersionUID = 1L; - private HashSet<String> namesOfEnabledKeywordLists; + private final HashSet<String> namesOfEnabledKeywordLists; private HashSet<String> namesOfDisabledKeywordLists; // Added in version 1.1 /** @@ -41,6 +41,8 @@ public final class KeywordSearchJobSettings implements IngestModuleIngestJobSett private Boolean limitedOCREnabled; private boolean ocrOnly; + + private boolean indexToSolr; /** * Constructs ingest job settings for the keywords search module. 
@@ -55,6 +57,7 @@ public final class KeywordSearchJobSettings implements IngestModuleIngestJobSett this.ocrEnabled = null; this.limitedOCREnabled = null; this.ocrOnly = false; + this.indexToSolr = true; } /** @@ -69,12 +72,13 @@ public final class KeywordSearchJobSettings implements IngestModuleIngestJobSett * @param ocrOnly True if keyword search ingest should * be solely limited to OCR. */ - KeywordSearchJobSettings(List<String> namesOfEnabledKeywordLists, List<String> namesOfDisabledKeywordLists, boolean ocrEnabled, boolean limitedOCREnabled, boolean ocrOnly) { + KeywordSearchJobSettings(List<String> namesOfEnabledKeywordLists, List<String> namesOfDisabledKeywordLists, boolean ocrEnabled, boolean limitedOCREnabled, boolean ocrOnly, boolean indexToSolr) { this.namesOfEnabledKeywordLists = new HashSet<>(namesOfEnabledKeywordLists); this.namesOfDisabledKeywordLists = new HashSet<>(namesOfDisabledKeywordLists); this.ocrEnabled = ocrEnabled; this.limitedOCREnabled = limitedOCREnabled; this.ocrOnly = ocrOnly; + this.indexToSolr = indexToSolr; } /** @@ -196,5 +200,13 @@ private void upgradeFromOlderVersions() { this.namesOfDisabledKeywordLists = new HashSet<>(); } } + + boolean isIndexToSolrEnabled() { + return indexToSolr; + } + + void setIndexToSolrEnabled(boolean enabled){ + indexToSolr = enabled; + } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettingsPanel.form b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettingsPanel.form index 4444a7436614f637c69fe2cb4a15b06de18a0f7f..33f83c0d9fba098ab852d4ac728054e7a310d7d9 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettingsPanel.form +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettingsPanel.form @@ -16,12 +16,10 @@ <AuxValue name="FormSettings_listenerGenerationStyle" type="java.lang.Integer" value="0"/> <AuxValue name="FormSettings_variablesLocal" type="java.lang.Boolean" value="false"/> 
<AuxValue name="FormSettings_variablesModifier" type="java.lang.Integer" value="2"/> - <AuxValue name="designerSize" type="java.awt.Dimension" value="-84,-19,0,5,115,114,0,18,106,97,118,97,46,97,119,116,46,68,105,109,101,110,115,105,111,110,65,-114,-39,-41,-84,95,68,20,2,0,2,73,0,6,104,101,105,103,104,116,73,0,5,119,105,100,116,104,120,112,0,0,1,108,0,0,1,71"/> + <AuxValue name="designerSize" type="java.awt.Dimension" value="-84,-19,0,5,115,114,0,18,106,97,118,97,46,97,119,116,46,68,105,109,101,110,115,105,111,110,65,-114,-39,-41,-84,95,68,20,2,0,2,73,0,6,104,101,105,103,104,116,73,0,5,119,105,100,116,104,120,112,0,0,0,-56,0,0,1,73"/> </AuxValues> - <Layout class="org.netbeans.modules.form.compat2.layouts.DesignBoxLayout"> - <Property name="axis" type="int" value="1"/> - </Layout> + <Layout class="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout"/> <SubComponents> <Component class="javax.swing.JLabel" name="titleLabel"> <Properties> @@ -51,6 +49,11 @@ <AuxValues> <AuxValue name="autoScrollPane" type="java.lang.Boolean" value="true"/> </AuxValues> + <Constraints> + <Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> + <GridBagConstraints gridX="0" gridY="1" gridWidth="7" gridHeight="1" fill="1" ipadX="284" ipadY="-71" insetsTop="6" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="1.0" weightY="1.0"/> + </Constraint> + </Constraints> <Layout class="org.netbeans.modules.form.compat2.layouts.support.JScrollPaneSupportLayout"/> <SubComponents> @@ -79,6 +82,21 @@ </Component> </SubComponents> </Container> +<<<<<<< HEAD +======= + <Component class="javax.swing.JLabel" name="titleLabel"> + <Properties> + <Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> + <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" 
key="KeywordSearchJobSettingsPanel.titleLabel.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> + </Property> + </Properties> + <Constraints> + <Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> + <GridBagConstraints gridX="0" gridY="0" gridWidth="2" gridHeight="1" fill="0" ipadX="0" ipadY="0" insetsTop="7" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/> + </Constraint> + </Constraints> + </Component> +>>>>>>> aae905c78496f2405c2af41a79e6fbb3a4c00e1c <Component class="javax.swing.JLabel" name="languagesLabel"> <Properties> <Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> @@ -92,6 +110,11 @@ </Property> <Property name="verticalTextPosition" type="int" value="3"/> </Properties> + <Constraints> + <Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> + <GridBagConstraints gridX="0" gridY="2" gridWidth="8" gridHeight="1" fill="0" ipadX="25" ipadY="-22" insetsTop="6" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/> + </Constraint> + </Constraints> </Component> <Component class="javax.swing.JLabel" name="languagesValLabel"> <Properties> @@ -107,6 +130,11 @@ </Border> </Property> </Properties> + <Constraints> + <Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> + <GridBagConstraints gridX="0" gridY="3" gridWidth="6" gridHeight="1" fill="0" ipadX="270" ipadY="0" insetsTop="6" insetsLeft="20" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/> + </Constraint> + 
</Constraints> </Component> <Component class="javax.swing.JLabel" name="encodingsLabel"> <Properties> @@ -119,6 +147,11 @@ </Border> </Property> </Properties> + <Constraints> + <Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> + <GridBagConstraints gridX="0" gridY="4" gridWidth="1" gridHeight="1" fill="0" ipadX="0" ipadY="0" insetsTop="11" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/> + </Constraint> + </Constraints> </Component> <Component class="javax.swing.JLabel" name="keywordSearchEncodings"> <Properties> @@ -131,6 +164,11 @@ </Border> </Property> </Properties> + <Constraints> + <Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> + <GridBagConstraints gridX="1" gridY="4" gridWidth="1" gridHeight="1" fill="0" ipadX="0" ipadY="0" insetsTop="11" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/> + </Constraint> + </Constraints> </Component> <Component class="javax.swing.JCheckBox" name="ocrCheckBox"> <Properties> @@ -141,6 +179,11 @@ <Events> <EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="ocrCheckBoxActionPerformed"/> </Events> + <Constraints> + <Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> + <GridBagConstraints gridX="0" gridY="5" gridWidth="2" gridHeight="1" fill="0" ipadX="0" ipadY="0" insetsTop="7" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/> + </Constraint> + </Constraints> </Component> <Component class="javax.swing.JCheckBox" 
name="limitedOcrCheckbox"> <Properties> @@ -157,6 +200,11 @@ <Events> <EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="limitedOcrCheckboxActionPerformed"/> </Events> + <Constraints> + <Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> + <GridBagConstraints gridX="0" gridY="7" gridWidth="2" gridHeight="1" fill="0" ipadX="216" ipadY="0" insetsTop="0" insetsLeft="31" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/> + </Constraint> + </Constraints> </Component> <Component class="javax.swing.JCheckBox" name="ocrOnlyCheckbox"> <Properties> @@ -172,6 +220,24 @@ <Events> <EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="ocrOnlyCheckboxActionPerformed"/> </Events> + <Constraints> + <Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> + <GridBagConstraints gridX="0" gridY="6" gridWidth="2" gridHeight="1" fill="0" ipadX="0" ipadY="0" insetsTop="0" insetsLeft="31" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/> + </Constraint> + </Constraints> + </Component> + <Component class="javax.swing.JCheckBox" name="solrCheckbox"> + <Properties> + <Property name="selected" type="boolean" value="true"/> + <Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> + <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchJobSettingsPanel.solrCheckbox.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> + </Property> + </Properties> + <Constraints> + <Constraint 
layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> + <GridBagConstraints gridX="0" gridY="8" gridWidth="2" gridHeight="1" fill="0" ipadX="0" ipadY="0" insetsTop="7" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/> + </Constraint> + </Constraints> </Component> </SubComponents> </Form> diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettingsPanel.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettingsPanel.java index fcb3ed9d9384a778335d640aefa9c36fb430ed75..e39ddb465df89d867ed1870dedce483affec6c34 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettingsPanel.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettingsPanel.java @@ -40,6 +40,8 @@ */ @SuppressWarnings("PMD.SingularField") // UI widgets cause lots of false positives public final class KeywordSearchJobSettingsPanel extends IngestModuleIngestJobSettingsPanel implements PropertyChangeListener { + + private static final long serialVersionUID = 1L; private final KeywordListsTableModel tableModel = new KeywordListsTableModel(); private final List<String> keywordListNames = new ArrayList<>(); private final Map<String, Boolean> keywordListStates = new HashMap<>(); @@ -65,6 +67,7 @@ private void initializeKeywordListSettings(KeywordSearchJobSettings settings) { ocrCheckBox.setSelected(settings.isOCREnabled()); limitedOcrCheckbox.setSelected(settings.isLimitedOCREnabled()); ocrOnlyCheckbox.setSelected(settings.isOCROnly()); + solrCheckbox.setSelected(settings.isIndexToSolrEnabled()); handleOcrEnabled(settings.isOCREnabled()); } @@ -194,7 +197,7 @@ public IngestModuleIngestJobSettings getSettings() { } } return new KeywordSearchJobSettings(enabledListNames, disabledListNames, - this.ocrCheckBox.isSelected(), 
this.limitedOcrCheckbox.isSelected(), this.ocrOnlyCheckbox.isSelected()); + this.ocrCheckBox.isSelected(), this.limitedOcrCheckbox.isSelected(), this.ocrOnlyCheckbox.isSelected(), this.solrCheckbox.isSelected()); } void reset(KeywordSearchJobSettings newSettings) { @@ -253,6 +256,7 @@ public Class<?> getColumnClass(int c) { @SuppressWarnings("unchecked") // <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents private void initComponents() { + java.awt.GridBagConstraints gridBagConstraints; titleLabel = new javax.swing.JLabel(); listsScrollPane = new javax.swing.JScrollPane(); @@ -264,12 +268,17 @@ private void initComponents() { ocrCheckBox = new javax.swing.JCheckBox(); limitedOcrCheckbox = new javax.swing.JCheckBox(); ocrOnlyCheckbox = new javax.swing.JCheckBox(); + solrCheckbox = new javax.swing.JCheckBox(); setPreferredSize(new java.awt.Dimension(300, 170)); +<<<<<<< HEAD setLayout(new javax.swing.BoxLayout(this, javax.swing.BoxLayout.Y_AXIS)); titleLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.titleLabel.text")); // NOI18N add(titleLabel); +======= + setLayout(new java.awt.GridBagLayout()); +>>>>>>> aae905c78496f2405c2af41a79e6fbb3a4c00e1c listsScrollPane.setBorder(javax.swing.BorderFactory.createEtchedBorder()); listsScrollPane.setAlignmentX(0.0F); @@ -293,12 +302,37 @@ private void initComponents() { listsScrollPane.setViewportView(listsTable); listsTable.setDefaultRenderer(String.class, new SimpleTableCellRenderer()); +<<<<<<< HEAD add(listsScrollPane); +======= + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 0; + gridBagConstraints.gridy = 1; + gridBagConstraints.gridwidth = 7; + gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH; + gridBagConstraints.ipadx = 284; + gridBagConstraints.ipady = -71; + gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; + gridBagConstraints.weightx = 1.0; + 
gridBagConstraints.weighty = 1.0; + gridBagConstraints.insets = new java.awt.Insets(6, 10, 0, 0); + add(listsScrollPane, gridBagConstraints); + + titleLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.titleLabel.text")); // NOI18N + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 0; + gridBagConstraints.gridy = 0; + gridBagConstraints.gridwidth = 2; + gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; + gridBagConstraints.insets = new java.awt.Insets(7, 10, 0, 0); + add(titleLabel, gridBagConstraints); +>>>>>>> aae905c78496f2405c2af41a79e6fbb3a4c00e1c languagesLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.languagesLabel.text")); // NOI18N languagesLabel.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.languagesLabel.toolTipText")); // NOI18N languagesLabel.setPreferredSize(new java.awt.Dimension(294, 35)); languagesLabel.setVerticalTextPosition(javax.swing.SwingConstants.BOTTOM); +<<<<<<< HEAD add(languagesLabel); languagesValLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.languagesValLabel.text")); // NOI18N @@ -313,6 +347,44 @@ private void initComponents() { keywordSearchEncodings.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.keywordSearchEncodings.text")); // NOI18N keywordSearchEncodings.setBorder(javax.swing.BorderFactory.createEmptyBorder(5, 1, 5, 1)); add(keywordSearchEncodings); +======= + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 0; + gridBagConstraints.gridy = 2; + gridBagConstraints.gridwidth = 8; + gridBagConstraints.ipadx = 25; + gridBagConstraints.ipady = -22; + gridBagConstraints.anchor = 
java.awt.GridBagConstraints.NORTHWEST; + gridBagConstraints.insets = new java.awt.Insets(6, 10, 0, 0); + add(languagesLabel, gridBagConstraints); + + languagesValLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.languagesValLabel.text")); // NOI18N + languagesValLabel.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.languagesValLabel.toolTipText")); // NOI18N + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 0; + gridBagConstraints.gridy = 3; + gridBagConstraints.gridwidth = 6; + gridBagConstraints.ipadx = 270; + gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; + gridBagConstraints.insets = new java.awt.Insets(6, 20, 0, 0); + add(languagesValLabel, gridBagConstraints); + + encodingsLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.encodingsLabel.text")); // NOI18N + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 0; + gridBagConstraints.gridy = 4; + gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; + gridBagConstraints.insets = new java.awt.Insets(11, 10, 0, 0); + add(encodingsLabel, gridBagConstraints); + + keywordSearchEncodings.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.keywordSearchEncodings.text")); // NOI18N + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 1; + gridBagConstraints.gridy = 4; + gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; + gridBagConstraints.insets = new java.awt.Insets(11, 10, 0, 0); + add(keywordSearchEncodings, gridBagConstraints); +>>>>>>> aae905c78496f2405c2af41a79e6fbb3a4c00e1c ocrCheckBox.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, 
"KeywordSearchJobSettingsPanel.ocrCheckBox.text")); // NOI18N ocrCheckBox.addActionListener(new java.awt.event.ActionListener() { @@ -320,7 +392,17 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { ocrCheckBoxActionPerformed(evt); } }); +<<<<<<< HEAD add(ocrCheckBox); +======= + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 0; + gridBagConstraints.gridy = 5; + gridBagConstraints.gridwidth = 2; + gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; + gridBagConstraints.insets = new java.awt.Insets(7, 10, 0, 0); + add(ocrCheckBox, gridBagConstraints); +>>>>>>> aae905c78496f2405c2af41a79e6fbb3a4c00e1c limitedOcrCheckbox.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.limitedOcrCheckbox.text")); // NOI18N limitedOcrCheckbox.setBorder(javax.swing.BorderFactory.createEmptyBorder(1, 20, 1, 1)); @@ -330,7 +412,18 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { limitedOcrCheckboxActionPerformed(evt); } }); +<<<<<<< HEAD add(limitedOcrCheckbox); +======= + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 0; + gridBagConstraints.gridy = 7; + gridBagConstraints.gridwidth = 2; + gridBagConstraints.ipadx = 216; + gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; + gridBagConstraints.insets = new java.awt.Insets(0, 31, 0, 0); + add(limitedOcrCheckbox, gridBagConstraints); +>>>>>>> aae905c78496f2405c2af41a79e6fbb3a4c00e1c ocrOnlyCheckbox.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.ocrOnlyCheckbox.text")); // NOI18N ocrOnlyCheckbox.setBorder(javax.swing.BorderFactory.createEmptyBorder(1, 20, 1, 1)); @@ -339,7 +432,27 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { ocrOnlyCheckboxActionPerformed(evt); } }); +<<<<<<< HEAD add(ocrOnlyCheckbox); +======= + gridBagConstraints = new 
java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 0; + gridBagConstraints.gridy = 6; + gridBagConstraints.gridwidth = 2; + gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; + gridBagConstraints.insets = new java.awt.Insets(0, 31, 0, 0); + add(ocrOnlyCheckbox, gridBagConstraints); + + solrCheckbox.setSelected(true); + solrCheckbox.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.solrCheckbox.text")); // NOI18N + gridBagConstraints = new java.awt.GridBagConstraints(); + gridBagConstraints.gridx = 0; + gridBagConstraints.gridy = 8; + gridBagConstraints.gridwidth = 2; + gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; + gridBagConstraints.insets = new java.awt.Insets(7, 10, 0, 0); + add(solrCheckbox, gridBagConstraints); +>>>>>>> aae905c78496f2405c2af41a79e6fbb3a4c00e1c }// </editor-fold>//GEN-END:initComponents private void ocrCheckBoxActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_ocrCheckBoxActionPerformed @@ -365,6 +478,7 @@ private void ocrOnlyCheckboxActionPerformed(java.awt.event.ActionEvent evt) {//G private javax.swing.JTable listsTable; private javax.swing.JCheckBox ocrCheckBox; private javax.swing.JCheckBox ocrOnlyCheckbox; + private javax.swing.JCheckBox solrCheckbox; private javax.swing.JLabel titleLabel; // End of variables declaration//GEN-END:variables } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchModuleFactory.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchModuleFactory.java index c6b2c39f888228c1ad2a6463c65dc35e7371bb20..43bbafaf063761d1f8b03e1ba361564ad65bbb60 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchModuleFactory.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchModuleFactory.java @@ -130,7 +130,12 @@ public boolean isDataArtifactIngestModuleFactory() { @Override public DataArtifactIngestModule 
createDataArtifactIngestModule(IngestModuleIngestJobSettings settings) { - return new KwsDataArtifactIngestModule(); + if (!(settings instanceof KeywordSearchJobSettings)) { + throw new IllegalArgumentException(NbBundle.getMessage(this.getClass(), + "KeywordSearchModuleFactory.createFileIngestModule.exception.msg")); + } + + return new KwsDataArtifactIngestModule((KeywordSearchJobSettings) settings); } @Override @@ -140,7 +145,12 @@ public boolean isAnalysisResultIngestModuleFactory() { @Override public AnalysisResultIngestModule createAnalysisResultIngestModule(IngestModuleIngestJobSettings settings) { - return new KwsAnalysisResultIngestModule(); + if (!(settings instanceof KeywordSearchJobSettings)) { + throw new IllegalArgumentException(NbBundle.getMessage(this.getClass(), + "KeywordSearchModuleFactory.createFileIngestModule.exception.msg")); + } + + return new KwsAnalysisResultIngestModule((KeywordSearchJobSettings) settings); } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchSettings.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchSettings.java index b7057a2c89d0061bdd4d3ea4954d2a6952325760..0087c7d05e01ebe7d31df0ad337ce08cf12c8f7e 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchSettings.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchSettings.java @@ -29,7 +29,6 @@ import org.sleuthkit.autopsy.coreutils.StringExtract; import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT; import org.sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.StringsExtractOptions; -import org.sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.UpdateFrequency; //This file contains constants and settings for KeywordSearch class KeywordSearchSettings { @@ -46,34 +45,9 @@ class KeywordSearchSettings { static final boolean LIMITED_OCR_ENABLED_DEFAULT = false; private static boolean skipKnown = true; private static final 
Logger logger = Logger.getLogger(KeywordSearchSettings.class.getName()); - private static UpdateFrequency UpdateFreq = UpdateFrequency.DEFAULT; private static List<StringExtract.StringExtractUnicodeTable.SCRIPT> stringExtractScripts = new ArrayList<>(); private static Map<String, String> stringExtractOptions = new HashMap<>(); - /** - * Gets the update Frequency from KeywordSearch_Options.properties - * - * @return KeywordSearchIngestModule's update frequency - */ - static UpdateFrequency getUpdateFrequency() { - if (ModuleSettings.getConfigSetting(PROPERTIES_OPTIONS, "UpdateFrequency") != null) { //NON-NLS - return UpdateFrequency.valueOf(ModuleSettings.getConfigSetting(PROPERTIES_OPTIONS, "UpdateFrequency")); //NON-NLS - } - //if it failed, return the default/last known value - logger.log(Level.WARNING, "Could not read property for UpdateFrequency, returning backup value."); //NON-NLS - return UpdateFrequency.DEFAULT; - } - - /** - * Sets the update frequency and writes to KeywordSearch_Options.properties - * - * @param freq Sets KeywordSearchIngestModule to this value. - */ - static void setUpdateFrequency(UpdateFrequency freq) { - ModuleSettings.setConfigSetting(PROPERTIES_OPTIONS, "UpdateFrequency", freq.name()); //NON-NLS - UpdateFreq = freq; - } - /** * Sets whether or not to skip adding known good files to the search during * index. 
@@ -243,11 +217,6 @@ static void setDefaults() { logger.log(Level.INFO, "No configuration for NSRL found, generating default..."); //NON-NLS KeywordSearchSettings.setSkipKnown(true); } - //setting default Update Frequency - if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_OPTIONS, "UpdateFrequency")) { //NON-NLS - logger.log(Level.INFO, "No configuration for Update Frequency found, generating default..."); //NON-NLS - KeywordSearchSettings.setUpdateFrequency(UpdateFrequency.DEFAULT); - } //setting default Extract UTF8 if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_OPTIONS, StringsExtractOptions.EXTRACT_UTF8.toString())) { logger.log(Level.INFO, "No configuration for UTF8 found, generating default..."); //NON-NLS diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchUtil.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchUtil.java index abbc893f85866d16ad57a6c93c560911846c42df..8cab5236ec7c33277d1e47398d95bc98d95189e4 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchUtil.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchUtil.java @@ -20,11 +20,22 @@ import java.awt.Component; import java.io.File; +import java.io.Reader; +import java.util.regex.Matcher; import org.sleuthkit.autopsy.coreutils.Logger; import javax.swing.JOptionPane; +import org.openide.util.Lookup; import org.openide.windows.WindowManager; +import org.sleuthkit.autopsy.textextractors.TextExtractor; +import org.sleuthkit.autopsy.textextractors.TextExtractorFactory; +import org.sleuthkit.datamodel.AbstractFile; +import org.sleuthkit.datamodel.BlackboardArtifact; +import org.sleuthkit.datamodel.Content; +import org.sleuthkit.datamodel.TskCoreException; class KeywordSearchUtil { + + private static final String SNIPPET_DELIMITER = String.valueOf(Character.toChars(171)); public enum DIALOG_MESSAGE_TYPE { @@ -142,6 +153,36 @@ static KeywordSearchQuery 
getQueryForKeyword(Keyword keyword, KeywordList keywor } return query; } + + /** + * Make a snippet from the given content that has the given hit plus some + * surrounding context. + * + * @param content The content to extract the snippet from. + * + * @param hitMatcher The Matcher that has the start/end info for where the + * hit is in the content. + * @param hit The actual hit in the content. + * + * @return A snippet extracted from content that contains hit plus some + * surrounding context. + */ + static String makeSnippet(String content, Matcher hitMatcher, String hit) { + // Get the snippet from the document. + final int end = hitMatcher.end(); + final int start = hitMatcher.start(); + + return makeSnippet(content, start, end, hit); + } + + static String makeSnippet(String content, int startOffset, int endOffset, String hit) { + // Get the snippet from the document. + int maxIndex = content.length() - 1; + + return content.substring(Integer.max(0, startOffset - 20), Integer.max(0, startOffset)) + + SNIPPET_DELIMITER + hit + SNIPPET_DELIMITER + + content.substring(Integer.min(maxIndex, endOffset), Integer.min(maxIndex, endOffset + 20)); + } /** * Is the Keyword Search list at absPath an XML list? 
@@ -154,4 +195,40 @@ static boolean isXMLList(String absPath) { //TODO: make this more robust, if necessary return new File(absPath).getName().endsWith(".xml"); //NON-NLS } + + static Reader getReader(Content content) throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException{ + return getReader(content, null); + } + + static Reader getReader(Content content, Lookup stringsExtractionContext) throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException{ + Reader reader = null; + if (content instanceof BlackboardArtifact) { + BlackboardArtifact artifact = (BlackboardArtifact) content; + if (artifact.getArtifactID() > 0) { + /* + * Artifact indexing is only supported for artifacts that use + * negative artifact ids to avoid overlapping with the object + * ids of other types of Content. + */ + return null; + } + TextExtractor blackboardExtractor = TextExtractorFactory.getExtractor(content, null); + reader = blackboardExtractor.getReader(); + + } else if (content instanceof AbstractFile) { + TextExtractor stringsExtractor = TextExtractorFactory.getStringsExtractor( content, stringsExtractionContext); + reader = stringsExtractor.getReader(); + } else { + try { + TextExtractor contentExtractor = TextExtractorFactory.getExtractor(content, null); + reader = contentExtractor.getReader(); + } catch (TextExtractorFactory.NoTextExtractorFound | TextExtractor.InitReaderException ex) { + // Try the StringsTextExtractor if Tika extraction fails.
+ TextExtractor stringsExtractor = TextExtractorFactory.getStringsExtractor(content, null); + reader = stringsExtractor.getReader(); + } + } + + return reader; + } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KwsAnalysisResultIngestModule.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KwsAnalysisResultIngestModule.java index b82c4d91fef544c3a43d06e5c68aef54a2f1aae9..2697024752136b351382b236433b282c62ffb3fa 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KwsAnalysisResultIngestModule.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KwsAnalysisResultIngestModule.java @@ -18,16 +18,14 @@ */ package org.sleuthkit.autopsy.keywordsearch; +import java.io.Reader; import java.util.logging.Level; -import org.openide.util.Lookup; import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.ingest.AnalysisResultIngestModule; import org.sleuthkit.autopsy.ingest.IngestJobContext; import org.sleuthkit.autopsy.ingest.IngestModule; -import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService; import org.sleuthkit.datamodel.AnalysisResult; import org.sleuthkit.datamodel.BlackboardArtifact; -import org.sleuthkit.datamodel.TskCoreException; /** * An analysis result ingest module that indexes text for keyword search. 
All @@ -40,21 +38,33 @@ public class KwsAnalysisResultIngestModule implements AnalysisResultIngestModule private static final Logger LOGGER = Logger.getLogger(KeywordSearchIngestModule.class.getName()); private static final int TSK_KEYWORD_HIT_TYPE_ID = BlackboardArtifact.Type.TSK_KEYWORD_HIT.getTypeID(); private IngestJobContext context; - private KeywordSearchService searchService; + private final KeywordSearchJobSettings settings; + KwsAnalysisResultIngestModule(KeywordSearchJobSettings settings) { + this.settings = settings; + } + @Override public void startUp(IngestJobContext context) throws IngestModule.IngestModuleException { this.context = context; - searchService = Lookup.getDefault().lookup(KeywordSearchService.class); } @Override public IngestModule.ProcessResult process(AnalysisResult result) { try { if (result.getType().getTypeID() != TSK_KEYWORD_HIT_TYPE_ID) { - searchService.index(result); + Ingester ingester = Ingester.getDefault(); + Reader blackboardExtractedTextReader = KeywordSearchUtil.getReader(result); + String sourceName = result.getDisplayName() + "_" + result.getArtifactID(); + ingester.indexMetaDataOnly(result, sourceName); + ingester.search(blackboardExtractedTextReader, + result.getArtifactID(), + sourceName, result, + context, true, + settings.isIndexToSolrEnabled(), + settings.getNamesOfEnabledKeyWordLists()); } - } catch (TskCoreException ex) { + } catch (Exception ex) { LOGGER.log(Level.SEVERE, String.format("Error indexing analysis result '%s' (job ID=%d)", result, context.getJobId()), ex); //NON-NLS return IngestModule.ProcessResult.ERROR; } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KwsDataArtifactIngestModule.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KwsDataArtifactIngestModule.java index fe4cac8b4fd095657450fcaa259da2b2073f9478..81e5dee952ea266e316be093b3792f74a58cd4f1 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KwsDataArtifactIngestModule.java +++ 
b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KwsDataArtifactIngestModule.java @@ -18,12 +18,13 @@ */ package org.sleuthkit.autopsy.keywordsearch; +import java.io.Reader; import java.util.logging.Level; -import org.openide.util.Lookup; import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.ingest.DataArtifactIngestModule; import org.sleuthkit.autopsy.ingest.IngestJobContext; -import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService; +import org.sleuthkit.autopsy.textextractors.TextExtractor; +import org.sleuthkit.autopsy.textextractors.TextExtractorFactory; import org.sleuthkit.datamodel.BlackboardArtifact; import org.sleuthkit.datamodel.DataArtifact; import org.sleuthkit.datamodel.TskCoreException; @@ -39,24 +40,36 @@ public class KwsDataArtifactIngestModule implements DataArtifactIngestModule { private static final Logger LOGGER = Logger.getLogger(KeywordSearchIngestModule.class.getName()); private static final int TSK_ASSOCIATED_OBJECT_TYPE_ID = BlackboardArtifact.Type.TSK_ASSOCIATED_OBJECT.getTypeID(); private IngestJobContext context; - private KeywordSearchService searchService; + private final KeywordSearchJobSettings settings; + KwsDataArtifactIngestModule(KeywordSearchJobSettings settings) { + this.settings = settings; + } + @Override public void startUp(IngestJobContext context) throws IngestModuleException { this.context = context; - searchService = Lookup.getDefault().lookup(KeywordSearchService.class); } @Override public ProcessResult process(DataArtifact artifact) { try { if (artifact.getType().getTypeID() != TSK_ASSOCIATED_OBJECT_TYPE_ID) { - searchService.index(artifact); + Ingester ingester = Ingester.getDefault(); + Reader blackboardExtractedTextReader = KeywordSearchUtil.getReader(artifact); + String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID(); + ingester.indexMetaDataOnly(artifact, sourceName); + ingester.search(blackboardExtractedTextReader, + artifact.getArtifactID(), + 
sourceName, artifact, + context, true, + settings.isIndexToSolrEnabled(), + settings.getNamesOfEnabledKeyWordLists()); } - } catch (TskCoreException ex) { + } catch (Exception ex) { LOGGER.log(Level.SEVERE, String.format("Error indexing data artifact '%s' (job ID=%d)", artifact, context.getJobId()), ex); //NON-NLS return ProcessResult.ERROR; - } + } return ProcessResult.OK; } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java index 20c567a42db1a904cd5bb2139d280a07c3377d55..0d70e5af43d4a792423a8e523f78da4ec818d704 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java @@ -46,7 +46,6 @@ import static org.sleuthkit.autopsy.keywordsearch.KeywordSearchSettings.MODULE_NAME; import org.sleuthkit.datamodel.AbstractFile; import org.sleuthkit.datamodel.Account; -import org.sleuthkit.datamodel.AccountFileInstance; import org.sleuthkit.datamodel.BlackboardArtifact; import org.sleuthkit.datamodel.BlackboardAttribute; import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE; @@ -431,7 +430,7 @@ private List<KeywordHit> createKeywordHits(SolrDocument solrDoc) throws TskCoreE keywordsFoundInThisDocument.put(hit, hit); if (artifactAttributeType == null) { - hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit)); + hits.add(new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit)); } else { switch (artifactAttributeType) { case TSK_EMAIL: @@ -442,7 +441,7 @@ private List<KeywordHit> createKeywordHits(SolrDocument solrDoc) throws TskCoreE */ if (hit.length() >= MIN_EMAIL_ADDR_LENGTH && DomainValidator.getInstance(true).isValidTld(hit.substring(hit.lastIndexOf('.')))) { - hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit)); + hits.add(new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, 
hitMatcher, hit), hit)); } break; @@ -459,14 +458,14 @@ private List<KeywordHit> createKeywordHits(SolrDocument solrDoc) throws TskCoreE if (ccnMatcher.find()) { final String group = ccnMatcher.group("ccn"); if (CreditCardValidator.isValidCCN(group)) { - hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit)); + hits.add(new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit)); } } } break; default: - hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit)); + hits.add(new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit)); break; } } @@ -486,30 +485,6 @@ private List<KeywordHit> createKeywordHits(SolrDocument solrDoc) throws TskCoreE return hits; } - /** - * Make a snippet from the given content that has the given hit plus some - * surrounding context. - * - * @param content The content to extract the snippet from. - * - * @param hitMatcher The Matcher that has the start/end info for where the - * hit is in the content. - * @param hit The actual hit in the content. - * - * @return A snippet extracted from content that contains hit plus some - * surrounding context. - */ - private String makeSnippet(String content, Matcher hitMatcher, String hit) { - // Get the snippet from the document. 
- int maxIndex = content.length() - 1; - final int end = hitMatcher.end(); - final int start = hitMatcher.start(); - - return content.substring(Integer.max(0, start - 20), Integer.max(0, start)) - + SNIPPET_DELIMITER + hit + SNIPPET_DELIMITER - + content.substring(Integer.min(maxIndex, end), Integer.min(maxIndex, end + 20)); - } - @Override public void addFilter(KeywordQueryFilter filter) { this.filters.add(filter); @@ -573,6 +548,11 @@ synchronized public String getEscapedQueryString() { */ @Override public BlackboardArtifact createKeywordHitArtifact(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) { + return createKeywordHitArtifact(content, originalKeyword, foundKeyword, hit, snippet, listName, ingestJobId); + } + + + public static BlackboardArtifact createKeywordHitArtifact(Content content, Keyword originalKW, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) { final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName(); if (content == null) { @@ -583,8 +563,8 @@ public BlackboardArtifact createKeywordHitArtifact(Content content, Keyword foun /* * Credit Card number hits are handled differently */ - if (originalKeyword.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) { - createCCNAccount(content, foundKeyword, hit, snippet, listName, ingestJobId); + if (originalKW.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) { + createCCNAccount(content, originalKW, foundKeyword, hit, snippet, listName, ingestJobId); return null; } @@ -594,8 +574,10 @@ public BlackboardArtifact createKeywordHitArtifact(Content content, Keyword foun */ Collection<BlackboardAttribute> attributes = new ArrayList<>(); - attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm())); - attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, getQueryString())); + attributes.add(new 
BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm().toLowerCase())); + if(!originalKW.searchTermIsWholeWord() || !originalKW.searchTermIsLiteral()) { + attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, originalKW.getSearchTerm())); + } if (StringUtils.isNotBlank(listName)) { attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName)); @@ -608,8 +590,12 @@ public BlackboardArtifact createKeywordHitArtifact(Content content, Keyword foun -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID)) ); - if (originalKeyword.searchTermIsLiteral()) { - attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal())); + if (originalKW.searchTermIsLiteral()) { + if(!originalKW.searchTermIsWholeWord()) { + attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal())); + } else { + attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.LITERAL.ordinal())); + } } else { attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.REGEX.ordinal())); } @@ -625,11 +611,11 @@ public BlackboardArtifact createKeywordHitArtifact(Content content, Keyword foun } } - private void createCCNAccount(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) { + private static void createCCNAccount(Content content, Keyword originalKW, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) { final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName(); - if (originalKeyword.getArtifactAttributeType() != ATTRIBUTE_TYPE.TSK_CARD_NUMBER) { + if (originalKW.getArtifactAttributeType() != 
ATTRIBUTE_TYPE.TSK_CARD_NUMBER) { LOGGER.log(Level.SEVERE, "Keyword hit is not a credit card number"); //NON-NLS return; } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java index 5e64dcbd9029e9c6ee0d6f7fe5e21552234032a7..d9dd4d921a4a4284acb003fd1cb7aaadd4c573ba 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java @@ -93,28 +93,22 @@ public void index(Content content) throws TskCoreException { return; } try { - TextExtractor blackboardExtractor = TextExtractorFactory.getExtractor(content, null); - Reader blackboardExtractedTextReader = blackboardExtractor.getReader(); + Reader blackboardExtractedTextReader = KeywordSearchUtil.getReader(content); String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID(); ingester.indexMetaDataOnly(artifact, sourceName); - ingester.indexText(blackboardExtractedTextReader, artifact.getArtifactID(), sourceName, content, null); - } catch (Ingester.IngesterException | TextExtractorFactory.NoTextExtractorFound | TextExtractor.InitReaderException ex) { + // Will not cause an inline search because the keyword list is null + ingester.search(blackboardExtractedTextReader, artifact.getArtifactID(), sourceName, content, null, true, true, null); + } catch (Exception ex) { throw new TskCoreException("Error indexing artifact", ex); } } else { try { - TextExtractor contentExtractor = TextExtractorFactory.getExtractor(content, null); - Reader contentExtractedTextReader = contentExtractor.getReader(); - ingester.indexText(contentExtractedTextReader, content.getId(), content.getName(), content, null); - } catch (TextExtractorFactory.NoTextExtractorFound | Ingester.IngesterException | TextExtractor.InitReaderException ex) { - try { - // Try the StringsTextExtractor if Tika extractions fails.
- TextExtractor stringsExtractor = TextExtractorFactory.getStringsExtractor(content, null); - Reader stringsExtractedTextReader = stringsExtractor.getReader(); - ingester.indexStrings(stringsExtractedTextReader, content.getId(), content.getName(), content, null); - } catch (Ingester.IngesterException | TextExtractor.InitReaderException ex1) { - throw new TskCoreException("Error indexing content", ex1); - } + + Reader reader = KeywordSearchUtil.getReader(content); + // Will not cause an inline search because the keyword list is null + ingester.search(reader, content.getId(), content.getName(), content, null, true, true, null); + } catch (Exception ex) { + throw new TskCoreException("Error indexing content", ex); } // only do a Solr commit if ingest is not running. If ingest is running, the changes will // be committed via a periodic commit or via final commit after the ingest job has finished. @@ -421,11 +415,11 @@ public void indexArtifact(BlackboardArtifact artifact) throws TskCoreException { try { String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID(); - TextExtractor blackboardExtractor = TextExtractorFactory.getExtractor((Content) artifact, null); + TextExtractor blackboardExtractor = TextExtractorFactory.getExtractor(artifact, null); Reader blackboardExtractedTextReader = blackboardExtractor.getReader(); ingester.indexMetaDataOnly(artifact, sourceName); - ingester.indexText(blackboardExtractedTextReader, artifact.getId(), sourceName, artifact, null); - } catch (Ingester.IngesterException | TextExtractorFactory.NoTextExtractorFound | TextExtractor.InitReaderException ex) { + ingester.search(blackboardExtractedTextReader, artifact.getId(), sourceName, artifact, null, true, true, null); + } catch (Exception ex) { throw new TskCoreException(ex.getCause().getMessage(), ex); } } diff --git a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java
b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java index 088af9f4b2e876262c86ecabdb341493d9020aeb..a88ad8cad85dd6feb52ccbdcd0cae974ef506a1e 100644 --- a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java +++ b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java @@ -420,15 +420,6 @@ private void analyzeRegistryFiles(long ingestJobId) { Report report = currentCase.addReport(regOutputFiles.fullPlugins, NbBundle.getMessage(this.getClass(), "ExtractRegistry.parentModuleName.noSpace"), "RegRipper " + regFile.getUniquePath(), regFile); //NON-NLS - - // Index the report content so that it will be available for keyword search. - KeywordSearchService searchService = Lookup.getDefault().lookup(KeywordSearchService.class); - if (null == searchService) { - logger.log(Level.WARNING, "Keyword search service not found. Report will not be indexed"); - } else { - searchService.index(report); - report.close(); - } } catch (TskCoreException e) { this.addErrorMessage("Error adding regripper output as Autopsy report: " + e.getLocalizedMessage()); //NON-NLS }