diff --git a/KeywordSearch/ivy.xml b/KeywordSearch/ivy.xml index 174c2e22fd1cb57e19a63f91736c5caa88c1ed8a..7b417a99c73542d8fb2ad8790708b9d84fc54e5d 100644 --- a/KeywordSearch/ivy.xml +++ b/KeywordSearch/ivy.xml @@ -18,8 +18,7 @@ <dependency conf="solr-war->default" org="org.apache.solr" name="solr" rev="4.10.4" transitive="false" /> <!-- the war file for embedded Solr 4 --> <dependency conf="solr-libs->default" name="solr-cell" rev="8.11.2" org="org.apache.solr"/> - <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core --> - <dependency conf="autopsy->default" org="org.apache.lucene" name="lucene-core" rev="8.11.2"/> + <!-- Autopsy --> <dependency conf="autopsy->default" org="org.apache.solr" name="solr-solrj" rev="8.11.2"/> <dependency conf="autopsy->default" org="com.optimaize.languagedetector" name="language-detector" rev="0.6"/> diff --git a/KeywordSearch/nbproject/project.properties b/KeywordSearch/nbproject/project.properties index 4046543d120422dc4dfdeefc62bb8599a1090153..d639073ca796e21648a8cdcf151698f0bb23af2a 100644 --- a/KeywordSearch/nbproject/project.properties +++ b/KeywordSearch/nbproject/project.properties @@ -44,7 +44,6 @@ file.reference.stax2-api-4.2.1.jar=release/modules/ext/stax2-api-4.2.1.jar file.reference.woodstox-core-6.2.4.jar=release/modules/ext/woodstox-core-6.2.4.jar file.reference.zookeeper-3.8.0.jar=release/modules/ext/zookeeper-3.8.0.jar file.reference.zookeeper-jute-3.8.0.jar=release/modules/ext/zookeeper-jute-3.8.0.jar -file.reference.lucene-core-8.11.2.jar=release/modules/ext/lucene-core-8.11.2.jar javac.source=1.8 javac.compilerargs=-Xlint -Xlint:-serial license.file=../LICENSE-2.0.txt diff --git a/KeywordSearch/nbproject/project.xml b/KeywordSearch/nbproject/project.xml index c5777d8a14d97d809d3f19218d25f653f1521945..9b8fa50bda4fb8fa28cf2f58d33ff927d6518097 100644 --- a/KeywordSearch/nbproject/project.xml +++ b/KeywordSearch/nbproject/project.xml @@ -418,10 +418,6 @@ <runtime-relative-path>ext/zookeeper-jute-3.8.0.jar</runtime-relative-path> <binary-origin>release/modules/ext/zookeeper-jute-3.8.0.jar</binary-origin> </class-path-extension> - <class-path-extension> - <runtime-relative-path>ext/lucene-core-8.11.2.jar</runtime-relative-path> - <binary-origin>release/modules/ext/lucene-core-8.11.2.jar</binary-origin> - </class-path-extension> </data> </configuration> </project> diff --git a/KeywordSearch/solr/server/solr/configsets/AutopsyConfig/conf/solrconfig.xml b/KeywordSearch/solr/server/solr/configsets/AutopsyConfig/conf/solrconfig.xml index 92ce238a46ee9c670c032c3dac32e279a3526314..9fde79cd365a7e1535458692125aa88ce03c1d5e 100755 --- a/KeywordSearch/solr/server/solr/configsets/AutopsyConfig/conf/solrconfig.xml +++ b/KeywordSearch/solr/server/solr/configsets/AutopsyConfig/conf/solrconfig.xml @@ -301,7 +301,7 @@ <autoCommit> <maxTime>300000</maxTime> <!-- maxDocs>15000</maxDocs --> - <openSearcher>false</openSearcher> + <openSearcher>true</openSearcher> </autoCommit> <!-- softAutoCommit is like autoCommit except it causes a diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AdHocSearchPanel.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AdHocSearchPanel.java index ce6c23f57d40f6efe36c176332c628620c87df0c..c9aa061400ccccaccfc1a3b8115dca3d83fb2a6e 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AdHocSearchPanel.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AdHocSearchPanel.java @@ -112,9 +112,9 @@ public void search(boolean saveResults) { } if (filesIndexed == 0) { if 
(isIngestRunning) { - // ELTODO this message should be dependent on whether Solr indexing is enabled or not KeywordSearchUtil.displayDialog(keywordSearchErrorDialogHeader, NbBundle.getMessage(this.getClass(), - "AbstractKeywordSearchPerformer.search.noFilesInIdxMsg"), KeywordSearchUtil.DIALOG_MESSAGE_TYPE.ERROR); + "AbstractKeywordSearchPerformer.search.noFilesInIdxMsg", + KeywordSearchSettings.getUpdateFrequency().getTime()), KeywordSearchUtil.DIALOG_MESSAGE_TYPE.ERROR); } else { KeywordSearchUtil.displayDialog(keywordSearchErrorDialogHeader, NbBundle.getMessage(this.getClass(), "AbstractKeywordSearchPerformer.search.noFilesIdxdMsg"), KeywordSearchUtil.DIALOG_MESSAGE_TYPE.ERROR); diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties index e34155fcd537c7c446b904b76c0219100148f017..831379e5376a1a87c16f15c45f85d12a996294cc 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties @@ -39,8 +39,8 @@ AbstractKeywordSearchPerformer.search.invalidSyntaxHeader=Invalid query statemen AbstractKeywordSearchPerformer.search.searchIngestInProgressTitle=Keyword Search Ingest in Progress AbstractKeywordSearchPerformer.search.ingestInProgressBody=<html>Keyword Search Ingest is currently running.<br />Not all files have been indexed and this search might yield incomplete results.<br />Do you want to proceed with this search anyway?</html> AbstractKeywordSearchPerformer.search.emptyKeywordErrorBody=Keyword list is empty, please add at least one keyword to the list -AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>No files are in index yet. <br />If Solr keyword search indexing was enabled, wait for ingest to complete</html> -AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>No files were indexed.<br />Re-ingest the image with the Keyword Search Module and Solr indexing enabled. </html> +AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>No files are in index yet. <br />Try again later. Index is updated every {0} minutes.</html> +AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>No files were indexed.<br />Re-ingest the image with the Keyword Search Module enabled. </html> ExtractedContentViewer.toolTip=Displays extracted text from files and keyword-search results. Requires Keyword Search ingest to be run on a file to activate this viewer. ExtractedContentViewer.getTitle=Indexed Text HighlightedMatchesSource.toString=Search Results @@ -122,7 +122,7 @@ KeywordSearchListsManagementPanel.fileExtensionFilterLbl=Autopsy Keyword List Fi KeywordSearchListsManagementPanel.fileExtensionFilterLb2=Encase Keyword List File (txt) KeywordSearch.listImportFeatureTitle=Keyword List Import KeywordSearchIngestModule.moduleName=Keyword Search -KeywordSearchIngestModule.moduleDescription=Performs file indexing and search using selected keyword lists. +KeywordSearchIngestModule.moduleDescription=Performs file indexing and periodic search using keywords and regular expressions in lists. 
DropdownSearchPanel.keywordTextField.text= KeywordSearchPanel.searchDropButton.text=Keyword Search DropdownSearchPanel.exactRadioButton.text=Exact Match @@ -211,6 +211,11 @@ KeywordSearchGlobalLanguageSettingsPanel.enableUTF8Checkbox.text=Enable UTF8 tex KeywordSearchGlobalLanguageSettingsPanel.ingestSettingsLabel.text=Ingest settings for string extraction from unknown file types (changes effective on next ingest): KeywordSearchGlobalLanguageSettingsPanel.enableUTF16Checkbox.text=Enable UTF16LE and UTF16BE string extraction KeywordSearchGlobalLanguageSettingsPanel.languagesLabel.text=Enabled scripts (languages): +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText=20 mins. (fastest ingest time) +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text=20 minutes (slowest feedback, fastest ingest) +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.toolTipText=10 minutes (faster overall ingest time than default) +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.text=10 minutes (slower feedback, faster ingest) +KeywordSearchGlobalSearchSettingsPanel.frequencyLabel.text=Results update frequency during ingest: KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.toolTipText=Requires Hash Set service to had run previously, or be selected for next ingest. KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.text=Do not add files in NSRL (known files) to keyword index during ingest KeywordSearchGlobalSearchSettingsPanel.informationLabel.text=Information @@ -219,7 +224,11 @@ KeywordSearchGlobalSearchSettingsPanel.filesIndexedValue.text=0 KeywordSearchGlobalSearchSettingsPanel.filesIndexedLabel.text=Files in keyword index: KeywordSearchGlobalSearchSettingsPanel.showSnippetsCB.text=Show Keyword Preview in Keyword Search Results (will result in longer search times) KeywordSearchGlobalSearchSettingsPanel.chunksValLabel.text=0 +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.toolTipText=1 minute (overall ingest time will be longest) +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.text_1=1 minute (faster feedback, longest ingest) KeywordSearchGlobalSearchSettingsPanel.chunksLabel.text=Chunks in keyword index: +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.toolTipText=5 minutes (overall ingest time will be longer) +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.text=5 minutes (default) KeywordSearchIngestModule.regExpHitLbl=Reg Ex hit: KeywordSearchIngestModule.kwHitLbl=Keyword hit: KeywordSearchIngestModule.kwHitThLbl=Keyword @@ -245,6 +254,8 @@ KeywordSearchListsManagementPanel.newKeywordListDescription2=Keyword list <{0}> KeywordSearchModuleFactory.getIngestJobSettingsPanel.exception.msg=Expected settings argument to be instanceof KeywordSearchJobSettings KeywordSearchModuleFactory.createFileIngestModule.exception.msg=Expected settings argument to be instanceof KeywordSearchJobSettings SearchRunner.Searcher.done.err.msg=Error performing keyword search +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.toolTipText=Fastest overall, but no results until the end +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.text=No periodic searches SolrConnectionCheck.HostnameOrPort=Invalid hostname and/or port number. SolrConnectionCheck.Hostname=Invalid hostname. SolrConnectionCheck.MissingHostname=Missing hostname. 
@@ -311,4 +322,3 @@ ExtractedContentPanel.pagesLabel.text=Page: KeywordSearchJobSettingsPanel.ocrCheckBox.text=Enable Optical Character Recognition (OCR) KeywordSearchJobSettingsPanel.limitedOcrCheckbox.text=<html>Only process PDFs, MS Office docs and images which are over 100KB in size or extracted from another file (Beta)</html> KeywordSearchJobSettingsPanel.ocrOnlyCheckbox.text=Only index text extracted using OCR -KeywordSearchJobSettingsPanel.solrCheckbox.text=Add text to Solr Index diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties-MERGED b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties-MERGED index c05fd15c027c687d84bc8dc81d68830bd0956d94..15099026808e4e9a7b23a0e0ddaec1f905f052c0 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties-MERGED +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties-MERGED @@ -15,13 +15,16 @@ ExtractAllTermsReport.error.noOpenCase=No currently open case. ExtractAllTermsReport.export.error=Error During Unique Word Extraction ExtractAllTermsReport.exportComplete=Unique Word Extraction Complete ExtractAllTermsReport.getName.text=Extract Unique Words +# {0} - Number of extracted terms ExtractAllTermsReport.numberExtractedTerms=Extracted {0} terms... ExtractAllTermsReport.search.ingestInProgressBody=<html>Keyword Search Ingest is currently running.<br />Not all files have been indexed and unique word extraction might yield incomplete results.<br />Do you want to proceed with unique word extraction anyway?</html> -ExtractAllTermsReport.search.noFilesInIdxMsg=No files are in index yet. If Solr keyword search indexing and Solr indexing were enabled, wait for ingest to complete. -ExtractAllTermsReport.search.noFilesInIdxMsg2=No files are in index yet. Re-ingest the image with the Keyword Search Module and Solr indexing enabled. +# {0} - Keyword search commit frequency +ExtractAllTermsReport.search.noFilesInIdxMsg=No files are in index yet. Try again later. Index is updated every {0} minutes. +ExtractAllTermsReport.search.noFilesInIdxMsg2=No files are in index yet. Try again later ExtractAllTermsReport.search.searchIngestInProgressTitle=Keyword Search Ingest in Progress ExtractAllTermsReport.startExport=Starting Unique Word Extraction ExtractedContentPanel.setMarkup.panelTxt=<span style='font-style:italic'>Loading text... Please wait</span> +# {0} - Content name ExtractedContentPanel.SetMarkup.progress.loading=Loading text for {0} GlobalEditListPanel.editKeyword.title=Edit Keyword GlobalEditListPanel.warning.text=Boundary characters ^ and $ do not match word boundaries. Consider\nreplacing with an explicit list of boundary characters, such as [ \\.,] @@ -88,8 +91,8 @@ AbstractKeywordSearchPerformer.search.invalidSyntaxHeader=Invalid query statemen AbstractKeywordSearchPerformer.search.searchIngestInProgressTitle=Keyword Search Ingest in Progress AbstractKeywordSearchPerformer.search.ingestInProgressBody=<html>Keyword Search Ingest is currently running.<br />Not all files have been indexed and this search might yield incomplete results.<br />Do you want to proceed with this search anyway?</html> AbstractKeywordSearchPerformer.search.emptyKeywordErrorBody=Keyword list is empty, please add at least one keyword to the list -AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>No files are in index yet. 
<br />If Solr keyword search indexing was enabled, wait for ingest to complete</html> -AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>No files were indexed.<br />Re-ingest the image with the Keyword Search Module and Solr indexing enabled. </html> +AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>No files are in index yet. <br />Try again later. Index is updated every {0} minutes.</html> +AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>No files were indexed.<br />Re-ingest the image with the Keyword Search Module enabled. </html> ExtractedContentViewer.toolTip=Displays extracted text from files and keyword-search results. Requires Keyword Search ingest to be run on a file to activate this viewer. ExtractedContentViewer.getTitle=Indexed Text HighlightedMatchesSource.toString=Search Results @@ -173,7 +176,7 @@ KeywordSearchListsManagementPanel.fileExtensionFilterLbl=Autopsy Keyword List Fi KeywordSearchListsManagementPanel.fileExtensionFilterLb2=Encase Keyword List File (txt) KeywordSearch.listImportFeatureTitle=Keyword List Import KeywordSearchIngestModule.moduleName=Keyword Search -KeywordSearchIngestModule.moduleDescription=Performs file indexing and search using selected keyword lists. +KeywordSearchIngestModule.moduleDescription=Performs file indexing and periodic search using keywords and regular expressions in lists. DropdownSearchPanel.keywordTextField.text= KeywordSearchPanel.searchDropButton.text=Keyword Search DropdownSearchPanel.exactRadioButton.text=Exact Match @@ -224,6 +227,8 @@ KeywordSearchSettings.properties_options.text={0}_Options KeywordSearchSettings.propertiesNSRL.text={0}_NSRL KeywordSearchSettings.propertiesScripts.text={0}_Scripts NoOpenCoreException.err.noOpenSorlCore.msg=No currently open Solr core. +SearchRunner.query.exception.msg=Error performing query: +# {0} - colelction name Server.deleteCore.exception.msg=Failed to delete Solr colelction {0} Server.exceptionMessage.unableToBackupCollection=Unable to backup Solr collection Server.exceptionMessage.unableToCreateCollection=Unable to create Solr collection @@ -267,6 +272,11 @@ KeywordSearchGlobalLanguageSettingsPanel.enableUTF8Checkbox.text=Enable UTF8 tex KeywordSearchGlobalLanguageSettingsPanel.ingestSettingsLabel.text=Ingest settings for string extraction from unknown file types (changes effective on next ingest): KeywordSearchGlobalLanguageSettingsPanel.enableUTF16Checkbox.text=Enable UTF16LE and UTF16BE string extraction KeywordSearchGlobalLanguageSettingsPanel.languagesLabel.text=Enabled scripts (languages): +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText=20 mins. (fastest ingest time) +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text=20 minutes (slowest feedback, fastest ingest) +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.toolTipText=10 minutes (faster overall ingest time than default) +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.text=10 minutes (slower feedback, faster ingest) +KeywordSearchGlobalSearchSettingsPanel.frequencyLabel.text=Results update frequency during ingest: KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.toolTipText=Requires Hash Set service to had run previously, or be selected for next ingest. 
KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.text=Do not add files in NSRL (known files) to keyword index during ingest KeywordSearchGlobalSearchSettingsPanel.informationLabel.text=Information @@ -275,7 +285,11 @@ KeywordSearchGlobalSearchSettingsPanel.filesIndexedValue.text=0 KeywordSearchGlobalSearchSettingsPanel.filesIndexedLabel.text=Files in keyword index: KeywordSearchGlobalSearchSettingsPanel.showSnippetsCB.text=Show Keyword Preview in Keyword Search Results (will result in longer search times) KeywordSearchGlobalSearchSettingsPanel.chunksValLabel.text=0 +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.toolTipText=1 minute (overall ingest time will be longest) +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.text_1=1 minute (faster feedback, longest ingest) KeywordSearchGlobalSearchSettingsPanel.chunksLabel.text=Chunks in keyword index: +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.toolTipText=5 minutes (overall ingest time will be longer) +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.text=5 minutes (default) KeywordSearchIngestModule.regExpHitLbl=Reg Ex hit: KeywordSearchIngestModule.kwHitLbl=Keyword hit: KeywordSearchIngestModule.kwHitThLbl=Keyword @@ -301,6 +315,8 @@ KeywordSearchListsManagementPanel.newKeywordListDescription2=Keyword list <{0}> KeywordSearchModuleFactory.getIngestJobSettingsPanel.exception.msg=Expected settings argument to be instanceof KeywordSearchJobSettings KeywordSearchModuleFactory.createFileIngestModule.exception.msg=Expected settings argument to be instanceof KeywordSearchJobSettings SearchRunner.Searcher.done.err.msg=Error performing keyword search +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.toolTipText=Fastest overall, but no results until the end +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.text=No periodic searches Server.status.failed.msg=Local Solr server did not respond to status request. This may be because the server failed to start or is taking too long to initialize. SolrConnectionCheck.HostnameOrPort=Invalid hostname and/or port number. SolrConnectionCheck.Hostname=Invalid hostname. 
@@ -388,7 +404,6 @@ ExtractedContentPanel.pagesLabel.text=Page: KeywordSearchJobSettingsPanel.ocrCheckBox.text=Enable Optical Character Recognition (OCR) KeywordSearchJobSettingsPanel.limitedOcrCheckbox.text=<html>Only process PDFs, MS Office docs and images which are over 100KB in size or extracted from another file (Beta)</html> KeywordSearchJobSettingsPanel.ocrOnlyCheckbox.text=Only index text extracted using OCR -KeywordSearchJobSettingsPanel.solrCheckbox.text=Add text to Solr Index TextZoomPanel.zoomInButton.text= TextZoomPanel.zoomOutButton.text= TextZoomPanel.zoomResetButton.text=Reset diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle_ja.properties b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle_ja.properties index 83e614bc936adff3ae6fd8df262d55a816a5bb8a..46af934d7c8949d3c41fe8ea0cb7bede5421f775 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle_ja.properties +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle_ja.properties @@ -7,6 +7,8 @@ AbstractKeywordSearchPerformer.search.dialogErrorHeader=\u30ad\u30fc\u30ef\u30fc AbstractKeywordSearchPerformer.search.emptyKeywordErrorBody=\u30ad\u30fc\u30ef\u30fc\u30c9\u30ea\u30b9\u30c8\u304c\u7a7a(\u672a\u5165\u529b)\u3067\u3059\u3002\u5c11\u306a\u304f\u3068\u30821\u3064\u306e\u30ad\u30fc\u30ef\u30fc\u30c9\u3092\u30ea\u30b9\u30c8\u306b\u8ffd\u52a0\u3057\u3066\u304f\u3060\u3055\u3044\u3002 AbstractKeywordSearchPerformer.search.ingestInProgressBody=<html>\u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u304c\u73fe\u5728\u5b9f\u884c\u4e2d\u3067\u3059\u3002<br />\u3059\u3079\u3066\u306e\u30d5\u30a1\u30a4\u30eb\u304c\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u3055\u308c\u306a\u304b\u3063\u305f\u305f\u3081\u3001\u3053\u306e\u691c\u7d22\u306f\u4e0d\u5b8c\u5168\u306a\u7d50\u679c\u3092\u751f\u6210\u3059\u308b\u53ef\u80fd\u6027\u304c\u3042\u308a\u307e\u3059\u3002<br />\u305d\u308c\u3067\u3082\u3053\u306e\u691c\u7d22\u3092\u7d9a\u884c\u3057\u307e\u3059\u304b?</html> AbstractKeywordSearchPerformer.search.invalidSyntaxHeader=\u7121\u52b9\u306a\u30af\u30a8\u30ea\u30fb\u30b9\u30c6\u30fc\u30c8\u30e1\u30f3\u30c8\u3002 \u5185\u5bb9\u304c\u6b63\u898f\u8868\u73fe\u306e\u5834\u5408\u3001Lucene\u6b63\u898f\u8868\u73fe\u30d1\u30bf\u30fc\u30f3\u306e\u307f\u304c\u30b5\u30dd\u30fc\u30c8\u3055\u308c\u307e\u3059\u3002 POSIX\u6587\u5b57\u30af\u30e9\u30b9\uff08\\ n\u3084\\ w\u306a\u3069\uff09\u306f\u7121\u52b9\u3067\u3059\u3002 +AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>\u30d5\u30a1\u30a4\u30eb\u304c\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002<br />\u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\u30e2\u30b8\u30e5\u30fc\u30eb\u3092\u6709\u52b9\u306b\u3057\u305f\u72b6\u614b\u3067\u30a4\u30e1\u30fc\u30b8\u3092\u518d\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u3057\u3066\u304f\u3060\u3055\u3044\u3002</html> +AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>\u307e\u3060\u30d5\u30a1\u30a4\u30eb\u304c\u7d22\u5f15\u306b\u542b\u307e\u308c\u3066\u3044\u307e\u305b\u3093\u3002<br />\u5f8c\u3067\u3082\u3046\u4e00\u5ea6\u304a\u8a66\u3057\u304f\u3060\u3055\u3044\u3002 \u7d22\u5f15\u306f {0} \u5206\u3054\u3068\u306b\u66f4\u65b0\u3055\u308c\u307e\u3059\u3002</html> AbstractKeywordSearchPerformer.search.searchIngestInProgressTitle=\u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u304c\u9032\u884c\u4e2d\u3067\u3059 AccountsText.creditCardNumber=\u30af\u30ec\u30b8\u30c3\u30c8\u30ab\u30fc\u30c9\u756a\u53f7 
AccountsText.creditCardNumbers=\u30af\u30ec\u30b8\u30c3\u30c8\u30ab\u30fc\u30c9\u756a\u53f7 @@ -53,6 +55,8 @@ ExtractAllTermsReport.exportComplete=\u30e6\u30cb\u30fc\u30af\u306a\u5358\u8a9e\ ExtractAllTermsReport.getName.text=\u30e6\u30cb\u30fc\u30af\u306a\u5358\u8a9e\u3092\u62bd\u51fa\u3059\u308b ExtractAllTermsReport.numberExtractedTerms=\u62bd\u51fa\u3055\u308c\u305f{0}\u7528\u8a9e... ExtractAllTermsReport.search.ingestInProgressBody=<html> \u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\u8aad\u8fbc\u306f\u73fe\u5728\u5b9f\u884c\u4e2d\u3067\u3059\u3002<br/>\u3059\u3079\u3066\u306e\u30d5\u30a1\u30a4\u30eb\u304c\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u306b\u767b\u9332\u3055\u308c\u3066\u3044\u308b\u308f\u3051\u3067\u306f\u306a\u304f\u3001\u30e6\u30cb\u30fc\u30af\u306a\u5358\u8a9e\u3092\u62bd\u51fa\u306f\u4e0d\u5b8c\u5168\u306a\u7d50\u679c\u306b\u306a\u308b\u53ef\u80fd\u6027\u304c\u3042\u308a\u307e\u3059\u3002<br />\u305d\u308c\u3067\u3082\u30e6\u30cb\u30fc\u30af\u306a\u5358\u8a9e\u306e\u62bd\u51fa\u3092\u7d9a\u884c\u3057\u307e\u3059\u304b\uff1f</ html> +ExtractAllTermsReport.search.noFilesInIdxMsg=\u307e\u3060\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u306b\u767b\u9332\u3055\u308c\u3066\u3044\u308b\u30d5\u30a1\u30a4\u30eb\u306f\u3042\u308a\u307e\u305b\u3093\u3002 \u3042\u3068\u3067\u3082\u3046\u4e00\u5ea6\u8a66\u3057\u3066\u307f\u3066\u304f\u3060\u3055\u3044\u3002 \u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u306f{0}\u5206\u3054\u3068\u306b\u66f4\u65b0\u3055\u308c\u307e\u3059\u3002 +ExtractAllTermsReport.search.noFilesInIdxMsg2=\u307e\u3060\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u306b\u767b\u9332\u3055\u308c\u3066\u3044\u308b\u30d5\u30a1\u30a4\u30eb\u306f\u3042\u308a\u307e\u305b\u3093\u3002 \u3042\u3068\u3067\u3082\u3046\u4e00\u5ea6\u8a66\u3057\u3066\u307f\u3066\u304f\u3060\u3055\u3044\u3002 ExtractAllTermsReport.search.searchIngestInProgressTitle=\u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u304c\u9032\u884c\u4e2d\u3067\u3059 ExtractAllTermsReport.startExport=\u30e6\u30cb\u30fc\u30af\u306a\u5358\u8a9e\u62bd\u51fa\u306e\u958b\u59cb ExtractedContentPanel.SetMarkup.progress.loading={0} \u306e\u30c6\u30ad\u30b9\u30c8\u3092\u8aad\u307f\u8fbc\u3093\u3067\u3044\u307e\u3059 @@ -206,12 +210,23 @@ KeywordSearchGlobalSearchSettingsPanel.customizeComponents.windowsLimitedOCR=\u3 KeywordSearchGlobalSearchSettingsPanel.customizeComponents.windowsOCR=OCR\u6587\u5b57\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\uff08Windows 64\u30d3\u30c3\u30c8\u304c\u5fc5\u8981\uff09 KeywordSearchGlobalSearchSettingsPanel.filesIndexedLabel.text=\u30ad\u30fc\u30ef\u30fc\u30c9\u7d22\u5f15\u5185\u306e\u30d5\u30a1\u30a4\u30eb\: KeywordSearchGlobalSearchSettingsPanel.filesIndexedValue.text=0 +KeywordSearchGlobalSearchSettingsPanel.frequencyLabel.text=\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u4e2d\u306e\u7d50\u679c\u66f4\u65b0\u983b\u5ea6\: KeywordSearchGlobalSearchSettingsPanel.informationLabel.text=\u60c5\u5831 KeywordSearchGlobalSearchSettingsPanel.ingestWarningLabel.text=\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u304c\u9032\u884c\u4e2d\u3067\u3059\u3002\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u304c\u5b8c\u4e86\u3059\u308b\u307e\u3067\u4e00\u90e8\u306e\u8a2d\u5b9a\u3092\u5229\u7528\u3067\u304d\u307e\u305b\u3093\u3002 KeywordSearchGlobalSearchSettingsPanel.settingsLabel.text=\u8a2d\u5b9a 
KeywordSearchGlobalSearchSettingsPanel.showSnippetsCB.text=\u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\u7d50\u679c\u306b\u30ad\u30fc\u30ef\u30fc\u30c9\u30d7\u30ec\u30d3\u30e5\u30fc\u3092\u8868\u793a(\u691c\u7d22\u6642\u9593\u304c\u9577\u304f\u306a\u308a\u307e\u3059) KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.text=\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u4e2d\u306bNSRL(\u65e2\u77e5\u306e\u30d5\u30a1\u30a4\u30eb)\u306e\u30d5\u30a1\u30a4\u30eb\u3092\u30ad\u30fc\u30ef\u30fc\u30c9\u306b\u8ffd\u52a0\u3057\u306a\u3044\u3067\u304f\u3060\u3055\u3044 KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.toolTipText=\u30cf\u30c3\u30b7\u30e5\u30bb\u30c3\u30c8\u30b5\u30fc\u30d3\u30b9\u306b\u4ee5\u524d\u306b\u5b9f\u884c\u6e08\u307f\u3067\u3042\u308b\u3053\u3068\u3001\u307e\u305f\u306f\u6b21\u306e\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u306b\u9078\u629e\u3055\u308c\u308b\u3053\u3068\u3092\u8981\u6c42\u3057\u307e\u3059\u3002 +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text=20\u5206(\u6700\u9045\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u3001\u6700\u901f\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8) +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText=20\u5206(\u6700\u901f\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u6642\u9593) +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.text=10\u5206(\u3088\u308a\u9045\u3044\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u3001\u3088\u308a\u901f\u3044\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8) +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.toolTipText=10\u5206(\u30c7\u30d5\u30a9\u30eb\u30c8\u3088\u308a\u3082\u901f\u3044\u7dcf\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u6642\u9593) +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.text=5\u5206(\u30c7\u30d5\u30a9\u30eb\u30c8) +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.toolTipText=5\u5206(\u7dcf\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u6642\u9593\u304c\u9577\u304f\u306a\u308a\u307e\u3059) +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.text_1=1\u5206(\u3088\u308a\u901f\u3044\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u3001\u6700\u9577\u306e\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8) +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.toolTipText=1\u5206(\u7dcf\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u6642\u9593\u306f\u6700\u9577\u306b\u306a\u308a\u307e\u3059) +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.text=\u5b9a\u671f\u691c\u7d22\u306a\u3057 +KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.toolTipText=\u5168\u4f53\u3067\u6700\u901f\u3067\u3059\u304c\u3001\u6700\u5f8c\u307e\u3067\u7d50\u679c\u306f\u8868\u793a\u3055\u308c\u307e\u305b\u3093 KeywordSearchGlobalSettingsPanel.Title=\u30ad\u30fc\u30ef\u30fc\u30c9\u4e00\u62ec\u691c\u7d22\u8a2d\u5b9a KeywordSearchIngestModule.doInBackGround.displayName=\u30ad\u30fc\u30ef\u30fc\u30c9\u5b9a\u671f\u691c\u7d22 KeywordSearchIngestModule.doInBackGround.finalizeMsg=\u78ba\u5b9a diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java index dac7b8d987c59588f3ffe17cc37422159bb46a56..2deb82d2f5bb7cbacf7a0e4db68c3cb9dcef9930 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java @@ -398,8 +398,6 @@ static class Chunk { private final StringBuilder sb; private final int baseChunkSizeChars; private final StringBuilder lowerCasedChunk; - private boolean hasHit = false; - private int chunkId = 0; Chunk(StringBuilder sb, int baseChunkSizeChars, 
StringBuilder lowerCasedChunk) { this.sb = sb; @@ -422,7 +420,7 @@ public String toString() { * * @return The content of the chunk. */ - public String getLowerCasedChunk() { + public String geLowerCasedChunk() { return lowerCasedChunk.toString(); } @@ -434,21 +432,5 @@ public String getLowerCasedChunk() { int getBaseChunkLength() { return baseChunkSizeChars; } - - boolean hasHit() { - return hasHit; - } - - void setHasHit(boolean b) { - hasHit = b; - } - - void setChunkId(int id) { - chunkId = id; - } - - int getChunkId() { - return chunkId; - } } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/DropdownListSearchPanel.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/DropdownListSearchPanel.java index f53dc547c56f8ad13cf885fdee9f902dada70efc..7b84080217522196ec89e3e8f6e7a2426d717e4c 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/DropdownListSearchPanel.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/DropdownListSearchPanel.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2011-2022 Basis Technology Corp. + * Copyright 2011-2018 Basis Technology Corp. * Contact: carrier <at> sleuthkit <dot> org * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -30,6 +30,7 @@ import java.util.Iterator; import java.util.List; import java.util.Set; +import java.util.logging.Level; import javax.swing.JCheckBox; import javax.swing.JTable; import javax.swing.ListSelectionModel; @@ -142,7 +143,10 @@ public void propertyChange(PropertyChangeEvent evt) { searchAddListener = new ActionListener() { @Override public void actionPerformed(ActionEvent e) { - if (!ingestRunning) { + if (ingestRunning) { + IngestSearchRunner.getInstance().addKeywordListsToAllJobs(listsTableModel.getSelectedLists()); + logger.log(Level.INFO, "Submitted enqueued lists to ingest"); //NON-NLS + } else { searchAction(e); } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractAllTermsReport.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractAllTermsReport.java index 584757aa93889efc7a5f7c46358036cd21d0955b..88178b42ea88f20c49730a3b942cb4bd353b936d 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractAllTermsReport.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractAllTermsReport.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2022 Basis Technology Corp. + * Copyright 2021 Basis Technology Corp. * Contact: carrier <at> sleuthkit <dot> org * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -51,8 +51,9 @@ public String getName() { @NbBundle.Messages({ "ExtractAllTermsReport.error.noOpenCase=No currently open case.", - "ExtractAllTermsReport.search.noFilesInIdxMsg=No files are in index yet. If Solr keyword search indexing and Solr indexing were enabled, wait for ingest to complete.", - "ExtractAllTermsReport.search.noFilesInIdxMsg2=No files are in index yet. Re-ingest the image with the Keyword Search Module and Solr indexing enabled.", + "# {0} - Keyword search commit frequency", + "ExtractAllTermsReport.search.noFilesInIdxMsg=No files are in index yet. Try again later. Index is updated every {0} minutes.", + "ExtractAllTermsReport.search.noFilesInIdxMsg2=No files are in index yet. 
Try again later", "ExtractAllTermsReport.search.searchIngestInProgressTitle=Keyword Search Ingest in Progress", "ExtractAllTermsReport.search.ingestInProgressBody=<html>Keyword Search Ingest is currently running.<br />Not all files have been indexed and unique word extraction might yield incomplete results.<br />Do you want to proceed with unique word extraction anyway?</html>", "ExtractAllTermsReport.startExport=Starting Unique Word Extraction", @@ -82,7 +83,7 @@ public void generateReport(GeneralReportSettings settings, ReportProgressPanel p if (filesIndexed == 0) { if (isIngestRunning) { - progressPanel.complete(ReportProgressPanel.ReportStatus.ERROR, Bundle.ExtractAllTermsReport_search_noFilesInIdxMsg()); + progressPanel.complete(ReportProgressPanel.ReportStatus.ERROR, Bundle.ExtractAllTermsReport_search_noFilesInIdxMsg(KeywordSearchSettings.getUpdateFrequency().getTime())); } else { progressPanel.complete(ReportProgressPanel.ReportStatus.ERROR, Bundle.ExtractAllTermsReport_search_noFilesInIdxMsg2()); } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/IngestSearchRunner.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/IngestSearchRunner.java new file mode 100755 index 0000000000000000000000000000000000000000..9cd33a81674d286b2a3e64c3496734706019421e --- /dev/null +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/IngestSearchRunner.java @@ -0,0 +1,705 @@ +/* + * Autopsy Forensic Browser + * + * Copyright 2014 - 2021 Basis Technology Corp. + * Contact: carrier <at> sleuthkit <dot> org + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.sleuthkit.autopsy.keywordsearch; + +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.concurrent.CancellationException; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import java.util.concurrent.atomic.AtomicLong; +import java.util.logging.Level; +import javax.annotation.concurrent.GuardedBy; +import javax.swing.SwingUtilities; +import javax.swing.SwingWorker; +import org.netbeans.api.progress.ProgressHandle; +import org.openide.util.Cancellable; +import org.openide.util.NbBundle; +import org.openide.util.NbBundle.Messages; +import org.sleuthkit.autopsy.core.RuntimeProperties; +import org.sleuthkit.autopsy.coreutils.Logger; +import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil; +import org.sleuthkit.autopsy.coreutils.StopWatch; +import org.sleuthkit.autopsy.coreutils.ThreadConfined; +import org.sleuthkit.autopsy.ingest.IngestJobContext; +import org.sleuthkit.autopsy.ingest.IngestMessage; +import org.sleuthkit.autopsy.ingest.IngestServices; + +/** + * Performs periodic and final keyword searches for ingest jobs. Periodic + * searches are done in background tasks. This represents a careful working + * around of the contract for IngestModule.process(). Final searches are done + * synchronously in the calling thread, as required by the contract for + * IngestModule.shutDown(). + */ +final class IngestSearchRunner { + + private static final Logger logger = Logger.getLogger(IngestSearchRunner.class.getName()); + private static IngestSearchRunner instance = null; + private final IngestServices services = IngestServices.getInstance(); + private Ingester ingester = null; + private long currentUpdateIntervalMs; + private volatile boolean periodicSearchTaskRunning; + private volatile Future<?> periodicSearchTaskHandle; + private final ScheduledThreadPoolExecutor periodicSearchTaskExecutor; + private static final int NUM_SEARCH_SCHEDULING_THREADS = 1; + private static final String SEARCH_SCHEDULER_THREAD_NAME = "periodic-search-scheduling-%d"; + private final Map<Long, SearchJobInfo> jobs = new ConcurrentHashMap<>(); // Ingest job ID to search job info + private final boolean usingNetBeansGUI = RuntimeProperties.runningWithGUI(); + + /* + * Constructs a singleton object that performs periodic and final keyword + * searches for ingest jobs. Periodic searches are done in background tasks. + * This represents a careful working around of the contract for + * IngestModule.process(). Final searches are done synchronously in the + * calling thread, as required by the contract for IngestModule.shutDown(). + */ + private IngestSearchRunner() { + currentUpdateIntervalMs = ((long) KeywordSearchSettings.getUpdateFrequency().getTime()) * 60 * 1000; + ingester = Ingester.getDefault(); + periodicSearchTaskExecutor = new ScheduledThreadPoolExecutor(NUM_SEARCH_SCHEDULING_THREADS, new ThreadFactoryBuilder().setNameFormat(SEARCH_SCHEDULER_THREAD_NAME).build()); + } + + /** + * Gets the ingest search runner singleton. + * + * @return The ingest search runner. 
+ */ + public static synchronized IngestSearchRunner getInstance() { + if (instance == null) { + instance = new IngestSearchRunner(); + } + return instance; + } + + /** + * Starts the search job for an ingest job. + * + * @param jobContext The ingest job context. + * @param keywordListNames The names of the keyword search lists for the + * ingest job. + */ + public synchronized void startJob(IngestJobContext jobContext, List<String> keywordListNames) { + long jobId = jobContext.getJobId(); + if (jobs.containsKey(jobId) == false) { + SearchJobInfo jobData = new SearchJobInfo(jobContext, keywordListNames); + jobs.put(jobId, jobData); + } + + /* + * Keep track of the number of keyword search file ingest modules that + * are doing analysis for the ingest job, i.e., that have called this + * method. This is needed by endJob(). + */ + jobs.get(jobId).incrementModuleReferenceCount(); + + /* + * Start a periodic search task, if one is not already running. + */ + if ((jobs.size() > 0) && (periodicSearchTaskRunning == false)) { + currentUpdateIntervalMs = ((long) KeywordSearchSettings.getUpdateFrequency().getTime()) * 60 * 1000; + periodicSearchTaskHandle = periodicSearchTaskExecutor.schedule(new PeriodicSearchTask(), currentUpdateIntervalMs, MILLISECONDS); + periodicSearchTaskRunning = true; + } + } + + /** + * Finishes a search job for an ingest job. + * + * @param jobId The ingest job ID. + */ + public synchronized void endJob(long jobId) { + /* + * Only complete the job if this is the last keyword search file ingest + * module doing analysis for this job. + */ + SearchJobInfo job; + job = jobs.get(jobId); + if (job == null) { + return; // RJCTODO: SEVERE + } + if (job.decrementModuleReferenceCount() != 0) { + jobs.remove(jobId); + } + + /* + * Commit the index and do the final search. The final search is done in + * the ingest thread that called shutDown() on the keyword search file ingest + * module, per the contract of IngestModule.shutDown(). + */ + logger.log(Level.INFO, "Committing search index before final search for search job {0}", job.getJobId()); //NON-NLS + commit(); + logger.log(Level.INFO, "Starting final search for search job {0}", job.getJobId()); //NON-NLS + doFinalSearch(job); + logger.log(Level.INFO, "Final search for search job {0} completed", job.getJobId()); //NON-NLS + + if (jobs.isEmpty()) { + cancelPeriodicSearchSchedulingTask(); + } + } + + /** + * Stops the search job for an ingest job. + * + * @param jobId The ingest job ID. + */ + public synchronized void stopJob(long jobId) { + logger.log(Level.INFO, "Stopping search job {0}", jobId); //NON-NLS + commit(); + + SearchJobInfo job; + job = jobs.get(jobId); + if (job == null) { + return; + } + + /* + * Request cancellation of the current keyword search, whether it is a + * periodic search or a final search. + */ + IngestSearchRunner.Searcher currentSearcher = job.getCurrentSearcher(); + if ((currentSearcher != null) && (!currentSearcher.isDone())) { + logger.log(Level.INFO, "Cancelling search job {0}", jobId); //NON-NLS + currentSearcher.cancel(true); + } + + jobs.remove(jobId); + + if (jobs.isEmpty()) { + cancelPeriodicSearchSchedulingTask(); + } + } + + /** + * Adds the given keyword list names to the set of keyword lists to be + * searched by ALL keyword search jobs. This supports adding one or more + * keyword search lists to ingest jobs already in progress. + * + * @param keywordListNames The names of the additional keyword lists.
+ */ + public synchronized void addKeywordListsToAllJobs(List<String> keywordListNames) { + for (String listName : keywordListNames) { + logger.log(Level.INFO, "Adding keyword list {0} to all jobs", listName); //NON-NLS + for (SearchJobInfo j : jobs.values()) { + j.addKeywordListName(listName); + } + } + } + + /** + * Commits the Solr index for the current case and publishes an event + * indicating the current number of indexed items (this is no longer just + * files). + */ + private void commit() { + ingester.commit(); + + /* + * Publish an event advertising the number of indexed items. Note that + * this is no longer the number of indexed files, since the text of many + * items in addition to files is indexed. + */ + try { + final int numIndexedFiles = KeywordSearch.getServer().queryNumIndexedFiles(); + KeywordSearch.fireNumIndexedFilesChange(null, numIndexedFiles); + } catch (NoOpenCoreException | KeywordSearchModuleException ex) { + logger.log(Level.SEVERE, "Error executing Solr query for number of indexed files", ex); //NON-NLS + } + } + + /** + * Performs the final keyword search for an ingest job. The search is done + * synchronously, as required by the contract for IngestModule.shutDown(). + * + * @param job The keyword search job info. + */ + private void doFinalSearch(SearchJobInfo job) { + if (!job.getKeywordListNames().isEmpty()) { + try { + /* + * Wait for any periodic searches being done in a SwingWorker + * pool thread to finish. + */ + job.waitForCurrentWorker(); + IngestSearchRunner.Searcher finalSearcher = new IngestSearchRunner.Searcher(job, true); + job.setCurrentSearcher(finalSearcher); + /* + * Do the final search synchronously on the current ingest + * thread, per the contract specified by IngestModule.shutDown(). + */ + finalSearcher.doInBackground(); + } catch (InterruptedException | CancellationException ex) { + logger.log(Level.INFO, "Final search for search job {0} interrupted or cancelled", job.getJobId()); //NON-NLS + } catch (Exception ex) { + logger.log(Level.SEVERE, String.format("Final search for search job %d failed", job.getJobId()), ex); //NON-NLS + } + } + } + + /** + * Cancels the current periodic search scheduling task. + */ + private synchronized void cancelPeriodicSearchSchedulingTask() { + if (periodicSearchTaskHandle != null) { + logger.log(Level.INFO, "No more search jobs, stopping periodic search scheduling"); //NON-NLS + periodicSearchTaskHandle.cancel(true); + periodicSearchTaskRunning = false; + } + } + + /** + * Task that runs in ScheduledThreadPoolExecutor to periodically start and + * wait for keyword search tasks for each keyword search job in progress. + * The keyword search tasks for individual ingest jobs are implemented as + * SwingWorkers to support legacy APIs. + */ + private final class PeriodicSearchTask implements Runnable { + + @Override + public void run() { + /* + * If there are no more jobs or this task has been cancelled, exit. + */ + if (jobs.isEmpty() || periodicSearchTaskHandle.isCancelled()) { + logger.log(Level.INFO, "Periodic search scheduling task has been cancelled, exiting"); //NON-NLS + periodicSearchTaskRunning = false; + return; + } + + /* + * Commit the Solr index for the current case before doing the + * searches. + */ + commit(); + + /* + * Do a keyword search for each ingest job in progress. When the + * searches are done, recalculate the "hold off" time between + * searches to prevent back-to-back periodic searches and schedule + * the next periodic search task.
+ */ + final StopWatch stopWatch = new StopWatch(); + stopWatch.start(); + for (Iterator<Entry<Long, SearchJobInfo>> iterator = jobs.entrySet().iterator(); iterator.hasNext();) { + SearchJobInfo job = iterator.next().getValue(); + + if (periodicSearchTaskHandle.isCancelled()) { + logger.log(Level.INFO, "Periodic search scheduling task has been cancelled, exiting"); //NON-NLS + periodicSearchTaskRunning = false; + return; + } + + if (!job.getKeywordListNames().isEmpty() && !job.isWorkerRunning()) { + logger.log(Level.INFO, "Starting periodic search for search job {0}", job.getJobId()); + Searcher searcher = new Searcher(job, false); + job.setCurrentSearcher(searcher); + searcher.execute(); + job.setWorkerRunning(true); + try { + searcher.get(); + } catch (InterruptedException | ExecutionException ex) { + logger.log(Level.SEVERE, String.format("Error performing keyword search for ingest job %d", job.getJobId()), ex); //NON-NLS + services.postMessage(IngestMessage.createErrorMessage( + KeywordSearchModuleFactory.getModuleName(), + NbBundle.getMessage(this.getClass(), "SearchRunner.Searcher.done.err.msg"), ex.getMessage())); + } catch (java.util.concurrent.CancellationException ex) { + logger.log(Level.SEVERE, String.format("Keyword search for ingest job %d cancelled", job.getJobId()), ex); //NON-NLS + } + } + } + stopWatch.stop(); + logger.log(Level.INFO, "Periodic searches for all ingest jobs cumulatively took {0} secs", stopWatch.getElapsedTimeSecs()); //NON-NLS + recalculateUpdateIntervalTime(stopWatch.getElapsedTimeSecs()); // ELDEBUG + periodicSearchTaskHandle = periodicSearchTaskExecutor.schedule(new PeriodicSearchTask(), currentUpdateIntervalMs, MILLISECONDS); + } + + /** + * Sets the time interval between periodic keyword searches to avoid + * running back-to-back searches. If the most recent round of searches + * took longer than 1/4 of the current interval, doubles the interval. + * + * @param lastSerchTimeSec The time in seconds used to execute the most + * recent round of keyword searches. + */ + private void recalculateUpdateIntervalTime(long lastSerchTimeSec) { + if (lastSerchTimeSec * 1000 < currentUpdateIntervalMs / 4) { + return; + } + currentUpdateIntervalMs *= 2; + logger.log(Level.WARNING, "Last periodic search took {0} sec. Increasing search interval to {1} sec", new Object[]{lastSerchTimeSec, currentUpdateIntervalMs / 1000}); + } + } + + /** + * A data structure to keep track of the keyword lists, current results, and + * search running status for an ingest job.
+ */ + private class SearchJobInfo { + + private final IngestJobContext jobContext; + private final long jobId; + private final long dataSourceId; + private volatile boolean workerRunning; + @GuardedBy("this") + private final List<String> keywordListNames; + @GuardedBy("this") + private final Map<Keyword, Set<Long>> currentResults; // Keyword to object IDs of items with hits + private IngestSearchRunner.Searcher currentSearcher; + private final AtomicLong moduleReferenceCount = new AtomicLong(0); + private final Object finalSearchLock = new Object(); + + private SearchJobInfo(IngestJobContext jobContext, List<String> keywordListNames) { + this.jobContext = jobContext; + jobId = jobContext.getJobId(); + dataSourceId = jobContext.getDataSource().getId(); + this.keywordListNames = new ArrayList<>(keywordListNames); + currentResults = new HashMap<>(); + workerRunning = false; + currentSearcher = null; + } + + private IngestJobContext getJobContext() { + return jobContext; + } + + private long getJobId() { + return jobId; + } + + private long getDataSourceId() { + return dataSourceId; + } + + private synchronized List<String> getKeywordListNames() { + return new ArrayList<>(keywordListNames); + } + + private synchronized void addKeywordListName(String keywordListName) { + if (!keywordListNames.contains(keywordListName)) { + keywordListNames.add(keywordListName); + } + } + + private synchronized Set<Long> currentKeywordResults(Keyword k) { + return currentResults.get(k); + } + + private synchronized void addKeywordResults(Keyword k, Set<Long> resultsIDs) { + currentResults.put(k, resultsIDs); + } + + private boolean isWorkerRunning() { + return workerRunning; + } + + private void setWorkerRunning(boolean flag) { + workerRunning = flag; + } + + private synchronized IngestSearchRunner.Searcher getCurrentSearcher() { + return currentSearcher; + } + + private synchronized void setCurrentSearcher(IngestSearchRunner.Searcher searchRunner) { + currentSearcher = searchRunner; + } + + private void incrementModuleReferenceCount() { + moduleReferenceCount.incrementAndGet(); + } + + private long decrementModuleReferenceCount() { + return moduleReferenceCount.decrementAndGet(); + } + + /** + * Waits for the current search task to complete. + * + * @throws InterruptedException + */ + private void waitForCurrentWorker() throws InterruptedException { + synchronized (finalSearchLock) { + while (workerRunning) { + logger.log(Level.INFO, String.format("Waiting for previous search task for job %d to finish", jobId)); //NON-NLS + finalSearchLock.wait(); + logger.log(Level.INFO, String.format("Notified previous search task for job %d to finish", jobId)); //NON-NLS + } + } + } + + /** + * Signals any threads waiting on the current search task to complete. + */ + private void searchNotify() { + synchronized (finalSearchLock) { + workerRunning = false; + finalSearchLock.notify(); + } + } + } + + /* + * A SwingWorker responsible for searching the Solr index of the current + * case for the keywords for an ingest job. Keyword hit analysis results are + * created and posted to the blackboard and notifications are sent to the + * ingest inbox. 
+ */ + private final class Searcher extends SwingWorker<Object, Void> { + + /* + * Searcher has private copies/snapshots of the lists and keywords + */ + private final SearchJobInfo job; + private final List<Keyword> keywords; //keywords to search + private final List<String> keywordListNames; // lists currently being searched + private final List<KeywordList> keywordLists; + private final Map<Keyword, KeywordList> keywordToList; //keyword to list name mapping + @ThreadConfined(type = ThreadConfined.ThreadType.AWT) + private ProgressHandle progressIndicator; + private boolean finalRun = false; + + Searcher(SearchJobInfo job, boolean finalRun) { + this.job = job; + this.finalRun = finalRun; + keywordListNames = job.getKeywordListNames(); + keywords = new ArrayList<>(); + keywordToList = new HashMap<>(); + keywordLists = new ArrayList<>(); + } + + @Override + @Messages("SearchRunner.query.exception.msg=Error performing query:") + protected Object doInBackground() throws Exception { + try { + if (usingNetBeansGUI) { + /* + * If running in the NetBeans thick client application + * version of Autopsy, NetBeans progress handles (i.e., + * progress bars) are used to display search progress in the + * lower right hand corner of the main application window. + * + * A layer of abstraction to allow alternate representations + * of progress could be used here, as it is in other places + * in the application (see implementations and usage of + * org.sleuthkit.autopsy.progress.ProgressIndicator + * interface), to better decouple keyword search from the + * application's presentation layer. + */ + SwingUtilities.invokeAndWait(() -> { + final String displayName = NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.displayName") + + (finalRun ? 
(" - " + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.finalizeMsg")) : ""); + progressIndicator = ProgressHandle.createHandle(displayName, new Cancellable() { + @Override + public boolean cancel() { + if (progressIndicator != null) { + progressIndicator.setDisplayName(displayName + " " + NbBundle.getMessage(this.getClass(), "SearchRunner.doInBackGround.cancelMsg")); + } + logger.log(Level.INFO, "Search cancelled by user"); //NON-NLS + new Thread(() -> { + IngestSearchRunner.Searcher.this.cancel(true); + }).start(); + return true; + } + }); + progressIndicator.start(); + progressIndicator.switchToIndeterminate(); + }); + } + + updateKeywords(); + for (Keyword keyword : keywords) { + if (isCancelled() || job.getJobContext().fileIngestIsCancelled()) { + logger.log(Level.INFO, "Cancellation requested, exiting before new keyword processed: {0}", keyword.getSearchTerm()); //NON-NLS + return null; + } + + KeywordList keywordList = keywordToList.get(keyword); + if (usingNetBeansGUI) { + String searchTermStr = keyword.getSearchTerm(); + if (searchTermStr.length() > 50) { + searchTermStr = searchTermStr.substring(0, 49) + "..."; + } + final String progressMessage = keywordList.getName() + ": " + searchTermStr; + SwingUtilities.invokeLater(() -> { + progressIndicator.progress(progressMessage); + }); + } + + // Filtering + //limit search to currently ingested data sources + //set up a filter with 1 or more image ids OR'ed + KeywordSearchQuery keywordSearchQuery = KeywordSearchUtil.getQueryForKeyword(keyword, keywordList); + KeywordQueryFilter dataSourceFilter = new KeywordQueryFilter(KeywordQueryFilter.FilterType.DATA_SOURCE, job.getDataSourceId()); + keywordSearchQuery.addFilter(dataSourceFilter); + + // Do the actual search + QueryResults queryResults; + try { + queryResults = keywordSearchQuery.performQuery(); + } catch (KeywordSearchModuleException | NoOpenCoreException ex) { + logger.log(Level.SEVERE, "Error performing query: " + keyword.getSearchTerm(), ex); //NON-NLS + if (usingNetBeansGUI) { + final String userMessage = Bundle.SearchRunner_query_exception_msg() + keyword.getSearchTerm(); + SwingUtilities.invokeLater(() -> { + MessageNotifyUtil.Notify.error(userMessage, ex.getCause().getMessage()); + }); + } + //no reason to continue with next query if recovery failed + //or wait for recovery to kick in and run again later + //likely case has closed and threads are being interrupted + return null; + } catch (CancellationException e) { + logger.log(Level.INFO, "Cancellation requested, exiting during keyword query: {0}", keyword.getSearchTerm()); //NON-NLS + return null; + } + + // Reduce the results of the query to only those hits we + // have not already seen. 
+ QueryResults newResults = filterResults(queryResults); + + if (!newResults.getKeywords().isEmpty()) { + // Create blackboard artifacts + newResults.process(this, keywordList.getIngestMessages(), true, job.getJobId()); + } + } + } catch (Exception ex) { + logger.log(Level.SEVERE, String.format("Error performing keyword search for ingest job %d", job.getJobId()), ex); //NON-NLS + } finally { + if (progressIndicator != null) { + SwingUtilities.invokeLater(new Runnable() { + @Override + public void run() { + progressIndicator.finish(); + progressIndicator = null; + } + }); + } + // In case a thread is waiting on this worker to be done + job.searchNotify(); + } + + return null; + } + + /** + * Sync-up the updated keywords from the currently used lists in the XML + */ + private void updateKeywords() { + XmlKeywordSearchList loader = XmlKeywordSearchList.getCurrent(); + + keywords.clear(); + keywordToList.clear(); + keywordLists.clear(); + + for (String name : keywordListNames) { + KeywordList list = loader.getList(name); + keywordLists.add(list); + for (Keyword k : list.getKeywords()) { + keywords.add(k); + keywordToList.put(k, list); + } + } + } + + /** + * This method filters out all of the hits found in earlier periodic + * searches and returns only the results found by the most recent + * search. + * + * This method will only return hits for objects for which we haven't + * previously seen a hit for the keyword. + * + * @param queryResult The results returned by a keyword search. + * + * @return A unique set of hits found by the most recent search for + * objects that have not previously had a hit. The hits will be + * for the lowest numbered chunk associated with the object. + * + */ + private QueryResults filterResults(QueryResults queryResult) { + + // Create a new (empty) QueryResults object to hold the most recently + // found hits. + QueryResults newResults = new QueryResults(queryResult.getQuery()); + + // For each keyword represented in the results. + for (Keyword keyword : queryResult.getKeywords()) { + // These are all of the hits across all objects for the most recent search. + // This may well include duplicates of hits we've seen in earlier periodic searches. + List<KeywordHit> queryTermResults = queryResult.getResults(keyword); + + // Sort the hits for this keyword so that we are always + // guaranteed to return the hit for the lowest chunk. + Collections.sort(queryTermResults); + + // This will be used to build up the hits we haven't seen before + // for this keyword. + List<KeywordHit> newUniqueHits = new ArrayList<>(); + + // Get the set of object ids seen in the past by this searcher + // for the given keyword. + Set<Long> curTermResults = job.currentKeywordResults(keyword); + if (curTermResults == null) { + // We create a new empty set if we haven't seen results for + // this keyword before. + curTermResults = new HashSet<>(); + } + + // For each hit for this keyword. + for (KeywordHit hit : queryTermResults) { + if (curTermResults.contains(hit.getSolrObjectId())) { + // Skip the hit if we've already seen a hit for + // this keyword in the object. + continue; + } + + // We haven't seen the hit before so add it to list of new + // unique hits. + newUniqueHits.add(hit); + + // Add the object id to the results we've seen for this + // keyword. + curTermResults.add(hit.getSolrObjectId()); + } + + // Update the job with the list of objects for which we have + // seen hits for the current keyword. 
+ job.addKeywordResults(keyword, curTermResults); + + // Add the new hits for the current keyword into the results + // to be returned. + newResults.addResult(keyword, newUniqueHits); + } + + return newResults; + } + } + +} diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java index 70791b8a6b715cde955a88a3b1f1098dc0818082..4aa19564dfbd4e35d558224334b72c4a8d13c71d 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java @@ -19,14 +19,9 @@ package org.sleuthkit.autopsy.keywordsearch; import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; import java.io.Reader; -import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; -import java.util.List; import java.util.Map; import java.util.Optional; import java.util.logging.Level; @@ -34,7 +29,6 @@ import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.common.SolrInputDocument; import org.openide.util.NbBundle; -import org.openide.util.io.ReaderInputStream; import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.coreutils.TimeZoneUtils; import org.sleuthkit.autopsy.healthmonitor.HealthMonitor; @@ -150,10 +144,10 @@ private Map<String, String> getContentFields(Content item) { * @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException */ // TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients -// < T extends SleuthkitVisitableItem> boolean search(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean indexIntoSolr, List<String> keywordListNames) throws Ingester.IngesterException { -// boolean doLanguageDetection = true; -// return search(sourceReader, sourceID, sourceName, source, context, doLanguageDetection, indexIntoSolr, keywordListNames); -// } + < T extends Content> boolean indexText(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context) throws Ingester.IngesterException { + boolean doLanguageDetection = true; + return indexText(sourceReader, sourceID, sourceName, source, context, doLanguageDetection); + } /** * Read and chunk the source text for indexing in Solr. Does NOT perform @@ -174,17 +168,11 @@ private Map<String, String> getContentFields(Content item) { * @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException */ // TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients -// < T extends SleuthkitVisitableItem> boolean searchStrings(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean indexIntoSolr) throws Ingester.IngesterException { -// // Per JIRA-7100, it was determined that language detection on extracted strings can take a really long time. -// boolean doLanguageDetection = false; -// return search(sourceReader, sourceID, sourceName, source, context, doLanguageDetection, indexIntoSolr, null); -// } -// -// < T extends SleuthkitVisitableItem> boolean searchStrings(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean indexIntoSolr, List<String> keywordListNames) throws Ingester.IngesterException { -// // Per JIRA-7100, it was determined that language detection on extracted strings can take a really long time. 
-// boolean doLanguageDetection = false; -// return search(sourceReader, sourceID, sourceName, source, context, doLanguageDetection, indexIntoSolr, keywordListNames); -// } + < T extends Content> boolean indexStrings(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context) throws Ingester.IngesterException { + // Per JIRA-7100, it was determined that language detection on extracted strings can take a really long time. + boolean doLanguageDetection = false; + return indexText(sourceReader, sourceID, sourceName, source, context, doLanguageDetection); + } /** * Read and chunk the source text for indexing in Solr. @@ -207,157 +195,58 @@ private Map<String, String> getContentFields(Content item) { // TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients private < T extends Content> boolean indexText(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean doLanguageDetection) throws Ingester.IngesterException { int numChunks = 0; //unknown until chunking is done - Map<String, String> contentFields = Collections.unmodifiableMap(getContentFields(source)); - Optional<Language> language = Optional.empty(); - InlineSearcher searcher = new InlineSearcher(keywordListNames, context); - List<Chunk> activeChunkList = new ArrayList<>(); - boolean fileIndexed = false; - - //Get a reader for the content of the given source - try (BufferedReader reader = new BufferedReader(sourceReader)) { - Chunker chunker = new Chunker(reader); - String name = sourceName; - if(!(source instanceof BlackboardArtifact)) { - searcher.searchString(name, sourceID, 0); - } - - while (chunker.hasNext()) { - if ( context.fileIngestIsCancelled()) { - logger.log(Level.INFO, "File ingest cancelled. 
Cancelling keyword search indexing of {0}", sourceName); - return; - } - - Chunk chunk = chunker.next(); - chunk.setChunkId(numChunks+1); - - if (doLanguageDetection) { - int size = Math.min(chunk.getBaseChunkLength(), LANGUAGE_DETECTION_STRING_SIZE); - language = languageSpecificContentIndexingHelper.detectLanguageIfNeeded(chunk.toString().substring(0, size)); - - // only do language detection on the first chunk of the document - doLanguageDetection = false; - } - - if(keywordListNames != null) { - boolean hitFoundInChunk = searcher.searchChunk(chunk, sourceID, numChunks); - if(!indexIntoSolr) { - if(!hitFoundInChunk) { - if(!activeChunkList.isEmpty() ) { - if(activeChunkList.get(activeChunkList.size() - 1).hasHit()) { - activeChunkList.add(chunk); - // Write List - for(Chunk c: activeChunkList) { - indexChunk(c, sourceID, sourceName, language, contentFields, chunker.hasNext()); - } - activeChunkList.clear(); - } else { - activeChunkList.clear(); - activeChunkList.add(chunk); - } - } else { - activeChunkList.add(chunk); - } - } else { - fileIndexed = true; - chunk.setHasHit(true); - activeChunkList.add(chunk); - } - } else { - indexChunk(chunk, sourceID, sourceName, language, contentFields, chunker.hasNext()); - } - } - - numChunks++; - - } - - if(activeChunkList.size() > 1 || (activeChunkList.size() == 1 && activeChunkList.get(0).hasHit())) { - for(Chunk c: activeChunkList) { - indexChunk(c, sourceID, sourceName, language, contentFields, true); - } - } - - - if (chunker.hasException()) { - logger.log(Level.WARNING, "Error chunking content from " + sourceID + ": " + sourceName, chunker.getException()); - throw chunker.getException(); - } - - } finally { - if (context.fileIngestIsCancelled()) { - return ; - } - - if (fileIndexed) { - Map<String, Object> fields = new HashMap<>(contentFields); - //after all chunks, index just the meta data, including the numChunks, of the parent file - fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks)); - //reset id field to base document id - fields.put(Server.Schema.ID.toString(), Long.toString(sourceID)); - //"parent" docs don't have chunk_size - fields.remove(Server.Schema.CHUNK_SIZE.toString()); - indexChunk(null, null, sourceName, fields); - } - } - } - - < T extends SleuthkitVisitableItem> boolean indexFile(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean doLanguageDetection) throws Ingester.IngesterException { - int numChunks = 0; //unknown until chunking is done + Map<String, String> contentFields = Collections.unmodifiableMap(getContentFields(source)); Optional<Language> language = Optional.empty(); //Get a reader for the content of the given source try (BufferedReader reader = new BufferedReader(sourceReader)) { Chunker chunker = new Chunker(reader); while (chunker.hasNext()) { - if ( context.fileIngestIsCancelled()) { + if (context != null && context.fileIngestIsCancelled()) { logger.log(Level.INFO, "File ingest cancelled. 
Cancelling keyword search indexing of {0}", sourceName); return false; } Chunk chunk = chunker.next(); - + Map<String, Object> fields = new HashMap<>(contentFields); + String chunkId = Server.getChunkIdString(sourceID, numChunks + 1); + fields.put(Server.Schema.ID.toString(), chunkId); + fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength())); + if (doLanguageDetection) { int size = Math.min(chunk.getBaseChunkLength(), LANGUAGE_DETECTION_STRING_SIZE); language = languageSpecificContentIndexingHelper.detectLanguageIfNeeded(chunk.toString().substring(0, size)); - + // only do language detection on the first chunk of the document doLanguageDetection = false; } - - Map<String, Object> fields = new HashMap<>(contentFields); - String chunkId = Server.getChunkIdString(sourceID, numChunks + 1); - fields.put(Server.Schema.ID.toString(), chunkId); - fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength())); - language.ifPresent(lang -> languageSpecificContentIndexingHelper.updateLanguageSpecificFields(fields, chunk, lang)); try { //add the chunk text to Solr index - indexChunk(chunk.toString(), chunk.getLowerCasedChunk(), sourceName, fields); + indexChunk(chunk.toString(), chunk.geLowerCasedChunk(), sourceName, fields); // add mini chunk when there's a language specific field if (chunker.hasNext() && language.isPresent()) { languageSpecificContentIndexingHelper.indexMiniChunk(chunk, sourceName, new HashMap<>(contentFields), chunkId, language.get()); } - numChunks++; - + numChunks++; } catch (Ingester.IngesterException ingEx) { logger.log(Level.WARNING, "Ingester had a problem with extracted string from file '" //NON-NLS + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS throw ingEx; //need to rethrow to signal error and move on - } + } } if (chunker.hasException()) { logger.log(Level.WARNING, "Error chunking content from " + sourceID + ": " + sourceName, chunker.getException()); return false; } - } catch (Exception ex) { logger.log(Level.WARNING, "Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS return false; - } finally { - if (context.fileIngestIsCancelled()) { + } finally { + if (context != null && context.fileIngestIsCancelled()) { return false; - } else { + } else { Map<String, Object> fields = new HashMap<>(contentFields); //after all chunks, index just the meta data, including the numChunks, of the parent file fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks)); @@ -368,34 +257,8 @@ < T extends SleuthkitVisitableItem> boolean indexFile(Reader sourceReader, long indexChunk(null, null, sourceName, fields); } } - - return true; } - - private void indexChunk(Chunk chunk, long sourceID, String sourceName, Optional<Language> language, Map<String, String> contentFields, boolean hasNext) throws IngesterException { - Map<String, Object> fields = new HashMap<>(contentFields); - String chunkId = Server.getChunkIdString(sourceID, chunk.getChunkId()); - fields.put(Server.Schema.ID.toString(), chunkId); - fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength())); - - - language.ifPresent(lang -> languageSpecificContentIndexingHelper.updateLanguageSpecificFields(fields, chunk, lang)); - try { - //add the chunk text to Solr index - indexChunk(chunk.toString(), chunk.getLowerCasedChunk(), sourceName, fields); - // add mini chunk when there's a language specific field - if (hasNext && language.isPresent()) { - 
languageSpecificContentIndexingHelper.indexMiniChunk(chunk, sourceName, new HashMap<>(contentFields), chunkId, language.get()); - } - - } catch (Ingester.IngesterException ingEx) { - logger.log(Level.WARNING, "Ingester had a problem with extracted string from file '" //NON-NLS - + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS - - throw ingEx; //need to rethrow to signal error and move on - } - } /** * Add one chunk as to the Solr index as a separate Solr document. diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/InlineSearcher.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/InlineSearcher.java deleted file mode 100755 index b8a8dcdf6164d38b8da1daaa4e94a20f08674457..0000000000000000000000000000000000000000 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/InlineSearcher.java +++ /dev/null @@ -1,614 +0,0 @@ -/* - * Autopsy Forensic Browser - * - * Copyright 2022 Basis Technology Corp. - * Contact: carrier <at> sleuthkit <dot> org - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.sleuthkit.autopsy.keywordsearch; - -import com.twelvemonkeys.lang.StringUtil; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.concurrent.ConcurrentHashMap; -import java.util.logging.Level; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import org.apache.commons.validator.routines.DomainValidator; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.sleuthkit.autopsy.casemodule.Case; -import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException; -import org.sleuthkit.autopsy.coreutils.Logger; -import org.sleuthkit.autopsy.ingest.IngestJobContext; -import org.sleuthkit.autopsy.keywordsearch.Chunker.Chunk; -import static org.sleuthkit.autopsy.keywordsearch.RegexQuery.CREDIT_CARD_NUM_PATTERN; -import org.sleuthkit.datamodel.Blackboard; -import org.sleuthkit.datamodel.BlackboardArtifact; -import org.sleuthkit.datamodel.BlackboardAttribute; -import org.sleuthkit.datamodel.Content; -import org.sleuthkit.datamodel.SleuthkitCase; -import org.sleuthkit.datamodel.TskCoreException; -import org.sleuthkit.datamodel.TskException; - -final class InlineSearcher { - - private final List<KeywordList> keywordList; - private static final int MIN_EMAIL_ADDR_LENGTH = 8; - private static final Logger logger = Logger.getLogger(InlineSearcher.class.getName()); - - private final IngestJobContext context; - - static final Map<Long, List<UniqueKeywordHit>> uniqueHitMap = new ConcurrentHashMap<>(); - - static final Map<Long, Map<Long, Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>>>> uniqueHitMap2 = new ConcurrentHashMap<>(); - - // Uses 
mostly native java and the lucene api to search the a given chuck - // for Keywords. Create unique KeywordHits for any unique hit. - InlineSearcher(List<String> keywordListNames, IngestJobContext context) { - this.keywordList = new ArrayList<>(); - this.context = context; - - if (keywordListNames != null) { - XmlKeywordSearchList loader = XmlKeywordSearchList.getCurrent(); - for (String name : keywordListNames) { - keywordList.add(loader.getList(name)); - } - } - } - - /** - * Search the chunk for the currently selected keywords. - * - * @param chunk - * @param sourceID - * - * @throws TskCoreException - */ - boolean searchChunk(Chunk chunk, long sourceID, int chunkId) throws TskCoreException { - return searchString(chunk.getLowerCasedChunk(), sourceID, chunkId); - } - - /** - * Search a string for the currently selected keywords. - * - * @param text - * @param sourceID - * - * @throws TskCoreException - */ - boolean searchString(String text, long sourceID, int chunkId) throws TskCoreException { - boolean hitFound = false; - Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>> hitByKeyword = getMap(context.getJobId(), sourceID); - for (KeywordList list : keywordList) { - List<Keyword> keywords = list.getKeywords(); - for (Keyword originalKeyword : keywords) { - Map<Keyword, List<UniqueKeywordHit>> hitMap = hitByKeyword.get(originalKeyword); - if (hitMap == null) { - hitMap = new HashMap<>(); - hitByKeyword.put(originalKeyword, hitMap); - } - - List<UniqueKeywordHit> keywordHits = new ArrayList<>(); - if (originalKeyword.searchTermIsLiteral()) { - if (StringUtil.containsIgnoreCase(text, originalKeyword.getSearchTerm())) { - keywordHits.addAll(createKeywordHits(text, originalKeyword, sourceID, chunkId, list.getName())); - } - } else { - String regex = originalKeyword.getSearchTerm(); - - try { - // validate the regex - Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE); - Matcher matcher = pattern.matcher(text); - - if (matcher.find()) { - keywordHits.addAll(createKeywordHits(text, originalKeyword, sourceID, chunkId, list.getName())); - } - } catch (IllegalArgumentException ex) { - //TODO What should we do here? Log and continue? - } - } - - if (!keywordHits.isEmpty()) { - hitFound = true; - for (UniqueKeywordHit hit : keywordHits) { - Keyword keywordCopy = new Keyword(hit.getHit(), - originalKeyword.searchTermIsLiteral(), - originalKeyword.searchTermIsWholeWord(), - list.getName(), - originalKeyword.getOriginalTerm()); - - List<UniqueKeywordHit> mapHitList = hitMap.get(keywordCopy); - if (mapHitList == null) { - mapHitList = new ArrayList<>(); - hitMap.put(keywordCopy, mapHitList); - } - - if (!mapHitList.contains(hit)) { - mapHitList.add(hit); - } - } - } - - if (context.fileIngestIsCancelled()) { - return hitFound; - } - } - } - return hitFound; - } - - /** - * This method very similar to RegexQuery createKeywordHits, with the - * knowledge of solr removed. - * - * @param text - * @param originalKeyword - * - * @return A list of KeywordHit objects. 
- * - * @throws TskCoreException - */ - private List<UniqueKeywordHit> createKeywordHits(String text, Keyword originalKeyword, long sourceID, int chunkId, String keywordListName) throws TskCoreException { - - if (originalKeyword.searchTermIsLiteral() && originalKeyword.searchTermIsWholeWord()) { - try { - return getExactMatchHits(text, originalKeyword, sourceID, chunkId, keywordListName); - } catch (IOException ex) { - throw new TskCoreException("Failed to create exactMatch hits", ex); - } - } - - final HashMap<String, String> keywordsFoundInThisDocument = new HashMap<>(); - - List<UniqueKeywordHit> hits = new ArrayList<>(); - String keywordString = originalKeyword.getSearchTerm(); - - boolean queryStringContainsWildcardSuffix = originalKeyword.getSearchTerm().endsWith(".*"); - - String searchPattern; - if (originalKeyword.searchTermIsLiteral()) { - /** - * For substring searches, the following pattern was arrived at - * through trial and error in an attempt to reproduce the same hits - * we were getting when we were using the TermComponent approach. - * This basically looks for zero of more word characters followed - * optionally by a dot or apostrophe, followed by the quoted - * lowercase substring following by zero or more word characters - * followed optionally by a dot or apostrophe. The reason that the - * dot and apostrophe characters are being handled here is because - * the old code used to find hits in domain names (e.g. hacks.ie) - * and possessives (e.g. hacker's). This obviously works for English - * but is probably not sufficient for other languages. - */ - searchPattern = "[\\w[\\.']]*" + java.util.regex.Pattern.quote(keywordString.toLowerCase()) + "[\\w[\\.']]*"; - - } else { - searchPattern = keywordString; - } - - final java.util.regex.Pattern pattern = java.util.regex.Pattern.compile(searchPattern, Pattern.CASE_INSENSITIVE); - - try { - String content = text; - Matcher hitMatcher = pattern.matcher(content); - int offset = 0; - - while (hitMatcher.find(offset)) { - - String hit = hitMatcher.group().toLowerCase(); - - /** - * No need to continue on if the the string is "" nothing to - * find or do. - */ - if ("".equals(hit)) { - break; - } - - offset = hitMatcher.end(); - final BlackboardAttribute.ATTRIBUTE_TYPE artifactAttributeType = originalKeyword.getArtifactAttributeType(); - - // We attempt to reduce false positives for phone numbers and IP address hits - // by querying Solr for hits delimited by a set of known boundary characters. - // See KeywordSearchList.PHONE_NUMBER_REGEX for an example. - // Because of this the hits may contain an extra character at the beginning or end that - // needs to be chopped off, unless the user has supplied their own wildcard suffix - // as part of the regex. - if (!queryStringContainsWildcardSuffix - && (artifactAttributeType == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PHONE_NUMBER - || artifactAttributeType == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_IP_ADDRESS)) { - if (artifactAttributeType == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PHONE_NUMBER) { - // For phone numbers replace all non numeric characters (except "(") at the start of the hit. - hit = hit.replaceAll("^[^0-9\\(]", ""); - } else { - // Replace all non numeric characters at the start of the hit. - hit = hit.replaceAll("^[^0-9]", ""); - } - // Replace all non numeric at the end of the hit. 
- hit = hit.replaceAll("[^0-9]$", ""); - - if (offset > 1) { - /* - * NOTE: our IP and phone number regex patterns look for - * boundary characters immediately before and after the - * keyword hit. After a match, Java pattern mather - * re-starts at the first character not matched by the - * previous match. This basically requires two boundary - * characters to be present between each pattern match. - * To mitigate this we are resetting the offest one - * character back. - */ - offset--; - } - } - - /** - * Boundary characters are removed from the start and end of the - * hit to normalize the hits. This is being done for substring - * searches only at this point. We don't do it for real regular - * expression searches because the user may have explicitly - * included boundary characters in their regular expression. - */ - if (originalKeyword.searchTermIsLiteral()) { - hit = hit.replaceAll("^" + KeywordSearchList.BOUNDARY_CHARACTERS + "*", ""); - hit = hit.replaceAll(KeywordSearchList.BOUNDARY_CHARACTERS + "*$", ""); - } - - /** - * The use of String interning is an optimization to ensure that - * we reuse the same keyword hit String object across all hits. - * Even though we benefit from G1GC String deduplication, the - * overhead associated with creating a new String object for - * every KeywordHit can be significant when the number of hits - * gets large. - */ - hit = hit.intern(); - - // We will only create one KeywordHit instance per document for - // a given hit. - if (keywordsFoundInThisDocument.containsKey(hit)) { - continue; - } - keywordsFoundInThisDocument.put(hit, hit); - - if (artifactAttributeType == null) { - hits.add(new UniqueKeywordHit(chunkId, sourceID, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit, keywordListName, originalKeyword.searchTermIsWholeWord(), originalKeyword.searchTermIsLiteral(), originalKeyword.getArtifactAttributeType(), originalKeyword.getSearchTerm())); - } else { - switch (artifactAttributeType) { - case TSK_EMAIL: - /* - * Reduce false positives by eliminating email - * address hits that are either too short or are not - * for valid top level domains. - */ - if (hit.length() >= MIN_EMAIL_ADDR_LENGTH - && DomainValidator.getInstance(true).isValidTld(hit.substring(hit.lastIndexOf('.')))) { - hits.add(new UniqueKeywordHit(chunkId, sourceID, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit, keywordListName, originalKeyword.searchTermIsWholeWord(), originalKeyword.searchTermIsLiteral(), originalKeyword.getArtifactAttributeType(), originalKeyword.getSearchTerm())); - } - - break; - case TSK_CARD_NUMBER: - /* - * If searching for credit card account numbers, do - * extra validation on the term and discard it if it - * does not pass. 
- */ - Matcher ccnMatcher = CREDIT_CARD_NUM_PATTERN.matcher(hit); - - for (int rLength = hit.length(); rLength >= 12; rLength--) { - ccnMatcher.region(0, rLength); - if (ccnMatcher.find()) { - final String group = ccnMatcher.group("ccn"); - if (CreditCardValidator.isValidCCN(group)) { - hits.add(new UniqueKeywordHit(chunkId, sourceID, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit, keywordListName, originalKeyword.searchTermIsWholeWord(), originalKeyword.searchTermIsLiteral(), originalKeyword.getArtifactAttributeType(), originalKeyword.getSearchTerm())); - } - } - } - - break; - default: - hits.add(new UniqueKeywordHit(chunkId, sourceID, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit, keywordListName, originalKeyword.searchTermIsWholeWord(), originalKeyword.searchTermIsLiteral(), originalKeyword.getArtifactAttributeType(), originalKeyword.getSearchTerm())); - break; - } - } - } - - } catch (Throwable error) { - /* - * NOTE: Matcher.find() is known to throw StackOverflowError in rare - * cases (see JIRA-2700). StackOverflowError is an error, not an - * exception, and therefore needs to be caught as a Throwable. When - * this occurs we should re-throw the error as TskCoreException so - * that it is logged by the calling method and move on to the next - * Solr document. - */ - throw new TskCoreException("Failed to create keyword hits for chunk due to " + error.getMessage()); - } - return hits; - } - - /** - * Clean up the memory that is being used for the given job. - * - * @param context - */ - static void cleanup(IngestJobContext context) { - Map<Long, Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>>> jobMap = uniqueHitMap2.get(context.getJobId()); - if (jobMap != null) { - jobMap.clear(); - } - } - - /** - * Generates the artifacts for the found KeywordHits. This method should be - * called once per content object. - * - * @param context - */ - static void makeArtifacts(IngestJobContext context) throws TskException { - - Map<Long, Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>>> jobMap = uniqueHitMap2.get(context.getJobId()); - if (jobMap == null) { - return; - } - - for (Map.Entry<Long, Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>>> mapBySource : jobMap.entrySet()) { - Long sourceId = mapBySource.getKey(); - Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>> mapByKeyword = mapBySource.getValue(); - - for (Map.Entry<Keyword, Map<Keyword, List<UniqueKeywordHit>>> item : mapByKeyword.entrySet()) { - Keyword originalKeyword = item.getKey(); - Map<Keyword, List<UniqueKeywordHit>> map = item.getValue(); - - List<BlackboardArtifact> hitArtifacts = new ArrayList<>(); - if (!map.isEmpty()) { - for (Map.Entry<Keyword, List<UniqueKeywordHit>> entry : map.entrySet()) { - Keyword hitKeyword = entry.getKey(); - List<UniqueKeywordHit> hitList = entry.getValue(); - // Only create one hit for the document. - // The first hit in the list should be the first one that - // was found. - if (!hitList.isEmpty()) { - UniqueKeywordHit hit = hitList.get(0); - SleuthkitCase tskCase = Case.getCurrentCase().getSleuthkitCase(); - Content content = tskCase.getContentById(hit.getContentID()); - BlackboardArtifact artifact = RegexQuery.createKeywordHitArtifact(content, originalKeyword, hitKeyword, hit, hit.getSnippet(), hitKeyword.getListName(), sourceId); - // createKeywordHitArtifact has the potential to return null - // when a CCN account is created. 
- if (artifact != null) { - hitArtifacts.add(artifact); - - } - - } - } - - if (!hitArtifacts.isEmpty()) { - try { - SleuthkitCase tskCase = Case.getCurrentCaseThrows().getSleuthkitCase(); - Blackboard blackboard = tskCase.getBlackboard(); - - blackboard.postArtifacts(hitArtifacts, "KeywordSearch", context.getJobId()); - hitArtifacts.clear(); - } catch (NoCurrentCaseException | Blackboard.BlackboardException ex) { - logger.log(Level.SEVERE, "Failed to post KWH artifact to blackboard.", ex); //NON-NLS - } - } - - if (context.fileIngestIsCancelled()) { - return; - } - } - } - } - } - - /** - * Searches the chunk for exact matches and creates the appropriate keyword - * hits. - * - * @param text - * @param originalKeyword - * @param sourceID - * - * @return - * - * @throws IOException - */ - public List<UniqueKeywordHit> getExactMatchHits(String text, Keyword originalKeyword, long sourceID, int chunkId, String keywordListName) throws IOException { - final HashMap<String, String> keywordsFoundInThisDocument = new HashMap<>(); - - List<UniqueKeywordHit> hits = new ArrayList<>(); - Analyzer analyzer = new StandardAnalyzer(); - - //Get the tokens of the keyword - List<String> keywordTokens = new ArrayList<>(); - try (TokenStream keywordstream = analyzer.tokenStream("field", originalKeyword.getSearchTerm())) { - CharTermAttribute attr = keywordstream.addAttribute(CharTermAttribute.class); - keywordstream.reset(); - while (keywordstream.incrementToken()) { - keywordTokens.add(attr.toString()); - } - } - - try (TokenStream stream = analyzer.tokenStream("field", text)) { - CharTermAttribute attr = stream.addAttribute(CharTermAttribute.class); - OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); - stream.reset(); - while (stream.incrementToken()) { - if (!attr.toString().equals(keywordTokens.get(0))) { - continue; - } - - int startOffset = offset.startOffset(); - int endOffset = offset.endOffset(); - boolean match = true; - - for (int index = 1; index < keywordTokens.size(); index++) { - if (stream.incrementToken()) { - if (!attr.toString().equals(keywordTokens.get(index))) { - match = false; - break; - } else { - endOffset = offset.endOffset(); - } - } - } - - if (match) { - String hit = text.subSequence(startOffset, endOffset).toString(); - - // We will only create one KeywordHit instance per document for - // a given hit. - if (keywordsFoundInThisDocument.containsKey(hit)) { - continue; - } - keywordsFoundInThisDocument.put(hit, hit); - - hits.add(new UniqueKeywordHit(chunkId, sourceID, KeywordSearchUtil.makeSnippet(text, startOffset, endOffset, hit), hit, keywordListName, originalKeyword.searchTermIsWholeWord(), originalKeyword.searchTermIsLiteral(), originalKeyword.getArtifactAttributeType(), originalKeyword.getOriginalTerm())); - } - } - } - - return hits; - } - - /** - * Get the keyword map for the given job and source. 
- * - * @param jobId - * @param sourceID - * - * @return - */ - static private Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>> getMap(long jobId, long sourceID) { - Map<Long, Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>>> jobMap = uniqueHitMap2.get(jobId); - if (jobMap == null) { - jobMap = new ConcurrentHashMap<>(); - uniqueHitMap2.put(jobId, jobMap); - } - - Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>> sourceMap = jobMap.get(sourceID); - if (sourceMap == null) { - sourceMap = new ConcurrentHashMap<>(); - jobMap.put(sourceID, sourceMap); - } - - return sourceMap; - } - - // KeywordHit is not unique enough for finding duplicates, this class - // extends the KeywordHit class to make truely unique hits. - static class UniqueKeywordHit extends KeywordHit { - - private final String listName; - private final boolean isLiteral; - private final boolean isWholeWord; - private final BlackboardAttribute.ATTRIBUTE_TYPE artifactAtrributeType; - private final String originalSearchTerm; - - UniqueKeywordHit(int chunkId, long sourceID, String snippet, String hit, String listName, boolean isWholeWord, boolean isLiteral, BlackboardAttribute.ATTRIBUTE_TYPE artifactAtrributeType, String originalSearchTerm) { - super(chunkId, sourceID, snippet, hit); - - this.listName = listName; - this.isWholeWord = isWholeWord; - this.isLiteral = isLiteral; - this.artifactAtrributeType = artifactAtrributeType; - this.originalSearchTerm = originalSearchTerm; - } - - @Override - public int compareTo(KeywordHit other) { - return compare((UniqueKeywordHit) other); - } - - private int compare(UniqueKeywordHit other) { - return Comparator.comparing(UniqueKeywordHit::getSolrObjectId) - .thenComparing(UniqueKeywordHit::getChunkId) - .thenComparing(UniqueKeywordHit::getHit) - .thenComparing(UniqueKeywordHit::getSnippet) - .thenComparing(UniqueKeywordHit::isWholeWord) - .thenComparing(UniqueKeywordHit::isLiteral) - .thenComparing(UniqueKeywordHit::getArtifactAtrributeType) - .thenComparing(UniqueKeywordHit::getOriginalSearchTerm) - .thenComparing(UniqueKeywordHit::getListName) - .compare(this, other); - } - - @Override - public boolean equals(Object obj) { - - if (null == obj) { - return false; - } - if (getClass() != obj.getClass()) { - return false; - } - final UniqueKeywordHit other = (UniqueKeywordHit) obj; - - return getSnippet().equalsIgnoreCase(other.getSnippet()) - && getSolrObjectId().equals(other.getSolrObjectId()) - && getChunkId().equals(other.getChunkId()) - && getHit().equalsIgnoreCase(other.getHit()) - && listName.equalsIgnoreCase(other.getListName()) - && isLiteral == other.isLiteral() - && isWholeWord == other.isWholeWord() - && originalSearchTerm.equalsIgnoreCase(other.getOriginalSearchTerm()) - && (artifactAtrributeType != null ? artifactAtrributeType.equals(other.getArtifactAtrributeType()) : true); - } - - @Override - public int hashCode() { - int hash = 3; - hash = 67 * hash + super.hashCode(); - hash = 67 * hash + Objects.hashCode(this.listName); - hash = 67 * hash + (this.isLiteral ? 1 : 0); - hash = 67 * hash + (this.isWholeWord ? 
1 : 0); - hash = 67 * hash + Objects.hashCode(this.artifactAtrributeType); - hash = 67 * hash + Objects.hashCode(this.originalSearchTerm); - return hash; - } - - String getListName() { - return listName; - } - - Boolean isLiteral() { - return isLiteral; - } - - Boolean isWholeWord() { - return isWholeWord; - } - - BlackboardAttribute.ATTRIBUTE_TYPE getArtifactAtrributeType() { - return artifactAtrributeType; - } - - String getOriginalSearchTerm() { - return originalSearchTerm; - } - - } -} diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Keyword.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Keyword.java index 98479b7b41082a34f8ab38b1cf753f882f39d0cd..d7c7d7705fb42dd62b0c199f7b9ea974cafa9234 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Keyword.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Keyword.java @@ -211,10 +211,8 @@ public boolean equals(Object obj) { public int hashCode() { int hash = 7; hash = 17 * hash + this.searchTerm.hashCode(); - hash = 17 * hash + this.listName.hashCode(); hash = 17 * hash + (this.isLiteral ? 1 : 0); hash = 17 * hash + (this.isWholeWord ? 1 : 0); - hash = 17 * hash + this.originalTerm.hashCode(); return hash; } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java index 5e39eb8af560b818cf83f2d76467428a4fa417f2..c147618558b355a5d0a9f6112a8b6264f3ce0e58 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java @@ -21,12 +21,10 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.util.Comparator; -import java.util.Objects; import java.util.Optional; import org.apache.commons.lang3.StringUtils; import org.sleuthkit.autopsy.casemodule.Case; import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException; -import org.sleuthkit.datamodel.BlackboardAttribute; import org.sleuthkit.datamodel.SleuthkitCase; import org.sleuthkit.datamodel.TskCoreException; @@ -45,7 +43,7 @@ class KeywordHit implements Comparable<KeywordHit> { private final int chunkId; private final String snippet; private final String hit; - + /** * Constructor * @@ -56,7 +54,7 @@ class KeywordHit implements Comparable<KeywordHit> { * For some searches (ie substring, regex) this will be * different than the search term. * - */ + */ KeywordHit(String solrDocumentId, String snippet, String hit) { this.snippet = StringUtils.stripToEmpty(snippet); this.hit = hit; @@ -70,30 +68,17 @@ class KeywordHit implements Comparable<KeywordHit> { * documents. One contains object metadata (chunk #1) and the second and * subsequent documents contain chunks of the text. */ - if(!solrDocumentId.isEmpty()) { - String[] split = solrDocumentId.split(Server.CHUNK_ID_SEPARATOR); - if (split.length == 1) { - //chunk 0 has only the bare document id without the chunk id. - this.solrObjectId = Long.parseLong(solrDocumentId); - this.chunkId = 0; - } else { - this.solrObjectId = Long.parseLong(split[0]); - this.chunkId = Integer.parseInt(split[1]); - } - } else { - this.solrObjectId = 0; + String[] split = solrDocumentId.split(Server.CHUNK_ID_SEPARATOR); + if (split.length == 1) { + //chunk 0 has only the bare document id without the chunk id. 
+ this.solrObjectId = Long.parseLong(solrDocumentId); this.chunkId = 0; + } else { + this.solrObjectId = Long.parseLong(split[0]); + this.chunkId = Integer.parseInt(split[1]); } } - KeywordHit(int chunkId, long sourceID, String snippet, String hit) { - this.snippet = StringUtils.stripToEmpty(snippet); - this.hit = hit; - this.chunkId = chunkId; - this.solrObjectId = sourceID; - } - - String getHit() { return hit; } @@ -102,11 +87,11 @@ String getSolrDocumentId() { return Long.toString(solrObjectId) + Server.CHUNK_ID_SEPARATOR + Long.toString(chunkId); } - Long getSolrObjectId() { + long getSolrObjectId() { return this.solrObjectId; } - Integer getChunkId() { + int getChunkId() { return this.chunkId; } @@ -117,7 +102,7 @@ boolean hasSnippet() { String getSnippet() { return this.snippet; } - + /** * Get the content id associated with the content underlying hit. * For hits on files this will be the same as the object id associated @@ -187,25 +172,20 @@ public boolean equals(Object obj) { return false; } final KeywordHit other = (KeywordHit) obj; - return compareTo(other) == 0; + return this.compareTo(other) == 0; } - + @Override public int hashCode() { - int hash = 7; - hash = 37 * hash + (int) (this.solrObjectId ^ (this.solrObjectId >>> 32)); - hash = 37 * hash + this.chunkId; - hash = 37 * hash + Objects.hashCode(this.snippet); - hash = 37 * hash + Objects.hashCode(this.hit); + int hash = 3; + hash = 41 * hash + (int) this.solrObjectId + this.chunkId; return hash; } @Override - public int compareTo(KeywordHit other) { + public int compareTo(KeywordHit o) { return Comparator.comparing(KeywordHit::getSolrObjectId) .thenComparing(KeywordHit::getChunkId) - .thenComparing(KeywordHit::getHit) - .thenComparing(KeywordHit::getSnippet) - .compare(this, other); + .compare(this, o); } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchGlobalSearchSettingsPanel.form b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchGlobalSearchSettingsPanel.form index 0f8800efdd0af3e8641397d43222f3c7a9791361..4a0cadaefd4c67c9bdea4ac3ea756b9e56353ac6 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchGlobalSearchSettingsPanel.form +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchGlobalSearchSettingsPanel.form @@ -1,6 +1,10 @@ <?xml version="1.0" encoding="UTF-8" ?> <Form version="1.5" maxVersion="1.7" type="org.netbeans.modules.form.forminfo.JPanelFormInfo"> + <NonVisualComponents> + <Component class="javax.swing.ButtonGroup" name="timeGroup"> + </Component> + </NonVisualComponents> <AuxValues> <AuxValue name="FormSettings_autoResourcing" type="java.lang.Integer" value="1"/> <AuxValue name="FormSettings_autoSetComponentName" type="java.lang.Boolean" value="false"/> @@ -43,13 +47,23 @@ <EmptySpace type="separate" max="-2" attributes="0"/> <Component id="filesIndexedValue" min="-2" max="-2" attributes="0"/> </Group> + <Component id="frequencyLabel" alignment="0" min="-2" max="-2" attributes="0"/> <Group type="102" alignment="0" attributes="0"> <Component id="chunksLabel" linkSize="1" min="-2" max="-2" attributes="0"/> <EmptySpace type="separate" max="-2" attributes="0"/> <Component id="chunksValLabel" min="-2" max="-2" attributes="0"/> </Group> + <Group type="102" alignment="0" attributes="0"> + <EmptySpace min="16" pref="16" max="-2" attributes="0"/> + <Group type="103" groupAlignment="0" attributes="0"> + <Component id="timeRadioButton2" min="-2" max="-2" attributes="0"/> + <Component id="timeRadioButton1" min="-2" 
max="-2" attributes="0"/> + <Component id="timeRadioButton3" alignment="0" min="-2" max="-2" attributes="0"/> + <Component id="timeRadioButton4" alignment="0" min="-2" max="-2" attributes="0"/> + <Component id="timeRadioButton5" alignment="0" min="-2" max="-2" attributes="0"/> + </Group> + </Group> </Group> - <EmptySpace min="-2" pref="132" max="-2" attributes="0"/> </Group> </Group> <EmptySpace max="32767" attributes="0"/> @@ -76,7 +90,19 @@ <Component id="skipNSRLCheckBox" min="-2" max="-2" attributes="0"/> <EmptySpace max="-2" attributes="0"/> <Component id="showSnippetsCB" min="-2" max="-2" attributes="0"/> - <EmptySpace type="unrelated" max="-2" attributes="0"/> + <EmptySpace max="-2" attributes="0"/> + <Component id="frequencyLabel" min="-2" max="-2" attributes="0"/> + <EmptySpace max="-2" attributes="0"/> + <Component id="timeRadioButton1" min="-2" max="-2" attributes="0"/> + <EmptySpace max="-2" attributes="0"/> + <Component id="timeRadioButton2" min="-2" max="-2" attributes="0"/> + <EmptySpace max="-2" attributes="0"/> + <Component id="timeRadioButton3" min="-2" max="-2" attributes="0"/> + <EmptySpace max="-2" attributes="0"/> + <Component id="timeRadioButton4" min="-2" max="-2" attributes="0"/> + <EmptySpace max="-2" attributes="0"/> + <Component id="timeRadioButton5" min="-2" max="-2" attributes="0"/> + <EmptySpace max="-2" attributes="0"/> <Group type="103" groupAlignment="1" attributes="0"> <Component id="informationLabel" min="-2" max="-2" attributes="0"/> <Component id="informationSeparator" min="-2" pref="7" max="-2" attributes="0"/> @@ -93,7 +119,7 @@ </Group> <EmptySpace type="unrelated" max="-2" attributes="0"/> <Component id="ingestWarningLabel" min="-2" max="-2" attributes="0"/> - <EmptySpace pref="151" max="32767" attributes="0"/> + <EmptySpace max="32767" attributes="0"/> </Group> </Group> </DimensionLayout> @@ -158,6 +184,65 @@ </Component> <Component class="javax.swing.JSeparator" name="informationSeparator"> </Component> + <Component class="javax.swing.JLabel" name="frequencyLabel"> + <Properties> + <Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> + <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.frequencyLabel.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> + </Property> + </Properties> + </Component> + <Component class="javax.swing.JRadioButton" name="timeRadioButton1"> + <Properties> + <Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> + <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> + </Property> + <Property name="toolTipText" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> + <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> + </Property> + </Properties> + <Events> + <EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="timeRadioButton1ActionPerformed"/> + </Events> + </Component> + <Component class="javax.swing.JRadioButton" 
name="timeRadioButton2"> + <Properties> + <Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> + <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> + </Property> + <Property name="toolTipText" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> + <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.toolTipText" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> + </Property> + </Properties> + <Events> + <EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="timeRadioButton2ActionPerformed"/> + </Events> + </Component> + <Component class="javax.swing.JRadioButton" name="timeRadioButton3"> + <Properties> + <Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> + <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> + </Property> + <Property name="toolTipText" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> + <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.toolTipText" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> + </Property> + </Properties> + <Events> + <EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="timeRadioButton3ActionPerformed"/> + </Events> + </Component> + <Component class="javax.swing.JRadioButton" name="timeRadioButton4"> + <Properties> + <Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> + <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.text_1" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> + </Property> + <Property name="toolTipText" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> + <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.toolTipText" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> + </Property> + </Properties> + <Events> + <EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="timeRadioButton4ActionPerformed"/> + </Events> + </Component> <Component class="javax.swing.JCheckBox" name="showSnippetsCB"> <Properties> <Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> @@ -168,6 +253,19 @@ <EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="showSnippetsCBActionPerformed"/> </Events> </Component> + <Component class="javax.swing.JRadioButton" name="timeRadioButton5"> + <Properties> + 
<Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> + <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> + </Property> + <Property name="toolTipText" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> + <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.toolTipText" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> + </Property> + </Properties> + <Events> + <EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="timeRadioButton5ActionPerformed"/> + </Events> + </Component> <Component class="javax.swing.JLabel" name="ingestWarningLabel"> <Properties> <Property name="icon" type="javax.swing.Icon" editor="org.netbeans.modules.form.editors2.IconEditor"> diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchGlobalSearchSettingsPanel.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchGlobalSearchSettingsPanel.java index 7393e3ea4dec9d720d69b6f0e53261aa1ba29bbc..ccd1de71dac3c0408c23a03e821589de28022d60 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchGlobalSearchSettingsPanel.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchGlobalSearchSettingsPanel.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2012-2022 Basis Technology Corp. + * Copyright 2012-2018 Basis Technology Corp. 
* Contact: carrier <at> sleuthkit <dot> org * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -26,7 +26,9 @@ import org.openide.util.NbBundle; import org.sleuthkit.autopsy.corecomponents.OptionsPanel; import org.sleuthkit.autopsy.coreutils.Logger; +import org.sleuthkit.autopsy.coreutils.PlatformUtil; import org.sleuthkit.autopsy.ingest.IngestManager; +import org.sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.UpdateFrequency; /** * General, not per list, keyword search configuration and status display widget @@ -51,6 +53,31 @@ private void activateWidgets() { boolean ingestRunning = IngestManager.getInstance().isIngestRunning(); ingestWarningLabel.setVisible(ingestRunning); skipNSRLCheckBox.setEnabled(!ingestRunning); + setTimeSettingEnabled(!ingestRunning); + + final UpdateFrequency curFreq = KeywordSearchSettings.getUpdateFrequency(); + switch (curFreq) { + case FAST: + timeRadioButton1.setSelected(true); + break; + case AVG: + timeRadioButton2.setSelected(true); + break; + case SLOW: + timeRadioButton3.setSelected(true); + break; + case SLOWEST: + timeRadioButton4.setSelected(true); + break; + case NONE: + timeRadioButton5.setSelected(true); + break; + case DEFAULT: + default: + // default value + timeRadioButton3.setSelected(true); + break; + } } /** @@ -62,6 +89,7 @@ private void activateWidgets() { // <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents private void initComponents() { + timeGroup = new javax.swing.ButtonGroup(); skipNSRLCheckBox = new javax.swing.JCheckBox(); filesIndexedLabel = new javax.swing.JLabel(); filesIndexedValue = new javax.swing.JLabel(); @@ -71,7 +99,13 @@ private void initComponents() { informationLabel = new javax.swing.JLabel(); settingsSeparator = new javax.swing.JSeparator(); informationSeparator = new javax.swing.JSeparator(); + frequencyLabel = new javax.swing.JLabel(); + timeRadioButton1 = new javax.swing.JRadioButton(); + timeRadioButton2 = new javax.swing.JRadioButton(); + timeRadioButton3 = new javax.swing.JRadioButton(); + timeRadioButton4 = new javax.swing.JRadioButton(); showSnippetsCB = new javax.swing.JCheckBox(); + timeRadioButton5 = new javax.swing.JRadioButton(); ingestWarningLabel = new javax.swing.JLabel(); skipNSRLCheckBox.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.text")); // NOI18N @@ -94,6 +128,40 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { informationLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.informationLabel.text")); // NOI18N + frequencyLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.frequencyLabel.text")); // NOI18N + + timeRadioButton1.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text")); // NOI18N + timeRadioButton1.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText")); // NOI18N + timeRadioButton1.addActionListener(new java.awt.event.ActionListener() { + public void actionPerformed(java.awt.event.ActionEvent evt) { + timeRadioButton1ActionPerformed(evt); + } + }); + + 
timeRadioButton2.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.text")); // NOI18N + timeRadioButton2.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.toolTipText")); // NOI18N + timeRadioButton2.addActionListener(new java.awt.event.ActionListener() { + public void actionPerformed(java.awt.event.ActionEvent evt) { + timeRadioButton2ActionPerformed(evt); + } + }); + + timeRadioButton3.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.text")); // NOI18N + timeRadioButton3.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.toolTipText")); // NOI18N + timeRadioButton3.addActionListener(new java.awt.event.ActionListener() { + public void actionPerformed(java.awt.event.ActionEvent evt) { + timeRadioButton3ActionPerformed(evt); + } + }); + + timeRadioButton4.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.text_1")); // NOI18N + timeRadioButton4.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.toolTipText")); // NOI18N + timeRadioButton4.addActionListener(new java.awt.event.ActionListener() { + public void actionPerformed(java.awt.event.ActionEvent evt) { + timeRadioButton4ActionPerformed(evt); + } + }); + showSnippetsCB.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.showSnippetsCB.text")); // NOI18N showSnippetsCB.addActionListener(new java.awt.event.ActionListener() { public void actionPerformed(java.awt.event.ActionEvent evt) { @@ -101,6 +169,14 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { } }); + timeRadioButton5.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.text")); // NOI18N + timeRadioButton5.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.toolTipText")); // NOI18N + timeRadioButton5.addActionListener(new java.awt.event.ActionListener() { + public void actionPerformed(java.awt.event.ActionEvent evt) { + timeRadioButton5ActionPerformed(evt); + } + }); + ingestWarningLabel.setIcon(new javax.swing.ImageIcon(getClass().getResource("/org/sleuthkit/autopsy/modules/hashdatabase/warning16.png"))); // NOI18N ingestWarningLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.ingestWarningLabel.text")); // NOI18N @@ -131,11 +207,19 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { .addComponent(filesIndexedLabel) .addGap(18, 18, 18) .addComponent(filesIndexedValue)) + .addComponent(frequencyLabel) .addGroup(layout.createSequentialGroup() .addComponent(chunksLabel) .addGap(18, 18, 18) - .addComponent(chunksValLabel))) - .addGap(132, 132, 132))) + .addComponent(chunksValLabel)) + .addGroup(layout.createSequentialGroup() + .addGap(16, 16, 16) + 
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) + .addComponent(timeRadioButton2) + .addComponent(timeRadioButton1) + .addComponent(timeRadioButton3) + .addComponent(timeRadioButton4) + .addComponent(timeRadioButton5)))))) .addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)) .addGroup(layout.createSequentialGroup() .addComponent(settingsLabel) @@ -157,7 +241,19 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { .addComponent(skipNSRLCheckBox) .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) .addComponent(showSnippetsCB) - .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED) + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) + .addComponent(frequencyLabel) + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) + .addComponent(timeRadioButton1) + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) + .addComponent(timeRadioButton2) + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) + .addComponent(timeRadioButton3) + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) + .addComponent(timeRadioButton4) + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) + .addComponent(timeRadioButton5) + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.TRAILING) .addComponent(informationLabel) .addComponent(informationSeparator, javax.swing.GroupLayout.PREFERRED_SIZE, 7, javax.swing.GroupLayout.PREFERRED_SIZE)) @@ -171,10 +267,14 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { .addComponent(chunksValLabel)) .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED) .addComponent(ingestWarningLabel) - .addContainerGap(151, Short.MAX_VALUE)) + .addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)) ); }// </editor-fold>//GEN-END:initComponents + private void timeRadioButton5ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_timeRadioButton5ActionPerformed + firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null); + }//GEN-LAST:event_timeRadioButton5ActionPerformed + private void skipNSRLCheckBoxActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_skipNSRLCheckBoxActionPerformed firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null); }//GEN-LAST:event_skipNSRLCheckBoxActionPerformed @@ -183,11 +283,28 @@ private void showSnippetsCBActionPerformed(java.awt.event.ActionEvent evt) {//GE firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null); }//GEN-LAST:event_showSnippetsCBActionPerformed + private void timeRadioButton1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_timeRadioButton1ActionPerformed + firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null); + }//GEN-LAST:event_timeRadioButton1ActionPerformed + + private void timeRadioButton2ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_timeRadioButton2ActionPerformed + firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null); + }//GEN-LAST:event_timeRadioButton2ActionPerformed + + private void timeRadioButton3ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_timeRadioButton3ActionPerformed + firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null); + }//GEN-LAST:event_timeRadioButton3ActionPerformed + + private void 
timeRadioButton4ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_timeRadioButton4ActionPerformed + firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null); + }//GEN-LAST:event_timeRadioButton4ActionPerformed + // Variables declaration - do not modify//GEN-BEGIN:variables private javax.swing.JLabel chunksLabel; private javax.swing.JLabel chunksValLabel; private javax.swing.JLabel filesIndexedLabel; private javax.swing.JLabel filesIndexedValue; + private javax.swing.JLabel frequencyLabel; private javax.swing.JLabel informationLabel; private javax.swing.JSeparator informationSeparator; private javax.swing.JLabel ingestWarningLabel; @@ -195,11 +312,18 @@ private void showSnippetsCBActionPerformed(java.awt.event.ActionEvent evt) {//GE private javax.swing.JSeparator settingsSeparator; private javax.swing.JCheckBox showSnippetsCB; private javax.swing.JCheckBox skipNSRLCheckBox; + private javax.swing.ButtonGroup timeGroup; + private javax.swing.JRadioButton timeRadioButton1; + private javax.swing.JRadioButton timeRadioButton2; + private javax.swing.JRadioButton timeRadioButton3; + private javax.swing.JRadioButton timeRadioButton4; + private javax.swing.JRadioButton timeRadioButton5; // End of variables declaration//GEN-END:variables @Override public void store() { KeywordSearchSettings.setSkipKnown(skipNSRLCheckBox.isSelected()); + KeywordSearchSettings.setUpdateFrequency(getSelectedTimeValue()); KeywordSearchSettings.setShowSnippets(showSnippetsCB.isSelected()); } @@ -208,10 +332,40 @@ public void load() { activateWidgets(); } + private void setTimeSettingEnabled(boolean enabled) { + timeRadioButton1.setEnabled(enabled); + timeRadioButton2.setEnabled(enabled); + timeRadioButton3.setEnabled(enabled); + timeRadioButton4.setEnabled(enabled); + timeRadioButton5.setEnabled(enabled); + frequencyLabel.setEnabled(enabled); + } + + private UpdateFrequency getSelectedTimeValue() { + if (timeRadioButton1.isSelected()) { + return UpdateFrequency.FAST; + } else if (timeRadioButton2.isSelected()) { + return UpdateFrequency.AVG; + } else if (timeRadioButton3.isSelected()) { + return UpdateFrequency.SLOW; + } else if (timeRadioButton4.isSelected()) { + return UpdateFrequency.SLOWEST; + } else if (timeRadioButton5.isSelected()) { + return UpdateFrequency.NONE; + } + return UpdateFrequency.DEFAULT; + } + @NbBundle.Messages({"KeywordSearchGlobalSearchSettingsPanel.customizeComponents.windowsOCR=Enable Optical Character Recognition (OCR) (Requires Windows 64-bit)", "KeywordSearchGlobalSearchSettingsPanel.customizeComponents.windowsLimitedOCR=Only process images which are over 100KB in size or extracted from a document. 
(Beta) (Requires Windows 64-bit)"}) private void customizeComponents() { + timeGroup.add(timeRadioButton1); + timeGroup.add(timeRadioButton2); + timeGroup.add(timeRadioButton3); + timeGroup.add(timeRadioButton4); + timeGroup.add(timeRadioButton5); + this.skipNSRLCheckBox.setSelected(KeywordSearchSettings.getSkipKnown()); try { diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java index e3f9582fdfe6d07dee331c8b1dda153b08e5849d..3140916f5ec3fae13685eb7b255e82fbe67bc121 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2011-2023 Basis Technology Corp. + * Copyright 2011-2021 Basis Technology Corp. * Contact: carrier <at> sleuthkit <dot> org * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -38,7 +38,6 @@ import java.util.logging.Level; import java.util.stream.Collectors; import org.apache.tika.mime.MimeTypes; -import org.openide.util.Exceptions; import org.openide.util.Lookup; import org.openide.util.NbBundle; import org.openide.util.NbBundle.Messages; @@ -70,7 +69,6 @@ import org.sleuthkit.datamodel.TskCoreException; import org.sleuthkit.datamodel.TskData; import org.sleuthkit.datamodel.TskData.FileKnown; -import org.sleuthkit.datamodel.TskException; /** * An ingest module on a file level Performs indexing of allocated and Solr @@ -151,7 +149,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule { .build(); private static final String IMAGE_MIME_TYPE_PREFIX = "image/"; - + // documents where OCR is performed private static final ImmutableSet<String> OCR_DOCUMENTS = ImmutableSet.of( "application/pdf", @@ -162,7 +160,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule { "application/vnd.ms-excel", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ); - + /** * Options for this extractor */ @@ -171,13 +169,33 @@ enum StringsExtractOptions { EXTRACT_UTF8, ///< extract UTF8 text, true/false }; + enum UpdateFrequency { + + FAST(20), + AVG(10), + SLOW(5), + SLOWEST(1), + NONE(Integer.MAX_VALUE), + DEFAULT(5); + private final int time; + + UpdateFrequency(int time) { + this.time = time; + } + + int getTime() { + return time; + } + }; private static final Logger logger = Logger.getLogger(KeywordSearchIngestModule.class.getName()); private final IngestServices services = IngestServices.getInstance(); private Ingester ingester = null; + private Indexer indexer; private FileTypeDetector fileTypeDetector; //only search images from current ingest, not images previously ingested/indexed //accessed read-only by searcher thread + private boolean startedSearching = false; private Lookup stringsExtractionContext; private final KeywordSearchJobSettings settings; private boolean initialized = false; @@ -239,21 +257,18 @@ public void startUp(IngestJobContext context) throws IngestModuleException { initialized = false; jobId = context.getJobId(); - Server server = null; - if (settings.isIndexToSolrEnabled()) { - server = KeywordSearch.getServer(); - if (server.coreIsOpen() == false) { - throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startUp_noOpenCore_msg()); - } + Server server = KeywordSearch.getServer(); + if (server.coreIsOpen() == false) { + throw new 
IngestModuleException(Bundle.KeywordSearchIngestModule_startUp_noOpenCore_msg()); + } - try { - Index indexInfo = server.getIndexInfo(); - if (!indexInfo.isCompatible(IndexFinder.getCurrentSchemaVersion())) { - throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupException_indexSchemaNotSupported(indexInfo.getSchemaVersion())); - } - } catch (NoOpenCoreException ex) { - throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupMessage_failedToGetIndexSchema(), ex); + try { + Index indexInfo = server.getIndexInfo(); + if (!indexInfo.isCompatible(IndexFinder.getCurrentSchemaVersion())) { + throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupException_indexSchemaNotSupported(indexInfo.getSchemaVersion())); } + } catch (NoOpenCoreException ex) { + throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupMessage_failedToGetIndexSchema(), ex); } try { @@ -292,24 +307,22 @@ public void startUp(IngestJobContext context) throws IngestModuleException { } } else { // for single-user cases need to verify connection to local SOLR service - // server will be null if indexing is disabled - if (server != null) { - try { - if (!server.isLocalSolrRunning()) { - throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg())); - } - } catch (KeywordSearchModuleException ex) { - //this means Solr is not properly initialized - throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg()), ex); - } - try { - // make an actual query to verify that server is responding - // we had cases where getStatus was OK, but the connection resulted in a 404 - server.queryNumIndexedDocuments(); - } catch (KeywordSearchModuleException | NoOpenCoreException ex) { - throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_exception_errConnToSolr_msg(ex.getMessage()), ex); + try { + if (!server.isLocalSolrRunning()) { + throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg())); } + } catch (KeywordSearchModuleException ex) { + //this means Solr is not properly initialized + throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg()), ex); + } + try { + // make an actual query to verify that server is responding + // we had cases where getStatus was OK, but the connection resulted in a 404 + server.queryNumIndexedDocuments(); + } catch (KeywordSearchModuleException | NoOpenCoreException ex) { + throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_exception_errConnToSolr_msg(ex.getMessage()), ex); } + // check if this job has any searchable keywords List<KeywordList> keywordLists = XmlKeywordSearchList.getCurrent().getListsL(); boolean hasKeywordsForSearch = false; @@ -334,6 +347,7 @@ public void startUp(IngestJobContext context) throws IngestModuleException { stringsExtractionContext = Lookups.fixed(stringsConfig); + indexer = new Indexer(); initialized = true; } @@ -375,7 +389,7 @@ public ProcessResult process(AbstractFile abstractFile) { if (context.fileIngestIsCancelled()) { return ProcessResult.OK; } - searchFile(extractorOpt, abstractFile, mimeType, false); + indexer.indexFile(extractorOpt, abstractFile, mimeType, false); return ProcessResult.OK; } @@ -383,7 +397,17 @@ public ProcessResult process(AbstractFile abstractFile) { if 
(context.fileIngestIsCancelled()) { return ProcessResult.OK; } - searchFile(extractorOpt, abstractFile, mimeType, true); + indexer.indexFile(extractorOpt, abstractFile, mimeType, true); + + // Start searching if it hasn't started already + if (!startedSearching) { + if (context.fileIngestIsCancelled()) { + return ProcessResult.OK; + } + List<String> keywordListNames = settings.getNamesOfEnabledKeyWordLists(); + IngestSearchRunner.getInstance().startJob(context, keywordListNames); + startedSearching = true; + } return ProcessResult.OK; } @@ -401,22 +425,17 @@ public void shutDown() { } if (context.fileIngestIsCancelled()) { - logger.log(Level.INFO, "Keyword search ingest module instance {0} stopping due to ingest cancellation", instanceNum); //NON-NLS + logger.log(Level.INFO, "Keyword search ingest module instance {0} stopping search job due to ingest cancellation", instanceNum); //NON-NLS + IngestSearchRunner.getInstance().stopJob(jobId); cleanup(); return; } + // Remove from the search list and trigger final commit and final search + IngestSearchRunner.getInstance().endJob(jobId); + // We only need to post the summary msg from the last module per job if (refCounter.decrementAndGet(jobId) == 0) { - - try { - InlineSearcher.makeArtifacts(context); - InlineSearcher.cleanup(context); - Ingester.getDefault().commit(); - } catch (TskException ex) { - logger.log(Level.SEVERE, String.format("Failed to create search ingest artifacts for job %d", context.getJobId()), ex); - } - try { final int numIndexedFiles = KeywordSearch.getServer().queryNumIndexedFiles(); logger.log(Level.INFO, "Indexed files count: {0}", numIndexedFiles); //NON-NLS @@ -443,7 +462,7 @@ private void cleanup() { } /** - * Returns true if file should have OCR performed on it when limited OCR + * Returns true if file should have OCR performed on it when limited OCR * setting is specified. * * @param aFile The abstract file. @@ -456,12 +475,12 @@ private boolean isLimitedOCRFile(AbstractFile aFile, String mimeType) { if (OCR_DOCUMENTS.contains(mimeType)) { return true; } - + if (mimeType.startsWith(IMAGE_MIME_TYPE_PREFIX)) { return aFile.getSize() > LIMITED_OCR_SIZE_MIN - || aFile.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.DERIVED; + || aFile.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.DERIVED; } - + return false; } @@ -543,319 +562,317 @@ private Optional<TextExtractor> getExtractor(AbstractFile abstractFile) { * File indexer, processes and indexes known/allocated files, * unknown/unallocated files and directories accordingly */ - /** - * Extract text with Tika or other text extraction modules (by streaming) - * from the file Divide the file into chunks and index the chunks - * - * @param extractorOptional The textExtractor to use with this file or - * empty. - * @param aFile file to extract strings from, divide into chunks - * and index - * @param extractedMetadata Map that will be populated with the file's - * metadata. - * - * @return true if the file was text_ingested, false otherwise - * - * @throws IngesterException exception thrown if indexing failed - */ - private boolean extractTextAndSearch(Optional<TextExtractor> extractorOptional, AbstractFile aFile, + private class Indexer { + + private final Logger logger = Logger.getLogger(Indexer.class.getName()); + + /** + * Extract text with Tika or other text extraction modules (by + * streaming) from the file Divide the file into chunks and index the + * chunks + * + * @param extractorOptional The textExtractor to use with this file or + * empty. 
+ * @param aFile file to extract strings from, divide into + * chunks and index + * @param extractedMetadata Map that will be populated with the file's + * metadata. + * + * @return true if the file was text_ingested, false otherwise + * + * @throws IngesterException exception thrown if indexing failed + */ + private boolean extractTextAndIndex(Optional<TextExtractor> extractorOptional, AbstractFile aFile, Map<String, String> extractedMetadata) throws IngesterException { - try { - if (!extractorOptional.isPresent()) { - return false; - } - //divide into chunks and index - Ingester.getDefault().search(getTikaOrTextExtractor(extractorOptional, aFile, extractedMetadata), aFile.getId(), aFile.getName(), aFile, context, true,settings.isIndexToSolrEnabled(), settings.getNamesOfEnabledKeyWordLists()); - - } catch (TextExtractor.InitReaderException ex) { - return false; - } catch(Exception ex) { - logger.log(Level.WARNING, String.format("Failed to search file %s [id=%d]", - aFile.getName(), aFile.getId()), ex); - return false; - } - - return true; - } - - private Reader getTikaOrTextExtractor(Optional<TextExtractor> extractorOptional, AbstractFile aFile, - Map<String, String> extractedMetadata) throws TextExtractor.InitReaderException { - - TextExtractor extractor = extractorOptional.get(); - Reader fileText = extractor.getReader(); - Reader finalReader; try { - Map<String, String> metadata = extractor.getMetadata(); - if (!metadata.isEmpty()) { - // Creating the metadata artifact here causes occasional problems - // when indexing the text, so we save the metadata map to - // use after this method is complete. - extractedMetadata.putAll(metadata); + if (!extractorOptional.isPresent()) { + return false; } - CharSource formattedMetadata = getMetaDataCharSource(metadata); - //Append the metadata to end of the file text - finalReader = CharSource.concat(new CharSource() { - //Wrap fileText reader for concatenation - @Override - public Reader openStream() throws IOException { - return fileText; + TextExtractor extractor = extractorOptional.get(); + Reader fileText = extractor.getReader(); + Reader finalReader; + try { + Map<String, String> metadata = extractor.getMetadata(); + if (!metadata.isEmpty()) { + // Creating the metadata artifact here causes occasional problems + // when indexing the text, so we save the metadata map to + // use after this method is complete. + extractedMetadata.putAll(metadata); } - }, formattedMetadata).openStream(); - } catch (IOException ex) { - logger.log(Level.WARNING, String.format("Could not format extracted metadata for file %s [id=%d]", - aFile.getName(), aFile.getId()), ex); - //Just send file text. - finalReader = fileText; + CharSource formattedMetadata = getMetaDataCharSource(metadata); + //Append the metadata to end of the file text + finalReader = CharSource.concat(new CharSource() { + //Wrap fileText reader for concatenation + @Override + public Reader openStream() throws IOException { + return fileText; + } + }, formattedMetadata).openStream(); + } catch (IOException ex) { + logger.log(Level.WARNING, String.format("Could not format extracted metadata for file %s [id=%d]", + aFile.getName(), aFile.getId()), ex); + //Just send file text. + finalReader = fileText; + } + //divide into chunks and index + return Ingester.getDefault().indexText(finalReader, aFile.getId(), aFile.getName(), aFile, context); + } catch (TextExtractor.InitReaderException ex) { + // Text extractor could not be initialized. No text will be extracted. 
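For orientation: the hunk above rebuilds the metadata-append step inside Indexer.extractTextAndIndex(). The extractor's Reader is wrapped in a Guava CharSource and the pretty-printed metadata is concatenated after the file text, so the combined stream can be chunked and indexed without buffering the whole file. The following is a minimal, self-contained sketch of that pattern only; it is not part of the patch, and MetadataAppendSketch / appendMetadata are made-up names.

import com.google.common.io.CharSource;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Map;
import java.util.TreeMap;

class MetadataAppendSketch {

    /** Returns a Reader that yields the file text followed by the formatted metadata. */
    static Reader appendMetadata(final Reader fileText, Map<String, String> metadata) throws IOException {
        StringBuilder formatted = new StringBuilder("\n\n------METADATA------\n\n");
        new TreeMap<>(metadata).forEach((key, value) -> formatted.append(key).append(": ").append(value).append('\n'));
        CharSource fileSource = new CharSource() {
            @Override
            public Reader openStream() {
                return fileText; // reuse the already-open extractor reader instead of buffering it
            }
        };
        // File text first, metadata appended at the end, as in the patched method.
        return CharSource.concat(fileSource, CharSource.wrap(formatted)).openStream();
    }

    public static void main(String[] args) throws IOException {
        Map<String, String> metadata = new TreeMap<>();
        metadata.put("Content-Type", "text/plain");
        Reader combined = appendMetadata(new StringReader("file body"), metadata);
        // 'combined' streams "file body" followed by the metadata block, ready for chunked indexing.
    }
}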
+ return false; } - //divide into chunks and index - return finalReader; - - } + } - private void createMetadataArtifact(AbstractFile aFile, Map<String, String> metadata) { + private void createMetadataArtifact(AbstractFile aFile, Map<String, String> metadata) { - String moduleName = KeywordSearchIngestModule.class.getName(); + String moduleName = KeywordSearchIngestModule.class.getName(); - Collection<BlackboardAttribute> attributes = new ArrayList<>(); - Collection<BlackboardArtifact> bbartifacts = new ArrayList<>(); - for (Map.Entry<String, String> entry : metadata.entrySet()) { - if (METADATA_TYPES_MAP.containsKey(entry.getKey())) { - BlackboardAttribute bba = checkAttribute(entry.getKey(), entry.getValue()); - if (bba != null) { - attributes.add(bba); + Collection<BlackboardAttribute> attributes = new ArrayList<>(); + Collection<BlackboardArtifact> bbartifacts = new ArrayList<>(); + for (Map.Entry<String, String> entry : metadata.entrySet()) { + if (METADATA_TYPES_MAP.containsKey(entry.getKey())) { + BlackboardAttribute bba = checkAttribute(entry.getKey(), entry.getValue()); + if (bba != null) { + attributes.add(bba); + } } } - } - if (!attributes.isEmpty()) { - try { - BlackboardArtifact bbart = aFile.newDataArtifact(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_METADATA), attributes); - bbartifacts.add(bbart); - } catch (TskCoreException ex) { - // Log error and return to continue processing - logger.log(Level.WARNING, String.format("Error creating or adding metadata artifact for file %s.", aFile.getParentPath() + aFile.getName()), ex); //NON-NLS - return; - } - if (!bbartifacts.isEmpty()) { + if (!attributes.isEmpty()) { try { - Case.getCurrentCaseThrows().getSleuthkitCase().getBlackboard().postArtifacts(bbartifacts, moduleName, jobId); - } catch (NoCurrentCaseException | Blackboard.BlackboardException ex) { + BlackboardArtifact bbart = aFile.newDataArtifact(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_METADATA), attributes); + bbartifacts.add(bbart); + } catch (TskCoreException ex) { // Log error and return to continue processing - logger.log(Level.WARNING, String.format("Unable to post blackboard artifacts for file $s.", aFile.getParentPath() + aFile.getName()), ex); //NON-NLS + logger.log(Level.WARNING, String.format("Error creating or adding metadata artifact for file %s.", aFile.getParentPath() + aFile.getName()), ex); //NON-NLS return; } + if (!bbartifacts.isEmpty()) { + try { + Case.getCurrentCaseThrows().getSleuthkitCase().getBlackboard().postArtifacts(bbartifacts, moduleName, jobId); + } catch (NoCurrentCaseException | Blackboard.BlackboardException ex) { + // Log error and return to continue processing + logger.log(Level.WARNING, String.format("Unable to post blackboard artifacts for file $s.", aFile.getParentPath() + aFile.getName()), ex); //NON-NLS + return; + } + } } } - } - private BlackboardAttribute checkAttribute(String key, String value) { - String moduleName = KeywordSearchIngestModule.class.getName(); - if (!value.isEmpty() && value.charAt(0) != ' ') { - if (METADATA_DATE_TYPES.contains(key)) { - SimpleDateFormat metadataDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", US); - Long metadataDateTime = Long.valueOf(0); - try { - String metadataDate = value.replaceAll("T", " ").replaceAll("Z", ""); - Date usedDate = metadataDateFormat.parse(metadataDate); - metadataDateTime = usedDate.getTime() / 1000; - return new BlackboardAttribute(METADATA_TYPES_MAP.get(key), moduleName, metadataDateTime); - } catch (ParseException 
ex) { - // catching error and displaying date that could not be parsed then will continue on. - logger.log(Level.WARNING, String.format("Failed to parse date/time %s for metadata attribute %s.", value, key), ex); //NON-NLS - return null; + private BlackboardAttribute checkAttribute(String key, String value) { + String moduleName = KeywordSearchIngestModule.class.getName(); + if (!value.isEmpty() && value.charAt(0) != ' ') { + if (METADATA_DATE_TYPES.contains(key)) { + SimpleDateFormat metadataDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", US); + Long metadataDateTime = Long.valueOf(0); + try { + String metadataDate = value.replaceAll("T", " ").replaceAll("Z", ""); + Date usedDate = metadataDateFormat.parse(metadataDate); + metadataDateTime = usedDate.getTime() / 1000; + return new BlackboardAttribute(METADATA_TYPES_MAP.get(key), moduleName, metadataDateTime); + } catch (ParseException ex) { + // catching error and displaying date that could not be parsed then will continue on. + logger.log(Level.WARNING, String.format("Failed to parse date/time %s for metadata attribute %s.", value, key), ex); //NON-NLS + return null; + } + } else { + return new BlackboardAttribute(METADATA_TYPES_MAP.get(key), moduleName, value); } - } else { - return new BlackboardAttribute(METADATA_TYPES_MAP.get(key), moduleName, value); } - } - - return null; - - } - /** - * Pretty print the text extractor metadata. - * - * @param metadata The Metadata map to wrap as a CharSource - * - * @return A CharSource for the given Metadata - */ - @NbBundle.Messages({ - "KeywordSearchIngestModule.metadataTitle=METADATA" - }) - private CharSource getMetaDataCharSource(Map<String, String> metadata) { - return CharSource.wrap(new StringBuilder( - String.format("\n\n------------------------------%s------------------------------\n\n", - Bundle.KeywordSearchIngestModule_metadataTitle())) - .append(metadata.entrySet().stream().sorted(Map.Entry.comparingByKey()) - .map(entry -> entry.getKey() + ": " + entry.getValue()) - .collect(Collectors.joining("\n")) - )); - } + return null; - /** - * Extract strings using heuristics from the file and add to index. - * - * @param aFile file to extract strings from, divide into chunks and index - * - * @return true if the file was text_ingested, false otherwise - */ - private boolean extractStringsAndIndex(AbstractFile aFile) { - try { - if (context.fileIngestIsCancelled()) { - return true; - } - Reader extractedTextReader = KeywordSearchUtil.getReader(aFile, stringsExtractionContext); - Ingester.getDefault().search(extractedTextReader, aFile.getId(), aFile.getName(), aFile, KeywordSearchIngestModule.this.context, false, settings.isIndexToSolrEnabled(), settings.getNamesOfEnabledKeyWordLists()); - putIngestStatus(jobId, aFile.getId(), IngestStatus.STRINGS_INGESTED); - } catch (Exception ex) { - logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ex); //NON-NLS - putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); - return false; } - return true; - } - - /** - * Adds the file to the index. Detects file type, calls extractors, etc. - * - * @param extractor The textExtractor to use with this file or empty if - * no extractor found. - * @param aFile File to analyze. - * @param mimeType The file mime type. - * @param indexContent False if only metadata should be text_ingested. True - * if content and metadata should be index. 
- */ - private void searchFile(Optional<TextExtractor> extractor, AbstractFile aFile, String mimeType, boolean indexContent) { - //logger.log(Level.INFO, "Processing AbstractFile: " + abstractFile.getName()); - - TskData.TSK_DB_FILES_TYPE_ENUM aType = aFile.getType(); /** - * Extract unicode strings from unallocated and unused blocks and carved - * text files. The reason for performing string extraction on these is - * because they all may contain multiple encodings which can cause text - * to be missed by the more specialized text extractors used below. + * Pretty print the text extractor metadata. + * + * @param metadata The Metadata map to wrap as a CharSource + * + * @return A CharSource for the given Metadata */ - if ((aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) - || aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS)) - || (aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED) && aFile.getNameExtension().equalsIgnoreCase("txt"))) { - if (context.fileIngestIsCancelled()) { - return; - } - extractStringsAndIndex(aFile); - return; + @NbBundle.Messages({ + "KeywordSearchIngestModule.metadataTitle=METADATA" + }) + private CharSource getMetaDataCharSource(Map<String, String> metadata) { + return CharSource.wrap(new StringBuilder( + String.format("\n\n------------------------------%s------------------------------\n\n", + Bundle.KeywordSearchIngestModule_metadataTitle())) + .append(metadata.entrySet().stream().sorted(Map.Entry.comparingByKey()) + .map(entry -> entry.getKey() + ": " + entry.getValue()) + .collect(Collectors.joining("\n")) + )); } - final long size = aFile.getSize(); - //if not to index content, or a dir, or 0 content, index meta data only - - if ((indexContent == false || aFile.isDir() || size == 0)) { + /** + * Extract strings using heuristics from the file and add to index. + * + * @param aFile file to extract strings from, divide into chunks and + * index + * + * @return true if the file was text_ingested, false otherwise + */ + private boolean extractStringsAndIndex(AbstractFile aFile) { try { if (context.fileIngestIsCancelled()) { - return; + return true; } - ingester.indexMetaDataOnly(aFile); - putIngestStatus(jobId, aFile.getId(), IngestStatus.METADATA_INGESTED); - } catch (IngesterException ex) { + TextExtractor stringsExtractor = TextExtractorFactory.getStringsExtractor(aFile, stringsExtractionContext); + Reader extractedTextReader = stringsExtractor.getReader(); + if (Ingester.getDefault().indexStrings(extractedTextReader, aFile.getId(), aFile.getName(), aFile, KeywordSearchIngestModule.this.context)) { + putIngestStatus(jobId, aFile.getId(), IngestStatus.STRINGS_INGESTED); + return true; + } else { + logger.log(Level.WARNING, "Failed to extract strings and ingest, file ''{0}'' (id: {1}).", new Object[]{aFile.getName(), aFile.getId()}); //NON-NLS + putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT); + return false; + } + } catch (IngesterException | TextExtractor.InitReaderException ex) { + logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ex); //NON-NLS putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); - logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex); //NON-NLS + return false; } - return; } - if (context.fileIngestIsCancelled()) { - return; - } - - // we skip archive formats that are opened by the archive module. 
- // @@@ We could have a check here to see if the archive module was enabled though... - if (ARCHIVE_MIME_TYPES.contains(mimeType)) { - try { + /** + * Adds the file to the index. Detects file type, calls extractors, etc. + * + * @param extractor The textExtractor to use with this file or empty + * if no extractor found. + * @param aFile File to analyze. + * @param mimeType The file mime type. + * @param indexContent False if only metadata should be text_ingested. + * True if content and metadata should be index. + */ + private void indexFile(Optional<TextExtractor> extractor, AbstractFile aFile, String mimeType, boolean indexContent) { + //logger.log(Level.INFO, "Processing AbstractFile: " + abstractFile.getName()); + + TskData.TSK_DB_FILES_TYPE_ENUM aType = aFile.getType(); + + /** + * Extract unicode strings from unallocated and unused blocks and + * carved text files. The reason for performing string extraction on + * these is because they all may contain multiple encodings which + * can cause text to be missed by the more specialized text + * extractors used below. + */ + if ((aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) + || aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS)) + || (aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED) && aFile.getNameExtension().equalsIgnoreCase("txt"))) { if (context.fileIngestIsCancelled()) { return; } - ingester.indexMetaDataOnly(aFile); - putIngestStatus(jobId, aFile.getId(), IngestStatus.METADATA_INGESTED); - } catch (IngesterException ex) { - putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); - logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex); //NON-NLS + extractStringsAndIndex(aFile); + return; } - return; - } - boolean wasTextAdded = false; - Map<String, String> extractedMetadata = new HashMap<>(); + final long size = aFile.getSize(); + //if not to index content, or a dir, or 0 content, index meta data only + + if ((indexContent == false || aFile.isDir() || size == 0)) { + try { + if (context.fileIngestIsCancelled()) { + return; + } + ingester.indexMetaDataOnly(aFile); + putIngestStatus(jobId, aFile.getId(), IngestStatus.METADATA_INGESTED); + } catch (IngesterException ex) { + putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); + logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex); //NON-NLS + } + return; + } - //extract text with one of the extractors, divide into chunks and index with Solr - try { - //logger.log(Level.INFO, "indexing: " + aFile.getName()); if (context.fileIngestIsCancelled()) { return; } - if (MimeTypes.OCTET_STREAM.equals(mimeType)) { - extractStringsAndIndex(aFile); + + // we skip archive formats that are opened by the archive module. + // @@@ We could have a check here to see if the archive module was enabled though... + if (ARCHIVE_MIME_TYPES.contains(mimeType)) { + try { + if (context.fileIngestIsCancelled()) { + return; + } + ingester.indexMetaDataOnly(aFile); + putIngestStatus(jobId, aFile.getId(), IngestStatus.METADATA_INGESTED); + } catch (IngesterException ex) { + putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); + logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex); //NON-NLS + } return; } - if (!extractTextAndSearch(extractor, aFile, extractedMetadata)) { - // Text extractor not found for file. Extract string only. 
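For orientation: the surrounding hunks restore the routing logic of Indexer.indexFile(): metadata-only indexing for directories, zero-length files and archive formats, heuristic string extraction for unidentified binary content, and full text extraction (with string extraction as a later fallback) for everything else. Below is a condensed, self-contained sketch of that ordering; it is illustrative only, omits the unallocated/carved-file pre-check and cancellation checks, and IndexRoutingSketch, Route and route() are made-up names.

import java.util.Collections;
import java.util.Set;

class IndexRoutingSketch {

    /** Possible treatments for a file, mirroring the branches in the patched indexFile(). */
    enum Route { METADATA_ONLY, STRINGS_ONLY, FULL_TEXT }

    static Route route(boolean indexContent, boolean isDir, long size, String mimeType, Set<String> archiveMimeTypes) {
        if (!indexContent || isDir || size == 0) {
            return Route.METADATA_ONLY;  // nothing (or no request) to extract: index metadata only
        }
        if (archiveMimeTypes.contains(mimeType)) {
            return Route.METADATA_ONLY;  // archives are opened by the archive module: metadata only
        }
        if ("application/octet-stream".equals(mimeType)) {
            return Route.STRINGS_ONLY;   // unidentified binary content: heuristic string extraction
        }
        return Route.FULL_TEXT;          // text extractor, with string extraction as the fallback
    }

    public static void main(String[] args) {
        Set<String> archiveMimeTypes = Collections.singleton("application/zip"); // hypothetical subset
        System.out.println(route(true, false, 4096, "application/pdf", archiveMimeTypes));          // FULL_TEXT
        System.out.println(route(true, false, 4096, "application/zip", archiveMimeTypes));          // METADATA_ONLY
        System.out.println(route(true, false, 4096, "application/octet-stream", archiveMimeTypes)); // STRINGS_ONLY
    }
}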
+ + boolean wasTextAdded = false; + Map<String, String> extractedMetadata = new HashMap<>(); + + //extract text with one of the extractors, divide into chunks and index with Solr + try { + //logger.log(Level.INFO, "indexing: " + aFile.getName()); + if (context.fileIngestIsCancelled()) { + return; + } + if (MimeTypes.OCTET_STREAM.equals(mimeType)) { + extractStringsAndIndex(aFile); + return; + } + if (!extractTextAndIndex(extractor, aFile, extractedMetadata)) { + // Text extractor not found for file. Extract string only. + putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT); + } else { + putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED); + wasTextAdded = true; + } + + } catch (IngesterException e) { + logger.log(Level.INFO, "Could not extract text with Tika, " + aFile.getId() + ", " //NON-NLS + + aFile.getName(), e); + putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); + } catch (Exception e) { + logger.log(Level.WARNING, "Error extracting text with Tika, " + aFile.getId() + ", " //NON-NLS + + aFile.getName(), e); putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT); - } else { - putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED); - wasTextAdded = true; } - } catch (IngesterException e) { - logger.log(Level.INFO, "Could not extract text with Tika, " + aFile.getId() + ", " //NON-NLS - + aFile.getName(), e); - putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); - } catch (Exception e) { - logger.log(Level.WARNING, "Error extracting text with Tika, " + aFile.getId() + ", " //NON-NLS - + aFile.getName(), e); - putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT); - } - - if ((wasTextAdded == false) && (aFile.getNameExtension().equalsIgnoreCase("txt") && !(aFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED)))) { - //Carved Files should be the only type of unallocated files capable of a txt extension and - //should be ignored by the TextFileExtractor because they may contain more than one text encoding - wasTextAdded = searchTextFile(aFile); - } + if ((wasTextAdded == false) && (aFile.getNameExtension().equalsIgnoreCase("txt") && !(aFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED)))) { + //Carved Files should be the only type of unallocated files capable of a txt extension and + //should be ignored by the TextFileExtractor because they may contain more than one text encoding + wasTextAdded = indexTextFile(aFile); + } - // if it wasn't supported or had an error, default to strings - if (wasTextAdded == false) { - extractStringsAndIndex(aFile); - } + // if it wasn't supported or had an error, default to strings + if (wasTextAdded == false) { + extractStringsAndIndex(aFile); + } - // Now that the indexing is complete, create the metadata artifact (if applicable). - // It is unclear why calling this from extractTextAndIndex() generates - // errors. - if (!extractedMetadata.isEmpty()) { - createMetadataArtifact(aFile, extractedMetadata); + // Now that the indexing is complete, create the metadata artifact (if applicable). + // It is unclear why calling this from extractTextAndIndex() generates + // errors. + if (!extractedMetadata.isEmpty()) { + createMetadataArtifact(aFile, extractedMetadata); + } } - } - /** - * Adds the text file to the index given an encoding. Returns true if - * indexing was successful and false otherwise. 
- * - * @param aFile Text file to analyze - */ - private boolean searchTextFile(AbstractFile aFile) { - try { - TextFileExtractor textFileExtractor = new TextFileExtractor(aFile); - Reader textReader = textFileExtractor.getReader(); - if (textReader == null) { - logger.log(Level.INFO, "Unable to extract with TextFileExtractor, Reader was null for file: {0}", aFile.getName()); - } else { - Ingester.getDefault().search(textReader, aFile.getId(), aFile.getName(), aFile, context, true, settings.isIndexToSolrEnabled(), settings.getNamesOfEnabledKeyWordLists()); - textReader.close(); - putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED); - return true; + /** + * Adds the text file to the index given an encoding. Returns true if + * indexing was successful and false otherwise. + * + * @param aFile Text file to analyze + */ + private boolean indexTextFile(AbstractFile aFile) { + try { + TextFileExtractor textFileExtractor = new TextFileExtractor(aFile); + Reader textReader = textFileExtractor.getReader(); + if (textReader == null) { + logger.log(Level.INFO, "Unable to extract with TextFileExtractor, Reader was null for file: {0}", aFile.getName()); + } else if (Ingester.getDefault().indexText(textReader, aFile.getId(), aFile.getName(), aFile, context)) { + textReader.close(); + putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED); + return true; + } + } catch (IngesterException | IOException | TextExtractor.InitReaderException ex) { + logger.log(Level.WARNING, "Unable to index " + aFile.getName(), ex); } - } catch (Exception ex) { - logger.log(Level.WARNING, "Unable to index " + aFile.getName(), ex); + return false; } - return false; } - } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettings.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettings.java index 133c3b37db729f53d8d1adc71c4ba2e98af151b2..865024dae865bb1ff02e7ae4027cccdb0ef1e070 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettings.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettings.java @@ -30,7 +30,7 @@ public final class KeywordSearchJobSettings implements IngestModuleIngestJobSett private static final long serialVersionUID = 1L; - private final HashSet<String> namesOfEnabledKeywordLists; + private HashSet<String> namesOfEnabledKeywordLists; private HashSet<String> namesOfDisabledKeywordLists; // Added in version 1.1 /** @@ -41,8 +41,6 @@ public final class KeywordSearchJobSettings implements IngestModuleIngestJobSett private Boolean limitedOCREnabled; private boolean ocrOnly; - - private boolean indexToSolr; /** * Constructs ingest job settings for the keywords search module. @@ -57,7 +55,6 @@ public final class KeywordSearchJobSettings implements IngestModuleIngestJobSett this.ocrEnabled = null; this.limitedOCREnabled = null; this.ocrOnly = false; - this.indexToSolr = true; } /** @@ -72,13 +69,12 @@ public final class KeywordSearchJobSettings implements IngestModuleIngestJobSett * @param ocrOnly True if keyword search ingest should * be solely limited to OCR. 
*/ - KeywordSearchJobSettings(List<String> namesOfEnabledKeywordLists, List<String> namesOfDisabledKeywordLists, boolean ocrEnabled, boolean limitedOCREnabled, boolean ocrOnly, boolean indexToSolr) { + KeywordSearchJobSettings(List<String> namesOfEnabledKeywordLists, List<String> namesOfDisabledKeywordLists, boolean ocrEnabled, boolean limitedOCREnabled, boolean ocrOnly) { this.namesOfEnabledKeywordLists = new HashSet<>(namesOfEnabledKeywordLists); this.namesOfDisabledKeywordLists = new HashSet<>(namesOfDisabledKeywordLists); this.ocrEnabled = ocrEnabled; this.limitedOCREnabled = limitedOCREnabled; this.ocrOnly = ocrOnly; - this.indexToSolr = indexToSolr; } /** @@ -200,13 +196,5 @@ private void upgradeFromOlderVersions() { this.namesOfDisabledKeywordLists = new HashSet<>(); } } - - boolean isIndexToSolrEnabled() { - return indexToSolr; - } - - void setIndexToSolrEnabled(boolean enabled){ - indexToSolr = enabled; - } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettingsPanel.form b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettingsPanel.form index ce9d1bc8540c694986f25b660e454a2c7f20b53b..0294690848b2bcbcd199f8846067f6b4514c8338 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettingsPanel.form +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettingsPanel.form @@ -16,10 +16,70 @@ <AuxValue name="FormSettings_listenerGenerationStyle" type="java.lang.Integer" value="0"/> <AuxValue name="FormSettings_variablesLocal" type="java.lang.Boolean" value="false"/> <AuxValue name="FormSettings_variablesModifier" type="java.lang.Integer" value="2"/> - <AuxValue name="designerSize" type="java.awt.Dimension" value="-84,-19,0,5,115,114,0,18,106,97,118,97,46,97,119,116,46,68,105,109,101,110,115,105,111,110,65,-114,-39,-41,-84,95,68,20,2,0,2,73,0,6,104,101,105,103,104,116,73,0,5,119,105,100,116,104,120,112,0,0,0,-56,0,0,1,73"/> </AuxValues> - <Layout class="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout"/> + <Layout> + <DimensionLayout dim="0"> + <Group type="103" groupAlignment="1" attributes="0"> + <Group type="102" attributes="0"> + <EmptySpace max="-2" attributes="0"/> + <Group type="103" groupAlignment="0" attributes="0"> + <Component id="languagesLabel" alignment="1" max="32767" attributes="0"/> + <Group type="102" attributes="0"> + <Group type="103" groupAlignment="0" attributes="0"> + <Component id="listsScrollPane" pref="316" max="32767" attributes="1"/> + <Component id="titleLabel" min="-2" max="-2" attributes="0"/> + <Group type="102" alignment="0" attributes="0"> + <Component id="encodingsLabel" min="-2" max="-2" attributes="0"/> + <EmptySpace type="unrelated" max="-2" attributes="0"/> + <Component id="keywordSearchEncodings" min="-2" max="-2" attributes="0"/> + </Group> + <Group type="102" alignment="0" attributes="0"> + <EmptySpace min="10" pref="10" max="-2" attributes="0"/> + <Component id="languagesValLabel" min="-2" pref="274" max="-2" attributes="0"/> + </Group> + <Component id="ocrCheckBox" alignment="0" min="-2" max="-2" attributes="0"/> + <Group type="102" alignment="0" attributes="0"> + <EmptySpace min="21" pref="21" max="-2" attributes="0"/> + <Group type="103" groupAlignment="0" attributes="0"> + <Component id="ocrOnlyCheckbox" min="-2" max="-2" attributes="0"/> + <Component id="limitedOcrCheckbox" min="-2" pref="288" max="-2" attributes="0"/> + </Group> + </Group> + </Group> + <EmptySpace max="-2" attributes="0"/> + </Group> + </Group> + 
</Group> + </Group> + </DimensionLayout> + <DimensionLayout dim="1"> + <Group type="103" groupAlignment="0" attributes="0"> + <Group type="102" alignment="0" attributes="0"> + <EmptySpace min="-2" pref="7" max="-2" attributes="0"/> + <Component id="titleLabel" min="-2" max="-2" attributes="0"/> + <EmptySpace min="-2" max="-2" attributes="0"/> + <Component id="listsScrollPane" max="32767" attributes="0"/> + <EmptySpace max="-2" attributes="0"/> + <Component id="languagesLabel" min="-2" pref="13" max="-2" attributes="0"/> + <EmptySpace min="-2" max="-2" attributes="0"/> + <Component id="languagesValLabel" min="-2" max="-2" attributes="0"/> + <EmptySpace type="unrelated" min="-2" max="-2" attributes="0"/> + <Group type="103" groupAlignment="3" attributes="0"> + <Component id="encodingsLabel" alignment="3" min="-2" max="-2" attributes="0"/> + <Component id="keywordSearchEncodings" alignment="3" min="-2" max="-2" attributes="0"/> + </Group> + <EmptySpace type="unrelated" min="-2" max="-2" attributes="0"/> + <Component id="ocrCheckBox" min="-2" max="-2" attributes="0"/> + <EmptySpace min="-2" max="-2" attributes="0"/> + <Component id="ocrOnlyCheckbox" min="-2" max="-2" attributes="0"/> + <EmptySpace min="-2" max="-2" attributes="0"/> + <Component id="limitedOcrCheckbox" min="-2" max="-2" attributes="0"/> + <EmptySpace min="-2" max="-2" attributes="0"/> + </Group> + </Group> + </DimensionLayout> + </Layout> <SubComponents> <Container class="javax.swing.JScrollPane" name="listsScrollPane"> <Properties> @@ -35,11 +95,6 @@ <AuxValues> <AuxValue name="autoScrollPane" type="java.lang.Boolean" value="true"/> </AuxValues> - <Constraints> - <Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> - <GridBagConstraints gridX="0" gridY="1" gridWidth="7" gridHeight="1" fill="1" ipadX="284" ipadY="-71" insetsTop="6" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="1.0" weightY="1.0"/> - </Constraint> - </Constraints> <Layout class="org.netbeans.modules.form.compat2.layouts.support.JScrollPaneSupportLayout"/> <SubComponents> @@ -73,11 +128,6 @@ <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchJobSettingsPanel.titleLabel.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> </Property> </Properties> - <Constraints> - <Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> - <GridBagConstraints gridX="0" gridY="0" gridWidth="2" gridHeight="1" fill="0" ipadX="0" ipadY="0" insetsTop="7" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/> - </Constraint> - </Constraints> </Component> <Component class="javax.swing.JLabel" name="languagesLabel"> <Properties> @@ -92,11 +142,6 @@ </Property> <Property name="verticalTextPosition" type="int" value="3"/> </Properties> - <Constraints> - <Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> - <GridBagConstraints gridX="0" gridY="2" gridWidth="8" gridHeight="1" fill="0" ipadX="25" ipadY="-22" insetsTop="6" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/> - </Constraint> - </Constraints> 
</Component> <Component class="javax.swing.JLabel" name="languagesValLabel"> <Properties> @@ -107,11 +152,6 @@ <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchJobSettingsPanel.languagesValLabel.toolTipText" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> </Property> </Properties> - <Constraints> - <Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> - <GridBagConstraints gridX="0" gridY="3" gridWidth="6" gridHeight="1" fill="0" ipadX="270" ipadY="0" insetsTop="6" insetsLeft="20" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/> - </Constraint> - </Constraints> </Component> <Component class="javax.swing.JLabel" name="encodingsLabel"> <Properties> @@ -119,11 +159,6 @@ <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchJobSettingsPanel.encodingsLabel.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> </Property> </Properties> - <Constraints> - <Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> - <GridBagConstraints gridX="0" gridY="4" gridWidth="1" gridHeight="1" fill="0" ipadX="0" ipadY="0" insetsTop="11" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/> - </Constraint> - </Constraints> </Component> <Component class="javax.swing.JLabel" name="keywordSearchEncodings"> <Properties> @@ -131,11 +166,6 @@ <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchJobSettingsPanel.keywordSearchEncodings.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> </Property> </Properties> - <Constraints> - <Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> - <GridBagConstraints gridX="1" gridY="4" gridWidth="1" gridHeight="1" fill="0" ipadX="0" ipadY="0" insetsTop="11" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/> - </Constraint> - </Constraints> </Component> <Component class="javax.swing.JCheckBox" name="ocrCheckBox"> <Properties> @@ -146,11 +176,6 @@ <Events> <EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="ocrCheckBoxActionPerformed"/> </Events> - <Constraints> - <Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> - <GridBagConstraints gridX="0" gridY="5" gridWidth="2" gridHeight="1" fill="0" ipadX="0" ipadY="0" insetsTop="7" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/> - </Constraint> - </Constraints> </Component> <Component class="javax.swing.JCheckBox" name="limitedOcrCheckbox"> <Properties> @@ -162,11 +187,6 @@ <Events> <EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="limitedOcrCheckboxActionPerformed"/> </Events> - <Constraints> - <Constraint 
layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> - <GridBagConstraints gridX="0" gridY="7" gridWidth="2" gridHeight="1" fill="0" ipadX="216" ipadY="0" insetsTop="0" insetsLeft="31" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/> - </Constraint> - </Constraints> </Component> <Component class="javax.swing.JCheckBox" name="ocrOnlyCheckbox"> <Properties> @@ -177,24 +197,6 @@ <Events> <EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="ocrOnlyCheckboxActionPerformed"/> </Events> - <Constraints> - <Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> - <GridBagConstraints gridX="0" gridY="6" gridWidth="2" gridHeight="1" fill="0" ipadX="0" ipadY="0" insetsTop="0" insetsLeft="31" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/> - </Constraint> - </Constraints> - </Component> - <Component class="javax.swing.JCheckBox" name="solrCheckbox"> - <Properties> - <Property name="selected" type="boolean" value="true"/> - <Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor"> - <ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchJobSettingsPanel.solrCheckbox.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/> - </Property> - </Properties> - <Constraints> - <Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription"> - <GridBagConstraints gridX="0" gridY="8" gridWidth="2" gridHeight="1" fill="0" ipadX="0" ipadY="0" insetsTop="7" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/> - </Constraint> - </Constraints> </Component> </SubComponents> </Form> diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettingsPanel.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettingsPanel.java index 2c6182d78b2e3af71596a065e5fa1af60aaa8cf4..6bf036000aba656acad1df751834df5665ad441e 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettingsPanel.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettingsPanel.java @@ -40,8 +40,6 @@ */ @SuppressWarnings("PMD.SingularField") // UI widgets cause lots of false positives public final class KeywordSearchJobSettingsPanel extends IngestModuleIngestJobSettingsPanel implements PropertyChangeListener { - - private static final long serialVersionUID = 1L; private final KeywordListsTableModel tableModel = new KeywordListsTableModel(); private final List<String> keywordListNames = new ArrayList<>(); private final Map<String, Boolean> keywordListStates = new HashMap<>(); @@ -67,7 +65,6 @@ private void initializeKeywordListSettings(KeywordSearchJobSettings settings) { ocrCheckBox.setSelected(settings.isOCREnabled()); limitedOcrCheckbox.setSelected(settings.isLimitedOCREnabled()); ocrOnlyCheckbox.setSelected(settings.isOCROnly()); - solrCheckbox.setSelected(settings.isIndexToSolrEnabled()); handleOcrEnabled(settings.isOCREnabled()); } @@ -197,7 +194,7 @@ public 
IngestModuleIngestJobSettings getSettings() { } } return new KeywordSearchJobSettings(enabledListNames, disabledListNames, - this.ocrCheckBox.isSelected(), this.limitedOcrCheckbox.isSelected(), this.ocrOnlyCheckbox.isSelected(), this.solrCheckbox.isSelected()); + this.ocrCheckBox.isSelected(), this.limitedOcrCheckbox.isSelected(), this.ocrOnlyCheckbox.isSelected()); } void reset(KeywordSearchJobSettings newSettings) { @@ -256,7 +253,6 @@ public Class<?> getColumnClass(int c) { @SuppressWarnings("unchecked") // <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents private void initComponents() { - java.awt.GridBagConstraints gridBagConstraints; listsScrollPane = new javax.swing.JScrollPane(); listsTable = new javax.swing.JTable(); @@ -268,10 +264,8 @@ private void initComponents() { ocrCheckBox = new javax.swing.JCheckBox(); limitedOcrCheckbox = new javax.swing.JCheckBox(); ocrOnlyCheckbox = new javax.swing.JCheckBox(); - solrCheckbox = new javax.swing.JCheckBox(); setPreferredSize(new java.awt.Dimension(300, 170)); - setLayout(new java.awt.GridBagLayout()); listsScrollPane.setBorder(javax.swing.BorderFactory.createEtchedBorder()); listsScrollPane.setPreferredSize(new java.awt.Dimension(300, 100)); @@ -291,68 +285,19 @@ private void initComponents() { listsScrollPane.setViewportView(listsTable); listsTable.setDefaultRenderer(String.class, new SimpleTableCellRenderer()); - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 0; - gridBagConstraints.gridy = 1; - gridBagConstraints.gridwidth = 7; - gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH; - gridBagConstraints.ipadx = 284; - gridBagConstraints.ipady = -71; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; - gridBagConstraints.weightx = 1.0; - gridBagConstraints.weighty = 1.0; - gridBagConstraints.insets = new java.awt.Insets(6, 10, 0, 0); - add(listsScrollPane, gridBagConstraints); - titleLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.titleLabel.text")); // NOI18N - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 0; - gridBagConstraints.gridy = 0; - gridBagConstraints.gridwidth = 2; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; - gridBagConstraints.insets = new java.awt.Insets(7, 10, 0, 0); - add(titleLabel, gridBagConstraints); languagesLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.languagesLabel.text")); // NOI18N languagesLabel.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.languagesLabel.toolTipText")); // NOI18N languagesLabel.setPreferredSize(new java.awt.Dimension(294, 35)); languagesLabel.setVerticalTextPosition(javax.swing.SwingConstants.BOTTOM); - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 0; - gridBagConstraints.gridy = 2; - gridBagConstraints.gridwidth = 8; - gridBagConstraints.ipadx = 25; - gridBagConstraints.ipady = -22; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; - gridBagConstraints.insets = new java.awt.Insets(6, 10, 0, 0); - add(languagesLabel, gridBagConstraints); languagesValLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.languagesValLabel.text")); // NOI18N 
languagesValLabel.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.languagesValLabel.toolTipText")); // NOI18N - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 0; - gridBagConstraints.gridy = 3; - gridBagConstraints.gridwidth = 6; - gridBagConstraints.ipadx = 270; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; - gridBagConstraints.insets = new java.awt.Insets(6, 20, 0, 0); - add(languagesValLabel, gridBagConstraints); encodingsLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.encodingsLabel.text")); // NOI18N - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 0; - gridBagConstraints.gridy = 4; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; - gridBagConstraints.insets = new java.awt.Insets(11, 10, 0, 0); - add(encodingsLabel, gridBagConstraints); keywordSearchEncodings.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.keywordSearchEncodings.text")); // NOI18N - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 1; - gridBagConstraints.gridy = 4; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; - gridBagConstraints.insets = new java.awt.Insets(11, 10, 0, 0); - add(keywordSearchEncodings, gridBagConstraints); ocrCheckBox.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.ocrCheckBox.text")); // NOI18N ocrCheckBox.addActionListener(new java.awt.event.ActionListener() { @@ -360,13 +305,6 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { ocrCheckBoxActionPerformed(evt); } }); - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 0; - gridBagConstraints.gridy = 5; - gridBagConstraints.gridwidth = 2; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; - gridBagConstraints.insets = new java.awt.Insets(7, 10, 0, 0); - add(ocrCheckBox, gridBagConstraints); limitedOcrCheckbox.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.limitedOcrCheckbox.text")); // NOI18N limitedOcrCheckbox.setVerticalTextPosition(javax.swing.SwingConstants.TOP); @@ -375,14 +313,6 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { limitedOcrCheckboxActionPerformed(evt); } }); - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 0; - gridBagConstraints.gridy = 7; - gridBagConstraints.gridwidth = 2; - gridBagConstraints.ipadx = 216; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; - gridBagConstraints.insets = new java.awt.Insets(0, 31, 0, 0); - add(limitedOcrCheckbox, gridBagConstraints); ocrOnlyCheckbox.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.ocrOnlyCheckbox.text")); // NOI18N ocrOnlyCheckbox.addActionListener(new java.awt.event.ActionListener() { @@ -390,23 +320,57 @@ public void actionPerformed(java.awt.event.ActionEvent evt) { ocrOnlyCheckboxActionPerformed(evt); } }); - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 0; - gridBagConstraints.gridy = 6; - gridBagConstraints.gridwidth = 2; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; - gridBagConstraints.insets = new 
java.awt.Insets(0, 31, 0, 0); - add(ocrOnlyCheckbox, gridBagConstraints); - - solrCheckbox.setSelected(true); - solrCheckbox.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.solrCheckbox.text")); // NOI18N - gridBagConstraints = new java.awt.GridBagConstraints(); - gridBagConstraints.gridx = 0; - gridBagConstraints.gridy = 8; - gridBagConstraints.gridwidth = 2; - gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST; - gridBagConstraints.insets = new java.awt.Insets(7, 10, 0, 0); - add(solrCheckbox, gridBagConstraints); + + javax.swing.GroupLayout layout = new javax.swing.GroupLayout(this); + this.setLayout(layout); + layout.setHorizontalGroup( + layout.createParallelGroup(javax.swing.GroupLayout.Alignment.TRAILING) + .addGroup(layout.createSequentialGroup() + .addContainerGap() + .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) + .addComponent(languagesLabel, javax.swing.GroupLayout.Alignment.TRAILING, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE) + .addGroup(layout.createSequentialGroup() + .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) + .addComponent(listsScrollPane, javax.swing.GroupLayout.DEFAULT_SIZE, 316, Short.MAX_VALUE) + .addComponent(titleLabel) + .addGroup(layout.createSequentialGroup() + .addComponent(encodingsLabel) + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED) + .addComponent(keywordSearchEncodings)) + .addGroup(layout.createSequentialGroup() + .addGap(10, 10, 10) + .addComponent(languagesValLabel, javax.swing.GroupLayout.PREFERRED_SIZE, 274, javax.swing.GroupLayout.PREFERRED_SIZE)) + .addComponent(ocrCheckBox) + .addGroup(layout.createSequentialGroup() + .addGap(21, 21, 21) + .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) + .addComponent(ocrOnlyCheckbox) + .addComponent(limitedOcrCheckbox, javax.swing.GroupLayout.PREFERRED_SIZE, 288, javax.swing.GroupLayout.PREFERRED_SIZE)))) + .addContainerGap()))) + ); + layout.setVerticalGroup( + layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) + .addGroup(layout.createSequentialGroup() + .addGap(7, 7, 7) + .addComponent(titleLabel) + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) + .addComponent(listsScrollPane, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE) + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) + .addComponent(languagesLabel, javax.swing.GroupLayout.PREFERRED_SIZE, 13, javax.swing.GroupLayout.PREFERRED_SIZE) + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) + .addComponent(languagesValLabel) + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED) + .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE) + .addComponent(encodingsLabel) + .addComponent(keywordSearchEncodings)) + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED) + .addComponent(ocrCheckBox) + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) + .addComponent(ocrOnlyCheckbox) + .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) + .addComponent(limitedOcrCheckbox, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE) + .addContainerGap()) + ); }// </editor-fold>//GEN-END:initComponents private void 
ocrCheckBoxActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_ocrCheckBoxActionPerformed @@ -432,7 +396,6 @@ private void ocrOnlyCheckboxActionPerformed(java.awt.event.ActionEvent evt) {//G private javax.swing.JTable listsTable; private javax.swing.JCheckBox ocrCheckBox; private javax.swing.JCheckBox ocrOnlyCheckbox; - private javax.swing.JCheckBox solrCheckbox; private javax.swing.JLabel titleLabel; // End of variables declaration//GEN-END:variables } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchModuleFactory.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchModuleFactory.java index 43bbafaf063761d1f8b03e1ba361564ad65bbb60..c6b2c39f888228c1ad2a6463c65dc35e7371bb20 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchModuleFactory.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchModuleFactory.java @@ -130,12 +130,7 @@ public boolean isDataArtifactIngestModuleFactory() { @Override public DataArtifactIngestModule createDataArtifactIngestModule(IngestModuleIngestJobSettings settings) { - if (!(settings instanceof KeywordSearchJobSettings)) { - throw new IllegalArgumentException(NbBundle.getMessage(this.getClass(), - "KeywordSearchModuleFactory.createFileIngestModule.exception.msg")); - } - - return new KwsDataArtifactIngestModule((KeywordSearchJobSettings) settings); + return new KwsDataArtifactIngestModule(); } @Override @@ -145,12 +140,7 @@ public boolean isAnalysisResultIngestModuleFactory() { @Override public AnalysisResultIngestModule createAnalysisResultIngestModule(IngestModuleIngestJobSettings settings) { - if (!(settings instanceof KeywordSearchJobSettings)) { - throw new IllegalArgumentException(NbBundle.getMessage(this.getClass(), - "KeywordSearchModuleFactory.createFileIngestModule.exception.msg")); - } - - return new KwsAnalysisResultIngestModule((KeywordSearchJobSettings) settings); + return new KwsAnalysisResultIngestModule(); } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchSettings.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchSettings.java index 0087c7d05e01ebe7d31df0ad337ce08cf12c8f7e..b7057a2c89d0061bdd4d3ea4954d2a6952325760 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchSettings.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchSettings.java @@ -29,6 +29,7 @@ import org.sleuthkit.autopsy.coreutils.StringExtract; import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT; import org.sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.StringsExtractOptions; +import org.sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.UpdateFrequency; //This file contains constants and settings for KeywordSearch class KeywordSearchSettings { @@ -45,9 +46,34 @@ class KeywordSearchSettings { static final boolean LIMITED_OCR_ENABLED_DEFAULT = false; private static boolean skipKnown = true; private static final Logger logger = Logger.getLogger(KeywordSearchSettings.class.getName()); + private static UpdateFrequency UpdateFreq = UpdateFrequency.DEFAULT; private static List<StringExtract.StringExtractUnicodeTable.SCRIPT> stringExtractScripts = new ArrayList<>(); private static Map<String, String> stringExtractOptions = new HashMap<>(); + /** + * Gets the update Frequency from KeywordSearch_Options.properties + * + * @return KeywordSearchIngestModule's update frequency + */ + static UpdateFrequency 
getUpdateFrequency() { + if (ModuleSettings.getConfigSetting(PROPERTIES_OPTIONS, "UpdateFrequency") != null) { //NON-NLS + return UpdateFrequency.valueOf(ModuleSettings.getConfigSetting(PROPERTIES_OPTIONS, "UpdateFrequency")); //NON-NLS + } + //if it failed, return the default/last known value + logger.log(Level.WARNING, "Could not read property for UpdateFrequency, returning backup value."); //NON-NLS + return UpdateFrequency.DEFAULT; + } + + /** + * Sets the update frequency and writes to KeywordSearch_Options.properties + * + * @param freq Sets KeywordSearchIngestModule to this value. + */ + static void setUpdateFrequency(UpdateFrequency freq) { + ModuleSettings.setConfigSetting(PROPERTIES_OPTIONS, "UpdateFrequency", freq.name()); //NON-NLS + UpdateFreq = freq; + } + /** * Sets whether or not to skip adding known good files to the search during * index. @@ -217,6 +243,11 @@ static void setDefaults() { logger.log(Level.INFO, "No configuration for NSRL found, generating default..."); //NON-NLS KeywordSearchSettings.setSkipKnown(true); } + //setting default Update Frequency + if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_OPTIONS, "UpdateFrequency")) { //NON-NLS + logger.log(Level.INFO, "No configuration for Update Frequency found, generating default..."); //NON-NLS + KeywordSearchSettings.setUpdateFrequency(UpdateFrequency.DEFAULT); + } //setting default Extract UTF8 if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_OPTIONS, StringsExtractOptions.EXTRACT_UTF8.toString())) { logger.log(Level.INFO, "No configuration for UTF8 found, generating default..."); //NON-NLS diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchUtil.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchUtil.java index 8cab5236ec7c33277d1e47398d95bc98d95189e4..abbc893f85866d16ad57a6c93c560911846c42df 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchUtil.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchUtil.java @@ -20,22 +20,11 @@ import java.awt.Component; import java.io.File; -import java.io.Reader; -import java.util.regex.Matcher; import org.sleuthkit.autopsy.coreutils.Logger; import javax.swing.JOptionPane; -import org.openide.util.Lookup; import org.openide.windows.WindowManager; -import org.sleuthkit.autopsy.textextractors.TextExtractor; -import org.sleuthkit.autopsy.textextractors.TextExtractorFactory; -import org.sleuthkit.datamodel.AbstractFile; -import org.sleuthkit.datamodel.BlackboardArtifact; -import org.sleuthkit.datamodel.Content; -import org.sleuthkit.datamodel.TskCoreException; class KeywordSearchUtil { - - private static final String SNIPPET_DELIMITER = String.valueOf(Character.toChars(171)); public enum DIALOG_MESSAGE_TYPE { @@ -153,36 +142,6 @@ static KeywordSearchQuery getQueryForKeyword(Keyword keyword, KeywordList keywor } return query; } - - /** - * Make a snippet from the given content that has the given hit plus some - * surrounding context. - * - * @param content The content to extract the snippet from. - * - * @param hitMatcher The Matcher that has the start/end info for where the - * hit is in the content. - * @param hit The actual hit in the content. - * - * @return A snippet extracted from content that contains hit plus some - * surrounding context. - */ - static String makeSnippet(String content, Matcher hitMatcher, String hit) { - // Get the snippet from the document. 
- final int end = hitMatcher.end(); - final int start = hitMatcher.start(); - - return makeSnippet(content, start, end, hit); - } - - static String makeSnippet(String content, int startOffset, int endOffset, String hit) { - // Get the snippet from the document. - int maxIndex = content.length() - 1; - - return content.substring(Integer.max(0, startOffset - 20), Integer.max(0, startOffset)) - + SNIPPET_DELIMITER + hit + SNIPPET_DELIMITER - + content.substring(Integer.min(maxIndex, endOffset), Integer.min(maxIndex, endOffset + 20)); - } /** * Is the Keyword Search list at absPath an XML list? @@ -195,40 +154,4 @@ static boolean isXMLList(String absPath) { //TODO: make this more robust, if necessary return new File(absPath).getName().endsWith(".xml"); //NON-NLS } - - static Reader getReader(Content content) throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException{ - return getReader(content, null); - } - - static Reader getReader(Content content, Lookup stringsExtractionContext) throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException{ - Reader reader = null; - if (content instanceof BlackboardArtifact) { - BlackboardArtifact artifact = (BlackboardArtifact) content; - if (artifact.getArtifactID() > 0) { - /* - * Artifact indexing is only supported for artifacts that use - * negative artifact ids to avoid overlapping with the object - * ids of other types of Content. - */ - return null; - } - TextExtractor blackboardExtractor = TextExtractorFactory.getExtractor(content, null); - reader = blackboardExtractor.getReader(); - - } else if (content instanceof AbstractFile) { - TextExtractor stringsExtractor = TextExtractorFactory.getStringsExtractor( content, stringsExtractionContext); - reader = stringsExtractor.getReader(); - } else { - try { - TextExtractor contentExtractor = TextExtractorFactory.getExtractor(content, null); - reader = contentExtractor.getReader(); - } catch (TextExtractorFactory.NoTextExtractorFound | TextExtractor.InitReaderException ex) { - // Try the StringsTextExtractor if Tika extractions fails. - TextExtractor stringsExtractor = TextExtractorFactory.getStringsExtractor(content, null); - reader = stringsExtractor.getReader(); - } - } - - return reader; - } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KwsAnalysisResultIngestModule.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KwsAnalysisResultIngestModule.java index 2697024752136b351382b236433b282c62ffb3fa..b82c4d91fef544c3a43d06e5c68aef54a2f1aae9 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KwsAnalysisResultIngestModule.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KwsAnalysisResultIngestModule.java @@ -18,14 +18,16 @@ */ package org.sleuthkit.autopsy.keywordsearch; -import java.io.Reader; import java.util.logging.Level; +import org.openide.util.Lookup; import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.ingest.AnalysisResultIngestModule; import org.sleuthkit.autopsy.ingest.IngestJobContext; import org.sleuthkit.autopsy.ingest.IngestModule; +import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService; import org.sleuthkit.datamodel.AnalysisResult; import org.sleuthkit.datamodel.BlackboardArtifact; +import org.sleuthkit.datamodel.TskCoreException; /** * An analysis result ingest module that indexes text for keyword search. 
All @@ -38,33 +40,21 @@ public class KwsAnalysisResultIngestModule implements AnalysisResultIngestModule private static final Logger LOGGER = Logger.getLogger(KeywordSearchIngestModule.class.getName()); private static final int TSK_KEYWORD_HIT_TYPE_ID = BlackboardArtifact.Type.TSK_KEYWORD_HIT.getTypeID(); private IngestJobContext context; - private final KeywordSearchJobSettings settings; + private KeywordSearchService searchService; - KwsAnalysisResultIngestModule(KeywordSearchJobSettings settings) { - this.settings = settings; - } - @Override public void startUp(IngestJobContext context) throws IngestModule.IngestModuleException { this.context = context; + searchService = Lookup.getDefault().lookup(KeywordSearchService.class); } @Override public IngestModule.ProcessResult process(AnalysisResult result) { try { if (result.getType().getTypeID() != TSK_KEYWORD_HIT_TYPE_ID) { - Ingester ingester = Ingester.getDefault(); - Reader blackboardExtractedTextReader = KeywordSearchUtil.getReader(result); - String sourceName = result.getDisplayName() + "_" + result.getArtifactID(); - ingester.indexMetaDataOnly(result, sourceName); - ingester.search(blackboardExtractedTextReader, - result.getArtifactID(), - sourceName, result, - context, true, - settings.isIndexToSolrEnabled(), - settings.getNamesOfEnabledKeyWordLists()); + searchService.index(result); } - } catch (Exception ex) { + } catch (TskCoreException ex) { LOGGER.log(Level.SEVERE, String.format("Error indexing analysis result '%s' (job ID=%d)", result, context.getJobId()), ex); //NON-NLS return IngestModule.ProcessResult.ERROR; } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KwsDataArtifactIngestModule.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KwsDataArtifactIngestModule.java index 81e5dee952ea266e316be093b3792f74a58cd4f1..fe4cac8b4fd095657450fcaa259da2b2073f9478 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KwsDataArtifactIngestModule.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KwsDataArtifactIngestModule.java @@ -18,13 +18,12 @@ */ package org.sleuthkit.autopsy.keywordsearch; -import java.io.Reader; import java.util.logging.Level; +import org.openide.util.Lookup; import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.ingest.DataArtifactIngestModule; import org.sleuthkit.autopsy.ingest.IngestJobContext; -import org.sleuthkit.autopsy.textextractors.TextExtractor; -import org.sleuthkit.autopsy.textextractors.TextExtractorFactory; +import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService; import org.sleuthkit.datamodel.BlackboardArtifact; import org.sleuthkit.datamodel.DataArtifact; import org.sleuthkit.datamodel.TskCoreException; @@ -40,36 +39,24 @@ public class KwsDataArtifactIngestModule implements DataArtifactIngestModule { private static final Logger LOGGER = Logger.getLogger(KeywordSearchIngestModule.class.getName()); private static final int TSK_ASSOCIATED_OBJECT_TYPE_ID = BlackboardArtifact.Type.TSK_ASSOCIATED_OBJECT.getTypeID(); private IngestJobContext context; - private final KeywordSearchJobSettings settings; + private KeywordSearchService searchService; - KwsDataArtifactIngestModule(KeywordSearchJobSettings settings) { - this.settings = settings; - } - @Override public void startUp(IngestJobContext context) throws IngestModuleException { this.context = context; + searchService = Lookup.getDefault().lookup(KeywordSearchService.class); } @Override public ProcessResult process(DataArtifact artifact) { try { if 
(artifact.getType().getTypeID() != TSK_ASSOCIATED_OBJECT_TYPE_ID) { - Ingester ingester = Ingester.getDefault(); - Reader blackboardExtractedTextReader = KeywordSearchUtil.getReader(artifact); - String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID(); - ingester.indexMetaDataOnly(artifact, sourceName); - ingester.search(blackboardExtractedTextReader, - artifact.getArtifactID(), - sourceName, artifact, - context, true, - settings.isIndexToSolrEnabled(), - settings.getNamesOfEnabledKeyWordLists()); + searchService.index(artifact); } - } catch (Exception ex) { + } catch (TskCoreException ex) { LOGGER.log(Level.SEVERE, String.format("Error indexing data artifact '%s' (job ID=%d)", artifact, context.getJobId()), ex); //NON-NLS return ProcessResult.ERROR; - } + } return ProcessResult.OK; } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java index 451a449b43095665c1ef7463462a49e3f2c5acf1..993611a4d0b706d24e96bc562407ec1582d3f596 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java @@ -46,6 +46,7 @@ import static org.sleuthkit.autopsy.keywordsearch.KeywordSearchSettings.MODULE_NAME; import org.sleuthkit.datamodel.AbstractFile; import org.sleuthkit.datamodel.Account; +import org.sleuthkit.datamodel.AccountFileInstance; import org.sleuthkit.datamodel.BlackboardArtifact; import org.sleuthkit.datamodel.BlackboardAttribute; import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE; @@ -430,7 +431,7 @@ private List<KeywordHit> createKeywordHits(SolrDocument solrDoc) throws TskCoreE keywordsFoundInThisDocument.put(hit, hit); if (artifactAttributeType == null) { - hits.add(new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit)); + hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit)); } else { switch (artifactAttributeType) { case TSK_EMAIL: @@ -441,7 +442,7 @@ private List<KeywordHit> createKeywordHits(SolrDocument solrDoc) throws TskCoreE */ if (hit.length() >= MIN_EMAIL_ADDR_LENGTH && DomainValidator.getInstance(true).isValidTld(hit.substring(hit.lastIndexOf('.')))) { - hits.add(new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit)); + hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit)); } break; @@ -458,14 +459,14 @@ private List<KeywordHit> createKeywordHits(SolrDocument solrDoc) throws TskCoreE if (ccnMatcher.find()) { final String group = ccnMatcher.group("ccn"); if (CreditCardValidator.isValidCCN(group)) { - hits.add(new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit)); + hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit)); } } } break; default: - hits.add(new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit)); + hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit)); break; } } @@ -485,6 +486,30 @@ private List<KeywordHit> createKeywordHits(SolrDocument solrDoc) throws TskCoreE return hits; } + /** + * Make a snippet from the given content that has the given hit plus some + * surrounding context. + * + * @param content The content to extract the snippet from. + * + * @param hitMatcher The Matcher that has the start/end info for where the + * hit is in the content. + * @param hit The actual hit in the content. 
+ * + * @return A snippet extracted from content that contains hit plus some + * surrounding context. + */ + private String makeSnippet(String content, Matcher hitMatcher, String hit) { + // Get the snippet from the document. + int maxIndex = content.length() - 1; + final int end = hitMatcher.end(); + final int start = hitMatcher.start(); + + return content.substring(Integer.max(0, start - 20), Integer.max(0, start)) + + SNIPPET_DELIMITER + hit + SNIPPET_DELIMITER + + content.substring(Integer.min(maxIndex, end), Integer.min(maxIndex, end + 20)); + } + @Override public void addFilter(KeywordQueryFilter filter) { this.filters.add(filter); @@ -548,11 +573,6 @@ synchronized public String getEscapedQueryString() { */ @Override public BlackboardArtifact createKeywordHitArtifact(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) { - return createKeywordHitArtifact(content, originalKeyword, foundKeyword, hit, snippet, listName, ingestJobId); - } - - - public static BlackboardArtifact createKeywordHitArtifact(Content content, Keyword originalKW, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) { final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName(); if (content == null) { @@ -563,8 +583,8 @@ public static BlackboardArtifact createKeywordHitArtifact(Content content, Keyw /* * Credit Card number hits are handled differently */ - if (originalKW.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) { - createCCNAccount(content, originalKW, foundKeyword, hit, snippet, listName, ingestJobId); + if (originalKeyword.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) { + createCCNAccount(content, foundKeyword, hit, snippet, listName, ingestJobId); return null; } @@ -575,10 +595,8 @@ public static BlackboardArtifact createKeywordHitArtifact(Content content, Keyw Collection<BlackboardAttribute> attributes = new ArrayList<>(); String configuration = originalKeyword.getOriginalTerm(); - attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm().toLowerCase())); - if(!originalKW.searchTermIsWholeWord() || !originalKW.searchTermIsLiteral()) { - attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, originalKW.getSearchTerm())); - } + attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm())); + attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, getQueryString())); if (snippet != null) { attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet)); @@ -588,7 +606,7 @@ public static BlackboardArtifact createKeywordHitArtifact(Content content, Keyw -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID)) ); - if (originalKW.searchTermIsLiteral()) { + if (originalKeyword.searchTermIsLiteral()) { configuration += " (" + TskData.KeywordSearchQueryType.SUBSTRING.name() + ")"; attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, TskData.KeywordSearchQueryType.SUBSTRING.ordinal())); } else { @@ -612,11 +630,11 @@ public static BlackboardArtifact createKeywordHitArtifact(Content content, Keyw } } - private static void createCCNAccount(Content content, Keyword originalKW, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) { + private void createCCNAccount(Content 
content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) { final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName(); - if (originalKW.getArtifactAttributeType() != ATTRIBUTE_TYPE.TSK_CARD_NUMBER) { + if (originalKeyword.getArtifactAttributeType() != ATTRIBUTE_TYPE.TSK_CARD_NUMBER) { LOGGER.log(Level.SEVERE, "Keyword hit is not a credit card number"); //NON-NLS return; } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java index d9dd4d921a4a4284acb003fd1cb7aaadd4c573ba..5e64dcbd9029e9c6ee0d6f7fe5e21552234032a7 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java @@ -93,22 +93,28 @@ public void index(Content content) throws TskCoreException { return; } try { - Reader blackboardExtractedTextReader = KeywordSearchUtil.getReader(content); + TextExtractor blackboardExtractor = TextExtractorFactory.getExtractor(content, null); + Reader blackboardExtractedTextReader = blackboardExtractor.getReader(); String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID(); ingester.indexMetaDataOnly(artifact, sourceName); - // Will not cause an inline search becauce the keyword list is null - ingester.search(blackboardExtractedTextReader, artifact.getArtifactID(), sourceName, content, null, true, true, null); - } catch (Exception ex) { + ingester.indexText(blackboardExtractedTextReader, artifact.getArtifactID(), sourceName, content, null); + } catch (Ingester.IngesterException | TextExtractorFactory.NoTextExtractorFound | TextExtractor.InitReaderException ex) { throw new TskCoreException("Error indexing artifact", ex); } } else { try { - - Reader reader = KeywordSearchUtil.getReader(content); - // Will not cause an inline search becauce the keyword list is null - ingester.search(reader, content.getId(), content.getName(), content, null, true, true, null); - } catch (Exception ex) { - throw new TskCoreException("Error indexing content", ex); + TextExtractor contentExtractor = TextExtractorFactory.getExtractor(content, null); + Reader contentExtractedTextReader = contentExtractor.getReader(); + ingester.indexText(contentExtractedTextReader, content.getId(), content.getName(), content, null); + } catch (TextExtractorFactory.NoTextExtractorFound | Ingester.IngesterException | TextExtractor.InitReaderException ex) { + try { + // Try the StringsTextExtractor if Tika extractions fails. + TextExtractor stringsExtractor = TextExtractorFactory.getStringsExtractor(content, null); + Reader stringsExtractedTextReader = stringsExtractor.getReader(); + ingester.indexStrings(stringsExtractedTextReader, content.getId(), content.getName(), content, null); + } catch (Ingester.IngesterException | TextExtractor.InitReaderException ex1) { + throw new TskCoreException("Error indexing content", ex1); + } } // only do a Solr commit if ingest is not running. If ingest is running, the changes will // be committed via a periodic commit or via final commit after the ingest job has finished. 
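The rewritten SolrSearchService.index(Content) path above follows a Tika-first, strings-fallback pattern: try the content-specific extractor, and only if that fails fall back to raw string extraction. A condensed sketch of that control flow follows; it assumes the package-local Ingester and the TextExtractor types shown in the hunk, omits the artifact branch and the commit handling, and the helper name indexFileText is illustrative only.

    // Sketch only: mirrors the extraction fallback used by SolrSearchService.index(Content).
    private void indexFileText(Content content, Ingester ingester) throws TskCoreException {
        try {
            // Preferred path: let the factory choose a content-specific (e.g. Tika-based) extractor.
            TextExtractor contentExtractor = TextExtractorFactory.getExtractor(content, null);
            Reader contentReader = contentExtractor.getReader();
            ingester.indexText(contentReader, content.getId(), content.getName(), content, null);
        } catch (TextExtractorFactory.NoTextExtractorFound | Ingester.IngesterException | TextExtractor.InitReaderException ex) {
            try {
                // Fallback: index raw strings when no richer extractor applies or extraction fails.
                TextExtractor stringsExtractor = TextExtractorFactory.getStringsExtractor(content, null);
                Reader stringsReader = stringsExtractor.getReader();
                ingester.indexStrings(stringsReader, content.getId(), content.getName(), content, null);
            } catch (Ingester.IngesterException | TextExtractor.InitReaderException ex1) {
                throw new TskCoreException("Error indexing content", ex1);
            }
        }
    }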
@@ -415,11 +421,11 @@ public void indexArtifact(BlackboardArtifact artifact) throws TskCoreException { try { String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID(); - TextExtractor blackboardExtractor = TextExtractorFactory.getExtractor(artifact, null); + TextExtractor blackboardExtractor = TextExtractorFactory.getExtractor((Content) artifact, null); Reader blackboardExtractedTextReader = blackboardExtractor.getReader(); ingester.indexMetaDataOnly(artifact, sourceName); - ingester.search(blackboardExtractedTextReader, artifact.getId(), sourceName, artifact, null, true, true, null); - } catch (Exception ex) { + ingester.indexText(blackboardExtractedTextReader, artifact.getId(), sourceName, artifact, null); + } catch (Ingester.IngesterException | TextExtractorFactory.NoTextExtractorFound | TextExtractor.InitReaderException ex) { throw new TskCoreException(ex.getCause().getMessage(), ex); } } diff --git a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java index a88ad8cad85dd6feb52ccbdcd0cae974ef506a1e..088af9f4b2e876262c86ecabdb341493d9020aeb 100644 --- a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java +++ b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java @@ -420,6 +420,15 @@ private void analyzeRegistryFiles(long ingestJobId) { Report report = currentCase.addReport(regOutputFiles.fullPlugins, NbBundle.getMessage(this.getClass(), "ExtractRegistry.parentModuleName.noSpace"), "RegRipper " + regFile.getUniquePath(), regFile); //NON-NLS + + // Index the report content so that it will be available for keyword search. + KeywordSearchService searchService = Lookup.getDefault().lookup(KeywordSearchService.class); + if (null == searchService) { + logger.log(Level.WARNING, "Keyword search service not found. Report will not be indexed"); + } else { + searchService.index(report); + report.close(); + } } catch (TskCoreException e) { this.addErrorMessage("Error adding regripper output as Autopsy report: " + e.getLocalizedMessage()); //NON-NLS }
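The restored UpdateFrequency accessors in KeywordSearchSettings persist the enum by name through ModuleSettings. Because getUpdateFrequency() converts the stored string with UpdateFrequency.valueOf, a hand-edited or stale properties value that no longer matches an enum constant would surface as an IllegalArgumentException rather than falling back to DEFAULT. A defensive caller could guard for that; this is a hypothetical sketch, not part of the patch, and assumes it lives in the keywordsearch package alongside the settings class.

    // Hypothetical defensive read; the module itself simply calls KeywordSearchSettings.getUpdateFrequency().
    static UpdateFrequency readFrequencyOrDefault() {
        try {
            return KeywordSearchSettings.getUpdateFrequency();
        } catch (IllegalArgumentException ex) {
            // Stored name does not match any UpdateFrequency constant; fall back to the default.
            return UpdateFrequency.DEFAULT;
        }
    }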
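For reference, the snippet builder moved into RegexQuery keeps roughly 20 characters of context on each side of the hit and brackets the hit with the SNIPPET_DELIMITER character (code point 171, "«"), as the KeywordSearchUtil constant removed above defined it. A standalone illustration of the same windowing arithmetic, using a plain string instead of a Solr document, might look like this:

    // Standalone illustration (not part of the patch): same logic as RegexQuery.makeSnippet.
    String content = "Transfer complete, see invoice 4411 for details of the payment made today.";
    String hit = "invoice 4411";
    int start = content.indexOf(hit);
    int end = start + hit.length();
    int maxIndex = content.length() - 1;
    String delimiter = String.valueOf(Character.toChars(171)); // "«", as in the removed constant
    String snippet = content.substring(Integer.max(0, start - 20), Integer.max(0, start))
            + delimiter + hit + delimiter
            + content.substring(Integer.min(maxIndex, end), Integer.min(maxIndex, end + 20));
    // snippet is "nsfer complete, see «invoice 4411« for details of the "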
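The artifact ingest modules and the ExtractRegistry report handling above all obtain the indexing service the same way, through the NetBeans Lookup, and pass Content to KeywordSearchService.index. A minimal self-contained sketch of that pattern follows; the class and helper names are hypothetical, and the null check mirrors the warning-and-skip behavior used for the RegRipper report.

    import java.util.logging.Level;
    import org.openide.util.Lookup;
    import org.sleuthkit.autopsy.coreutils.Logger;
    import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;
    import org.sleuthkit.datamodel.Content;
    import org.sleuthkit.datamodel.TskCoreException;

    class KeywordIndexingSketch {

        private static final Logger LOGGER = Logger.getLogger(KeywordIndexingSketch.class.getName());

        // Hypothetical helper: resolve the service and index any Content (file, artifact, or report),
        // logging instead of failing when no keyword search service is installed.
        static void indexForKeywordSearch(Content item) throws TskCoreException {
            KeywordSearchService searchService = Lookup.getDefault().lookup(KeywordSearchService.class);
            if (searchService == null) {
                LOGGER.log(Level.WARNING, "Keyword search service not found; item will not be indexed"); //NON-NLS
                return;
            }
            searchService.index(item);
        }
    }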