Commit 5c573261 authored by Kelly Kelly

stashing

parent 4db6e52a
@@ -18,7 +18,8 @@
     <dependency conf="solr-war->default" org="org.apache.solr" name="solr" rev="4.10.4" transitive="false" /> <!-- the war file for embedded Solr 4 -->
     <dependency conf="solr-libs->default" name="solr-cell" rev="8.11.2" org="org.apache.solr"/>
+    <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
+    <!-- <dependency org="org.apache.lucene" name="lucene-core" rev="8.11.2"/> -->
     <!-- Autopsy -->
     <dependency conf="autopsy->default" org="org.apache.solr" name="solr-solrj" rev="8.11.2"/>
     <dependency conf="autopsy->default" org="com.optimaize.languagedetector" name="language-detector" rev="0.6"/>
@@ -44,6 +44,7 @@ file.reference.stax2-api-4.2.1.jar=release/modules/ext/stax2-api-4.2.1.jar
 file.reference.woodstox-core-6.2.4.jar=release/modules/ext/woodstox-core-6.2.4.jar
 file.reference.zookeeper-3.8.0.jar=release/modules/ext/zookeeper-3.8.0.jar
 file.reference.zookeeper-jute-3.8.0.jar=release/modules/ext/zookeeper-jute-3.8.0.jar
+file.reference.lucene-core-8.11.2.jar=release/modules/ext/lucene-core-8.11.2.jar
 javac.source=1.8
 javac.compilerargs=-Xlint -Xlint:-serial
 license.file=../LICENSE-2.0.txt
@@ -418,6 +418,10 @@
                 <runtime-relative-path>ext/zookeeper-jute-3.8.0.jar</runtime-relative-path>
                 <binary-origin>release/modules/ext/zookeeper-jute-3.8.0.jar</binary-origin>
             </class-path-extension>
+            <class-path-extension>
+                <runtime-relative-path>ext/lucene-core-8.11.2.jar</runtime-relative-path>
+                <binary-origin>release/modules/ext/lucene-core-8.11.2.jar</binary-origin>
+            </class-path-extension>
         </data>
     </configuration>
 </project>
@@ -19,6 +19,7 @@
 package org.sleuthkit.autopsy.keywordsearch;
 
 import com.twelvemonkeys.lang.StringUtil;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -27,6 +28,11 @@
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import org.apache.commons.validator.routines.DomainValidator;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.openide.util.Exceptions;
 import org.sleuthkit.autopsy.casemodule.Case;
 import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
 import org.sleuthkit.autopsy.coreutils.Logger;
@@ -71,24 +77,24 @@ void searchChunk(Chunk chunk) throws TskCoreException {
         List<KeywordHit> keywordHits = new ArrayList<>();
         if (originalKeyword.searchTermIsLiteral()) {
-            if (!originalKeyword.searchTermIsWholeWord()) {
+//            if (!originalKeyword.searchTermIsWholeWord()) {
                 if (StringUtil.containsIgnoreCase(chunk.geLowerCasedChunk(), originalKeyword.getSearchTerm())) {
                     keywordHits.addAll(createKeywordHits(chunk, originalKeyword));
                 }
-            } else {
-                String REGEX_FIND_WORD="\\b\\W*%s\\W*\\b"; //"[\\w[\\.']]*%s[\\w[\\.']]*"; //"(?i).*?\\b%s\\b.*?";
-                String regex=String.format(REGEX_FIND_WORD, Pattern.quote(originalKeyword.getSearchTerm().toLowerCase()));
+//            } else {
+//                String REGEX_FIND_WORD="\\b\\W*%s\\W*\\b"; //"[\\w[\\.']]*%s[\\w[\\.']]*"; //"(?i).*?\\b%s\\b.*?";
+//                String regex=String.format(REGEX_FIND_WORD, Pattern.quote(originalKeyword.getSearchTerm().toLowerCase()));
 //                if(chunk.geLowerCasedChunk().matches(regex)) {
 //                    keywordHits.addAll(createKeywordHits(chunk, originalKeyword));
 //                }
-                Pattern pattern = Pattern.compile(regex, java.util.regex.Pattern.CASE_INSENSITIVE);
-                Matcher matcher = pattern.matcher(chunk.geLowerCasedChunk());
-                if (matcher.find()) {
-                    keywordHits.addAll(createKeywordHits(chunk, originalKeyword));
-                }
-            }
+//                Pattern pattern = Pattern.compile(regex, java.util.regex.Pattern.CASE_INSENSITIVE);
+//                Matcher matcher = pattern.matcher(chunk.geLowerCasedChunk());
+//                if (matcher.find()) {
+//                    keywordHits.addAll(createKeywordHits(chunk, originalKeyword));
+//                }
+//            }
         } else {
             String regex = originalKeyword.getSearchTerm();
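The regex-based whole-word check that this hunk comments out can be exercised on its own. The sketch below is illustrative only: the class name, helper method, and sample strings are invented, but it reuses the same "\\b\\W*%s\\W*\\b" template and Pattern.quote call as the code above.

// Minimal sketch of the whole-word regex check, with hypothetical sample values.
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class WholeWordRegexDemo {

    private static final String REGEX_FIND_WORD = "\\b\\W*%s\\W*\\b";

    // Returns true when the search term appears as a whole word in the text.
    static boolean isWholeWordHit(String lowerCasedText, String searchTerm) {
        String regex = String.format(REGEX_FIND_WORD, Pattern.quote(searchTerm.toLowerCase()));
        Matcher matcher = Pattern.compile(regex, Pattern.CASE_INSENSITIVE).matcher(lowerCasedText);
        return matcher.find();
    }

    public static void main(String[] args) {
        String chunkText = "contact john.smith@example.com for details"; // hypothetical chunk text
        System.out.println(isWholeWordHit(chunkText, "details")); // true: matches a whole word
        System.out.println(isWholeWordHit(chunkText, "detail"));  // false: only part of "details"
    }
}

Pattern.quote keeps metacharacters in the search term from being treated as regex syntax, and the surrounding \b assertions reject matches that fall inside a larger word.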
@@ -163,6 +169,7 @@ private List<KeywordHit> createKeywordHits(Chunk chunk, Keyword originalKeyword)
             } else {
                 String REGEX_FIND_WORD="\\b\\W*%s\\W*\\b";
                 searchPattern=String.format(REGEX_FIND_WORD, Pattern.quote(originalKeyword.getSearchTerm().toLowerCase()));
+                testingTokenizer(chunk, originalKeyword);
             }
         } else {
             searchPattern = keywordString;
@@ -353,4 +360,28 @@ void makeArtifacts(Content content, IngestJobContext context, long sourceID) {
             map.clear();
         }
     }
+
+    // Experimental (stashed): check whether the keyword appears as a whole analyzed
+    // token in the chunk, using Lucene's StandardAnalyzer instead of a regex.
+    private void testingTokenizer(Chunk chunk, Keyword originalKeyword) {
+        try {
+            List<String> tokens = analyze(chunk.geLowerCasedChunk(), new StandardAnalyzer());
+            for (String token : tokens) {
+                if (token.equals(originalKeyword.getSearchTerm())) {
+                    // TODO (stashed work): record a keyword hit here.
+                }
+            }
+        } catch (IOException ex) {
+            Exceptions.printStackTrace(ex);
+        }
+    }
+
+    // Tokenizes the given text with the supplied analyzer and returns the terms.
+    public List<String> analyze(String text, Analyzer analyzer) throws IOException {
+        List<String> result = new ArrayList<>();
+        // The field name is arbitrary; it only selects a per-field analyzer if one is configured.
+        try (TokenStream tokenStream = analyzer.tokenStream("sampleName", text)) {
+            CharTermAttribute attr = tokenStream.addAttribute(CharTermAttribute.class);
+            tokenStream.reset();
+            while (tokenStream.incrementToken()) {
+                result.add(attr.toString());
+            }
+            tokenStream.end(); // close() is handled by try-with-resources
+        }
+        return result;
+    }
 }
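For comparison, here is a minimal, self-contained sketch of the tokenizer-based matching that the new testingTokenizer() and analyze() methods experiment with: run the chunk text through Lucene's StandardAnalyzer and look for an exact term match. The class name, field name, and sample strings are invented for illustration; only the Lucene API usage mirrors the code above.

// Standalone sketch of whole-word matching via Lucene analysis, with made-up sample data.
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenizerMatchDemo {

    // Splits text into analyzed terms; StandardAnalyzer also lower-cases them.
    static List<String> analyze(String text, Analyzer analyzer) throws IOException {
        List<String> terms = new ArrayList<>();
        try (TokenStream stream = analyzer.tokenStream("content", text)) {
            CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                terms.add(termAttr.toString());
            }
            stream.end();
        }
        return terms;
    }

    public static void main(String[] args) throws IOException {
        try (Analyzer analyzer = new StandardAnalyzer()) {
            List<String> tokens = analyze("Contact john.smith@example.com for details.", analyzer);
            System.out.println(tokens.contains("details")); // true: a whole analyzed token
            System.out.println(tokens.contains("detail"));  // false: not a token on its own
        }
    }
}

With this approach, "whole word" means "whole analyzed token": StandardAnalyzer applies Unicode text segmentation and lower-casing, rather than relying on a regex \b boundary.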