Skip to content
Snippets Groups Projects
Commit 5c573261 authored by Kelly Kelly's avatar Kelly Kelly
Browse files

stashing

parent 4db6e52a
Branches
Tags
No related merge requests found
...@@ -18,7 +18,8 @@ ...@@ -18,7 +18,8 @@
<dependency conf="solr-war->default" org="org.apache.solr" name="solr" rev="4.10.4" transitive="false" /> <!-- the war file for embedded Solr 4 --> <dependency conf="solr-war->default" org="org.apache.solr" name="solr" rev="4.10.4" transitive="false" /> <!-- the war file for embedded Solr 4 -->
<dependency conf="solr-libs->default" name="solr-cell" rev="8.11.2" org="org.apache.solr"/> <dependency conf="solr-libs->default" name="solr-cell" rev="8.11.2" org="org.apache.solr"/>
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
<!-- <dependency org="org.apache.lucene" name="lucene-core" rev="8.11.2"/> -->
<!-- Autopsy --> <!-- Autopsy -->
<dependency conf="autopsy->default" org="org.apache.solr" name="solr-solrj" rev="8.11.2"/> <dependency conf="autopsy->default" org="org.apache.solr" name="solr-solrj" rev="8.11.2"/>
<dependency conf="autopsy->default" org="com.optimaize.languagedetector" name="language-detector" rev="0.6"/> <dependency conf="autopsy->default" org="com.optimaize.languagedetector" name="language-detector" rev="0.6"/>
......
...@@ -44,6 +44,7 @@ file.reference.stax2-api-4.2.1.jar=release/modules/ext/stax2-api-4.2.1.jar ...@@ -44,6 +44,7 @@ file.reference.stax2-api-4.2.1.jar=release/modules/ext/stax2-api-4.2.1.jar
file.reference.woodstox-core-6.2.4.jar=release/modules/ext/woodstox-core-6.2.4.jar file.reference.woodstox-core-6.2.4.jar=release/modules/ext/woodstox-core-6.2.4.jar
file.reference.zookeeper-3.8.0.jar=release/modules/ext/zookeeper-3.8.0.jar file.reference.zookeeper-3.8.0.jar=release/modules/ext/zookeeper-3.8.0.jar
file.reference.zookeeper-jute-3.8.0.jar=release/modules/ext/zookeeper-jute-3.8.0.jar file.reference.zookeeper-jute-3.8.0.jar=release/modules/ext/zookeeper-jute-3.8.0.jar
file.reference.lucene-core-8.11.2.jar=release/modules/ext/lucene-core-8.11.2.jar
javac.source=1.8 javac.source=1.8
javac.compilerargs=-Xlint -Xlint:-serial javac.compilerargs=-Xlint -Xlint:-serial
license.file=../LICENSE-2.0.txt license.file=../LICENSE-2.0.txt
......
...@@ -418,6 +418,10 @@ ...@@ -418,6 +418,10 @@
<runtime-relative-path>ext/zookeeper-jute-3.8.0.jar</runtime-relative-path> <runtime-relative-path>ext/zookeeper-jute-3.8.0.jar</runtime-relative-path>
<binary-origin>release/modules/ext/zookeeper-jute-3.8.0.jar</binary-origin> <binary-origin>release/modules/ext/zookeeper-jute-3.8.0.jar</binary-origin>
</class-path-extension> </class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/lucene-core-8.11.2.jar</runtime-relative-path>
<binary-origin>release/modules/ext/lucene-core-8.11.2.jar</binary-origin>
</class-path-extension>
</data> </data>
</configuration> </configuration>
</project> </project>
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
package org.sleuthkit.autopsy.keywordsearch; package org.sleuthkit.autopsy.keywordsearch;
import com.twelvemonkeys.lang.StringUtil; import com.twelvemonkeys.lang.StringUtil;
import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
...@@ -27,6 +28,11 @@ ...@@ -27,6 +28,11 @@
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.apache.commons.validator.routines.DomainValidator; import org.apache.commons.validator.routines.DomainValidator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.openide.util.Exceptions;
import org.sleuthkit.autopsy.casemodule.Case; import org.sleuthkit.autopsy.casemodule.Case;
import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException; import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.coreutils.Logger;
...@@ -71,24 +77,24 @@ void searchChunk(Chunk chunk) throws TskCoreException { ...@@ -71,24 +77,24 @@ void searchChunk(Chunk chunk) throws TskCoreException {
List<KeywordHit> keywordHits = new ArrayList<>(); List<KeywordHit> keywordHits = new ArrayList<>();
if (originalKeyword.searchTermIsLiteral()) { if (originalKeyword.searchTermIsLiteral()) {
if (!originalKeyword.searchTermIsWholeWord()) { // if (!originalKeyword.searchTermIsWholeWord()) {
if (StringUtil.containsIgnoreCase(chunk.geLowerCasedChunk(), originalKeyword.getSearchTerm())) { if (StringUtil.containsIgnoreCase(chunk.geLowerCasedChunk(), originalKeyword.getSearchTerm())) {
keywordHits.addAll(createKeywordHits(chunk, originalKeyword)); keywordHits.addAll(createKeywordHits(chunk, originalKeyword));
} }
} else { // } else {
String REGEX_FIND_WORD="\\b\\W*%s\\W*\\b"; //"[\\w[\\.']]*%s[\\w[\\.']]*"; //"(?i).*?\\b%s\\b.*?"; // String REGEX_FIND_WORD="\\b\\W*%s\\W*\\b"; //"[\\w[\\.']]*%s[\\w[\\.']]*"; //"(?i).*?\\b%s\\b.*?";
String regex=String.format(REGEX_FIND_WORD, Pattern.quote(originalKeyword.getSearchTerm().toLowerCase())); // String regex=String.format(REGEX_FIND_WORD, Pattern.quote(originalKeyword.getSearchTerm().toLowerCase()));
// if(chunk.geLowerCasedChunk().matches(regex)) { // if(chunk.geLowerCasedChunk().matches(regex)) {
// keywordHits.addAll(createKeywordHits(chunk, originalKeyword)); // keywordHits.addAll(createKeywordHits(chunk, originalKeyword));
// } // }
Pattern pattern = Pattern.compile(regex, java.util.regex.Pattern.CASE_INSENSITIVE); // Pattern pattern = Pattern.compile(regex, java.util.regex.Pattern.CASE_INSENSITIVE);
Matcher matcher = pattern.matcher(chunk.geLowerCasedChunk()); // Matcher matcher = pattern.matcher(chunk.geLowerCasedChunk());
if (matcher.find()) { // if (matcher.find()) {
keywordHits.addAll(createKeywordHits(chunk, originalKeyword)); // keywordHits.addAll(createKeywordHits(chunk, originalKeyword));
} // }
} // }
} else { } else {
String regex = originalKeyword.getSearchTerm(); String regex = originalKeyword.getSearchTerm();
...@@ -163,6 +169,7 @@ private List<KeywordHit> createKeywordHits(Chunk chunk, Keyword originalKeyword) ...@@ -163,6 +169,7 @@ private List<KeywordHit> createKeywordHits(Chunk chunk, Keyword originalKeyword)
} else { } else {
String REGEX_FIND_WORD="\\b\\W*%s\\W*\\b"; String REGEX_FIND_WORD="\\b\\W*%s\\W*\\b";
searchPattern=String.format(REGEX_FIND_WORD, Pattern.quote(originalKeyword.getSearchTerm().toLowerCase())); searchPattern=String.format(REGEX_FIND_WORD, Pattern.quote(originalKeyword.getSearchTerm().toLowerCase()));
testingTokenizer(chunk, originalKeyword);
} }
} else { } else {
searchPattern = keywordString; searchPattern = keywordString;
...@@ -353,4 +360,28 @@ void makeArtifacts(Content content, IngestJobContext context, long sourceID) { ...@@ -353,4 +360,28 @@ void makeArtifacts(Content content, IngestJobContext context, long sourceID) {
map.clear(); map.clear();
} }
} }
/**
 * Experimental helper that tokenizes the chunk's lower-cased text with a
 * {@link StandardAnalyzer} and scans the resulting tokens for the keyword's
 * search term. A match currently triggers no action; the method produces
 * no result and only exercises the tokenizer.
 *
 * @param chunk           the chunk whose lower-cased text is tokenized
 * @param originalKeyword the keyword whose search term is looked for among
 *                        the tokens
 */
private void testingTokenizer(Chunk chunk, Keyword originalKeyword) {
    // Analyzer is Closeable; try-with-resources releases it even if
    // tokenization fails part-way through.
    try (Analyzer analyzer = new StandardAnalyzer()) {
        // The text being tokenized is the lower-cased chunk, so the term is
        // lower-cased as well (as the whole-word regex branch does) to keep
        // the comparison from missing mixed-case keywords.
        String term = originalKeyword.getSearchTerm().toLowerCase();
        List<String> tokens = analyze(chunk.geLowerCasedChunk(), analyzer);
        for (String token : tokens) {
            if (token.equals(term)) {
                // Placeholder: nothing is done with a match yet.
            }
        }
    } catch (IOException ex) {
        Exceptions.printStackTrace(ex);
    }
}
/**
 * Runs the given text through a Lucene analyzer and collects the terms it
 * emits.
 *
 * @param text     the text to tokenize
 * @param analyzer the analyzer that supplies the token stream; it is not
 *                 closed by this method
 * @return the emitted terms, in the order the token stream produced them
 * @throws IOException if the token stream fails while being consumed or
 *                     closed
 */
public List<String> analyze(String text, Analyzer analyzer) throws IOException{
    List<String> result = new ArrayList<>();
    // TokenStream consumer workflow: reset -> incrementToken* -> end -> close.
    // try-with-resources guarantees close() so the analyzer's reusable
    // components are released even when consumption throws.
    try (TokenStream tokenStream = analyzer.tokenStream("sampleName", text)) {
        CharTermAttribute attr = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            result.add(attr.toString());
        }
        // Signal end-of-stream so the stream can perform its final operations.
        tokenStream.end();
    }
    return result;
}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment