From 1b78b4964bc2e17d334af4555bb810d765f7905f Mon Sep 17 00:00:00 2001
From: Greg DiCristofaro <gregd@basistech.com>
Date: Thu, 10 Aug 2023 14:03:17 -0400
Subject: [PATCH] updates for path normalizer

---
 .../malwarescan/MalwareScanIngestModule.java  |  16 +-
 .../autopsy/malwarescan/PathNormalizer.java   | 201 ++++++++++++++++++
 .../malwarescan/UsernameAnonymizer.java       | 155 --------------
 3 files changed, 213 insertions(+), 159 deletions(-)
 create mode 100644 Core/src/com/basistech/df/cybertriage/autopsy/malwarescan/PathNormalizer.java
 delete mode 100644 Core/src/com/basistech/df/cybertriage/autopsy/malwarescan/UsernameAnonymizer.java

diff --git a/Core/src/com/basistech/df/cybertriage/autopsy/malwarescan/MalwareScanIngestModule.java b/Core/src/com/basistech/df/cybertriage/autopsy/malwarescan/MalwareScanIngestModule.java
index 1d5871af3b..c52e72a350 100644
--- a/Core/src/com/basistech/df/cybertriage/autopsy/malwarescan/MalwareScanIngestModule.java
+++ b/Core/src/com/basistech/df/cybertriage/autopsy/malwarescan/MalwareScanIngestModule.java
@@ -136,7 +136,6 @@ private static class SharedProcessing {
 
         private final CTLicensePersistence ctSettingsPersistence = CTLicensePersistence.getInstance();
         private final CTApiDAO ctApiDAO = CTApiDAO.getInstance();
-        private final UsernameAnonymizer usernameAnonymizer = new UsernameAnonymizer();
 
         private IngestJobState ingestJobState = null;
 
@@ -235,6 +234,7 @@ private IngestJobState getNewJobState(IngestJobContext context) throws Exception
             return new IngestJobState(
                     context,
                     tskCase,
+                    new PathNormalizer(tskCase),
                     new FileTypeDetector(),
                     licenseInfoOpt.get(),
                     malwareType,
@@ -657,7 +657,7 @@ private boolean uploadFile(IngestJobState ingestJobState, String md5, long objId
             // upload metadata
             MetadataUploadRequest metaRequest = new MetadataUploadRequest()
                     .setCreatedDate(af.getCrtime() == 0 ? null : af.getCrtime())
-                    .setFilePath(usernameAnonymizer.anonymousUsername(af.getUniquePath()))
+                    .setFilePath(ingestJobState.getPathNormalizer().normalizePath(af.getUniquePath()))
                     .setFileSizeBytes(af.getSize())
                     .setFileUploadUrl(authTokenResponse.getFileUploadUrl())
                     .setMd5(md5)
@@ -733,7 +733,7 @@ private void longPollForNotFound(IngestJobState ingestJobState) throws Interrupt
                     if (!ingestJobState.isDoFileLookups() || ingestJobState.getIngestJobContext().fileIngestIsCancelled()) {
                         return;
                     }
-                    
+
                     Thread.sleep(FILE_UPLOAD_RETRY_SLEEP_MILLIS);
                 }
             }
@@ -934,6 +934,7 @@ static class IngestJobState {
                     null,
                     null,
                     null,
+                    null,
                     false,
                     false
             );
@@ -951,10 +952,12 @@ static class IngestJobState {
             private boolean uploadUnknownFiles;
             private boolean doFileLookups;
             private final IngestJobContext ingestJobContext;
+            private final PathNormalizer pathNormalizer;
 
-            IngestJobState(IngestJobContext ingestJobContext, SleuthkitCase tskCase, FileTypeDetector fileTypeDetector, LicenseInfo licenseInfo, BlackboardArtifact.Type malwareType, boolean uploadUnknownFiles, boolean doFileLookups) {
+            IngestJobState(IngestJobContext ingestJobContext, SleuthkitCase tskCase, PathNormalizer pathNormalizer, FileTypeDetector fileTypeDetector, LicenseInfo licenseInfo, BlackboardArtifact.Type malwareType, boolean uploadUnknownFiles, boolean doFileLookups) {
                 this.tskCase = tskCase;
                 this.fileTypeDetector = fileTypeDetector;
+                this.pathNormalizer = pathNormalizer;
                 this.licenseInfo = licenseInfo;
                 this.malwareType = malwareType;
                 this.dsId = ingestJobContext == null ? 0L : ingestJobContext.getDataSource().getId();
@@ -1017,6 +1020,11 @@ boolean isDoFileLookups() {
             void disableDoFileLookups() {
                 this.doFileLookups = false;
             }
+
+            public PathNormalizer getPathNormalizer() {
+                return pathNormalizer;
+            }
+
         }
     }
 }
diff --git a/Core/src/com/basistech/df/cybertriage/autopsy/malwarescan/PathNormalizer.java b/Core/src/com/basistech/df/cybertriage/autopsy/malwarescan/PathNormalizer.java
new file mode 100644
index 0000000000..f8a0a299a5
--- /dev/null
+++ b/Core/src/com/basistech/df/cybertriage/autopsy/malwarescan/PathNormalizer.java
@@ -0,0 +1,201 @@
+/*
+ * Autopsy Forensic Browser
+ *
+ * Copyright 2023 Basis Technology Corp.
+ * Contact: carrier <at> sleuthkit <dot> org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.basistech.df.cybertriage.autopsy.malwarescan;
+
+import com.google.common.net.InetAddresses;
+import java.net.InetAddress;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import org.apache.commons.lang3.StringUtils;
+import org.sleuthkit.autopsy.coreutils.Logger;
+import org.sleuthkit.datamodel.SleuthkitCase;
+import org.sleuthkit.datamodel.TskCoreException;
+
+/**
+ * Utility class to anonymize user names and UNC server names / private IPs in paths.
+ */
+class PathNormalizer {
+
+    private static final Logger LOGGER = Logger.getLogger(PathNormalizer.class.getName());
+
+    private static final String ANONYMIZED_USERNAME = "<user>";
+    private static final String ANONYMIZED_IP = "<private_ip>";
+    private static final String ANONYMIZED_HOSTNAME = "<hostname>";
+    private static final String FORWARD_SLASH = "/";
+    private static final String BACK_SLASH = "\\";
+
+    private static final Pattern USER_PATH_FORWARD_SLASH_REGEX = Pattern.compile("(?<!all )([/]{0,1}\\Qusers\\E/)(?!(public|Default|defaultAccount|All Users))([^/]+)(/){0,1}", Pattern.CASE_INSENSITIVE);
+    private static final Pattern USER_PATH_BACK_SLASH_REGEX = Pattern.compile("(?<!all )([\\\\]{0,1}\\Qusers\\E\\\\)(?!(public|Default|defaultAccount|All Users))([^\\\\]+)([\\\\]){0,1}", Pattern.CASE_INSENSITIVE);
+
+    private static final Pattern USER_PATH_FORWARD_SLASH_REGEX_XP = Pattern.compile("([/]{0,1}\\Qdocuments and settings\\E/)(?!(Default User|All Users))([^/]+)(/){0,1}", Pattern.CASE_INSENSITIVE);
+    private static final Pattern USER_PATH_BACK_SLASH_REGEX_XP = Pattern.compile("([\\\\]{0,1}\\Qdocuments and settings\\E\\\\)(?!(Default User|All Users))([^\\\\]+)(\\\\){0,1}", Pattern.CASE_INSENSITIVE);
+
+    private static final Pattern UNC_PATH_FORWARD_SLASH_PATTERN = Pattern.compile("(//)([^/]+)(/){0,1}");
+    private static final Pattern UNC_PATH_BACK_SLASH_PATTERN = Pattern.compile("(\\\\\\\\)([^\\\\]+)(\\\\){0,1}");
+
+    private static final String USERNAME_REGEX_REPLACEMENT = "$1" + ANONYMIZED_USERNAME + "$4";
+
+    private final SleuthkitCase skCase;
+
+    PathNormalizer(SleuthkitCase skCase) {
+        this.skCase = skCase;
+    }
+
+    protected List<String> getUsernames() {
+        try {
+            return this.skCase.getOsAccountManager().getOsAccounts().stream()
+                    .filter(acct -> acct != null)
+                    .map(acct -> acct.getLoginName().orElse(null))
+                    .filter(StringUtils::isNotBlank)
+                    .collect(Collectors.toList());
+        } catch (TskCoreException ex) {
+            LOGGER.log(Level.WARNING, "There was an error getting current os accounts", ex);
+            return Collections.emptyList();
+        }
+    }
+
+    public String normalizePath(String inputString) {
+        if (StringUtils.isBlank(inputString)) {
+            return "";
+        }
+
+        String anonymousString = anonymizeUserFromPathsWithForwardSlashes(inputString);
+        anonymousString = anonymizeUserFromPathsWithBackSlashes(anonymousString);
+        anonymousString = anonymizeServerFromUNCPath(anonymousString);
+
+        return anonymousString;
+    }
+
+    private String anonymizeUserFromPathsWithForwardSlashes(String stringWithUsername) {
+        String anonymousString = stringWithUsername;
+        anonymousString = regexReplace(anonymousString, USER_PATH_FORWARD_SLASH_REGEX_XP, USERNAME_REGEX_REPLACEMENT);
+        anonymousString = regexReplace(anonymousString, USER_PATH_FORWARD_SLASH_REGEX, USERNAME_REGEX_REPLACEMENT);
+        anonymousString = replaceFolder(anonymousString, getUsernames(), ANONYMIZED_USERNAME, FORWARD_SLASH);
+        return anonymousString;
+    }
+
+    // Most paths in CyberTriage are normalized with forward slashes,
+    // but there can still be strings containing paths that are not normalized, such as paths contained in arguments or event log payloads.
+    private String anonymizeUserFromPathsWithBackSlashes(String stringWithUsername) {
+        String anonymousString = stringWithUsername;
+        anonymousString = regexReplace(anonymousString, USER_PATH_BACK_SLASH_REGEX_XP, USERNAME_REGEX_REPLACEMENT);
+        anonymousString = regexReplace(anonymousString, USER_PATH_BACK_SLASH_REGEX, USERNAME_REGEX_REPLACEMENT);
+        anonymousString = replaceFolder(anonymousString, getUsernames(), ANONYMIZED_USERNAME, BACK_SLASH);
+
+        return anonymousString;
+    }
+
+    private String anonymizeServerFromUNCPath(String inputString) {
+
+        Set<String> serverNames = new HashSet<>();
+        String anonymousString = inputString.toLowerCase(Locale.ENGLISH);
+
+        Matcher forwardSlashMatcher = UNC_PATH_FORWARD_SLASH_PATTERN.matcher(anonymousString);
+        while (forwardSlashMatcher.find()) {
+            String serverName = forwardSlashMatcher.group(2);
+            serverNames.add(serverName);
+        }
+
+        Matcher backSlashMatcher = UNC_PATH_BACK_SLASH_PATTERN.matcher(anonymousString);
+        while (backSlashMatcher.find()) {
+            String serverName = backSlashMatcher.group(2);
+            serverNames.add(serverName);
+        }
+
+        for (String serverName : serverNames) {
+
+            if (StringUtils.isBlank(serverName)) {
+                continue;
+            }
+
+            if (InetAddresses.isInetAddress(serverName) && isLocalIP(serverName)) {
+                anonymousString = replaceFolder(anonymousString, Collections.singletonList(serverName), ANONYMIZED_IP);
+            } else {
+                anonymousString = replaceFolder(anonymousString, Collections.singletonList(serverName), ANONYMIZED_HOSTNAME);
+            }
+
+        }
+
+        return anonymousString;
+    }
+
+    private static String regexReplace(String orig, Pattern pattern, String regexReplacement) {
+        Matcher matcher = pattern.matcher(orig);
+        return matcher.replaceAll(regexReplacement);
+    }
+
+    private static String replaceFolder(String orig, List<String> valuesToReplace, String replacementValue) {
+        String anonymized = orig;
+        anonymized = replaceFolder(anonymized, valuesToReplace, replacementValue, FORWARD_SLASH);
+        anonymized = replaceFolder(anonymized, valuesToReplace, replacementValue, BACK_SLASH);
+        return anonymized;
+    }
+
+    private static String replaceFolder(String orig, List<String> valuesToReplace, String replacementValue, String folderDelimiter) {
+        if (orig == null || valuesToReplace == null) {
+            return orig;
+        }
+
+        String anonymousString = orig;
+
+        // ensure non-null
+        folderDelimiter = StringUtils.defaultString(folderDelimiter);
+        replacementValue = StringUtils.defaultString(replacementValue);
+
+        // replace
+        for (String valueToReplace : valuesToReplace) {
+            if (StringUtils.isNotEmpty(valueToReplace)) {
+                anonymousString = StringUtils.replace(anonymousString,
+                        folderDelimiter + valueToReplace + folderDelimiter,
+                        folderDelimiter + replacementValue + folderDelimiter);
+            }
+        }
+
+        return anonymousString;
+    }
+
+    /**
+     * Returns true if the IP address is an any-local (wildcard), site-local,
+     * link-local or loopback address. Sample list: "0.0.0.0" (wildcard
+     * address); "10.1.1.1", "10.10.10.10" (site local addresses); "127.0.0.0",
+     * "127.2.2.2" (loopback addresses); "169.254.0.0", "169.254.10.10" (link
+     * local addresses); "172.16.0.0", "172.31.245.245" (site local addresses).
+     *
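+     * @param ipAddress The IP address string to check.
+     * @return True if the address is any-local, site-local, loopback or link-local; false otherwise, including when the string is not a valid IP address.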
+     */
+    public static boolean isLocalIP(String ipAddress) {
+        try {
+            InetAddress a = InetAddresses.forString(ipAddress);
+            return a.isAnyLocalAddress() || a.isSiteLocalAddress()
+                    || a.isLoopbackAddress() || a.isLinkLocalAddress();
+        } catch (IllegalArgumentException ex) {
+            LOGGER.log(Level.WARNING, "Invalid IP string", ex);
+            return false;
+        }
+    }
+
+}
diff --git a/Core/src/com/basistech/df/cybertriage/autopsy/malwarescan/UsernameAnonymizer.java b/Core/src/com/basistech/df/cybertriage/autopsy/malwarescan/UsernameAnonymizer.java
deleted file mode 100644
index 75261ae43f..0000000000
--- a/Core/src/com/basistech/df/cybertriage/autopsy/malwarescan/UsernameAnonymizer.java
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Autopsy Forensic Browser
- *
- * Copyright 2023 Basis Technology Corp.
- * Contact: carrier <at> sleuthkit <dot> org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.basistech.df.cybertriage.autopsy.malwarescan;
-
-import com.google.common.net.InetAddresses;
-import java.net.InetAddress;
-import java.util.HashSet;
-import java.util.Locale;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import org.apache.commons.lang3.StringUtils;
-import org.sleuthkit.autopsy.coreutils.Logger;
-
-/**
- * Utility class to anonymize username in paths also anonymizes hostname / ip
- * from UNC paths
- */
-class UsernameAnonymizer {
-
-    private static final Logger LOGGER = Logger.getLogger(UsernameAnonymizer.class.getName());
-
-    private final String USER_PATH_FORWARD_SLASH_REGEX = "(?<!all )([/]{0,1}\\Qusers\\E/)(?!(public|Default|defaultAccount|All Users))([^/]+)(/){0,1}";
-    private final String USER_PATH_BACK_SLASH_REGEX = "(?<!all )([\\\\]{0,1}\\Qusers\\E\\\\)(?!(public|Default|defaultAccount|All Users))([^\\\\]+)([\\\\]){0,1}";
-
-    private final double WINDOWS_VERSION;
-    private final double DEFAULT_WINDOWS_VERSION = 10.0;
-    private final String USER_PATH_FORWARD_SLASH_REGEX_XP = "([/]{0,1}\\Qdocuments and settings\\E/)(?!(Default User|All Users))([^/]+)(/){0,1}";
-    private final String USER_PATH_BACK_SLASH_REGEX_XP = "([\\\\]{0,1}\\Qdocuments and settings\\E\\\\)(?!(Default User|All Users))([^\\\\]+)(\\\\){0,1}";
-
-    private final Pattern UNC_PATH_FORWARD_SLASH_PATTERN = Pattern.compile("(//)([^/]+)(/){0,1}");
-    private final Pattern UNC_PATH_BACK_SLASH_PATTERN = Pattern.compile("(\\\\\\\\)([^\\\\]+)(\\\\){0,1}");
-
-    public UsernameAnonymizer() {
-        // This constructor was added for the unit tests
-        // For most purposes, the other constructor should be used so we get the collection info such as users and windows version
-
-        WINDOWS_VERSION = DEFAULT_WINDOWS_VERSION;
-    }
-
-    public String anonymousUsername(String inputString) {
-        if (StringUtils.isBlank(inputString)) {
-            return "";
-        }
-
-        String anonymousString = anonymizeUserFromPathsWithForwardSlashes(inputString);
-        anonymousString = anonymizeUserFromPathsWithBackSlashes(anonymousString);
-        anonymousString = anonymizeServerFromUNCPath(anonymousString);
-
-        return anonymousString;
-    }
-
-    private String anonymizeUserFromPathsWithForwardSlashes(String stringWithUsername) {
-        Pattern pattern = WINDOWS_VERSION < 6 ? Pattern.compile(USER_PATH_FORWARD_SLASH_REGEX_XP, Pattern.CASE_INSENSITIVE) : Pattern.compile(USER_PATH_FORWARD_SLASH_REGEX, Pattern.CASE_INSENSITIVE);
-        Matcher matcher = pattern.matcher(stringWithUsername.toLowerCase(Locale.ENGLISH));
-        String replacement = "";
-        while (matcher.find()) {
-            replacement = String.format("$1%s$4", "<user>");
-        }
-        String anonymousString = matcher.replaceAll(replacement);
-
-        return anonymousString;
-    }
-
-    // Most paths in CyberTriage are normalized with forward slashes
-    // but there can still be strings containing paths that are not normalized such paths contained in arguments or event log payloads
-    private String anonymizeUserFromPathsWithBackSlashes(String stringWithUsername) {
-        Pattern pattern = WINDOWS_VERSION < 6 ? Pattern.compile(USER_PATH_BACK_SLASH_REGEX_XP, Pattern.CASE_INSENSITIVE) : Pattern.compile(USER_PATH_BACK_SLASH_REGEX, Pattern.CASE_INSENSITIVE);
-        Matcher matcher = pattern.matcher(stringWithUsername.toLowerCase(Locale.ENGLISH));
-        String replacement = "";
-        while (matcher.find()) {
-            replacement = String.format("$1%s$4", "<user>");
-        }
-        String anonymousString = matcher.replaceAll(replacement);
-
-        return anonymousString;
-    }
-
-    private String anonymizeServerFromUNCPath(String inputString) {
-
-        Set<String> serverNames = new HashSet<>();
-        String anonymousString = inputString.toLowerCase(Locale.ENGLISH);
-
-        Matcher forwardSlashMatcher = UNC_PATH_FORWARD_SLASH_PATTERN.matcher(anonymousString);
-        while (forwardSlashMatcher.find()) {
-            String serverName = forwardSlashMatcher.group(2);
-            serverNames.add(serverName);
-        }
-
-        Matcher backSlashMatcher = UNC_PATH_BACK_SLASH_PATTERN.matcher(anonymousString);
-        while (backSlashMatcher.find()) {
-            String serverName = backSlashMatcher.group(2);
-            serverNames.add(serverName);
-        }
-
-        for (String serverName : serverNames) {
-
-            if (StringUtils.isBlank(serverName)) {
-                continue;
-            }
-
-            if (InetAddresses.isInetAddress(serverName)) {
-                if (isLocalIP(serverName)) {
-                    anonymousString = StringUtils.replace(anonymousString, "\\" + serverName + "\\", "\\<private_ip>\\");
-                    anonymousString = StringUtils.replace(anonymousString, "/" + serverName + "/", "/<private_ip>/");
-                }
-            } else {
-                anonymousString = StringUtils.replace(anonymousString, "\\" + serverName + "\\", "\\<hostname>\\");
-                anonymousString = StringUtils.replace(anonymousString, "/" + serverName + "/", "/<hostname>/");
-            }
-
-        }
-
-        return anonymousString;
-    }
-
-    /**
-     * Returns true if IP Address is Any Local / Site Local / Link Local / Loop
-     * back local. Sample list "0.0.0.0", wildcard addres
-     * "10.1.1.1","10.10.10.10", site local address "127.0.0.0","127.2.2.2",
-     * loopback address "169.254.0.0","169.254.10.10", Link local address
-     * "172.16.0.0","172.31.245.245", site local address
-     *
-     * @param ipAddress
-     * @return
-     */
-    public static boolean isLocalIP(String ipAddress) {
-        try {
-            InetAddress a = InetAddresses.forString(ipAddress);
-            return a.isAnyLocalAddress() || a.isSiteLocalAddress()
-                    || a.isLoopbackAddress() || a.isLinkLocalAddress();
-        } catch (IllegalArgumentException ex) {
-            LOGGER.log(Level.WARNING, "Invalid IP string", ex);
-            return false;
-        }
-    }
-
-}
-- 
GitLab