From 48da203239a1cd181aa5b56004ff1d2e12f74567 Mon Sep 17 00:00:00 2001
From: apriestman <apriestman@basistech.com>
Date: Thu, 5 Nov 2020 14:14:21 -0500
Subject: [PATCH] Stop hashing if EOF is found.

Keep track of how many bytes are written to the EncodedFileOutputStream.
---
 .../datamodel/EncodedFileOutputStream.java    | 20 +++++++++++++++++--
 .../org/sleuthkit/datamodel/HashUtility.java  |  7 ++++++-
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/bindings/java/src/org/sleuthkit/datamodel/EncodedFileOutputStream.java b/bindings/java/src/org/sleuthkit/datamodel/EncodedFileOutputStream.java
index f181b936f..7c0183e1d 100644
--- a/bindings/java/src/org/sleuthkit/datamodel/EncodedFileOutputStream.java
+++ b/bindings/java/src/org/sleuthkit/datamodel/EncodedFileOutputStream.java
@@ -30,7 +30,8 @@
  */
 public class EncodedFileOutputStream extends BufferedOutputStream {
 
-	private TskData.EncodingType type;
+	private final TskData.EncodingType type;
+	private long encodedDataLength;
 
 	/**
 	 * Create an encoded output stream using the specified encoding.
@@ -43,6 +44,7 @@ public class EncodedFileOutputStream extends BufferedOutputStream {
 	public EncodedFileOutputStream(OutputStream out, TskData.EncodingType type) throws IOException {
 		super(out);
 		this.type = type;
+		encodedDataLength = 0; // no data bytes counted yet
 		writeHeader();
 	}
 
@@ -65,11 +67,13 @@ public EncodedFileOutputStream(OutputStream out, int size, TskData.EncodingType
 	private void writeHeader() throws IOException {
 		// We get the encoded header here so it will be in plaintext after encoding
 		write(EncodedFileUtil.getEncodedHeader(type), 0, EncodedFileUtil.getHeaderLength());
+		encodedDataLength -= EncodedFileUtil.getHeaderLength(); // write() above counted the header bytes; subtract them so only data bytes are tallied
 	}
 
 	@Override
 	public void write(int b) throws IOException {
 		super.write((int) EncodedFileUtil.encodeByte((byte) b, type));
+		encodedDataLength++; // one more byte written
 	}
 
 	@Override
@@ -83,5 +87,17 @@ public void write(byte[] b,
 		}
 
 		super.write(encodedData, off, len);
+		encodedDataLength += len;
 	}
-}
+	
+	/**
+	 * Get the number of data bytes written to this stream so far, excluding
+	 * the header bytes. This is needed for storing the original length of the
+	 * file in the tsk_files table in cases where the size is not known in advance.
+	 *
+	 * @return the number of bytes written to the stream, excluding the header.
+	 */
+	public long getBytesWritten() {
+		return encodedDataLength;
+	} 
+}
\ No newline at end of file
diff --git a/bindings/java/src/org/sleuthkit/datamodel/HashUtility.java b/bindings/java/src/org/sleuthkit/datamodel/HashUtility.java
index d8618b0ee..4ce6e192e 100644
--- a/bindings/java/src/org/sleuthkit/datamodel/HashUtility.java
+++ b/bindings/java/src/org/sleuthkit/datamodel/HashUtility.java
@@ -68,6 +68,11 @@ static public List<HashResult> calculateHashes(Content content, Collection<HashT
 			} catch (TskCoreException ex) {
 				throw new TskCoreException("Error reading data at address " + i * BUFFER_SIZE + " from content with ID: " + content.getId(), ex);
 			}
+			
+			// Stop hashing at EOF: a read result of -1 means there is no more data.
+			if (read == -1) {
+				break;
+			}
 
 			// Only update with the read bytes.
 			if (read == BUFFER_SIZE) {
@@ -228,4 +233,4 @@ static public String calculateMd5Hash(Content content) throws IOException {
 			throw new IOException(ex);
 		}
 	}	
-}
+}
\ No newline at end of file
-- 
GitLab