From 48da203239a1cd181aa5b56004ff1d2e12f74567 Mon Sep 17 00:00:00 2001 From: apriestman <apriestman@basistech.com> Date: Thu, 5 Nov 2020 14:14:21 -0500 Subject: [PATCH] Stop hashing if EOF is found. Keep track of how many bytes are written to the EncodedFileOutputStream. --- .../datamodel/EncodedFileOutputStream.java | 20 +++++++++++++++++-- .../org/sleuthkit/datamodel/HashUtility.java | 7 ++++++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/bindings/java/src/org/sleuthkit/datamodel/EncodedFileOutputStream.java b/bindings/java/src/org/sleuthkit/datamodel/EncodedFileOutputStream.java index f181b936f..7c0183e1d 100644 --- a/bindings/java/src/org/sleuthkit/datamodel/EncodedFileOutputStream.java +++ b/bindings/java/src/org/sleuthkit/datamodel/EncodedFileOutputStream.java @@ -30,7 +30,8 @@ */ public class EncodedFileOutputStream extends BufferedOutputStream { - private TskData.EncodingType type; + private final TskData.EncodingType type; + private long encodedDataLength; /** * Create an encoded output stream using the specified encoding. @@ -43,6 +44,7 @@ public class EncodedFileOutputStream extends BufferedOutputStream { public EncodedFileOutputStream(OutputStream out, TskData.EncodingType type) throws IOException { super(out); this.type = type; + encodedDataLength = 0; writeHeader(); } @@ -65,11 +67,13 @@ public EncodedFileOutputStream(OutputStream out, int size, TskData.EncodingType private void writeHeader() throws IOException { // We get the encoded header here so it will be in plaintext after encoding write(EncodedFileUtil.getEncodedHeader(type), 0, EncodedFileUtil.getHeaderLength()); + encodedDataLength -= EncodedFileUtil.getHeaderLength(); } @Override public void write(int b) throws IOException { super.write((int) EncodedFileUtil.encodeByte((byte) b, type)); + encodedDataLength++; } @Override @@ -83,5 +87,17 @@ public void write(byte[] b, } super.write(encodedData, off, len); + encodedDataLength += len; } -} + + /** + * Get the number of bytes written to the file, excluding header bytes. + * This is needed for storing the original length of the file in the + * tsk_files table in cases where we don't know the size in advance. + * + * @return the number of bytes written to the stream, excluding the header. + */ + public long getBytesWritten() { + return encodedDataLength; + } +} \ No newline at end of file diff --git a/bindings/java/src/org/sleuthkit/datamodel/HashUtility.java b/bindings/java/src/org/sleuthkit/datamodel/HashUtility.java index d8618b0ee..4ce6e192e 100644 --- a/bindings/java/src/org/sleuthkit/datamodel/HashUtility.java +++ b/bindings/java/src/org/sleuthkit/datamodel/HashUtility.java @@ -68,6 +68,11 @@ static public List<HashResult> calculateHashes(Content content, Collection<HashT } catch (TskCoreException ex) { throw new TskCoreException("Error reading data at address " + i * BUFFER_SIZE + " from content with ID: " + content.getId(), ex); } + + // Check for EOF + if (read == -1) { + break; + } // Only update with the read bytes. if (read == BUFFER_SIZE) { @@ -228,4 +233,4 @@ static public String calculateMd5Hash(Content content) throws IOException { throw new IOException(ex); } } -} +} \ No newline at end of file -- GitLab