From 4cd1df6516d50cc8cbf787091e4d70b519c428ef Mon Sep 17 00:00:00 2001
From: David Goitia <david@goitia.es>
Date: Thu, 17 Oct 2024 04:19:50 +0200
Subject: [PATCH] use Stream instead of byte[] for file content

---
 build.gradle.kts                              |   2 +-
 .../es/goitia/pe/CountingInputStream.java     |  72 ++++
 src/main/java/es/goitia/pe/PEInfo.java        | 339 ++++++++++++------
 3 files changed, 309 insertions(+), 104 deletions(-)
 create mode 100644 src/main/java/es/goitia/pe/CountingInputStream.java
diff --git a/build.gradle.kts b/build.gradle.kts
index 557a202..77bcc22 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -19,7 +19,7 @@ plugins {
 
 description = "PE file info extractor"
 group = "es.goitia.pe"
-version = "1.0.0"
+version = "2.0.0"
 var mainClassName = "es.goitia.pe.PEInfo"
 
 repositories {
diff --git a/src/main/java/es/goitia/pe/CountingInputStream.java b/src/main/java/es/goitia/pe/CountingInputStream.java
new file mode 100644
index 0000000..f83ffbc
--- /dev/null
+++ b/src/main/java/es/goitia/pe/CountingInputStream.java
@@ -0,0 +1,110 @@
+package es.goitia.pe;
+
+import java.io.FilterInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * {@link InputStream} decorator that counts the bytes consumed (read or skipped)
+ * from the wrapped stream.
+ * <p>
+ * There is no custom support for {@link InputStream#mark(int)}: a
+ * {@link InputStream#reset()} on the wrapped stream does not rewind the counter.
+ */
+public class CountingInputStream extends FilterInputStream {
+    /** Running count of bytes read or skipped; adjustable via {@link #setOffset(long)}. */
+    long offset;
+
+    /**
+     * Wraps another stream so that bytes read or skipped through this wrapper are counted.
+     *
+     * @param in the underlying input stream
+     */
+    public CountingInputStream(InputStream in) {
+        super(in);
+    }
+
+    /**
+     * @return the current value of the byte counter
+     */
+    public long getOffset() {
+        return offset;
+    }
+
+    /**
+     * Overwrites the byte counter without moving the underlying stream.
+     *
+     * @param offset the new counter value
+     */
+    public void setOffset(long offset) {
+        this.offset = offset;
+    }
+
+    /**
+     * Sets the byte counter back to zero without moving the underlying stream.
+     */
+    public void resetOffset() {
+        setOffset(0);
+    }
+
+    /**
+     * Reads one byte and counts it unless the end of the stream was reached.
+     */
+    @Override
+    public int read() throws IOException {
+        int read = in.read();
+        if (read != -1) {
+            offset++;
+        }
+        return read;
+    }
+
+    /**
+     * Reads up to {@code len} bytes and adds the number actually read to the counter.
+     */
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException {
+        int read = in.read(b, off, len);
+        if (read != -1) {
+            offset += read;
+        }
+        return read;
+    }
+
+    /**
+     * Skips up to {@code n} bytes and adds the number actually skipped to the counter.
+     */
+    @Override
+    public long skip(long n) throws IOException {
+        long skipped = in.skip(n);
+        offset += skipped;
+        return skipped;
+    }
+
+    /**
+     * Skips exactly {@code n} bytes, retrying until done or the stream ends.
+     * <p>
+     * {@link InputStream#skip(long)} may return 0 before the end of the stream, so a
+     * stalled skip is followed by a single-byte read to tell "no progress right now"
+     * apart from a real end of stream.
+     *
+     * @param n the number of bytes to skip; values {@code <= 0} skip nothing
+     * @return {@code true} if all {@code n} bytes were skipped (or {@code n <= 0}),
+     *         {@code false} if the stream ended first
+     * @throws IOException if an I/O error occurs
+     */
+    public boolean skipAll(long n) throws IOException {
+        long remaining = n;
+        while (remaining > 0) {
+            long skipped = skip(remaining);
+            if (skipped > 0) {
+                remaining -= skipped;
+            } else if (read() != -1) {
+                remaining--; // skip() stalled, but one byte could still be consumed
+            } else {
+                return false;
+            }
+        }
+        return true;
+    }
+}
diff --git a/src/main/java/es/goitia/pe/PEInfo.java b/src/main/java/es/goitia/pe/PEInfo.java
index ce02282..8ba36ea 100644
--- a/src/main/java/es/goitia/pe/PEInfo.java
+++ b/src/main/java/es/goitia/pe/PEInfo.java
@@ -8,10 +8,10 @@
 import lombok.Data;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -38,36 +38,68 @@ public class PEInfo {
     private String comments;
 
     /**
-     * Reads a single byte from the buffer and returns it as an integer.
+     * Reads exactly {@code length} bytes and decodes them as a trimmed UTF-8 string.
-     *
-     * @param p      the byte buffer
-     * @param offset the offset to read from
+     * @param is     the input stream to read from
+     * @param length the number of bytes to consume
-     * @return the byte value as an integer
+     * @return the trimmed text, or {@code null} if the stream ended before {@code length} bytes
+     * @throws IOException if an I/O error occurs
+     */
+    private static String READ_STRING(InputStream is, int length) throws IOException {
+        byte[] data = READ_BYTES(is, length);
+        return data == null ? null : new String(data, java.nio.charset.StandardCharsets.UTF_8).trim();
+    }
+
+    /**
+     * Reads exactly {@code length} bytes, looping until the buffer is full or the stream ends.
+     *
+     * @param is     the input stream to read from
+     * @param length the number of bytes to read
+     * @return the bytes read, or {@code null} if the stream ended before {@code length} bytes
+     * @throws IOException if an I/O error occurs
+     */
+    private static byte[] READ_BYTES(InputStream is, int length) throws IOException {
+        byte[] buffer = new byte[length];
+        for (int filled = 0, count; filled < length; filled += count) {
+            count = is.read(buffer, filled, length - filled);
+            if (count == -1) return null;
+        }
+        return buffer;
+    }
+
+    /**
+     * Reads one unsigned byte (0-255) from the input stream, failing if the stream has ended.
+     * @param is the input stream to read from
+     * @return the byte value as an integer
+     * @throws IOException if an I/O error occurs, or (as EOFException) if no byte is left
      */
-    private static int READ_BYTE(byte[] p, int offset) {
-        return p[offset] & 0xFF;
+    private static int READ_BYTE(InputStream is) throws IOException {
+        int data = is.read(); // already 0-255, or -1 once the stream is exhausted
+        if (data < 0) throw new java.io.EOFException("Unexpected end of stream while reading PE data");
+        return data;
     }
 
     /**
-     * Reads two bytes from the buffer and combines them into a word (16-bit value).
+     * Reads a 16-bit little-endian word: the first byte read is the low half, the second the high half.
      *
-     * @param p      the byte buffer
-     * @param offset the offset to read from
+     * @param is the stream positioned at the first byte of the word
      * @return the word value
+     * @throws IOException if reading either byte fails
      */
-    private static int READ_WORD(byte[] p, int offset) {
-        return READ_BYTE(p, offset) | (READ_BYTE(p, offset + 1) << 8);
+    private static int READ_WORD(InputStream is) throws IOException {
+        int low = READ_BYTE(is);
+        return low | (READ_BYTE(is) << 8);
     }
 
     /**
-     * Reads four bytes from the buffer and combines them into a double word (32-bit value).
+     * Reads a 32-bit little-endian double word: the first word read is the low half, the second the high half.
      *
-     * @param p      the byte buffer
-     * @param offset the offset to read from
+     * @param is the stream positioned at the first byte of the double word
      * @return the double word value
+     * @throws IOException if reading either word fails
      */
-    private static int READ_DWORD(byte[] p, int offset) {
-        return READ_WORD(p, offset) | (READ_WORD(p, offset + 2) << 16);
+    private static int READ_DWORD(InputStream is) throws IOException {
+        int low = READ_WORD(is);
+        return low | (READ_WORD(is) << 16);
     }
 
     /**
@@ -81,18 +113,38 @@ private static int PAD(int x) {
     }
 
     /**
-     * Processes the given buffer to extract version information.
+     * Overload of {@link #PAD(int)} for long values such as stream offsets.
+     *
+     * @param x the value to pad; it is narrowed to int before padding
+     * @return the value returned by {@code PAD((int) x)}
+     */
+    private static int PAD(long x) {
+        return PAD((int) x);
+    }
+
+    /**
+     * Advances the stream until its offset equals {@code PAD(offset)}.
+     * @param is the counting stream whose offset should be aligned
+     * @return the result of {@code skipAll}: true on success (also when nothing had to be skipped)
+     * @throws IOException if an I/O error occurs
+     */
+    private static boolean PAD(CountingInputStream is) throws IOException {
+        return is.skipAll(PAD(is.getOffset()) - is.getOffset());
+    }
+
+    /**
+     * Extracts the version information of the executable supplied as a stream.
      *
-     * @param buf the buffer containing the executable data
+     * @param is the stream containing the executable data, consumed from its current position
-     * @return the extracted version information
+     * @return the populated info, or {@code null} when findVersion reports that none was found
+     * @throws IOException if an I/O error occurs
      */
-    public static PEInfo process(byte[] buf) {
+    public static PEInfo process(InputStream is) throws IOException {
         Builder builder = PEInfo.builder().values(new HashMap<>());
-        byte[] version = findVersion(buf);
-        if (version != null) {
-            parseVersion(version, 0, builder);
+        if (!findVersion(new CountingInputStream(is), builder)) {
+            return null; // nothing was extracted
         }
         return builder.build();
     }
 
     /**
@@ -103,113 +155,188 @@ public static PEInfo process(byte[] buf) {
      * @throws IOException if an I/O error occurs
      */
     public static PEInfo process(Path path) throws IOException {
-        byte[] buf = Files.readAllBytes(path);
-        return process(buf);
+        try (InputStream is = new java.io.BufferedInputStream(Files.newInputStream(path))) {
+            return process(is); // buffered because the parser consumes the file byte by byte
+        }
     }
 
     /**
-     * Finds the version resource in the given buffer.
+     * Walks the PE headers and resource directory in stream order and parses the version resource.
      *
-     * @param buf the buffer containing the executable data
-     * @return the version resource data, or null if not found
+     * @param is      the counting stream positioned at the start of the executable
+     * @param builder the builder handed to parseVersion once the version resource is reached
+     * @return true once the version resource was reached and parsed, false if a check or skip failed first
+     * @throws IOException if an I/O error occurs
      */
-    private static byte[] findVersion(byte[] buf) {
-        if (READ_WORD(buf, 0) != 0x5A4D) // Checks for MZ signature, indicating a valid executable
-            return null;
-        int peOffset = READ_DWORD(buf, 0x3C); // Offset to the PE header
-        if (READ_WORD(buf, peOffset) != 0x4550) // Checks for PE signature
-            return null;
-        int coffOffset = peOffset + 4;
+    private static boolean findVersion(CountingInputStream is, Builder builder) throws IOException {
+        if (READ_WORD(is) != 0x5A4D) { // Checks for MZ signature, indicating a valid executable
+            return false;
+        }
+        if (!is.skipAll(0x3A)) { // Skip to 0x3C, where the PE header offset is stored (2 bytes already read)
+            return false;
+        }
+        int peOffset = READ_DWORD(is); // Offset to the PE header
+        if (!is.skipAll(peOffset - is.getOffset())) { // Skip to the PE header
+            return false;
+        }
+
+        if (READ_WORD(is) != 0x4550) { // Checks for PE signature
+            return false;
+        }
+        // Next skip: last 2 bytes of the 4-byte PE signature plus the first 2 bytes of the COFF header
+        if (!is.skipAll(2 + 2)) { // Skip to Num sections
+            return false;
+        }
+
+        int numSections = READ_WORD(is); // Number of sections
+        if (!is.skipAll(12)) { // Skip to the optional header size
+            return false;
+        }
+        int optHeaderSize = READ_WORD(is); // Size of the optional header
+        if (numSections == 0 || optHeaderSize == 0) {
+            return false;
+        }
+        if (!is.skipAll(2)) { // Skip to the optional header
+            return false;
+        }
+        long optHeaderOffset = is.getOffset();
 
-        int numSections = READ_WORD(buf, coffOffset + 2); // Number of sections
-        int optHeaderSize = READ_WORD(buf, coffOffset + 16); // Size of the optional header
-        if (numSections == 0 || optHeaderSize == 0)
-            return null;
-        int optHeaderOffset = coffOffset + 20;
-        if (READ_WORD(buf, optHeaderOffset) != 0x10B) // Checks for 32-bit optional header magic
-            return null;
-        int dataDirOffset = optHeaderOffset + 96; // Offset to the data directories
-        int vaRes = READ_DWORD(buf, dataDirOffset + 8 * 2); // Virtual address of the resource directory
+        int magic = READ_WORD(is); // Optional header magic
+        if (magic != 0x10B) { // Checks for 32-bit optional header magic
+            return false;
+        }
+        if (!is.skipAll(94 + 8 * 2)) { // Data directory entry 2: 96 - 2 (magic already read) + 2 entries of 8 bytes
+            return false;
+        }
+        int vaRes = READ_DWORD(is); // Virtual address of the resource directory
+
+        if (!is.skipAll(optHeaderSize - (is.getOffset() - optHeaderOffset))) { // Skip to the section table
+            return false;
+        }
 
-        int secTableOffset = optHeaderOffset + optHeaderSize;
         for (int i = 0; i < numSections; i++) {
-            int secOffset = secTableOffset + 40 * i;
-            String secName = new String(Arrays.copyOfRange(buf, secOffset, secOffset + 8)).trim();
+            String secName = READ_STRING(is, 8); // Read section name
 
-            if (!".rsrc".equals(secName)) // Look for the resource section
+            if (secName == null) {
+                return false;
+            } else if (!".rsrc".equals(secName)) {
+                if (!is.skipAll(32)) { // Skip the rest of this 40-byte section header
+                    return false;
+                }
                 continue;
-            int vaSec = READ_DWORD(buf, secOffset + 12); // Virtual address of the section
-            int rawDataOffset = READ_DWORD(buf, secOffset + 20); // Raw data offset of the section
+            }
+            if (!is.skipAll(4)) { // Bytes 8-11 of the section header are not used
+                return false;
+            }
+            int vaSec = READ_DWORD(is); // Virtual address of the section
+            if (!is.skipAll(4)) { // Bytes 16-19 of the section header are not used
+                return false;
+            }
+            int rawDataOffset = READ_DWORD(is); // Raw data offset of the section
             int resSecOffset = rawDataOffset + (vaRes - vaSec);
 
-            int numNamed = READ_WORD(buf, resSecOffset + 12); // Number of named entries
-            int numId = READ_WORD(buf, resSecOffset + 14); // Number of ID entries
+            if (!is.skipAll(resSecOffset - is.getOffset() + 12)) { // Entry counts sit 12 bytes into the directory
+                return false;
+            }
+            int numNamed = READ_WORD(is); // Number of named entries
+            int numId = READ_WORD(is); // Number of ID entries
 
             for (int j = 0; j < numNamed + numId; j++) {
                 int resOffset = resSecOffset + 16 + 8 * j;
-                int name = READ_DWORD(buf, resOffset);
-                if (name != 16) // Check for version resource (RT_VERSION)
+                if (!is.skipAll(resOffset - is.getOffset())) {
+                    return false;
+                }
+                int name = READ_DWORD(is); // Resource name
+                if (name != 16) { // Check for version resource (RT_VERSION)
+                    if (!is.skipAll(4)) {
+                        return false;
+                    }
                     continue;
-                int offs = READ_DWORD(buf, resOffset + 4);
-                if ((offs & 0x80000000) == 0) // Check if it's a directory resource
-                    return null;
+                }
+                int offs = READ_DWORD(is); // Offset to the resource data
+                if ((offs & 0x80000000) == 0) { // Check if it's a directory resource
+                    return false;
+                }
+
+                // Process version dir. NOTE(review): each jump below assumes its target lies ahead in the stream
                 int verDirOffset = resSecOffset + (offs & 0x7FFFFFFF);
-                numNamed = READ_WORD(buf, verDirOffset + 12);
-                numId = READ_WORD(buf, verDirOffset + 14);
-                if (numNamed == 0 && numId == 0)
-                    return null;
+                if (!is.skipAll(verDirOffset - is.getOffset() + 12)) {
+                    return false;
+                }
+                numNamed = READ_WORD(is); // Number of named entries
+                numId = READ_WORD(is); // Number of ID entries
+                if (numNamed == 0 && numId == 0) {
+                    return false;
+                }
                 resOffset = verDirOffset + 16;
-                offs = READ_DWORD(buf, resOffset + 4);
-                if ((offs & 0x80000000) == 0) // Check if it's a directory resource
-                    return null;
+                if (!is.skipAll(resOffset - is.getOffset() + 4)) {
+                    return false;
+                }
+                offs = READ_DWORD(is); // Offset to the data
+                if ((offs & 0x80000000) == 0) { // Check if it's a directory resource
+                    return false;
+                }
                 verDirOffset = resSecOffset + (offs & 0x7FFFFFFF);
-                numNamed = READ_WORD(buf, verDirOffset + 12);
-                numId = READ_WORD(buf, verDirOffset + 14);
-                if (numNamed == 0 && numId == 0)
-                    return null;
+                if (!is.skipAll(verDirOffset - is.getOffset() + 12)) {
+                    return false;
+                }
+                numNamed = READ_WORD(is); // Number of named entries
+                numId = READ_WORD(is); // Number of ID entries
+                if (numNamed == 0 && numId == 0) {
+                    return false;
+                }
                 resOffset = verDirOffset + 16;
-                offs = READ_DWORD(buf, resOffset + 4);
+                if (!is.skipAll(resOffset - is.getOffset() + 4)) {
+                    return false;
+                }
+                offs = READ_DWORD(is); // Offset of the entry read next; its directory flag must be clear
                 if ((offs & 0x80000000) != 0) // Check if it's a directory resource
-                    return null;
+                    return false;
                 verDirOffset = resSecOffset + offs;
 
-                int verVa = READ_DWORD(buf, verDirOffset); // Virtual address of the version resource
+                if (!is.skipAll(verDirOffset - is.getOffset())) {
+                    return false;
+                }
+                int verVa = READ_DWORD(is); // Virtual address of the version resource
                 int verPtrOffset = rawDataOffset + (verVa - vaSec);
-                return Arrays.copyOfRange(buf, verPtrOffset, buf.length); // Extract the version resource data
+                if (!is.skipAll(verPtrOffset - is.getOffset())) {
+                    return false;
+                }
+                is.resetOffset(); // parseVersion counts offsets from the start of the version data
+                parseVersion(is, builder);
+                return true;
             }
         }
-        return null;
+        return false;
     }
 
     /**
      * Parses the version resource data and populates the builder with the extracted information.
      *
-     * @param version the version resource data
-     * @param offs    the offset to start parsing from
+     * @param is      the input stream containing the version resource data
      * @param builder the builder to populate with the extracted information
-     * @return the next offset to parse from, padded to maintain alignment
+     * @throws IOException if an I/O error occurs
      */
-    private static int parseVersion(byte[] version, int offs, Builder builder) {
-        offs = PAD(offs); // Align offset to the next multiple of 4
-        int len = READ_WORD(version, offs); // Length of the version block
-        offs += 2;
-        int valLen = READ_WORD(version, offs); // Length of the value field
-        offs += 2;
-        int type = READ_WORD(version, offs); // Type of data (text or binary)
-        offs += 2;
+    private static void parseVersion(CountingInputStream is, Builder builder) throws IOException {
+        if (!PAD(is)) { // Align offset to the next multiple of 4
+            return;
+        }
+        int len = READ_WORD(is); // Length of the version block
+        int valLen = READ_WORD(is); // Length of the value field
+        int type = READ_WORD(is); // Type of data (text or binary)
         StringBuilder info = new StringBuilder();
         for (int i = 0; i < 200; i++) { // Extract the key name (e.g., "FileDescription")
-            int c = READ_WORD(version, offs);
-            offs += 2;
+            int c = READ_WORD(is);
             if (c == 0) break;
             info.append((char) c);
         }
-        offs = PAD(offs); // Align offset
+        if (!PAD(is)) {
+            return;
+        }
         if (type != 0) { // If it's a text field
             StringBuilder value = new StringBuilder();
             for (int i = 0; i < valLen; i++) { // Extract the value associated with the key
-                int c = READ_WORD(version, offs);
-                offs += 2;
+                int c = READ_WORD(is);
                 if (c == 0) break;
                 value.append((char) c);
             }
@@ -243,25 +370,31 @@ private static int parseVersion(byte[] version, int offs, Builder builder) {
                     break;
             }
         } else { // If it's a binary field
+            if (!is.skipAll(8)) {
+                return;
+            }
             if ("VS_VERSION_INFO".contentEquals(info)) { // Extract version numbers from fixed info
-                builder.fileVersion(String.format("%d.%d.%d.%d",
-                        READ_WORD(version, offs + 10),
-                        READ_WORD(version, offs + 8),
-                        READ_WORD(version, offs + 14),
-                        READ_WORD(version, offs + 12)
+                builder.fileVersion(String.format("%2$d.%1$d.%4$d.%3$d",
+                        READ_WORD(is), // Minor version
+                        READ_WORD(is), // Major version
+                        READ_WORD(is), // Revision number
+                        READ_WORD(is)  // Build number
                 ));
-                builder.productVersion(String.format("%d.%d.%d.%d",
-                        READ_WORD(version, offs + 18),
-                        READ_WORD(version, offs + 16),
-                        READ_WORD(version, offs + 22),
-                        READ_WORD(version, offs + 20)
+                builder.productVersion(String.format("%2$d.%1$d.%4$d.%3$d",
+                        READ_WORD(is), // Minor version
+                        READ_WORD(is), // Major version
+                        READ_WORD(is), // Revision number
+                        READ_WORD(is)  // Build number
                 ));
             }
-            offs += valLen;
+            if (!is.skipAll(valLen - 24)) {
+                return;
+            }
         }
-        while (offs < len) // Recursively parse any additional blocks
-            offs = parseVersion(version, offs, builder);
-        return PAD(offs); // Return the padded offset to maintain alignment
+
+        while (is.getOffset() < len) // Recursively parse any additional blocks
+            parseVersion(is, builder);
+        PAD(is); // Return the padded offset to maintain alignment
     }
 
     public static void main(String[] args) {