From 4cd1df6516d50cc8cbf787091e4d70b519c428ef Mon Sep 17 00:00:00 2001 From: David Goitia Date: Thu, 17 Oct 2024 04:19:50 +0200 Subject: [PATCH] use Stream instead of byte[] for file content --- build.gradle.kts | 2 +- .../es/goitia/pe/CountingInputStream.java | 72 ++++ src/main/java/es/goitia/pe/PEInfo.java | 339 ++++++++++++------ 3 files changed, 309 insertions(+), 104 deletions(-) create mode 100644 src/main/java/es/goitia/pe/CountingInputStream.java diff --git a/build.gradle.kts b/build.gradle.kts index 557a202..77bcc22 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -19,7 +19,7 @@ plugins { description = "PE file info extractor" group = "es.goitia.pe" -version = "1.0.0" +version = "2.0.0" var mainClassName = "es.goitia.pe.PEInfo" repositories { diff --git a/src/main/java/es/goitia/pe/CountingInputStream.java b/src/main/java/es/goitia/pe/CountingInputStream.java new file mode 100644 index 0000000..f83ffbc --- /dev/null +++ b/src/main/java/es/goitia/pe/CountingInputStream.java @@ -0,0 +1,72 @@ +package es.goitia.pe; + +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * InputStream with offset counter + * Without custom support for {@link InputStream#mark(int)} + */ +public class CountingInputStream extends FilterInputStream { + long offset; + + /** + * Wraps other inputStream for counting bytes read/skipped + * + * @param in the underlying input stream, or null if + * this instance is to be created without an underlying stream. + */ + protected CountingInputStream(InputStream in) { + super(in); + } + + public long getOffset() { + return offset; + } + + public void setOffset(long offset) { + this.offset = offset; + } + + public void resetOffset() { + setOffset(0); + } + + @Override + public int read() throws IOException { + int read = in.read(); + if (read != -1) { + offset++; + } + return read; + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + int read = in.read(b, off, len); + if (read != -1) { + offset += read; + } + return read; + } + + @Override + public long skip(long n) throws IOException { + long skipped = in.skip(n); + offset += skipped; + return skipped; + } + + public boolean skipAll(long n) throws IOException { + long total = 0; + while (total < n) { + long s = skip(n - total); + if (s == 0) { + return false; + } + total += s; + } + return true; + } +} diff --git a/src/main/java/es/goitia/pe/PEInfo.java b/src/main/java/es/goitia/pe/PEInfo.java index ce02282..8ba36ea 100644 --- a/src/main/java/es/goitia/pe/PEInfo.java +++ b/src/main/java/es/goitia/pe/PEInfo.java @@ -8,10 +8,10 @@ import lombok.Data; import java.io.IOException; +import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; -import java.util.Arrays; import java.util.HashMap; import java.util.Map; @@ -38,36 +38,68 @@ public class PEInfo { private String comments; /** - * Reads a single byte from the buffer and returns it as an integer. + * Reads a single byte from the input stream and returns it as an integer. * - * @param p the byte buffer - * @param offset the offset to read from + * @param is the input stream to read from * @return the byte value as an integer + * @throws IOException if an I/O error occurs + */ + private static String READ_STRING(InputStream is, int length) throws IOException { + byte[] data = READ_BYTES(is, length); + return data == null ? null : new String(data).trim(); + } + + /** + * Reads a single byte from the input stream and returns it as an integer. + * + * @param is the input stream to read from + * @return the byte value as an integer + * @throws IOException if an I/O error occurs + */ + private static byte[] READ_BYTES(InputStream is, int length) throws IOException { + byte[] bytes = new byte[length]; + int total = 0; + int read; + while (total < length && (read = is.read(bytes, total, length - total)) != -1) { + total += read; + } + return total < length ? null : bytes; + } + + /** + * Reads a single byte from the input stream and returns it as an integer. + * + * @param is the input stream to read from + * @return the byte value as an integer + * @throws IOException if an I/O error occurs */ - private static int READ_BYTE(byte[] p, int offset) { - return p[offset] & 0xFF; + private static int READ_BYTE(InputStream is) throws IOException { + int data = is.read() & 0xFF; + return data; } /** - * Reads two bytes from the buffer and combines them into a word (16-bit value). + * Reads two bytes from the input stream and combines them into a word (16-bit value). * - * @param p the byte buffer - * @param offset the offset to read from + * @param is the input stream to read from * @return the word value + * @throws IOException if an I/O error occurs */ - private static int READ_WORD(byte[] p, int offset) { - return READ_BYTE(p, offset) | (READ_BYTE(p, offset + 1) << 8); + private static int READ_WORD(InputStream is) throws IOException { + int data = READ_BYTE(is) | (READ_BYTE(is) << 8); + return data; } /** - * Reads four bytes from the buffer and combines them into a double word (32-bit value). + * Reads four bytes from the input stream and combines them into a double word (32-bit value). * - * @param p the byte buffer - * @param offset the offset to read from + * @param is the input stream to read from * @return the double word value + * @throws IOException if an I/O error occurs */ - private static int READ_DWORD(byte[] p, int offset) { - return READ_WORD(p, offset) | (READ_WORD(p, offset + 2) << 16); + private static int READ_DWORD(InputStream is) throws IOException { + int data = READ_WORD(is) | (READ_WORD(is) << 16); + return data; } /** @@ -81,18 +113,38 @@ private static int PAD(int x) { } /** - * Processes the given buffer to extract version information. + * Pads the given value to align it to the next multiple of 4. + * + * @param x the value to pad + * @return the padded value + */ + private static int PAD(long x) { + return PAD((int) x); + } + + /** + * Pads the given value to align it to the next multiple of 4. + * + * @param is the stream with offset want to pad + * @return the padded value + */ + private static boolean PAD(CountingInputStream is) throws IOException { + return is.skipAll(PAD(is.getOffset()) - is.getOffset()); + } + + /** + * Processes the given input stream to extract version information. * - * @param buf the buffer containing the executable data + * @param is the input stream containing the executable data * @return the extracted version information + * @throws IOException if an I/O error occurs */ - public static PEInfo process(byte[] buf) { + public static PEInfo process(InputStream is) throws IOException { Builder builder = PEInfo.builder().values(new HashMap<>()); - byte[] version = findVersion(buf); - if (version != null) { - parseVersion(version, 0, builder); + if (findVersion(new CountingInputStream(is), builder)) { + return builder.build(); } - return builder.build(); + return null; } /** @@ -103,113 +155,188 @@ public static PEInfo process(byte[] buf) { * @throws IOException if an I/O error occurs */ public static PEInfo process(Path path) throws IOException { - byte[] buf = Files.readAllBytes(path); - return process(buf); + try (InputStream is = Files.newInputStream(path)) { + return process(is); + } } /** - * Finds the version resource in the given buffer. + * Finds the version resource in the given input stream. * - * @param buf the buffer containing the executable data - * @return the version resource data, or null if not found + * @param is the input stream containing the executable data + * @param builder the builder to populate with the extracted information + * @return true if version information is found, false otherwise + * @throws IOException if an I/O error occurs */ - private static byte[] findVersion(byte[] buf) { - if (READ_WORD(buf, 0) != 0x5A4D) // Checks for MZ signature, indicating a valid executable - return null; - int peOffset = READ_DWORD(buf, 0x3C); // Offset to the PE header - if (READ_WORD(buf, peOffset) != 0x4550) // Checks for PE signature - return null; - int coffOffset = peOffset + 4; + private static boolean findVersion(CountingInputStream is, Builder builder) throws IOException { + if (READ_WORD(is) != 0x5A4D) { // Checks for MZ signature, indicating a valid executable + return false; + } + if (!is.skipAll(0x3A)) { // Skip to the PE header offset + return false; + } + int peOffset = READ_DWORD(is); // Offset to the PE header + if (!is.skipAll(peOffset - is.getOffset())) { // Skip to the PE header + return false; + } + + if (READ_WORD(is) != 0x4550) { // Checks for PE signature + return false; + } +// is.skip(2); // Skip to COFF header + if (!is.skipAll(2 + 2)) { // Skip to Num sections + return false; + } + + int numSections = READ_WORD(is); // Number of sections + if (!is.skipAll(12)) { // Skip to the optional header size + return false; + } + int optHeaderSize = READ_WORD(is); // Size of the optional header + if (numSections == 0 || optHeaderSize == 0) { + return false; + } + if (!is.skipAll(2)) { // Skip to the optional header + return false; + } + long optHeaderOffset = is.getOffset(); - int numSections = READ_WORD(buf, coffOffset + 2); // Number of sections - int optHeaderSize = READ_WORD(buf, coffOffset + 16); // Size of the optional header - if (numSections == 0 || optHeaderSize == 0) - return null; - int optHeaderOffset = coffOffset + 20; - if (READ_WORD(buf, optHeaderOffset) != 0x10B) // Checks for 32-bit optional header magic - return null; - int dataDirOffset = optHeaderOffset + 96; // Offset to the data directories - int vaRes = READ_DWORD(buf, dataDirOffset + 8 * 2); // Virtual address of the resource directory + int magic = READ_WORD(is); // Optional header magic + if (magic != 0x10B) { // Checks for 32-bit optional header magic + return false; + } + if (!is.skipAll(94 + 8 * 2)) { // Skip to the data directories + return false; + } + int vaRes = READ_DWORD(is); // Virtual address of the resource directory + + if (!is.skipAll(optHeaderSize - (is.getOffset() - optHeaderOffset))) { // Skip to the section table + return false; + } - int secTableOffset = optHeaderOffset + optHeaderSize; for (int i = 0; i < numSections; i++) { - int secOffset = secTableOffset + 40 * i; - String secName = new String(Arrays.copyOfRange(buf, secOffset, secOffset + 8)).trim(); + String secName = READ_STRING(is, 8); // Read section name - if (!".rsrc".equals(secName)) // Look for the resource section + if (secName == null) { + return false; + } else if (!".rsrc".equals(secName)) { + if (!is.skipAll(32)) { + return false; + } continue; - int vaSec = READ_DWORD(buf, secOffset + 12); // Virtual address of the section - int rawDataOffset = READ_DWORD(buf, secOffset + 20); // Raw data offset of the section + } + if (!is.skipAll(4)) { + return false; + } + int vaSec = READ_DWORD(is); // Virtual address of the section + if (!is.skipAll(4)) { + return false; + } + int rawDataOffset = READ_DWORD(is); // Raw data offset of the section int resSecOffset = rawDataOffset + (vaRes - vaSec); - int numNamed = READ_WORD(buf, resSecOffset + 12); // Number of named entries - int numId = READ_WORD(buf, resSecOffset + 14); // Number of ID entries + if (!is.skipAll(resSecOffset - is.getOffset() + 12)) { + return false; + } + int numNamed = READ_WORD(is); // Number of named entries + int numId = READ_WORD(is); // Number of ID entries for (int j = 0; j < numNamed + numId; j++) { int resOffset = resSecOffset + 16 + 8 * j; - int name = READ_DWORD(buf, resOffset); - if (name != 16) // Check for version resource (RT_VERSION) + if (!is.skipAll(resOffset - is.getOffset())) { + return false; + } + int name = READ_DWORD(is); // Resource name + if (name != 16) { // Check for version resource (RT_VERSION) + if (!is.skipAll(4)) { + return false; + } continue; - int offs = READ_DWORD(buf, resOffset + 4); - if ((offs & 0x80000000) == 0) // Check if it's a directory resource - return null; + } + int offs = READ_DWORD(is); // Offset to the resource data + if ((offs & 0x80000000) == 0) { // Check if it's a directory resource + return false; + } + + // Process version dir int verDirOffset = resSecOffset + (offs & 0x7FFFFFFF); - numNamed = READ_WORD(buf, verDirOffset + 12); - numId = READ_WORD(buf, verDirOffset + 14); - if (numNamed == 0 && numId == 0) - return null; + if (!is.skipAll(verDirOffset - is.getOffset() + 12)) { + return false; + } + numNamed = READ_WORD(is); // Number of named entries + numId = READ_WORD(is); // Number of ID entries + if (numNamed == 0 && numId == 0) { + return false; + } resOffset = verDirOffset + 16; - offs = READ_DWORD(buf, resOffset + 4); - if ((offs & 0x80000000) == 0) // Check if it's a directory resource - return null; + if (!is.skipAll(resOffset - is.getOffset() + 4)) { + return false; + } + offs = READ_DWORD(is); // Offset to the data + if ((offs & 0x80000000) == 0) { // Check if it's a directory resource + return false; + } verDirOffset = resSecOffset + (offs & 0x7FFFFFFF); - numNamed = READ_WORD(buf, verDirOffset + 12); - numId = READ_WORD(buf, verDirOffset + 14); - if (numNamed == 0 && numId == 0) - return null; + if (!is.skipAll(verDirOffset - is.getOffset() + 12)) { + return false; + } + numNamed = READ_WORD(is); // Number of named entries + numId = READ_WORD(is); // Number of ID entries + if (numNamed == 0 && numId == 0) { + return false; + } resOffset = verDirOffset + 16; - offs = READ_DWORD(buf, resOffset + 4); + if (!is.skipAll(resOffset - is.getOffset() + 4)) { + return false; + } + offs = READ_DWORD(is); if ((offs & 0x80000000) != 0) // Check if it's a directory resource - return null; + return false; verDirOffset = resSecOffset + offs; - int verVa = READ_DWORD(buf, verDirOffset); // Virtual address of the version resource + if (!is.skipAll(verDirOffset - is.getOffset())) { + return false; + } + int verVa = READ_DWORD(is); // Virtual address of the version resource int verPtrOffset = rawDataOffset + (verVa - vaSec); - return Arrays.copyOfRange(buf, verPtrOffset, buf.length); // Extract the version resource data + if (!is.skipAll(verPtrOffset - is.getOffset())) { + return false; + } + is.resetOffset(); + parseVersion(is, builder); + return true; } } - return null; + return false; } /** * Parses the version resource data and populates the builder with the extracted information. * - * @param version the version resource data - * @param offs the offset to start parsing from + * @param is the input stream containing the version resource data * @param builder the builder to populate with the extracted information - * @return the next offset to parse from, padded to maintain alignment + * @throws IOException if an I/O error occurs */ - private static int parseVersion(byte[] version, int offs, Builder builder) { - offs = PAD(offs); // Align offset to the next multiple of 4 - int len = READ_WORD(version, offs); // Length of the version block - offs += 2; - int valLen = READ_WORD(version, offs); // Length of the value field - offs += 2; - int type = READ_WORD(version, offs); // Type of data (text or binary) - offs += 2; + private static void parseVersion(CountingInputStream is, Builder builder) throws IOException { + if (!PAD(is)) { // Align offset to the next multiple of 4 + return; + } + int len = READ_WORD(is); // Length of the version block + int valLen = READ_WORD(is); // Length of the value field + int type = READ_WORD(is); // Type of data (text or binary) StringBuilder info = new StringBuilder(); for (int i = 0; i < 200; i++) { // Extract the key name (e.g., "FileDescription") - int c = READ_WORD(version, offs); - offs += 2; + int c = READ_WORD(is); if (c == 0) break; info.append((char) c); } - offs = PAD(offs); // Align offset + if (!PAD(is)) { + return; + } if (type != 0) { // If it's a text field StringBuilder value = new StringBuilder(); for (int i = 0; i < valLen; i++) { // Extract the value associated with the key - int c = READ_WORD(version, offs); - offs += 2; + int c = READ_WORD(is); if (c == 0) break; value.append((char) c); } @@ -243,25 +370,31 @@ private static int parseVersion(byte[] version, int offs, Builder builder) { break; } } else { // If it's a binary field + if (!is.skipAll(8)) { + return; + } if ("VS_VERSION_INFO".contentEquals(info)) { // Extract version numbers from fixed info - builder.fileVersion(String.format("%d.%d.%d.%d", - READ_WORD(version, offs + 10), - READ_WORD(version, offs + 8), - READ_WORD(version, offs + 14), - READ_WORD(version, offs + 12) + builder.fileVersion(String.format("%2$d.%1$d.%4$d.%3$d", + READ_WORD(is), // Minor version + READ_WORD(is), // Major version + READ_WORD(is), // Revision number + READ_WORD(is) // Build number )); - builder.productVersion(String.format("%d.%d.%d.%d", - READ_WORD(version, offs + 18), - READ_WORD(version, offs + 16), - READ_WORD(version, offs + 22), - READ_WORD(version, offs + 20) + builder.productVersion(String.format("%2$d.%1$d.%4$d.%3$d", + READ_WORD(is), // Minor version + READ_WORD(is), // Major version + READ_WORD(is), // Revision number + READ_WORD(is) // Build number )); } - offs += valLen; + if (!is.skipAll(valLen - 24)) { + return; + } } - while (offs < len) // Recursively parse any additional blocks - offs = parseVersion(version, offs, builder); - return PAD(offs); // Return the padded offset to maintain alignment + + while (is.getOffset() < len) // Recursively parse any additional blocks + parseVersion(is, builder); + PAD(is); // Return the padded offset to maintain alignment } public static void main(String[] args) {