From 1d77868cb145a13eeeb455b041127869b69baa54 Mon Sep 17 00:00:00 2001
From: NinjaCheetah <58050615+NinjaCheetah@users.noreply.github.com>
Date: Mon, 3 Jun 2024 23:20:34 -0400
Subject: [PATCH] Reworked U8 module and added initial support for dumping U8 archives back to bytes

---
NOTE(review): the line breaks, indentation and blank context lines of this
patch were restored from a copy whose whitespace had been collapsed. Hunk
line counts and the diffstat totals were re-checked and are consistent, but
exact indentation, spacing before inline comments, and the position of the
blank context lines in pack_u8 are inferred. Verify against the original
commit before applying.

 src/libWiiPy/archive/u8.py | 119 +++++++++++++++++++++++++++----------
 1 file changed, 86 insertions(+), 33 deletions(-)

diff --git a/src/libWiiPy/archive/u8.py b/src/libWiiPy/archive/u8.py
index 0dd9847..c5443ae 100644
--- a/src/libWiiPy/archive/u8.py
+++ b/src/libWiiPy/archive/u8.py
@@ -4,10 +4,10 @@
 # See https://wiibrew.org/wiki/U8_archive for details about the U8 archive format
 
 import io
-import binascii
 import os
 from dataclasses import dataclass
 from typing import List
+from src.libWiiPy.shared import align_value
 
 
 @dataclass
@@ -43,14 +43,9 @@ class U8Archive:
         ----------
         """
         self.u8_magic = b''
-        self.root_node_offset = 0  # Offset of the root node, which will always be 0x20.
-        self.header_size = 0  # The size of the U8 header.
-        self.data_offset = 0  # The offset of the data, which is root_node_offset + header_size, aligned to 0x40.
-        self.header_padding = b''
-        self.root_node = U8Node
         self.u8_node_list: List[U8Node] = []  # All the nodes in the header of a U8 file.
         self.file_name_list: List[str] = []
-        self.u8_file_data_list: List[bytes] = []
+        self.file_data_list: List[bytes] = []
         self.u8_file_structure = dict
 
     def load(self, u8_data: bytes) -> None:
@@ -68,24 +63,30 @@ class U8Archive:
             self.u8_magic = u8_data.read(4)
             if self.u8_magic != b'\x55\xAA\x38\x2D':
                 raise TypeError("This is not a valid U8 archive!")
-            self.root_node_offset = int(binascii.hexlify(u8_data.read(4)), 16)
-            self.header_size = int(binascii.hexlify(u8_data.read(4)), 16)
-            self.data_offset = int(binascii.hexlify(u8_data.read(4)), 16)
-            self.header_padding = u8_data.read(16)
-            root_node_type = int.from_bytes(u8_data.read(2))
-            root_node_name_offset = int.from_bytes(u8_data.read(2))
-            root_node_data_offset = int.from_bytes(u8_data.read(4))
+            # The following code is all skipped because these values really don't matter for extraction. They honestly
+            # really only matter to my code when they get calculated and used for packing.
+
+            # Offset of the root node, which will always be 0x20.
+            # root_node_offset = int(binascii.hexlify(u8_data.read(4)), 16)
+            # The size of the U8 header.
+            # header_size = int(binascii.hexlify(u8_data.read(4)), 16)
+            # The offset of the data, which is root_node_offset + header_size, aligned to 0x10.
+            # data_offset = int(binascii.hexlify(u8_data.read(4)), 16)
+
+            # Seek ahead to the size defined in the root node, because it's the total number of nodes in the file. The
+            # rest of the data in the root node (not that it really matters) will get read when we read the whole list.
+            u8_data.seek(u8_data.tell() + 36)
             root_node_size = int.from_bytes(u8_data.read(4))
-            self.root_node = U8Node(root_node_type, root_node_name_offset, root_node_data_offset, root_node_size)
-            self.u8_node_list.append(self.root_node)
-            # Iterate over the number of nodes that the root node lists, minus one since the count includes itself.
-            for node in range(self.root_node.size - 1):
+            # Seek back before the root node so that it gets read with all the rest.
+            u8_data.seek(u8_data.tell() - 12)
+            # Iterate over the number of nodes that the root node lists.
+            for node in range(root_node_size):
                 node_type = int.from_bytes(u8_data.read(2))
                 node_name_offset = int.from_bytes(u8_data.read(2))
                 node_data_offset = int.from_bytes(u8_data.read(4))
                 node_size = int.from_bytes(u8_data.read(4))
                 self.u8_node_list.append(U8Node(node_type, node_name_offset, node_data_offset, node_size))
-            # Iterate over all loaded nodes and create a list of file names.
+            # Iterate over all loaded nodes and create a list of file names and a list of file data.
             name_base_offset = u8_data.tell()
             for node in self.u8_node_list:
                 u8_data.seek(name_base_offset + node.name_offset)
@@ -97,30 +98,75 @@ class U8Archive:
                 self.file_name_list.append(name)
                 if node.type == 0:
                     u8_data.seek(node.data_offset)
-                    self.u8_file_data_list.append(u8_data.read(node.size))
+                    self.file_data_list.append(u8_data.read(node.size))
                 else:
-                    self.u8_file_data_list.append(b'')
+                    self.file_data_list.append(b'')
 
-    def dump(self) -> None:
+    def dump(self) -> bytes:
         """
-        Dumps the U8Archive object into a U8 file.
+        Dumps the U8Archive object into the raw data of a U8 archive.
+
+        Returns
+        -------
+        bytes
+            The full U8 archive as bytes.
         """
+        # This is 0 because the header size DOES NOT include the initial 32 bytes describing the file.
+        header_size = 0
+        # Add 12 bytes for each node, since that's how many bytes each one is made up of.
+        for node in range(len(self.u8_node_list)):
+            header_size += 12
+        # Add the number of bytes used for each file/folder name in the string table.
+        for file_name in self.file_name_list:
+            header_size += len(file_name) + 1
+        # The initial data offset is equal to the file header (32 bytes) + node data aligned to 16 bytes.
+        data_offset = align_value(header_size + 32, 16)
+        # Adjust all nodes to place file data in the same order as the nodes. Why isn't it already like this?
+        current_data_offset = data_offset
+        for node in range(len(self.u8_node_list)):
+            if self.u8_node_list[node].type == 0:
+                self.u8_node_list[node].data_offset = current_data_offset
+                current_data_offset += self.u8_node_list[node].size
+        # Begin joining all the U8 archive data into one variable.
         u8_data = b''
         # Magic number.
         u8_data += b'\x55\xAA\x38\x2D'
         # Root node offset (this is always 0x20).
         u8_data += int.to_bytes(0x20, 4)
+        # Size of the file header (excluding the first 32 bytes).
+        u8_data += int.to_bytes(header_size, 4)
+        # Offset of the beginning of the data region of the U8 archive.
+        u8_data += int.to_bytes(data_offset, 4)
+        # 16 bytes of zeroes.
+        u8_data += (b'\x00' * 16)
+        # Iterate over all the U8 nodes and dump them.
+        for node in self.u8_node_list:
+            u8_data += int.to_bytes(node.type, 2)
+            u8_data += int.to_bytes(node.name_offset, 2)
+            u8_data += int.to_bytes(node.data_offset, 4)
+            u8_data += int.to_bytes(node.size, 4)
+        # Iterate over all file names and dump them. All file names are suffixed by a \x00 byte.
+        for file_name in self.file_name_list:
+            u8_data += str.encode(file_name) + b'\x00'
+        # Apply the extra padding we calculated earlier by padding to where the data offset begins.
+        while len(u8_data) < data_offset:
+            u8_data += b'\x00'
+        # Iterate all file data and dump it.
+        for file in self.file_data_list:
+            u8_data += file
+        # Return the U8 archive.
+        return u8_data
 
 
 def extract_u8(u8_data, output_folder) -> None:
     if os.path.isdir(output_folder):
         raise ValueError("Output folder already exists!")
-
     os.mkdir(output_folder)
-
+    # Create a new U8Archive object and load the provided U8 file data into it.
     u8_archive = U8Archive()
     u8_archive.load(u8_data)
-
+    # TODO: Comment this
+    # Also TODO: You can go more than two layers! Really should've checked that more before assuming it was the case.
     current_dir = ""
     for node in range(len(u8_archive.u8_node_list)):
         if u8_archive.u8_node_list[node].name_offset != 0:
@@ -135,20 +181,27 @@ def extract_u8(u8_data, output_folder) -> None:
             elif u8_archive.u8_node_list[node].type == 0:
                 lower_path = os.path.join(output_folder, current_dir)
                 output_file = open(os.path.join(lower_path, u8_archive.file_name_list[node]), "wb")
-                output_file.write(u8_archive.u8_file_data_list[node])
+                output_file.write(u8_archive.file_data_list[node])
                 output_file.close()
 
 
-def pack_u8(input_data) -> None:
-    if os.path.isdir(input_data):
+def pack_u8(input_path) -> bytes:
+    if os.path.isdir(input_path):
         raise ValueError("Only single-file packing is currently supported!")
-    elif os.path.isfile(input_data):
-        with open(input_data, "rb") as f:
+    elif os.path.isfile(input_path):
+        with open(input_path, "rb") as f:
             u8_archive = U8Archive()
 
-            file_name = os.path.basename(input_data)
+            file_name = os.path.basename(input_path)
+            file_data = f.read()
 
+            u8_archive.file_name_list.append("")
             u8_archive.file_name_list.append(file_name)
 
-            u8_archive.u8_file_data_list.append(f.read())
+            u8_archive.file_data_list.append(b'')
+            u8_archive.file_data_list.append(file_data)
 
+            u8_archive.u8_node_list.append(U8Node(256, 0, 0, 2))
+            u8_archive.u8_node_list.append(U8Node(0, 1, 0, len(file_data)))
+
+            return u8_archive.dump()