Reworked U8 module and added initial support for dumping U8 archives back to bytes

Campbell 2024-06-03 23:20:34 -04:00
parent ade4b68394
commit 1d77868cb1
Signed by: NinjaCheetah
GPG Key ID: B547958AF96ED344


@@ -4,10 +4,10 @@
 # See https://wiibrew.org/wiki/U8_archive for details about the U8 archive format
 import io
-import binascii
 import os
 from dataclasses import dataclass
 from typing import List
+from src.libWiiPy.shared import align_value
 
 
 @dataclass
@@ -43,14 +43,9 @@ class U8Archive:
         ----------
         """
         self.u8_magic = b''
-        self.root_node_offset = 0  # Offset of the root node, which will always be 0x20.
-        self.header_size = 0  # The size of the U8 header.
-        self.data_offset = 0  # The offset of the data, which is root_node_offset + header_size, aligned to 0x40.
-        self.header_padding = b''
-        self.root_node = U8Node
         self.u8_node_list: List[U8Node] = []  # All the nodes in the header of a U8 file.
         self.file_name_list: List[str] = []
-        self.u8_file_data_list: List[bytes] = []
+        self.file_data_list: List[bytes] = []
         self.u8_file_structure = dict
 
     def load(self, u8_data: bytes) -> None:
@@ -68,24 +63,30 @@ class U8Archive:
         self.u8_magic = u8_data.read(4)
         if self.u8_magic != b'\x55\xAA\x38\x2D':
             raise TypeError("This is not a valid U8 archive!")
-        self.root_node_offset = int(binascii.hexlify(u8_data.read(4)), 16)
-        self.header_size = int(binascii.hexlify(u8_data.read(4)), 16)
-        self.data_offset = int(binascii.hexlify(u8_data.read(4)), 16)
-        self.header_padding = u8_data.read(16)
-        root_node_type = int.from_bytes(u8_data.read(2))
-        root_node_name_offset = int.from_bytes(u8_data.read(2))
-        root_node_data_offset = int.from_bytes(u8_data.read(4))
+        # The following code is all skipped because these values really don't matter for extraction. They honestly
+        # really only matter to my code when they get calculated and used for packing.
+        # Offset of the root node, which will always be 0x20.
+        # root_node_offset = int(binascii.hexlify(u8_data.read(4)), 16)
+        # The size of the U8 header.
+        # header_size = int(binascii.hexlify(u8_data.read(4)), 16)
+        # The offset of the data, which is root_node_offset + header_size, aligned to 0x10.
+        # data_offset = int(binascii.hexlify(u8_data.read(4)), 16)
+        # Seek ahead to the size defined in the root node, because it's the total number of nodes in the file. The
+        # rest of the data in the root node (not that it really matters) will get read when we read the whole list.
+        u8_data.seek(u8_data.tell() + 36)
         root_node_size = int.from_bytes(u8_data.read(4))
-        self.root_node = U8Node(root_node_type, root_node_name_offset, root_node_data_offset, root_node_size)
-        self.u8_node_list.append(self.root_node)
-        # Iterate over the number of nodes that the root node lists, minus one since the count includes itself.
-        for node in range(self.root_node.size - 1):
+        # Seek back before the root node so that it gets read with all the rest.
+        u8_data.seek(u8_data.tell() - 12)
+        # Iterate over the number of nodes that the root node lists.
+        for node in range(root_node_size):
             node_type = int.from_bytes(u8_data.read(2))
             node_name_offset = int.from_bytes(u8_data.read(2))
             node_data_offset = int.from_bytes(u8_data.read(4))
             node_size = int.from_bytes(u8_data.read(4))
             self.u8_node_list.append(U8Node(node_type, node_name_offset, node_data_offset, node_size))
-        # Iterate over all loaded nodes and create a list of file names.
+        # Iterate over all loaded nodes and create a list of file names and a list of file data.
         name_base_offset = u8_data.tell()
         for node in self.u8_node_list:
             u8_data.seek(name_base_offset + node.name_offset)
@@ -97,30 +98,75 @@ class U8Archive:
             self.file_name_list.append(name)
             if node.type == 0:
                 u8_data.seek(node.data_offset)
-                self.u8_file_data_list.append(u8_data.read(node.size))
+                self.file_data_list.append(u8_data.read(node.size))
             else:
-                self.u8_file_data_list.append(b'')
+                self.file_data_list.append(b'')
 
-    def dump(self) -> None:
+    def dump(self) -> bytes:
         """
-        Dumps the U8Archive object into a U8 file.
+        Dumps the U8Archive object into the raw data of a U8 archive.
+
+        Returns
+        -------
+        bytes
+            The full U8 archive as bytes.
         """
+        # This is 0 because the header size DOES NOT include the initial 32 bytes describing the file.
+        header_size = 0
+        # Add 12 bytes for each node, since that's how many bytes each one is made up of.
+        for node in range(len(self.u8_node_list)):
+            header_size += 12
+        # Add the number of bytes used for each file/folder name in the string table.
+        for file_name in self.file_name_list:
+            header_size += len(file_name) + 1
+        # The initial data offset is equal to the file header (32 bytes) + node data aligned to 16 bytes.
+        data_offset = align_value(header_size + 32, 16)
+        # Adjust all nodes to place file data in the same order as the nodes. Why isn't it already like this?
+        current_data_offset = data_offset
+        for node in range(len(self.u8_node_list)):
+            if self.u8_node_list[node].type == 0:
+                self.u8_node_list[node].data_offset = current_data_offset
+                current_data_offset += self.u8_node_list[node].size
+        # Begin joining all the U8 archive data into one variable.
         u8_data = b''
         # Magic number.
         u8_data += b'\x55\xAA\x38\x2D'
         # Root node offset (this is always 0x20).
         u8_data += int.to_bytes(0x20, 4)
+        # Size of the file header (excluding the first 32 bytes).
+        u8_data += int.to_bytes(header_size, 4)
+        # Offset of the beginning of the data region of the U8 archive.
+        u8_data += int.to_bytes(data_offset, 4)
+        # 16 bytes of zeroes.
+        u8_data += (b'\x00' * 16)
+        # Iterate over all the U8 nodes and dump them.
+        for node in self.u8_node_list:
+            u8_data += int.to_bytes(node.type, 2)
+            u8_data += int.to_bytes(node.name_offset, 2)
+            u8_data += int.to_bytes(node.data_offset, 4)
+            u8_data += int.to_bytes(node.size, 4)
+        # Iterate over all file names and dump them. All file names are suffixed by a \x00 byte.
+        for file_name in self.file_name_list:
+            u8_data += str.encode(file_name) + b'\x00'
+        # Apply the extra padding we calculated earlier by padding to where the data offset begins.
+        while len(u8_data) < data_offset:
+            u8_data += b'\x00'
+        # Iterate all file data and dump it.
+        for file in self.file_data_list:
+            u8_data += file
+        # Return the U8 archive.
+        return u8_data
 
 
 def extract_u8(u8_data, output_folder) -> None:
     if os.path.isdir(output_folder):
         raise ValueError("Output folder already exists!")
     os.mkdir(output_folder)
+    # Create a new U8Archive object and load the provided U8 file data into it.
     u8_archive = U8Archive()
     u8_archive.load(u8_data)
+    # TODO: Comment this
+    # Also TODO: You can go more than two layers! Really should've checked that more before assuming it was the case.
     current_dir = ""
     for node in range(len(u8_archive.u8_node_list)):
         if u8_archive.u8_node_list[node].name_offset != 0:
@@ -135,20 +181,27 @@ def extract_u8(u8_data, output_folder) -> None:
             elif u8_archive.u8_node_list[node].type == 0:
                 lower_path = os.path.join(output_folder, current_dir)
                 output_file = open(os.path.join(lower_path, u8_archive.file_name_list[node]), "wb")
-                output_file.write(u8_archive.u8_file_data_list[node])
+                output_file.write(u8_archive.file_data_list[node])
                 output_file.close()
 
 
-def pack_u8(input_data) -> None:
-    if os.path.isdir(input_data):
+def pack_u8(input_path) -> bytes:
+    if os.path.isdir(input_path):
         raise ValueError("Only single-file packing is currently supported!")
-    elif os.path.isfile(input_data):
-        with open(input_data, "rb") as f:
+    elif os.path.isfile(input_path):
+        with open(input_path, "rb") as f:
             u8_archive = U8Archive()
-            file_name = os.path.basename(input_data)
+            file_name = os.path.basename(input_path)
+            file_data = f.read()
+            u8_archive.file_name_list.append("")
             u8_archive.file_name_list.append(file_name)
-            u8_archive.u8_file_data_list.append(f.read())
+            u8_archive.file_data_list.append(b'')
+            u8_archive.file_data_list.append(file_data)
+            u8_archive.u8_node_list.append(U8Node(256, 0, 0, 2))
+            u8_archive.u8_node_list.append(U8Node(0, 1, 0, len(file_data)))
+            return u8_archive.dump()
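
For reference, a minimal usage sketch of the reworked module (not part of the commit). It assumes the module is importable as src.libWiiPy.u8 and that the input file names exist; only pack_u8() and extract_u8() are taken from the diff above.

# Hypothetical round-trip using the functions changed in this commit.
# The import path and file names are assumptions, not taken from the repository.
from src.libWiiPy.u8 import pack_u8, extract_u8

# Pack a single file into a new U8 archive; pack_u8() now returns the raw archive bytes.
packed_data = pack_u8("banner.bin")
with open("banner.arc", "wb") as out_file:
    out_file.write(packed_data)

# Extract the archive we just wrote into a new directory (it must not already exist).
with open("banner.arc", "rb") as in_file:
    extract_u8(in_file.read(), "banner_out")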