Mirror of https://github.com/NinjaCheetah/libWiiPy.git (synced 2025-04-27 22:01:01 -04:00)
Rewrote most of U8 module, now extracts and packs all archives as expected
commit 152a13fbe2
parent 72a8b9b6a6
@@ -8,7 +8,7 @@ import os
 import pathlib
 from dataclasses import dataclass as _dataclass
 from typing import List
-from ..shared import _align_value
+from ..shared import _align_value, _pad_bytes


 @_dataclass
@@ -47,7 +47,10 @@ class U8Archive:
         self.u8_node_list: List[_U8Node] = []  # All the nodes in the header of a U8 file.
         self.file_name_list: List[str] = []
         self.file_data_list: List[bytes] = []
-        self.u8_file_structure = dict
+        self.root_node_offset: int = 0
+        self.header_size: int = 0
+        self.data_offset: int = 0
+        self.root_node: _U8Node = _U8Node(0, 0, 0, 0)

     def load(self, u8_data: bytes) -> None:
         """
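
Note: the new attributes mirror the fixed-size fields of the on-disk U8 header, and the root node is now cached as a _U8Node. As an illustration only (this dataclass is a stand-in for clarity, not the library's own _U8Node), a 12-byte node record breaks down roughly like this:

from dataclasses import dataclass

@dataclass
class U8NodeSketch:
    """Illustrative stand-in for _U8Node: one 12-byte record in the U8 node table."""
    type: int         # 0x00 = file, 0x01 = directory (1 byte on disk)
    name_offset: int  # offset into the string table (3 bytes on disk)
    data_offset: int  # file data offset, or parent node index for directories (4 bytes)
    size: int         # file size, or index of the last node inside a directory (4 bytes)

# The root node always sits at offset 0x20, right after the 32-byte file header.
root = U8NodeSketch(type=1, name_offset=0, data_offset=0, size=3)
print(root)
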
@@ -64,26 +67,25 @@ class U8Archive:
             self.u8_magic = u8_data.read(4)
             if self.u8_magic != b'\x55\xAA\x38\x2D':
                 raise TypeError("This is not a valid U8 archive!")
-            # The following code is all skipped because these values really don't matter for extraction. They honestly
-            # really only matter to my code when they get calculated and used for packing.
-
             # Offset of the root node, which will always be 0x20.
-            # root_node_offset = int(binascii.hexlify(u8_data.read(4)), 16)
+            self.root_node_offset = int.from_bytes(u8_data.read(4))
             # The size of the U8 header.
-            # header_size = int(binascii.hexlify(u8_data.read(4)), 16)
+            self.header_size = int.from_bytes(u8_data.read(4))
             # The offset of the data, which is root_node_offset + header_size, aligned to 0x10.
-            # data_offset = int(binascii.hexlify(u8_data.read(4)), 16)
-            # Seek ahead to the size defined in the root node, because it's the total number of nodes in the file. The
-            # rest of the data in the root node (not that it really matters) will get read when we read the whole list.
-            u8_data.seek(u8_data.tell() + 36)
+            self.data_offset = int.from_bytes(u8_data.read(4))
+            # Seek past 16 bytes of padding, then load the root node.
+            u8_data.seek(u8_data.tell() + 16)
+            root_node_type = int.from_bytes(u8_data.read(1))
+            root_node_name_offset = int.from_bytes(u8_data.read(3))
+            root_node_data_offset = int.from_bytes(u8_data.read(4))
             root_node_size = int.from_bytes(u8_data.read(4))
+            self.root_node = _U8Node(root_node_type, root_node_name_offset, root_node_data_offset, root_node_size)
             # Seek back before the root node so that it gets read with all the rest.
             u8_data.seek(u8_data.tell() - 12)
             # Iterate over the number of nodes that the root node lists.
             for node in range(root_node_size):
-                node_type = int.from_bytes(u8_data.read(2))
-                node_name_offset = int.from_bytes(u8_data.read(2))
+                node_type = int.from_bytes(u8_data.read(1))
+                node_name_offset = int.from_bytes(u8_data.read(3))
                 node_data_offset = int.from_bytes(u8_data.read(4))
                 node_size = int.from_bytes(u8_data.read(4))
                 self.u8_node_list.append(_U8Node(node_type, node_name_offset, node_data_offset, node_size))
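
Note: load() now reads every header field big-endian via int.from_bytes(); keep in mind that the byteorder argument only defaults to "big" on Python 3.11 and newer. A self-contained sketch of the same header/root-node parse, using struct for the 4-byte fields, looks roughly like this (parse_u8_header() is illustrative and not part of libWiiPy):

import io
import struct

def parse_u8_header(data: bytes):
    """Sketch of the header and root-node parse that load() performs (big-endian throughout)."""
    buf = io.BytesIO(data)
    magic = buf.read(4)
    if magic != b'\x55\xAA\x38\x2D':
        raise TypeError("This is not a valid U8 archive!")
    root_node_offset, header_size, data_offset = struct.unpack(">III", buf.read(12))
    buf.seek(16, io.SEEK_CUR)  # skip the 16 bytes of padding before the root node
    node_type = int.from_bytes(buf.read(1), "big")
    name_offset = int.from_bytes(buf.read(3), "big")
    node_data_offset, node_size = struct.unpack(">II", buf.read(8))
    return root_node_offset, header_size, data_offset, (node_type, name_offset, node_data_offset, node_size)
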
@@ -120,15 +122,19 @@ class U8Archive:
         # Add the number of bytes used for each file/folder name in the string table.
         for file_name in self.file_name_list:
             header_size += len(file_name) + 1
-        # The initial data offset is equal to the file header (32 bytes) + node data aligned to 16 bytes.
-        data_offset = _align_value(header_size + 32, 16)
+        # The initial data offset is equal to the file header (32 bytes) + node data aligned to 64 bytes.
+        data_offset = _align_value(header_size + 32, 64)
         # Adjust all nodes to place file data in the same order as the nodes. Why isn't it already like this?
         current_data_offset = data_offset
+        current_name_offset = 0
         for node in range(len(self.u8_node_list)):
             if self.u8_node_list[node].type == 0:
-                self.u8_node_list[node].data_offset = current_data_offset
-                current_data_offset += self.u8_node_list[node].size
-        # Begin joining all the U8 archive data into one variable.
+                self.u8_node_list[node].data_offset = _align_value(current_data_offset, 32)
+                current_data_offset += _align_value(self.u8_node_list[node].size, 32)
+            # Calculate the name offsets, including the extra 1 for the NULL byte at the end of each name.
+            self.u8_node_list[node].name_offset = current_name_offset
+            current_name_offset += len(self.file_name_list[node]) + 1
+        # Begin joining all the U8 archive data into bytes.
         u8_data = b''
         # Magic number.
         u8_data += b'\x55\xAA\x38\x2D'
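
Note: dump() now aligns each file's data offset to 32 bytes, aligns the start of the data region to 64 bytes, and assigns name offsets as a running total over the NUL-terminated string table. A small worked example with made-up names and sizes (the 12-bytes-per-node header math is an assumption based on the U8 node layout, since the initial header_size calculation sits outside this hunk):

def align(value: int, alignment: int) -> int:
    """Round value up to the next multiple of alignment (what _align_value is used for here)."""
    return (value + alignment - 1) // alignment * alignment

# Made-up archive contents: a root directory, one subdirectory, and two files.
names = ["", "meta", "banner.bin", "icon.bin"]
sizes = [0, 0, 100, 40]   # directories carry no file data
types = [1, 1, 0, 0]      # 1 = directory, 0 = file

header_size = 12 * len(names) + sum(len(name) + 1 for name in names)  # node table + string table
data_offset = align(header_size + 32, 64)

current_data_offset = data_offset
current_name_offset = 0
for name, size, node_type in zip(names, sizes, types):
    file_data_offset = align(current_data_offset, 32) if node_type == 0 else 0
    print(f"{name or '<root>':12} name_offset={current_name_offset:3} data_offset={file_data_offset}")
    if node_type == 0:
        current_data_offset += align(size, 32)
    current_name_offset += len(name) + 1
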
@@ -142,19 +148,18 @@ class U8Archive:
         u8_data += (b'\x00' * 16)
         # Iterate over all the U8 nodes and dump them.
         for node in self.u8_node_list:
-            u8_data += int.to_bytes(node.type, 2)
-            u8_data += int.to_bytes(node.name_offset, 2)
+            u8_data += int.to_bytes(node.type, 1)
+            u8_data += int.to_bytes(node.name_offset, 3)
             u8_data += int.to_bytes(node.data_offset, 4)
             u8_data += int.to_bytes(node.size, 4)
         # Iterate over all file names and dump them. All file names are suffixed by a \x00 byte.
         for file_name in self.file_name_list:
             u8_data += str.encode(file_name) + b'\x00'
         # Apply the extra padding we calculated earlier by padding to where the data offset begins.
-        while len(u8_data) < data_offset:
-            u8_data += b'\x00'
+        u8_data = _pad_bytes(u8_data, 64)
         # Iterate all file data and dump it.
         for file in self.file_data_list:
-            u8_data += file
+            u8_data += _pad_bytes(file, 32)
         # Return the U8 archive.
         return u8_data

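
Note: the manual padding loop is replaced by _pad_bytes from ..shared, which isn't shown in this diff. The sketch below shows the behaviour the dump() code appears to rely on (NUL-padding up to the next multiple of the alignment); it is an assumption for illustration, not the library's implementation:

def pad_bytes_sketch(data: bytes, alignment: int = 64) -> bytes:
    """Pad data with NUL bytes up to the next multiple of alignment (assumed behaviour of _pad_bytes)."""
    remainder = len(data) % alignment
    if remainder:
        data += b'\x00' * (alignment - remainder)
    return data

assert len(pad_bytes_sketch(b'abc', 64)) == 64
assert pad_bytes_sketch(b'', 64) == b''            # already aligned, nothing added
assert len(pad_bytes_sketch(b'\x01' * 33, 32)) == 64
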
@@ -185,69 +190,58 @@ def extract_u8(u8_data, output_folder) -> None:
     u8_archive.load(u8_data)
     # This variable stores the path of the directory we're currently processing.
     current_dir = output_folder
-    # This variable stores the final nodes for every directory we've entered, and is used to handle the recursion of
-    # those directories to ensure that everything gets where it belongs.
-    directory_recursion = [0]
-    # Iterate over every node and extract the files and folders.
+    # This variable stores the order of directory nodes leading to the current working directory, to make sure that
+    # things get where they belong.
+    parent_dirs = [0]
     for node in range(len(u8_archive.u8_node_list)):
-        # Code for a directory node. Second check just ensures we ignore the root node.
-        if u8_archive.u8_node_list[node].type == 256 and u8_archive.u8_node_list[node].name_offset != 0:
-            # The size value for a directory node is the position of the last node in this directory, with the root node
-            # counting as node 1.
-            # If the current node is below the end of the current directory, create this directory inside the previous
-            # current directory and make the current.
-            if node + 1 < directory_recursion[-1]:
+        # Code for a directory node (excluding the root node since that already exists).
+        if u8_archive.u8_node_list[node].type == 1 and u8_archive.u8_node_list[node].name_offset != 0:
+            if u8_archive.u8_node_list[node].data_offset == parent_dirs[-1]:
                 current_dir = current_dir.joinpath(u8_archive.file_name_list[node])
-                os.mkdir(current_dir)
-            # If the current node is beyond the end of the current directory, we've followed that path all the way down,
-            # so reset back to the root directory and put our new directory there.
-            elif node + 1 > directory_recursion[-1]:
-                current_dir = output_folder.joinpath(u8_archive.file_name_list[node])
-                os.mkdir(current_dir)
-            # This check is here just in case a directory ever ends with an empty directory and not a file.
-            elif node + 1 == directory_recursion[-1]:
-                current_dir = output_folder
-                directory_recursion.pop()
-            # If the last node for the directory we just processed is new (which is always should be), add it to the
-            # recursion array.
-            if u8_archive.u8_node_list[node].size not in directory_recursion:
-                directory_recursion.append(u8_archive.u8_node_list[node].size)
+                current_dir.mkdir(exist_ok=True)
+                parent_dirs.append(node)
+            else:
+                # Go up until we're back at the correct level.
+                while u8_archive.u8_node_list[node].data_offset != parent_dirs[-1]:
+                    parent_dirs.pop()
+                parent_dirs.append(node)
+                current_dir = output_folder
+                # Rebuild current working directory, and make sure all directories in the path exist.
+                for directory in parent_dirs:
+                    current_dir = current_dir.joinpath(u8_archive.file_name_list[directory])
+                    current_dir.mkdir(exist_ok=True)
         # Code for a file node.
         elif u8_archive.u8_node_list[node].type == 0:
-            # Write out the file to the current directory.
-            output_file = open(current_dir.joinpath(u8_archive.file_name_list[node]), "wb")
-            output_file.write(u8_archive.file_data_list[node])
-            output_file.close()
-            # If this file is the final node for the current directory, pop() the recursion array and set the current
-            # directory to the parent of the previous current.
-            if node + 1 in directory_recursion:
-                current_dir = current_dir.parent
-                directory_recursion.pop()
-        # Code for a totally unrecognized node type, which should not happen.
-        elif u8_archive.u8_node_list[node].type != 0 and u8_archive.u8_node_list[node].type != 256:
-            raise ValueError("A node with an invalid type (" + str(u8_archive.u8_node_list[node].type) + ") was"
-                             "found!")
+            open(current_dir.joinpath(u8_archive.file_name_list[node]), "wb").write(u8_archive.file_data_list[node])
+        # Handle an invalid node type.
+        elif u8_archive.u8_node_list[node].type != 0 and u8_archive.u8_node_list[node].type != 1:
+            raise ValueError("A node with an invalid type (" + str(u8_archive.u8_node_list[node].type) + ") was found!")


-def _pack_u8_dir(u8_archive: U8Archive, current_path, node_count, name_offset):
+def _pack_u8_dir(u8_archive: U8Archive, current_path, node_count, parent_node):
     # First, get the list of everything in current path.
     root_list = os.listdir(current_path)
     file_list = []
     dir_list = []
     # Create separate lists of the files and directories in the current directory so that we can handle the files first.
+    # noinspection PyTypeChecker
+    root_list.sort(key=str.lower)
     for path in root_list:
         if os.path.isfile(current_path.joinpath(path)):
             file_list.append(path)
         elif os.path.isdir(current_path.joinpath(path)):
             dir_list.append(path)
+    # noinspection PyTypeChecker
+    file_list.sort(key=str.lower)
+    # noinspection PyTypeChecker
+    dir_list.sort(key=str.lower)
     # For files, read their data into the file data list, add their name into the file name list, then calculate the
-    # offset for their file name and create a new U8Node() for them.
+    # offset for their file name and create a new U8Node() for them. -1 values are temporary and are set during dumping.
     for file in file_list:
         node_count += 1
         u8_archive.file_name_list.append(file)
         u8_archive.file_data_list.append(open(current_path.joinpath(file), "rb").read())
-        u8_archive.u8_node_list.append(_U8Node(0, name_offset, 0, len(u8_archive.file_data_list[-1])))
-        name_offset = name_offset + len(file) + 1  # Add 1 to accommodate the null byte at the end of the name.
+        u8_archive.u8_node_list.append(_U8Node(0, -1, -1, len(u8_archive.file_data_list[-1])))
     # For directories, add their name to the file name list, add empty data to the file data list (since they obviously
     # wouldn't have any), find the total number of files and directories inside the directory to calculate the final
     # node included in it, then recursively call this function again on that directory to process it.
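
Note: in the rewritten extract_u8(), a directory node's data_offset holds the list index of its parent node, and parent_dirs acts as a stack of the directories leading to the current location. A toy node table (made-up data, not library code) showing how that stack walk resolves paths:

# Hypothetical node table: (type, parent_index, name); 1 = directory, 0 = file.
nodes = [
    (1, 0, ""),          # 0: root
    (1, 0, "meta"),      # 1: directory under root
    (0, 0, "icon.bin"),  # 2: file inside "meta"
    (1, 0, "sound"),     # 3: directory under root again
    (0, 0, "bgm.brstm"), # 4: file inside "sound"
]

parent_dirs = [0]
current = []
for index, (node_type, parent, name) in enumerate(nodes):
    if node_type == 1 and index != 0:
        if parent == parent_dirs[-1]:
            current.append(name)            # descend into a child of the current directory
            parent_dirs.append(index)
        else:
            while parent != parent_dirs[-1]:
                parent_dirs.pop()           # climb back up to the correct parent
            parent_dirs.append(index)
            current = [nodes[i][2] for i in parent_dirs if i != 0]
    elif node_type == 0:
        print("/".join(current + [name]))   # prints meta/icon.bin, then sound/bgm.brstm
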
@@ -256,12 +250,11 @@ def _pack_u8_dir(u8_archive: U8Archive, current_path, node_count, name_offset):
         u8_archive.file_name_list.append(directory)
         u8_archive.file_data_list.append(b'')
         max_node = node_count + sum(1 for _ in current_path.joinpath(directory).rglob('*'))
-        u8_archive.u8_node_list.append(_U8Node(256, name_offset, 0, max_node))
-        name_offset = name_offset + len(directory) + 1  # Add 1 to accommodate the null byte at the end of the name.
-        u8_archive, node_count, name_offset = _pack_u8_dir(u8_archive, current_path.joinpath(directory), node_count,
-                                                           name_offset)
+        u8_archive.u8_node_list.append(_U8Node(1, -1, parent_node, max_node))
+        u8_archive, node_count = _pack_u8_dir(u8_archive, current_path.joinpath(directory), node_count,
+                                              u8_archive.u8_node_list.index(u8_archive.u8_node_list[-1]))
     # Return the U8Archive object, the current node we're on, and the current name offset.
-    return u8_archive, node_count, name_offset
+    return u8_archive, node_count


 def pack_u8(input_path) -> bytes:
@@ -279,34 +272,19 @@ def pack_u8(input_path) -> bytes:
         The data for the packed U8 archive.
     """
     input_path = pathlib.Path(input_path)
-    if os.path.isdir(input_path):
+    if input_path.is_dir():
         # Append empty entries at the start for the root node, and then create the root U8Node() object, using rglob()
         # to read the total count of files and directories that will be packed so that we can add the total node count.
         u8_archive = U8Archive()
         u8_archive.file_name_list.append("")
         u8_archive.file_data_list.append(b'')
-        u8_archive.u8_node_list.append(_U8Node(256, 0, 0, sum(1 for _ in input_path.rglob('*')) + 1))
+        u8_archive.u8_node_list.append(_U8Node(1, 0, 0, sum(1 for _ in input_path.rglob('*')) + 1))
         # Call the private function _pack_u8_dir() on the root note, which will recursively call itself to pack every
         # subdirectory and file. Discard node_count and name_offset since we don't care about them here, as they're
         # really only necessary for the directory recursion.
-        u8_archive, _, _ = _pack_u8_dir(u8_archive, input_path, node_count=1, name_offset=1)
-        return u8_archive.dump()
-    elif os.path.isfile(input_path):
-        # Simple code to handle if a single file is provided as input. Not really sure *why* you'd do this, since the
-        # whole point of a U8 archive is to stitch files together, but it's here nonetheless.
-        with open(input_path, "rb") as f:
-            u8_archive = U8Archive()
-            file_name = input_path.name
-            file_data = f.read()
-            # Append blank file name for the root node.
-            u8_archive.file_name_list.append("")
-            u8_archive.file_name_list.append(file_name)
-            # Append blank data for the root node.
-            u8_archive.file_data_list.append(b'')
-            u8_archive.file_data_list.append(file_data)
-            # Append generic U8Node for the root, followed by the actual file's node.
-            u8_archive.u8_node_list.append(_U8Node(256, 0, 0, 2))
-            u8_archive.u8_node_list.append(_U8Node(0, 1, 0, len(file_data)))
-            return u8_archive.dump()
+        u8_archive, _ = _pack_u8_dir(u8_archive, input_path, node_count=1, parent_node=0)
+        return u8_archive.dump()
+    elif input_path.is_file():
+        raise ValueError("This does not appear to be a directory.")
     else:
-        raise FileNotFoundError("Input file/directory: \"" + str(input_path) + "\" does not exist!")
+        raise FileNotFoundError("Input directory: \"" + str(input_path) + "\" does not exist!")
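
Note: with the single-file branch removed, pack_u8() now only accepts a directory and raises for anything else. A hypothetical usage sketch for round-tripping a folder (the import path is assumed and may differ between libWiiPy versions):

import pathlib
from libWiiPy.archive import u8  # assumed import path

source_dir = pathlib.Path("banner_contents")    # directory to pack; must already exist
packed = u8.pack_u8(source_dir)                 # ValueError for a file, FileNotFoundError if missing
pathlib.Path("banner.arc").write_bytes(packed)

# Round-trip: unpack the freshly built archive into another folder.
u8.extract_u8(packed, pathlib.Path("banner_out"))
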