Added a lower LZ77 compression level that runs faster

2026-01-08 10:45:59 -05:00 · 2025-01-26 12:56:41 -05:00 · 2025-01-26 12:56:41 -05:00 · 9eabf2caee
commit 9eabf2caee
parent 5ae867197b
2 changed files with 95 additions and 19 deletions
--- a/src/libWiiPy/archive/lz77.py
+++ b/src/libWiiPy/archive/lz77.py
@ -20,11 +20,11 @@ class _LZNode:
    weight: int = 0


-def _compress_compare_bytes(byte1: bytes, offset1: int, byte2: bytes, offset2: int, abs_len_max: int) -> int:
+def _compress_compare_bytes(buffer: bytes, offset1: int, offset2: int, abs_len_max: int) -> int:
    # Compare bytes up to the maximum length we can match.
    num_matched = 0
    while num_matched < abs_len_max:
-        if byte1[offset1 + num_matched] != byte2[offset2 + num_matched]:
+        if buffer[offset1 + num_matched] != buffer[offset2 + num_matched]:
            break
        num_matched += 1
    return num_matched
@ -43,7 +43,7 @@ def _compress_search_matches(buffer: bytes, pos: int) -> (int, int):
    biggest_match, biggest_match_pos = 0, 0
    # Search for matches.
    for i in range(_LZ_MIN_DISTANCE, max_dist + 1):
-        num_matched = _compress_compare_bytes(buffer, pos - i, buffer, pos, max_len)
+        num_matched = _compress_compare_bytes(buffer, pos - i, pos, max_len)
        if num_matched > biggest_match:
            biggest_match = num_matched
            biggest_match_pos = i
@ -52,6 +52,25 @@ def _compress_search_matches(buffer: bytes, pos: int) -> (int, int):
    return biggest_match, biggest_match_pos


+def _compress_search_matches_greedy(buffer: bytes, pos: int) -> (int, int):
+    # Finds and returns the first valid match, rather than finding the best one. Returns (length, distance).
+    bytes_left = len(buffer) - pos
+    global _LZ_MAX_DISTANCE, _LZ_MAX_LENGTH, _LZ_MIN_DISTANCE
+    # Default to only looking back 4096 bytes, unless we've moved fewer than 4096 bytes, in which case we should
+    # only look as far back as we've gone.
+    max_dist = min(_LZ_MAX_DISTANCE, pos)
+    # Default to only matching up to 18 bytes, unless fewer than 18 bytes remain, in which case we can only match
+    # up to that many bytes.
+    max_len = min(_LZ_MAX_LENGTH, bytes_left)
+    match, match_pos = 0, 0
+    for i in range(_LZ_MIN_DISTANCE, max_dist + 1):
+        match = _compress_compare_bytes(buffer, pos - i, pos, max_len)
+        match_pos = i
+        if match >= _LZ_MIN_LENGTH or match == max_len:  # Long enough to be a reference, or can't get longer.
+            break
+    return match, match_pos
+
+
 def _compress_node_is_ref(node: _LZNode) -> bool:
    return node.len >= _LZ_MIN_LENGTH

@ -64,20 +83,9 @@ def _compress_get_node_cost(length: int) -> int:
    return 1 + (num_bytes * 8)


-def compress_lz77(data: bytes) -> bytes:
-    """
-    Compresses data using the Wii's LZ77 compression algorithm and returns the compressed result.
-
-    Parameters
-    ----------
-    data: bytes
-        The data to compress.
-
-    Returns
-    -------
-    bytes
-        The LZ77-compressed data.
-    """
+def _compress_lz77_optimized(data: bytes) -> bytes:
+    # Optimized compressor based around a node graph that finds optimal string matches. Originally the default
+    # implementation, but unfortunately it's very slow.
    nodes = [_LZNode() for _ in range(len(data))]
    # Iterate over the uncompressed data, starting from the end.
    pos = len(data)
@ -117,7 +125,7 @@ def compress_lz77(data: bytes) -> bytes:
            node.len = len_best
            node.dist = dist
            node.weight = weight_best
-    # Write the header data.
+    # Write the compressed data.
    with io.BytesIO() as buffer:
        # Write the header data.
        buffer.write(b'LZ77\x10')  # The LZ type on the Wii is *always* 0x10.
@ -155,6 +163,74 @@ def compress_lz77(data: bytes) -> bytes:
    return out_data


+def _compress_lz77_greedy(data: bytes) -> bytes:
+    # Greedy compressor that processes the file start to end and saves the first matches found. Faster than the
+    # optimized implementation, but creates larger files. Returns the header followed by the compressed chunks.
+    global _LZ_MAX_LENGTH, _LZ_MIN_LENGTH, _LZ_MIN_DISTANCE
+    with io.BytesIO() as buffer:
+        # Write the header data: the magic/type bytes followed by the uncompressed size as 3 little-endian bytes.
+        buffer.write(b'LZ77\x10')  # The LZ type on the Wii is *always* 0x10.
+        buffer.write(len(data).to_bytes(3, 'little'))
+
+        src_pos = 0
+        while src_pos < len(data):
+            head = 0
+            head_pos = buffer.tell()
+            buffer.write(b'\x00')  # Reserve a byte for the chunk head, patched in once the chunk is complete.
+
+            i = 0
+            while i < 8 and src_pos < len(data):  # Each chunk holds up to 8 nodes, one per bit of the head.
+                length, dist = _compress_search_matches_greedy(data, src_pos)
+                # This is a reference node, flagged by setting bit (7 - i) of the chunk head.
+                if length >= _LZ_MIN_LENGTH:
+                    encoded = (((length - _LZ_MIN_LENGTH) & 0xF) << 12) | ((dist - _LZ_MIN_DISTANCE) & 0xFFF)
+                    buffer.write(encoded.to_bytes(2, 'big'))  # Explicit byte order; the default needs Python 3.11+.
+                    head = (head | (1 << (7 - i))) & 0xFF
+                    src_pos += length
+                # This is a direct copy node.
+                else:
+                    buffer.write(data[src_pos:src_pos + 1])
+                    src_pos += 1
+                i += 1
+
+            pos = buffer.tell()
+            buffer.seek(head_pos)
+            buffer.write(head.to_bytes(1, 'big'))  # Explicit byte order; the default needs Python 3.11+.
+            buffer.seek(pos)
+
+        buffer.seek(0)
+        out_data = buffer.read()
+    return out_data
+
+
+def compress_lz77(data: bytes, compression_level: int = 1) -> bytes:
+    """
+    Compresses data using the Wii's LZ77 compression algorithm and returns the compressed result. Supports two
+    different levels of compression, one based around a "greedy" LZ compression algorithm and the other based around
+    an optimized LZ compression algorithm. The greedy compressor, level 1, will produce a larger compressed file but
+    will run noticeably faster than the optimized compressor, which is level 2, especially for larger data.
+
+    Parameters
+    ----------
+    data: bytes
+        The data to compress.
+    compression_level: int
+        The compression level to use, either 1 or 2. Default value is 1. Any other value raises a ValueError.
+
+    Returns
+    -------
+    bytes
+        The LZ77-compressed data.
+    """
+    if compression_level == 1:
+        out_data = _compress_lz77_greedy(data)
+    elif compression_level == 2:
+        out_data = _compress_lz77_optimized(data)
+    else:
+        raise ValueError(f"Invalid compression level \"{compression_level}\"!")
+    return out_data
+
+
 def decompress_lz77(lz77_data: bytes) -> bytes:
    """
    Decompresses LZ77-compressed data and returns the decompressed result. Supports data both with and without the
--- a/src/libWiiPy/title/nus.py
+++ b/src/libWiiPy/title/nus.py
@ -4,7 +4,7 @@
 # See https://wiibrew.org/wiki/NUS for details about the NUS

 import requests
-import hashlib
+#import hashlib
 from typing import List
 from urllib.parse import urlparse as _urlparse
 from .title import Title