Skip to main content

steel_utils/
hash.rs

1//! CRC32C hashing for component validation.
2//!
3//! Minecraft uses CRC32C (Castagnoli) checksums to validate component data
4//! in serverbound packets. This module provides a hasher that matches
5//! Minecraft's `HashOps` implementation exactly.
6//!
7//! ## Type Tags
8//!
9//! Minecraft prefixes each value with a type tag byte before hashing:
10//! - Primitives: TAG_BYTE, TAG_SHORT, TAG_INT, TAG_LONG, TAG_FLOAT, TAG_DOUBLE
11//! - Boolean: TAG_BOOLEAN followed by 0x00 or 0x01
//! - String: TAG_STRING followed by length in UTF-16 code units (i32 LE) and the UTF-16 LE code units
13//! - Collections use start/end markers: TAG_MAP_START/END, TAG_LIST_START/END
14//!
15//! All numeric values are little-endian (matching Guava's Hasher).
16
/// Type tags matching Minecraft's `HashOps` implementation.
///
/// The enum is `#[repr(u8)]` and each discriminant is the byte value that is
/// pushed into the hash buffer for that tag (`tag as u8`), so the numeric
/// values must not be renumbered or reordered.
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum HashTag {
    /// Empty/null value tag.
    Empty = 1,
    /// Start of a map/object.
    MapStart = 2,
    /// End of a map/object.
    MapEnd = 3,
    /// Start of a list/array.
    ListStart = 4,
    /// End of a list/array.
    ListEnd = 5,
    /// Byte (i8) value.
    Byte = 6,
    /// Short (i16) value.
    Short = 7,
    /// Int (i32) value.
    Int = 8,
    /// Long (i64) value.
    Long = 9,
    /// Float (f32) value.
    Float = 10,
    /// Double (f64) value.
    Double = 11,
    /// String value.
    String = 12,
    /// Boolean value.
    Boolean = 13,
    /// Start of a byte array.
    ByteArrayStart = 14,
    /// End of a byte array.
    ByteArrayEnd = 15,
    /// Start of an int array.
    IntArrayStart = 16,
    /// End of an int array.
    IntArrayEnd = 17,
    /// Start of a long array.
    LongArrayStart = 18,
    /// End of a long array.
    LongArrayEnd = 19,
}
60
/// A CRC32C hasher for component values.
///
/// This hasher is designed to produce the same hashes as Minecraft's
/// `HashOps` implementation using Guava's `Hashing.crc32c()`.
///
/// Values are appended to an internal byte buffer; the checksum is only
/// computed over that buffer when the hasher is finished.
///
/// # Example
///
/// ```
/// use steel_utils::hash::ComponentHasher;
///
/// let mut hasher = ComponentHasher::new();
/// hasher.put_int(42);
/// let hash = hasher.finish();
/// ```
#[derive(Debug, Clone, Default)]
pub struct ComponentHasher {
    /// Encoded bytes accumulated so far (tags, lengths and payloads).
    data: Vec<u8>,
}
79
80impl ComponentHasher {
81    /// Creates a new hasher.
82    #[must_use]
83    pub const fn new() -> Self {
84        Self { data: Vec::new() }
85    }
86
87    /// Writes a raw tag byte.
88    fn put_tag(&mut self, tag: HashTag) {
89        self.data.push(tag as u8);
90    }
91
92    /// Writes raw bytes without any tag or length prefix.
93    pub fn put_raw_bytes(&mut self, bytes: &[u8]) {
94        self.data.extend_from_slice(bytes);
95    }
96
97    /// Hashes an empty/null value.
98    pub fn put_empty(&mut self) {
99        self.put_tag(HashTag::Empty);
100    }
101
102    /// Hashes a byte (i8) value with tag.
103    pub fn put_byte(&mut self, value: i8) {
104        self.put_tag(HashTag::Byte);
105        self.data.push(value as u8);
106    }
107
108    /// Hashes an unsigned byte (u8) value with tag.
109    pub fn put_ubyte(&mut self, value: u8) {
110        self.put_tag(HashTag::Byte);
111        self.data.push(value);
112    }
113
114    /// Hashes a short (i16) value with tag.
115    /// Guava uses little-endian byte order.
116    pub fn put_short(&mut self, value: i16) {
117        self.put_tag(HashTag::Short);
118        self.data.extend_from_slice(&value.to_le_bytes());
119    }
120
121    /// Hashes an int (i32) value with tag.
122    /// Guava uses little-endian byte order.
123    pub fn put_int(&mut self, value: i32) {
124        self.put_tag(HashTag::Int);
125        self.data.extend_from_slice(&value.to_le_bytes());
126    }
127
128    /// Hashes a long (i64) value with tag.
129    /// Guava uses little-endian byte order.
130    pub fn put_long(&mut self, value: i64) {
131        self.put_tag(HashTag::Long);
132        self.data.extend_from_slice(&value.to_le_bytes());
133    }
134
135    /// Hashes a float (f32) value with tag.
136    /// Guava uses little-endian byte order.
137    pub fn put_float(&mut self, value: f32) {
138        self.put_tag(HashTag::Float);
139        self.data.extend_from_slice(&value.to_bits().to_le_bytes());
140    }
141
142    /// Hashes a double (f64) value with tag.
143    /// Guava uses little-endian byte order.
144    pub fn put_double(&mut self, value: f64) {
145        self.put_tag(HashTag::Double);
146        self.data.extend_from_slice(&value.to_bits().to_le_bytes());
147    }
148
149    /// Hashes a boolean value with tag.
150    pub fn put_bool(&mut self, value: bool) {
151        self.put_tag(HashTag::Boolean);
152        self.data.push(u8::from(value));
153    }
154
155    /// Hashes a string value with tag, length prefix, and UTF-16 LE characters.
156    ///
157    /// This matches Guava's Hasher which uses little-endian for all primitives:
158    /// - `putInt(length)` writes length as 4 bytes little-endian
159    /// - `putUnencodedChars` writes each char as 2 bytes little-endian
160    pub fn put_string(&mut self, value: &str) {
161        self.put_tag(HashTag::String);
162        // Length is the number of UTF-16 code units, not bytes
163        // Guava uses little-endian for putInt
164        let char_count: i32 = value.chars().map(|c| c.len_utf16() as i32).sum();
165        self.data.extend_from_slice(&char_count.to_le_bytes());
166        // Write each UTF-16 code unit as little-endian (low byte first, then high byte)
167        // This matches Guava's putUnencodedChars behavior
168        for c in value.chars() {
169            let mut buf = [0u16; 2];
170            let encoded = c.encode_utf16(&mut buf);
171            for code_unit in encoded {
172                self.data.extend_from_slice(&code_unit.to_le_bytes());
173            }
174        }
175    }
176
177    /// Starts a map/object. Call `end_map()` when done adding entries.
178    pub fn start_map(&mut self) {
179        self.put_tag(HashTag::MapStart);
180    }
181
182    /// Ends a map/object.
183    pub fn end_map(&mut self) {
184        self.put_tag(HashTag::MapEnd);
185    }
186
187    /// Starts a list. Call `end_list()` when done adding elements.
188    pub fn start_list(&mut self) {
189        self.put_tag(HashTag::ListStart);
190    }
191
192    /// Ends a list.
193    pub fn end_list(&mut self) {
194        self.put_tag(HashTag::ListEnd);
195    }
196
197    /// Starts a byte array. Call `end_byte_array()` when done.
198    pub fn start_byte_array(&mut self) {
199        self.put_tag(HashTag::ByteArrayStart);
200    }
201
202    /// Ends a byte array.
203    pub fn end_byte_array(&mut self) {
204        self.put_tag(HashTag::ByteArrayEnd);
205    }
206
207    /// Starts an int array. Call `end_int_array()` when done.
208    pub fn start_int_array(&mut self) {
209        self.put_tag(HashTag::IntArrayStart);
210    }
211
212    /// Writes an int value without tag (for use inside int arrays).
213    /// Guava uses little-endian byte order.
214    pub fn put_int_raw(&mut self, value: i32) {
215        self.data.extend_from_slice(&value.to_le_bytes());
216    }
217
218    /// Ends an int array.
219    pub fn end_int_array(&mut self) {
220        self.put_tag(HashTag::IntArrayEnd);
221    }
222
223    /// Starts a long array. Call `end_long_array()` when done.
224    pub fn start_long_array(&mut self) {
225        self.put_tag(HashTag::LongArrayStart);
226    }
227
228    /// Writes a long value without tag (for use inside long arrays).
229    /// Guava uses little-endian byte order.
230    pub fn put_long_raw(&mut self, value: i64) {
231        self.data.extend_from_slice(&value.to_le_bytes());
232    }
233
234    /// Ends a long array.
235    pub fn end_long_array(&mut self) {
236        self.put_tag(HashTag::LongArrayEnd);
237    }
238
239    /// Hashes a byte array with start/end markers.
240    pub fn put_byte_array(&mut self, bytes: &[u8]) {
241        self.start_byte_array();
242        self.data.extend_from_slice(bytes);
243        self.end_byte_array();
244    }
245
246    /// Hashes an int array with start/end markers.
247    pub fn put_int_array(&mut self, values: &[i32]) {
248        self.start_int_array();
249        for &v in values {
250            self.put_int_raw(v);
251        }
252        self.end_int_array();
253    }
254
255    /// Hashes a long array with start/end markers.
256    pub fn put_long_array(&mut self, values: &[i64]) {
257        self.start_long_array();
258        for &v in values {
259            self.put_long_raw(v);
260        }
261        self.end_long_array();
262    }
263
264    /// Returns the current hash data (for nested hashing).
265    #[must_use]
266    pub fn current_data(&self) -> &[u8] {
267        &self.data
268    }
269
270    /// Finishes hashing and returns the CRC32C checksum as i32.
271    #[must_use]
272    pub fn finish(self) -> i32 {
273        crc32c::crc32c(&self.data) as i32
274    }
275
276    /// Finishes hashing and returns the hash as a padded i64.
277    /// Used for sorting map entries.
278    #[must_use]
279    pub fn finish_as_long(self) -> i64 {
280        let hash = crc32c::crc32c(&self.data);
281        // Pad to long by zero-extending (matches Guava's HashCode.padToLong())
282        i64::from(hash)
283    }
284}
285
/// A hash entry for map sorting.
///
/// Vanilla Minecraft hashes each key and value, then sorts by these hashes,
/// and writes ONLY the 4-byte hash values (not the original encoded bytes) to the final hasher.
///
/// All fields are plain integers/byte arrays, so entries can be compared and
/// debug-printed directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HashEntry {
    /// The hash of the key data (for sorting).
    pub key_hash: i64,
    /// The hash of the value data (for sorting).
    pub value_hash: i64,
    /// The 4-byte CRC32C hash of the key (to be written to the final hasher).
    pub key_bytes: [u8; 4],
    /// The 4-byte CRC32C hash of the value (to be written to the final hasher).
    pub value_bytes: [u8; 4],
}
301
302impl HashEntry {
303    /// Creates a new hash entry.
304    #[must_use]
305    pub fn new(key_hasher: ComponentHasher, value_hasher: ComponentHasher) -> Self {
306        let key_bytes = crc32c::crc32c(&key_hasher.data);
307        let value_bytes = crc32c::crc32c(&value_hasher.data);
308        Self {
309            key_hash: i64::from(key_bytes),
310            value_hash: i64::from(value_bytes),
311            key_bytes: key_bytes.to_le_bytes(),
312            value_bytes: value_bytes.to_le_bytes(),
313        }
314    }
315}
316
317/// Sorts map entries according to Minecraft's ordering:
318/// First by key hash, then by value hash (both as padded longs).
319pub fn sort_map_entries(entries: &mut [HashEntry]) {
320    entries.sort_by(|a, b| {
321        a.key_hash
322            .cmp(&b.key_hash)
323            .then_with(|| a.value_hash.cmp(&b.value_hash))
324    });
325}
326
327/// Trait for types that can be hashed for component validation.
328pub trait HashComponent {
329    /// Hashes this value into the given hasher.
330    fn hash_component(&self, hasher: &mut ComponentHasher);
331
332    /// Computes the hash of this value.
333    fn compute_hash(&self) -> i32 {
334        let mut hasher = ComponentHasher::new();
335        self.hash_component(&mut hasher);
336        hasher.finish()
337    }
338}
339
340// Implement HashComponent for primitive types
341impl HashComponent for i8 {
342    fn hash_component(&self, hasher: &mut ComponentHasher) {
343        hasher.put_byte(*self);
344    }
345}
346
347impl HashComponent for u8 {
348    fn hash_component(&self, hasher: &mut ComponentHasher) {
349        hasher.put_ubyte(*self);
350    }
351}
352
353impl HashComponent for i16 {
354    fn hash_component(&self, hasher: &mut ComponentHasher) {
355        hasher.put_short(*self);
356    }
357}
358
359impl HashComponent for i32 {
360    fn hash_component(&self, hasher: &mut ComponentHasher) {
361        hasher.put_int(*self);
362    }
363}
364
365impl HashComponent for i64 {
366    fn hash_component(&self, hasher: &mut ComponentHasher) {
367        hasher.put_long(*self);
368    }
369}
370
371impl HashComponent for f32 {
372    fn hash_component(&self, hasher: &mut ComponentHasher) {
373        hasher.put_float(*self);
374    }
375}
376
377impl HashComponent for f64 {
378    fn hash_component(&self, hasher: &mut ComponentHasher) {
379        hasher.put_double(*self);
380    }
381}
382
383impl HashComponent for bool {
384    fn hash_component(&self, hasher: &mut ComponentHasher) {
385        hasher.put_bool(*self);
386    }
387}
388
389impl HashComponent for str {
390    fn hash_component(&self, hasher: &mut ComponentHasher) {
391        hasher.put_string(self);
392    }
393}
394
395impl HashComponent for String {
396    fn hash_component(&self, hasher: &mut ComponentHasher) {
397        hasher.put_string(self);
398    }
399}
400
401impl HashComponent for () {
402    fn hash_component(&self, hasher: &mut ComponentHasher) {
403        // Unit type hashes as empty
404        hasher.put_empty();
405    }
406}
407
/// Unit tests for the hasher.
///
/// The first group pins the exact byte layout fed into CRC32C (little-endian
/// numbers, UTF-16LE strings); the `text_component` tests compare against
/// hashes captured from a vanilla client.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_int_hash() {
        let mut hasher = ComponentHasher::new();
        hasher.put_int(42);
        // Format: [TAG_INT=8] [2A 00 00 00] (value is little-endian)
        assert_eq!(hasher.current_data(), [8_u8, 0x2A, 0, 0, 0]);
        let hash = hasher.finish();
        assert_ne!(hash, 0);
    }

    #[test]
    fn test_string_hash() {
        let mut hasher = ComponentHasher::new();
        hasher.put_string("hello");
        // Format: [TAG_STRING=12] [05 00 00 00] then each char as a UTF-16LE
        // code unit: [h 00] [e 00] [l 00] [l 00] [o 00]
        assert_eq!(
            hasher.current_data(),
            [12_u8, 5, 0, 0, 0, b'h', 0, b'e', 0, b'l', 0, b'l', 0, b'o', 0]
        );
        let hash = hasher.finish();
        assert_ne!(hash, 0);
    }

    #[test]
    fn test_bool_hash() {
        let mut hasher_true = ComponentHasher::new();
        hasher_true.put_bool(true);
        // Format: [TAG_BOOLEAN=13] [01]
        assert_eq!(hasher_true.current_data(), [13_u8, 1]);
        let hash_true = hasher_true.finish();

        let mut hasher_false = ComponentHasher::new();
        hasher_false.put_bool(false);
        // Format: [TAG_BOOLEAN=13] [00]
        assert_eq!(hasher_false.current_data(), [13_u8, 0]);
        let hash_false = hasher_false.finish();

        // true and false should produce different hashes
        assert_ne!(hash_true, hash_false);
    }

    #[test]
    fn test_empty_map_hash() {
        let mut hasher = ComponentHasher::new();
        hasher.start_map();
        hasher.end_map();
        // Format: [TAG_MAP_START=2] [TAG_MAP_END=3]
        assert_eq!(hasher.current_data(), [2_u8, 3]);
        let hash = hasher.finish();
        assert_ne!(hash, 0);
    }

    #[test]
    fn test_empty_list_hash() {
        let mut hasher = ComponentHasher::new();
        hasher.start_list();
        hasher.end_list();
        // Format: [TAG_LIST_START=4] [TAG_LIST_END=5]
        assert_eq!(hasher.current_data(), [4_u8, 5]);
        let hash = hasher.finish();
        assert_ne!(hash, 0);
    }

    #[test]
    fn test_byte_array_hash() {
        let mut hasher = ComponentHasher::new();
        hasher.put_byte_array(&[1, 2, 3, 4]);
        // Format: [TAG_BYTE_ARRAY_START=14] [01 02 03 04] [TAG_BYTE_ARRAY_END=15]
        assert_eq!(hasher.current_data(), [14_u8, 1, 2, 3, 4, 15]);
        let hash = hasher.finish();
        assert_ne!(hash, 0);
    }

    #[test]
    fn test_deterministic() {
        // Same input should always produce same hash
        let hash1 = {
            let mut h = ComponentHasher::new();
            h.put_int(12345);
            h.put_string("test");
            h.finish()
        };
        let hash2 = {
            let mut h = ComponentHasher::new();
            h.put_int(12345);
            h.put_string("test");
            h.finish()
        };
        assert_eq!(hash1, hash2);
    }

    #[test]
    fn test_text_component_steel() {
        use text_components::TextComponent;

        // A simple text component with just "Steel" should collapse to a string
        let component = TextComponent::from("Steel");
        let hash = component.compute_hash();

        // Expected hash from vanilla Minecraft client
        assert_eq!(hash, -25_646_594, "Hash should match vanilla client");
    }

    #[test]
    fn test_text_component_simple_styled() {
        use text_components::TextComponent;
        use text_components::{Modifier, format::Color};

        // Simple styled component: {"text":"R","color":"red","bold":true}
        // Expected hash from vanilla client 1.21.11: 1605556242
        let component = TextComponent::plain("R").color(Color::Red).bold(true);
        let hash = component.compute_hash();

        assert_eq!(
            hash, 1_605_556_242,
            "Hash should match vanilla client 1.21.11 for simple styled text"
        );
    }

    #[test]
    fn test_text_component_rainbow() {
        use text_components::TextComponent;

        // Rainbow text from the issue
        let json = r##"[{"text":"R","color":"red","bold":true},{"text":"a","color":"#ff5a00"},{"text":"i","color":"yellow","bold":true},{"text":"n","color":"green"},{"text":"b","color":"aqua","bold":true},{"text":"o","color":"blue"},{"text":"w","color":"light_purple","bold":true}]"##;
        let component = TextComponent::from_snbt(json).expect("Failed to parse rainbow text");
        let hash = component.compute_hash();

        // Expected hash from vanilla Minecraft client (from the error message)
        assert_eq!(
            hash, 796_582_470,
            "Hash should match vanilla client for rainbow text"
        );
    }
}