Double hashing w/ extra bits of hash.

This commit is contained in:
Simon Forman 2023-03-03 14:52:26 -08:00
parent d8571d2063
commit 7284a7fcf3
1 changed files with 51 additions and 10 deletions

View File

@ -100,27 +100,69 @@ hash_key(char* key)
} }
// Capacity is a power of two (10 for now.) // Capacity is a power of two (10 for now.)
// The "10" above is the exponent: CAPACITY == 1 << EXPONENT and
// HASH_MASK == CAPACITY - 1, so the three values must be changed together.
#define EXPONENT 10
#define CAPACITY 1024 #define CAPACITY 1024
#define HASH_MASK 0x3ff #define HASH_MASK 1023
// One slot per table index, each holding a pointer to a symbol string.
// A null slot is treated as empty by ht_insert() and ht_lookup().
char* hash_table[CAPACITY]; char* hash_table[CAPACITY];
// ht_insert: store `symbol` in hash_table using open addressing with
// double hashing and return JOY_VALUE(joySymbol, VALUE_OF(hash)), where
// hash is hash_key(symbol). The first slot tried is hash % CAPACITY;
// later slots are reached by repeatedly adding an odd increment taken
// from the hash bits above the low EXPONENT bits.
u32 u32
ht_insert(char *symbol) ht_insert(char *symbol)
{ {
u32 index = hash_key(symbol) & HASH_MASK; u64 hash = hash_key(symbol);
// We're not checking for collisions yet. u32 index = hash % CAPACITY;
hash_table[index] = symbol;
return index; char *candidate = hash_table[index];
// Fast path: the home slot is empty, so claim it without probing.
if (!candidate) {
hash_table[index] = symbol;
return JOY_VALUE(joySymbol, VALUE_OF(hash));
}
// https://en.wikipedia.org/wiki/Double_hashing
// Rather than use another hash function I'm going to try
// using the extra bits of the same hash.
u32 increment = ((VALUE_OF(hash) >> EXPONENT) | 1) % CAPACITY;
// If I understand correctly, making the increment odd
// means it will traverse the whole (even-sized) table.
// NOTE(review): the loop below exits only on an empty slot or a match.
// If the table has no empty slot and no matching entry it would keep
// probing forever; there is no fullness check here.
while (candidate) {
// Compare pointers then hashes (since we already have
// one hash I'm guessing that that's cheaper or at least
// no more expensive than string comparison.)
// NOTE(review): equal hashes are treated as "same symbol". Two
// different strings whose hash_key() values are equal would stop
// here, and the second string would never be stored.
if (candidate == symbol || hash == hash_key(candidate))
break;
index = (index + increment) % CAPACITY;
candidate = hash_table[index];
}
// The loop ended on an empty slot (not on a match): store the symbol there.
if (!candidate) {
hash_table[index] = symbol;
}
return JOY_VALUE(joySymbol, VALUE_OF(hash));
} }
// ht_lookup: find the string stored for `hash` by walking the same
// probe sequence ht_insert() uses: start at hash % CAPACITY and step by
// an odd increment taken from the hash bits above the low EXPONENT bits.
// Returns the first stored pointer whose VALUE_OF(hash_key()) equals
// `hash`, or 0 if an empty slot is reached first.
char* char*
ht_lookup(u64 hash) ht_lookup(u32 hash)
{ {
u64 index = hash & HASH_MASK; // Note that hash will be truncated to N (N=30 as it happens) bits
return hash_table[index]; // by VALUE_OF().
u32 index = hash % CAPACITY;
char *candidate = hash_table[index];
u32 increment = ((hash >> EXPONENT) | 1) % CAPACITY;
// NOTE(review): this loop stops only on a match or an empty slot, so a
// table with no empty slot and no matching entry would never return.
while (candidate) {
// Recompute the candidate's hash and truncate it with VALUE_OF() so it
// can be compared with the already-truncated `hash` argument.
if (hash == VALUE_OF(hash_key(candidate)))
return candidate;
index = (index + increment) % CAPACITY;
candidate = hash_table[index];
}
// Reached an empty slot without a match: nothing is stored for this hash.
return 0;
} }
// push_symbol: insert `symbol` into the hash table via ht_insert(), tag
// the result as a joySymbol value, and cons it onto `stack`.
// Returns whatever cons() returns (callers assign it back to the stack).
u32
push_symbol(char *symbol, u32 stack)
{
	u32 inserted = ht_insert(symbol);
	u32 word = JOY_VALUE(joySymbol, inserted);
	return cons(word, stack);
}
void void
main() main()
{ {
@ -135,8 +177,7 @@ main()
stack = cons(joy_true, stack); stack = cons(joy_true, stack);
stack = cons(42, stack); stack = cons(42, stack);
u32 word = JOY_VALUE(joySymbol, ht_insert("cats")); stack = push_symbol("cats", stack);
stack = cons(word, stack);
u32 el = empty_list; u32 el = empty_list;