summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortanjent@gmail.com <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>2011-03-28 18:19:31 +0000
committertanjent@gmail.com <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>2011-03-28 18:19:31 +0000
commit623590de821184aafc3aa6d72b9fd24791884751 (patch)
tree5426a861f2f7a83e2dc4d34fc25e2508796c2eff
parent7f20a31b739a5465934433593061eb6c64ee9997 (diff)
downloadsrc-623590de821184aafc3aa6d72b9fd24791884751.tar.gz
Add faster BICTest variants
Add simpler differential distribution test Add Crap8 hash for testing Include seed value in verification test Test Murmur3_x86_32 by default git-svn-id: http://smhasher.googlecode.com/svn/trunk@100 77a7d1d3-4c08-bdc2-d393-d5859734b01a
-rw-r--r--AvalancheTest.h192
-rw-r--r--DifferentialTest.h41
-rw-r--r--Hashes.cpp21
-rw-r--r--Hashes.h1
-rw-r--r--KeysetTest.cpp12
-rw-r--r--main.cpp74
6 files changed, 319 insertions, 22 deletions
diff --git a/AvalancheTest.h b/AvalancheTest.h
index 30bc6ea..4c23369 100644
--- a/AvalancheTest.h
+++ b/AvalancheTest.h
@@ -134,7 +134,7 @@ void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias,
flipbit(key,keybit);
hash(&key,keybytes,0,&h2);
- keytype d = h1 ^ h2;
+ hashtype d = h1 ^ h2;
for(int out1 = 0; out1 < hashbits; out1++)
for(int out2 = 0; out2 < hashbits; out2++)
@@ -211,7 +211,7 @@ bool BicTest ( pfHash hash, const int reps )
double bias;
int a,b;
- BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,false);
+ BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,true);
if(bias > maxBias)
{
@@ -232,3 +232,191 @@ bool BicTest ( pfHash hash, const int reps )
}
//-----------------------------------------------------------------------------
+// BIC test variant - store all intermediate data in a table, draw diagram
+// afterwards (much faster)
+
+template< typename keytype, typename hashtype >
+void BicTest3 ( pfHash hash, const int reps, bool verbose = true )
+{
+ const int keybytes = sizeof(keytype);
+ const int keybits = keybytes * 8;
+ const int hashbytes = sizeof(hashtype);
+ const int hashbits = hashbytes * 8;
+ const int pagesize = hashbits*hashbits*4;
+
+ Rand r(11938);
+
+ double maxBias = 0;
+ int maxK = 0;
+ int maxA = 0;
+ int maxB = 0;
+
+ keytype key;
+ hashtype h1,h2;
+
+ std::vector<int> bins(keybits*pagesize,0);
+
+ for(int keybit = 0; keybit < keybits; keybit++)
+ {
+ if(keybit % (keybits/10) == 0) printf(".");
+
+ int * page = &bins[keybit*pagesize];
+
+ for(int irep = 0; irep < reps; irep++)
+ {
+ r.rand_p(&key,keybytes);
+ hash(&key,keybytes,0,&h1);
+ flipbit(key,keybit);
+ hash(&key,keybytes,0,&h2);
+
+ hashtype d = h1 ^ h2;
+
+ for(int out1 = 0; out1 < hashbits-1; out1++)
+ for(int out2 = out1+1; out2 < hashbits; out2++)
+ {
+ int * b = &page[(out1*hashbits+out2)*4];
+
+ uint32_t x = getbit(d,out1) | (getbit(d,out2) << 1);
+
+ b[x]++;
+ }
+ }
+ }
+
+ printf("\n");
+
+ for(int out1 = 0; out1 < hashbits-1; out1++)
+ {
+ for(int out2 = out1+1; out2 < hashbits; out2++)
+ {
+ if(verbose) printf("(%3d,%3d) - ",out1,out2);
+
+ for(int keybit = 0; keybit < keybits; keybit++)
+ {
+ int * page = &bins[keybit*pagesize];
+ int * bins = &page[(out1*hashbits+out2)*4];
+
+ double bias = 0;
+
+ for(int b = 0; b < 4; b++)
+ {
+ double b2 = double(bins[b]) / double(reps / 2);
+ b2 = fabs(b2 * 2 - 1);
+
+ if(b2 > bias) bias = b2;
+ }
+
+ if(bias > maxBias)
+ {
+ maxBias = bias;
+ maxK = keybit;
+ maxA = out1;
+ maxB = out2;
+ }
+
+ if(verbose)
+ {
+ if (bias < 0.01) printf(".");
+ else if(bias < 0.05) printf("o");
+ else if(bias < 0.33) printf("O");
+ else printf("X");
+ }
+ }
+
+ // Finished keybit
+
+ if(verbose) printf("\n");
+ }
+
+ if(verbose)
+ {
+ for(int i = 0; i < keybits+12; i++) printf("-");
+ printf("\n");
+ }
+ }
+
+ printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
+}
+
+
+//-----------------------------------------------------------------------------
+// BIC test variant - iterate over output bits, then key bits. No temp storage,
+// but slooooow
+
+template< typename keytype, typename hashtype >
+void BicTest2 ( pfHash hash, const int reps, bool verbose = true )
+{
+ const int keybytes = sizeof(keytype);
+ const int keybits = keybytes * 8;
+ const int hashbytes = sizeof(hashtype);
+ const int hashbits = hashbytes * 8;
+
+ Rand r(11938);
+
+ double maxBias = 0;
+ int maxK = 0;
+ int maxA = 0;
+ int maxB = 0;
+
+ keytype key;
+ hashtype h1,h2;
+
+ for(int out1 = 0; out1 < hashbits-1; out1++)
+ for(int out2 = out1+1; out2 < hashbits; out2++)
+ {
+ if(verbose) printf("(%3d,%3d) - ",out1,out2);
+
+ for(int keybit = 0; keybit < keybits; keybit++)
+ {
+ int bins[4] = { 0, 0, 0, 0 };
+
+ for(int irep = 0; irep < reps; irep++)
+ {
+ r.rand_p(&key,keybytes);
+ hash(&key,keybytes,0,&h1);
+ flipbit(key,keybit);
+ hash(&key,keybytes,0,&h2);
+
+ hashtype d = h1 ^ h2;
+
+ uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
+
+ bins[b]++;
+ }
+
+ double bias = 0;
+
+ for(int b = 0; b < 4; b++)
+ {
+ double b2 = double(bins[b]) / double(reps / 2);
+ b2 = fabs(b2 * 2 - 1);
+
+ if(b2 > bias) bias = b2;
+ }
+
+ if(bias > maxBias)
+ {
+ maxBias = bias;
+ maxK = keybit;
+ maxA = out1;
+ maxB = out2;
+ }
+
+ if(verbose)
+ {
+ if (bias < 0.05) printf(".");
+ else if(bias < 0.10) printf("o");
+ else if(bias < 0.50) printf("O");
+ else printf("X");
+ }
+ }
+
+ // Finished keybit
+
+ if(verbose) printf("\n");
+ }
+
+ printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/DifferentialTest.h b/DifferentialTest.h
index b0b1231..3136cbb 100644
--- a/DifferentialTest.h
+++ b/DifferentialTest.h
@@ -237,4 +237,45 @@ void DiffDistTest ( pfHash hash, const int diffbits, int trials, double & worst,
avg /= double(diffs.size());
}
+//-----------------------------------------------------------------------------
+// Simpler differential-distribution test - for all 1-bit differentials,
+// generate random key pairs and run full distribution/collision tests on the
+// hash differentials
+
+template < typename keytype, typename hashtype >
+bool DiffDistTest2 ( pfHash hash )
+{
+ Rand r(857374);
+
+ int keybits = sizeof(keytype) * 8;
+ const int keycount = 256*256*32;
+ keytype k;
+
+ std::vector<hashtype> hashes(keycount);
+ hashtype h1,h2;
+
+ bool result = true;
+
+ for(int keybit = 0; keybit < keybits; keybit++)
+ {
+ printf("Testing bit %d\n",keybit);
+
+ for(int i = 0; i < keycount; i++)
+ {
+ r.rand_p(&k,sizeof(keytype));
+
+ hash(&k,sizeof(keytype),0,&h1);
+ flipbit(&k,sizeof(keytype),keybit);
+ hash(&k,sizeof(keytype),0,&h2);
+
+ hashes[i] = h1 ^ h2;
+ }
+
+ result &= TestHashList<hashtype>(hashes,true,true,true);
+ printf("\n");
+ }
+
+ return result;
+}
+
//----------------------------------------------------------------------------
diff --git a/Hashes.cpp b/Hashes.cpp
index aae3db8..1930bc5 100644
--- a/Hashes.cpp
+++ b/Hashes.cpp
@@ -132,3 +132,24 @@ uint32_t Bernstein ( const void * key, int len, uint32_t h )
}
//-----------------------------------------------------------------------------
+// Crap8 hash from http://www.team5150.com/~andrew/noncryptohashzoo/Crap8.html
+
+uint32_t Crap8( const uint8_t *key, uint32_t len, uint32_t seed ) {
+ #define c8fold( a, b, y, z ) { p = (uint32_t)(a) * (uint64_t)(b); y ^= (uint32_t)p; z ^= (uint32_t)(p >> 32); }
+ #define c8mix( in ) { h *= m; c8fold( in, m, k, h ); }
+
+ const uint32_t m = 0x83d2e73b, n = 0x97e1cc59, *key4 = (const uint32_t *)key;
+ uint32_t h = len + seed, k = n + len;
+ uint64_t p;
+
+ while ( len >= 8 ) { c8mix(key4[0]) c8mix(key4[1]) key4 += 2; len -= 8; }
+ if ( len >= 4 ) { c8mix(key4[0]) key4 += 1; len -= 4; }
+ if ( len ) { c8mix( key4[0] & ( ( 1 << ( len * 8 ) ) - 1 ) ) }
+ c8fold( h ^ k, n, k, k )
+ return k;
+}
+
+void Crap8_test ( const void * key, int len, uint32_t seed, void * out )
+{
+ *(uint32_t*)out = Crap8((const uint8_t*)key,len,seed);
+}
diff --git a/Hashes.h b/Hashes.h
index d0b6778..b5b3c1f 100644
--- a/Hashes.h
+++ b/Hashes.h
@@ -32,6 +32,7 @@ void FNV ( const void * key, int len, uint32_t seed, void * ou
void SuperFastHash ( const void * key, int len, uint32_t seed, void * out );
void lookup3_test ( const void * key, int len, uint32_t seed, void * out );
void MurmurOAAT_test ( const void * key, int len, uint32_t seed, void * out );
+void Crap8_test ( const void * key, int len, uint32_t seed, void * out );
uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed );
diff --git a/KeysetTest.cpp b/KeysetTest.cpp
index dda137c..6519f8b 100644
--- a/KeysetTest.cpp
+++ b/KeysetTest.cpp
@@ -18,17 +18,23 @@ bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool
memset(hashes,0,hashbytes*256);
memset(final,0,hashbytes);
+ // Hash keys of the form {0}, {0,1}, {0,1,2}... up to N=255,using 256-N as
+ // the seed
+
for(int i = 0; i < 256; i++)
{
key[i] = (uint8_t)i;
-
- hash(key,i,0,&hashes[i*hashbytes]);
+
+ hash(key,i,256-i,&hashes[i*hashbytes]);
}
- //----------
+ // Then hash the result array
hash(hashes,hashbytes*256,0,final);
+ // The first four bytes of that hash, interpreted as a little-endian integer, is our
+ // verification value
+
uint32_t verification = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) | (final[3] << 24);
delete [] key;
diff --git a/main.cpp b/main.cpp
index 77cb771..ab397e7 100644
--- a/main.cpp
+++ b/main.cpp
@@ -16,7 +16,9 @@ bool g_testAll = false;
bool g_testSanity = false;
bool g_testSpeed = false;
bool g_testDiff = false;
+bool g_testDiffDist = false;
bool g_testAvalanche = false;
+bool g_testBIC = false;
bool g_testCyclic = false;
bool g_testSparse = false;
bool g_testPermutation = false;
@@ -43,28 +45,29 @@ HashInfo g_hashes[] =
{ DoNothingHash, 64, 0x00000000, "donothing64", "Do-Nothing function (only valid for measuring call overhead)" },
{ DoNothingHash, 128, 0x00000000, "donothing128", "Do-Nothing function (only valid for measuring call overhead)" },
- { crc32, 32, 0x5C7DDD1F, "crc32", "CRC-32" },
+ { crc32, 32, 0x3719DB20, "crc32", "CRC-32" },
{ md5_32, 32, 0xC10C356B, "md5_32a", "MD5, first 32 bits of result" },
{ sha1_32a, 32, 0xF9376EA7, "sha1_32a", "SHA1, first 32 bits of result" },
- { FNV, 32, 0x2B377407, "FNV", "Fowler-Noll-Vo hash, 32-bit" },
- { lookup3_test, 32, 0xDEC6FD2F, "lookup3", "Bob Jenkins' lookup3" },
+ { FNV, 32, 0xE3CBBE91, "FNV", "Fowler-Noll-Vo hash, 32-bit" },
+ { lookup3_test, 32, 0x3D83917A, "lookup3", "Bob Jenkins' lookup3" },
{ SuperFastHash, 32, 0x980ACD1D, "superfast", "Paul Hsieh's SuperFastHash" },
- { MurmurOAAT_test, 32, 0xF5AC8D0D, "MurmurOAAT", "Murmur one-at-a-time" },
+ { MurmurOAAT_test, 32, 0x5363BD98, "MurmurOAAT", "Murmur one-at-a-time" },
+ { Crap8_test, 32, 0x743E97A1, "Crap8", "Crap8" },
// MurmurHash2
- { MurmurHash2_test, 32, 0xA6D95DE6, "Murmur2", "MurmurHash2 for x86, 32-bit" },
- { MurmurHash2A_test, 32, 0xB79DC030, "Murmur2A", "MurmurHash2A for x86, 32-bit" },
- { MurmurHash64A_test, 64, 0xDBD7FF4B, "Murmur2B", "MurmurHash2 for x64, 64-bit" },
- { MurmurHash64B_test, 64, 0x3B861F71, "Murmur2C", "MurmurHash2 for x86, 64-bit" },
+ { MurmurHash2_test, 32, 0x27864C1E, "Murmur2", "MurmurHash2 for x86, 32-bit" },
+ { MurmurHash2A_test, 32, 0x7FBD4396, "Murmur2A", "MurmurHash2A for x86, 32-bit" },
+ { MurmurHash64A_test, 64, 0x1F0D3804, "Murmur2B", "MurmurHash2 for x64, 64-bit" },
+ { MurmurHash64B_test, 64, 0xDD537C05, "Murmur2C", "MurmurHash2 for x86, 64-bit" },
// MurmurHash3
- { MurmurHash3_x86_32, 32, 0x3B75AFFD, "Murmur3A", "MurmurHash3 for x86, 32-bit" },
- { MurmurHash3_x86_128, 128, 0x78C7F0DB, "Murmur3C", "MurmurHash3 for x86, 128-bit" },
- { MurmurHash3_x64_128, 128, 0x54667393, "Murmur3F", "MurmurHash3 for x64, 128-bit" },
+ { MurmurHash3_x86_32, 32, 0xEA5DFD02, "Murmur3A", "MurmurHash3 for x86, 32-bit" },
+ { MurmurHash3_x86_128, 128, 0x411C981B, "Murmur3C", "MurmurHash3 for x86, 128-bit" },
+ { MurmurHash3_x64_128, 128, 0x04D005BA, "Murmur3F", "MurmurHash3 for x64, 128-bit" },
};
@@ -169,6 +172,20 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
}
//-----------------------------------------------------------------------------
+ // Differential-distribution tests
+
+ if(g_testDiffDist /*|| g_testAll*/)
+ {
+ printf("[[[ Differential Distribution Tests ]]]\n\n");
+
+ bool result = true;
+
+ result &= DiffDistTest2<uint64_t,hashtype>(hash);
+
+ printf("\n");
+ }
+
+ //-----------------------------------------------------------------------------
// Avalanche tests
if(g_testAvalanche || g_testAll)
@@ -202,6 +219,22 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
}
//-----------------------------------------------------------------------------
+ // Bit Independence Criteria
+
+ if(g_testBIC /*|| g_testAll*/)
+ {
+ printf("[[[ Bit Independence Criteria ]]]\n\n");
+
+ bool result = true;
+
+ //result &= BicTest<uint64_t,hashtype>(hash,2000000);
+ BicTest3<Blob<88>,hashtype>(hash,2000000);
+
+ if(!result) printf("*********FAIL*********\n");
+ printf("\n");
+ }
+
+ //-----------------------------------------------------------------------------
// Keyset 'Cyclic'
if(g_testCyclic || g_testAll)
@@ -482,10 +515,15 @@ void testHash ( const char * name )
int main ( int argc, char ** argv )
{
+ const char * hashToTest = "murmur3a";
+
if(argc < 2)
{
- printf("Bad args\n");
- exit(1);
+ printf("(No test hash given on command line, testing Murmur3_x86_32.)\n");
+ }
+ else
+ {
+ hashToTest = argv[1];
}
SetAffinity(2);
@@ -494,18 +532,20 @@ int main ( int argc, char ** argv )
int timeBegin = clock();
- g_testAll = false;
+ g_testAll = true;
- g_testSanity = true;
- g_testSpeed = true;
+ //g_testSanity = true;
+ //g_testSpeed = true;
//g_testAvalanche = true;
+ //g_testBIC = true;
//g_testCyclic = true;
//g_testDiff = true;
+ //g_testDiffDist = true;
//g_testSparse = true;
//g_testPermutation = true;
//g_testZeroes = true;
- testHash(argv[1]);
+ testHash(hashToTest);
//----------