1: %% -*- coding: utf-8 -*- 2: %% 3: %% %CopyrightBegin% 4: %% 5: %% Copyright Ericsson AB 2000-2013. All Rights Reserved. 6: %% 7: %% The contents of this file are subject to the Erlang Public License, 8: %% Version 1.1, (the "License"); you may not use this file except in 9: %% compliance with the License. You should have received a copy of the 10: %% Erlang Public License along with this software. If not, it can be 11: %% retrieved online at http://www.erlang.org/. 12: %% 13: %% Software distributed under the License is distributed on an "AS IS" 14: %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15: %% the License for the specific language governing rights and limitations 16: %% under the License. 17: %% 18: %% %CopyrightEnd% 19: %% 20: 21: %% 22: %% Verifying erlang:phash/2. And now also phash2/2, to some extent. 23: %% Test the hashing algorithm for integer numbers in 2 ways: 24: %% 1 Test that numbers in diferent sequences get sufficiently spread 25: %% in a "bit pattern" way (modulo 256 etc). 26: %% 2 Test that numbers are correctly hashed compared to a reference implementation, 27: %% regardless of their internal representation. The hashing algorithm should never 28: %% change. 29: %% The hashing of other datatypes is tested with a few samples, so that we are sure 30: %% it does not change across versions. 31: %% Also tests that the limit can be between 0 and 16#FFFFFFFF. 32: %% 33: -module(hash_SUITE). 34: -export([basic_test/0,cmp_test/1,range_test/0,spread_test/1, 35: phash2_test/0, otp_5292_test/0, 36: otp_7127_test/0]). 37: -compile({nowarn_deprecated_function, {erlang,hash,2}}). 38: 39: %% 40: %% Define to run outside of test server 41: %% 42: %-define(STANDALONE,1). 43: 44: %% 45: %% Define for debug output 46: %% 47: %-define(debug,1). 48: 49: -ifdef(STANDALONE). 50: -define(config(A,B),config(A,B)). 51: -export([config/2]). 52: -else. 53: -include_lib("test_server/include/test_server.hrl"). 54: -endif. 55: 56: -ifdef(debug). 
%% Body of the -ifdef(debug) conditional opened just above: with debug
%% defined the trace macro prints through io:format/2, otherwise it expands
%% to the atom noop. The ?line macro is only (re)defined for standalone runs.
-ifdef(STANDALONE).
-define(line, erlang:display({?MODULE,?LINE}), ).
-endif.
-define(dbgformat(A,B),io:format(A,B)).
-else.
-ifdef(STANDALONE).
-define(line, noop, ).
-endif.
-define(dbgformat(A,B),noop).
-endif.

-ifdef(STANDALONE).
%% Standalone replacement for the ?config lookup: the private directory is
%% the current directory.
config(priv_dir,_) ->
    ".".
-else.
%% Entry points used when the suite is run by the test server.
-export([all/0, suite/0, groups/0,
         init_per_suite/1, end_per_suite/1,
         init_per_group/2, end_per_group/2,
         test_basic/1, test_cmp/1, test_range/1, test_spread/1,
         test_phash2/1, otp_5292/1, bit_level_binaries/1, otp_7127/1,
         end_per_testcase/2, init_per_testcase/2]).

%% Start a ten minute timetrap for the test case and store its handle in
%% the config under the key watchdog.
init_per_testcase(_Case, Config) ->
    Watchdog = test_server:timetrap(test_server:minutes(10)),
    [{watchdog, Watchdog} | Config].

%% Cancel the timetrap stored by init_per_testcase/2.
end_per_testcase(_Case, Config) ->
    test_server:timetrap_cancel(?config(watchdog, Config)),
    ok.

suite() ->
    [{ct_hooks, [ts_install_cth]}].

%% The test cases of this suite, in execution order.
all() ->
    [test_basic,
     test_cmp,
     test_range,
     test_spread,
     test_phash2,
     otp_5292,
     bit_level_binaries,
     otp_7127].

groups() ->
    [].

init_per_suite(Config) ->
    Config.

end_per_suite(_Config) ->
    ok.

init_per_group(_GroupName, Config) ->
    Config.

end_per_group(_GroupName, Config) ->
    Config.


%% Test server wrapper around basic_test/0.
test_basic(suite) ->
    [];
test_basic(doc) ->
    ["Tests basic functionality of erlang:phash and that the "
     "hashes has not changed (neither hash nor phash)"];
test_basic(Config) when is_list(Config) ->
    basic_test().


%% Test server wrapper around cmp_test/1, run with 10000 random integers.
test_cmp(suite) ->
    [];
test_cmp(doc) ->
    ["Compares integer hashes made by erlang:phash with those of a reference "
     "implementation"];
test_cmp(Config) when is_list(Config) ->
    cmp_test(10000).

%% Test server wrapper around range_test/0.
test_range(suite) ->
    [];
test_range(doc) ->
    ["Tests ranges on erlang:phash from 1 to 2^32"];
test_range(Config) when is_list(Config) ->
    range_test().

%% Test server wrapper around spread_test/1.
test_spread(suite) ->
    [];
test_spread(doc) ->
    ["Tests that the hashes are spread ok"];
test_spread(Config) when is_list(Config) ->
    spread_test(10).

%% Test server wrapper around phash2_test/0.
test_phash2(suite) ->
    [];
test_phash2(doc) ->
    ["Tests phash2"];
test_phash2(Config) when is_list(Config) ->
    phash2_test().

%% Test server wrapper around otp_5292_test/0.
otp_5292(suite) ->
    [];
otp_5292(doc) ->
    ["Tests hash, phash and phash2 regarding integers."];
otp_5292(Config) when is_list(Config) ->
    otp_5292_test().

%% Test hashing bit-level binaries.
bit_level_binaries(Config) when is_list(Config) ->
    bit_level_binaries_do().

%% Test server wrapper around otp_7127_test/0.
otp_7127(suite) ->
    [];
otp_7127(doc) ->
    ["Tests phash2/1."];
otp_7127(Config) when is_list(Config) ->
    otp_7127_test().

-endif.



%%
%% Here are the real tests, they can be run without test_server,
%% define -DSTANDALONE when compiling.
%%

%% Checks erlang:phash/2 and erlang:hash/2 against recorded values for a
%% tuple, a mixed list, a reference and a fun (the latter two decoded from
%% external term format), and that phash/2 rejects a range of 0.
%% Returns {comment, String} naming the endianness implied by the hash/2
%% value of the mixed list; that endianness is asserted against
%% erlang:system_info(endian).
basic_test() ->
    Triple = {a,b,c},
    685556714 = erlang:phash(Triple, 16#FFFFFFFF),
    14468079 = erlang:hash(Triple, 16#7FFFFFF),

    Mixed = [a,b,c,{1,2,3},c:pid(0,2,3),16#77777777777777],
    37442646 = erlang:phash(Mixed, 16#FFFFFFFF),
    Comment =
        case erlang:hash(Mixed, 16#7FFFFFF) of
            102727602 ->
                big = erlang:system_info(endian),
                "Big endian machine";
            105818829 ->
                little = erlang:system_info(endian),
                "Little endian machine"
        end,

    ExternalReference = <<131,114,0,3,100,0,13,110,111,110,111,100,101,64,
                          110,111,104,111,115,116,0,0,0,0,122,0,0,0,0,0,0,0,0>>,
    Reference = binary_to_term(ExternalReference),
    1113403635 = erlang:phash(Reference, 16#FFFFFFFF),
    123 = erlang:hash(Reference, 16#7FFFFFF),

    ExternalFun = <<131,117,0,0,0,3,103,100,0,13,110,111,110,111,100,101,64,
                    110,111,104,111,115,116,0,0,0,38,0,0,0,0,0,100,0,8,101,
                    114,108,95,101,118,97,108,97,20,98,5,182,139,98,108,0,0,
                    0,3,104,2,100,0,1,66,109,0,0,0,33,131,114,0,3,100,0,13,
                    110,111,110,111,100,101,64,110,111,104,111,115,116,0,0,
                    0,0,122,0,0,0,0,0,0,0,0,104,2,100,0,1,76,107,0,33,131,
                    114,0,3,100,0,13,110,111,110,111,100,101,64,110,111,104,
                    111,115,116,0,0,0,0,122,0,0,0,0,0,0,0,0,104,2,100,0,1,82,
                    114,0,3,100,0,13,110,111,110,111,100,101,64,110,111,104,
                    111,115,116,0,0,0,0,122,0,0,0,0,0,0,0,0,106,108,0,0,0,1,
                    104,5,100,0,6,99,108,97,117,115,101,97,1,106,106,108,0,0,
                    0,1,104,3,100,0,7,105,110,116,101,103,101,114,97,1,97,1,
                    106,106,104,3,100,0,4,101,118,97,108,104,2,100,0,5,115,
                    104,101,108,108,100,0,10,108,111,99,97,108,95,102,117,
                    110,99,108,0,0,0,1,103,100,0,13,110,111,110,111,100,101,
                    64,110,111,104,111,115,116,0,0,0,22,0,0,0,0,0,106>>,
    DecodedFun = binary_to_term(ExternalFun),
    170987488 = erlang:phash(DecodedFun, 16#FFFFFFFF),
    124460689 = erlang:hash(DecodedFun, 16#7FFFFFF),

    %% A range of zero must be refused with badarg.
    ZeroRange = (catch erlang:phash(1,0)),
    case ZeroRange of
        {'EXIT',{badarg, _}} ->
            {comment, Comment};
        _ ->
            exit(phash_accepted_zero_as_range)
    end.


%% For every power-of-two range from 1 up to (but not including)
%% 16#100000000, hashes one random integer with erlang:phash/2 and checks
%% that the result equals the full-range (16#100000000) hash reduced to
%% that range.
range_test() ->
    random:seed(),
    range_test_loop(1, 16#100000000).

%% Doubles Range until it reaches Limit; exits with range_test_failed on the
%% first mismatch.
range_test_loop(Limit, Limit) ->
    ok;
range_test_loop(Range, Limit) ->
    Key = random:uniform(16#FFFFFFFFFFFFFFFF),
    Hashed = erlang:phash(Key, Range),
    FullRange = erlang:phash(Key, 16#100000000) - 1,
    Expected = (FullRange rem Range) + 1,
    case Hashed of
        Expected ->
            range_test_loop(Range * 2, Limit);
        _ ->
            exit({range_test_failed, hash_on, Key, range, Range})
    end.



%% Runs the spread check (test_fun/4) with erlang:phash for a number of key
%% templates and key representations: the integer itself, its decimal
%% string, its byte list and its binary value.
spread_test(N) ->
    Identity = fun(Key) -> Key end,
    Cases = [{16#50000000000, Identity},
             {0, Identity},
             {16#123456789ABCDEF123456789ABCDEF, Identity},
             {16#50000000000, fun erlang:integer_to_list/1},
             {16#50000000000, fun(Key) -> integer_to_bytelist(Key, []) end},
             {16#50000000000, fun integer_to_binary_value/1}],
    lists:foreach(fun({Template, Convert}) ->
                          test_fun(N, {erlang,phash}, Template, Convert)
                  end, Cases).



%% Compares erlang:phash/2 with the reference implementation make_hash/2
%% for N random integers.
cmp_test(N) ->
    % No need to save seed, the error indicates what number caused it.
    random:seed(),
    do_cmp_hashes(N,8).

%% Steps is the bit width used for the first random term; it is widened by 8
%% on every tenth iteration and wraps back to 8 when it would reach 1024.
do_cmp_hashes(0, _Steps) ->
    ok;
do_cmp_hashes(Remaining, Steps) ->
    Magnitude = random:uniform(1 bsl Steps - 1) + random:uniform(16#FFFFFFFF),
    Value = cmp_apply_sign(random:uniform(2), Magnitude),
    Actual = erlang:phash(Value, 16#FFFFFFFF),
    Reference = make_hash(Value, 16#FFFFFFFF),
    if
        Actual =:= Reference ->
            do_cmp_hashes(Remaining - 1, cmp_next_steps(Remaining, Steps));
        true ->
            exit({missmatch_on_input, Value, phash, Actual, make_hash, Reference})
    end.

%% A coin flip of 1 keeps the magnitude, anything else negates it.
cmp_apply_sign(1, Magnitude) ->
    Magnitude;
cmp_apply_sign(_, Magnitude) ->
    -Magnitude.

%% Bit width for the next iteration of do_cmp_hashes/2.
cmp_next_steps(Remaining, Steps) when Remaining rem 10 =:= 0 ->
    case (Steps + 8) rem 1024 of
        0 ->
            8;
        Wider ->
            Wider
    end;
cmp_next_steps(_Remaining, Steps) ->
    Steps.

%% Golden-value test for erlang:phash2/2 with the range 1 bsl 32.
%%
%% Builds a table L of {Term, ExpectedHash} pairs covering nil, atoms,
%% integers, binaries and bitstrings, funs, pids, ports, references, floats,
%% lists and tuples. Ports, references, one pid and the local funs are
%% obtained by decoding fixed external term format binaries with
%% binary_to_term/1.
%%
%% Two properties are asserted:
%%  - no expected hash value in L is listed for more than one distinct term
%%    (checked with sofs by grouping the terms per hash value);
%%  - erlang:phash2(Term, 1 bsl 32) equals the expected value for every
%%    pair in L.
%% Returns ok; any deviation fails with badmatch.
phash2_test() ->
    Max = 1 bsl 32,
    BPort = <<131,102,100,0,13,110,111,110,111,100,101,64,110,111,104,
             111,115,116,0,0,0,1,0>>,
    Port = binary_to_term(BPort),

    BXPort = <<131,102,100,0,11,97,112,97,64,108,101,103,111,108,97,115,
              0,0,0,24,3>>,
    XPort = binary_to_term(BXPort),

    BRef = <<131,114,0,3,100,0,13,110,111,110,111,100,101,64,110,111,104,
            111,115,116,0,0,0,1,255,0,0,0,0,0,0,0,0>>,
    Ref = binary_to_term(BRef),

    BXRef = <<131,114,0,3,100,0,11,97,112,97,64,108,101,103,111,108,97,115,
             2,0,0,0,155,0,0,0,0,0,0,0,0>>,
    XRef = binary_to_term(BXRef),

    BXPid = <<131,103,100,0,11,97,112,97,64,108,101,103,111,108,97,115,
             0,0,0,36,0,0,0,0,1>>,
    XPid = binary_to_term(BXPid),


    %% The comments before B1..B5 show the expressions the serialized funs
    %% were presumably created from (f1/f2/f3 are kept, disabled, after this
    %% function).
    %% X = f1(), Y = f2(), Z = f3(X, Y),

    %% F1 = fun f1/0, % -> abc
    B1 = <<131,112,0,0,0,66,0,215,206,77,69,249,50,170,17,129,47,21,98,
          13,196,76,242,0,0,0,1,0,0,0,0,100,0,1,116,97,1,98,2,195,126,
          58,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111,
          115,116,0,0,0,112,0,0,0,0,0>>,
    F1 = binary_to_term(B1),

    %% F2 = fun f2/0, % -> abd
    B2 = <<131,112,0,0,0,66,0,215,206,77,69,249,50,170,17,129,47,21,98,
          13,196,76,242,0,0,0,2,0,0,0,0,100,0,1,116,97,2,98,3,130,152,
          185,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111,
          115,116,0,0,0,112,0,0,0,0,0>>,
    F2 = binary_to_term(B2),

    %% F3 = fun f3/2, % -> {abc, abd}
    B3 = <<131,112,0,0,0,66,2,215,206,77,69,249,50,170,17,129,47,21,98,
          13,196,76,242,0,0,0,3,0,0,0,0,100,0,1,116,97,3,98,7,168,160,
          93,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111,
          115,116,0,0,0,112,0,0,0,0,0>>,
    F3 = binary_to_term(B3),

    %% F4 = fun () -> 123456789012345678901234567 end,
    B4 = <<131,112,0,0,0,66,0,215,206,77,69,249,50,170,17,129,47,21,98,
          13,196,76,242,0,0,0,4,0,0,0,0,100,0,1,116,97,4,98,2,230,21,
          171,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111,
          115,116,0,0,0,112,0,0,0,0,0>>,
    F4 = binary_to_term(B4),

    %% F5 = fun() -> {X,Y,Z} end,
    B5 = <<131,112,0,0,0,92,0,215,206,77,69,249,50,170,17,129,47,21,98,
          13,196,76,242,0,0,0,5,0,0,0,3,100,0,1,116,97,5,98,0,99,101,
          130,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111,
          115,116,0,0,0,112,0,0,0,0,0,100,0,3,97,98,99,100,0,3,97,98,
          100,104,2,100,0,3,97,98,99,100,0,3,97,98,100>>,
    F5 = binary_to_term(B5),

    %% A 48000 byte binary (500 copies of the 96 characters 32..127) and a
    %% sub binary of it that skips the first 128 bits.
    Chars = lists:seq(32,127),
    NotAHeapBin = list_to_binary(lists:flatten(lists:duplicate(500,Chars))),
    <<_:128,SubBin/binary>> = NotAHeapBin,
    %% Each entry is {Term, ExpectedPhash2Value}.
    L = [%% nil
         {[], 3468870702},

         %% atom :( not very good ):
         %% (cannot use block_hash due to compatibility issues...)
         {abc,26499},
         {abd,26500},
         {'åäö', 62518},
         %% 81 runes as an atom, 'ᚠᚡᚢᚣᚤᚥᚦᚧᚨᚩᚪᚫᚬᚭᚮᚯᚰᚱᚲᚳᚴᚵᚶᚷᚸᚹᚺᚻᚼᚽᚾᚿᛀᛁᛂᛃᛄᛅᛆᛇᛈᛉᛊᛋᛌᛍᛎᛏᛐᛑᛒᛓᛔᛕᛖᛗᛘᛙᛚᛛᛜᛝᛞᛟᛠᛡᛢᛣᛤᛥᛦᛧᛨᛩᛪ᛫᛬᛭ᛮᛯᛰ'
         {erlang:binary_to_term(<<131, 118, 0, 243, (unicode:characters_to_binary(lists:seq(5792, 5872)))/binary >>), 241561024},
         %% åäö dynamic
         {erlang:binary_to_term(<<131, 118, 0, 6, 195, 165, 195, 164, 195, 182>>),62518},
         %% the atom '゙゚゛゜ゝゞゟ゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズ'
         %% NOTE(review): the length field is 102 bytes while the sequence
         %% 12441..12542 holds 102 code points; presumably only the leading
         %% 34 characters (3 bytes each) form the atom and the rest is
         %% ignored as trailing data -- confirm.
         {erlang:binary_to_term(<<131, 118, 0, 102, (unicode:characters_to_binary(lists:seq(12441, 12542)))/binary>>), 246053818},
         %% the atom, '😃'
         {erlang:binary_to_term(<<131, 118, 0, 4, 240, 159, 152, 131>>), 1026307},

         %% small
         {0,3175731469},
         {1, 539485162},
         {-1, 1117813597},
         {1 bsl 20, 1477815345},
         {-(1 bsl 20), 3076904293},

         %% bignum
         {4294967296, 2108323275},
         {-4294967296, 2586067094},
         {981494972710656, 1622082818},
         {-981494972710656, 3367191372},
         {36893488147419103232, 2545846594},
         {-36893488147419103232, 1649047068},
         {1606938044258990275541962092341162602522202993782792835301376,
          2573322433},
         {-1606938044258990275541962092341162602522202993782792835301376,
          2288753377},

         %% binary
         {<<>>, 147926629},
         {<<0:8>>, 2914887855},
         {<<0:32>>, 2014511533},
         {<<"abc">>, 1306188027},
         {<<"12345678901234567890">>, 3021061640},
         {NotAHeapBin,2644086993},
         {SubBin,3575839236},

         %% unaligned sub binaries
         %% (same contents and same expected values as the entries above)
         {unaligned_sub_bin(<<>>), 147926629},
         {unaligned_sub_bin(<<0:8>>), 2914887855},
         {unaligned_sub_bin(<<0:32>>), 2014511533},
         {unaligned_sub_bin(<<"abc">>), 1306188027},
         {unaligned_sub_bin(<<"12345678901234567890">>), 3021061640},
         {unaligned_sub_bin(NotAHeapBin),2644086993},
         {unaligned_sub_bin(SubBin),3575839236},

         %% bit-level binaries
         {<<0:7>>, 1055790816},
         {<<"abc",13:4>>, 670412287},
         {<<5:3,"12345678901234567890">>, 289973273},

         %% fun
         {F1, 3826013332},
         {F2, 126009152},
         {F3, 3482452479},
         {F4, 633704783},
         {F5, 1241537408},

         %% module fun
         {fun lists:map/2, 840287883},
         {fun lists:map/3, 2318478565},
         {fun lists:filter/2, 635165125},
         {fun lists:filter/3, 3824649396},
         {fun xxx:map/2, 2630071865},
         {fun xxx:map/3, 4237970519},

         %% pid
         {c:pid(0,0,0), 2858162279},
         {c:pid(0,1,0), 2870503209},
         {c:pid(0,2,0), 1707788908},
         {XPid, 1290188489},

         %% port
         {Port,1954394636},
         {XPort,274735},

         %% ref
         {Ref, 1675501484},
         {XRef, 3845846926},

         %% float
         {0.0, 423528920},
         {3.14, 3731709215},
         {-3.14, 1827518724},

         %% list
         {[0.0], 167906877},
         {[{}], 4050867804},
         {[<<>>], 440873397},
         {[[]], 499070068},
         {[abc], 3112446404},
         {[a,b,c], 1505666924},
         {[a,b|c], 433753489},
         {"abc", 519996486},
         {"abc"++[1009], 290369864},
         {"abc"++[1009]++"de", 4134369195},
         {"1234567890123456", 963649519},

         %% tuple
         {{}, 221703996},
         {{{}}, 2165044361},
         {{<<>>}, 682464809},
         {{0.0}, 688441152},
         {{[]}, 1775079505},
         {{abc}, 2032039329},
         {{a,1,{},-3.14}, 1364396939},
         {{c:pid(0,2,0)}, 686997880},
         {{F4}, 2279632930},
         {{a,<<>>}, 2724468891},
         {{b,<<>>}, 2702508511}
        ],
    %% Invert the table into a family hash -> set of terms and keep only the
    %% hash values that have more than one term; there must be none.
    SpecFun = fun(S) -> sofs:no_elements(S) > 1 end,
    F = sofs:relation_to_family(sofs:converse(sofs:relation(L))),
    D = sofs:to_external(sofs:family_specification(SpecFun, F)),
    [] = D,
    %% Collect every entry whose actual hash differs from the expected one;
    %% the resulting list must be empty.
    [] = [{E,H,H2} || {E,H} <- L, (H2 = erlang:phash2(E, Max)) =/= H],
    ok.

%% Only compiled if the macro FALSE is defined. The comments in
%% phash2_test/0 name these functions as the origin of the serialized funs
%% F1..F3.
-ifdef(FALSE).
f1() ->
    abc.

f2() ->
    abd.

f3(X, Y) ->
    {X, Y}.
-endif.

%% Hashes every integer E (and -E) of the intervals produced by d/0 and
%% int/3 and compares MD5 digests of the results with recorded digests:
%%  - S1 covers erlang:hash/2; its digest selects the endianness comment,
%%    which is asserted against erlang:system_info(endian);
%%  - S2 covers erlang:phash/2 and erlang:phash2/2 with range 1 bsl 32 and
%%    must equal one fixed digest.
%% Finally checks that erlang:hash/2 accepts the range (1 bsl 27) - 1 and
%% fails for the range 1 bsl 27.
%% Returns {comment, String}.
otp_5292_test() ->
    H = fun(E) -> [erlang:hash(E, 16#7FFFFFF),
                   erlang:hash(-E, 16#7FFFFFF)]
        end,
    S1 = md5([md5(hash_int(S, E, H)) || {Start, N, Sz} <- d(),
                                        {S, E} <- int(Start, N, Sz)]),
    PH = fun(E) -> [erlang:phash(E, 1 bsl 32),
                    erlang:phash(-E, 1 bsl 32),
                    erlang:phash2(E, 1 bsl 32),
                    erlang:phash2(-E, 1 bsl 32)]
         end,
    S2 = md5([md5(hash_int(S, E, PH)) || {Start, N, Sz} <- d(),
                                         {S, E} <- int(Start, N, Sz)]),
    Comment = case S1 of
                  <<4,248,208,156,200,131,7,1,173,13,239,173,112,81,16,174>> ->
                      big = erlang:system_info(endian),
                      "Big endian machine";
                  <<180,28,33,231,239,184,71,125,76,47,227,241,78,184,176,233>> ->
                      little = erlang:system_info(endian),
                      "Little endian machine"
              end,
    <<124,81,198,121,174,233,19,137,10,83,33,80,226,111,238,99>> = S2,
    2 = erlang:hash(1, (1 bsl 27) -1),
    {'EXIT', _} = (catch erlang:hash(1, (1 bsl 27))),
    {comment, Comment}.

%% Interval descriptions for otp_5292_test/0: for every I in 1..1000, two
%% intervals of 100 integers starting at (1 bsl I) - 100.
d() ->
    [%% Start, NumOfIntervals, SizeOfInterval
     {(1 bsl I)-100, 2, 100} || I <- lists:seq(1, 1000)].

%% Returns N consecutive closed intervals {First, Last} of Sz integers each,
%% the first one starting at Start, in ascending order.
int(Start, N, Sz) ->
    %% Only the accumulator of the mapfoldl is used; the mapped list is
    %% discarded.
    {_, R} = lists:mapfoldl(fun(S, Acc) ->
                                    {S + Sz, [{S,S+Sz-1} | Acc]}
                            end, [], lists:seq(Start, Start+(N-1)*Sz, Sz)),
    lists:reverse(R).

%% Applies F to every integer in Start..End, concatenates the returned
%% lists and returns {Start, End, Digest} where Digest is md5/1 of the
%% concatenation.
hash_int(Start, End, F) ->
    PerInteger = [F(E) || E <- lists:seq(Start, End)],
    {Start, End, md5(lists:append(PerInteger))}.

%% MD5 digest of the external term format of T.
md5(T) ->
    Encoded = term_to_binary(T),
    erlang:md5(Encoded).

%% Checks hash/2, phash/2 and phash2/2 on bitstrings whose size is not a
%% multiple of eight against recorded values.
bit_level_binaries_do() ->
    [3511317,7022633,14044578,28087749,56173436,112344123,90467083|_] =
        bit_level_all_different(fun erlang:hash/2),
    [3511317,7022633,14044578,28087749,56173436,112344123,90467083|_] =
        bit_level_all_different(fun erlang:phash/2),
    [102233154,19716,102133857,4532024,123369135,24565730,109558721|_] =
        bit_level_all_different(fun erlang:phash2/2),

    lists:foreach(fun({Expected, Bits}) ->
                          Expected = test_hash_phash(Bits, 16#7FFFFFF)
                  end,
                  [{13233341, <<42:7>>},
                   {79121243, <<99:7>>},
                   {95517726, <<16#378ABF73:31>>}]),

    lists:foreach(fun({Expected, Bits}) ->
                          Expected = test_phash2(Bits, 16#7FFFFFF)
                  end,
                  [{64409098, <<99:7>>},
                   {55555814, <<123,19:2>>},
                   {83868582, <<123,45,6:3>>},
                   {2123204, <<123,45,7:3>>}]),

    ok.

%% Hashes three families of bitstrings of sizes 1..32 (lowest bit set,
%% highest bit set, all zero) with Hash and range 16#7FFFFFF. Within each
%% family all 32 hash values must differ and must be unaffected by the
%% alignment of the bitstring. Returns the hash values of the
%% "highest bit set" family.
bit_level_all_different(Hash) ->
    {name,Name} = erlang:fun_info(Hash, name),
    Sizes = lists:seq(1, 32),
    bit_level_family(Hash, Name, [<<1:Sz>> || Sz <- Sizes]),
    TopBitHashes =
        bit_level_family(Hash, Name, [<<(1 bsl (Sz-1)):Sz>> || Sz <- Sizes]),
    bit_level_family(Hash, Name, [<<0:Sz>> || Sz <- Sizes]),
    TopBitHashes.

%% Hashes one family of 32 bitstrings, prints the values, and asserts that
%% the unaligned copies hash identically and that all values are distinct.
bit_level_family(Hash, Name, Bitstrs) ->
    Hashes = [Hash(Bits, 16#7FFFFFF) || Bits <- Bitstrs],
    io:format("~p/2 ~p", [Name,Hashes]),
    Hashes = [Hash(unaligned_sub_bitstr(Bits), 16#7FFFFFF) || Bits <- Bitstrs],
    32 = length(lists:usort(Hashes)),
    Hashes.

%% Asserts that erlang:hash/2 and erlang:phash/2 agree on Bitstr, both for
%% the bitstring as given and for an unaligned copy of it. Returns the
%% common hash value.
test_hash_phash(Bitstr, Rem) ->
    Expected = erlang:hash(Bitstr, Rem),
    Expected = erlang:phash(Bitstr, Rem),
    Shifted = unaligned_sub_bitstr(Bitstr),
    Expected = erlang:hash(Shifted, Rem),
    Expected = erlang:phash(Shifted, Rem).

%% Asserts that erlang:phash2/2 gives the same value for Bitstr and for an
%% unaligned copy of it. Returns that value.
test_phash2(Bitstr, Rem) ->
    Expected = erlang:phash2(Bitstr, Rem),
    Shifted = unaligned_sub_bitstr(Bitstr),
    Expected = erlang:phash2(Shifted, Rem).

%% Recorded value check for erlang:phash2/1.
otp_7127_test() ->
    %% Used to return 2589127136.
    38990304 = erlang:phash2(<<"Scott9">>),
    ok.

%%
%% Reference implementation of integer hashing
%%

%%
%% These are primes just above 2^28 that will never be changed, they are also in
%% utils.c.
%%
-define(FN2,268439161).
-define(FN3,268435459).
-define(FN4,268436141).

%% Reference hash of the integer N in the range 1..M.
make_hash(N,M) ->
    {SignPrime, Bytes} = to_bytes(N),
    Hash = integer_hash(pad(Bytes), ?FN2, SignPrime),
    (Hash rem M) + 1.

%% Returns {Prime, Bytes}: the bytes of the magnitude of N, most
%% significant first, and a prime selected by the sign of N.
to_bytes(N) when N < 0 ->
    {?FN4, to_bytes(-N, [])};
to_bytes(N) ->
    {?FN3, to_bytes(N, [])}.

%% Prepends the bytes of the non-negative integer in front of Bytes.
to_bytes(0, Bytes) ->
    Bytes;
to_bytes(Rest, Bytes) ->
    LowByte = Rest band 16#FF,
    to_bytes(Rest bsr 8, [LowByte | Bytes]).

%% Left-pads the byte list with zeros to a multiple of four bytes; the
%% empty list becomes four zero bytes.
pad([]) ->
    [0,0,0,0];
pad(Bytes) ->
    case length(Bytes) rem 4 of
        0 ->
            Bytes;
        Extra ->
            lists:duplicate(4 - Extra, 0) ++ Bytes
    end.

%% Folds the bytes, least significant first, with prime P1 and multiplies
%% the result with P2, everything truncated to 32 bits.
integer_hash(BL,P1,P2) ->
    Folded = do_ihash(0, lists:reverse(BL), P1),
    (Folded * P2) band 16#FFFFFFFF.

%% Hash := (Hash * P + Byte) for every byte, each step truncated to 32 bits.
do_ihash(Hash, Bytes, P) ->
    lists:foldl(fun(Byte, Acc) ->
                        (((Acc * P) band 16#FFFFFFFF) + Byte) band 16#FFFFFFFF
                end, Hash, Bytes).




%%
%% Utilities for the test of "spreading"
%%
-ifdef(debug).
%% Hexadecimal string for N with a "0x" prefix and at least eight digits.
hex(N) ->
    hex(0, N, []).

%% Count is the number of digits produced so far.
hex(Count, 0, Digits) when Count >= 8 ->
    [$0, $x | Digits];
hex(Count, Rest, Digits) ->
    Digit = trans(Rest band 16#F),
    hex(Count + 1, Rest bsr 4, [Digit | Digits]).

%% Character for one hexadecimal digit value 0..15 (upper case letters).
%% Part of the debug-only helpers; the conditional is closed right below.
trans(N) when N < 10 ->
    N + $0;
trans(10) ->
    $A;
trans(11) ->
    $B;
trans(12) ->
    $C;
trans(13) ->
    $D;
trans(14) ->
    $E;
trans(15) ->
    $F.
-endif.

%% Calls Fun(Key) for Template itself and then for Template + K*Ratio,
%% K = 1..N-1, where Ratio = 1 bsl (BP*8); i.e. the keys differ from the
%% template only from byte position BP and upwards.
%% Returns [] (the result of gen_keys2/4).
gen_keys(N, Template, BP,Fun) ->
    Ratio = (1 bsl (BP * 8)),
    Low = Template + Ratio,
    High = Template + (N*Ratio),
    ?dbgformat("N = ~p, BP = ~p, Template = ~p, Low = ~s, High = ~s~n",
               [hex(N),hex(BP),hex(Template),hex(Low),hex(High-1)]),
    Fun(Template),
    gen_keys2(Low, High,Ratio,Fun).

%% Calls Fun for Low, Low+R, ... while the key is below High.
gen_keys2(High,High2,_,_) when High >= High2 ->
    [];
gen_keys2(Low,High,R,Fun) ->
    Fun(Low),
    gen_keys2(Low + R,High,R,Fun).

%% Spread test driver: (re)creates the hit table and runs test_fun_1/6
%% starting at byte position 0 with a key width of 1 byte. {HM,HF} is the
%% module and function of the hash under test, Fun converts each generated
%% integer key to the term that is hashed.
test_fun(N,{HM,HF}, Template, Fun) ->
    init_table(),
    test_fun_1(0,1,N+1,{HM,HF},Template,Fun).

%% test_fun_1(Pos, Siz, To, HashMF, Template, Fun)
%% For the varying-part width Siz (in bytes) and byte position Pos,
%% generates 1 bsl (Siz*8) keys, hashes each into that many buckets and
%% counts the distinct buckets hit. Exits with not_spread_enough if the
%% keys fall into fewer than a third of the buckets (N div Hits > 2).
%% Pos advances by Siz until it passes To, then Siz is increased by one;
%% the test ends when Siz reaches To or the key count would exceed 65536.
test_fun_1(_,To,To,_,_,_) ->
    ok;
test_fun_1(A,X,To,Y,Z,W) when A > To ->
    %% Trace the arguments this clause was actually called with (the first
    %% one is the position A, not To).
    ?dbgformat("~p:~p(~p,~p,~p,~p,~p,~p)~n",[?MODULE,test_fun_1,A,X,To,Y,Z,W]),
    test_fun_1(0,X+1,To,Y,Z,W);
test_fun_1(Pos,Siz,To,{HM,HF},Template,Fun) when 1 bsl (Siz*8) =< 65536 ->
    io:format("Byte: ~p, Size: ~p~n",[Pos,Siz]),
    N = 1 bsl (Siz*8),
    gen_keys(N,Template,Pos,fun (X) ->
                                    %% One table entry per distinct bucket.
                                    P = HM:HF(Fun(X),N),
                                    ets:insert(?MODULE,{P})
                            end
            ),
    Hits = collect_hits(),
    io:format(
      "Hashing of ~p values spread over ~p buckets~n",
      [N,Hits]),
    case (N div Hits) > 2 of
        true ->
            exit({not_spread_enough, Hits, on, N});
        _ ->
            test_fun_1(Pos + Siz, Siz, To,{HM,HF},Template,Fun)
    end;
test_fun_1(_,_,_,_,_,_) ->
    ok.

%% (Re)creates the named ordered_set table used to record bucket hits.
%% Deleting a table that does not exist yet is deliberately ignored.
init_table() ->
    (catch ets:delete(?MODULE)),
    ets:new(?MODULE,[ordered_set,named_table]).

%% Returns the number of entries in the hit table and resets the table.
collect_hits() ->
    N = ets:info(?MODULE,size),
    init_table(),
    N.

%% Binary holding the bytes of N with the least significant byte first
%% (the reverse of integer_to_bytelist/2's result).
integer_to_binary_value(N) ->
    list_to_binary(lists:reverse(integer_to_bytelist(N,[]))).

%% Prepends the bytes of the non-negative integer, most significant byte
%% first, to Bytes. Zero contributes no bytes.
integer_to_bytelist(0, Bytes) ->
    Bytes;
integer_to_bytelist(Rest, Bytes) ->
    LowByte = Rest band 16#FF,
    integer_to_bytelist(Rest bsr 8, [LowByte | Bytes]).

%% Returns a binary equal to Bin0 that is extracted from a larger binary at
%% a bit offset of 6, i.e. not on a byte boundary.
unaligned_sub_bin(Bin0) when is_binary(Bin0) ->
    Size = byte_size(Bin0),
    %% id/1 keeps the construction and the match as two separate steps.
    <<42:6, Sub:Size/binary, 3:2>> = id(<<42:6, Bin0/binary, 3:2>>),
    Sub.

%% Returns a bitstring equal to Bin0 that is extracted from a larger
%% bitstring at a bit offset of 4.
unaligned_sub_bitstr(Bin0) when is_bitstring(Bin0) ->
    BitCount = bit_size(Bin0),
    <<_:4, Sub:BitCount/bits, _:64>> = id(<<(-1):4, Bin0/bits, (-1):64>>),
    Sub.

%% Identity function.
id(Term) ->
    Term.