1: %% 2: %% %CopyrightBegin% 3: %% 4: %% Copyright Ericsson AB 1996-2013. All Rights Reserved. 5: %% 6: %% The contents of this file are subject to the Erlang Public License, 7: %% Version 1.1, (the "License"); you may not use this file except in 8: %% compliance with the License. You should have received a copy of the 9: %% Erlang Public License along with this software. If not, it can be 10: %% retrieved online at http://www.erlang.org/. 11: %% 12: %% Software distributed under the License is distributed on an "AS IS" 13: %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 14: %% the License for the specific language governing rights and limitations 15: %% under the License. 16: %% 17: %% %CopyrightEnd% 18: %% 19: -module(timer_SUITE). 20: 21: -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, init_per_group/2,end_per_group/2]). 22: -export([do_big_test/1]). 23: -export([big_test/1, collect/3, i_t/3, a_t/2]). 24: -export([do_nrev/1, internal_watchdog/2]). 25: 26: -include_lib("test_server/include/test_server.hrl"). 27: 28: %% Test suite for timer module. This is a really nasty test it runs a 29: %% lot of timeouts and then checks in the end if any of them was 30: %% trigggered too early or if any late timeouts was much too 31: %% late. What should be added is more testing of the interface 32: %% functions I guess. But I don't have time for that now. 33: %% 34: %% Expect it to run for at least 5-10 minutes! 35: 36: 37: %% The main test case in this module is "do_big_test", which 38: %% orders a large number of timeouts and measures how 39: %% exact the timeouts arrives. To simulate a system under load there is 40: %% also a number of other concurrent processes running "nrev" at the same 41: %% time. The result is analyzed afterwards by trying to check if the 42: %% measured values are reasonable. It is hard to determine what is 43: %% reasonable on different machines therefore the test can sometimes 44: %% fail, even though the timer module is ok. I have checked against 45: %% previous versions of the timer module (which contained bugs) and it 46: %% seems it fails every time when running the buggy timer modules. 47: %% 48: %% The solution is to rewrite the test suite. Possible strategies for a 49: %% rewrite: smarter math on the measuring data, test cases with varying 50: %% amount of load. The test suite should also include tests that test the 51: %% interface of the timer module. 52: 53: suite() -> [{ct_hooks,[ts_install_cth]}]. 54: 55: all() -> 56: [do_big_test]. 57: 58: groups() -> 59: []. 60: 61: init_per_suite(Config) -> 62: Config. 63: 64: end_per_suite(_Config) -> 65: ok. 66: 67: init_per_group(_GroupName, Config) -> 68: Config. 69: 70: end_per_group(_GroupName, Config) -> 71: Config. 72: 73: 74: %% ------------------------------------------------------- %% 75: 76: do_big_test(TConfig) when is_list(TConfig) -> 77: Dog = ?t:timetrap(?t:minutes(20)), 78: Save = process_flag(trap_exit, true), 79: Result = big_test(200), 80: process_flag(trap_exit, Save), 81: ?t:timetrap_cancel(Dog), 82: report_result(Result). 83: 84: report_result(ok) -> ok; 85: report_result(Error) -> ?line test_server:fail(Error). 86: 87: %% ------------------------------------------------------- %% 88: 89: big_test(N) -> 90: C = start_collect(), 91: system_time(), system_time(), system_time(), 92: A1 = element(2, erlang:now()), 93: A2 = A1 * 3, 94: A3 = element(3, erlang:now()), 95: random:seed(A1, A2, A3), 96: random:uniform(100),random:uniform(100),random:uniform(100), 97: 98: big_loop(C, N, []), 99: 100: %%C ! print_report, 101: C ! {self(), get_report}, 102: Report = receive 103: {report, R} -> 104: R 105: end, 106: C ! stop, 107: receive 108: {'EXIT', C, normal} -> 109: ok 110: end, 111: print_report(Report), 112: Result = analyze_report(Report), 113: %%io:format("big_test is done: ~w~n", [Result]), 114: Result. 115: 116: big_loop(_C, 0, []) -> 117: %%io:format("All processes are done!~n", []), 118: ok; 119: big_loop(C, 0, Pids) -> 120: %%ok = io:format("Loop done, ~w processes remaining~n", [length(Pids)]), 121: %% wait for remaining processes 122: receive 123: {'EXIT', Pid, done} -> 124: big_loop(C, 0, lists:delete(Pid, Pids)); 125: {'EXIT', Pid, Error} -> 126: ?line ok = io:format("XXX Pid ~w died with reason ~p~n", 127: [Pid, Error]), 128: big_loop(C, 0, lists:delete(Pid, Pids)) 129: end; 130: big_loop(C, N, Pids) -> 131: %% First reap any processes that are done. 132: receive 133: {'EXIT', Pid, done} -> 134: big_loop(C, N, lists:delete(Pid, Pids)); 135: {'EXIT', Pid, Error} -> 136: ?line ok =io:format("XXX Internal error: Pid ~w died, reason ~p~n", 137: [Pid, Error]), 138: big_loop(C, N, lists:delete(Pid, Pids)) 139: after 0 -> 140: 141: %% maybe start an interval timer test 142: Pids1 = maybe_start_i_test(Pids, C, random:uniform(4)), 143: 144: %% start 1-4 "after" tests 145: Pids2 = start_after_test(Pids1, C, random:uniform(4)), 146: %%Pids2=Pids1, 147: 148: %% wait a little while 149: timer:sleep(random:uniform(200)*10), 150: 151: %% spawn zero, one or two nrev to get some load ;-/ 152: Pids3 = start_nrev(Pids2, random:uniform(100)), 153: 154: big_loop(C, N-1, Pids3) 155: end. 156: 157: 158: start_nrev(Pids, N) when N < 25 -> 159: Pids; 160: start_nrev(Pids, N) when N < 75 -> 161: [spawn_link(timer_SUITE, do_nrev, [1])|Pids]; 162: start_nrev(Pids, _N) -> 163: NrevPid1 = spawn_link(timer_SUITE, do_nrev, [random:uniform(1000)*10]), 164: NrevPid2 = spawn_link(timer_SUITE, do_nrev, [1]), 165: [NrevPid1,NrevPid2|Pids]. 166: 167: 168: start_after_test(Pids, C, 1) -> 169: TO1 = random:uniform(100)*100, 170: [s_a_t(C, TO1)|Pids]; 171: start_after_test(Pids, C, 2) -> 172: TO1 = random:uniform(100)*100, 173: TO2 = TO1 div random:uniform(3) + 200, 174: [s_a_t(C, TO1),s_a_t(C, TO2)|Pids]; 175: start_after_test(Pids, C, N) -> 176: TO1 = random:uniform(100)*100, 177: start_after_test([s_a_t(C, TO1)|Pids], C, N-1). 178: 179: s_a_t(C, TimeOut) -> 180: spawn_link(timer_SUITE, a_t, [C, TimeOut]). 181: 182: a_t(C, TimeOut) -> 183: start_watchdog(self(), TimeOut), 184: Start = system_time(), 185: timer:send_after(TimeOut, self(), now), 186: receive 187: now -> 188: Stop = system_time(), 189: report(C, Start,Stop,TimeOut), 190: exit(done); 191: watchdog -> 192: Stop = system_time(), 193: report(C, Start,Stop,TimeOut), 194: ?line ok = io:format("Internal watchdog timeout (a), not good!!~n", 195: []), 196: exit(done) 197: end. 198: 199: 200: maybe_start_i_test(Pids, C, 1) -> 201: %% ok do it 202: TOI = random:uniform(100)*100, 203: CountI = random:uniform(10) + 3, % at least 4 times 204: [spawn_link(timer_SUITE, i_t, [C, TOI, CountI])|Pids]; 205: maybe_start_i_test(Pids, _C, _) -> 206: Pids. 207: 208: i_t(C, TimeOut, Times) -> 209: start_watchdog(self(), TimeOut*Times), 210: Start = system_time(), 211: {ok, Ref} = timer:send_interval(TimeOut, interval), 212: i_wait(Start, Start, 1, TimeOut, Times, Ref, C). 213: 214: i_wait(Start, Prev, Times, TimeOut, Times, Ref, C) -> 215: receive 216: interval -> 217: Now = system_time(), 218: report_interval(C, {final,Times}, Start, Prev, Now, TimeOut), 219: timer:cancel(Ref), 220: exit(done); 221: watchdog -> 222: Now = system_time(), 223: report_interval(C, {final,Times}, Start, Prev, Now, TimeOut), 224: timer:cancel(Ref), 225: ?line ok = io:format("Internal watchdog timeout (i), not good!!~n", 226: []), 227: exit(done) 228: end; 229: i_wait(Start, Prev, Count, TimeOut, Times, Ref, C) -> 230: receive 231: interval -> 232: Now = system_time(), 233: report_interval(C, Count, Start, Prev, Now, TimeOut), 234: i_wait(Start, Now, Count+1, TimeOut, Times, Ref, C); 235: watchdog -> 236: Now = system_time(), 237: report_interval(C, {final,Count}, Start, Prev, Now, TimeOut), 238: ?line ok = io:format("Internal watchdog timeout (j), not good!!~n", 239: []), 240: exit(done) 241: end. 242: 243: report(C, Start, Stop, Time) -> 244: C ! {a_sample, Start, Stop, Time}. 245: report_interval(C, Count, Start, Prev, Now, TimeOut) -> 246: C ! {i_sample, Count, Start, Prev, Now, TimeOut}. 247: 248: %% ------------------------------------------------------- %% 249: 250: %% internal watchdog 251: start_watchdog(Pid, TimeOut) -> 252: spawn_link(timer_SUITE, internal_watchdog, [Pid, 3*TimeOut+1000]). 253: 254: internal_watchdog(Pid, TimeOut) -> 255: receive 256: after TimeOut -> 257: Pid ! watchdog, 258: exit(normal) 259: end. 260: 261: %% ------------------------------------------------------- %% 262: 263: -record(stat, {n=0,max=0,min=min,avg=0}). 264: 265: start_collect() -> 266: spawn_link(timer_SUITE, collect, [0,{0,new_update(),new_update()},[]]). 267: 268: collect(N, {E,A,B}, I) -> 269: receive 270: {a_sample, Start, Stop, Time} when Stop - Start > Time -> 271: collect(N+1, {E,update(Stop-Start-Time,A),B}, I); 272: {a_sample, Start, Stop, Time} when Stop - Start < Time -> 273: collect(N+1, {E,A,update(Time-Stop+Start,B)}, I); 274: {a_sample, _Start, _Stop, _Time} -> 275: collect(N+1, {E+1,A,B}, I); 276: {i_sample, {final,Count}, Start, Prev, Now, TimeOut} -> 277: IntervDiff = Now - Prev - TimeOut, 278: Drift = Now - (Count*TimeOut) - Start, 279: collect(N, {E,A,B}, [{{final,Count},IntervDiff,Drift}|I]); 280: {i_sample, Count, Start, Prev, Now, TimeOut} -> 281: IntervDiff = Now - Prev - TimeOut, 282: Drift = Now - (Count*TimeOut) - Start, 283: collect(N, {E,A,B}, [{Count,IntervDiff,Drift}|I]); 284: print_report -> 285: print_report({E,A,B,I}), 286: collect(N,{E,A,B}, I); 287: {Pid, get_report} when is_pid(Pid) -> 288: Pid ! {report, {E, A, B, I}}, 289: collect(N,{E,A,B}, I); 290: reset -> 291: collect(0, {0,new_update(),new_update()}, []); 292: stop -> 293: exit(normal); 294: _Other -> 295: collect(N, {E,A,B}, I) 296: end. 297: 298: new_update() -> #stat{}. 299: update(New, Stat) when New > Stat#stat.max -> 300: Stat#stat{n=Stat#stat.n + 1, max=New, avg=(New+Stat#stat.avg) div 2}; 301: update(New, Stat) when New < Stat#stat.min -> 302: Stat#stat{n=Stat#stat.n + 1, min=New, avg=(New+Stat#stat.avg) div 2}; 303: update(New, Stat) -> 304: Stat#stat{n=Stat#stat.n + 1, avg=(New+Stat#stat.avg) div 2}. 305: 306: %update(New, {N,Max,Min,Avg}) when New>Max -> 307: % {N+1,New,Min,(New+Avg) div 2}; 308: %update(New, {N,Max,Min,Avg}) when New<Min -> 309: % {N+1,Max,New,(New+Avg) div 2}; 310: %update(New, {N,Max,Min,Avg}) -> 311: % {N+1,Max,Min,(New+Avg) div 2}. 312: 313: print_report({E,LateS,EarlyS,I}) -> 314: Early = EarlyS#stat.n, Late = LateS#stat.n, 315: Total = E + Early + Late, 316: io:format("~nOn total of ~w timeouts, there were ~w exact, ~w " 317: "late and ~w early.~n", [Total, E, Late, Early]), 318: io:format("Late stats (N,Max,Min,Avg): ~w~nEarly stats: ~w~n", 319: [LateS, EarlyS]), 320: IntervS = collect_interval_final_stats(I), 321: io:format("Interval stats (Max,Min,Avg): ~w~n", [IntervS]), 322: ok. 323: 324: collect_interval_final_stats(I) -> 325: collect_interval_final_stats(I, 0, min, 0). 326: collect_interval_final_stats([], Max, Min, Avg) -> 327: {Max, Min, Avg}; 328: collect_interval_final_stats([{{final,_Count},_,Dev}|T], Max, Min, Avg) -> 329: NMax = if Dev>Max -> Dev; true -> Max end, 330: NMin = if Dev<Min -> Dev; true -> Min end, 331: collect_interval_final_stats(T, NMax, NMin, (Dev+Avg) div 2); 332: collect_interval_final_stats([_|T], Max, Min, Avg) -> 333: collect_interval_final_stats(T, Max, Min, Avg). 334: 335: analyze_report({E,LateS,EarlyS,I}) -> 336: Early = EarlyS#stat.n, Late = LateS#stat.n, 337: IntervS = collect_interval_final_stats(I), 338: Res1 = min_and_early_check(E, Early, Late, element(2,IntervS)), 339: Res2 = abnormal_max_check(LateS#stat.max, element(1,IntervS)), 340: res_combine(ok, [Res1, Res2]). 341: 342: -define(ok_i_min, -100). 343: -define(ok_max, 8000). 344: -define(ok_i_max, 4000). 345: 346: %% ok as long as Early == 0 and IntervMin >= ok_interv_min 347: min_and_early_check(_Exact, 0, _Late, IntervMin) when IntervMin >= ?ok_i_min -> 348: ok; 349: min_and_early_check(_Exact, Early, _Late, IntervMin) when IntervMin >= ?ok_i_min -> 350: {error, {early_timeouts, Early}}; 351: min_and_early_check(_Exact, 0, _Late, _IntervMin) -> 352: {error, early_interval_timeout}; 353: min_and_early_check(_Exact, Early, _Late, _IntervMin) -> 354: {error, [{early_timeouts, Early},{error, early_interval_timeout}]}. 355: 356: abnormal_max_check(LateMax, IntMax) when LateMax < ?ok_max, 357: IntMax < ?ok_i_max -> 358: ok; 359: abnormal_max_check(LateMax, IntMax) when IntMax < ?ok_i_max -> 360: {error, {big_late_max, LateMax}}; 361: abnormal_max_check(LateMax, IntMax) when LateMax < ?ok_max -> 362: {error, {big_interval_max, IntMax}}; 363: abnormal_max_check(LateMax, IntMax) -> 364: {error, [{big_late_max, LateMax},{big_interval_max, IntMax}]}. 365: 366: res_combine(Res, []) -> 367: Res; 368: res_combine(Res, [ok|T]) -> 369: res_combine(Res, T); 370: res_combine(ok, [{error,What}|T]) -> 371: res_combine({error,What}, T); 372: res_combine({error,Es}, [{error,E}|T]) -> 373: res_combine({error,lists:flatten([E,Es])}, T). 374: 375: 376: system_time() -> 377: %%element(1, statistics(wall_clock)). 378: {M,S,U} = erlang:now(), 379: 1000000000 * M + 1000 * S + (U div 1000). 380: 381: %% ------------------------------------------------------- %% 382: 383: do_nrev(Sleep) -> 384: timer:sleep(Sleep), 385: test(1000,"abcdefghijklmnopqrstuvxyz1234"), 386: exit(done). 387: 388: test(0,_) -> 389: true; 390: test(N,L) -> 391: nrev(L), 392: test(N - 1, L). 393: 394: nrev([]) -> 395: []; 396: nrev([H|T]) -> 397: append(nrev(T), [H]). 398: 399: append([H|T],Z) -> 400: [H|append(T,Z)]; 401: append([],X) -> 402: X. 403: 404: %% ------------------------------------------------------- %%