1: %%
    2: %% %CopyrightBegin%
    3: %%
    4: %% Copyright Ericsson AB 1996-2013. All Rights Reserved.
    5: %%
    6: %% The contents of this file are subject to the Erlang Public License,
    7: %% Version 1.1, (the "License"); you may not use this file except in
    8: %% compliance with the License. You should have received a copy of the
    9: %% Erlang Public License along with this software. If not, it can be
   10: %% retrieved online at http://www.erlang.org/.
   11: %%
   12: %% Software distributed under the License is distributed on an "AS IS"
   13: %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
   14: %% the License for the specific language governing rights and limitations
   15: %% under the License.
   16: %%
   17: %% %CopyrightEnd%
   18: %%
   19: -module(timer_SUITE).
   20: 
   21: -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, init_per_group/2,end_per_group/2]).
   22: -export([do_big_test/1]).
   23: -export([big_test/1, collect/3, i_t/3, a_t/2]).
   24: -export([do_nrev/1, internal_watchdog/2]).
   25: 
   26: -include_lib("test_server/include/test_server.hrl").
   27: 
   28: %% Test suite for timer module. This is a really nasty test it runs a
   29: %% lot of timeouts and then checks in the end if any of them was
   30: %% trigggered too early or if any late timeouts was much too
   31: %% late. What should be added is more testing of the interface
   32: %% functions I guess. But I don't have time for that now.
   33: %%
   34: %% Expect it to run for at least 5-10 minutes!
   35: 
   36: 
   37: %% The main test case in this module is "do_big_test", which
   38: %% orders a large number of timeouts and measures how
   39: %% exact the timeouts arrives. To simulate a system under load there is
   40: %% also a number of other concurrent processes running "nrev" at the same
   41: %% time. The result is analyzed afterwards by trying to check if the
   42: %% measured values are reasonable. It is hard to determine what is
   43: %% reasonable on different machines therefore the test can sometimes
   44: %% fail, even though the timer module is ok. I have checked against
   45: %% previous versions of the timer module (which contained bugs) and it
   46: %% seems it fails every time when running the buggy timer modules.
   47: %% 
   48: %% The solution is to rewrite the test suite. Possible strategies for a
   49: %% rewrite: smarter math on the measuring data, test cases with varying
   50: %% amount of load. The test suite should also include tests that test the
   51: %% interface of the timer module.
   52: 
   53: suite() -> [{ct_hooks,[ts_install_cth]}].
   54: 
   55: all() -> 
   56:     [do_big_test].
   57: 
   58: groups() -> 
   59:     [].
   60: 
   61: init_per_suite(Config) ->
   62:     Config.
   63: 
   64: end_per_suite(_Config) ->
   65:     ok.
   66: 
   67: init_per_group(_GroupName, Config) ->
   68:     Config.
   69: 
   70: end_per_group(_GroupName, Config) ->
   71:     Config.
   72: 
   73: 
   74: %% ------------------------------------------------------- %%
   75: 
   76: do_big_test(TConfig) when is_list(TConfig) ->
   77:     Dog = ?t:timetrap(?t:minutes(20)),
   78:     Save = process_flag(trap_exit, true),
   79:     Result = big_test(200),
   80:     process_flag(trap_exit, Save),
   81:     ?t:timetrap_cancel(Dog),
   82:     report_result(Result).
   83: 
   84: report_result(ok) -> ok;
   85: report_result(Error) -> ?line test_server:fail(Error).
   86: 
   87: %% ------------------------------------------------------- %%
   88: 
   89: big_test(N) ->
   90:     C = start_collect(),
   91:     system_time(), system_time(), system_time(),
   92:     A1 = element(2, erlang:now()),
   93:     A2 = A1 * 3,
   94:     A3 = element(3, erlang:now()),
   95:     random:seed(A1, A2, A3),
   96:     random:uniform(100),random:uniform(100),random:uniform(100),
   97: 
   98:     big_loop(C, N, []),
   99: 
  100:     %%C ! print_report,
  101:     C ! {self(), get_report},
  102:     Report = receive
  103: 		 {report, R} ->
  104: 		     R
  105: 	     end,
  106:     C ! stop,
  107:     receive
  108: 	{'EXIT', C, normal} ->
  109: 	    ok
  110:     end,
  111:     print_report(Report),
  112:     Result = analyze_report(Report),
  113:     %%io:format("big_test is done: ~w~n", [Result]),
  114:     Result.
  115:     
  116: big_loop(_C, 0, []) ->
  117:     %%io:format("All processes are done!~n", []),
  118:     ok;
  119: big_loop(C, 0, Pids) ->
  120:     %%ok = io:format("Loop done, ~w processes remaining~n", [length(Pids)]),
  121:     %% wait for remaining processes
  122:     receive
  123: 	{'EXIT', Pid, done} ->
  124: 	    big_loop(C, 0, lists:delete(Pid, Pids));
  125: 	{'EXIT', Pid, Error} ->
  126: 	    ?line ok = io:format("XXX Pid ~w died with reason ~p~n",
  127: 				 [Pid, Error]),
  128: 	    big_loop(C, 0, lists:delete(Pid, Pids))
  129:     end;
  130: big_loop(C, N, Pids) ->
  131:     %% First reap any processes that are done.
  132:     receive
  133: 	{'EXIT', Pid, done} ->
  134: 	    big_loop(C, N, lists:delete(Pid, Pids));
  135: 	{'EXIT', Pid, Error} ->
  136: 	    ?line ok =io:format("XXX Internal error: Pid ~w died, reason ~p~n",
  137: 				 [Pid, Error]),
  138: 	    big_loop(C, N, lists:delete(Pid, Pids))
  139:     after 0 ->
  140: 
  141: 	    %% maybe start an interval timer test
  142: 	    Pids1 = maybe_start_i_test(Pids, C, random:uniform(4)),
  143: 	    
  144: 	    %% start 1-4 "after" tests
  145: 	    Pids2 = start_after_test(Pids1, C, random:uniform(4)),
  146: 	    %%Pids2=Pids1,
  147: 
  148: 	    %% wait a little while
  149: 	    timer:sleep(random:uniform(200)*10),
  150: 
  151: 	    %% spawn zero, one or two nrev to get some load ;-/
  152: 	    Pids3 = start_nrev(Pids2, random:uniform(100)),
  153: 	    
  154: 	    big_loop(C, N-1, Pids3)
  155:     end.
  156: 
  157: 
  158: start_nrev(Pids, N) when N < 25 ->
  159:     Pids;
  160: start_nrev(Pids, N) when N < 75 ->
  161:     [spawn_link(timer_SUITE, do_nrev, [1])|Pids];
  162: start_nrev(Pids, _N) ->
  163:     NrevPid1 = spawn_link(timer_SUITE, do_nrev, [random:uniform(1000)*10]),
  164:     NrevPid2 = spawn_link(timer_SUITE, do_nrev, [1]),
  165:     [NrevPid1,NrevPid2|Pids].
  166:     
  167: 
  168: start_after_test(Pids, C, 1) ->
  169:     TO1 = random:uniform(100)*100,
  170:     [s_a_t(C, TO1)|Pids];
  171: start_after_test(Pids, C, 2) ->
  172:     TO1 = random:uniform(100)*100,
  173:     TO2 = TO1 div random:uniform(3) + 200,
  174:     [s_a_t(C, TO1),s_a_t(C, TO2)|Pids];
  175: start_after_test(Pids, C, N) ->
  176:     TO1 = random:uniform(100)*100,
  177:     start_after_test([s_a_t(C, TO1)|Pids], C, N-1).
  178: 
  179: s_a_t(C, TimeOut) ->
  180:     spawn_link(timer_SUITE, a_t, [C, TimeOut]).
  181: 
  182: a_t(C, TimeOut) ->
  183:     start_watchdog(self(), TimeOut),
  184:     Start = system_time(),
  185:     timer:send_after(TimeOut, self(), now),
  186:     receive
  187: 	now ->
  188: 	    Stop = system_time(),
  189: 	    report(C, Start,Stop,TimeOut),
  190: 	    exit(done);
  191: 	watchdog ->
  192: 	    Stop = system_time(),
  193: 	    report(C, Start,Stop,TimeOut),
  194: 	    ?line ok = io:format("Internal watchdog timeout (a), not good!!~n",
  195: 				 []),
  196: 	    exit(done)
  197:     end.
  198: 
  199: 
  200: maybe_start_i_test(Pids, C, 1) ->
  201:     %% ok do it
  202:     TOI = random:uniform(100)*100,
  203:     CountI = random:uniform(10) + 3,                      % at least 4 times
  204:     [spawn_link(timer_SUITE, i_t, [C, TOI, CountI])|Pids];
  205: maybe_start_i_test(Pids, _C, _) ->
  206:     Pids.
  207: 
  208: i_t(C, TimeOut, Times) ->
  209:     start_watchdog(self(), TimeOut*Times),
  210:     Start = system_time(),
  211:     {ok, Ref} = timer:send_interval(TimeOut, interval),
  212:     i_wait(Start, Start, 1, TimeOut, Times, Ref, C).
  213: 
  214: i_wait(Start, Prev, Times, TimeOut, Times, Ref, C) ->
  215:     receive
  216: 	interval ->
  217: 	    Now = system_time(),
  218: 	    report_interval(C, {final,Times}, Start, Prev, Now, TimeOut),
  219: 	    timer:cancel(Ref),
  220: 	    exit(done);
  221: 	watchdog ->
  222: 	    Now = system_time(),
  223: 	    report_interval(C, {final,Times}, Start, Prev, Now, TimeOut),
  224: 	    timer:cancel(Ref),
  225: 	    ?line ok = io:format("Internal watchdog timeout (i), not good!!~n",
  226: 				 []),
  227: 	    exit(done)
  228:     end;
  229: i_wait(Start, Prev, Count, TimeOut, Times, Ref, C) ->
  230:     receive
  231: 	interval ->
  232: 	    Now = system_time(),
  233: 	    report_interval(C, Count, Start, Prev, Now, TimeOut),
  234: 	    i_wait(Start, Now, Count+1, TimeOut, Times, Ref, C);
  235: 	watchdog ->
  236: 	    Now = system_time(),
  237: 	    report_interval(C, {final,Count}, Start, Prev, Now, TimeOut),
  238: 	    ?line ok = io:format("Internal watchdog timeout (j), not good!!~n",
  239: 				 []),
  240: 	    exit(done)
  241:     end.
  242: 
  243: report(C, Start, Stop, Time) ->
  244:     C ! {a_sample, Start, Stop, Time}.
  245: report_interval(C, Count, Start, Prev, Now, TimeOut) ->
  246:     C ! {i_sample, Count, Start, Prev, Now, TimeOut}.
  247: 
  248: %% ------------------------------------------------------- %%
  249: 
  250: %% internal watchdog
  251: start_watchdog(Pid, TimeOut) ->
  252:     spawn_link(timer_SUITE, internal_watchdog, [Pid, 3*TimeOut+1000]).
  253: 
  254: internal_watchdog(Pid, TimeOut) ->
  255:     receive
  256:     after TimeOut ->
  257: 	    Pid ! watchdog,
  258: 	    exit(normal)
  259:     end.
  260: 
  261: %% ------------------------------------------------------- %%
  262: 
  263: -record(stat, {n=0,max=0,min=min,avg=0}).
  264: 
  265: start_collect() ->
  266:     spawn_link(timer_SUITE, collect, [0,{0,new_update(),new_update()},[]]).
  267: 
  268: collect(N, {E,A,B}, I) ->
  269:     receive
  270: 	{a_sample, Start, Stop, Time} when Stop - Start > Time ->
  271: 	    collect(N+1, {E,update(Stop-Start-Time,A),B}, I);
  272: 	{a_sample, Start, Stop, Time} when Stop - Start < Time ->
  273: 	    collect(N+1, {E,A,update(Time-Stop+Start,B)}, I);
  274: 	{a_sample, _Start, _Stop, _Time} ->
  275: 	    collect(N+1, {E+1,A,B}, I);
  276: 	{i_sample, {final,Count}, Start, Prev, Now, TimeOut} ->
  277: 	    IntervDiff = Now - Prev - TimeOut,
  278: 	    Drift = Now - (Count*TimeOut) - Start,
  279: 	    collect(N, {E,A,B}, [{{final,Count},IntervDiff,Drift}|I]);
  280: 	{i_sample, Count, Start, Prev, Now, TimeOut} ->
  281: 	    IntervDiff = Now - Prev - TimeOut,
  282: 	    Drift = Now - (Count*TimeOut) - Start,
  283: 	    collect(N, {E,A,B}, [{Count,IntervDiff,Drift}|I]);
  284: 	print_report ->
  285: 	    print_report({E,A,B,I}),
  286: 	    collect(N,{E,A,B}, I);
  287: 	{Pid, get_report} when is_pid(Pid) ->
  288: 	    Pid ! {report, {E, A, B, I}},
  289: 	    collect(N,{E,A,B}, I);
  290: 	reset ->
  291: 	    collect(0, {0,new_update(),new_update()}, []);
  292: 	stop ->
  293: 	    exit(normal);
  294: 	_Other ->
  295: 	    collect(N, {E,A,B}, I)
  296:     end.
  297: 
  298: new_update() -> #stat{}.
  299: update(New, Stat) when New > Stat#stat.max ->
  300:     Stat#stat{n=Stat#stat.n + 1, max=New, avg=(New+Stat#stat.avg) div 2};
  301: update(New, Stat) when New < Stat#stat.min ->
  302:     Stat#stat{n=Stat#stat.n + 1, min=New, avg=(New+Stat#stat.avg) div 2};
  303: update(New, Stat) ->
  304:     Stat#stat{n=Stat#stat.n + 1, avg=(New+Stat#stat.avg) div 2}.
  305: 
  306: %update(New, {N,Max,Min,Avg}) when New>Max ->
  307: %    {N+1,New,Min,(New+Avg) div 2};
  308: %update(New, {N,Max,Min,Avg}) when New<Min ->
  309: %    {N+1,Max,New,(New+Avg) div 2};
  310: %update(New, {N,Max,Min,Avg}) ->
  311: %    {N+1,Max,Min,(New+Avg) div 2}.
  312: 
  313: print_report({E,LateS,EarlyS,I}) ->
  314:     Early = EarlyS#stat.n, Late = LateS#stat.n,
  315:     Total = E + Early + Late,
  316:     io:format("~nOn total of ~w timeouts, there were ~w exact, ~w "
  317: 	      "late and ~w early.~n", [Total, E, Late, Early]),
  318:     io:format("Late stats (N,Max,Min,Avg): ~w~nEarly stats: ~w~n",
  319: 	      [LateS, EarlyS]),
  320:     IntervS = collect_interval_final_stats(I),
  321:     io:format("Interval stats (Max,Min,Avg): ~w~n", [IntervS]),
  322:     ok.
  323: 
  324: collect_interval_final_stats(I) ->
  325:     collect_interval_final_stats(I, 0, min, 0).
  326: collect_interval_final_stats([], Max, Min, Avg) ->
  327:     {Max, Min, Avg};
  328: collect_interval_final_stats([{{final,_Count},_,Dev}|T], Max, Min, Avg) ->
  329:     NMax = if Dev>Max -> Dev; true -> Max end,
  330:     NMin = if Dev<Min -> Dev; true -> Min end,
  331:     collect_interval_final_stats(T, NMax, NMin, (Dev+Avg) div 2);
  332: collect_interval_final_stats([_|T], Max, Min, Avg) ->
  333:     collect_interval_final_stats(T, Max, Min, Avg).
  334: 
  335: analyze_report({E,LateS,EarlyS,I}) ->
  336:     Early = EarlyS#stat.n, Late = LateS#stat.n,
  337:     IntervS = collect_interval_final_stats(I),
  338:     Res1 = min_and_early_check(E, Early, Late, element(2,IntervS)),
  339:     Res2 = abnormal_max_check(LateS#stat.max, element(1,IntervS)),
  340:     res_combine(ok, [Res1, Res2]).
  341: 
  342: -define(ok_i_min, -100).
  343: -define(ok_max, 8000).
  344: -define(ok_i_max, 4000).
  345: 
  346: %% ok as long as Early == 0 and IntervMin >= ok_interv_min
  347: min_and_early_check(_Exact, 0, _Late, IntervMin) when IntervMin >= ?ok_i_min ->
  348:     ok;
  349: min_and_early_check(_Exact, Early, _Late, IntervMin) when IntervMin >= ?ok_i_min ->
  350:     {error, {early_timeouts, Early}};
  351: min_and_early_check(_Exact, 0, _Late, _IntervMin) ->
  352:     {error, early_interval_timeout};
  353: min_and_early_check(_Exact, Early, _Late, _IntervMin) ->
  354:     {error, [{early_timeouts, Early},{error, early_interval_timeout}]}.
  355: 
  356: abnormal_max_check(LateMax, IntMax) when LateMax < ?ok_max,
  357:                                          IntMax < ?ok_i_max ->
  358:     ok;
  359: abnormal_max_check(LateMax, IntMax) when IntMax < ?ok_i_max ->
  360:     {error, {big_late_max, LateMax}};
  361: abnormal_max_check(LateMax, IntMax) when LateMax < ?ok_max ->
  362:     {error, {big_interval_max, IntMax}};
  363: abnormal_max_check(LateMax, IntMax) ->
  364:     {error, [{big_late_max, LateMax},{big_interval_max, IntMax}]}.
  365: 
  366: res_combine(Res, []) ->
  367:     Res;
  368: res_combine(Res, [ok|T]) ->
  369:     res_combine(Res, T);
  370: res_combine(ok, [{error,What}|T]) ->
  371:     res_combine({error,What}, T);
  372: res_combine({error,Es}, [{error,E}|T]) ->
  373:     res_combine({error,lists:flatten([E,Es])}, T).
  374: 
  375: 
  376: system_time() ->
  377:     %%element(1, statistics(wall_clock)).
  378:     {M,S,U} = erlang:now(),
  379:     1000000000 * M + 1000 * S + (U div 1000).
  380: 
  381: %% ------------------------------------------------------- %%
  382: 
  383: do_nrev(Sleep) ->
  384:     timer:sleep(Sleep),
  385:     test(1000,"abcdefghijklmnopqrstuvxyz1234"),
  386:     exit(done).
  387: 
  388: test(0,_) ->
  389:     true;
  390: test(N,L) ->
  391:     nrev(L),
  392:     test(N - 1, L).
  393: 
  394: nrev([]) ->
  395:     [];
  396: nrev([H|T]) ->
  397:     append(nrev(T), [H]).
  398:     
  399: append([H|T],Z) ->
  400: 	[H|append(T,Z)];
  401: append([],X) ->
  402: 	X.
  403: 
  404: %% ------------------------------------------------------- %%