1: %%
    2: %% %CopyrightBegin%
    3: %%
    4: %% Copyright Ericsson AB 1996-2011. All Rights Reserved.
    5: %%
    6: %% The contents of this file are subject to the Erlang Public License,
    7: %% Version 1.1, (the "License"); you may not use this file except in
    8: %% compliance with the License. You should have received a copy of the
    9: %% Erlang Public License along with this software. If not, it can be
   10: %% retrieved online at http://www.erlang.org/.
   11: %%
   12: %% Software distributed under the License is distributed on an "AS IS"
   13: %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
   14: %% the License for the specific language governing rights and limitations
   15: %% under the License.
   16: %%
   17: %% %CopyrightEnd%
   18: %%
   19: -module(memsup_SUITE).
   20: -include_lib("test_server/include/test_server.hrl").
   21: 
   22: %% Test server specific exports
   23: -export([all/0, suite/0,groups/0,init_per_group/2,end_per_group/2]).
   24: -export([init_per_suite/1, end_per_suite/1]).
   25: -export([init_per_testcase/2, end_per_testcase/2]).
   26: 
   27: %% Test cases
   28: -export([api/1, alarm1/1, alarm2/1, process/1]).
   29: -export([config/1, timeout/1, unavailable/1, port/1]).
   30: -export([otp_5910/1]).
   31: 
   32: %% Default timetrap timeout (set in init_per_testcase)
   33: -define(default_timeout, ?t:minutes(1)).
   34: 
   35: init_per_suite(Config) when is_list(Config) ->
   36:     ?line ok = application:start(os_mon),
   37:     Config.
   38: 
   39: end_per_suite(Config) when is_list(Config) ->
   40:     ?line ok = application:stop(os_mon),
   41:     Config.
   42: 
   43: init_per_testcase(_Case, Config) ->
   44:     Dog = ?t:timetrap(?default_timeout),
   45:     [{watchdog,Dog} | Config].
   46: 
   47: end_per_testcase(_Case, Config) ->
   48:     Dog = ?config(watchdog, Config),
   49:     ?t:timetrap_cancel(Dog),
   50:     Config.
   51: 
   52: suite() -> [{ct_hooks,[ts_install_cth]}].
   53: 
   54: all() -> 
   55:     All = case test_server:os_type() of
   56: 	      {unix, sunos} ->
   57: 		  [api, alarm1, alarm2, process, config, timeout,
   58: 		   unavailable, port];
   59: 	      {unix, linux} ->
   60: 		  [api, alarm1, alarm2, process, timeout];
   61: 	      _OS -> [api, alarm1, alarm2, process]
   62: 	  end,
   63:     Bugs = [otp_5910],
   64:     All ++ Bugs.
   65: 
   66: groups() -> 
   67:     [].
   68: 
   69: init_per_group(_GroupName, Config) ->
   70:     Config.
   71: 
   72: end_per_group(_GroupName, Config) ->
   73:     Config.
   74: 
   75: 
   76: api(suite) ->
   77:     [];
   78: api(doc) ->
   79:     ["Test of API functions"];
   80: api(Config) when is_list(Config) ->
   81: 
   82:     %% get_memory_data()
   83:     ?line RegMemData = memsup:get_memory_data(),
   84:     case RegMemData of
   85: 	{TotMem, AllBytes, {Pid, PidBytes}} when is_integer(TotMem),
   86: 						 is_integer(AllBytes),
   87: 						 is_pid(Pid),
   88: 						 is_integer(PidBytes) ->
   89: 	    ok;
   90: 	{0, 0, _WorstPid} ->
   91: 	    ?line ?t:fail(first_data_collection_failed);
   92: 	_ ->
   93: 	    ?line ?t:fail({bad_return, RegMemData})
   94:     end,
   95: 
   96:     %% get_system_memory_data()
   97:     ?line ExtMemData = memsup:get_system_memory_data(),
   98:     Tags = [	total_memory,
   99: 		free_memory,
  100: 		system_total_memory,
  101: 		largest_free,
  102: 		number_of_free,
  103: 		free_swap,
  104: 		total_swap,
  105: 		cached_memory,
  106: 		buffered_memory,
  107: 		shared_memory],
  108: 
  109:     ?line true = lists:all(fun({Tag,Value}) when is_atom(Tag),
  110: 						 is_integer(Value) ->
  111: 				   lists:member(Tag, Tags);
  112: 			      (_) ->
  113: 				   false
  114: 			   end,
  115: 			   ExtMemData),
  116: 
  117:     %% get_os_wordsize()
  118:     ?line ok = case memsup:get_os_wordsize() of
  119: 	32             -> ok;
  120: 	64             -> ok;
  121: 	unsupported_os -> ok;
  122: 	_ -> error
  123:     end,
  124: 
  125:     %% get_check_interval()
  126:     ?line 60000 = memsup:get_check_interval(),
  127: 
  128:     %% set_check_interval(Minutes)
  129:     ?line ok = memsup:set_check_interval(2),
  130:     ?line 120000 = memsup:get_check_interval(),
  131:     ?line {'EXIT',{badarg,_}} =
  132: 	(catch memsup:set_check_interval(0.2)),
  133:     ?line 120000 = memsup:get_check_interval(),
  134:     ?line ok = memsup:set_check_interval(1),
  135: 
  136:     %% get_procmem_high_watermark()
  137:     ?line 5 = memsup:get_procmem_high_watermark(),
  138: 
  139:     %% set_procmem_high_watermark()
  140:     ?line ok = memsup:set_procmem_high_watermark(0.1),
  141:     ?line 10 = memsup:get_procmem_high_watermark(),
  142:     ?line {'EXIT',{badarg,_}} =
  143: 	(catch memsup:set_procmem_high_watermark(-0.1)),
  144:     ?line 10 = memsup:get_procmem_high_watermark(),
  145:     ?line ok = memsup:set_procmem_high_watermark(0.05),
  146: 
  147:     %% get_sysmem_high_watermark()
  148:     ?line 80 = memsup:get_sysmem_high_watermark(),
  149: 
  150:     %% set_sysmem_high_watermark()
  151:     ?line ok = memsup:set_sysmem_high_watermark(0.9),
  152:     ?line 90 = memsup:get_sysmem_high_watermark(),
  153:     ?line {'EXIT',{badarg,_}} =
  154: 	(catch memsup:set_sysmem_high_watermark(-0.9)),
  155:     ?line 90 = memsup:get_sysmem_high_watermark(),
  156:     ?line ok = memsup:set_sysmem_high_watermark(0.8),
  157: 
  158:     %% get|set_helper_timeout
  159:     ?line 30 = memsup:get_helper_timeout(),
  160:     ?line ok = memsup:set_helper_timeout(29),
  161:     ?line 29 = memsup:get_helper_timeout(),
  162:     ?line {'EXIT',{badarg,_}} = (catch memsup:set_helper_timeout(31.0)),
  163:     ?line 29 = memsup:get_helper_timeout(),
  164:     ok.
  165: 
  166: %%----------------------------------------------------------------------
  167: %% NOTE: The test case is a bit weak as it will fail if the memory
  168: %% usage changes too much during its course.
  169: %%----------------------------------------------------------------------
  170: alarm1(suite) ->
  171:     [];
  172: alarm1(doc) ->
  173:     ["Test alarms when memsup_system_only==false"];
  174: alarm1(Config) when is_list(Config) ->
  175: 
  176:     %% If system memory usage is too high, the testcase cannot
  177:     %% be run correctly
  178:     ?line {Total, Alloc, {_Pid,_PidAlloc}} = memsup:get_memory_data(),
  179:     io:format("alarm1: Total: ~p, Alloc: ~p~n", [Total, Alloc]),
  180:     ?line SysUsage = Alloc/Total,
  181:     if
  182: 	SysUsage>0.99 ->
  183: 	    {skip, sys_mem_too_high};
  184: 	true ->
  185: 	    alarm1(Config, SysUsage)
  186:     end.
  187: 
  188: alarm1(_Config, SysUsage) ->
  189:     %% Set a long memory check interval, we will force memory checks
  190:     %% instead
  191:     ?line ok = memsup:set_check_interval(60),
  192: 
  193:     %% Check thresholds
  194:     ?line SysThreshold = (memsup:get_sysmem_high_watermark()/100),
  195:     ?line ProcThreshold = (memsup:get_procmem_high_watermark()/100),
  196: 
  197:     %% Check if a system alarm already should be set or not
  198:     SysP = if
  199: 	       SysUsage>SysThreshold -> true;
  200: 	       SysUsage=<SysThreshold -> false
  201: 	   end,
  202: 
  203:     %% If system memory is higher than threshold, make sure the system
  204:     %% alarm is set. Otherwise, make sure it is not set
  205:     case alarm_set(system_memory_high_watermark) of
  206: 	{true, []} when SysP ->
  207: 	    ok;
  208: 	false when not SysP ->
  209: 	    ok;
  210: 	_ ->
  211: 	    ?line ?t:fail({sys_alarm, SysUsage, SysThreshold})
  212:     end,
  213: 
  214:     %% Lower/raise the threshold to clear/set the alarm
  215:     NewSysThreshold = if
  216:         SysP ->
  217:             Value = 1.1*SysUsage,
  218:             if
  219:                 Value > 0.99 -> 0.99;
  220:                 true -> Value
  221:             end;
  222:         not SysP -> 0.9*SysUsage
  223:     end,
  224: 
  225:     ?line ok = memsup:set_sysmem_high_watermark(NewSysThreshold),
  226: 
  227:     %% Initiate and wait for a new data collection
  228:     ?line ok = force_collection(),
  229: 
  230:     %% Make sure the alarm is cleared/set
  231:     ?t:sleep(?t:seconds(5)),
  232:     case alarm_set(system_memory_high_watermark) of
  233: 	{true, []} when not SysP ->
  234: 	    ok;
  235: 	false when SysP ->
  236: 	    ok;
  237: 	_ ->
  238: 	    ?line ?t:fail({sys_alarm, SysUsage, NewSysThreshold})
  239:     end,
  240: 
  241:     %% Reset the threshold to set/clear the alarm again
  242:     ?line ok = memsup:set_sysmem_high_watermark(SysThreshold),
  243:     ?line ok = force_collection(),
  244:     ?t:sleep(?t:seconds(1)),
  245:     case alarm_set(system_memory_high_watermark) of
  246: 	{true, []} when SysP ->
  247: 	    ok;
  248: 	false when not SysP ->
  249: 	    ok;
  250: 	_ ->
  251: 	    ?line ?t:fail({sys_alarm, SysUsage, SysThreshold})
  252:     end,
  253: 
  254:     %% Check memory usage
  255:     ?line {Total2, _, {WorstPid, PidAlloc}} = memsup:get_memory_data(),
  256: 
  257:     %% Check if a process alarm already should be set or not
  258:     PidUsage = PidAlloc/Total2,
  259:     ProcP = if
  260: 		PidUsage>ProcThreshold -> true;
  261: 		PidUsage=<ProcThreshold -> false
  262: 	    end,
  263: 
  264:     %% Make sure the process alarm is set/not set accordingly
  265:     case alarm_set(process_memory_high_watermark) of
  266: 	{true, WorstPid} when ProcP ->
  267: 	    ok;
  268: 	false when not ProcP ->
  269: 	    ok;
  270: 	{true, BadPid1} when ProcP ->
  271: 	    ?line ?t:fail({proc_alarm, WorstPid, BadPid1});
  272: 	_ ->
  273: 	    ?line ?t:fail({proc_alarm, PidUsage, ProcThreshold})
  274:     end,
  275: 
  276:     %% Lower/raise the threshold to clear/set the alarm
  277:     NewProcThreshold = if
  278: 			   ProcP -> 1.1*PidUsage;
  279: 			   not ProcP -> 0.9*PidUsage
  280: 		       end,
  281:     ?line ok = memsup:set_procmem_high_watermark(NewProcThreshold),
  282:     ?line ok = force_collection(),
  283:     ?t:sleep(?t:seconds(1)),
  284:     case alarm_set(process_memory_high_watermark) of
  285: 	{true, WorstPid} when not ProcP ->
  286: 	    ok;
  287: 	false when ProcP ->
  288: 	    ok;
  289: 	{true, BadPid2} when not ProcP ->
  290: 	    ?line test_server:fail({proc_alarm, WorstPid, BadPid2});
  291: 	_ ->
  292: 	    ?line ?t:fail({proc_alarm, PidUsage, ProcThreshold})
  293:     end,
  294: 
  295:     %% Reset the threshold to clear/set the alarm
  296:     ?line ok = memsup:set_procmem_high_watermark(ProcThreshold),
  297:     ?line ok = force_collection(),
  298:     ?t:sleep(?t:seconds(1)),
  299:     case alarm_set(process_memory_high_watermark) of
  300: 	{true, WorstPid} when ProcP ->
  301: 	    ok;
  302: 	false when not ProcP ->
  303: 	    ok;
  304: 	{true, BadPid3} when ProcP ->
  305: 	    ?line test_server:fail({proc_alarm, WorstPid, BadPid3});
  306: 	_ ->
  307: 	    ?line ?t:fail({proc_alarm, PidUsage, ProcThreshold})
  308:     end,
  309: 
  310:     %% Reset memory check interval
  311:     ?line ok = memsup:set_check_interval(1),
  312:     ok.
  313: 
  314: alarm2(suite) ->
  315:     [];
  316: alarm2(doc) ->
  317:     ["Test alarms when memsup_system_only==true"];
  318: alarm2(Config) when is_list(Config) ->
  319: 
  320:     %% If system memory usage is too high, the testcase cannot
  321:     %% be run correctly
  322:     ?line {Total, Alloc, {_Pid,_PidAlloc}} = memsup:get_memory_data(),
  323:     ?line SysUsage = Alloc/Total,
  324:     if
  325: 	SysUsage>0.99 ->
  326: 	    {skip, sys_mem_too_high};
  327: 	true ->
  328: 	    alarm2(Config, SysUsage)
  329:     end.
  330: 
  331: alarm2(_Config, _SysUsage) ->
  332: 
  333:     %% Change memsup_system_only and restart memsup
  334:     ?line ok = application:set_env(os_mon, memsup_system_only, true),
  335:     ?line ok = supervisor:terminate_child(os_mon_sup, memsup),
  336:     ?line {ok, _Memsup1} = supervisor:restart_child(os_mon_sup, memsup),
  337: 
  338:     %% Set a long memory check interval, we will force memory checks
  339:     %% instead
  340:     ?line ok = memsup:set_check_interval(60),
  341: 
  342:     %% Check data and thresholds
  343:     ?line {Total, Alloc, undefined} = memsup:get_memory_data(),
  344:     ?line SysThreshold = (memsup:get_sysmem_high_watermark()/100),
  345:     ?line true = is_integer(memsup:get_procmem_high_watermark()),
  346: 
  347:     %% Check if a system alarm already should be set or not
  348:     ?line SysUsage = Alloc/Total,
  349:     SysP = if
  350: 	       SysUsage>SysThreshold -> true;
  351: 	       SysUsage=<SysThreshold -> false
  352: 	   end,
  353: 
  354:     %% If system memory is higher than threshold, make sure the system
  355:     %% alarm is set. Otherwise, make sure it is not set
  356:     case alarm_set(system_memory_high_watermark) of
  357: 	{true, []} when SysP ->
  358: 	    ok;
  359: 	false when not SysP ->
  360: 	    ok;
  361: 	_ ->
  362: 	    ?line ?t:fail({sys_alarm, SysUsage, SysThreshold})
  363:     end,
  364: 
  365:     %% Lower/raise the threshold to clear/set the alarm
  366:     NewSysThreshold = if
  367:         SysP ->
  368:             Value = 1.1*SysUsage,
  369:             if
  370:                 Value > 0.99 -> 0.99;
  371:                 true -> Value
  372:             end;
  373:         not SysP -> 0.9*SysUsage
  374:     end,
  375: 
  376:     ?line ok = memsup:set_sysmem_high_watermark(NewSysThreshold),
  377: 
  378:     %% Initiate and wait for a new data collection
  379:     ?line ok = force_collection(),
  380: 
  381:     %% Make sure the alarm is cleared/set
  382:     ?t:sleep(?t:seconds(1)),
  383:     case alarm_set(system_memory_high_watermark) of
  384: 	{true, []} when not SysP ->
  385: 	    ok;
  386: 	false when SysP ->
  387: 	    ok;
  388: 	_ ->
  389: 	    ?line ?t:fail({sys_alarm, SysUsage, NewSysThreshold})
  390:     end,
  391: 
  392:     %% Reset the threshold to set/clear the alarm again
  393:     ?line ok = memsup:set_sysmem_high_watermark(SysThreshold),
  394:     ?line ok = force_collection(),
  395:     ?t:sleep(?t:seconds(1)),
  396:     case alarm_set(system_memory_high_watermark) of
  397: 	{true, []} when SysP ->
  398: 	    ok;
  399: 	false when not SysP ->
  400: 	    ok;
  401: 	_ ->
  402: 	    ?line ?t:fail({sys_alarm, SysUsage, SysThreshold})
  403:     end,
  404: 
  405:     %% Reset memsup_system_only and restart memsup
  406:     %% (memory check interval is then automatically reset)
  407:     ?line ok = application:set_env(os_mon, memsup_system_only, false),
  408:     ?line ok = supervisor:terminate_child(os_mon_sup, memsup),
  409:     ?line {ok, _Memsup2} = supervisor:restart_child(os_mon_sup, memsup),
  410: 
  411:     ok.
  412: 
  413: alarm_set(Alarm) ->
  414:     alarm_set(Alarm, alarm_handler:get_alarms()).
  415: alarm_set(Alarm, [{Alarm,Data}|_]) ->
  416:     {true,Data};
  417: alarm_set(Alarm, [_|T]) ->
  418:     alarm_set(Alarm, T);
  419: alarm_set(_Alarm, []) ->
  420:     false.
  421: 
  422: process(suite) ->
  423:     [];
  424: process(doc) ->
  425:     ["Make sure memsup discovers a process grown very large"];
  426: process(Config) when is_list(Config) ->
  427: 
  428:     %% Set a long memory check interval, we will force memory checks
  429:     %% instead
  430:     ?line ok = memsup:set_check_interval(60),
  431: 
  432:     %% Collect data
  433:     MemData = memsup:get_memory_data(),
  434:     io:format("process: memsup:get_memory_data() = ~p~n", [MemData]),
  435:     ?line {_Total,_Free,{_,Bytes}} = MemData,
  436: 
  437:     %% Start a new process larger than Worst
  438:     ?line WorsePid = spawn(fun() -> new_hog(Bytes) end),
  439:     ?t:sleep(?t:seconds(1)),
  440: 
  441:     %% Initiate and wait for a new data collection
  442:     ?line ok = force_collection(),
  443: 
  444:     %% Check that get_memory_data() returns updated result
  445:     ?line case memsup:get_memory_data() of
  446: 	      {_, _, {WorsePid, _MoreBytes}} ->
  447: 		  ok;
  448: 	      {_, _, BadWorst} ->
  449: 		  ?line ?t:fail({worst_pid, BadWorst})
  450: 	  end,
  451: 
  452:     %% Reset memory check interval
  453:     ?line exit(WorsePid, done),
  454:     ?line ok = memsup:set_check_interval(1),
  455:     ok.
  456: 
  457: new_hog(Bytes) ->
  458:     WordSize = erlang:system_info(wordsize),
  459:     N = (Bytes+200) div WordSize div 2,
  460:     List = lists:duplicate(N, a),
  461:     new_hog_1(List).
  462: 
  463: new_hog_1(List) ->
  464:     receive
  465: 	_Any -> exit(List)
  466:     end.
  467: 
  468: config(suite) ->
  469:     [];
  470: config(doc) ->
  471:     ["Test configuration"];
  472: config(Config) when is_list(Config) ->
  473: 
  474:     %% Change configuration parameters and make sure change is reflected
  475:     %% when memsup is restarted
  476:     ?line ok = application:set_env(os_mon, memory_check_interval, 2),
  477:     ?line ok =
  478: 	application:set_env(os_mon, system_memory_high_watermark, 0.9),
  479:     ?line ok =
  480: 	application:set_env(os_mon, process_memory_high_watermark, 0.1),
  481:     ?line ok = application:set_env(os_mon, memsup_helper_timeout, 35),
  482:     ?line ok = application:set_env(os_mon, memsup_system_only, true),
  483: 
  484:     ?line ok = supervisor:terminate_child(os_mon_sup, memsup),
  485:     ?line {ok, _Child1} = supervisor:restart_child(os_mon_sup, memsup),
  486: 
  487:     ?line 120000 = memsup:get_check_interval(),
  488:     ?line 90 = memsup:get_sysmem_high_watermark(),
  489:     ?line 10 = memsup:get_procmem_high_watermark(),
  490:     ?line 35 = memsup:get_helper_timeout(),
  491: 
  492:     %% Also try this with bad parameter values, should be ignored
  493:     ?line ok = application:set_env(os_mon, memory_check_interval, 0.2),
  494:     ?line ok =
  495: 	application:set_env(os_mon, system_memory_high_watermark, -0.9),
  496:     ?line ok =
  497: 	application:set_env(os_mon, process_memory_high_watermark,-0.1),
  498:     ?line ok = application:set_env(os_mon, memsup_helper_timeout, 0.35),
  499:     ?line ok = application:set_env(os_mon, memsup_system_only, arne),
  500: 
  501:     ?line ok = supervisor:terminate_child(os_mon_sup, memsup),
  502:     ?line {ok, _Child2} = supervisor:restart_child(os_mon_sup, memsup),
  503: 
  504:     ?line 60000 = memsup:get_check_interval(),
  505:     ?line 80 = memsup:get_sysmem_high_watermark(),
  506:     ?line 5 = memsup:get_procmem_high_watermark(),
  507:     ?line 30 = memsup:get_helper_timeout(),
  508: 
  509:     %% Reset configuration parameters
  510:     ?line ok = application:set_env(os_mon, memory_check_interval, 1),
  511:     ?line ok =
  512: 	application:set_env(os_mon, system_memory_high_watermark, 0.8),
  513:     ?line ok =
  514: 	application:set_env(os_mon, process_memory_high_watermark,0.05),
  515:     ?line ok = application:set_env(os_mon, memsup_helper_timeout, 30),
  516:     ?line ok = application:set_env(os_mon, memsup_system_only, false),
  517: 
  518:     ok.
  519: 
  520: unavailable(suite) ->
  521:     [];
  522: unavailable(doc) ->
  523:     ["Test correct behaviour when service is unavailable"];
  524: unavailable(Config) when is_list(Config) ->
  525: 
  526:     %% Close memsup
  527:     ?line ok = application:set_env(os_mon, start_memsup, false),
  528:     ?line ok = supervisor:terminate_child(os_mon_sup, memsup),
  529: 
  530:     %% Make sure all API functions return their dummy values
  531:     ?line {0,0,{_Pid,0}} = memsup:get_memory_data(),
  532:     ?line ok = application:set_env(os_mon, memsup_system_only, true),
  533:     ?line {0,0,undefined} = memsup:get_memory_data(),
  534:     ?line ok = application:set_env(os_mon, memsup_system_only, false),
  535:     ?line [] = memsup:get_system_memory_data(),
  536:     ?line 0  = memsup:get_os_wordsize(),
  537:     ?line 60000 = memsup:get_check_interval(),
  538:     ?line ok = memsup:set_check_interval(2),
  539:     ?line 5 = memsup:get_procmem_high_watermark(),
  540:     ?line ok = memsup:set_procmem_high_watermark(0.10),
  541:     ?line 80 = memsup:get_sysmem_high_watermark(),
  542:     ?line ok = memsup:set_sysmem_high_watermark(0.90),
  543:     ?line 30 = memsup:get_helper_timeout(),
  544:     ?line ok = memsup:set_helper_timeout(35),
  545: 
  546:     %% Start memsup again,
  547:     ?line ok = application:set_env(os_mon, start_memsup, true),
  548:     ?line {ok, _Child} = supervisor:restart_child(os_mon_sup, memsup),
  549: 
  550:     ok.
  551: 
  552: timeout(suite) ->
  553:     [];
  554: timeout(doc) ->
  555:     ["Test stability of memsup when data collection times out"];
  556: timeout(Config) when is_list(Config) ->
  557: 
  558:     %% Set a long memory check interval and memsup_helper timeout,
  559:     %% we will force memory checks instead and fake timeouts
  560:     ?line ok = memsup:set_check_interval(60),
  561:     ?line ok = memsup:set_helper_timeout(3600),
  562: 
  563:     %% Provoke a timeout during memory collection
  564:     ?line memsup ! time_to_collect,
  565:     ?line memsup ! reg_collection_timeout,
  566: 
  567:     %% Not much we can check though, except that memsup is still running
  568:     ?line {_,_,_} = memsup:get_memory_data(),
  569: 
  570:     %% Provoke a timeout during extensive memory collection
  571:     %% We fake a gen_server:call/2 to be able to send a timeout message
  572:     %% while the request is being handled
  573: 
  574:     %% Linux should be handled the same way as solaris.
  575: 
  576: %    TimeoutMsg = case ?t:os_type() of
  577: %		     {unix, sunos} -> ext_collection_timeout;
  578: %		     {unix, linux} -> reg_collection_timeout
  579: %		 end,
  580: 
  581:     TimeoutMsg = ext_collection_timeout,
  582: 
  583:     ?line Pid = whereis(memsup),
  584:     ?line Mref = erlang:monitor(process, Pid),
  585:     ?line Pid ! {'$gen_call', {self(), Mref}, get_system_memory_data},
  586:     ?line Pid ! TimeoutMsg,
  587:     receive
  588: 	{Mref, []} ->
  589: 	    erlang:demonitor(Mref),
  590: 	    ?line ok;
  591: 	{Mref, Res} ->
  592: 	    erlang:demonitor(Mref),
  593: 	    ?line ?t:fail({unexpected_result, Res});
  594: 	{'DOWN', Mref, _, _, _} ->
  595: 	    ?line ?t:fail(no_result)
  596:     end,
  597: 
  598:     %% Reset memory check interval and memsup_helper timeout
  599:     ?line ok = memsup:set_check_interval(1),
  600:     ?line ok = memsup:set_helper_timeout(30),
  601:     ?line memsup ! time_to_collect,
  602: 
  603:     ?line [_|_] = memsup:get_system_memory_data(),
  604: 
  605:     ok.
  606: 
  607: port(suite) ->
  608:     [];
  609: port(doc) ->
  610:     ["Test that memsup handles a terminating port program"];
  611: port(Config) when is_list(Config) ->
  612:     ?line Str = os:cmd("ps -e | grep '[m]emsup'"),
  613:     case io_lib:fread("~s", Str) of
  614: 	{ok, [Pid], _Rest} ->
  615: 
  616: 	    %% Monitor memsup
  617: 	    ?line MonRef = erlang:monitor(process, memsup),
  618: 	    ?line {Total1,_Alloc1,_Worst1} = memsup:get_memory_data(),
  619: 	    ?line true = Total1>0,
  620: 
  621: 	    %% Kill the port program
  622: 	    case os:cmd("kill -9 " ++ Pid) of
  623: 		[] ->
  624: 
  625: 		    %% memsup should now terminate
  626: 		    receive
  627: 			{'DOWN', MonRef, _, _, {port_died, _Reason}} ->
  628: 			    ok;
  629: 			{'DOWN', MonRef, _, _, Reason} ->
  630: 			    ?line ?t:fail({unexpected_exit_reason, Reason})
  631: 		    after
  632: 			3000 ->
  633: 			    ?line ?t:fail(still_alive)
  634: 		    end,
  635: 
  636: 		    %% Give os_mon_sup time to restart memsup
  637: 		    ?t:sleep(?t:seconds(3)),
  638: 		    ?line {Total2,_Alloc2,_Worst2} =
  639: 			memsup:get_memory_data(),
  640: 		    ?line true = Total2>0,
  641: 
  642: 		    ok;
  643: 
  644: 		Line ->
  645: 		    erlang:demonitor(MonRef),
  646: 		    {skip, {not_killed, Line}}
  647: 	    end;
  648: 	_ ->
  649: 	    {skip, {os_pid_not_found, Str}}
  650:     end.
  651: 
  652: otp_5910(suite) ->
  653:     [];
  654: otp_5910(doc) ->
  655:     ["Test that alarms are cleared and not set twice"];
  656: otp_5910(Config) when is_list(Config) ->
  657:     Alarms =
  658: 	[system_memory_high_watermark, process_memory_high_watermark],
  659: 
  660:     %% Make sure memsup sets both alarms
  661:     ?line ok = application:set_env(os_mon, memory_check_interval, 60),
  662:     ?line ok = memsup:set_check_interval(60),
  663:     ?line SysThreshold = (memsup:get_sysmem_high_watermark()/100),
  664:     ?line ProcThreshold = (memsup:get_procmem_high_watermark()/100),
  665: 
  666:     MemData = memsup:get_memory_data(),
  667: 
  668:     io:format("otp_5910: memsup:get_memory_data() = ~p~n", [MemData]),
  669:     ?line {Total, Alloc, {_Pid, _Bytes}} = MemData,
  670:     ?line Pid = spawn_opt(fun() ->
  671: 				  receive
  672: 				      die -> ok
  673: 				  end
  674: 			  end, [{min_heap_size, 1000}]),
  675:     %% Create a process guaranteed to live, be constant and
  676:     %% break memsup process limit
  677:     ?line {memory, Bytes} = erlang:process_info(Pid,memory),
  678:     ?line SysUsage = Alloc/Total,
  679:     ?line ProcUsage = Bytes/Total,
  680: 
  681:     if
  682: 	SysUsage>SysThreshold ->
  683: 	    ok;
  684: 	SysUsage=<SysThreshold ->
  685: 	    ?line ok = application:set_env(os_mon,
  686: 					   sys_mem_high_watermark,
  687: 					   0.5 * SysUsage),
  688: 	    ?line ok = memsup:set_sysmem_high_watermark(0.5 * SysUsage)
  689:     end,
  690:     if
  691: 	ProcUsage>ProcThreshold ->
  692: 	    ok;
  693: 	ProcUsage=<ProcThreshold ->
  694: 	    ?line ok = application:set_env(os_mon,
  695: 					   proc_mem_high_watermark,
  696: 					   0.5 * ProcUsage),
  697: 	    ?line ok = memsup:set_procmem_high_watermark(0.5 *ProcUsage)
  698:     end,
  699:     ?line ok = force_collection(),
  700:     ?t:sleep(?t:seconds(1)),
  701:     lists:foreach(fun(AlarmId) ->
  702: 			  case alarm_set(AlarmId) of
  703: 			      {true, _} -> ok;
  704: 			      false ->
  705: 				  ?line ?t:fail({alarm_not_set,
  706: 						 AlarmId})
  707: 			  end
  708: 		  end,
  709: 		  Alarms),
  710: 
  711:     %% Kill guaranteed process...
  712:     Pid ! die,
  713:     %% Kill memsup
  714:     exit(whereis(memsup), faked_memsup_crash),
  715:     %% Wait a little to make sure memsup has been restarted,
  716:     %% then make sure the alarms are set once, but not twice
  717:     ?t:sleep(?t:seconds(1)),
  718:     ?line MemUsage = memsup:get_memory_data(),
  719:     SetAlarms = alarm_handler:get_alarms(),
  720:     case lists:foldl(fun(system_memory_high_watermark, {S, P}) ->
  721: 			     {S+1, P};
  722: 			(process_memory_high_watermark, {S, P}) ->
  723: 			     {S, P+1};
  724: 			(_AlarmId, Acc0) ->
  725: 			     Acc0
  726: 		     end,
  727: 		     {0, 0},
  728: 		     SetAlarms) of
  729: 	{0, 0} ->
  730: 	    ok;
  731: 	_ ->
  732: 	    ?line ?t:fail({bad_number_of_alarms, SetAlarms, MemUsage})
  733:     end,
  734: 
  735:     %% Stop OS_Mon and make sure all memsup alarms are cleared
  736:     ?line ok = application:stop(os_mon),
  737:     ?t:sleep(?t:seconds(1)),
  738:     lists:foreach(fun(AlarmId) ->
  739: 			  case alarm_set(AlarmId) of
  740: 			      false -> ok;
  741: 			      {true, _} ->
  742: 				  ?line ?t:fail({alarm_is_set, AlarmId})
  743: 			  end
  744: 		  end,
  745: 		  Alarms),
  746: 
  747:     %% Reset configuration and restart OS_Mon
  748:     ?line ok = application:set_env(os_mon,memory_check_interval,1),
  749:     ?line ok = application:set_env(os_mon,sys_mem_high_watermark,0.8),
  750:     ?line ok = application:set_env(os_mon,proc_mem_high_watermark,0.05),
  751:     ?line ok = application:start(os_mon),
  752: 
  753:     ok.
  754: 
  755: %%----------------------------------------------------------------------
  756: %% Auxiliary
  757: %%----------------------------------------------------------------------
  758: 
  759: force_collection() ->
  760:     erlang:trace(whereis(memsup), true, ['receive']),
  761:     memsup ! time_to_collect,
  762:     TimerRef = erlang:send_after(5000, self(), timeout),
  763:     force_collection(TimerRef).
  764: 
  765: force_collection(TimerRef) ->
  766:     receive
  767: 	{trace, _Pid, 'receive', {collected_sys, _Sys}} ->
  768: 	    erlang:cancel_timer(TimerRef),
  769: 	    erlang:trace(whereis(memsup), false, ['receive']),
  770: 	    flush(),
  771: 	    ok;
  772: 	{trace, _Pid, 'receive', reg_collection_timeout} ->
  773: 	    erlang:cancel_timer(TimerRef),
  774: 	    erlang:trace(whereis(memsup), false, ['receive']),
  775: 	    flush(),
  776: 	    collection_timeout;
  777: 	timout ->
  778: 	    erlang:trace(whereis(memsup), false, ['receive']),
  779: 	    flush(),
  780: 	    timeout;
  781: 	_Msg ->
  782: 	    force_collection(TimerRef)
  783:     end.
  784: 
  785: flush() ->
  786:     receive
  787: 	{trace, _, _, _} ->
  788: 	    flush();
  789: 	timeout ->
  790: 	    flush()
  791:     after 0 ->
  792: 	    ok
  793:     end.