diff options
author | Erlang/OTP <otp@erlang.org> | 2009-11-20 14:54:40 +0000 |
---|---|---|
committer | Erlang/OTP <otp@erlang.org> | 2009-11-20 14:54:40 +0000 |
commit | 84adefa331c4159d432d22840663c38f155cd4c1 (patch) | |
tree | bff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/os_mon/src | |
download | otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2 otp-84adefa331c4159d432d22840663c38f155cd4c1.zip |
The R13B03 release.OTP_R13B03
Diffstat (limited to 'lib/os_mon/src')
-rw-r--r-- | lib/os_mon/src/Makefile | 110 | ||||
-rw-r--r-- | lib/os_mon/src/cpu_sup.erl | 776 | ||||
-rw-r--r-- | lib/os_mon/src/disksup.erl | 369 | ||||
-rw-r--r-- | lib/os_mon/src/memsup.erl | 1022 | ||||
-rw-r--r-- | lib/os_mon/src/nteventlog.erl | 162 | ||||
-rw-r--r-- | lib/os_mon/src/os_mon.app.src | 32 | ||||
-rw-r--r-- | lib/os_mon/src/os_mon.appup.src | 41 | ||||
-rw-r--r-- | lib/os_mon/src/os_mon.erl | 179 | ||||
-rw-r--r-- | lib/os_mon/src/os_mon_mib.erl | 250 | ||||
-rw-r--r-- | lib/os_mon/src/os_mon_sysinfo.erl | 147 | ||||
-rw-r--r-- | lib/os_mon/src/os_sup.erl | 258 |
11 files changed, 3346 insertions, 0 deletions
diff --git a/lib/os_mon/src/Makefile b/lib/os_mon/src/Makefile new file mode 100644 index 0000000000..9a75446a89 --- /dev/null +++ b/lib/os_mon/src/Makefile @@ -0,0 +1,110 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 1996-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(OS_MON_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/os_mon-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +MODULES= disksup memsup cpu_sup os_mon os_mon_mib os_sup os_mon_sysinfo \ + nteventlog + +INCLUDE=../include +CSRC=../c_src + +MEMSUP_HRL=$(INCLUDE)/memsup.hrl + +HRL_FILES= $(MEMSUP_HRL) +ERL_FILES= $(MODULES:%=%.erl) + +APP_FILE= os_mon.app +APP_SRC= $(APP_FILE).src +APP_TARGET=$(EBIN)/$(APP_FILE) + +APPUP_FILE= os_mon.appup + +APPUP_SRC= $(APPUP_FILE).src +APPUP_TARGET= ../ebin/$(APPUP_FILE) + + +TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) $(APP_TARGET) $(APPUP_TARGET) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- +ERL_COMPILE_FLAGS += +warn_obsolete_guard -I$(INCLUDE) + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core *~ + +docs: + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + +$(APP_TARGET): $(APP_SRC) ../vsn.mk + sed -e 's;%VSN%;$(VSN);' $< > $@ + +$(APPUP_TARGET): $(APPUP_SRC) ../vsn.mk + sed -e 's;%VSN%;$(VSN);' $< > $@ + +#------------------------------------------------------- +# Special dependencies +#------------------------------------------------------- +$(EBIN)/memsup.$(EMULATOR): $(MEMSUP_HRL) + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/src + $(INSTALL_DATA) $(ERL_FILES) $(RELSYSDIR)/src + $(INSTALL_DATA) $(HRL_FILES) $(RELSYSDIR)/src + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + + + + + + + diff --git a/lib/os_mon/src/cpu_sup.erl b/lib/os_mon/src/cpu_sup.erl new file mode 100644 index 0000000000..742e20b1fa --- /dev/null +++ b/lib/os_mon/src/cpu_sup.erl @@ -0,0 +1,776 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1997-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(cpu_sup). + +%% API +-export([start_link/0, start/0, stop/0]). +-export([nprocs/0, avg1/0, avg5/0, avg15/0, util/0, util/1]). +-export([dummy_reply/1]). + +%% For testing +-export([ping/0]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +%% Internal protocol with the port program +-define(nprocs,"n"). +-define(avg1,"1"). +-define(avg5,"5"). +-define(avg15,"f"). +-define(quit,"q"). +-define(ping,"p"). +-define(util,"u"). + +-define(cu_cpu_id, 0). +-define(cu_user, 1). +-define(cu_nice_user, 2). +-define(cu_kernel, 3). +-define(cu_io_wait, 4). +-define(cu_idle, 5). +-define(cu_hard_irq, 6). +-define(cu_soft_irq, 7). +-define(cu_steal, 8). + +-define(INT32(D3,D2,D1,D0), + (((D3) bsl 24) bor ((D2) bsl 16) bor ((D1) bsl 8) bor (D0))). + +-define(MAX_UINT32, ((1 bsl 32) - 1)). + +-record(cpu_util, {cpu, busy = [], non_busy = []}). + +-record(state, {server, os_type}). +%-record(state, {server, port = not_used, util = [], os_type}). + +-record(internal, {port = not_used, util = [], os_type}). + +%%---------------------------------------------------------------------- +%% Contract specifications +%%---------------------------------------------------------------------- + +-type(util_cpus() :: 'all' | integer() | [integer()]). +-type(util_state() :: + 'user' | + 'nice_user' | + 'kernel' | + 'wait' | + 'idle'). +-type(util_value() :: {util_state(), float()} | float()). +-type(util_desc() :: {util_cpus(), util_value(), util_value(), []}). + +%%---------------------------------------------------------------------- +%% Exported functions +%%---------------------------------------------------------------------- + +start() -> + gen_server:start({local, cpu_sup}, cpu_sup, [], []). + +start_link() -> + gen_server:start_link({local, cpu_sup}, cpu_sup, [], []). + +stop() -> + gen_server:call(cpu_sup, ?quit, infinity). + +-spec(nprocs/0 :: () -> integer() | {'error', any()}). + +nprocs() -> + os_mon:call(cpu_sup, ?nprocs, infinity). + +-spec(avg1/0 :: () -> integer() | {'error', any()}). + +avg1() -> + os_mon:call(cpu_sup, ?avg1, infinity). + +-spec(avg5/0 :: () -> integer() | {'error', any()}). + +avg5() -> + os_mon:call(cpu_sup, ?avg5, infinity). + +-spec(avg15/0 :: () -> integer() | {'error', any()}). + +avg15() -> + os_mon:call(cpu_sup, ?avg15, infinity). + +-spec(util/1 :: ([ 'detailed' | 'per_cpu']) -> + util_desc() | [util_desc()] | {'error', any()}). + +util(Args) when is_list (Args) -> + % Get arguments + case lists:foldl( + fun (detailed, {_ , PC}) -> {true, PC }; + (per_cpu , {D , _ }) -> {D , true}; + (_ , _ ) -> badarg + end, {false, false}, Args) of + badarg -> + erlang:error(badarg); + {Detailed, PerCpu} -> + os_mon:call(cpu_sup, {?util, Detailed, PerCpu}, infinity) + end; +util(_) -> + erlang:error(badarg). + +-spec(util/0 :: () -> float()). + +util() -> + case util([]) of + {all, Busy, _, _} -> Busy; + Error -> Error + end. + +dummy_reply(?nprocs) -> 0; +dummy_reply(?avg1) -> 0; +dummy_reply(?avg5) -> 0; +dummy_reply(?avg15) -> 0; +dummy_reply({?util,_,_}) -> {all, 0, 0, []}. + +%%---------------------------------------------------------------------- +%% For testing +%%---------------------------------------------------------------------- + +ping() -> + gen_server:call(cpu_sup,?ping). + +%%---------------------------------------------------------------------- +%% gen_server callbacks +%%---------------------------------------------------------------------- + +%% init +init([]) -> + process_flag(trap_exit, true), + process_flag(priority, low), + {ok, + #state{ os_type = os:type(), + server = measurement_server_start() + } + }. +handle_call(?quit, _From, State) -> + {stop, normal, ok, State}; +handle_call({?util, D, PC}, {Client, _Tag}, + #state{os_type = {unix, Flavor}} = State) + when Flavor == sunos; + Flavor == linux -> + case measurement_server_call(State#state.server, {?util, D, PC, Client}) of + {error, Reason} -> + { reply, + {error, Reason}, + State#state{server=measurement_server_restart(State#state.server)} + }; + Result -> {reply, Result, State} + end; +handle_call({?util, Detailed, PerCpu}, _From, State) -> + String = "OS_MON (cpu_sup), util/1 unavailable for this OS~n", + error_logger:warning_msg(String), + {reply, dummy_reply({?util, Detailed, PerCpu}), State}; +handle_call(Request, _From, State) when Request==?nprocs; + Request==?avg1; + Request==?avg5; + Request==?avg15; + Request==?ping -> + case measurement_server_call(State#state.server, Request) of + {error, Reason} -> + { reply, + {error, Reason}, + State#state{server=measurement_server_restart(State#state.server)} + }; + Result -> {reply, Result, State} + end. +handle_cast(_Msg, State) -> + {noreply, State}. +handle_info({'EXIT', _Port, Reason}, State) -> + {stop, {server_died, Reason}, State#state{server=not_used}}; +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, State) -> + exit(State#state.server, normal). + +%% os_mon-2.0 +%% For live downgrade to/upgrade from os_mon-1.8[.1] +code_change(Vsn, PrevState, "1.8") -> + case Vsn of + + %% Downgrade from this version + {down, _Vsn} -> + process_flag(trap_exit, false); + + %% Upgrade to this version + _Vsn -> + process_flag(trap_exit, true) + end, + {ok, PrevState}; +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%---------------------------------------------------------------------- +%% internal functions +%%---------------------------------------------------------------------- + +get_uint32_measurement(Request, #internal{port = P, os_type = {unix, sunos}}) -> + port_server_call(P, Request); +get_uint32_measurement(Request, #internal{os_type = {unix, linux}}) -> + {ok,F} = file:open("/proc/loadavg",[read,raw]), + {ok,D} = file:read(F,24), + ok = file:close(F), + {ok,[Load1,Load5,Load15,_PRun,PTotal],_} = io_lib:fread("~f ~f ~f ~d/~d", D), + case Request of + ?avg1 -> sunify(Load1); + ?avg5 -> sunify(Load5); + ?avg15 -> sunify(Load15); + ?ping -> 4711; + ?nprocs -> PTotal + end; +get_uint32_measurement(Request, #internal{os_type = {unix, freebsd}}) -> + D = os:cmd("/sbin/sysctl -n vm.loadavg") -- "\n", + {ok,[Load1,Load5,Load15],_} = io_lib:fread("{ ~f ~f ~f }", D), + %% We could count the lines from the ps command as well + case Request of + ?avg1 -> sunify(Load1); + ?avg5 -> sunify(Load5); + ?avg15 -> sunify(Load15); + ?ping -> 4711; + ?nprocs -> + Ps = os:cmd("/bin/ps -ax | /usr/bin/wc -l"), + {ok, [N], _} = io_lib:fread("~d", Ps), + N-1 + end; +get_uint32_measurement(Request, #internal{os_type = {unix, dragonfly}}) -> + D = os:cmd("/sbin/sysctl -n vm.loadavg") -- "\n", + {ok,[Load1,Load5,Load15],_} = io_lib:fread("{ ~f ~f ~f }", D), + %% We could count the lines from the ps command as well + case Request of + ?avg1 -> sunify(Load1); + ?avg5 -> sunify(Load5); + ?avg15 -> sunify(Load15); + ?ping -> 4711; + ?nprocs -> + Ps = os:cmd("/bin/ps -ax | /usr/bin/wc -l"), + {ok, [N], _} = io_lib:fread("~d", Ps), + N-1 + end; +get_uint32_measurement(Request, #internal{os_type = {unix, openbsd}}) -> + D = os:cmd("/sbin/sysctl -n vm.loadavg") -- "\n", + {ok, [L1, L5, L15], _} = io_lib:fread("~f ~f ~f", D), + case Request of + ?avg1 -> sunify(L1); + ?avg5 -> sunify(L5); + ?avg15 -> sunify(L15); + ?ping -> 4711; + ?nprocs -> + Ps = os:cmd("/bin/ps -ax | /usr/bin/wc -l"), + {ok, [N], _} = io_lib:fread("~d", Ps), + N-1 + end; +get_uint32_measurement(Request, #internal{os_type = {unix, darwin}}) -> + %% Get the load average using uptime, overriding Locale setting. + D = os:cmd("LANG=C LC_ALL=C uptime") -- "\n", + %% Here is a sample uptime string from Mac OS 10.3.8 (C Locale): + %% "11:17 up 12 days, 20:39, 2 users, load averages: 1.07 0.95 0.66" + %% The safest way to extract the load averages seems to be grab everything + %% after the last colon and then do an fread on that. + Avg = lists:reverse(hd(string:tokens(lists:reverse(D), ":"))), + {ok,[L1,L5,L15],_} = io_lib:fread("~f ~f ~f", Avg), + + case Request of + ?avg1 -> sunify(L1); + ?avg5 -> sunify(L5); + ?avg15 -> sunify(L15); + ?ping -> 4711; + ?nprocs -> + Ps = os:cmd("/bin/ps -ax | /usr/bin/wc -l"), + {ok, [N], _} = io_lib:fread("~d", Ps), + N-1 + end; +get_uint32_measurement(Request, #internal{os_type = {unix, Sys}}) when Sys == irix64; + Sys == irix -> + %% Get the load average using uptime. + %% "8:01pm up 2 days, 22:12, 4 users, load average: 0.70, 0.58, 0.43" + D = os:cmd("uptime") -- "\n", + Avg = lists:reverse(hd(string:tokens(lists:reverse(D), ":"))), + {ok, [L1, L5, L15], _} = io_lib:fread("~f, ~f, ~f", Avg), + case Request of + ?avg1 -> sunify(L1); + ?avg5 -> sunify(L5); + ?avg15 -> sunify(L15); + ?ping -> 4711; + ?nprocs -> + {ok, ProcList} = file:list_dir("/proc/pinfo"), + length(ProcList) + end; +get_uint32_measurement(_, _) -> + throw(not_implemented). + + +get_util_measurement(?util, #internal{port = P }) -> + case port_server_call(P, ?util) of + {error, Error} -> {error, Error}; + NewCpuUtil -> NewCpuUtil + end; +get_util_measurement(_,_) -> + throw(not_implemented). + +%%---------------------------------------------------------------------- +%% BEGIN: tainted internal functions +%%---------------------------------------------------------------------- + +sunify(Val) -> + round(Val*256). % Note that Solaris and Linux load averages are + % measured quite differently anyway + + +keysearchdelete(_, _, []) -> + {false, []}; +keysearchdelete(K, N, [T|Ts]) when element(N, T) == K -> + {{value, T}, Ts}; +keysearchdelete(K, N, [T|Ts]) -> + {X, NTs} = keysearchdelete(K, N, Ts), + {X, [T|NTs]}. + +%% Internal cpu utilization functions + +%% cpu_util_diff(New, Old) takes a list of new cpu_util records as first +%% argument and a list of old cpu_util records as second argument. The +%% two lists have to be sorted on cpu index in ascending order. +%% +%% The returned value is a difference list in descending order. +cpu_util_diff(New, Old) -> + cpu_util_diff(New, Old, []). + +cpu_util_diff([], [], Acc) -> + Acc; +cpu_util_diff([#cpu_util{cpu = Cpu, + busy = NewBusy, + non_busy = NewNonBusy} | NewCpuUtils], + [#cpu_util{cpu = Cpu, + busy = OldBusy, + non_busy = OldNonBusy} | OldCpuUtils], + Acc) -> + {PreBusy, GotBusy} = state_list_diff(NewBusy, OldBusy), + {NonBusy, GotNonBusy} = state_list_diff(NewNonBusy, OldNonBusy), + Busy = case GotBusy orelse GotNonBusy of + true -> + PreBusy; + false -> + %% This can happen if cpu_sup:util/[0,1] is called + %% again immediately after the previous call has + %% returned. Because the user obviously is doing + %% something we charge "user". + lists:map(fun ({user, 0}) -> {user, 1}; + ({_, 0} = StateTup) -> StateTup + end, + PreBusy) + end, +cpu_util_diff(NewCpuUtils, OldCpuUtils, [#cpu_util{cpu = Cpu, + busy = Busy, + non_busy = NonBusy} + | Acc]); + +%% A new cpu appeared +cpu_util_diff([#cpu_util{cpu = NC}|_] = New, + [#cpu_util{cpu = OC}|_] = Old, + Acc) when NC < OC -> +cpu_util_diff(New, [#cpu_util{cpu = NC}|Old], Acc); +cpu_util_diff([#cpu_util{cpu = NC}|_] = New, [], Acc) -> +cpu_util_diff(New, [#cpu_util{cpu = NC}], Acc); + +%% An old cpu disappeared +cpu_util_diff([#cpu_util{cpu = NC}|Ns], + [#cpu_util{cpu = OC}|_] = Old, + Acc) when NC > OC -> +cpu_util_diff(Ns, Old, Acc); +cpu_util_diff([], _Old, Acc) -> +cpu_util_diff([], [], Acc). + +cpu_util_rel(NewCpuUtils, OldCpuUtils, Detailed, PerCpu) -> + cpu_util_rel(cpu_util_diff(NewCpuUtils, OldCpuUtils), Detailed, PerCpu). + +%% +%% cpu_util_rel/3 takes a difference list of cpu_util records as first +%% argument, a boolean determining if the result should be detailed as +%% second argument, and a boolean determining if the result should be +%% per cpu as third argument. The first argument (the difference list) +%% has to be sorted on cpu index in descending order. +%% +cpu_util_rel(CUDiff, false, false) -> + {B, T} = lists:foldl(fun (#cpu_util{busy = BusyList, + non_busy = NonBusyList}, + {BusyAcc, TotAcc}) -> + Busy = state_list_sum(BusyList), + NonBusy = state_list_sum(NonBusyList), + {BusyAcc+Busy, TotAcc+Busy+NonBusy} + end, + {0, 0}, + CUDiff), + BRel = B/T*100, + {all, BRel, 100-BRel, []}; +cpu_util_rel(CUDiff, true, false) -> + cpu_util_rel_det(CUDiff, #cpu_util{cpu = [], busy = [], non_busy = []}); +cpu_util_rel(CUDiff, false, true) -> + cpu_util_rel_pcpu(CUDiff, []); +cpu_util_rel(CUDiff, true, true) -> + cpu_util_rel_det_pcpu(CUDiff, []). + +cpu_util_rel_pcpu([], Acc) -> + Acc; +cpu_util_rel_pcpu([#cpu_util{cpu = C, + busy = BusyList, + non_busy = NonBusyList} | Rest], Acc) -> + Busy = state_list_sum(BusyList), + NonBusy = state_list_sum(NonBusyList), + Tot = Busy + NonBusy, + cpu_util_rel_pcpu(Rest, [{C, Busy/Tot*100, NonBusy/Tot*100, []}|Acc]). + +cpu_util_rel_det([], #cpu_util{cpu = CpuAcc, + busy = BusyAcc, + non_busy = NonBusyAcc}) -> + Total = state_list_sum(BusyAcc) + state_list_sum(NonBusyAcc), + {CpuAcc, mk_rel_states(BusyAcc,Total), mk_rel_states(NonBusyAcc,Total), []}; +cpu_util_rel_det([#cpu_util{cpu = Cpu, + busy = Busy, + non_busy = NonBusy} | Rest], + #cpu_util{cpu = CpuAcc, + busy = BusyAcc, + non_busy = NonBusyAcc}) -> + cpu_util_rel_det(Rest, #cpu_util{cpu = [Cpu|CpuAcc], + busy = state_list_add(Busy, + BusyAcc), + non_busy = state_list_add(NonBusy, + NonBusyAcc)}). + +cpu_util_rel_det_pcpu([], Acc) -> + Acc; +cpu_util_rel_det_pcpu([#cpu_util{cpu = Cpu, + busy = Busy, + non_busy = NonBusy}| Rest], Acc) -> + Total = state_list_sum(Busy) + state_list_sum(NonBusy), + cpu_util_rel_det_pcpu(Rest, + [{Cpu, + mk_rel_states(Busy, Total), + mk_rel_states(NonBusy, Total), + []} | Acc]). + +mk_rel_states(States, Total) -> + lists:map(fun ({State, Value}) -> {State, 100*Value/Total} end, States). + +state_list_sum(StateList) -> + lists:foldl(fun ({_, X}, Acc) -> Acc+X end, 0, StateList). + +state_list_diff([],[]) -> + {[], false}; +state_list_diff([{State,ValueNew}|RestNew], []) -> + state_list_diff([{State, ValueNew} | RestNew], [{State, 0}]); +state_list_diff([{State,ValueNew}|RestNew], [{State,ValueOld}|RestOld]) -> + ValDiff = val_diff(State, ValueNew, ValueOld), + {RestStateDiff, FoundDiff} = state_list_diff(RestNew, RestOld), + {[{State, ValDiff} | RestStateDiff], FoundDiff orelse ValDiff /= 0}. + +state_list_add([],[]) -> + []; +state_list_add([{State, ValueA}|RestA], []) -> + [{State, ValueA} | state_list_add(RestA, [])]; +state_list_add([{State, ValueA} | RestA], [{State, ValueB} | RestB]) -> + [{State, ValueA + ValueB} | state_list_add(RestA, RestB)]. + +one_step_backwards(State, New, Old) -> + case os:type() of + {unix, linux} -> + %% This should never happen! But values sometimes takes a step + %% backwards on linux. We'll ignore it as long as it's only + %% one step... + 0; + _ -> + val_diff2(State, New, Old) + end. + +val_diff(State, New, Old) when New == Old - 1 -> + one_step_backwards(State, New, Old); +val_diff(State, ?MAX_UINT32, 0) -> + one_step_backwards(State, ?MAX_UINT32, 0); +val_diff(State, New, Old) -> + val_diff2(State, New, Old). + +val_diff2(State, New, Old) when New > ?MAX_UINT32; Old > ?MAX_UINT32 -> + %% We obviously got uints > 32 bits + ensure_positive_diff(State, New - Old); +val_diff2(State, New, Old) when New < Old -> + %% 32-bit integer wrapped + ensure_positive_diff(State, (?MAX_UINT32 + 1) + New - Old); +val_diff2(_State, New, Old) -> + New - Old. + +ensure_positive_diff(_State, Diff) when Diff >= 0 -> + Diff; +ensure_positive_diff(State, Diff) -> + throw({error, {negative_diff, State, Diff}}). +%%---------------------------------------------------------------------- +%% END: tainted internal functions +%%---------------------------------------------------------------------- + +%%---------------------------------------------------------------------- +%% cpu_sup measurement server wrapper +%%---------------------------------------------------------------------- + +measurement_server_call(Pid, Request) -> + Timeout = 5000, + Pid ! {self(), Request}, + receive + {data, Data} -> Data + after Timeout -> + {error, timeout} + end. + +measurement_server_restart(Pid) -> + exit(Pid, kill), + measurement_server_start(). + +measurement_server_start() -> + spawn(fun() -> measurement_server_init() end). + +measurement_server_init() -> + process_flag(trap_exit, true), + OS = os:type(), + Server = case OS of + {unix, Flavor} when Flavor==sunos; + Flavor==linux -> + port_server_start(); + {unix, Flavor} when Flavor==darwin; + Flavor==freebsd; + Flavor==dragonfly; + Flavor==openbsd; + Flavor==irix64; + Flavor==irix -> + not_used; + _ -> + exit({unsupported_os, OS}) + end, + measurement_server_loop(#internal{port=Server, os_type=OS}). + +measurement_server_loop(State) -> + receive + {_, quit} -> + State#internal.port ! {self(), ?quit}, + ok; + {'DOWN',Monitor,process,_,_} -> + measurement_server_loop(State#internal{ util = lists:keydelete( + Monitor, + 2, + State#internal.util)}); + {Pid, {?util, D, PC, Client}} -> + {Monitor, OldCpuUtil, Utils2} = case keysearchdelete(Client, 1, State#internal.util) of + {{value, {Client, Mon, U}}, Us} -> {Mon, U, Us}; + {false, Us} -> {erlang:monitor(process, Client), [], Us} + end, + try get_util_measurement(?util, State) of + NewCpuUtil -> + Result = cpu_util_rel(NewCpuUtil, OldCpuUtil, D, PC), + Pid ! {data, Result}, + measurement_server_loop(State#internal{util=[{Client,Monitor,NewCpuUtil}|Utils2]}) + catch + Error -> + Pid ! {error, Error}, + measurement_server_loop(State) + end; + {Pid, Request} -> + try get_uint32_measurement(Request, State) of + Result -> Pid ! {data, Result} + catch + Error -> Pid ! {error, Error} + end, + measurement_server_loop(State); + {'EXIT', Pid, _n} when State#internal.port == Pid -> + measurement_server_loop(State#internal{port = port_server_start()}); + _Other -> + measurement_server_loop(State) + end. + +%%---------------------------------------------------------------------- +%% cpu_sup port program server wrapper +%%---------------------------------------------------------------------- + +port_server_call(Pid, Command) -> + Pid ! {self(), Command}, + receive + {Pid, {data, Result}} -> Result; + {Pid, {error, Reason}} -> {error, Reason} + end. + +port_server_start() -> + Timeout = 6000, + Pid = spawn_link(fun() -> port_server_init(Timeout) end), + Pid ! {self(), ?ping}, + receive + {Pid, {data,4711}} -> Pid; + {error,Reason} -> {error, Reason} + after Timeout -> + {error, timeout} + end. + +port_server_init(Timeout) -> + Port = start_portprogram(), + port_server_loop(Port, Timeout). + +port_server_loop(Port, Timeout) -> + receive + + % Adjust timeout + {Pid, {timeout, Timeout}} -> + Pid ! {data, Timeout}, + port_server_loop(Port, Timeout); + % Number of processors + {Pid, ?nprocs} -> + port_command(Port, ?nprocs), + Result = port_receive_uint32(Port, Timeout), + Pid ! {self(), {data, Result}}, + port_server_loop(Port, Timeout); + + % Average load for the past minute + {Pid, ?avg1} -> + port_command(Port, ?avg1), + Result = port_receive_uint32(Port, Timeout), + Pid ! {self(), {data, Result}}, + port_server_loop(Port, Timeout); + + % Average load for the past five minutes + {Pid, ?avg5} -> + port_command(Port, ?avg5), + Result = port_receive_uint32(Port, Timeout), + Pid ! {self(), {data, Result}}, + port_server_loop(Port, Timeout); + + % Average load for the past 15 minutes + {Pid, ?avg15} -> + port_command(Port, ?avg15), + Result = port_receive_uint32(Port, Timeout), + Pid ! {self(), {data, Result}}, + port_server_loop(Port, Timeout); + + {Pid, ?util} -> + port_command(Port, ?util), + Result = port_receive_util(Port, Timeout), + Pid ! {self(), {data, Result}}, + port_server_loop(Port, Timeout); + + % Port ping + {Pid, ?ping} -> + port_command(Port, ?ping), + Result = port_receive_uint32(Port, Timeout), + Pid ! {self(), {data, Result}}, + port_server_loop(Port, Timeout); + + % Close port and this server + {Pid, ?quit} -> + port_command(Port, ?quit), + port_close(Port), + Pid ! {self(), {data, quit}}, + ok; + + % Ignore other commands + _ -> port_server_loop(Port, Timeout) + end. + +port_receive_uint32( Port, Timeout) -> port_receive_uint32(Port, Timeout, []). +port_receive_uint32(_Port, _Timeout, [D3,D2,D1,D0]) -> ?INT32(D3,D2,D1,D0); +port_receive_uint32(_Port, _Timeout, [_,_,_,_ | G]) -> exit({port_garbage, G}); +port_receive_uint32(Port, Timeout, D) -> + receive + {'EXIT', Port, Reason} -> exit({port_exit, Reason}); + {Port, {data, ND}} -> port_receive_uint32(Port, Timeout, D ++ ND) + after Timeout -> exit(timeout_uint32) end. + +port_receive_util(Port, Timeout) -> + receive + {Port, {data, [ NP3,NP2,NP1,NP0, % Number of processors + NE3,NE2,NE1,NE0 % Number of entries per processor + | CpuData]}} -> + port_receive_cpu_util( ?INT32(NP3,NP2,NP1,NP0), + ?INT32(NE3,NE2,NE1,NE0), + CpuData, []); + {'EXIT', Port, Reason} -> exit({port_exit, Reason}) + after Timeout -> exit(timeout_util) end. + +% per processor receive loop +port_receive_cpu_util(0, _NE, [], CpuList) -> + % Return in ascending cpu_id order + lists:reverse(CpuList); +port_receive_cpu_util(0, _NE, Garbage, _) -> + exit( {port_garbage, Garbage}); +port_receive_cpu_util(NP, NE, CpuData, CpuList) -> + {CpuUtil, Rest} = port_receive_cpu_util_entries(NE, #cpu_util{}, CpuData), + port_receive_cpu_util(NP - 1, NE, Rest, [ CpuUtil | CpuList]). + +% per entry receive loop +port_receive_cpu_util_entries(0, CU, Rest) -> + {CU, Rest}; +port_receive_cpu_util_entries(NE, CU, + [ CID3, CID2, CID1, CID0, + Val3, Val2, Val1, Val0 | + CpuData]) -> + + TagId = ?INT32(CID3,CID2,CID1,CID0), + Value = ?INT32(Val3,Val2,Val1,Val0), + + % Conversions from integers to atoms + case TagId of + ?cu_cpu_id -> + NewCU = CU#cpu_util{cpu = Value}, + port_receive_cpu_util_entries(NE - 1, NewCU, CpuData); + ?cu_user -> + NewCU = CU#cpu_util{ + busy = [{user, Value} | CU#cpu_util.busy] }, + port_receive_cpu_util_entries(NE - 1, NewCU, CpuData); + ?cu_nice_user -> + NewCU = CU#cpu_util{ + busy = [{nice_user, Value} | CU#cpu_util.busy] }, + port_receive_cpu_util_entries(NE - 1, NewCU, CpuData); + ?cu_kernel -> + NewCU = CU#cpu_util{ + busy = [{kernel, Value} | CU#cpu_util.busy] }, + port_receive_cpu_util_entries(NE - 1, NewCU, CpuData); + ?cu_io_wait -> + NewCU = CU#cpu_util{ + non_busy = [{wait, Value} | CU#cpu_util.non_busy] }, + port_receive_cpu_util_entries(NE - 1, NewCU, CpuData); + ?cu_idle -> + NewCU = CU#cpu_util{ + non_busy = [{idle, Value} | CU#cpu_util.non_busy] }, + port_receive_cpu_util_entries(NE - 1, NewCU, CpuData); + ?cu_hard_irq -> + NewCU = CU#cpu_util{ + busy = [{hard_irq, Value} | CU#cpu_util.busy] }, + port_receive_cpu_util_entries(NE - 1, NewCU, CpuData); + ?cu_soft_irq -> + NewCU = CU#cpu_util{ + busy = [{soft_irq, Value} | CU#cpu_util.busy] }, + port_receive_cpu_util_entries(NE - 1, NewCU, CpuData); + ?cu_steal -> + NewCU = CU#cpu_util{ + non_busy = [{steal, Value} | CU#cpu_util.non_busy] }, + port_receive_cpu_util_entries(NE - 1, NewCU, CpuData); + Unhandled -> + exit({unexpected_type_id, Unhandled}) + end; +port_receive_cpu_util_entries(_, _, Data) -> + exit({data_mismatch, Data}). + +start_portprogram() -> + Command = filename:join([code:priv_dir(os_mon), "bin", "cpu_sup"]), + Port = open_port({spawn, Command}, [stream]), + port_command(Port, ?ping), + 4711 = port_receive_uint32(Port, 5000), + Port. diff --git a/lib/os_mon/src/disksup.erl b/lib/os_mon/src/disksup.erl new file mode 100644 index 0000000000..3340f7ee72 --- /dev/null +++ b/lib/os_mon/src/disksup.erl @@ -0,0 +1,369 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(disksup). +-behaviour(gen_server). + +%% API +-export([start_link/0]). +-export([get_disk_data/0, + get_check_interval/0, set_check_interval/1, + get_almost_full_threshold/0, set_almost_full_threshold/1]). +-export([dummy_reply/1, param_type/2, param_default/1]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +%% Other exports +-export([format_status/2]). + +-record(state, {threshold, timeout, os, diskdata = [],port}). + +%%---------------------------------------------------------------------- +%% API +%%---------------------------------------------------------------------- + +start_link() -> + gen_server:start_link({local, disksup}, disksup, [], []). + +get_disk_data() -> + os_mon:call(disksup, get_disk_data). + +get_check_interval() -> + os_mon:call(disksup, get_check_interval). +set_check_interval(Minutes) -> + case param_type(disk_space_check_interval, Minutes) of + true -> + os_mon:call(disksup, {set_check_interval, Minutes}); + false -> + erlang:error(badarg) + end. + +get_almost_full_threshold() -> + os_mon:call(disksup, get_almost_full_threshold). +set_almost_full_threshold(Float) -> + case param_type(disk_almost_full_threshold, Float) of + true -> + os_mon:call(disksup, {set_almost_full_threshold, Float}); + false -> + erlang:error(badarg) + end. + +dummy_reply(get_disk_data) -> + [{"none", 0, 0}]; +dummy_reply(get_check_interval) -> + minutes_to_ms(os_mon:get_env(disksup, disk_space_check_interval)); +dummy_reply({set_check_interval, _}) -> + ok; +dummy_reply(get_almost_full_threshold) -> + round(os_mon:get_env(disksup, disk_almost_full_threshold) * 100); +dummy_reply({set_almost_full_threshold, _}) -> + ok. + +param_type(disk_space_check_interval, Val) when is_integer(Val), + Val>=1 -> true; +param_type(disk_almost_full_threshold, Val) when is_number(Val), + 0=<Val, + Val=<1 -> true; +param_type(_Param, _Val) -> false. + +param_default(disk_space_check_interval) -> 30; +param_default(disk_almost_full_threshold) -> 0.80. + +%%---------------------------------------------------------------------- +%% gen_server callbacks +%%---------------------------------------------------------------------- + +init([]) -> + process_flag(trap_exit, true), + process_flag(priority, low), + + OS = get_os(), + Port = case OS of + {unix, Flavor} when Flavor==sunos4; + Flavor==solaris; + Flavor==freebsd; + Flavor==dragonfly; + Flavor==darwin; + Flavor==linux; + Flavor==openbsd; + Flavor==irix64; + Flavor==irix -> + start_portprogram(); + {win32, _OSname} -> + not_used; + _ -> + exit({unsupported_os, OS}) + end, + + %% Read the values of some configuration parameters + Threshold = os_mon:get_env(disksup, disk_almost_full_threshold), + Timeout = os_mon:get_env(disksup, disk_space_check_interval), + + %% Initiation first disk check + self() ! timeout, + + {ok, #state{port=Port, os=OS, + threshold=round(Threshold*100), + timeout=minutes_to_ms(Timeout)}}. + +handle_call(get_disk_data, _From, State) -> + {reply, State#state.diskdata, State}; + +handle_call(get_check_interval, _From, State) -> + {reply, State#state.timeout, State}; +handle_call({set_check_interval, Minutes}, _From, State) -> + Timeout = minutes_to_ms(Minutes), + {reply, ok, State#state{timeout=Timeout}}; + +handle_call(get_almost_full_threshold, _From, State) -> + {reply, State#state.threshold, State}; +handle_call({set_almost_full_threshold, Float}, _From, State) -> + Threshold = round(Float * 100), + {reply, ok, State#state{threshold=Threshold}}; + +handle_call({set_threshold, Threshold}, _From, State) -> % test only + {reply, ok, State#state{threshold=Threshold}}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(timeout, State) -> + NewDiskData = check_disk_space(State#state.os, State#state.port, + State#state.threshold), + timer:send_after(State#state.timeout, timeout), + {noreply, State#state{diskdata = NewDiskData}}; +handle_info({'EXIT', _Port, Reason}, State) -> + {stop, {port_died, Reason}, State#state{port=not_used}}; +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, State) -> + clear_alarms(), + case State#state.port of + not_used -> + ok; + Port -> + port_close(Port) + end, + ok. + +%% os_mon-2.0.1 +%% For live downgrade to/upgrade from os_mon-1.8[.1] +code_change(Vsn, PrevState, "1.8") -> + case Vsn of + + %% Downgrade from this version + {down, _Vsn} -> + State = case PrevState#state.port of + not_used -> PrevState#state{port=noport}; + _ -> PrevState + end, + {ok, State}; + + %% Upgrade to this version + _Vsn -> + State = case PrevState#state.port of + noport -> PrevState#state{port=not_used}; + _ -> PrevState + end, + {ok, State} + end; +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%---------------------------------------------------------------------- +%% Other exports +%%---------------------------------------------------------------------- + +format_status(_Opt, [_PDict, #state{os = OS, threshold = Threshold, + timeout = Timeout, + diskdata = DiskData}]) -> + [{data, [{"OS", OS}, + {"Timeout", Timeout}, + {"Threshold", Threshold}, + {"DiskData", DiskData}]}]. + +%%---------------------------------------------------------------------- +%% Internal functions +%%---------------------------------------------------------------------- + +get_os() -> + case os:type() of + {unix, sunos} -> + case os:version() of + {5,_,_} -> {unix, solaris}; + {4,_,_} -> {unix, sunos4}; + V -> exit({unknown_os_version, V}) + end; + {unix, irix64} -> {unix, irix}; + OS -> + OS + end. + +%%--Port handling functions--------------------------------------------- + +start_portprogram() -> + open_port({spawn, "sh -s disksup 2>&1"}, [stream]). + +my_cmd(Cmd0, Port) -> + %% Insert a new line after the command, in case the command + %% contains a comment character + Cmd = io_lib:format("(~s\n) </dev/null; echo \"\^M\"\n", [Cmd0]), + Port ! {self(), {command, [Cmd, 10]}}, + get_reply(Port, []). + +get_reply(Port, O) -> + receive + {Port, {data, N}} -> + case newline(N, O) of + {ok, Str} -> Str; + {more, Acc} -> get_reply(Port, Acc) + end; + {'EXIT', Port, Reason} -> + exit({port_died, Reason}) + end. + +newline([13|_], B) -> {ok, lists:reverse(B)}; +newline([H|T], B) -> newline(T, [H|B]); +newline([], B) -> {more, B}. + +%%--Check disk space---------------------------------------------------- + +check_disk_space({win32,_}, not_used, Threshold) -> + Result = os_mon_sysinfo:get_disk_info(), + check_disks_win32(Result, Threshold); +check_disk_space({unix, solaris}, Port, Threshold) -> + Result = my_cmd("/usr/bin/df -lk", Port), + check_disks_solaris(skip_to_eol(Result), Threshold); +check_disk_space({unix, irix}, Port, Threshold) -> + Result = my_cmd("/usr/sbin/df -lk",Port), + check_disks_irix(skip_to_eol(Result), Threshold); +check_disk_space({unix, linux}, Port, Threshold) -> + Result = my_cmd("/bin/df -lk", Port), + check_disks_solaris(skip_to_eol(Result), Threshold); +check_disk_space({unix, dragonfly}, Port, Threshold) -> + Result = my_cmd("/bin/df -k -t ufs,hammer", Port), + check_disks_solaris(skip_to_eol(Result), Threshold); +check_disk_space({unix, freebsd}, Port, Threshold) -> + Result = my_cmd("/bin/df -k -t ufs", Port), + check_disks_solaris(skip_to_eol(Result), Threshold); +check_disk_space({unix, openbsd}, Port, Threshold) -> + Result = my_cmd("/bin/df -k -t ffs", Port), + check_disks_solaris(skip_to_eol(Result), Threshold); +check_disk_space({unix, sunos4}, Port, Threshold) -> + Result = my_cmd("df", Port), + check_disks_solaris(skip_to_eol(Result), Threshold); +check_disk_space({unix, darwin}, Port, Threshold) -> + Result = my_cmd("/bin/df -k -t ufs,hfs", Port), + check_disks_solaris(skip_to_eol(Result), Threshold). + +% This code works for Linux and FreeBSD as well +check_disks_solaris("", _Threshold) -> + []; +check_disks_solaris("\n", _Threshold) -> + []; +check_disks_solaris(Str, Threshold) -> + case io_lib:fread("~s~d~d~d~d%~s", Str) of + {ok, [_FS, KB, _Used, _Avail, Cap, MntOn], RestStr} -> + if + Cap >= Threshold -> + set_alarm({disk_almost_full, MntOn}, []); + true -> + clear_alarm({disk_almost_full, MntOn}) + end, + [{MntOn, KB, Cap} | + check_disks_solaris(RestStr, Threshold)]; + _Other -> + check_disks_solaris(skip_to_eol(Str),Threshold) + end. + +%% Irix: like Linux with an extra FS type column and no '%'. +check_disks_irix("", _Threshold) -> []; +check_disks_irix("\n", _Threshold) -> []; +check_disks_irix(Str, Threshold) -> + case io_lib:fread("~s~s~d~d~d~d~s", Str) of + {ok, [_FS, _FSType, KB, _Used, _Avail, Cap, MntOn], RestStr} -> + if Cap >= Threshold -> set_alarm({disk_almost_full, MntOn}, []); + true -> clear_alarm({disk_almost_full, MntOn}) end, + [{MntOn, KB, Cap} | check_disks_irix(RestStr, Threshold)]; + _Other -> + check_disks_irix(skip_to_eol(Str),Threshold) + end. + +check_disks_win32([], _Threshold) -> + []; +check_disks_win32([H|T], Threshold) -> + case io_lib:fread("~s~s~d~d~d", H) of + {ok, [Drive,"DRIVE_FIXED",BAvail,BTot,_TotFree], _RestStr} -> + Cap = trunc((BTot-BAvail) / BTot * 100), + if + Cap >= Threshold -> + set_alarm({disk_almost_full, Drive}, []); + true -> + clear_alarm({disk_almost_full, Drive}) + end, + [{Drive, BTot div 1024, Cap} | + check_disks_win32(T, Threshold)]; % Return Total Capacity in Kbytes + {ok,_,_RestStr} -> + check_disks_win32(T,Threshold); + _Other -> + [] + end. + +%%--Alarm handling------------------------------------------------------ + +set_alarm(AlarmId, AlarmDescr) -> + case get(AlarmId) of + set -> + ok; + undefined -> + alarm_handler:set_alarm({AlarmId, AlarmDescr}), + put(AlarmId, set) + end. + +clear_alarm(AlarmId) -> + case get(AlarmId) of + set -> + alarm_handler:clear_alarm(AlarmId), + erase(AlarmId); + undefined -> + ok + end. + +clear_alarms() -> + lists:foreach(fun({{disk_almost_full, _MntOn} = AlarmId, set}) -> + alarm_handler:clear_alarm(AlarmId); + (_Other) -> + ignore + end, + get()). + +%%--Auxiliary----------------------------------------------------------- + +%% Type conversion +minutes_to_ms(Minutes) -> + trunc(60000*Minutes). + +skip_to_eol([]) -> + []; +skip_to_eol([$\n | T]) -> + T; +skip_to_eol([_ | T]) -> + skip_to_eol(T). diff --git a/lib/os_mon/src/memsup.erl b/lib/os_mon/src/memsup.erl new file mode 100644 index 0000000000..822e1f939c --- /dev/null +++ b/lib/os_mon/src/memsup.erl @@ -0,0 +1,1022 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(memsup). +-behaviour(gen_server). + +%% API +-export([start_link/0]). % for supervisor +-export([get_memory_data/0, get_system_memory_data/0, + get_check_interval/0, set_check_interval/1, + get_procmem_high_watermark/0, set_procmem_high_watermark/1, + get_sysmem_high_watermark/0, set_sysmem_high_watermark/1, + get_helper_timeout/0, set_helper_timeout/1, + get_os_wordsize/0]). +-export([dummy_reply/1, param_type/2, param_default/1]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +%% Other exports +-export([format_status/2]). + +-include("memsup.hrl"). + +-record(state, + {os, % {OSfamily,OSname} | OSfamily + port_mode, % bool() + + mem_usage, % undefined | {Alloc, Total} + worst_mem_user, % undefined | {Pid, Alloc} + + sys_only, % bool() memsup_system_only + timeout, % int() memory_check_interval, ms + helper_timeout, % int() memsup_helper_timeout, ms + sys_mem_watermark, % float() system_memory_high_watermark, % + proc_mem_watermark, % float() process_memory_high_watermark, % + + pid, % undefined | pid() + wd_timer, % undefined | TimerRef + ext_wd_timer, % undefined | TimerRef + pending = [], % [reg | {reg,From} | {ext,From}] + ext_pending = [] % [{ext,From}] + }). + +%%---------------------------------------------------------------------- +%% API +%%---------------------------------------------------------------------- + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +get_os_wordsize() -> + os_mon:call(memsup, get_os_wordsize, infinity). + +get_memory_data() -> + os_mon:call(memsup, get_memory_data, infinity). + +get_system_memory_data() -> + os_mon:call(memsup, get_system_memory_data, infinity). + +get_check_interval() -> + os_mon:call(memsup, get_check_interval, infinity). +set_check_interval(Minutes) -> + case param_type(memory_check_interval, Minutes) of + true -> + MS = minutes_to_ms(Minutes), % for backwards compatibility + os_mon:call(memsup, {set_check_interval, MS}, infinity); + false -> + erlang:error(badarg) + end. + +get_procmem_high_watermark() -> + os_mon:call(memsup, get_procmem_high_watermark, infinity). +set_procmem_high_watermark(Float) -> + case param_type(process_memory_high_watermark, Float) of + true -> + os_mon:call(memsup, {set_procmem_high_watermark, Float}, + infinity); + false -> + erlang:error(badarg) + end. + +get_sysmem_high_watermark() -> + os_mon:call(memsup, get_sysmem_high_watermark, infinity). +set_sysmem_high_watermark(Float) -> + case param_type(system_memory_high_watermark, Float) of + true -> + os_mon:call(memsup, {set_sysmem_high_watermark, Float}, + infinity); + false -> + erlang:error(badarg) + end. + +get_helper_timeout() -> + os_mon:call(memsup, get_helper_timeout, infinity). +set_helper_timeout(Seconds) -> + case param_type(memsup_helper_timeout, Seconds) of + true -> + os_mon:call(memsup, {set_helper_timeout, Seconds}); + false -> + erlang:error(badarg) + end. + +dummy_reply(get_memory_data) -> + dummy_reply(get_memory_data, + os_mon:get_env(memsup, memsup_system_only)); +dummy_reply(get_system_memory_data) -> + []; +dummy_reply(get_os_wordsize) -> + 0; +dummy_reply(get_check_interval) -> + minutes_to_ms(os_mon:get_env(memsup, memory_check_interval)); +dummy_reply({set_check_interval, _}) -> + ok; +dummy_reply(get_procmem_high_watermark) -> + trunc(100 * os_mon:get_env(memsup, process_memory_high_watermark)); +dummy_reply({set_procmem_high_watermark, _}) -> + ok; +dummy_reply(get_sysmem_high_watermark) -> + trunc(100 * os_mon:get_env(memsup, system_memory_high_watermark)); +dummy_reply({set_sysmem_high_watermark, _}) -> + ok; +dummy_reply(get_helper_timeout) -> + os_mon:get_env(memsup, memsup_helper_timeout); +dummy_reply({set_helper_timeout, _}) -> + ok. +dummy_reply(get_memory_data, true) -> + {0,0,undefined}; +dummy_reply(get_memory_data, false) -> + {0,0,{self(),0}}. + +param_type(memsup_system_only, Val) when Val==true; Val==false -> true; +param_type(memory_check_interval, Val) when is_integer(Val), + Val>0 -> true; +param_type(memsup_helper_timeout, Val) when is_integer(Val), + Val>0 -> true; +param_type(system_memory_high_watermark, Val) when is_number(Val), + 0=<Val, + Val=<1 -> true; +param_type(process_memory_high_watermark, Val) when is_number(Val), + 0=<Val, + Val=<1 -> true; +param_type(_Param, _Val) -> false. + +param_default(memsup_system_only) -> false; +param_default(memory_check_interval) -> 1; +param_default(memsup_helper_timeout) -> 30; +param_default(system_memory_high_watermark) -> 0.80; +param_default(process_memory_high_watermark) -> 0.05. + +%%---------------------------------------------------------------------- +%% gen_server callbacks +%%---------------------------------------------------------------------- + +init([]) -> + process_flag(trap_exit, true), + process_flag(priority, low), + + OS = os:type(), + PortMode = case OS of + {unix, darwin} -> false; + {unix, freebsd} -> false; + % Linux supports this. + {unix, linux} -> true; + {unix, openbsd} -> true; + {unix, irix64} -> true; + {unix, irix} -> true; + {unix, sunos} -> true; + {win32, _OSname} -> false; + vxworks -> true; + _ -> + exit({unsupported_os, OS}) + end, + Pid = if + PortMode -> + spawn_link(fun() -> port_init() end); + not PortMode -> + undefined + end, + + %% Read the values of some configuration parameters + SysOnly = os_mon:get_env(memsup, memsup_system_only), + Timeout = os_mon:get_env(memsup, memory_check_interval), + HelperTimeout = os_mon:get_env(memsup, memsup_helper_timeout), + SysMem = os_mon:get_env(memsup, system_memory_high_watermark), + ProcMem = os_mon:get_env(memsup, process_memory_high_watermark), + + %% Initiate first data collection + self() ! time_to_collect, + + {ok, #state{os=OS, port_mode=PortMode, + + sys_only = SysOnly, + timeout = minutes_to_ms(Timeout), + helper_timeout = sec_to_ms(HelperTimeout), + sys_mem_watermark = SysMem, + proc_mem_watermark = ProcMem, + + pid=Pid}}. + +handle_call(get_os_wordsize, _From, State) -> + Wordsize = get_os_wordsize(State#state.os), + {reply, Wordsize, State}; +handle_call(get_memory_data, From, State) -> + %% Return result of latest memory check + case State#state.mem_usage of + {Alloc, Total} -> + Worst = State#state.worst_mem_user, + {reply, {Total, Alloc, Worst}, State}; + + %% Special case: get_memory_data called before any memory data + %% has been collected + undefined -> + case State#state.wd_timer of + undefined -> + WDTimer = erlang:send_after(State#state.timeout, + self(), + reg_collection_timeout), + Pending = [{reg,From}], + if + State#state.port_mode -> + State#state.pid ! {self(), collect_sys}, + {noreply, State#state{wd_timer=WDTimer, + pending=Pending}}; + true -> + OS = State#state.os, + Self = self(), + Pid = spawn_link(fun() -> + MU = get_memory_usage(OS), + Self ! {collected_sys,MU} + end), + {noreply, State#state{pid=Pid, + wd_timer=WDTimer, + pending=Pending}} + end; + _TimerRef -> + Pending = [{reg,From} | State#state.pending], + {noreply, State#state{pending=Pending}} + end + end; + +handle_call(get_system_memory_data,From,#state{port_mode=true}=State) -> + %% When using a port, the extensive memory collection is slightly + %% different than a regular one + case State#state.ext_wd_timer of + undefined -> + WDTimer = erlang:send_after(State#state.helper_timeout, + self(), + ext_collection_timeout), + State#state.pid ! {self(), collect_ext_sys}, + {noreply, State#state{ext_wd_timer=WDTimer, + ext_pending=[{ext,From}]}}; + _TimerRef -> + Pending = [{ext,From} | State#state.ext_pending], + {noreply, State#state{ext_pending=Pending}} + end; +handle_call(get_system_memory_data, From, State) -> + %% When not using a port, the regular memory collection is used + %% for extensive memory data as well + case State#state.wd_timer of + undefined -> + WDTimer = erlang:send_after(State#state.helper_timeout, + self(), + reg_collection_timeout), + OS = State#state.os, + Self = self(), + Pid = spawn_link(fun() -> + MemUsage = get_memory_usage(OS), + Self ! {collected_sys, MemUsage} + end), + {noreply, State#state{pid=Pid, wd_timer=WDTimer, + pending=[{ext,From}]}}; + _TimerRef -> + Pending = [{ext,From} | State#state.pending], + {noreply, State#state{pending=Pending}} + end; + +handle_call(get_check_interval, _From, State) -> + {reply, State#state.timeout, State}; +handle_call({set_check_interval, MS}, _From, State) -> + {reply, ok, State#state{timeout=MS}}; + +handle_call(get_procmem_high_watermark, _From, State) -> + {reply, trunc(100 * State#state.proc_mem_watermark), State}; +handle_call({set_procmem_high_watermark, Float}, _From, State) -> + {reply, ok, State#state{proc_mem_watermark=Float}}; + +handle_call(get_sysmem_high_watermark, _From, State) -> + {reply, trunc(100 * State#state.sys_mem_watermark), State}; +handle_call({set_sysmem_high_watermark, Float}, _From, State) -> + {reply, ok, State#state{sys_mem_watermark=Float}}; + +handle_call(get_helper_timeout, _From, State) -> + {reply, ms_to_sec(State#state.helper_timeout), State}; +handle_call({set_helper_timeout, Seconds}, _From, State) -> + {reply, ok, State#state{helper_timeout=sec_to_ms(Seconds)}}; + +%% The following are only for test purposes (whitebox testing). +handle_call({set_sys_hw, HW}, _From, State) -> + {reply, ok, State#state{sys_mem_watermark=HW}}; +handle_call({set_pid_hw, HW}, _From, State) -> + {reply, ok, State#state{proc_mem_watermark=HW}}; +handle_call(get_state, _From, State) -> + {reply, State, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +%% It's time to check memory +handle_info(time_to_collect, State) -> + case State#state.wd_timer of + undefined -> + WDTimer = erlang:send_after(State#state.helper_timeout, + self(), + reg_collection_timeout), + if + State#state.port_mode -> + State#state.pid ! {self(), collect_sys}, + {noreply, State#state{wd_timer=WDTimer, + pending=[reg]}}; + true -> + OS = State#state.os, + Self = self(), + Pid = spawn_link(fun() -> + MU = get_memory_usage(OS), + Self ! {collected_sys,MU} + end), + {noreply, State#state{pid=Pid, wd_timer=WDTimer, + pending=[reg]}} + end; + _TimerRef -> + {noreply, State#state{pending=[reg|State#state.pending]}} + end; + +%% Memory data collected +handle_info({collected_sys, {Alloc,Total}}, State) -> + + %% Cancel watchdog timer (and as a security measure, + %% also flush any reg_collection_timeout message) + TimeSpent = case erlang:cancel_timer(State#state.wd_timer) of + false -> + State#state.helper_timeout; + TimeLeft -> + State#state.helper_timeout-TimeLeft + end, + flush(reg_collection_timeout), + + %% First check if this is the result of a periodic memory check + %% and update alarms and State if this is the case + State2 = + case lists:member(reg, State#state.pending) of + true -> + + %% Check if system alarm should be set/cleared + if + Alloc > State#state.sys_mem_watermark*Total -> + set_alarm(system_memory_high_watermark, []); + true -> + clear_alarm(system_memory_high_watermark) + end, + + %% Check if process data should be collected + case State#state.sys_only of + false -> + {Pid, Bytes} = get_worst_memory_user(), + Threshold= State#state.proc_mem_watermark*Total, + + %% Check if process alarm should be set/cleared + if + Bytes > Threshold -> + set_alarm(process_memory_high_watermark, + Pid); + true -> + clear_alarm(process_memory_high_watermark) + end, + + State#state{mem_usage={Alloc, Total}, + worst_mem_user={Pid, Bytes}}; + true -> + State#state{mem_usage={Alloc, Total}} + end; + false -> + State + end, + + %% Then send a reply to all waiting clients, in preserved time order + Worst = State2#state.worst_mem_user, + SysMemUsage = get_ext_memory_usage(State2#state.os, {Alloc,Total}), + reply(State2#state.pending, {Total,Alloc,Worst}, SysMemUsage), + + %% Last, if this was a periodic check, start a timer for the next + %% one. New timeout = interval-time spent collecting, + case lists:member(reg, State#state.pending) of + true -> + Time = case State2#state.timeout - TimeSpent of + MS when MS<0 -> + 0; + MS -> + MS + end, + erlang:send_after(Time, self(), time_to_collect); + false -> + ignore + end, + {noreply, State2#state{wd_timer=undefined, pending=[]}}; +handle_info({'EXIT', Pid, normal}, State) when is_pid(Pid) -> + %% Temporary pid terminating when job is done + {noreply, State}; + +%% Timeout during data collection +handle_info(reg_collection_timeout, State) -> + + %% Cancel memory collection (and as a security measure, + %% also flush any collected_sys message) + if + State#state.port_mode -> State#state.pid ! cancel; + true -> exit(State#state.pid, cancel) + end, + flush(collected_sys), + + %% Issue a warning message + Str = "OS_MON (memsup) timeout, no data collected~n", + error_logger:warning_msg(Str), + + %% Send a dummy reply to all waiting clients, preserving time order + reply(State#state.pending, + dummy_reply(get_memory_data, State#state.sys_only), + dummy_reply(get_system_memory_data)), + + %% If it is a periodic check which has timed out, start a timer for + %% the next one + %% New timeout = interval-helper timeout + case lists:member(reg, State#state.pending) of + true -> + Time = + case State#state.timeout-State#state.helper_timeout of + MS when MS<0 -> 0; + MS -> MS + end, + erlang:send_after(Time, self(), time_to_collect); + false -> + ignore + end, + {noreply, State#state{wd_timer=undefined, pending=[]}}; +handle_info({'EXIT', Pid, cancel}, State) when is_pid(Pid) -> + %% Temporary pid terminating as ordered + {noreply, State}; + +%% Extensive memory data collected (port_mode==true only) +handle_info({collected_ext_sys, SysMemUsage}, State) -> + + %% Cancel watchdog timer (and as a security mearure, + %% also flush any ext_collection_timeout message) + erlang:cancel_timer(State#state.ext_wd_timer), + flush(ext_collection_timeout), + + %% Send the reply to all waiting clients, preserving time order + reply(State#state.ext_pending, undef, SysMemUsage), + + {noreply, State#state{ext_wd_timer=undefined, ext_pending=[]}}; + +%% Timeout during ext memory data collection (port_mode==true only) +handle_info(ext_collection_timeout, State) -> + + %% Cancel memory collection (and as a security measure, + %% also flush any collected_ext_sys message) + State#state.pid ! ext_cancel, + flush(collected_ext_sys), + + %% Issue a warning message + Str = "OS_MON (memsup) timeout, no data collected~n", + error_logger:warning_msg(Str), + + %% Send a dummy reply to all waiting clients, preserving time order + SysMemUsage = dummy_reply(get_system_memory_data), + reply(State#state.ext_pending, undef, SysMemUsage), + + {noreply, State#state{ext_wd_timer=undefined, ext_pending=[]}}; + +%% Error in data collecting (port connected or temporary) process +handle_info({'EXIT', Pid, Reason}, State) when is_pid(Pid) -> + {stop, Reason, State}; + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, State) -> + if + State#state.port_mode -> State#state.pid ! close; + true -> ok + end, + clear_alarms(), + ok. + +%% os_mon-2.0.1 +%% For live downgrade to/upgrade from os_mon-1.8[.1] and -2.0 +code_change(Vsn, PrevState, "1.8") -> + case Vsn of + + %% Downgrade from this version + {down, _Vsn} -> + + %% Kill the helper process, if there is one, + %% and flush messages from it + case PrevState#state.pid of + Pid when is_pid(Pid) -> + unlink(Pid), % to prevent 'EXIT' message + exit(Pid, cancel); + undefined -> ignore + end, + flush(collected_sys), + flush(collected_ext_sys), + + %% Cancel timers, flush timeout messages + %% and send dummy replies to any pending clients + case PrevState#state.wd_timer of + undefined -> + ignore; + TimerRef1 -> + erlang:cancel_timer(TimerRef1), + SysOnly = PrevState#state.sys_only, + MemUsage = dummy_reply(get_memory_data, SysOnly), + SysMemUsage1 = dummy_reply(get_system_memory_data), + reply(PrevState#state.pending,MemUsage,SysMemUsage1) + end, + case PrevState#state.ext_wd_timer of + undefined -> + ignore; + TimerRef2 -> + erlang:cancel_timer(TimerRef2), + SysMemUsage2 = dummy_reply(get_system_memory_data), + reply(PrevState#state.pending, undef, SysMemUsage2) + end, + flush(reg_collection_timeout), + flush(ext_collection_timeout), + + %% Downgrade to old state record + State = {state, + PrevState#state.timeout, + PrevState#state.mem_usage, + PrevState#state.worst_mem_user, + PrevState#state.sys_mem_watermark, + PrevState#state.proc_mem_watermark, + not PrevState#state.sys_only, % collect_procmem + undefined, % wd_timer + [], % pending + undefined, % ext_wd_timer + [], % ext_pending + PrevState#state.helper_timeout}, + {ok, State}; + + %% Upgrade to this version + _Vsn -> + + %% Old state record + {state, + Timeout, MemUsage, WorstMemUser, + SysMemWatermark, ProcMemWatermark, CollProc, + WDTimer, Pending, ExtWDTimer, ExtPending, + HelperTimeout} = PrevState, + SysOnly = not CollProc, + + %% Flush memsup_helper messages + flush(collected_sys), + flush(collected_proc), + flush(collected_ext_sys), + + %% Cancel timers, flush timeout messages + %% and send dummy replies to any pending clients + case WDTimer of + undefined -> + ignore; + TimerRef1 -> + erlang:cancel_timer(TimerRef1), + MemUsage = dummy_reply(get_memory_data, SysOnly), + Pending2 = lists:map(fun(From) -> {reg,From} end, + Pending), + reply(Pending2, MemUsage, undef) + end, + case ExtWDTimer of + undefined -> + ignore; + TimerRef2 -> + erlang:cancel_timer(TimerRef2), + SysMemUsage = dummy_reply(get_system_memory_data), + ExtPending2 = lists:map(fun(From) -> {ext,From} end, + ExtPending), + reply(ExtPending2, undef, SysMemUsage) + end, + flush(reg_collection_timeout), + flush(ext_collection_timeout), + + OS = os:type(), + PortMode = case OS of + {unix, darwin} -> false; + {unix, freebsd} -> false; + {unix, linux} -> false; + {unix, openbsd} -> true; + {unix, sunos} -> true; + {win32, _OSname} -> false; + vxworks -> true + end, + Pid = if + PortMode -> spawn_link(fun() -> port_init() end); + not PortMode -> undefined + end, + + %% Upgrade to this state record + State = #state{os = OS, + port_mode = PortMode, + mem_usage = MemUsage, + worst_mem_user = WorstMemUser, + sys_only = SysOnly, + timeout = Timeout, + helper_timeout = HelperTimeout, + sys_mem_watermark = SysMemWatermark, + proc_mem_watermark = ProcMemWatermark, + pid = Pid, + wd_timer = undefined, + ext_wd_timer = undefined, + pending = [], + ext_pending = []}, + {ok, State} + end; +code_change(_Vsn, State, "2.0") -> + + %% Restart the port process (it must use new memsup code) + Pid = case State#state.port_mode of + true -> + State#state.pid ! close, + spawn_link(fun() -> port_init() end); + false -> + State#state.pid + end, + {ok, State#state{pid=Pid}}; + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%---------------------------------------------------------------------- +%% Other exports +%%---------------------------------------------------------------------- + +format_status(_Opt, [_PDict, #state{timeout=Timeout, mem_usage=MemUsage, + worst_mem_user=WorstMemUser}]) -> + {Allocated, Total} = MemUsage, + WorstMemFormat = case WorstMemUser of + {Pid, Mem} -> + [{"Pid", Pid}, {"Memory", Mem}]; + undefined -> + undefined + end, + [{data, [{"Timeout", Timeout}]}, + {items, {"Memory Usage", [{"Allocated", Allocated}, + {"Total", Total}]}}, + {items, {"Worst Memory User", WorstMemFormat}}]. + + +%%---------------------------------------------------------------------- +%% Internal functions +%%---------------------------------------------------------------------- + +%%-- Fetching kernel bit support --------------------------------------- + +get_os_wordsize({unix, sunos}) -> + String = clean_string(os:cmd("isainfo -b")), + erlang:list_to_integer(String); +get_os_wordsize({unix, irix64}) -> 64; +get_os_wordsize({unix, irix}) -> 32; +get_os_wordsize({unix, linux}) -> get_os_wordsize_with_uname(); +get_os_wordsize({unix, darwin}) -> get_os_wordsize_with_uname(); +get_os_wordsize({unix, netbsd}) -> get_os_wordsize_with_uname(); +get_os_wordsize({unix, freebsd}) -> get_os_wordsize_with_uname(); +get_os_wordsize({unix, openbsd}) -> get_os_wordsize_with_uname(); +get_os_wordsize(_) -> unsupported_os. + +get_os_wordsize_with_uname() -> + String = clean_string(os:cmd("uname -m")), + case String of + "x86_64" -> 64; + "sparc64" -> 64; + _ -> 32 + end. + +clean_string(String) -> lists:flatten(string:tokens(String,"\r\n\t ")). + + +%%--Replying to pending clients----------------------------------------- + +reply(Pending, MemUsage, SysMemUsage) -> + lists:foreach(fun(reg) -> + ignore; + ({reg, From}) -> + gen_server:reply(From, MemUsage); + ({ext, From}) -> + gen_server:reply(From, SysMemUsage) + end, + lists:reverse(Pending)). + +%%--Collect memory data, no port---------------------------------------- + +%% get_memory_usage(OS) -> {Alloc, Total} + +%% Darwin: +%% Uses vm_stat command. This appears to lie about the page size in +%% Mac OS X 10.2.2 - the pages given are based on 4000 bytes, but +%% the vm_stat command tells us that it is 4096... +get_memory_usage({unix,darwin}) -> + Str1 = os:cmd("/usr/bin/vm_stat"), + + {[Free], Str2} = fread_value("Pages free:~d.", Str1), + {[Active], Str3} = fread_value("Pages active:~d.", Str2), + {[Inactive], Str4} = fread_value("Pages inactive:~d.", Str3), + {[_], Str5} = fread_value("Pages speculative:~d.", Str4), + {[Wired], _} = fread_value("Pages wired down:~d.", Str5), + + NMemUsed = (Wired + Active + Inactive) * 4000, + NMemTotal = NMemUsed + Free * 4000, + {NMemUsed,NMemTotal}; + +%% FreeBSD: Look in /usr/include/sys/vmmeter.h for the format of struct +%% vmmeter +get_memory_usage({unix,freebsd}) -> + PageSize = freebsd_sysctl("vm.stats.vm.v_page_size"), + PageCount = freebsd_sysctl("vm.stats.vm.v_page_count"), + FreeCount = freebsd_sysctl("vm.stats.vm.v_free_count"), + NMemUsed = (PageCount - FreeCount) * PageSize, + NMemTotal = PageCount * PageSize, + {NMemUsed, NMemTotal}; + +%% Win32: Find out how much memory is in use by asking +%% the os_mon_sysinfo process. +get_memory_usage({win32,_OSname}) -> + [Result|_] = os_mon_sysinfo:get_mem_info(), + {ok, [_MemLoad, TotPhys, AvailPhys, + _TotPage, _AvailPage, _TotV, _AvailV], _RestStr} = + io_lib:fread("~d~d~d~d~d~d~d", Result), + {TotPhys-AvailPhys, TotPhys}. + +fread_value(Format, Str0) -> + case io_lib:fread(Format, skip_to_eol(Str0)) of + {error, {fread, input}} -> {[0], Str0}; + {ok, Value, Str1} -> {Value, Str1} + end. + +skip_to_eol([]) -> []; +skip_to_eol([$\n | T]) -> T; +skip_to_eol([_ | T]) -> skip_to_eol(T). + +freebsd_sysctl(Def) -> + list_to_integer(os:cmd("/sbin/sysctl -n " ++ Def) -- "\n"). + +%% get_ext_memory_usage(OS, {Alloc, Total}) -> [{Tag, Bytes}] +get_ext_memory_usage(OS, {Alloc, Total}) -> + case OS of + {win32, _} -> + [{total_memory, Total}, {free_memory, Total-Alloc}, + {system_total_memory, Total}]; + {unix, linux} -> + [{total_memory, Total}, {free_memory, Total-Alloc}, + %% corr. unless setrlimit() set + {system_total_memory, Total}]; + {unix, freebsd} -> + [{total_memory, Total}, {free_memory, Total-Alloc}, + {system_total_memory, Total}]; + {unix, darwin} -> + [{total_memory, Total}, {free_memory, Total-Alloc}, + {system_total_memory, Total}]; + _ -> % OSs using a port + dummy % not sent anyway + end. + +%%--Collect memory data, using port------------------------------------- + +port_init() -> + process_flag(trap_exit, true), + Port = start_portprogram(), + port_idle(Port). + +start_portprogram() -> + Command = filename:join([code:priv_dir(os_mon), "bin", "memsup"]), + open_port({spawn, Command}, [{packet, 1}]). + +%% The connected process loops are a bit awkward (several different +%% functions doing almost the same thing) as +%% a) strategies for receiving regular memory data and extensive +%% memory data are different +%% b) memory collection can be cancelled, in which case the process +%% should still wait for port response (which should come +%% eventually!) but not receive any requests or cancellations +%% meanwhile to prevent getting out of synch. +port_idle(Port) -> + receive + {Memsup, collect_sys} -> + Port ! {self(), {command, [?SHOW_MEM]}}, + get_memory_usage(Port, undefined, Memsup); + {Memsup, collect_ext_sys} -> + Port ! {self(), {command, [?SHOW_SYSTEM_MEM]}}, + get_ext_memory_usage(Port, [], Memsup); + cancel -> + %% Received after reply already has been delivered... + port_idle(Port); + ext_cancel -> + %% Received after reply already has been delivered... + port_idle(Port); + close -> + port_close(Port); + {Port, {data, Data}} -> + exit({port_error, Data}); + {'EXIT', Port, Reason} -> + exit({port_died, Reason}); + {'EXIT', _Memsup, _Reason} -> + port_close(Port) + end. + +get_memory_usage(Port, Alloc, Memsup) -> + receive + {Port, {data, Data}} when Alloc==undefined -> + get_memory_usage(Port, erlang:list_to_integer(Data, 16), Memsup); + {Port, {data, Data}} -> + Total = erlang:list_to_integer(Data, 16), + Memsup ! {collected_sys, {Alloc, Total}}, + port_idle(Port); + cancel -> + get_memory_usage_cancelled(Port, Alloc); + close -> + port_close(Port); + {'EXIT', Port, Reason} -> + exit({port_died, Reason}); + {'EXIT', _Memsup, _Reason} -> + port_close(Port) + end. +get_memory_usage_cancelled(Port, Alloc) -> + receive + {Port, {data, _Data}} when Alloc==undefined -> + get_memory_usage_cancelled(Port, 0); + {Port, {data, _Data}} -> + port_idle(Port); + close -> + port_close(Port); + {'EXIT', Port, Reason} -> + exit({port_died, Reason}); + {'EXIT', _Memsup, _Reason} -> + port_close(Port) + end. + +get_ext_memory_usage(Port, Accum, Memsup) -> + Tab = [ + {?MEM_SYSTEM_TOTAL, system_total_memory}, + {?MEM_TOTAL, total_memory}, + {?MEM_FREE, free_memory}, + {?MEM_BUFFERS, buffered_memory}, + {?MEM_CACHED, cached_memory}, + {?MEM_SHARED, shared_memory}, + {?MEM_LARGEST_FREE, largest_free}, + {?MEM_NUMBER_OF_FREE, number_of_free}, + {?SWAP_TOTAL, total_swap}, + {?SWAP_FREE, free_swap} + ], + receive + {Port, {data, [?SHOW_SYSTEM_MEM_END]}} -> + Memsup ! {collected_ext_sys, Accum}, + port_idle(Port); + {Port, {data, [Tag]}} -> + case lists:keysearch(Tag, 1, Tab) of + {value, {Tag, ATag}} -> + get_ext_memory_usage(ATag, Port, Accum, Memsup); + _ -> + exit({memsup_port_error, {Port,[Tag]}}) + end; + ext_cancel -> + get_ext_memory_usage_cancelled(Port); + close -> + port_close(Port); + {'EXIT', Port, Reason} -> + exit({port_died, Reason}); + {'EXIT', _Memsup, _Reason} -> + port_close(Port) + end. +get_ext_memory_usage_cancelled(Port) -> + Tab = [ + {?MEM_SYSTEM_TOTAL, system_total_memory}, + {?MEM_TOTAL, total_memory}, + {?MEM_FREE, free_memory}, + {?MEM_BUFFERS, buffered_memory}, + {?MEM_CACHED, cached_memory}, + {?MEM_SHARED, shared_memory}, + {?MEM_LARGEST_FREE, largest_free}, + {?MEM_NUMBER_OF_FREE, number_of_free}, + {?SWAP_TOTAL, total_swap}, + {?SWAP_FREE, free_swap} + ], + receive + {Port, {data, [?SHOW_SYSTEM_MEM_END]}} -> + port_idle(Port); + {Port, {data, [Tag]}} -> + case lists:keysearch(Tag, 1, Tab) of + {value, {Tag, ATag}} -> + get_ext_memory_usage_cancelled(ATag, Port); + _ -> + exit({memsup_port_error, {Port,[Tag]}}) + end; + close -> + port_close(Port); + {'EXIT', Port, Reason} -> + exit({port_died, Reason}); + {'EXIT', _Memsup, _Reason} -> + port_close(Port) + end. + +get_ext_memory_usage(ATag, Port, Accum0, Memsup) -> + receive + {Port, {data, Data}} -> + Accum = [{ATag,erlang:list_to_integer(Data, 16)}|Accum0], + get_ext_memory_usage(Port, Accum, Memsup); + cancel -> + get_ext_memory_usage_cancelled(ATag, Port); + close -> + port_close(Port); + {'EXIT', Port, Reason} -> + exit({port_died, Reason}); + {'EXIT', _Memsup, _Reason} -> + port_close(Port) + end. +get_ext_memory_usage_cancelled(_ATag, Port) -> + receive + {Port, {data, _Data}} -> + get_ext_memory_usage_cancelled(Port); + close -> + port_close(Port); + {'EXIT', Port, Reason} -> + exit({port_died, Reason}); + {'EXIT', _Memsup, _Reason} -> + port_close(Port) + end. + +%%--Collect process data------------------------------------------------ + +%% get_worst_memory_user() -> {Pid, Bytes} +get_worst_memory_user() -> + get_worst_memory_user(processes(), self(), 0). + +get_worst_memory_user([Pid|Pids], MaxPid, MaxMemBytes) -> + case process_memory(Pid) of + undefined -> + get_worst_memory_user(Pids, MaxPid, MaxMemBytes); + MemoryBytes when MemoryBytes>MaxMemBytes -> + get_worst_memory_user(Pids, Pid, MemoryBytes); + _MemoryBytes -> + get_worst_memory_user(Pids, MaxPid, MaxMemBytes) + end; +get_worst_memory_user([], MaxPid, MaxMemBytes) -> + {MaxPid, MaxMemBytes}. + +process_memory(Pid) -> + case process_info(Pid, memory) of + {memory, Bytes} -> + Bytes; + undefined -> % Pid must have died + undefined + end. + +%%--Alarm handling------------------------------------------------------ + +set_alarm(AlarmId, AlarmDescr) -> + case get(AlarmId) of + set -> + ok; + undefined -> + alarm_handler:set_alarm({AlarmId, AlarmDescr}), + put(AlarmId, set) + end. + +clear_alarm(AlarmId) -> + case get(AlarmId) of + set -> + alarm_handler:clear_alarm(AlarmId), + erase(AlarmId); + _ -> + ok + end. + +clear_alarms() -> + lists:foreach(fun({system_memory_high_watermark = Id, set}) -> + alarm_handler:clear_alarm(Id); + ({process_memory_high_watermark = Id, set}) -> + alarm_handler:clear_alarm(Id); + (_Other) -> + ignore + end, + get()). + +%%--Auxiliary----------------------------------------------------------- + +%% Type conversions +minutes_to_ms(Minutes) -> trunc(60000*Minutes). +sec_to_ms(Sec) -> trunc(1000*Sec). +ms_to_sec(MS) -> MS div 1000. + +flush(Msg) -> + receive + {Msg, _} -> true; + Msg -> true + after 0 -> + true + end. diff --git a/lib/os_mon/src/nteventlog.erl b/lib/os_mon/src/nteventlog.erl new file mode 100644 index 0000000000..d624048c29 --- /dev/null +++ b/lib/os_mon/src/nteventlog.erl @@ -0,0 +1,162 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1998-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(nteventlog). +-behaviour(gen_server). + +%% API +-export([start_link/2, start/2, stop/0]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-record(state, {port, mfa}). + +%%---------------------------------------------------------------------- +%% API +%%---------------------------------------------------------------------- + +start_link(Ident, MFA) -> + gen_server:start_link({local, nteventlog}, nteventlog, + [Ident, MFA], []). + +start(Ident, MFA) -> + gen_server:start({local, nteventlog}, nteventlog, [Ident, MFA], []). + +stop() -> + gen_server:call(nteventlog, stop). + +%%---------------------------------------------------------------------- +%% gen_server callbacks +%%---------------------------------------------------------------------- + +init([Identifier,MFA0]) -> + process_flag(trap_exit, true), + process_flag(priority, low), + + Port = case os:type() of + {win32, _OSname} -> start_portprogram(Identifier); + OS -> exit({unsupported_os, OS}) + end, + + %% If we're using os_sup:error_report/2, + %% the setting of os_sup_errortag should be used as argument + MFA = case MFA0 of + {os_sup, error_report, [_Tag]} -> + Tag = os_mon:get_env(os_sup, os_sup_errortag), + {os_sup, error_report, [Tag]}; + _ -> + MFA0 + end, + + {ok, #state{port=Port, mfa=MFA}}. + +handle_call(stop, _From, State) -> + {stop, normal, stopped, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({_Port, {data, Data}}, #state{mfa={M,F,A}} = State) -> + T = parse_log(Data), + apply(M, F, [T | A]), + State#state.port ! {self(), {command, "A"}}, + {noreply, State}; +handle_info({'EXIT', _Port, Reason}, State) -> + {stop, {port_died, Reason}, State#state{port=not_used}}; +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, State) -> + case State#state.port of + not_used -> ignore; + Port -> + port_close(Port) + end, + ok. + +%% os_mon-2.0 +%% For live downgrade to/upgrade from os_mon-1.8[.1] +code_change(Vsn, PrevState, "1.8") -> + case Vsn of + + %% Downgrade from this version + {down, _Vsn} -> + process_flag(trap_exit, false), + + %% Downgrade to old State tuple + State = {PrevState#state.port, PrevState#state.mfa}, + {ok, State}; + + %% Upgrade to this version + _Vsn -> + process_flag(trap_exit, true), + + %% Upgrade to this state record + {Port, MFA} = PrevState, + State = #state{port=Port, mfa=MFA}, + {ok, State} + end; +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%---------------------------------------------------------------------- +%% Internal functions +%%---------------------------------------------------------------------- + +start_portprogram(Identifier) -> + Command = + filename:join([code:priv_dir(os_mon),"bin","nteventlog.exe"]) ++ + " " ++ make_list(Identifier), + open_port({spawn,Command},[{packet,2}]). + +make_list(X) when is_atom(X) -> + atom_to_list(X); +make_list(X) -> + X. + +holl_len([$H | Rest], Sum) -> + {Sum, Rest}; +holl_len([ N | Rest], Sum) -> + NN = N - $0, + holl_len(Rest, Sum * 10 + NN). +holl_len(L) -> + holl_len(L,0). + +splitlist(L,N) -> + {lists:sublist(L,N),lists:nthtail(N,L)}. + +hollerith(Str) -> + {Len, Rest} = holl_len(Str), + splitlist(Rest,Len). + +holl_time(Str) -> + {Holl,Rest} = hollerith(Str), + Rev = lists:reverse(Holl), + B = list_to_integer(lists:reverse(lists:sublist(Rev,6))), + A = list_to_integer(lists:reverse(lists:nthtail(6,Rev))), + {{A,B,0},Rest}. + +parse_log(Str) -> + {Time, Rest1} = holl_time(Str), + {Category,Rest2} = hollerith(Rest1), + {Facility,Rest3} = hollerith(Rest2), + {Severity,Rest4} = hollerith(Rest3), + {Message,_} = hollerith(Rest4), + {Time,Category,Facility,Severity,Message}. diff --git a/lib/os_mon/src/os_mon.app.src b/lib/os_mon/src/os_mon.app.src new file mode 100644 index 0000000000..15bbd2663c --- /dev/null +++ b/lib/os_mon/src/os_mon.app.src @@ -0,0 +1,32 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +{application, os_mon, + [{description, "CPO CXC 138 46"}, + {vsn, "%VSN%"}, + {modules, [os_mon, os_mon_mib, os_sup, + disksup, memsup, cpu_sup, os_mon_sysinfo, nteventlog]}, + {registered, [os_mon_sup, os_mon_sysinfo, disksup, memsup, cpu_sup, + os_sup_server]}, + {applications, [kernel, stdlib, sasl]}, + {env, [{start_cpu_sup, true}, + {start_disksup, true}, + {start_memsup, true}, + {start_os_sup, false}]}, + {mod, {os_mon, []}}]}. diff --git a/lib/os_mon/src/os_mon.appup.src b/lib/os_mon/src/os_mon.appup.src new file mode 100644 index 0000000000..f8e09a7d87 --- /dev/null +++ b/lib/os_mon/src/os_mon.appup.src @@ -0,0 +1,41 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +{"%VSN%", + [ + {"2.1", + [{load_module, cpu_sup}, + {load_module, disksup}, + {load_module, memsup}, + {load_module, os_mon}, + {load_module, os_mon_mib}]}, + {"2.1.1", + [{load_module, os_mon_mib}]} + ], + [ + {"2.1", + [{load_module, cpu_sup}, + {load_module, disksup}, + {load_module, memsup}, + {load_module, os_mon}, + {load_module, os_mon_mib}]}, + {"2.1.1", + [{load_module, os_mon_mib}]} + ] +}. diff --git a/lib/os_mon/src/os_mon.erl b/lib/os_mon/src/os_mon.erl new file mode 100644 index 0000000000..ef368571db --- /dev/null +++ b/lib/os_mon/src/os_mon.erl @@ -0,0 +1,179 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(os_mon). + +-behaviour(application). +-behaviour(supervisor). + +%% API +-export([call/2, call/3, get_env/2]). + +%% Application callbacks +-export([start/2, stop/1]). + +%% Supervisor callbacks +-export([init/1]). + +%%%----------------------------------------------------------------- +%%% API +%%%----------------------------------------------------------------- + +call(Service, Request) -> + call(Service, Request, 5000). + +call(Service, Request, Timeout) -> + try gen_server:call(server_name(Service), Request, Timeout) + catch + exit:{noproc, Call} -> + case lists:keysearch(os_mon, 1, + application:which_applications()) of + {value, _AppInfo} -> + case startp(Service) of + true -> + erlang:exit({noproc, Call}); + false -> + String = "OS_MON (~p) called by ~p, " + "unavailable~n", + error_logger:warning_msg(String, + [Service, self()]), + Service:dummy_reply(Request) + end; + false -> + String = "OS_MON (~p) called by ~p, not started~n", + error_logger:warning_msg(String, [Service, self()]), + Service:dummy_reply(Request) + end + end. + +get_env(Service, Param) -> + case application:get_env(os_mon, Param) of + {ok, Value} -> + case Service:param_type(Param, Value) of + true -> + Value; + false -> + String = "OS_MON (~p), ignoring " + "bad configuration parameter (~p=~p)~n" + "Using default value instead~n", + error_logger:warning_msg(String, + [Service, Param, Value]), + Service:param_default(Param) + end; + undefined -> + Service:param_default(Param) + end. + +%%%----------------------------------------------------------------- +%%% Application callbacks +%%%----------------------------------------------------------------- + +start(_, _) -> + supervisor:start_link({local, os_mon_sup}, os_mon, []). + +stop(_) -> + ok. + +%%%----------------------------------------------------------------- +%%% Supervisor callbacks +%%%----------------------------------------------------------------- + +init([]) -> + SupFlags = case os:type() of + {win32, _} -> + {one_for_one, 5, 3600}; + _ -> + {one_for_one, 4, 3600} + end, + SysInf = childspec(sysinfo, startp(sysinfo)), + DskSup = childspec(disksup, startp(disksup)), + MemSup = childspec(memsup, startp(memsup)), + CpuSup = childspec(cpu_sup, startp(cpu_sup)), + OsSup = childspec(os_sup, startp(os_sup)), + {ok, {SupFlags, SysInf ++ DskSup ++ MemSup ++ CpuSup ++ OsSup}}. + +childspec(_Service, false) -> + []; +childspec(cpu_sup, true) -> + [{cpu_sup, {cpu_sup, start_link, []}, + permanent, 2000, worker, [cpu_sup]}]; +childspec(disksup, true) -> + [{disksup, {disksup, start_link, []}, + permanent, 2000, worker, [disksup]}]; +childspec(memsup, true) -> + [{memsup, {memsup, start_link, []}, + permanent, 2000, worker, [memsup]}]; +childspec(os_sup, true) -> + OS = os:type(), + Mod = case OS of + {win32, _} -> nteventlog; % windows + _ -> os_sup % solaris + end, + [{os_sup, {os_sup, start_link, [OS]}, + permanent, 10000, worker, [Mod]}]; +childspec(sysinfo, true) -> + [{os_mon_sysinfo, {os_mon_sysinfo, start_link, []}, + permanent, 2000, worker, [os_mon_sysinfo]}]. + +%%%----------------------------------------------------------------- +%%% Internal functions (OS_Mon configuration) +%%%----------------------------------------------------------------- + +startp(Service) -> + %% Available for this platform? + case lists:member(Service, services(os:type())) of + true -> + %% Is there a start configuration parameter? + case start_param(Service) of + none -> + true; + Param -> + %% Is the start configuration parameter 'true'? + case application:get_env(os_mon, Param) of + {ok, true} -> + true; + _ -> + false + end + end; + false -> + false + end. + +services({unix, sunos}) -> + [cpu_sup, disksup, memsup, os_sup]; +services({unix, _}) -> % Other unix. + [cpu_sup, disksup, memsup]; +services({win32, _}) -> + [disksup, memsup, os_sup, sysinfo]; +services(vxworks) -> + [memsup]; +services(_) -> + []. + +server_name(cpu_sup) -> cpu_sup; +server_name(disksup) -> disksup; +server_name(memsup) -> memsup; +server_name(os_sup) -> os_sup_server; +server_name(sysinfo) -> os_mon_sysinfo. + +start_param(cpu_sup) -> start_cpu_sup; +start_param(disksup) -> start_disksup; +start_param(memsup) -> start_memsup; +start_param(os_sup) -> start_os_sup; +start_param(sysinfo) -> none. diff --git a/lib/os_mon/src/os_mon_mib.erl b/lib/os_mon/src/os_mon_mib.erl new file mode 100644 index 0000000000..a4ce274a16 --- /dev/null +++ b/lib/os_mon/src/os_mon_mib.erl @@ -0,0 +1,250 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(os_mon_mib). +%%%----------------------------------------------------------------- +%%% Description: This module implements the OS-MON-MIB. +%%% The tables are implemented as shadow tables with the module +%%% snmp_shadow_table. Here the update functions are implemented. +%%%----------------------------------------------------------------- + +-include("../../otp_mibs/include/OTP-MIB.hrl"). + +%% API +-export([load/1, unload/1]). + +%% Deprecated API +-export([init/1, stop/1]). + +-deprecated([{init,1,eventually}, + {stop,1,eventually}]). + +%% SNMP instrumentation +-export([load_table/1, load_table/3, disk_table/1, disk_table/3, + mem_sys_mark/1, mem_proc_mark/1, disk_threshold/1]). + +%% SNMP shadow functions +-export([update_load_table/0, update_disk_table/0]). + +%% Exported for internal use via rpc +-export([get_load/1, get_disks/1]). + +%% Shadow tables +-record(loadTable, { + loadErlNodeName, + loadSystemTotalMemory, + loadSystemUsedMemory, + loadLargestErlProcess, + loadLargestErlProcessUsedMemory, + loadCpuLoad, + loadCpuLoad5, + loadCpuLoad15, + loadOsWordsize, + loadSystemTotalMemory64, + loadSystemUsedMemory64, + loadLargestErlProcessUsedMemory64}). + +-record(diskTable, + {key, diskDescr, diskKBytes, diskCapacity}). + +%% Shadow argument macros +-define(loadShadowArgs, + {loadTable, string, record_info(fields, loadTable), 5000, + {os_mon_mib, update_load_table}}). + +-define(diskShadowArgs, + {diskTable, {integer, integer}, record_info(fields, diskTable), 5000, + {os_mon_mib, update_disk_table}}). + +%% Misc +-record(diskAlloc, {diskDescr, diskId}). + +%%%========================================================================= +%%% API +%%%========================================================================= + +%%------------------------------------------------------------------------- +%% load(Agent) -> ok | {error, Reason} +%% Agent - pid() | atom() +%% Reason - term() +%% Description: Loads the OTP-OS-MON-MIB +%%------------------------------------------------------------------------- +load(Agent) -> + MibDir = filename:join(code:priv_dir(os_mon), "mibs"), + snmpa:load_mibs(Agent, [filename:join(MibDir, "OTP-OS-MON-MIB")]). + +%%------------------------------------------------------------------------- +%% unload(Agent) -> ok | {error, Reason} +%% Agent - pid() | atom() +%% Reason - term() +%% Description: Unloads the OTP-OS-MON-MIB +%%------------------------------------------------------------------------- +unload(Agent) -> + snmpa:unload_mibs(Agent, ["OTP-OS-MON-MIB"]). + +%% To be backwards compatible +init(Agent) -> + load(Agent). +stop(Agent) -> + unload(Agent). + +%%%========================================================================= +%%% SNMP instrumentation +%%%========================================================================= +load_table(Op) -> + snmp_shadow_table:table_func(Op, ?loadShadowArgs). +load_table(Op, RowIndex, Cols) -> + snmp_shadow_table:table_func(Op, RowIndex, Cols, ?loadShadowArgs). + +disk_table(new) -> + Tab = diskAlloc, + Storage = ram_copies, + case lists:member(Tab, mnesia:system_info(tables)) of + true -> + case mnesia:table_info(Tab, storage_type) of + unknown -> + {atomic, ok}=mnesia:add_table_copy(Tab, node(), Storage); + Storage -> + catch delete_all(Tab) + end; + false -> + Nodes = [node()], + Props = [{type, set}, + {attributes, record_info(fields, diskAlloc)}, + {local_content, true}, + {Storage, Nodes}], + {atomic, ok} = mnesia:create_table(Tab, Props) + + end, + Rec = #diskAlloc{diskDescr = next_index, diskId = 1}, + ok = mnesia:dirty_write(Rec), + snmp_shadow_table:table_func(new, ?diskShadowArgs). + +disk_table(Op, RowIndex, Cols) -> + snmp_shadow_table:table_func(Op, RowIndex, Cols, ?diskShadowArgs). + +mem_sys_mark(get) -> + {value, memsup:get_sysmem_high_watermark()}; +mem_sys_mark(_) -> + ok. + +mem_proc_mark(get) -> + {value, memsup:get_procmem_high_watermark()}; +mem_proc_mark(_) -> + ok. + +disk_threshold(get) -> + {value, disksup:get_almost_full_threshold()}; +disk_threshold(_) -> + ok. + +%%%========================================================================= +%%% SNMP shadow functions +%%%========================================================================= +update_load_table() -> + delete_all(loadTable), + lists:foreach( + fun(Node) -> + case rpc:call(Node, os_mon_mib, get_load, [Node]) of + Load when is_record(Load,loadTable) -> + ok = mnesia:dirty_write(Load); + _Else -> + ok + end + end, [node() | nodes()]). + + +update_disk_table() -> + delete_all(diskTable), + node_update_disk_table( + otp_mib:erl_node_table(get_next, [], [?erlNodeName,?erlNodeOutBytes])). + +%%%======================================================================== +%%% Exported for internal use via rpc +%%%======================================================================== +get_load(Node) -> + {Total, Allocated, PidString, PidAllocated} = case memsup:get_memory_data() of + {MemTot, MemAlloc, undefined} -> {MemTot, MemAlloc, "undefined", 0}; + {MemTot, MemAlloc, {Pid, PidMem}} -> {MemTot, MemAlloc, pid_to_str(Pid), PidMem} + end, + OsWordsize = case memsup:get_os_wordsize() of + WS when is_integer(WS) -> WS; + _ -> 0 + end, + #loadTable{ + loadErlNodeName = atom_to_list(Node), + loadSystemTotalMemory = mask_int32(Total), + loadSystemUsedMemory = mask_int32(Allocated), + loadLargestErlProcess = PidString, + loadLargestErlProcessUsedMemory = mask_int32(PidAllocated), + loadCpuLoad = get_cpu_load(avg1), + loadCpuLoad5 = get_cpu_load(avg5), + loadCpuLoad15 = get_cpu_load(avg15), + loadOsWordsize = OsWordsize, + loadSystemTotalMemory64 = Total, + loadSystemUsedMemory64 = Allocated, + loadLargestErlProcessUsedMemory64 = PidAllocated + }. + +mask_int32(Value) -> Value band ((1 bsl 32) - 1). + +get_disks(NodeId) -> + element(1, + lists:mapfoldl( + fun({Descr, KByte, Capacity}, DiskId) -> + {#diskTable{key = {NodeId, DiskId}, + diskDescr = Descr, + diskKBytes = KByte, + diskCapacity = Capacity}, + DiskId + 1} + end, 1, disksup:get_disk_data())). + + +%%%======================================================================== +%%% Internal functions +%%%======================================================================== +node_update_disk_table([_, endOfTable]) -> + ok; + +node_update_disk_table([{[?erlNodeName | IndexList], NodeStr}, _]) -> + Disks = rpc:call(list_to_atom(NodeStr), os_mon_mib, get_disks, + IndexList), + lists:foreach(fun(Disk) -> + mnesia:dirty_write(Disk) + end, Disks), + node_update_disk_table(otp_mib:erl_node_table(get_next, + IndexList, + [?erlNodeName, + ?erlNodeOutBytes])). + +get_cpu_load(X) when X == avg1; X == avg5; X == avg15 -> + case erlang:round(apply(cpu_sup, X, [])/2.56) of + Large when Large > 100 -> + 100; + Load -> + Load + end. + +delete_all(Name) -> delete_all(mnesia:dirty_first(Name), Name). +delete_all('$end_of_table', _Name) -> done; +delete_all(Key, Name) -> + Next = mnesia:dirty_next(Name, Key), + ok = mnesia:dirty_delete({Name, Key}), + delete_all(Next, Name). + +pid_to_str(Pid) -> lists:flatten(io_lib:format("~w", [Pid])). diff --git a/lib/os_mon/src/os_mon_sysinfo.erl b/lib/os_mon/src/os_mon_sysinfo.erl new file mode 100644 index 0000000000..5d12bd95d1 --- /dev/null +++ b/lib/os_mon/src/os_mon_sysinfo.erl @@ -0,0 +1,147 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1997-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(os_mon_sysinfo). +-behaviour(gen_server). + +%% API +-export([start_link/0]). +-export([get_disk_info/0, get_disk_info/1, get_mem_info/0]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-define(DISK_INFO, $d). +-define(MEM_INFO, $m). +-define(OK, $o). + +-record(state, {port}). + +%%---------------------------------------------------------------------- +%% API +%%---------------------------------------------------------------------- + +start_link() -> + gen_server:start_link({local,os_mon_sysinfo}, os_mon_sysinfo, [],[]). + +get_disk_info() -> + gen_server:call(os_mon_sysinfo, get_disk_info). + +get_disk_info(DriveRoot) -> + gen_server:call(os_mon_sysinfo, {get_disk_info,DriveRoot}). + +get_mem_info() -> + gen_server:call(os_mon_sysinfo, get_mem_info). + +%%---------------------------------------------------------------------- +%% gen_server callbacks +%%---------------------------------------------------------------------- + +init([]) -> + process_flag(trap_exit, true), + process_flag(priority, low), + Port = case os:type() of + {win32, _OSname} -> start_portprogram(); + OS -> exit({unsupported_os, OS}) + end, + {ok, #state{port=Port}}. + +handle_call(get_disk_info, _From, State) -> + {reply, get_disk_info1(State#state.port), State}; +handle_call({get_disk_info,RootList}, _From, State) -> + {reply, get_disk_info1(State#state.port,RootList), State}; +handle_call(get_mem_info, _From, State) -> + {reply, get_mem_info1(State#state.port), State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({'EXIT', _Port, Reason}, State) -> + {stop, {port_died, Reason}, State#state{port=not_used}}; +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, State) -> + case State#state.port of + not_used -> + ok; + Port -> + port_close(Port) + end, + ok. + +%% os_mon-2.0 +%% For live downgrade to/upgrade from os_mon-1.8[.1] +code_change(Vsn, PrevState, "1.8") -> + case Vsn of + + %% Downgrade from this version + {down, _Vsn} -> + process_flag(trap_exit, false); + + %% Upgrade to this version + _Vsn -> + process_flag(trap_exit, true) + end, + {ok, PrevState}; +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%---------------------------------------------------------------------- +%% Internal functions +%%---------------------------------------------------------------------- + +start_portprogram() -> + Command = + filename:join([code:priv_dir(os_mon),"bin","win32sysinfo.exe"]), + Port = open_port({spawn,Command}, [{packet,1}]), + receive + {Port, {data, [?OK]}} -> + Port; + {Port, {data, Data}} -> + exit({port_error, Data}); + {'EXIT', Port, Reason} -> + exit({port_died, Reason}) + after 5000 -> + exit({port_error, timeout}) + end. + +get_disk_info1(Port) -> + Port ! {self(),{command,[?DISK_INFO]}}, + get_data(Port,[]). + +get_disk_info1(Port,PathList) -> + Port ! {self(),{command,[?DISK_INFO|[P++[0]||P <- PathList]]}}, + get_data(Port,[]). + +get_mem_info1(Port) -> + Port ! {self(),{command,[?MEM_INFO]}}, + get_data(Port,[]). + +get_data(Port, Sofar) -> + receive + {Port, {data, [?OK]}} -> + lists:reverse(Sofar); + {Port, {data, Bytes}} -> + get_data(Port, [Bytes|Sofar]); + {'EXIT', Port, Reason} -> + exit({port_died, Reason}) + after 5000 -> + lists:reverse(Sofar) + end. diff --git a/lib/os_mon/src/os_sup.erl b/lib/os_mon/src/os_sup.erl new file mode 100644 index 0000000000..f5c6c138ba --- /dev/null +++ b/lib/os_mon/src/os_sup.erl @@ -0,0 +1,258 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(os_sup). +-behaviour(gen_server). + +%% API +-export([start_link/1, start/0, stop/0]). +-export([error_report/2]). +-export([enable/0, enable/2, disable/0, disable/2]). +-export([param_type/2, param_default/1]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-record(state, {port, mfa, config, path, conf}). + +%%---------------------------------------------------------------------- +%% API +%%---------------------------------------------------------------------- + +start_link({win32, _OSname}) -> + Identifier = os_sup, + MFA = os_mon:get_env(os_sup, os_sup_mfa), + gen_server:start_link({local, os_sup_server}, nteventlog, + [Identifier, MFA], []); +start_link(_OS) -> + gen_server:start_link({local, os_sup_server}, os_sup, [], []). + +start() -> % for testing + gen_server:start({local, os_sup_server}, os_sup, [], []). + +stop() -> + gen_server:call(os_sup_server, stop). + +error_report(LogData, Tag) -> + error_logger:error_report(Tag, LogData). + +enable() -> + command(enable). +enable(Path, Conf) -> + command(enable, Path, Conf). + +disable() -> + command(disable). +disable(Path, Conf) -> + command(disable, Path, Conf). + +param_type(os_sup_errortag, Val) when is_atom(Val) -> true; +param_type(os_sup_own, Val) -> io_lib:printable_list(Val); +param_type(os_sup_syslogconf, Val) -> io_lib:printable_list(Val); +param_type(os_sup_enable, Val) when Val==true; Val==false -> true; +param_type(os_sup_mfa, {Mod,Func,Args}) when is_atom(Mod), + is_atom(Func), + is_list(Args) -> true; +param_type(_Param, _Val) -> false. + +param_default(os_sup_errortag) -> std_error; +param_default(os_sup_own) -> "/etc"; +param_default(os_sup_syslogconf) -> "/etc/syslog.conf"; +param_default(os_sup_enable) -> true; +param_default(os_sup_mfa) -> {os_sup, error_report, [std_error]}. + +%%---------------------------------------------------------------------- +%% gen_server callbacks +%%---------------------------------------------------------------------- + +init([]) -> + process_flag(trap_exit, true), + process_flag(priority, low), + + case os:type() of + {unix, sunos} -> + init2(); + OS -> {stop, {unsupported_os, OS}} + end. + +init2() -> % Enable service if configured to do so + ConfigP = os_mon:get_env(os_sup, os_sup_enable), + case ConfigP of + true -> % ..yes -- do enable + Path = os_mon:get_env(os_sup, os_sup_own), + Conf = os_mon:get_env(os_sup, os_sup_syslogconf), + case enable(Path, Conf) of + ok -> + init3(#state{config=ConfigP, path=Path, conf=Conf}); + {error, Error} -> + {stop, {mod_syslog, Error}} + end; + false -> % ..no -- skip directly to init3/1 + init3(#state{config=ConfigP}) + end. + +init3(State0) -> + Port = start_portprogram(), + + %% Read the values of some configuration parameters + MFA = case os_mon:get_env(os_sup, os_sup_mfa) of + {os_sup, error_report, _} -> + Tag = os_mon:get_env(os_sup, os_sup_errortag), + {os_sup, error_report, [Tag]}; + MFA0 -> + MFA0 + end, + + {ok, State0#state{port=Port, mfa=MFA}}. + +handle_call(stop, _From, State) -> + {stop, normal, ok, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({_Port, {data, Data}}, #state{mfa={M,F,A}} = State) -> + apply(M, F, [Data | A]), + {noreply, State}; +handle_info({'EXIT', _Port, Reason}, State) -> + {stop, {port_died, Reason}, State#state{port=not_used}}; +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, #state{port=Port} = State) -> + case State#state.config of + true when is_port(Port) -> + Port ! {self(), {command, "only_stdin"}}, + Res = disable(State#state.path, State#state.conf), + port_close(Port), + if + Res/="0" -> exit({mod_syslog, Res}); + true -> ok + end; + true -> + Res = disable(State#state.path, State#state.conf), + if + Res/="0" -> exit({mod_syslog, Res}); + true -> ok + end; + false when is_port(Port) -> + Port ! {self(), {command, "only_stdin"}}, + port_close(Port); + false -> + ok + end. + +%% os_mon-2.0 +%% For live downgrade to/upgrade from os_mon-1.8[.1] +code_change(Vsn, PrevState, "1.8") -> + case Vsn of + + %% Downgrade from this version + {down, _Vsn} -> + + %% Find out the error tag used + {DefM, DefF, _} = param_default(os_sup_mfa), + Tag = case PrevState#state.mfa of + + %% Default callback function is used, then use + %% the corresponding tag + {DefM, DefF, [Tag0]} -> + Tag0; + + %% Default callback function is *not* used + %% (before the downgrade, that is) + %% -- check the configuration parameter + _ -> + case application:get_env(os_mon, + os_sup_errortag) of + {ok, Tag1} -> + Tag1; + + %% (actually, if it has no value, + %% the process should terminate + %% according to 1.8.1 version, but that + %% seems too harsh here) + _ -> + std_error + end + end, + + %% Downgrade to old state record + State = {state, PrevState#state.port, Tag}, + {ok, State}; + + %% Upgrade to this version + _Vsn -> + + {state, Port, Tag} = PrevState, + + {DefM, DefF, _} = param_default(os_sup_mfa), + MFA = {DefM, DefF, [Tag]}, + + %% We can safely assume the following configuration + %% parameters are defined, otherwise os_sup would never had + %% started in the first place. + %% (We can *not* safely assume they haven't been changed, + %% but that's a weakness inherited from the 1.8.1 version) + Path = application:get_env(os_mon, os_sup_own), + Conf = application:get_env(os_mon, os_sup_syslogconf), + + %% Upgrade to this state record + State = #state{port=Port, mfa=MFA, config=true, + path=Path, conf=Conf}, + {ok, State} + end; +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%---------------------------------------------------------------------- +%% Internal functions +%%---------------------------------------------------------------------- + +start_portprogram() -> + OwnPath = os_mon:get_env(os_sup, os_sup_own), + Command = + filename:join([code:priv_dir(os_mon), "bin", "ferrule"]) ++ + " " ++ OwnPath, + open_port({spawn, Command}, [{packet, 2}]). + +%% os:cmd(cmd_str(enable)) should be done BEFORE starting os_sup +%% os:cmd(cmd_str(disable)) should be done AFTER os_sup is terminated +%% Both commands return "0" if successful +command(Mode) -> + command(Mode, "/etc", "/etc/syslog.conf"). +command(Mode, Path, Conf) -> + case os:cmd(cmd_str(Mode, Path, Conf)) of + "0" -> + ok; + Error -> + {error, Error} + end. + +cmd_str(Mode, Path, Conf) -> + %% modpgm modesw ownpath syslogconf + PrivDir = code:priv_dir(os_mon), + ModeSw = + case Mode of + enable -> + " otp "; + disable -> + " nootp " + end, + PrivDir ++ "/bin/mod_syslog" ++ ModeSw ++ Path ++ " " ++ Conf. |