updates and renaming

This commit is contained in:
Hannes Mehnert 2022-05-22 21:21:09 +02:00
parent 4a07e47893
commit 002aaf9321
7 changed files with 110 additions and 66 deletions

View file

@ -2,12 +2,19 @@
Using Influx, Telegraf, etc. Using Influx, Telegraf, etc.
![Monitoring](https://raw.githubusercontent.com/roburio/monitoring-experiments/master/one.png) ![Monitoring](https://raw.githubusercontent.com/roburio/mirage-monitoring/main/one.png)
Best used on a private network interface for your unikernel.
# Output metrics to influx / telegraf
Every `interval` seconds, the gathered metrics are sent to the `ip:port`
provided to `create`, using the Influx wire protocol.
# Dynamic adjustments of Log level and Metrics reporting # Dynamic adjustments of Log level and Metrics reporting
The create function has a *listener_port* argument. If this is provided, then The `create` function has a *listener_port* argument (defaulting to 2323). On the
on the given port TCP connections to the unikernel are possible. Each connection customizable port TCP connections to the unikernel are possible. Each connection
can transmit a command (as text) to adjust log level and enable or disable can transmit a command (as text) to adjust log level and enable or disable
metrics sources: metrics sources:
@ -31,3 +38,14 @@ Likewise, metrics status can be similarly inspected:
- `m` reports the default metrics status and the metrics status for all metrics sources with a different status. - `m` reports the default metrics status and the metrics status for all metrics sources with a different status.
- `m*` reports the default metrics status, all enabled tags, and the metrics status of all metrics sources. - `m*` reports the default metrics status, all enabled tags, and the metrics status of all metrics sources.
- `mmemory,net-solo5` reports the metrics status for memory and net-solo5 respectively. - `mmemory,net-solo5` reports the metrics status for memory and net-solo5 respectively.
# Memprof tracing
The `create` function has a `memtrace_port` argument (defaults to 4242). On this
port, when a TCP client connects,
[OCaml memory profiling](https://ocaml.org/api/Gc.Memprof.html) is started and
dumped to the client. The `sampling_rate` (defaults to 1e-4) is used. This uses
the [memprof-mirage](https://github.com/hannesm/memprof-mirage) package.
The output trace is best viewed with
[memtrace_viewer](https://github.com/janestreet/memtrace_viewer/).

View file

@ -1,2 +1,2 @@
(lang dune 1.0) (lang dune 1.0)
(name monitoring-experiments) (name mirage-monitoring)

View file

@ -1,10 +1,10 @@
opam-version: "2.0" opam-version: "2.0"
maintainer: "Hannes Mehnert <hannes@mehnert.org>" maintainer: "Robur <team@robur.coop>"
authors: ["Hannes Mehnert <hannes@mehnert.org>"] authors: ["Robur <team@robur.coop>"]
homepage: "https://github.com/roburio/monitoring-experiments" homepage: "https://git.robur.io/robur/mirage-monitoring"
doc: "https://roburio.github.io/monitoring-experiments" doc: "https://roburio.github.io/mirage-monitoring"
dev-repo: "git+https://github.com/roburio/monitoring-experiments.git" dev-repo: "git+https://git.robur.io/robur/mirage-monitoring.git"
bug-reports: "https://github.com/roburio/monitoring-experiments/issues" bug-reports: "https://github.com/roburio/mirage-monitoring/issues"
license: "AGPL" license: "AGPL"
depends: [ depends: [
@ -19,6 +19,8 @@ depends: [
"mirage-solo5" {>= "0.6.4"} "mirage-solo5" {>= "0.6.4"}
"ocaml-freestanding" {>= "0.4.5"} "ocaml-freestanding" {>= "0.4.5"}
"mirage-runtime" "mirage-runtime"
"memtrace-mirage" {>= "0.2.1.2.2"}
"mirage-clock" {>= "4.0.0"}
] ]
build: [ build: [
["dune" "subst"] {dev} ["dune" "subst"] {dev}

View file

@ -1,6 +1,6 @@
(library (library
(name monitoring_experiments) (name mirage_monitoring)
(public_name monitoring-experiments) (public_name mirage-monitoring)
(wrapped false) (wrapped false)
(libraries logs metrics metrics-lwt metrics-influx mirage-time tcpip (libraries logs metrics metrics-lwt metrics-influx mirage-time mirage-clock
mirage-solo5 mirage-runtime)) memtrace-mirage tcpip mirage-solo5 mirage-runtime))

View file

@ -1,6 +1,6 @@
open Lwt.Infix open Lwt.Infix
let src = Logs.Src.create "monitoring-experiments" ~doc:"Monitoring experiments" let src = Logs.Src.create "mirage-monitoring" ~doc:"MirageOS monitoring"
module Log = (val Logs.src_log src : Logs.LOG) module Log = (val Logs.src_log src : Logs.LOG)
let ( let* ) = Result.bind let ( let* ) = Result.bind
@ -182,7 +182,8 @@ let adjust_metrics s =
srcs ; srcs ;
Ok `Empty Ok `Empty
module Make (T : Mirage_time.S) (S : Tcpip.Stack.V4V6) = struct module Make (T : Mirage_time.S) (P : Mirage_clock.PCLOCK) (S : Tcpip.Stack.V4V6) = struct
module Memtrace = Memtrace.Make(P)(S.TCP)
let timer conn get host stack dst = let timer conn get host stack dst =
let datas = let datas =
@ -226,46 +227,60 @@ module Make (T : Mirage_time.S) (S : Tcpip.Stack.V4V6) = struct
in in
one () one ()
let create_listener stack = function let create_listener stack port =
| None -> () S.TCP.listen (S.tcp stack) ~port (fun f ->
| Some port -> (S.TCP.read f >>= function
S.TCP.listen (S.tcp stack) ~port (fun f -> | Ok `Data data ->
(S.TCP.read f >>= function if Cstruct.length data > 0 then
| Ok `Data data -> let rest = Cstruct.(to_string (shift data 1)) in
if Cstruct.length data > 0 then let r =
let rest = Cstruct.(to_string (shift data 1)) in match Cstruct.get_char data 0 with
let r = | 'L' -> adjust_log_level rest
match Cstruct.get_char data 0 with | 'M' -> adjust_metrics rest
| 'L' -> adjust_log_level rest | 'l' -> get_log_levels rest
| 'M' -> adjust_metrics rest | 'm' -> get_metrics rest
| 'l' -> get_log_levels rest | _ -> Error "unknown command"
| 'm' -> get_metrics rest in
| _ -> Error "unknown command" let msg =
in match r with
let msg = | Ok `Empty -> "ok"
match r with | Ok `String reply -> "ok: " ^ reply
| Ok `Empty -> "ok" | Error msg -> "error: " ^ msg
| Ok `String reply -> "ok: " ^ reply in
| Error msg -> "error: " ^ msg S.TCP.write f (Cstruct.of_string msg) >|= function
in | Ok () -> ()
S.TCP.write f (Cstruct.of_string msg) >|= function | Error e ->
| Ok () -> () Log.warn (fun m -> m "write error on log & metrics listener %a"
| Error e -> S.TCP.pp_write_error e)
Log.warn (fun m -> m "write error on log & metrics listener %a" else
S.TCP.pp_write_error e) (Log.debug (fun m -> m "received empty data on log & metrics listener");
else Lwt.return_unit)
(Log.debug (fun m -> m "received empty data on log & metrics listener"); | Ok `Eof ->
Lwt.return_unit) Log.debug (fun m -> m "EOF on log & metrics listener");
| Ok `Eof -> Lwt.return_unit
Log.debug (fun m -> m "EOF on log & metrics listener"); | Error e ->
Lwt.return_unit Log.debug (fun m -> m "read error on log & metrics listener %a"
| Error e -> S.TCP.pp_error e);
Log.debug (fun m -> m "read error on log & metrics listener %a" Lwt.return_unit) >>= fun () ->
S.TCP.pp_error e); S.TCP.close f)
Lwt.return_unit) >>= fun () ->
S.TCP.close f)
let create ?(interval = 10) ?hostname dst ?(port = 8094) ?listen_port stack = let create ?(interval = 10) ?hostname dst ?(port = 8094) ?(listen_port = 2323)
?(memtrace_port = 4242) ?(sampling_rate = 1e-4) stack =
S.TCP.listen (S.tcp stack) ~port:memtrace_port
(fun f ->
(* only allow a single tracing client *)
match Memtrace.Memprof_tracer.active_tracer () with
| Some _ ->
Log.warn (fun m -> m "memtrace tracing already active");
S.TCP.close f
| None ->
Logs.info (fun m -> m "starting memtrace tracing");
let tracer = Memtrace.start_tracing ~context:None ~sampling_rate f in
Lwt.async (fun () ->
S.TCP.read f >|= fun _ ->
Logs.info (fun m -> m "memtrace tracing read returned, closing");
Memtrace.stop_tracing tracer);
Lwt.return_unit);
let get_cache, reporter = Metrics.cache_reporter () in let get_cache, reporter = Metrics.cache_reporter () in
Metrics.set_reporter reporter; Metrics.set_reporter reporter;
Metrics.enable_all (); Metrics.enable_all ();

20
src/mirage_monitoring.mli Normal file
View file

@ -0,0 +1,20 @@
val counter_metrics : f:('a -> string) -> string ->
(Metrics.field list, 'a -> Metrics.data) Metrics.src
(** [counter_metrics ~f name] is a metrics source with [Metrics.field list]
tags whose data function takes an ['a] value.
NOTE(review): presumably [name] is the source name and [f] maps each value
to the name of the counter it increments -- confirm against the
implementation, which is not visible here. *)

val vmname : string -> Metrics.field
(** [vmname name] creates a [tag] with the virtual machine name. *)
module Make (T : Mirage_time.S) (P : Mirage_clock.PCLOCK) (S : Tcpip.Stack.V4V6) : sig
val create : ?interval:int -> ?hostname:string -> Ipaddr.t -> ?port:int ->
?listen_port:int -> ?memtrace_port:int -> ?sampling_rate:float -> S.t -> unit
(** [create ~interval ~hostname ip ~port ~listen_port ~memtrace_port ~sampling_rate stack]
initiates monitoring on [stack] for the unikernel. The metrics are reported
every [interval] (defaults to 10) seconds to [ip] on [port] (defaults to
8094) via TCP using the influxd wire protocol. On [listen_port] (defaults
to 2323) a TCP connection can be initiated to inspect or adjust the log
level and to enable and disable metrics sources. On [memtrace_port]
(defaults to 4242) a TCP client can connect to receive a [Gc.Memprof]
trace, sampled at [sampling_rate] (defaults to [1e-4]). Only one tracing
client is served at a time: while a trace is active, further connections
to [memtrace_port] are closed immediately. *)
end

View file

@ -1,11 +0,0 @@
val counter_metrics : f:('a -> string) -> string ->
(Metrics.field list, 'a -> Metrics.data) Metrics.src
val vmname : string -> Metrics.field
(** [vmname name] creates a [tag] with the virtual machine name. *)
module Make (T : Mirage_time.S) (S : Tcpip.Stack.V4V6) : sig
val create : ?interval:int -> ?hostname:string -> Ipaddr.t -> ?port:int ->
?listen_port:int -> S.t -> unit
end