diff --git a/README.md b/README.md index cfec7c8..23bd995 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,19 @@ Using Influx, Telegraf, etc. -![Monitoring](https://raw.githubusercontent.com/roburio/monitoring-experiments/master/one.png) +![Monitoring](https://raw.githubusercontent.com/roburio/mirage-monitoring/main/one.png) + +Best used on a private network interface for your unikernel. + +# Output metrics to influx / telegraf + +Every `interval` seconds, the gathered metrics are sent to the `ip:port` provided +to `create`, using the Influx wire protocol. # Dynamic adjustments of Log level and Metrics reporting -The create function has a *listener_port* argument. If this is provided, then -on the given port TCP connections to the unikernel are possible. Each connection +The `create` function has a *listen_port* argument (defaulting to 2323). On this +customizable port, TCP connections to the unikernel are possible. Each connection can transmit a command (as text) to adjust log level and enable or disable metrics sources: @@ -31,3 +38,14 @@ Likewise, metrics status can be similarly inspected: - `m` reports the default metrics status and the metrics status for all metrics sources with a different status. - `m*` reports the default metrics status, all enabled tags, and the metrics status of all metrics sources. - `mmemory,net-solo5` reports the metrics status for memory and net-solo5 respectively. + +# Memprof tracing + +The `create` function has a `memtrace_port` argument (defaults to 4242). On this +port, when a TCP client connects, +[OCaml memory profiling](https://ocaml.org/api/Gc.Memprof.html) is started and +dumped to the client. The `sampling_rate` (defaults to 1e-4) is used. This uses +the [memtrace-mirage](https://github.com/hannesm/memprof-mirage) package. + +The output trace is best viewed with +[memtrace_viewer](https://github.com/janestreet/memtrace_viewer/). 
diff --git a/dune-project b/dune-project index 2e3a6b9..0a6233d 100644 --- a/dune-project +++ b/dune-project @@ -1,2 +1,2 @@ (lang dune 1.0) -(name monitoring-experiments) +(name mirage-monitoring) diff --git a/monitoring-experiments.opam b/mirage-monitoring.opam similarity index 58% rename from monitoring-experiments.opam rename to mirage-monitoring.opam index ca5aa18..ae010f3 100644 --- a/monitoring-experiments.opam +++ b/mirage-monitoring.opam @@ -1,10 +1,10 @@ opam-version: "2.0" -maintainer: "Hannes Mehnert " -authors: ["Hannes Mehnert "] -homepage: "https://github.com/roburio/monitoring-experiments" -doc: "https://roburio.github.io/monitoring-experiments" -dev-repo: "git+https://github.com/roburio/monitoring-experiments.git" -bug-reports: "https://github.com/roburio/monitoring-experiments/issues" +maintainer: "Robur " +authors: ["Robur "] +homepage: "https://git.robur.io/robur/mirage-monitoring" +doc: "https://roburio.github.io/mirage-monitoring" +dev-repo: "git+https://git.robur.io/robur/mirage-monitoring.git" +bug-reports: "https://github.com/roburio/mirage-monitoring/issues" license: "AGPL" depends: [ @@ -19,6 +19,8 @@ depends: [ "mirage-solo5" {>= "0.6.4"} "ocaml-freestanding" {>= "0.4.5"} "mirage-runtime" + "memtrace-mirage" {>= "0.2.1.2.2"} + "mirage-clock" {>= "4.0.0"} ] build: [ ["dune" "subst"] {dev} diff --git a/src/dune b/src/dune index 8b5f758..1e4e40d 100644 --- a/src/dune +++ b/src/dune @@ -1,6 +1,6 @@ (library - (name monitoring_experiments) - (public_name monitoring-experiments) + (name mirage_monitoring) + (public_name mirage-monitoring) (wrapped false) - (libraries logs metrics metrics-lwt metrics-influx mirage-time tcpip - mirage-solo5 mirage-runtime)) + (libraries logs metrics metrics-lwt metrics-influx mirage-time mirage-clock + memtrace-mirage tcpip mirage-solo5 mirage-runtime)) diff --git a/src/monitoring_experiments.ml b/src/mirage_monitoring.ml similarity index 74% rename from src/monitoring_experiments.ml rename to 
src/mirage_monitoring.ml index 28b927d..6b60639 100644 --- a/src/monitoring_experiments.ml +++ b/src/mirage_monitoring.ml @@ -1,6 +1,6 @@ open Lwt.Infix -let src = Logs.Src.create "monitoring-experiments" ~doc:"Monitoring experiments" +let src = Logs.Src.create "mirage-monitoring" ~doc:"MirageOS monitoring" module Log = (val Logs.src_log src : Logs.LOG) let ( let* ) = Result.bind @@ -182,7 +182,8 @@ let adjust_metrics s = srcs ; Ok `Empty -module Make (T : Mirage_time.S) (S : Tcpip.Stack.V4V6) = struct +module Make (T : Mirage_time.S) (P : Mirage_clock.PCLOCK) (S : Tcpip.Stack.V4V6) = struct + module Memtrace = Memtrace.Make(P)(S.TCP) let timer conn get host stack dst = let datas = @@ -226,46 +227,60 @@ module Make (T : Mirage_time.S) (S : Tcpip.Stack.V4V6) = struct in one () - let create_listener stack = function - | None -> () - | Some port -> - S.TCP.listen (S.tcp stack) ~port (fun f -> - (S.TCP.read f >>= function - | Ok `Data data -> - if Cstruct.length data > 0 then - let rest = Cstruct.(to_string (shift data 1)) in - let r = - match Cstruct.get_char data 0 with - | 'L' -> adjust_log_level rest - | 'M' -> adjust_metrics rest - | 'l' -> get_log_levels rest - | 'm' -> get_metrics rest - | _ -> Error "unknown command" - in - let msg = - match r with - | Ok `Empty -> "ok" - | Ok `String reply -> "ok: " ^ reply - | Error msg -> "error: " ^ msg - in - S.TCP.write f (Cstruct.of_string msg) >|= function - | Ok () -> () - | Error e -> - Log.warn (fun m -> m "write error on log & metrics listener %a" - S.TCP.pp_write_error e) - else - (Log.debug (fun m -> m "received empty data on log & metrics listener"); - Lwt.return_unit) - | Ok `Eof -> - Log.debug (fun m -> m "EOF on log & metrics listener"); - Lwt.return_unit - | Error e -> - Log.debug (fun m -> m "read error on log & metrics listener %a" - S.TCP.pp_error e); - Lwt.return_unit) >>= fun () -> - S.TCP.close f) + let create_listener stack port = + S.TCP.listen (S.tcp stack) ~port (fun f -> + (S.TCP.read f >>= 
function + | Ok `Data data -> + if Cstruct.length data > 0 then + let rest = Cstruct.(to_string (shift data 1)) in + let r = + match Cstruct.get_char data 0 with + | 'L' -> adjust_log_level rest + | 'M' -> adjust_metrics rest + | 'l' -> get_log_levels rest + | 'm' -> get_metrics rest + | _ -> Error "unknown command" + in + let msg = + match r with + | Ok `Empty -> "ok" + | Ok `String reply -> "ok: " ^ reply + | Error msg -> "error: " ^ msg + in + S.TCP.write f (Cstruct.of_string msg) >|= function + | Ok () -> () + | Error e -> + Log.warn (fun m -> m "write error on log & metrics listener %a" + S.TCP.pp_write_error e) + else + (Log.debug (fun m -> m "received empty data on log & metrics listener"); + Lwt.return_unit) + | Ok `Eof -> + Log.debug (fun m -> m "EOF on log & metrics listener"); + Lwt.return_unit + | Error e -> + Log.debug (fun m -> m "read error on log & metrics listener %a" + S.TCP.pp_error e); + Lwt.return_unit) >>= fun () -> + S.TCP.close f) - let create ?(interval = 10) ?hostname dst ?(port = 8094) ?listen_port stack = + let create ?(interval = 10) ?hostname dst ?(port = 8094) ?(listen_port = 2323) + ?(memtrace_port = 4242) ?(sampling_rate = 1e-4) stack = + S.TCP.listen (S.tcp stack) ~port:memtrace_port + (fun f -> + (* only allow a single tracing client *) + match Memtrace.Memprof_tracer.active_tracer () with + | Some _ -> + Log.warn (fun m -> m "memtrace tracing already active"); + S.TCP.close f + | None -> + Logs.info (fun m -> m "starting memtrace tracing"); + let tracer = Memtrace.start_tracing ~context:None ~sampling_rate f in + Lwt.async (fun () -> + S.TCP.read f >|= fun _ -> + Logs.info (fun m -> m "memtrace tracing read returned, closing"); + Memtrace.stop_tracing tracer); + Lwt.return_unit); let get_cache, reporter = Metrics.cache_reporter () in Metrics.set_reporter reporter; Metrics.enable_all (); diff --git a/src/mirage_monitoring.mli b/src/mirage_monitoring.mli new file mode 100644 index 0000000..6435666 --- /dev/null +++ 
b/src/mirage_monitoring.mli @@ -0,0 +1,20 @@ + +val counter_metrics : f:('a -> string) -> string -> + (Metrics.field list, 'a -> Metrics.data) Metrics.src + +val vmname : string -> Metrics.field +(** [vmname name] creates a [tag] with the virtual machine name. *) + +module Make (T : Mirage_time.S) (P : Mirage_clock.PCLOCK) (S : Tcpip.Stack.V4V6) : sig + + val create : ?interval:int -> ?hostname:string -> Ipaddr.t -> ?port:int -> + ?listen_port:int -> ?memtrace_port:int -> ?sampling_rate:float -> S.t -> unit + (** [create ~interval ~hostname ip ~port ~listen_port ~memtrace_port ~sampling_rate stack] + initiates monitoring on [stack] for the unikernel. The metrics are reported + every [interval] (defaults to 10) seconds to [ip] on [port] (defaults to + 8094) via TCP using the influxd wire protocol. On [listen_port] (defaults + to 2323) a TCP connection can be initiated to adjust the log level and + enable and disable metrics sources. On [memtrace_port] (defaults to 4242) + only one TCP client at a time can connect to receive a [Gc.Memprof] + trace. The [sampling_rate] defaults to [1e-4]. *) +end diff --git a/src/monitoring_experiments.mli b/src/monitoring_experiments.mli deleted file mode 100644 index 9922b71..0000000 --- a/src/monitoring_experiments.mli +++ /dev/null @@ -1,11 +0,0 @@ - -val counter_metrics : f:('a -> string) -> string -> - (Metrics.field list, 'a -> Metrics.data) Metrics.src - -val vmname : string -> Metrics.field -(** [vmname name] creates a [tag] with the virtual machine name. *) - -module Make (T : Mirage_time.S) (S : Tcpip.Stack.V4V6) : sig - val create : ?interval:int -> ?hostname:string -> Ipaddr.t -> ?port:int -> - ?listen_port:int -> S.t -> unit -end