Prometheus support added

Moving metrics handling to Prometheus from swagger stats.

Prometheus support added
Moving metrics handling to Prometheus from swagger stats.
742d8b49 · Nilanjan Daw · c7f1b2a2 · 742d8b49 · 742d8b49 · 742d8b49
Commit 742d8b49 authored Apr 02, 2020 by Nilanjan Daw
4 changed files
--- a/dispatch_system/dispatch_manager/metrics.js
+++ b/dispatch_system/dispatch_manager/metrics.js
@@ -4,11 +4,31 @@ const constants = require('.././constants.json');
 const secrets = require('./secrets.json')
 const fetch = require('node-fetch');
 const util = require('util')
+const prom = require('prom-client');
+const Registry = prom.Registry;
+const register = new Registry();
 const alpha = 0.99
 let log_channel = constants.topics.log_channel,
    metrics = {  }
+// Default Node.js process metrics, exposed through the custom `register`.
+// prom-client >= 12 collects these on every scrape: the former `timeout` option
+// is no longer supported and the call no longer returns a Timeout handle, so the
+// option is dropped. The binding is kept only so existing references still resolve.
+// The prefix ends in "_" so names read `xanadu_process_...` instead of `xanaduprocess_...`.
+const intervalCollector = prom.collectDefaultMetrics({ prefix: 'xanadu_', register });
+
+// Application metrics, updated from collectMetrics().
+// NOTE(review): the histograms set no `buckets`, so prom-client's defaults
+// (0.005 ... 10) apply - confirm they suit the unit of the observed values.
+// Gauge: value of the most recent 'scale' event (no per-function labels).
+const workerCountMetric = new prom.Gauge({ name: "worker_count", help: "worker count" });
+// Histogram of values reported by 'warmstart' events.
+const warmstartMetric = new prom.Histogram({ name: "warmstart", help: "warm start latency" });
+// Histogram of values reported by 'coldstart' events.
+const coldstartMetric = new prom.Histogram({ name: "coldstart", help: "cold start latency"});
+// Histogram of `starttime` values carried by 'scale' events.
+const starttimeMetric = new prom.Histogram({ name: "starttime", help: "worker start times" });
+// Summary over both cold and warm start observations, with explicit quantiles.
+const requestMetric = new prom.Summary({ name: "requests", help: "request RTT times",
+    percentiles: [0.01, 0.05, 0.5, 0.9, 0.95, 0.99, 0.999]
+});
+
+// Expose the application metrics on the exported custom registry.
+// NOTE(review): with no `registers` option these metrics presumably also attach
+// to prom-client's global default registry - confirm that is intended.
+register.registerMetric(workerCountMetric);
+register.registerMetric(warmstartMetric);
+register.registerMetric(coldstartMetric);
+register.registerMetric(starttimeMetric);
+register.registerMetric(requestMetric);
 let metricsDB = `http://${secrets.couchdb_username}:${secrets.couchdb_password}@${constants.couchdb_host}`
 metricsDB = metricsDB + "/" + constants.metrics_db_name + "/"
 let kafka = require('kafka-node'),
@@ -47,14 +67,20 @@ function collectMetrics(metric) {
    if (metric.type === 'coldstart') {
        metrics[metric.functionHash][metric.runtime].shortterm.coldstart += metric.value
        metrics[metric.functionHash][metric.runtime].shortterm.coldstart_total_request += 1
+        coldstartMetric.observe(metric.value)
+        requestMetric.observe(metric.value)
    } else if (metric.type === 'warmstart') {
        metrics[metric.functionHash][metric.runtime].shortterm.warmstart += metric.value
        metrics[metric.functionHash][metric.runtime].shortterm.warm_total_request += 1
+        warmstartMetric.observe(metric.value)
+        requestMetric.observe(metric.value)
    } else if (metric.type === 'scale') {
        metrics[metric.functionHash][metric.runtime].shortterm.worker_count = metric.value
+        workerCountMetric.set(metric.value)
        if (metric.starttime !== undefined) {
            metrics[metric.functionHash][metric.runtime].shortterm.starttime += metric.starttime
            metrics[metric.functionHash][metric.runtime].shortterm.scale_count += 1
+            starttimeMetric.observe(metric.starttime)
        }
    }
@@ -169,5 +195,5 @@ async function fetchData(functionHash, metric, runtime) {
 }
 module.exports = {
-    collectMetrics, broadcastMetrics
+    collectMetrics, broadcastMetrics, register
 }
\ No newline at end of file
--- a/dispatch_system/dispatch_manager/package.json
+++ b/dispatch_system/dispatch_manager/package.json
@@ -20,6 +20,7 @@
    "morgan": "^1.9.1",
    "mqtt": "^3.0.0",
    "node-fetch": "^2.6.0",
+    "prom-client": "^12.0.0",
    "redis": "^2.8.0",
    "request": "^2.88.0",
    "request-promise": "^4.2.5",

--- a/dispatch_system/prometheus.yml
+++ b/dispatch_system/prometheus.yml
+# my global config
+global:
+  scrape_interval:     15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
+  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
+  # scrape_timeout is set to the global default (10s).
+  # Attach these labels to any time series or alerts when communicating with
+  # external systems (federation, remote storage, Alertmanager).
+  external_labels:
+      monitor: 'codelab-monitor'
+# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
+rule_files:
+  # - "first.rules"
+  # - "second.rules"
+# Scrape configurations, one job per endpoint: Prometheus itself (9090),
+# the 'docker' target (9323) and the 'xanadu' target (8080).
+scrape_configs:
+  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
+  - job_name: 'prometheus'
+    # metrics_path defaults to '/metrics'
+    # scheme defaults to 'http'.
+    static_configs:
+      - targets: ['localhost:9090']
+  - job_name: 'docker'
+    # metrics_path defaults to '/metrics'
+    # scheme defaults to 'http'.
+    static_configs:
+      - targets: ['localhost:9323']
+  - job_name: 'xanadu'
+    static_configs:
+      - targets: ['localhost:8080']
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -5,6 +5,7 @@ networks:
 services:
  zookeeper:
    image: 'bitnami/zookeeper:3'
+    restart: unless-stopped
    networks:
      - kafka-serverless
    ports:
@@ -15,6 +16,7 @@ services:
      - ALLOW_ANONYMOUS_LOGIN=yes
  kafka:
    image: 'bitnami/kafka:2'
+    restart: unless-stopped
    networks:
      - kafka-serverless
    ports: