Browse Source

migrate service from github

pull/4/head
fedy95 5 years ago
parent
commit
46f0ac217f
  1. 11
      README.md
  2. 11
      alertmanager/config.yml
  3. 207
      docker-compose.yml
  4. 12
      grafana/provisioning/dashboards/dashboard.yml
  5. 7
      grafana/provisioning/dashboards/dashboards.md
  6. 1270
      grafana/provisioning/dashboards/docker_containers.json
  7. 926
      grafana/provisioning/dashboards/i-o-statistics.json
  8. 3412
      grafana/provisioning/dashboards/monitor_services.json
  9. 1441
      grafana/provisioning/dashboards/nexus_host.json
  10. 567
      grafana/provisioning/dashboards/nginx-2.json
  11. 398
      grafana/provisioning/dashboards/nginx_container.json
  12. 3750
      grafana/provisioning/dashboards/node-exporter.json
  13. 1511
      grafana/provisioning/dashboards/smart-1.json
  14. 923
      grafana/provisioning/dashboards/smart-2.json
  15. 650
      grafana/provisioning/dashboards/smart-3.json
  16. 653
      grafana/provisioning/dashboards/smart-4.json
  17. 18
      grafana/provisioning/datasources/datasource.yml
  18. 1
      nodeexporter/.gitignore
  19. 40
      prometheus/alert.rules
  20. 44
      prometheus/prometheus.yml
  21. 26
      promtail/etc/promtail/config.yml
  22. 16
      smartctl/Dockerfile
  23. 13
      smartctl/script/entrypoint.sh
  24. 194
      smartctl/script/smartmon.sh

11
README.md

@ -1,2 +1,11 @@
# monitoring
### info
- [Loki/Promtail](https://github.com/grafana/loki)
- [Prometheus](https://github.com/prometheus/prometheus)
- [NodeExporter](https://github.com/prometheus/node_exporter)
- [cAdvisor](https://github.com/google/cadvisor)
- [Pushgateway](https://github.com/prometheus/pushgateway)
- [Grafana](https://github.com/grafana/grafana)
- [Alertmanager](https://github.com/prometheus/alertmanager)
- [dockprom](https://github.com/stefanprodan/dockprom)
- [loki](https://github.com/grafana/loki)

11
alertmanager/config.yml

@ -0,0 +1,11 @@
route:
receiver: 'slack'
receivers:
- name: 'slack'
slack_configs:
- send_resolved: true
text: "{{ .CommonAnnotations.description }}"
username: 'Prometheus'
channel: '#<channel-name>'
api_url: 'https://hooks.slack.com/services/<webhook-id>'

207
docker-compose.yml

@ -0,0 +1,207 @@
version: "3"
networks:
monitor-net:
driver: bridge
volumes:
grafana_data: {}
prometheus_data: {}
promtail-data: {}
services:
alertmanager:
image: prom/alertmanager:v0.21.0
container_name: alertmanager
hostname: alertmanager
restart: unless-stopped
volumes:
- ./alertmanager:/etc/alertmanager
command:
- '--config.file=/etc/alertmanager/config.yml'
- '--storage.path=/alertmanager'
expose:
- "9093"
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
cadvisor:
image: gcr.io/cadvisor/cadvisor:v0.37.0
container_name: cadvisor
hostname: cadvisor
restart: unless-stopped
volumes:
- /:/rootfs:ro
- /var/run:/var/run:rw
- /sys:/sys:ro
- /var/lib/docker:/var/lib/docker:ro
expose:
- "8080"
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
grafana:
image: grafana/grafana:7.1.1
container_name: grafana
hostname: grafana
restart: unless-stopped
volumes:
- grafana_data:/var/lib/grafana
- ./grafana/provisioning:/etc/grafana/provisioning
environment:
- GF_SECURITY_ADMIN_USER=${ADMIN_USER:-admin}
- GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin}
- GF_USERS_ALLOW_SIGN_UP=false
ports:
- "3002:3000"
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
loki:
image: grafana/loki:1.6.0
container_name: loki
hostname: loki
restart: on-failure
command:
- '-config.file=/etc/loki/local-config.yaml'
expose:
- "3100"
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
nginxexporter:
image: nginx/nginx-prometheus-exporter:0.5.0
container_name: nginxexporter
hostname: nginxexporter
restart: on-failure
logging:
driver: json-file
options:
max-size: "10m"
max-file: "5"
environment:
- SCRAPE_URI=http://nginx:80/nginx_status
- TELEMETRY_PATH=/metrics
- NGINX_RETRIES=10
expose:
- "9113"
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
nodeexporter:
image: prom/node-exporter:v1.0.1
container_name: nodeexporter
hostname: nodeexporter
restart: unless-stopped
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
- ./nodeexporter/etc:/var/lib/node_exporter
command:
- '--path.procfs=/host/proc'
- '--path.sysfs=/host/sys'
- '--path.rootfs=/rootfs'
- '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)'
- '--collector.textfile.directory=/var/lib/node_exporter'
ports:
- "3003:9100"
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
prometheus:
image: prom/prometheus:v2.20.0
container_name: prometheus
hostname: prometheus
restart: unless-stopped
volumes:
- ./prometheus:/etc/prometheus
- prometheus_data:/prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--web.console.libraries=/etc/prometheus/console_libraries'
- '--web.console.templates=/etc/prometheus/consoles'
- '--storage.tsdb.retention.time=200h'
- '--web.enable-lifecycle'
ports:
- "3004:9090"
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
promtail:
image: grafana/promtail:1.6.0
container_name: promtail
hostname: promtail
restart: on-failure
volumes:
- promtail-data:/var/lib/promtail/positions
- ./promtail/etc/promtail/config.yml:/etc/promtail/config.yml
- /var/lib/docker/containers:/var/lib/docker/containers:ro
- /var/log:/var/log
command:
- '-config.file=/etc/promtail/config.yml'
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
pushgateway:
image: prom/pushgateway:v1.2.0
container_name: pushgateway
hostname: pushgateway
restart: unless-stopped
expose:
- "9091"
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
smartctl:
build:
context: ./smartctl
container_name: smartctl
hostname: smartctl
restart: on-failure
privileged: true
volumes:
- ./nodeexporter/etc:/var/lib/node_exporter
environment:
- INTERVAL=300
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"

12
grafana/provisioning/dashboards/dashboard.yml

@ -0,0 +1,12 @@
apiVersion: 1
providers:
- name: 'Prometheus'
orgId: 1
folder: ''
type: file
disableDeletion: false
editable: true
allowUiUpdates: true
options:
path: /etc/grafana/provisioning/dashboards

7
grafana/provisioning/dashboards/dashboards.md

@ -0,0 +1,7 @@
### custom dashboards
- nginx
- https://github.com/nginxinc/nginx-prometheus-exporter/blob/master/grafana/dashboard.json
- smart
- https://grafana.com/grafana/dashboards?search=Smartmon

1270
grafana/provisioning/dashboards/docker_containers.json
File diff suppressed because it is too large
View File

926
grafana/provisioning/dashboards/i-o-statistics.json

@ -0,0 +1,926 @@
{
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "Prometheus",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "6.6.2"
},
{
"type": "panel",
"id": "graph",
"name": "Graph",
"version": ""
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": 11801,
"graphTooltip": 0,
"id": null,
"iteration": 1582642997792,
"links": [],
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 11,
"w": 11,
"x": 0,
"y": 0
},
"hiddenSeries": false,
"id": 6,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"show": true,
"sort": "avg",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(node_disk_reads_completed_total{instance=~\"$hostname\", job=~\"$job\", device=~\"$device\"}[1m]) ",
"legendFormat": "{{instance}} - {{device}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Read IOPS",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"transparent": true,
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "iops",
"label": "Reads",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 11,
"w": 13,
"x": 11,
"y": 0
},
"hiddenSeries": false,
"id": 8,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"show": true,
"sort": "avg",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(node_disk_writes_completed_total{instance=~\"$hostname\", job=~\"$job\", device=~\"$device\"}[1m]) ",
"legendFormat": "{{instance}} - {{device}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Write IOPS",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"transparent": true,
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "iops",
"label": "Reads",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 11,
"w": 11,
"x": 0,
"y": 11
},
"hiddenSeries": false,
"id": 2,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"show": true,
"sort": "avg",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "nl-t1-mon01.tst.nan:9100 - vda - lat",
"yaxis": 1
},
{
"alias": "nl-t1-graph01.tst.nan:9100 - vda - lat",
"yaxis": 1
},
{
"alias": "nl-t1-graph01.tst.nan:9100 - vda - iops",
"yaxis": 2
},
{
"alias": "nl-t1-mon01.tst.nan:9100 - vda - iops",
"yaxis": 2
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(node_disk_read_time_seconds_total{instance=~\"$hostname\", job=~\"$job\", device=~\"$device\"}[1m]) / rate(node_disk_reads_completed_total{instance=~\"$hostname\", job=~\"$job\", device=~\"$device\"}[1m]) * 1000000",
"hide": false,
"instant": false,
"intervalFactor": 1,
"legendFormat": "{{instance}} - {{device}} ",
"refId": "A"
}
],
"thresholds": [
{
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 20000,
"yaxis": "left"
}
],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Read Latency",
"tooltip": {
"shared": true,
"sort": 1,
"value_type": "individual"
},
"transparent": true,
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": null,
"format": "µs",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"decimals": null,
"format": "iops",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 11,
"w": 13,
"x": 11,
"y": 11
},
"hiddenSeries": false,
"id": 4,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"show": true,
"sort": "avg",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(node_disk_write_time_seconds_total{instance=~\"$hostname\", job=~\"$job\", device=~\"$device\"}[1m]) / rate(node_disk_writes_completed_total{instance=~\"$hostname\", job=~\"$job\", device=~\"$device\"}[1m]) * 1000000",
"legendFormat": "{{instance}} - {{device}}",
"refId": "A"
}
],
"thresholds": [
{
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 20000,
"yaxis": "left"
}
],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Write Latency",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"transparent": true,
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "µs",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 11,
"x": 0,
"y": 22
},
"hiddenSeries": false,
"id": 13,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"show": true,
"sort": "avg",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(node_disk_read_bytes_total{instance=~\"$hostname\", job=~\"$job\", device=~\"$device\"}[1m])) by (instance) ",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Read Troughput",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"transparent": true,
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "Bps",
"label": "Reads",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 13,
"x": 11,
"y": 22
},
"hiddenSeries": false,
"id": 15,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"show": true,
"sort": "avg",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(node_disk_written_bytes_total{instance=~\"$hostname\", job=~\"$job\", device=~\"$device\"}[1m])) by (instance) ",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Write Troughput",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"transparent": true,
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "Bps",
"label": "Reads",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 11,
"w": 11,
"x": 0,
"y": 31
},
"hiddenSeries": false,
"id": 10,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"show": true,
"sort": "avg",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(node_disk_read_bytes_total{instance=~\"$hostname\", job=~\"$job\", device=~\"$device\"}[1m]) / rate(node_disk_reads_completed_total{instance=~\"$hostname\", job=~\"$job\", device=~\"$device\"}[1m]) ",
"legendFormat": "{{instance}} - {{device}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Read Request Size",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"transparent": true,
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "decbytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 11,
"w": 13,
"x": 11,
"y": 31
},
"hiddenSeries": false,
"id": 12,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"show": true,
"sort": "avg",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(node_disk_written_bytes_total{instance=~\"$hostname\", job=~\"$job\", device=~\"$device\"}[1m]) / rate(node_disk_writes_completed_total{instance=~\"$hostname\", job=~\"$job\", device=~\"$device\"}[1m])",
"legendFormat": "{{instance}} - {{device}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Write Request Size",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"transparent": true,
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "decbytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"refresh": "5s",
"schemaVersion": 22,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"allValue": null,
"current": {},
"datasource": "Prometheus",
"definition": "label_values(node_uname_info, instance)",
"hide": 0,
"includeAll": true,
"label": "Hostname",
"multi": true,
"name": "hostname",
"options": [],
"query": "label_values(node_uname_info, instance)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {},
"datasource": "Prometheus",
"definition": "label_values(node_disk_io_now{instance=~\"$hostname\"}, device)",
"hide": 0,
"includeAll": true,
"label": "Devicename",
"multi": true,
"name": "device",
"options": [],
"query": "label_values(node_disk_io_now{instance=~\"$hostname\"}, device)",
"refresh": 1,
"regex": "/.*sd|dm|vd.*/",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {},
"datasource": "Prometheus",
"definition": "label_values(node_uname_info, job)",
"hide": 0,
"includeAll": true,
"label": "Job",
"multi": true,
"name": "job",
"options": [],
"query": "label_values(node_uname_info, job)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-15m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
]
},
"timezone": "browser",
"title": "Storage I/O Statistics",
"uid": "RzrUuuQZk",
"version": 42,
"description": "This dashboards shows IOPS, I/O Latency, Average Request size and throughput. Alerts are configured for latency."
}

3412
grafana/provisioning/dashboards/monitor_services.json
File diff suppressed because it is too large
View File

1441
grafana/provisioning/dashboards/nexus_host.json
File diff suppressed because it is too large
View File

567
grafana/provisioning/dashboards/nginx-2.json

@ -0,0 +1,567 @@
{
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "Prometheus",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "5.0.0"
},
{
"type": "panel",
"id": "graph",
"name": "Graph",
"version": ""
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
},
{
"type": "panel",
"id": "singlestat",
"name": "Singlestat",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "Official dashboard for NGINX Prometheus exporter",
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": null,
"iteration": 1562682051068,
"links": [],
"panels": [
{
"datasource": "${DS_PROMETHEUS}",
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 4,
"panels": [],
"title": "Status",
"type": "row"
},
{
"datasource": "${DS_PROMETHEUS}",
"cacheTimeout": null,
"colorBackground": true,
"colorPostfix": false,
"colorPrefix": false,
"colorValue": false,
"colors": [
"#E02F44",
"#FF9830",
"#299c46"
],
"decimals": null,
"description": "",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 3,
"w": 12,
"x": 0,
"y": 1
},
"id": 8,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"options": {},
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"repeat": "instance",
"repeatDirection": "h",
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "nginx_up{instance=~\"$instance\"}",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"refId": "A"
}
],
"thresholds": "1,1",
"timeFrom": null,
"timeShift": null,
"title": "NGINX Status for $instance",
"type": "singlestat",
"valueFontSize": "100%",
"valueMaps": [
{
"op": "=",
"text": "Down",
"value": "0"
},
{
"op": "=",
"text": "Up",
"value": "1"
}
],
"valueName": "current"
},
{
"datasource": "${DS_PROMETHEUS}",
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 4
},
"id": 6,
"panels": [],
"title": "Metrics",
"type": "row"
},
{
"datasource": "${DS_PROMETHEUS}",
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"decimals": null,
"description": "",
"fill": 1,
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
"y": 5
},
"id": 10,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"hideEmpty": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "irate(nginx_connections_accepted{instance=~\"$instance\"}[5m])",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"legendFormat": "{{instance}} accepted",
"refId": "A"
},
{
"expr": "irate(nginx_connections_handled{instance=~\"$instance\"}[5m])",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"legendFormat": "{{instance}} handled",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Processed connections",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": 1,
"format": "short",
"label": "Connections (rate)",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": "",
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"datasource": "${DS_PROMETHEUS}",
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"decimals": 0,
"fill": 1,
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
"y": 5
},
"id": 12,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "nginx_connections_active{instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}} active",
"refId": "A"
},
{
"expr": "nginx_connections_reading{instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}} reading",
"refId": "B"
},
{
"expr": "nginx_connections_waiting{instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}} waiting",
"refId": "C"
},
{
"expr": "nginx_connections_writing{instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}} writing",
"refId": "D"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Active Connections",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": 0,
"format": "short",
"label": "Connections",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"datasource": "${DS_PROMETHEUS}",
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"fill": 1,
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 15
},
"id": 15,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "irate(nginx_http_requests_total{instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}} total requests",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Total requests",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"refresh": "5s",
"schemaVersion": 18,
"style": "dark",
"tags": [
"nginx",
"prometheus",
"nginx prometheus exporter"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"tags": [],
"text": "default",
"value": "default"
},
"hide": 0,
"includeAll": false,
"label": "datasource",
"multi": false,
"name": "DS_PROMETHEUS",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
},
{
"allValue": null,
"current": {},
"datasource": "${DS_PROMETHEUS}",
"definition": "label_values(nginx_up, instance)",
"hide": 0,
"includeAll": true,
"label": "",
"multi": true,
"name": "instance",
"options": [],
"query": "label_values(nginx_up, instance)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-15m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "nginx-2",
"uid": "MsjffzSZz",
"version": 1
}

398
grafana/provisioning/dashboards/nginx_container.json

@ -0,0 +1,398 @@
{
"id": null,
"title": "Nginx",
"description": "Nginx exporter metrics",
"tags": [
"nginx"
],
"style": "dark",
"timezone": "browser",
"editable": true,
"hideControls": false,
"sharedCrosshair": true,
"rows": [
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"aliasColors": {},
"bars": false,
"datasource": "Prometheus",
"decimals": 2,
"editable": true,
"error": false,
"fill": 1,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 3,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(nginx_connections_processed_total{stage=\"any\"}[5m])) by (stage)",
"hide": false,
"interval": "",
"intervalFactor": 10,
"legendFormat": "requests",
"metric": "",
"refId": "B",
"step": 10
}
],
"timeFrom": null,
"timeShift": null,
"title": "Requests/sec",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"datasource": "Prometheus",
"decimals": 2,
"editable": true,
"error": false,
"fill": 1,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 2,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(nginx_connections_current) by (state)",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{state}}",
"metric": "",
"refId": "A",
"step": 2
}
],
"timeFrom": null,
"timeShift": null,
"title": "Connections",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"datasource": "Prometheus",
"decimals": 2,
"editable": true,
"error": false,
"fill": 1,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 1,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(nginx_connections_processed_total{stage!=\"any\"}[5m])) by (stage)",
"hide": false,
"interval": "",
"intervalFactor": 10,
"legendFormat": "{{stage}}",
"metric": "",
"refId": "B",
"step": 10
}
],
"timeFrom": null,
"timeShift": null,
"title": "Connections rate",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "Nginx exporter metrics"
},
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"aliasColors": {},
"bars": false,
"datasource": null,
"editable": true,
"error": false,
"fill": 1,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 4,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(container_cpu_usage_seconds_total{name=~\"nginx\"}[5m])) / count(node_cpu_seconds_total{mode=\"system\"}) * 100",
"intervalFactor": 2,
"legendFormat": "nginx",
"refId": "A",
"step": 2
}
],
"timeFrom": null,
"timeShift": null,
"title": "CPU usage",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "Nginx container metrics"
}
],
"time": {
"from": "now-15m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"templating": {
"list": []
},
"annotations": {
"list": []
},
"refresh": "10s",
"schemaVersion": 12,
"version": 9,
"links": [],
"gnetId": null
}

3750
grafana/provisioning/dashboards/node-exporter.json
File diff suppressed because it is too large
View File

1511
grafana/provisioning/dashboards/smart-1.json
File diff suppressed because it is too large
View File

923
grafana/provisioning/dashboards/smart-2.json

@ -0,0 +1,923 @@
{
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "Prometheus",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__requires": [
{
"type": "panel",
"id": "gauge",
"name": "Gauge",
"version": ""
},
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "6.4.4"
},
{
"type": "panel",
"id": "graph",
"name": "Graph",
"version": ""
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "Extended smartmon Dashboard for disk SMART data",
"editable": true,
"gnetId": 10664,
"graphTooltip": 0,
"id": null,
"iteration": 1574521336451,
"links": [],
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 13,
"x": 0,
"y": 0
},
"id": 1,
"interval": "",
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "connected",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 1,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "avg(smartmon_temperature_celsius_raw_value{instance=~\"$instance\",disk=~\"$disk\",type=~\"$type\",serial_number=~\"$serial_number\",model_family=~\"$model_family\",device_model=~\"$device_model\"}) by (device_model,serial_number)",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "1m",
"intervalFactor": 1,
"legendFormat": "{{device_model}} (S/N: {{serial_number}})",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Disk Temperature",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": null,
"format": "short",
"label": null,
"logBase": 1,
"max": "60",
"min": "20",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"columns": [],
"datasource": "Prometheus",
"fontSize": "100%",
"gridPos": {
"h": 5,
"w": 11,
"x": 13,
"y": 0
},
"id": 12,
"links": [],
"options": {},
"pageSize": null,
"scroll": true,
"showHeader": true,
"sort": {
"col": 0,
"desc": true
},
"styles": [
{
"alias": "Time",
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"pattern": "Time",
"type": "hidden"
},
{
"alias": "Healthy",
"colorMode": "row",
"colors": [
"rgba(245, 54, 54, 0.9)",
"#73BF69",
"#73BF69"
],
"decimals": 2,
"mappingType": 1,
"pattern": "Value",
"thresholds": [
"1"
],
"type": "string",
"unit": "short",
"valueMaps": [
{
"text": "Yes",
"value": "1"
},
{
"text": "No",
"value": "0"
}
]
}
],
"targets": [
{
"expr": " max(smartmon_device_smart_healthy{instance=~\"$instance\",disk=~\"$disk\",type=~\"$type\",serial_number=~\"$serial_number\",model_family=~\"$model_family\",device_model=~\"$device_model\"}) by (disk,device_model,serial_number) < 1",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Unhealthy Disks",
"transform": "table",
"type": "table"
},
{
"aliasColors": {},
"bars": true,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"decimals": 0,
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 5,
"w": 11,
"x": 13,
"y": 5
},
"id": 5,
"legend": {
"alignAsTable": false,
"avg": false,
"current": true,
"hideEmpty": false,
"hideZero": true,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": true
},
"lines": false,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": true,
"targets": [
{
"expr": "smartmon_reallocated_sector_ct_raw_value{instance=~\"$instance\",disk=~\"$disk\",type=~\"$type\",serial_number=~\"$serial_number\",model_family=~\"$model_family\",device_model=~\"$device_model\"} > 0",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "Reallocated {{disk}}",
"refId": "B"
},
{
"expr": "smartmon_current_pending_sector_raw_value{instance=~\"$instance\",disk=~\"$disk\",type=~\"$type\",serial_number=~\"$serial_number\",model_family=~\"$model_family\",device_model=~\"$device_model\"} > 0",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "Pending {{disk}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Pending and Reallocated Sectors",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": 0,
"format": "short",
"label": "",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"columns": [],
"datasource": "Prometheus",
"fontSize": "100%",
"gridPos": {
"h": 10,
"w": 13,
"x": 0,
"y": 7
},
"id": 10,
"interval": "1h",
"links": [],
"options": {},
"pageSize": null,
"scroll": true,
"showHeader": true,
"sort": {
"col": 4,
"desc": true
},
"styles": [
{
"alias": "",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "Time",
"thresholds": [],
"type": "hidden",
"unit": "short"
},
{
"alias": "Power On Time",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"decimals": 2,
"pattern": "Value #A",
"thresholds": [],
"type": "number",
"unit": "h"
},
{
"alias": "",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "instance",
"thresholds": [],
"type": "hidden",
"unit": "short"
},
{
"alias": "Load Cycles",
"colorMode": "value",
"colors": [
"rgba(50, 172, 45, 0.97)",
"rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 1,
"mappingType": 1,
"pattern": "Value #B",
"thresholds": [
"20000",
"50000"
],
"type": "number",
"unit": "short",
"valueMaps": [
{
"text": "",
"value": ""
},
{
"text": "",
"value": ""
}
]
},
{
"alias": "Healthy",
"colorMode": "row",
"colors": [
"#F2495C",
"#F2495C",
"rgb(14, 54, 10)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "Value #C",
"thresholds": [
"0",
"1"
],
"type": "string",
"unit": "short",
"valueMaps": [
{
"text": "Yes",
"value": "1"
},
{
"text": "No",
"value": "0"
}
]
}
],
"targets": [
{
"expr": "max(smartmon_power_on_hours_raw_value{instance=~\"$instance\",disk=~\"$disk\",type=~\"$type\",serial_number=~\"$serial_number\",model_family=~\"$model_family\",device_model=~\"$device_model\"}) by (device_model,serial_number)",
"format": "table",
"instant": true,
"interval": "",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
},
{
"expr": "max(smartmon_load_cycle_count_raw_value{instance=~\"$instance\",disk=~\"$disk\",type=~\"$type\",serial_number=~\"$serial_number\",model_family=~\"$model_family\",device_model=~\"$device_model\"}) by (device_model,serial_number)",
"format": "table",
"instant": true,
"interval": "",
"intervalFactor": 2,
"legendFormat": "Load Cycles",
"refId": "B"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Disk Lifetime",
"transform": "table",
"type": "table"
},
{
"datasource": "Prometheus",
"gridPos": {
"h": 6,
"w": 11,
"x": 13,
"y": 10
},
"id": 8,
"links": [],
"options": {
"fieldOptions": {
"calcs": [
"last"
],
"defaults": {
"mappings": [],
"max": 100,
"min": 0,
"thresholds": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": 60
},
{
"color": "red",
"value": 80
}
],
"title": "",
"unit": "percent"
},
"override": {},
"values": false
},
"orientation": "horizontal",
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"pluginVersion": "6.4.4",
"targets": [
{
"expr": "sort_desc(100 - smartmon_wear_leveling_count_value {instance=~\"$instance\",disk=~\"$disk\",type=~\"$type\",serial_number=~\"$serial_number\",model_family=~\"$model_family\",device_model=~\"$device_model\"})",
"format": "time_series",
"instant": true,
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{disk}}",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Wear Level on SSDs",
"type": "gauge"
},
{
"cacheTimeout": null,
"columns": [],
"datasource": "Prometheus",
"fontSize": "100%",
"gridPos": {
"h": 6,
"w": 11,
"x": 13,
"y": 16
},
"id": 3,
"links": [],
"options": {},
"pageSize": null,
"pluginVersion": "6.2.5",
"scroll": true,
"showHeader": true,
"sort": {
"col": 2,
"desc": false
},
"styles": [
{
"alias": "Total Written",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"decimals": 1,
"pattern": "Value #B",
"thresholds": [],
"type": "number",
"unit": "bytes"
},
{
"alias": "",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"mappingType": 1,
"pattern": "Time|instance|model_family|job|lun_id|product|revision|smart_id|type|vendor|firmware_version",
"thresholds": [],
"type": "hidden",
"unit": "short"
},
{
"alias": "Wear Level",
"colorMode": "value",
"colors": [
"rgba(50, 172, 45, 0.97)",
"rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 0,
"mappingType": 1,
"pattern": "Value #A",
"thresholds": [
"60",
"80"
],
"type": "number",
"unit": "percent"
}
],
"targets": [
{
"expr": "(smartmon_total_lbas_written_raw_value{instance=~\"$instance\",disk=~\"$disk\",type=~\"$type\",serial_number=~\"$serial_number\",model_family=~\"$model_family\",device_model=~\"$device_model\"} * on(disk,instance,device_model,serial_number) smartmon_device_sector_size_logical)",
"format": "table",
"instant": true,
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{disk}} ({{device_model}}, sn: {{serial_number}})",
"refId": "B"
},
{
"expr": "(100 - smartmon_wear_leveling_count_value {instance=~\"$instance\",disk=~\"$disk\",type=~\"$type\",serial_number=~\"$serial_number\",model_family=~\"$model_family\",device_model=~\"$device_model\"})",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Total Data Written on SSDs",
"transform": "table",
"type": "table"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"decimals": 0,
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 5,
"w": 13,
"x": 0,
"y": 17
},
"id": 2,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"hideEmpty": false,
"hideZero": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": true,
"targets": [
{
"expr": "avg(smartmon_raw_read_error_rate_raw_value{instance=~\"$instance\",disk=~\"$disk\",type=~\"$type\",serial_number=~\"$serial_number\",model_family=~\"$model_family\",device_model=~\"$device_model\"}) by (device_model,serial_number) > 0",
"format": "time_series",
"hide": false,
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{device_model}} (S/N: {{serial_number}})",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Raw Error Read Rate",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": 0,
"format": "short",
"label": "",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"refresh": "1m",
"schemaVersion": 20,
"style": "dark",
"tags": [
"prometheus",
"node_exporter",
"smartmon"
],
"templating": {
"list": [
{
"allValue": null,
"current": {},
"datasource": "Prometheus",
"definition": "",
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "instance",
"options": [],
"query": "label_values(smartmon_smartctl_version, instance)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {},
"datasource": "Prometheus",
"definition": "label_values(smartmon_device_smart_available, disk)",
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "disk",
"options": [],
"query": "label_values(smartmon_device_smart_available, disk)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {},
"datasource": "Prometheus",
"definition": "label_values(smartmon_device_smart_available, type)",
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "type",
"options": [],
"query": "label_values(smartmon_device_smart_available, type)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {},
"datasource": "Prometheus",
"definition": "label_values(smartmon_device_smart_available, device_model)",
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "device_model",
"options": [],
"query": "label_values(smartmon_device_smart_available, device_model)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {},
"datasource": "Prometheus",
"definition": "label_values(smartmon_device_smart_available, serial_number)",
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "serial_number",
"options": [],
"query": "label_values(smartmon_device_smart_available, serial_number)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {},
"datasource": "Prometheus",
"definition": "label_values(smartmon_device_smart_available, model_family)",
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "model_family",
"options": [],
"query": "label_values(smartmon_device_smart_available, model_family)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-7d",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "smart-2",
"uid": "eak7BovZk",
"version": 24
}

650
grafana/provisioning/dashboards/smart-3.json

@ -0,0 +1,650 @@
{
"__inputs": [
{
"name": "DS_PROMETHEUS-INFRA",
"label": "prometheus-infra",
"description": "Prometheus with node_exporter that collects smartmon.sh script output",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "4.6.1"
},
{
"type": "panel",
"id": "graph",
"name": "Graph",
"version": ""
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "Smartmon Texfile node_exporter Dashboard",
"editable": true,
"gnetId": 3992,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [],
"rows": [
{
"collapse": false,
"height": "170px",
"panels": [
{
"columns": [],
"datasource": "Prometheus",
"fontSize": "100%",
"height": "170px",
"id": 4,
"links": [],
"pageSize": null,
"scroll": true,
"showHeader": true,
"sort": {
"col": 0,
"desc": true
},
"span": 12,
"styles": [
{
"alias": "Time",
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"pattern": "Time",
"type": "date"
},
{
"alias": "",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"decimals": 2,
"pattern": "/.*/",
"thresholds": [],
"type": "number",
"unit": "short"
}
],
"targets": [
{
"expr": "smartmon_device_smart_healthy{instance=~\"$instance\"} < 1",
"format": "table",
"interval": "",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
}
],
"title": "Unhealthy Disks",
"transform": "table",
"type": "table"
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 0,
"id": 1,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
"values": true
},
"lines": false,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 1,
"points": true,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "avg(smartmon_airflow_temperature_cel_value{instance=~\"$instance\"}) by (instance, disk)",
"format": "time_series",
"hide": false,
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "val {{instance}} {{disk}}",
"refId": "B"
},
{
"expr": "avg(smartmon_airflow_temperature_cel_raw_value{instance=~\"$instance\"}) by (instance, disk)",
"format": "time_series",
"hide": false,
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "raw {{instance}} {{disk}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Disk Temperature",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6"
},
{
"collapse": false,
"height": 250,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 1,
"id": 2,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "avg(smartmon_raw_read_error_rate_value{instance=~\"$instance\"}) by (instance, disk)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "val {{instance}} {{disk}}",
"refId": "A"
},
{
"expr": "avg(smartmon_spin_retry_count_raw_value{instance=~\"$instance\"}) by (instance, disk)",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "raw {{instance}} {{disk}}",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Raw Error Read Rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 1,
"id": 3,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "avg(smartmon_total_lbas_written_value{instance=~\"$instance\"}) by (instance, disk)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "val {{instance}} {{disk}}",
"refId": "A"
},
{
"expr": "avg(smartmon_total_lbas_written_raw_value{instance=~\"$instance\"}) by (instance, disk)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "raw {{instance}} {{disk}}",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Raw Error Read Rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6"
},
{
"collapse": false,
"height": 250,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 1,
"id": 5,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "smartmon_reallocated_sector_ct_raw_value{instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "raw {{instance}} {{disk}}",
"refId": "B"
},
{
"expr": "smartmon_reallocated_sector_ct_value{instance=~\"$instance\"}",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "val {{instance}} {{disk}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Reallocated Sectors",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 1,
"id": 6,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "smartmon_current_pending_sector_raw_value{instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "raw {{instance}} {{disk}}",
"refId": "B"
},
{
"expr": "smartmon_current_pending_sector_value{instance=~\"$instance\"}",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "val {{instance}} {{disk}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Current Pending Sectors",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [
"prometheus",
"node_exporter",
"smartmon"
],
"templating": {
"list": [
{
"allValue": null,
"current": {},
"datasource": "Prometheus",
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "instance",
"options": [],
"query": "label_values(smartmon_smartctl_version, instance)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "smart-3",
"version": 6
}

653
grafana/provisioning/dashboards/smart-4.json

@ -0,0 +1,653 @@
{
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "Prometheus",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__requires": [
{
"type": "panel",
"id": "gauge",
"name": "Gauge",
"version": ""
},
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "6.2.5"
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
},
{
"type": "panel",
"id": "text",
"name": "Text",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "Smartmon Texfile node_exporter Dashboard\n\nExtends the following dashboard:\n\nhttps://grafana.com/dashboards/10530",
"editable": true,
"gnetId": 10531,
"graphTooltip": 0,
"id": null,
"iteration": 1563096551333,
"links": [],
"panels": [
{
"content": "<p></p>\n<p style=\"text-align: center;font-size: x-large;face:arial;\">__________ General Errors __________</p>",
"gridPos": {
"h": 2,
"w": 24,
"x": 0,
"y": 0
},
"id": 22,
"links": [],
"mode": "html",
"options": {},
"timeFrom": null,
"timeShift": null,
"title": "",
"type": "text"
},
{
"gridPos": {
"h": 4,
"w": 12,
"x": 0,
"y": 2
},
"id": 18,
"links": [],
"options": {
"fieldOptions": {
"calcs": [
"mean"
],
"defaults": {
"decimals": null,
"max": "50",
"min": 0,
"unit": "none"
},
"mappings": [],
"override": {},
"thresholds": [
{
"color": "green",
"index": 0,
"value": null
},
{
"color": "red",
"index": 1,
"value": 10
}
],
"values": false
},
"orientation": "auto",
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"pluginVersion": "6.2.5",
"targets": [
{
"expr": "smartmon_raw_read_error_rate_raw_value{ instance=~\"$instance\", disk=~\"$disk\" }",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{disk}}",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Read_Error_Rate",
"type": "gauge"
},
{
"description": "Number of reallocation events.\nCaused by read errors and broken sectors.",
"gridPos": {
"h": 4,
"w": 12,
"x": 12,
"y": 2
},
"id": 38,
"links": [],
"options": {
"fieldOptions": {
"calcs": [
"mean"
],
"defaults": {
"max": 200,
"min": 0
},
"mappings": [],
"override": {},
"thresholds": [
{
"color": "green",
"index": 0,
"value": null
},
{
"color": "red",
"index": 1,
"value": 50
}
],
"values": false
},
"orientation": "auto",
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"pluginVersion": "6.2.5",
"targets": [
{
"expr": "smartmon_reallocated_event_count_raw_value{ instance=~\"$instance\", disk=~\"$disk\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{disk}}",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Reallocate_Events",
"type": "gauge"
},
{
"description": "Number of unstable sectors.\nCaused by read errors. \nWill possibly be remapped.",
"gridPos": {
"h": 4,
"w": 8,
"x": 0,
"y": 6
},
"id": 39,
"links": [],
"options": {
"fieldOptions": {
"calcs": [
"mean"
],
"defaults": {
"max": 200,
"min": 0
},
"mappings": [],
"override": {},
"thresholds": [
{
"color": "green",
"index": 0,
"value": null
},
{
"color": "red",
"index": 1,
"value": 50
}
],
"values": false
},
"orientation": "auto",
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"pluginVersion": "6.2.5",
"targets": [
{
"expr": "smartmon_current_pending_sector_raw_value{ instance=~\"$instance\", disk=~\"$disk\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{disk}}",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Pending_Sectors",
"type": "gauge"
},
{
"description": "Number of uncorrectable sectors.\nDetected by a self-test (called offline scan)",
"gridPos": {
"h": 4,
"w": 8,
"x": 8,
"y": 6
},
"id": 40,
"links": [],
"options": {
"fieldOptions": {
"calcs": [
"mean"
],
"defaults": {
"max": 200,
"min": 0
},
"mappings": [],
"override": {},
"thresholds": [
{
"color": "green",
"index": 0,
"value": null
},
{
"color": "red",
"index": 1,
"value": 50
}
],
"values": false
},
"orientation": "auto",
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"pluginVersion": "6.2.5",
"targets": [
{
"expr": "smartmon_offline_uncorrectable_raw_value{ instance=~\"$instance\", disk=~\"$disk\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{disk}}",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Offline_Uncorrectable_Sectors",
"type": "gauge"
},
{
"description": "Monitors connection quality between disk and motherboard.\nIndicates bad cable connection/cables.",
"gridPos": {
"h": 4,
"w": 8,
"x": 16,
"y": 6
},
"id": 41,
"links": [],
"options": {
"fieldOptions": {
"calcs": [
"mean"
],
"defaults": {
"max": 200,
"min": 0
},
"mappings": [],
"override": {},
"thresholds": [
{
"color": "green",
"index": 0,
"value": null
},
{
"color": "#EAB839",
"index": 1,
"value": 25
},
{
"color": "red",
"index": 2,
"value": 100
}
],
"values": false
},
"orientation": "auto",
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"pluginVersion": "6.2.5",
"targets": [
{
"expr": "smartmon_udma_crc_error_count_raw_value{ instance=~\"$instance\", disk=~\"$disk\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{disk}}",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "UDMA_CRC_Errors",
"type": "gauge"
},
{
"content": "<p></p>\n<p style=\"text-align: center;font-size: x-large;face:arial;\">__________ HDD Specific Values __________</p>",
"gridPos": {
"h": 2,
"w": 24,
"x": 0,
"y": 10
},
"id": 43,
"links": [],
"mode": "html",
"options": {},
"timeFrom": null,
"timeShift": null,
"title": "",
"type": "text"
},
{
"description": "Errors during read action.\nPossible positioning problem of read/write unit",
"gridPos": {
"h": 4,
"w": 9,
"x": 0,
"y": 12
},
"id": 28,
"links": [],
"options": {
"fieldOptions": {
"calcs": [
"mean"
],
"defaults": {
"max": 200,
"min": 0
},
"mappings": [],
"override": {},
"thresholds": [
{
"color": "green",
"index": 0,
"value": null
},
{
"color": "#EAB839",
"index": 1,
"value": 20
},
{
"color": "red",
"index": 2,
"value": 100
}
],
"values": false
},
"orientation": "auto",
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"pluginVersion": "6.2.5",
"targets": [
{
"expr": "smartmon_seek_error_rate_raw_value{ instance=~\"$instance\", disk=~\"$disk\" }",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{disk}}",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Seek_Error_Rate",
"type": "gauge"
},
{
"description": "Errors during write",
"gridPos": {
"h": 4,
"w": 8,
"x": 9,
"y": 12
},
"id": 42,
"links": [],
"options": {
"fieldOptions": {
"calcs": [
"last"
],
"defaults": {
"max": 200,
"min": 0
},
"mappings": [],
"override": {},
"thresholds": [
{
"color": "green",
"index": 0,
"value": null
},
{
"color": "red",
"index": 1,
"value": 50
}
],
"values": false
},
"orientation": "auto",
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"pluginVersion": "6.2.5",
"targets": [
{
"expr": "smartmon_multi_zone_error_rate_raw_value{ instance=~\"$instance\", disk=~\"$disk\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{disk}}",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Multi_Zone_Error_Rate",
"type": "gauge"
},
{
"description": "Cont unsuccessful attempts to spin up to operational speed. \nCritical value indicating a mechanical problem of the motor.",
"gridPos": {
"h": 4,
"w": 7,
"x": 17,
"y": 12
},
"id": 30,
"links": [],
"options": {
"fieldOptions": {
"calcs": [
"mean"
],
"defaults": {
"max": 100,
"min": 0
},
"mappings": [],
"override": {},
"thresholds": [
{
"color": "green",
"index": 0,
"value": null
},
{
"color": "#EAB839",
"index": 1,
"value": 10
},
{
"color": "red",
"index": 2,
"value": 50
}
],
"values": false
},
"orientation": "auto",
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"pluginVersion": "6.2.5",
"targets": [
{
"expr": "smartmon_spin_retry_count_raw_value{ instance=~\"$instance\", disk=~\"$disk\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{disk}}",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Spin_Retry_Count",
"type": "gauge"
}
],
"refresh": false,
"schemaVersion": 18,
"style": "dark",
"tags": [
"prometheus",
"node_exporter",
"smartmon"
],
"templating": {
"list": [
{
"allValue": null,
"current": {},
"datasource": "Prometheus",
"definition": "",
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "instance",
"options": [],
"query": "label_values(smartmon_smartctl_version, instance)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {},
"datasource": "Prometheus",
"definition": "label_values(smartmon_device_info, device_model)",
"hide": 2,
"includeAll": true,
"label": null,
"multi": true,
"name": "device_model",
"options": [],
"query": "label_values(smartmon_device_info, device_model)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {},
"datasource": "Prometheus",
"definition": "label_values(smartmon_device_info, disk)",
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "disk",
"options": [],
"query": "label_values(smartmon_device_info, disk)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-7d",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "smart-4",
"uid": "HIwbBGIZk",
"version": 7
}

18
grafana/provisioning/datasources/datasource.yml

@ -0,0 +1,18 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
access: proxy
orgId: 1
url: http://prometheus:9090
basicAuth: false
isDefault: true
editable: true
- name: Loki
type: loki
access: proxy
url: http://loki:3100
basicAuth: false
isDefault: false
editable: true

1
nodeexporter/.gitignore

@ -0,0 +1 @@
/etc

40
prometheus/alert.rules

@ -0,0 +1,40 @@
groups:
- name: targets
rules:
- alert: monitor_service_down
expr: up == 0
for: 30s
labels:
severity: critical
annotations:
summary: "Monitor service non-operational"
description: "Service {{ $labels.instance }} is down."
- name: host
rules:
- alert: high_cpu_load
expr: node_load1 > 1.5
for: 30s
labels:
severity: warning
annotations:
summary: "Server under high load"
description: "Docker host is under high load, the avg load 1m is at {{ $value}}. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."
- alert: high_memory_load
expr: (sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100 > 85
for: 30s
labels:
severity: warning
annotations:
summary: "Server memory is almost full"
description: "Docker host memory usage is {{ humanize $value}}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."
- alert: high_storage_load
expr: (node_filesystem_size_bytes{fstype="aufs"} - node_filesystem_free_bytes{fstype="aufs"}) / node_filesystem_size_bytes{fstype="aufs"} * 100 > 85
for: 30s
labels:
severity: warning
annotations:
summary: "Server storage is almost full"
description: "Docker host storage usage is {{ humanize $value}}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."

44
prometheus/prometheus.yml

@ -0,0 +1,44 @@
global:
scrape_interval: 15s
evaluation_interval: 15s
external_labels:
monitor: 'docker-host-alpha'
rule_files:
- "alert.rules"
scrape_configs:
- job_name: 'prometheus'
scrape_interval: 10s
static_configs:
- targets: ['localhost:9090']
- job_name: 'nodeexporter'
scrape_interval: 5s
static_configs:
- targets: ['nodeexporter:9100']
- job_name: 'nginxexporter'
scrape_interval: 5s
static_configs:
- targets: ['nginxexporter:9113']
- job_name: 'cadvisor'
scrape_interval: 5s
static_configs:
- targets: ['cadvisor:8080']
- job_name: 'pushgateway'
scrape_interval: 10s
honor_labels: true
static_configs:
- targets: ['pushgateway:9091']
alerting:
alertmanagers:
- scheme: http
static_configs:
- targets:
- 'alertmanager:9093'

26
promtail/etc/promtail/config.yml

@ -0,0 +1,26 @@
server:
http_listen_port: 9080
grpc_listen_port: 0
positions:
filename: /tmp/positions.yaml
clients:
- url: http://loki:3100/loki/api/v1/push
scrape_configs:
- job_name: containers
entry_parser: docker
static_configs:
- targets:
- localhost
labels:
job: containerlogs
__path__: /var/lib/docker/containers/*/*log
- job_name: system
static_configs:
- targets:
- localhost
labels:
job: varlogs
__path__: /var/log/*log

16
smartctl/Dockerfile

@ -0,0 +1,16 @@
FROM ubuntu:20.04
RUN apt-get -q update && \
apt-get install -y --no-install-recommends smartmontools && \
apt-get clean && \
rm -rf /tmp/* && \
rm -rf /var/lib/apt/lists/* && \
rm -rf /var/tmp/*
ADD script/entrypoint.sh /entrypoint.sh
ADD script/smartmon.sh /smartmon.sh
RUN chmod +x /entrypoint.sh && \
chmod +x /smartmon.sh
ENTRYPOINT ["/entrypoint.sh"]

13
smartctl/script/entrypoint.sh

@ -0,0 +1,13 @@
#!/usr/bin/env bash
if [ -n "$DEBUG" ]; then
set -ex
fi
INTERVAL="${INTERVAL:-300}"
echo "Starting smartmon.sh loop ..."
while true; do
/smartmon.sh > /var/lib/node_exporter/smartmon.prom
sleep "$INTERVAL"
done

194
smartctl/script/smartmon.sh

@ -0,0 +1,194 @@
#!/bin/bash
# https://github.com/prometheus-community/node-exporter-textfile-collector-scripts
# Script informed by the collectd monitoring script for smartmontools (using smartctl)
# by Samuel B. <samuel_._behan_(at)_dob_._sk> (c) 2012
# source at: http://devel.dob.sk/collectd-scripts/
parse_smartctl_attributes_awk="$(
cat <<'SMARTCTLAWK'
$1 ~ /^ *[0-9]+$/ && $2 ~ /^[a-zA-Z0-9_-]+$/ {
gsub(/-/, "_");
printf "%s_value{%s,smart_id=\"%s\"} %d\n", $2, labels, $1, $4
printf "%s_worst{%s,smart_id=\"%s\"} %d\n", $2, labels, $1, $5
printf "%s_threshold{%s,smart_id=\"%s\"} %d\n", $2, labels, $1, $6
printf "%s_raw_value{%s,smart_id=\"%s\"} %e\n", $2, labels, $1, $10
}
SMARTCTLAWK
)"
smartmon_attrs="$(
cat <<'SMARTMONATTRS'
airflow_temperature_cel
command_timeout
current_pending_sector
end_to_end_error
erase_fail_count
g_sense_error_rate
hardware_ecc_recovered
host_reads_32mib
host_reads_mib
host_writes_32mib
host_writes_mib
load_cycle_count
media_wearout_indicator
nand_writes_1gib
offline_uncorrectable
power_cycle_count
power_on_hours
program_fail_cnt_total
program_fail_count
raw_read_error_rate
reallocated_event_count
reallocated_sector_ct
reported_uncorrect
runtime_bad_block
sata_downshift_count
seek_error_rate
spin_retry_count
spin_up_time
start_stop_count
temperature_case
temperature_celsius
temperature_internal
total_lbas_read
total_lbas_written
udma_crc_error_count
unsafe_shutdown_count
unused_rsvd_blk_cnt_tot
wear_leveling_count
workld_host_reads_perc
workld_media_wear_indic
workload_minutes
SMARTMONATTRS
)"
smartmon_attrs="$(echo "${smartmon_attrs}" | xargs | tr ' ' '|')"
parse_smartctl_attributes() {
local disk="$1"
local disk_type="$2"
local labels="disk=\"${disk}\",type=\"${disk_type}\""
sed 's/^ \+//g' |
awk -v labels="${labels}" "${parse_smartctl_attributes_awk}" 2>/dev/null |
tr '[:upper:]' '[:lower:]' |
grep -E "(${smartmon_attrs})"
}
parse_smartctl_scsi_attributes() {
local disk="$1"
local disk_type="$2"
local labels="disk=\"${disk}\",type=\"${disk_type}\""
while read -r line; do
attr_type="$(echo "${line}" | tr '=' ':' | cut -f1 -d: | sed 's/^ \+//g' | tr ' ' '_')"
attr_value="$(echo "${line}" | tr '=' ':' | cut -f2 -d: | sed 's/^ \+//g')"
case "${attr_type}" in
number_of_hours_powered_up_) power_on="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;;
Current_Drive_Temperature) temp_cel="$(echo "${attr_value}" | cut -f1 -d' ' | awk '{ printf "%e\n", $1 }')" ;;
Blocks_sent_to_initiator_) lbas_read="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;;
Blocks_received_from_initiator_) lbas_written="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;;
Accumulated_start-stop_cycles) power_cycle="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;;
Elements_in_grown_defect_list) grown_defects="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;;
esac
done
[ -n "$power_on" ] && echo "power_on_hours_raw_value{${labels},smart_id=\"9\"} ${power_on}"
[ -n "$temp_cel" ] && echo "temperature_celsius_raw_value{${labels},smart_id=\"194\"} ${temp_cel}"
[ -n "$lbas_read" ] && echo "total_lbas_read_raw_value{${labels},smart_id=\"242\"} ${lbas_read}"
[ -n "$lbas_written" ] && echo "total_lbas_written_raw_value{${labels},smart_id=\"242\"} ${lbas_written}"
[ -n "$power_cycle" ] && echo "power_cycle_count_raw_value{${labels},smart_id=\"12\"} ${power_cycle}"
[ -n "$grown_defects" ] && echo "grown_defects_count_raw_value{${labels},smart_id=\"12\"} ${grown_defects}"
}
parse_smartctl_info() {
local -i smart_available=0 smart_enabled=0 smart_healthy=
local disk="$1" disk_type="$2"
local model_family='' device_model='' serial_number='' fw_version='' vendor='' product='' revision='' lun_id=''
while read -r line; do
info_type="$(echo "${line}" | cut -f1 -d: | tr ' ' '_')"
info_value="$(echo "${line}" | cut -f2- -d: | sed 's/^ \+//g' | sed 's/"/\\"/')"
case "${info_type}" in
Model_Family) model_family="${info_value}" ;;
Device_Model) device_model="${info_value}" ;;
Serial_Number) serial_number="${info_value}" ;;
Firmware_Version) fw_version="${info_value}" ;;
Vendor) vendor="${info_value}" ;;
Product) product="${info_value}" ;;
Revision) revision="${info_value}" ;;
Logical_Unit_id) lun_id="${info_value}" ;;
esac
if [[ "${info_type}" == 'SMART_support_is' ]]; then
case "${info_value:0:7}" in
Enabled) smart_available=1; smart_enabled=1 ;;
Availab) smart_available=1; smart_enabled=0 ;;
Unavail) smart_available=0; smart_enabled=0 ;;
esac
fi
if [[ "${info_type}" == 'SMART_overall-health_self-assessment_test_result' ]]; then
case "${info_value:0:6}" in
PASSED) smart_healthy=1 ;;
*) smart_healthy=0 ;;
esac
elif [[ "${info_type}" == 'SMART_Health_Status' ]]; then
case "${info_value:0:2}" in
OK) smart_healthy=1 ;;
*) smart_healthy=0 ;;
esac
fi
done
echo "device_info{disk=\"${disk}\",type=\"${disk_type}\",vendor=\"${vendor}\",product=\"${product}\",revision=\"${revision}\",lun_id=\"${lun_id}\",model_family=\"${model_family}\",device_model=\"${device_model}\",serial_number=\"${serial_number}\",firmware_version=\"${fw_version}\"} 1"
echo "device_smart_available{disk=\"${disk}\",type=\"${disk_type}\"} ${smart_available}"
echo "device_smart_enabled{disk=\"${disk}\",type=\"${disk_type}\"} ${smart_enabled}"
[[ "${smart_healthy}" != "" ]] && echo "device_smart_healthy{disk=\"${disk}\",type=\"${disk_type}\"} ${smart_healthy}"
}
output_format_awk="$(
cat <<'OUTPUTAWK'
BEGIN { v = "" }
v != $1 {
print "# HELP smartmon_" $1 " SMART metric " $1;
print "# TYPE smartmon_" $1 " gauge";
v = $1
}
{print "smartmon_" $0}
OUTPUTAWK
)"
format_output() {
sort |
awk -F'{' "${output_format_awk}"
}
smartctl_version="$(/usr/sbin/smartctl -V | head -n1 | awk '$1 == "smartctl" {print $2}')"
echo "smartctl_version{version=\"${smartctl_version}\"} 1" | format_output
if [[ "$(expr "${smartctl_version}" : '\([0-9]*\)\..*')" -lt 6 ]]; then
exit
fi
device_list="$(/usr/sbin/smartctl --scan-open | awk '/^\/dev/{print $1 "|" $3}')"
for device in ${device_list}; do
disk="$(echo "${device}" | cut -f1 -d'|')"
type="$(echo "${device}" | cut -f2 -d'|')"
active=1
echo "smartctl_run{disk=\"${disk}\",type=\"${type}\"}" "$(TZ=UTC date '+%s')"
# Check if the device is in a low-power mode
/usr/sbin/smartctl -n standby -d "${type}" "${disk}" > /dev/null || active=0
echo "device_active{disk=\"${disk}\",type=\"${type}\"}" "${active}"
# Skip further metrics to prevent the disk from spinning up
test ${active} -eq 0 && continue
# Get the SMART information and health
/usr/sbin/smartctl -i -H -d "${type}" "${disk}" | parse_smartctl_info "${disk}" "${type}"
# Get the SMART attributes
case ${type} in
sat) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_attributes "${disk}" "${type}" ;;
sat+megaraid*) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_attributes "${disk}" "${type}" ;;
scsi) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_scsi_attributes "${disk}" "${type}" ;;
megaraid*) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_scsi_attributes "${disk}" "${type}" ;;
nvme*) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_scsi_attributes "${disk}" "${type}" ;;
*)
(>&2 echo "disk type is not sat, scsi, nvme or megaraid but ${type}")
exit
;;
esac
done | format_output