Browse Source

add base vmware_exporter

pull/34/head
fedy95 4 years ago
parent
commit
a293b8e76c
  1. 5
      .env.example
  2. 1
      .gitignore
  3. 25
      Makefile
  4. 18
      README.md
  5. 48
      docker-compose.yml
  6. 7
      grafana/provisioning/dashboards/dashboard.yml
  7. 6
      grafana/provisioning/dashboards/dashboards.md
  8. 2030
      grafana/provisioning/dashboards/esxi/cluster.json
  9. 617
      grafana/provisioning/dashboards/esxi/esx.json
  10. 1788
      grafana/provisioning/dashboards/esxi/esxi.json
  11. 1983
      grafana/provisioning/dashboards/esxi/virtualmachine.json
  12. 1955
      grafana/provisioning/dashboards/esxi/vmware-stats_rev1.json
  13. 0
      grafana/provisioning/dashboards/other/docker_containers.json
  14. 0
      grafana/provisioning/dashboards/other/i-o-statistics.json
  15. 0
      grafana/provisioning/dashboards/other/monitor_services.json
  16. 0
      grafana/provisioning/dashboards/other/nexus_host.json
  17. 0
      grafana/provisioning/dashboards/other/nginx_container.json
  18. 0
      grafana/provisioning/dashboards/other/node-exporter.json
  19. 0
      grafana/provisioning/dashboards/other/smart-1.json
  20. 0
      grafana/provisioning/dashboards/other/smart-2.json
  21. 0
      grafana/provisioning/dashboards/other/smart-3.json
  22. 0
      grafana/provisioning/dashboards/other/smart-4.json
  23. 30
      prometheus/prometheus.yml

5
.env.example

@ -0,0 +1,5 @@
VSPHERE_USER=
VSPHERE_PASSWORD=
VSPHERE_HOST=
VSPHERE_IGNORE_SSL=
VSPHERE_SPECS_SIZE=

1
.gitignore

@ -0,0 +1 @@
/.env

25
Makefile

@ -20,16 +20,21 @@ lint-yaml:
LINT-JSON-CMD=docker run --rm -it -v $(PWD):/data ${REGISTRY}/${JSONLINT_TAG}
lint-json:
docker pull ${REGISTRY}/${JSONLINT_TAG}
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/docker_containers.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/i-o-statistics.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/monitor_services.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/nexus_host.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/nginx_container.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/node-exporter.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/smart-1.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/smart-2.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/smart-3.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/smart-4.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/esxi/cluster.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/esxi/esx.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/esxi/esxi.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/esxi/virtualmachine.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/esxi/vmware-stats_rev1.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/other/docker_containers.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/other/i-o-statistics.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/other/monitor_services.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/other/nexus_host.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/other/nginx_container.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/other/node-exporter.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/other/smart-1.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/other/smart-2.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/other/smart-3.json
$(LINT-JSON-CMD) -qc grafana/provisioning/dashboards/other/smart-4.json
restart:
docker-compose -f docker-compose.yml pull

18
README.md

@ -3,11 +3,17 @@
---
### services
- [Loki/Promtail](https://github.com/grafana/loki)
- [Prometheus](https://github.com/prometheus/prometheus)
- [NodeExporter](https://github.com/prometheus/node_exporter)
- [alertmanager](https://github.com/prometheus/alertmanager)
- [cAdvisor](https://github.com/google/cadvisor)
- [Pushgateway](https://github.com/prometheus/pushgateway)
- [Grafana](https://github.com/grafana/grafana)
- [Alertmanager](https://github.com/prometheus/alertmanager)
- [grafana](https://github.com/grafana/grafana)
- [loki](https://hub.docker.com/r/grafana/loki)
- [nginx_exporter](https://hub.docker.com/r/nginx/nginx-prometheus-exporter)
- [node_exporter](https://github.com/prometheus/node_exporter)
- [prometheus](https://github.com/prometheus/prometheus)
- [vmware_exporter](https://hub.docker.com/r/pryorda/vmware_exporter)
- [promtail](https://hub.docker.com/r/grafana/promtail)
- [pushgateway](https://github.com/prometheus/pushgateway)
- smartctl
### alternatives
- [dockprom](https://github.com/stefanprodan/dockprom)

48
docker-compose.yml

@ -61,6 +61,7 @@ services:
- GF_SECURITY_ADMIN_USER=${ADMIN_USER:-admin}
- GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin}
- GF_USERS_ALLOW_SIGN_UP=false
- DS_PROMETHEUS-LOCAL=Prometheus
ports:
- "3002:3000"
@ -157,6 +158,21 @@ services:
labels:
org.label-schema.group: "monitoring"
vmware_exporter:
image: pryorda/vmware_exporter:v0.16.1
container_name: vmware_exporter
hostname: vmware_exporter
restart: always
ports:
- "9272"
env_file:
- .env
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
promtail:
image: grafana/promtail:2.1.0
container_name: promtail
@ -189,20 +205,20 @@ services:
labels:
org.label-schema.group: "monitoring"
smartctl:
build:
context: ./smartctl
container_name: smartctl
hostname: smartctl
restart: on-failure
privileged: true
volumes:
- ./nodeexporter/etc:/var/lib/node_exporter
environment:
- INTERVAL=300
networks:
- monitor-net
labels:
org.label-schema.group: "monitoring"
# smartctl:
# build:
# context: ./smartctl
# container_name: smartctl
# hostname: smartctl
# restart: on-failure
# privileged: true
#
# volumes:
# - ./nodeexporter/etc:/var/lib/node_exporter
# environment:
# - INTERVAL=300
# networks:
# - monitor-net
# labels:
# org.label-schema.group: "monitoring"
...

7
grafana/provisioning/dashboards/dashboard.yml

@ -2,13 +2,14 @@
apiVersion: 1
providers:
- name: 'Prometheus'
- name: dashboards
orgId: 1
folder: ''
type: file
disableDeletion: false
disableDeletion: true
editable: true
allowUiUpdates: true
updateIntervalSeconds: 30
options:
path: /etc/grafana/provisioning/dashboards
foldersFromFilesStructure: true
...

6
grafana/provisioning/dashboards/dashboards.md

@ -2,6 +2,10 @@
- nginx
- https://github.com/nginxinc/nginx-prometheus-exporter/blob/master/grafana/dashboard.json
- smart
- https://grafana.com/grafana/dashboards?search=Smartmon
- vmware-stats
- https://grafana.com/grafana/dashboards/11243/revisions
``

2030
grafana/provisioning/dashboards/esxi/cluster.json
File diff suppressed because it is too large
View File

617
grafana/provisioning/dashboards/esxi/esx.json

@ -0,0 +1,617 @@
{
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "prometheus",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "4.3.0"
},
{
"type": "panel",
"id": "graph",
"name": "Graph",
"version": ""
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
},
{
"type": "panel",
"id": "singlestat",
"name": "Singlestat",
"version": ""
}
],
"annotations": {
"list": []
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [],
"refresh": "10s",
"rows": [
{
"collapse": false,
"height": 250,
"panels": [
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"datasource": "Prometheus",
"decimals": 1,
"description": "System uptime",
"format": "s",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 3,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"minSpan": 1,
"nullPointMode": "connected",
"nullText": null,
"postfix": "s",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "time() - vmware_host_boot_timestamp_seconds{host_name=\"$hostname\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 4
}
],
"thresholds": "",
"title": "Uptime",
"type": "singlestat",
"valueFontSize": "50%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": true,
"colors": [
"rgba(50, 172, 45, 0.97)",
"rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
],
"datasource": "Prometheus",
"format": "percent",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 4,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"minSpan": 1,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"repeat": null,
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": true
},
"tableColumn": "",
"targets": [
{
"expr": "vmware_host_cpu_usage{host_name=\"$hostname\"} / vmware_host_cpu_max{host_name=\"$hostname\"} * 100",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "",
"refId": "A",
"step": 2
}
],
"thresholds": "80,90",
"title": "CPU Usage",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": true,
"colors": [
"rgba(50, 172, 45, 0.97)",
"rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
],
"datasource": "Prometheus",
"format": "percent",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 5,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"minSpan": 1,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": true
},
"tableColumn": "",
"targets": [
{
"expr": "vmware_host_memory_usage{host_name=\"$hostname\"} / vmware_host_memory_max{host_name=\"$hostname\"} * 100",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "",
"refId": "A",
"step": 2
}
],
"thresholds": "80,90",
"title": "Memory Usage",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": true,
"colorValue": false,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"datasource": "Prometheus",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 6,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "vmware_host_power_state{host_name=\"$hostname\"}",
"format": "time_series",
"intervalFactor": 2,
"refId": "A",
"step": 4
}
],
"thresholds": "1,1",
"title": "Host State",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "UP",
"value": "1"
},
{
"op": "=",
"text": "DOWN",
"value": "0"
}
],
"valueName": "current"
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6"
},
{
"collapse": false,
"height": 398,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"decimals": 1,
"fill": 0,
"id": 1,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"hideEmpty": false,
"hideZero": false,
"max": false,
"min": false,
"rightSide": true,
"show": false,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"minSpan": 2,
"nullPointMode": "null",
"percentage": true,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "vmware_host_cpu_usage{host_name=\"$hostname\"} / vmware_host_cpu_max{host_name=\"$hostname\"} * 100",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "$hostname",
"metric": "",
"refId": "A",
"step": 2
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "ESX Host CPU Usage",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percent",
"label": "",
"logBase": 1,
"max": "100",
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"decimals": 1,
"fill": 1,
"id": 2,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"hideEmpty": false,
"hideZero": false,
"max": false,
"min": false,
"rightSide": true,
"show": false,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"minSpan": 2,
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "vmware_host_memory_usage{host_name=\"$hostname\"} / vmware_host_memory_max{host_name=\"$hostname\"} * 100",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "$hostname",
"metric": "",
"refId": "A",
"step": 2
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "ESX Host RAM Usage",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percent",
"label": "",
"logBase": 1,
"max": "100",
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"allValue": null,
"current": {},
"datasource": "Prometheus",
"hide": 0,
"includeAll": false,
"label": "Host:",
"multi": false,
"name": "hostname",
"options": [],
"query": "label_values(vmware_host_boot_timestamp_seconds,host_name)",
"refresh": 1,
"regex": "/([^:]+)/",
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-5m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "ESX Hosts Information",
"version": 17
}

1788
grafana/provisioning/dashboards/esxi/esxi.json
File diff suppressed because it is too large
View File

1983
grafana/provisioning/dashboards/esxi/virtualmachine.json
File diff suppressed because it is too large
View File

1955
grafana/provisioning/dashboards/esxi/vmware-stats_rev1.json
File diff suppressed because it is too large
View File

0
grafana/provisioning/dashboards/docker_containers.json → grafana/provisioning/dashboards/other/docker_containers.json

0
grafana/provisioning/dashboards/i-o-statistics.json → grafana/provisioning/dashboards/other/i-o-statistics.json

0
grafana/provisioning/dashboards/monitor_services.json → grafana/provisioning/dashboards/other/monitor_services.json

0
grafana/provisioning/dashboards/nexus_host.json → grafana/provisioning/dashboards/other/nexus_host.json

0
grafana/provisioning/dashboards/nginx_container.json → grafana/provisioning/dashboards/other/nginx_container.json

0
grafana/provisioning/dashboards/node-exporter.json → grafana/provisioning/dashboards/other/node-exporter.json

0
grafana/provisioning/dashboards/smart-1.json → grafana/provisioning/dashboards/other/smart-1.json

0
grafana/provisioning/dashboards/smart-2.json → grafana/provisioning/dashboards/other/smart-2.json

0
grafana/provisioning/dashboards/smart-3.json → grafana/provisioning/dashboards/other/smart-3.json

0
grafana/provisioning/dashboards/smart-4.json → grafana/provisioning/dashboards/other/smart-4.json

30
prometheus/prometheus.yml

@ -10,25 +10,25 @@ rule_files:
- "alert.rules"
scrape_configs:
- job_name: 'prometheus'
scrape_interval: 10s
static_configs:
- targets: [ 'localhost:9090' ]
- job_name: 'nodeexporter'
- job_name: 'cadvisor'
scrape_interval: 5s
static_configs:
- targets: [ 'nodeexporter:9100' ]
- targets: [ 'cadvisor:8080' ]
- job_name: 'nginxexporter'
scrape_interval: 5s
static_configs:
- targets: [ 'nginxexporter:9113' ]
- job_name: 'cadvisor'
- job_name: 'nodeexporter'
scrape_interval: 5s
static_configs:
- targets: [ 'cadvisor:8080' ]
- targets: [ 'nodeexporter:9100' ]
- job_name: 'prometheus'
scrape_interval: 10s
static_configs:
- targets: [ 'localhost:9090' ]
- job_name: 'pushgateway'
scrape_interval: 10s
@ -36,6 +36,18 @@ scrape_configs:
static_configs:
- targets: [ 'pushgateway:9091' ]
- job_name: 'vmware_exporter'
scrape_interval: 10s
metrics_path: '/metrics'
static_configs:
- targets: [ 'vmware_exporter:9272' ]
relabel_configs:
- source_labels: [ __address__ ]
target_label: __param_target
- source_labels: [ __param_target ]
target_label: instance
- target_label: __address__
replacement: vmware_exporter:9272
alerting:
alertmanagers: