8 changed files with 314 additions and 300 deletions
-
34 .drone.yml
-
10 .editorconfig
-
18 alertmanager/config.yml
-
396 docker-compose.yml
-
20 grafana/provisioning/dashboards/dashboard.yml
-
32 grafana/provisioning/datasources/datasource.yml
-
64 prometheus/prometheus.yml
-
40 promtail/etc/promtail/config.yml
@@ -1,27 +1,29 @@ |
|||
--- |
|||
kind: pipeline |
|||
type: ssh |
|||
name: default |
|||
|
|||
clone: |
|||
disable: true |
|||
disable: true |
|||
|
|||
server: |
|||
host: |
|||
from_secret: host |
|||
user: |
|||
from_secret: user |
|||
password: |
|||
from_secret: password |
|||
host: |
|||
from_secret: host |
|||
user: |
|||
from_secret: user |
|||
password: |
|||
from_secret: password |
|||
|
|||
steps: |
|||
- name: release |
|||
commands: |
|||
- cd /mnt/md0/infra/monitoring |
|||
- git pull |
|||
- make restart |
|||
- name: release |
|||
commands: |
|||
- cd /mnt/md0/infra/monitoring |
|||
- git pull |
|||
- make restart |
|||
|
|||
trigger: |
|||
branch: |
|||
- master |
|||
event: |
|||
- push |
|||
branch: |
|||
- master |
|||
event: |
|||
- push |
|||
... |
@@ -1,10 +1,10 @@ |
|||
root = true |
|||
|
|||
[*] |
|||
end_of_line = lf |
|||
insert_final_newline = true |
|||
charset = utf-8 |
|||
|
|||
[*.json] |
|||
end_of_line = lf |
|||
indent_style = space |
|||
indent_size = 4 |
|||
indent_size = 2 |
|||
trim_trailing_whitespace = true |
|||
insert_final_newline = true |
|||
max_line_length = 100 |
@@ -1,11 +1,13 @@ |
|||
--- |
|||
route: |
|||
receiver: 'slack' |
|||
receiver: 'slack' |
|||
|
|||
receivers: |
|||
- name: 'slack' |
|||
slack_configs: |
|||
- send_resolved: true |
|||
text: "{{ .CommonAnnotations.description }}" |
|||
username: 'Prometheus' |
|||
channel: '#<channel-name>' |
|||
api_url: 'https://hooks.slack.com/services/<webhook-id>' |
|||
- name: 'slack' |
|||
slack_configs: |
|||
- send_resolved: true |
|||
text: "{{ .CommonAnnotations.description }}" |
|||
username: 'Prometheus' |
|||
channel: '#<channel-name>' |
|||
api_url: 'https://hooks.slack.com/services/<webhook-id>' |
|||
... |
@@ -1,204 +1,206 @@ |
|||
--- |
|||
version: "3" |
|||
|
|||
networks: |
|||
monitor-net: |
|||
driver: bridge |
|||
monitor-net: |
|||
driver: bridge |
|||
|
|||
volumes: |
|||
grafana_data: {} |
|||
grafana_data: { } |
|||
|
|||
services: |
|||
alertmanager: |
|||
image: prom/alertmanager:v0.21.0 |
|||
container_name: alertmanager |
|||
hostname: alertmanager |
|||
restart: unless-stopped |
|||
|
|||
volumes: |
|||
- ./alertmanager:/etc/alertmanager |
|||
command: |
|||
- '--config.file=/etc/alertmanager/config.yml' |
|||
- '--storage.path=/alertmanager' |
|||
|
|||
expose: |
|||
- "9093" |
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
|
|||
cadvisor: |
|||
image: gcr.io/cadvisor/cadvisor:v0.37.0 |
|||
container_name: cadvisor |
|||
hostname: cadvisor |
|||
restart: unless-stopped |
|||
|
|||
volumes: |
|||
- /:/rootfs:ro |
|||
- /var/run:/var/run:rw |
|||
- /sys:/sys:ro |
|||
- /var/lib/docker:/var/lib/docker:ro |
|||
|
|||
expose: |
|||
- "8080" |
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
|
|||
grafana: |
|||
image: grafana/grafana:7.3.7 |
|||
container_name: grafana |
|||
hostname: grafana |
|||
restart: unless-stopped |
|||
|
|||
volumes: |
|||
- grafana_data:/var/lib/grafana |
|||
- ./grafana/provisioning:/etc/grafana/provisioning |
|||
environment: |
|||
- GF_SECURITY_ADMIN_USER=${ADMIN_USER:-admin} |
|||
- GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin} |
|||
- GF_USERS_ALLOW_SIGN_UP=false |
|||
|
|||
ports: |
|||
- "3002:3000" |
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
|
|||
loki: |
|||
image: grafana/loki:2.1.0 |
|||
container_name: loki |
|||
hostname: loki |
|||
restart: on-failure |
|||
|
|||
command: |
|||
- '-config.file=/etc/loki/local-config.yaml' |
|||
|
|||
expose: |
|||
- "3100" |
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
|
|||
nginxexporter: |
|||
image: nginx/nginx-prometheus-exporter:0.8.0 |
|||
container_name: nginxexporter |
|||
hostname: nginxexporter |
|||
restart: on-failure |
|||
|
|||
logging: |
|||
driver: json-file |
|||
options: |
|||
max-size: "10m" |
|||
max-file: "5" |
|||
|
|||
environment: |
|||
- SCRAPE_URI=http://nginx:80/nginx_status |
|||
- TELEMETRY_PATH=/metrics |
|||
- NGINX_RETRIES=10 |
|||
expose: |
|||
- "9113" |
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
|
|||
|
|||
nodeexporter: |
|||
image: prom/node-exporter:v1.0.1 |
|||
container_name: nodeexporter |
|||
hostname: nodeexporter |
|||
restart: unless-stopped |
|||
|
|||
volumes: |
|||
- /proc:/host/proc:ro |
|||
- /sys:/host/sys:ro |
|||
- /:/rootfs:ro |
|||
- ./nodeexporter/etc:/var/lib/node_exporter |
|||
command: |
|||
- '--path.procfs=/host/proc' |
|||
- '--path.sysfs=/host/sys' |
|||
- '--path.rootfs=/rootfs' |
|||
- '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)' |
|||
- '--collector.textfile.directory=/var/lib/node_exporter' |
|||
ports: |
|||
- "3003:9100" |
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
|
|||
prometheus: |
|||
image: prom/prometheus:v2.24.1 |
|||
container_name: prometheus |
|||
hostname: prometheus |
|||
restart: unless-stopped |
|||
|
|||
volumes: |
|||
- ./prometheus:/etc/prometheus |
|||
command: |
|||
- '--config.file=/etc/prometheus/prometheus.yml' |
|||
- '--storage.tsdb.path=/prometheus' |
|||
- '--web.console.libraries=/etc/prometheus/console_libraries' |
|||
- '--web.console.templates=/etc/prometheus/consoles' |
|||
- '--storage.tsdb.retention.time=200h' |
|||
- '--web.enable-lifecycle' |
|||
|
|||
ports: |
|||
- "3004:9090" |
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
|
|||
promtail: |
|||
image: grafana/promtail:2.1.0 |
|||
container_name: promtail |
|||
hostname: promtail |
|||
restart: on-failure |
|||
|
|||
volumes: |
|||
- ./promtail/etc/var/lib/promtail/positions:/var/lib/promtail/positions |
|||
- ./promtail/etc/promtail/config.yml:/etc/promtail/config.yml |
|||
- /var/lib/docker/containers:/var/lib/docker/containers:ro |
|||
- /var/log:/var/log |
|||
command: |
|||
- '-config.file=/etc/promtail/config.yml' |
|||
|
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
|
|||
pushgateway: |
|||
image: prom/pushgateway:v1.4.0 |
|||
container_name: pushgateway |
|||
hostname: pushgateway |
|||
restart: unless-stopped |
|||
|
|||
expose: |
|||
- "9091" |
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
|
|||
smartctl: |
|||
build: |
|||
context: ./smartctl |
|||
container_name: smartctl |
|||
hostname: smartctl |
|||
restart: on-failure |
|||
privileged: true |
|||
|
|||
volumes: |
|||
- ./nodeexporter/etc:/var/lib/node_exporter |
|||
environment: |
|||
- INTERVAL=300 |
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
alertmanager: |
|||
image: prom/alertmanager:v0.21.0 |
|||
container_name: alertmanager |
|||
hostname: alertmanager |
|||
restart: unless-stopped |
|||
|
|||
volumes: |
|||
- ./alertmanager:/etc/alertmanager |
|||
command: |
|||
- '--config.file=/etc/alertmanager/config.yml' |
|||
- '--storage.path=/alertmanager' |
|||
|
|||
expose: |
|||
- "9093" |
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
|
|||
cadvisor: |
|||
image: gcr.io/cadvisor/cadvisor:v0.37.0 |
|||
container_name: cadvisor |
|||
hostname: cadvisor |
|||
restart: unless-stopped |
|||
|
|||
volumes: |
|||
- /:/rootfs:ro |
|||
- /var/run:/var/run:rw |
|||
- /sys:/sys:ro |
|||
- /var/lib/docker:/var/lib/docker:ro |
|||
|
|||
expose: |
|||
- "8080" |
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
|
|||
grafana: |
|||
image: grafana/grafana:7.3.7 |
|||
container_name: grafana |
|||
hostname: grafana |
|||
restart: unless-stopped |
|||
|
|||
volumes: |
|||
- grafana_data:/var/lib/grafana |
|||
- ./grafana/provisioning:/etc/grafana/provisioning |
|||
environment: |
|||
- GF_SECURITY_ADMIN_USER=${ADMIN_USER:-admin} |
|||
- GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin} |
|||
- GF_USERS_ALLOW_SIGN_UP=false |
|||
|
|||
ports: |
|||
- "3002:3000" |
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
|
|||
loki: |
|||
image: grafana/loki:2.1.0 |
|||
container_name: loki |
|||
hostname: loki |
|||
restart: on-failure |
|||
|
|||
command: |
|||
- '-config.file=/etc/loki/local-config.yaml' |
|||
|
|||
expose: |
|||
- "3100" |
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
|
|||
nginxexporter: |
|||
image: nginx/nginx-prometheus-exporter:0.8.0 |
|||
container_name: nginxexporter |
|||
hostname: nginxexporter |
|||
restart: on-failure |
|||
|
|||
logging: |
|||
driver: json-file |
|||
options: |
|||
max-size: "10m" |
|||
max-file: "5" |
|||
|
|||
environment: |
|||
- SCRAPE_URI=http://nginx:80/nginx_status |
|||
- TELEMETRY_PATH=/metrics |
|||
- NGINX_RETRIES=10 |
|||
expose: |
|||
- "9113" |
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
|
|||
|
|||
nodeexporter: |
|||
image: prom/node-exporter:v1.0.1 |
|||
container_name: nodeexporter |
|||
hostname: nodeexporter |
|||
restart: unless-stopped |
|||
|
|||
volumes: |
|||
- /proc:/host/proc:ro |
|||
- /sys:/host/sys:ro |
|||
- /:/rootfs:ro |
|||
- ./nodeexporter/etc:/var/lib/node_exporter |
|||
command: |
|||
- '--path.procfs=/host/proc' |
|||
- '--path.sysfs=/host/sys' |
|||
- '--path.rootfs=/rootfs' |
|||
- '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)' |
|||
- '--collector.textfile.directory=/var/lib/node_exporter' |
|||
ports: |
|||
- "3003:9100" |
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
|
|||
prometheus: |
|||
image: prom/prometheus:v2.24.1 |
|||
container_name: prometheus |
|||
hostname: prometheus |
|||
restart: unless-stopped |
|||
|
|||
volumes: |
|||
- ./prometheus:/etc/prometheus |
|||
command: |
|||
- '--config.file=/etc/prometheus/prometheus.yml' |
|||
- '--storage.tsdb.path=/prometheus' |
|||
- '--web.console.libraries=/etc/prometheus/console_libraries' |
|||
- '--web.console.templates=/etc/prometheus/consoles' |
|||
- '--storage.tsdb.retention.time=200h' |
|||
- '--web.enable-lifecycle' |
|||
|
|||
ports: |
|||
- "3004:9090" |
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
|
|||
promtail: |
|||
image: grafana/promtail:2.1.0 |
|||
container_name: promtail |
|||
hostname: promtail |
|||
restart: on-failure |
|||
|
|||
volumes: |
|||
- ./promtail/etc/var/lib/promtail/positions:/var/lib/promtail/positions |
|||
- ./promtail/etc/promtail/config.yml:/etc/promtail/config.yml |
|||
- /var/lib/docker/containers:/var/lib/docker/containers:ro |
|||
- /var/log:/var/log |
|||
command: |
|||
- '-config.file=/etc/promtail/config.yml' |
|||
|
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
|
|||
pushgateway: |
|||
image: prom/pushgateway:v1.4.0 |
|||
container_name: pushgateway |
|||
hostname: pushgateway |
|||
restart: unless-stopped |
|||
|
|||
expose: |
|||
- "9091" |
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
|
|||
smartctl: |
|||
build: |
|||
context: ./smartctl |
|||
container_name: smartctl |
|||
hostname: smartctl |
|||
restart: on-failure |
|||
privileged: true |
|||
|
|||
volumes: |
|||
- ./nodeexporter/etc:/var/lib/node_exporter |
|||
environment: |
|||
- INTERVAL=300 |
|||
networks: |
|||
- monitor-net |
|||
labels: |
|||
org.label-schema.group: "monitoring" |
|||
... |
@@ -1,12 +1,14 @@ |
|||
--- |
|||
apiVersion: 1 |
|||
|
|||
providers: |
|||
- name: 'Prometheus' |
|||
orgId: 1 |
|||
folder: '' |
|||
type: file |
|||
disableDeletion: false |
|||
editable: true |
|||
allowUiUpdates: true |
|||
options: |
|||
path: /etc/grafana/provisioning/dashboards |
|||
- name: 'Prometheus' |
|||
orgId: 1 |
|||
folder: '' |
|||
type: file |
|||
disableDeletion: false |
|||
editable: true |
|||
allowUiUpdates: true |
|||
options: |
|||
path: /etc/grafana/provisioning/dashboards |
|||
... |
@@ -1,18 +1,20 @@ |
|||
--- |
|||
apiVersion: 1 |
|||
|
|||
datasources: |
|||
- name: Prometheus |
|||
type: prometheus |
|||
access: proxy |
|||
orgId: 1 |
|||
url: http://prometheus:9090 |
|||
basicAuth: false |
|||
isDefault: true |
|||
editable: true |
|||
- name: Loki |
|||
type: loki |
|||
access: proxy |
|||
url: http://loki:3100 |
|||
basicAuth: false |
|||
isDefault: false |
|||
editable: true |
|||
- name: Prometheus |
|||
type: prometheus |
|||
access: proxy |
|||
orgId: 1 |
|||
url: http://prometheus:9090 |
|||
basicAuth: false |
|||
isDefault: true |
|||
editable: true |
|||
- name: Loki |
|||
type: loki |
|||
access: proxy |
|||
url: http://loki:3100 |
|||
basicAuth: false |
|||
isDefault: false |
|||
editable: true |
|||
... |
@@ -1,44 +1,46 @@ |
|||
--- |
|||
global: |
|||
scrape_interval: 15s |
|||
evaluation_interval: 15s |
|||
scrape_interval: 15s |
|||
evaluation_interval: 15s |
|||
|
|||
external_labels: |
|||
monitor: 'docker-host-alpha' |
|||
external_labels: |
|||
monitor: 'docker-host-alpha' |
|||
|
|||
rule_files: |
|||
- "alert.rules" |
|||
- "alert.rules" |
|||
|
|||
scrape_configs: |
|||
- job_name: 'prometheus' |
|||
scrape_interval: 10s |
|||
static_configs: |
|||
- targets: ['localhost:9090'] |
|||
- job_name: 'prometheus' |
|||
scrape_interval: 10s |
|||
static_configs: |
|||
- targets: [ 'localhost:9090' ] |
|||
|
|||
- job_name: 'nodeexporter' |
|||
scrape_interval: 5s |
|||
static_configs: |
|||
- targets: ['nodeexporter:9100'] |
|||
- job_name: 'nodeexporter' |
|||
scrape_interval: 5s |
|||
static_configs: |
|||
- targets: [ 'nodeexporter:9100' ] |
|||
|
|||
- job_name: 'nginxexporter' |
|||
scrape_interval: 5s |
|||
static_configs: |
|||
- targets: ['nginxexporter:9113'] |
|||
- job_name: 'nginxexporter' |
|||
scrape_interval: 5s |
|||
static_configs: |
|||
- targets: [ 'nginxexporter:9113' ] |
|||
|
|||
- job_name: 'cadvisor' |
|||
scrape_interval: 5s |
|||
static_configs: |
|||
- targets: ['cadvisor:8080'] |
|||
- job_name: 'cadvisor' |
|||
scrape_interval: 5s |
|||
static_configs: |
|||
- targets: [ 'cadvisor:8080' ] |
|||
|
|||
- job_name: 'pushgateway' |
|||
scrape_interval: 10s |
|||
honor_labels: true |
|||
static_configs: |
|||
- targets: ['pushgateway:9091'] |
|||
- job_name: 'pushgateway' |
|||
scrape_interval: 10s |
|||
honor_labels: true |
|||
static_configs: |
|||
- targets: [ 'pushgateway:9091' ] |
|||
|
|||
|
|||
alerting: |
|||
alertmanagers: |
|||
- scheme: http |
|||
static_configs: |
|||
- targets: |
|||
- 'alertmanager:9093' |
|||
alertmanagers: |
|||
- scheme: http |
|||
static_configs: |
|||
- targets: |
|||
- 'alertmanager:9093' |
|||
... |
@@ -1,26 +1,28 @@ |
|||
--- |
|||
server: |
|||
http_listen_port: 9080 |
|||
grpc_listen_port: 0 |
|||
http_listen_port: 9080 |
|||
grpc_listen_port: 0 |
|||
|
|||
positions: |
|||
filename: /tmp/positions.yaml |
|||
filename: /tmp/positions.yaml |
|||
|
|||
clients: |
|||
- url: http://loki:3100/loki/api/v1/push |
|||
- url: http://loki:3100/loki/api/v1/push |
|||
|
|||
scrape_configs: |
|||
- job_name: containers |
|||
entry_parser: docker |
|||
static_configs: |
|||
- targets: |
|||
- localhost |
|||
labels: |
|||
job: containerlogs |
|||
__path__: /var/lib/docker/containers/*/*log |
|||
- job_name: system |
|||
static_configs: |
|||
- targets: |
|||
- localhost |
|||
labels: |
|||
job: varlogs |
|||
__path__: /var/log/*log |
|||
- job_name: containers |
|||
entry_parser: docker |
|||
static_configs: |
|||
- targets: |
|||
- localhost |
|||
labels: |
|||
job: containerlogs |
|||
__path__: /var/lib/docker/containers/*/*log |
|||
- job_name: system |
|||
static_configs: |
|||
- targets: |
|||
- localhost |
|||
labels: |
|||
job: varlogs |
|||
__path__: /var/log/*log |
|||
... |
Reference in new issue