From 01cda8792213340fdedcd364dedb2a73564d27f2 Mon Sep 17 00:00:00 2001 From: Mitch Riedstra Date: Fri, 13 Nov 2020 17:47:31 -0500 Subject: Remove systemd support. Add alertmanager. Remove old template --- defaults/main.yml | 58 ++++++++++++++++---- .../__pycache__/forEachAppend.cpython-38.pyc | Bin 0 -> 784 bytes filter_plugins/forEachAppend.py | 10 ++++ handlers/main.yml | 8 +++ tasks/alertmanager.yml | 59 +++++++++++++++++++++ tasks/main.yml | 51 ++++++++++++++---- tasks/runit.yml | 17 ------ tasks/systemd.yml | 9 ---- templates/alertmanager.runit | 6 +++ templates/prometheus.runit | 5 ++ templates/prometheus/prometheus.yml | 53 ------------------ templates/prometheus/runit/run | 5 -- templates/prometheus/systemd.unit | 12 ----- 13 files changed, 175 insertions(+), 118 deletions(-) create mode 100644 filter_plugins/__pycache__/forEachAppend.cpython-38.pyc create mode 100644 filter_plugins/forEachAppend.py create mode 100644 handlers/main.yml create mode 100644 tasks/alertmanager.yml delete mode 100644 tasks/runit.yml delete mode 100644 tasks/systemd.yml create mode 100644 templates/alertmanager.runit create mode 100644 templates/prometheus.runit delete mode 100644 templates/prometheus/prometheus.yml delete mode 100644 templates/prometheus/runit/run delete mode 100644 templates/prometheus/systemd.unit diff --git a/defaults/main.yml b/defaults/main.yml index afe2627..7d67f8e 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -1,18 +1,15 @@ --- -# Use this to override the template for more complicated uses -prometheus_template_source: prometheus/prometheus.yml - prometheus_ui_listen: ':9090' prometheus_home_dir: /var/lib/prometheus prometheus_data_dir: '{{prometheus_home_dir}}/data' prometheus_retention_time: "730d" prometheus_log: '{{prometheus_home_dir}}/log' prometheus_user: prometheus -prometheus_version: "2.15.1" +prometheus_version: "2.22.1" # Should resolve to "linux" prometheus_os: "{{ansible_facts['system']|lower}}" -prometheus_checksum: 
f8d2b9e8f9551a3dd6f476a43ea9f699d18d2cc6558baf0eebe0143d8f9c1d5a +prometheus_checksum: 9001a9cb939e0a6d9f2b67d22506c620bc9457777272fced43274b032ba35f44 prometheus_architecture: amd64 prometheus_url: "https://github.com/prometheus/prometheus/releases/download/v{{prometheus_version}}/prometheus-{{prometheus_version}}.{{prometheus_os}}-{{prometheus_architecture}}.tar.gz" @@ -26,10 +23,49 @@ prometheus_opts: | --web.enable-admin-api -# The default template makes it easy to scrape Netdata from internal hosts -prometheus_netdata_hosts: - - localhost:19999 +# Filtered through 'to_nice_yaml' +# prometheus_config: + +alertmanager: true + +alertmanager_version: '0.21.0' +alertmanager_checksum_alg: sha256 +alertmanager_checksum: 9ccd863937436fd6bfe650e22521a7f2e6a727540988eef515dde208f9aef232 +alertmanager_url: 'https://github.com/prometheus/alertmanager/releases/download/v{{alertmanager_version}}/alertmanager-{{alertmanager_version}}.{{prometheus_os}}-{{prometheus_architecture}}.tar.gz' +alertmanager_port: '9093' +# Note that this is also UDP +alertmanager_cluster_port: '9094' +alertmanager_data_dir: '{{prometheus_home_dir}}/alertmanager-data' +alertmanager_log: '{{prometheus_home_dir}}/alertmanager.log' + +# Newlines are automatically replaced with spaces +alertmanager_opts: | + --config.file "{{prometheus_home_dir}}/conf/alertmanager.yml" + --storage.path {{alertmanager_data_dir}} + --web.listen-address "0.0.0.0:{{alertmanager_port}}" + --cluster.listen-address "0.0.0.0:{{alertmanager_cluster_port}}" + + +# You can either specify them by hand, or pull them out of the inventory with +# a query() +# alertmanager_nodes: +# - prometheus0.example.com +# - prometheus1.example.com +# alertmanager_nodes: "{{query('inventory_hostnames', 'prometheus')}}" + +# Append the configured port number, used in the config in a few spots +alertmanager_nodes_w_port: '{{alertmanager_nodes | forEachAppend(":" + alertmanager_cluster_port)}}' -# Only inserted into the template if defined -# 
prometheus_netdata_tls_hosts: -# - example.com:19443 +# alertmanager_conf: +# global: +# # The API URL to use for Slack notifications. +# slack_api_url: '{{slack_webhook_uri}}' +# +# route: +# receiver: 'slack-notifications' +# +# receivers: +# - name: 'slack-notifications' +# slack_configs: +# - channel: '#{{slack_alerts_channel}}' +# send_resolved: true diff --git a/filter_plugins/__pycache__/forEachAppend.cpython-38.pyc b/filter_plugins/__pycache__/forEachAppend.cpython-38.pyc new file mode 100644 index 0000000..124454c Binary files /dev/null and b/filter_plugins/__pycache__/forEachAppend.cpython-38.pyc differ diff --git a/filter_plugins/forEachAppend.py b/filter_plugins/forEachAppend.py new file mode 100644 index 0000000..b5d676d --- /dev/null +++ b/filter_plugins/forEachAppend.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python + +class FilterModule(object): + def filters(self): + return { + 'forEachAppend': self.forEachAppend, + } + + def forEachAppend(self, arr, _str): + return ['{}{}'.format(x, _str) for x in arr] # format() coerces non-string items instead of raising TypeError diff --git a/handlers/main.yml b/handlers/main.yml new file mode 100644 index 0000000..abaa110 --- /dev/null +++ b/handlers/main.yml @@ -0,0 +1,8 @@ +- name: Restart prometheus + runit: + name: prometheus + state: restarted +- name: Restart alertmanager + runit: + name: alertmanager + state: restarted diff --git a/tasks/alertmanager.yml b/tasks/alertmanager.yml new file mode 100644 index 0000000..4c9e4d3 --- /dev/null +++ b/tasks/alertmanager.yml @@ -0,0 +1,59 @@ +--- +- name: Download Alertmanager + get_url: + url: "{{alertmanager_url}}" + dest: "{{prometheus_home_dir}}/alertmanager-{{alertmanager_version}}.tgz" + mode: '0600' + owner: "{{prometheus_user}}" + checksum: '{{alertmanager_checksum_alg}}:{{alertmanager_checksum}}' +- set_fact: "alertmanager_extract_dir=alertmanager-{{alertmanager_version}}.{{prometheus_os}}-{{prometheus_architecture}}" +- name: Extract Alertmanager + shell: | + #!/bin/sh + set -e + if !
[ -e "{{prometheus_home_dir}}/{{alertmanager_extract_dir}}" ] ; then + su - '{{prometheus_user}}' -c 'tar xzf alertmanager-{{alertmanager_version}}.tgz' + exit 50 + fi + register: res + changed_when: res is defined and res.rc == 50 + failed_when: res.rc not in [0, 50] # rc 50 flags a fresh extraction; any other non-zero rc is a real failure + notify: Restart alertmanager +- name: Link alertmanager directroy + file: + state: link + src: '{{alertmanager_extract_dir}}' + dest: '{{prometheus_home_dir}}/alertmanager' + force: yes +- name: Create Data directory + file: + state: directory + dest: "{{alertmanager_data_dir}}" + mode: '0700' + owner: "{{prometheus_user}}" +- name: Write alertmanager configuration file + copy: + content: '{{alertmanager_conf | to_nice_yaml}}' + dest: '{{prometheus_home_dir}}/conf/alertmanager.yml' + owner: '{{prometheus_user}}' + mode: '0600' + notify: Restart alertmanager + tags: + - configuration +- name: Create Runit Directory + file: + state: directory + dest: /etc/sv/alertmanager + owner: root + mode: '0755' +- name: Write runit service + template: + src: alertmanager.runit + dest: /etc/sv/alertmanager/run + owner: root + mode: '0755' +- name: Enable Runit service + file: + state: link + src: /etc/sv/alertmanager + dest: /var/service/alertmanager diff --git a/tasks/main.yml b/tasks/main.yml index cb2645a..19ca131 100644 --- a/tasks/main.yml +++ b/tasks/main.yml @@ -22,15 +22,21 @@ dest: "{{prometheus_home_dir}}/prometheus-{{prometheus_version}}.tgz" mode: '0600' owner: "{{prometheus_user}}" - checksum: 'sha256:{{prometheus_checksum}}' + checksum: 'sha256:{{prometheus_checksum}}' # There's a bug in become_user that doesn't set permissions on the temp directory # which is asinine when my user has sudo persmissions. - name: Extract Prometheus shell: | + #!/bin/sh + set -e if !
[ -e "{{prometheus_home_dir}}/{{prometheus_extract_dir}}" ] ; then su - '{{prometheus_user}}' -c 'tar xzf prometheus-{{prometheus_version}}.tgz' + exit 50 fi - changed_when: false + register: res + changed_when: res is defined and res.rc == 50 + failed_when: res.rc not in [0, 50] # rc 50 flags a fresh extraction; any other non-zero rc is a real failure + notify: Restart prometheus - name: Link prometheus directory file: state: link @@ -45,15 +51,38 @@ force: yes loop: - prometheus.yml -- name: Write prometheus template - template: - src: '{{prometheus_template_source}}' +- name: Write prometheus configuration file + copy: + content: '{{prometheus_config | to_nice_yaml}}' dest: '{{prometheus_home_dir}}/conf/prometheus.yml' owner: '{{prometheus_user}}' mode: '0600' -- name: Run Runit Specific tasks - include_tasks: systemd.yml - when: ansible_facts['distribution'].lower() == "ubuntu" or ansible_facts['distribution'].lower() == "centos" -- name: Run Runit Specific tasks - include_tasks: runit.yml - when: ansible_facts['distribution'].lower() == "void" or ansible_facts['distribution'].lower() == "alpine" + notify: Restart prometheus + tags: + - configuration +- name: Create Runit Directory + file: + state: directory + dest: /etc/sv/prometheus + owner: root + mode: '0755' +- name: Write runit service + template: + src: prometheus.runit + dest: /etc/sv/prometheus/run + owner: root + mode: '0755' +- name: Enable Runit service + file: + state: link + src: /etc/sv/prometheus + dest: /var/service/prometheus +- name: Include Aertmanager tasks + include_tasks: + file: alertmanager.yml + apply: + tags: + - alertmanager + tags: + - alertmanager + when: alertmanager diff --git a/tasks/runit.yml b/tasks/runit.yml deleted file mode 100644 index d8bf8d4..0000000 --- a/tasks/runit.yml +++ /dev/null @@ -1,17 +0,0 @@ -- name: Create Runit Directory - file: - state: directory - dest: /etc/sv/prometheus - owner: root - mode: '0755' -- name: Write runit service - template: - src: prometheus/runit/run - dest: /etc/sv/prometheus/run - owner: root - mode: '0755' -- name: Enable
Runit service - file: - state: link - src: /etc/sv/prometheus - dest: /var/service/prometheus diff --git a/tasks/systemd.yml b/tasks/systemd.yml deleted file mode 100644 index 031f036..0000000 --- a/tasks/systemd.yml +++ /dev/null @@ -1,9 +0,0 @@ -- name: Write Systemd Unit - template: - src: prometheus/systemd.unit - dest: /etc/systemd/system/prometheus.service -- name: 'Enable Prometheus [Systemd]' - systemd: - enabled: yes - daemon_reload: yes - name: prometheus diff --git a/templates/alertmanager.runit b/templates/alertmanager.runit new file mode 100644 index 0000000..e300d47 --- /dev/null +++ b/templates/alertmanager.runit @@ -0,0 +1,6 @@ +#!/bin/sh +exec chpst -u {{prometheus_user}} \ + {{prometheus_home_dir}}/alertmanager/alertmanager \ + {{alertmanager_opts | replace('\n', ' ')}} \ + >> {{alertmanager_log}} 2>&1 + diff --git a/templates/prometheus.runit b/templates/prometheus.runit new file mode 100644 index 0000000..b14e2d1 --- /dev/null +++ b/templates/prometheus.runit @@ -0,0 +1,5 @@ +#!/bin/sh +exec chpst -u {{prometheus_user}} \ + {{prometheus_home_dir}}/prometheus/prometheus \ + {{prometheus_opts | replace('\n', ' ')}} \ + > {{prometheus_log}} 2>&1 diff --git a/templates/prometheus/prometheus.yml b/templates/prometheus/prometheus.yml deleted file mode 100644 index 2099b8b..0000000 --- a/templates/prometheus/prometheus.yml +++ /dev/null @@ -1,53 +0,0 @@ -# my global config -global: - scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. - evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. - # scrape_timeout is set to the global default (10s). - -# Alertmanager configuration -alerting: - alertmanagers: - - static_configs: - - targets: - # - alertmanager:9093 - -# Load rules once and periodically evaluate them according to the global 'evaluation_interval'. 
-rule_files: - # - "first_rules.yml" - # - "second_rules.yml" - -# A scrape configuration containing exactly one endpoint to scrape: -# Here it's Prometheus itself. -scrape_configs: - # The job name is added as a label `job=` to any timeseries scraped from this config. - - job_name: 'prometheus' - - # metrics_path defaults to '/metrics' - # scheme defaults to 'http'. - - static_configs: - - targets: ['localhost:9090'] - - - job_name: 'netdata' - - metrics_path: /api/v1/allmetrics - params: - format: [ prometheus ] - - static_configs: -{% for h in prometheus_netdata_hosts %} - targets: ['{{h}}'] -{%endfor%} - -{% if prometheus_netdata_tls_hosts is defined %} - - job_name: 'netdata_tls' - - scheme: https - - metrics_path: /api/v1/allmetrics - params: - format: [ prometheus ] - - static_configs: -{% for h in prometheus_netdata_tls_hosts %} - targets: ['{{h}}'] -{%endfor%} -{% endif %} diff --git a/templates/prometheus/runit/run b/templates/prometheus/runit/run deleted file mode 100644 index b14e2d1..0000000 --- a/templates/prometheus/runit/run +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh -exec chpst -u {{prometheus_user}} \ - {{prometheus_home_dir}}/prometheus/prometheus \ - {{prometheus_opts | replace('\n', ' ')}} \ - > {{prometheus_log}} 2>&1 diff --git a/templates/prometheus/systemd.unit b/templates/prometheus/systemd.unit deleted file mode 100644 index b1bd6cb..0000000 --- a/templates/prometheus/systemd.unit +++ /dev/null @@ -1,12 +0,0 @@ -# /etc/systemd/system/prometheus.service -[Unit] -Description=Prometheus Server -Documentation=https://prometheus.io/docs/introduction/overview/ -After=network-online.target - -[Service] -User=prometheus -Restart=on-failure -ExecStart={{prometheus_home_dir}}/prometheus/prometheus {{prometheus_opts | replace('\n', ' ')}} -[Install] -WantedBy=multi-user.target -- cgit v1.2.3