下载node-exporter
https://github.com/prometheus/node_exporter/releases
下载ansible
https://github.com/ansible/ansible/releases
启动node-exporter service文件
node-exporter.service
# systemd unit that runs the node_exporter binary installed at
# /usr/local/bin/node_exporter as a foreground (Type=simple) service.
[Unit]
Description=node_exporter
# Start only after (and together with) networking and remote filesystems.
Requires=network.target remote-fs.target
After=network.target remote-fs.target

[Service]
Type=simple
# Serve metrics on port 9100 on every interface.
ExecStart=/usr/local/bin/node_exporter --web.listen-address=:9100
# "systemctl reload" sends SIGHUP to the main process.
ExecReload=/bin/kill -HUP $MAINPID
# On stop, signal only the main process, not the whole control group.
KillMode=process
# Restart automatically after an unclean exit, waiting 5 seconds first.
Restart=on-failure
RestartSec=5s

[Install]
# Enabling the unit hooks it into the normal multi-user boot target.
WantedBy=multi-user.target
ansible的hosts文件
# Inventory group targeted by the play whose "hosts:" is node-exporter.
# Each entry sets the SSH port, the login user, the SSH password and the
# sudo password for that host.
# NOTE(review): credentials are stored here in plain text (root/root) --
# presumably lab values; consider Ansible Vault or SSH keys for real hosts.
[node-exporter]
192.168.56.123 ansible_ssh_port=22 ansible_ssh_user="root" ansible_ssh_pass="root" ansible_sudo_pass="root"
192.168.56.124 ansible_ssh_port=22 ansible_ssh_user="root" ansible_ssh_pass="root" ansible_sudo_pass="root"
远程普通用户若要使用 sudo 权限,需要在 /etc/ansible/ansible.cfg 进行配置,若远程是root用户则可跳过。
# ansible.cfg section controlling privilege escalation.
# Needed when the remote login user is a regular user who must use sudo;
# it can be skipped when logging in as root.
[privilege_escalation]
# Turn on privilege escalation by default.
become=True
# Escalate through sudo.
become_method=sudo
ansible的deploy文件
ansible-deploy.yaml
---
# Installs node-exporter on every host of the "node-exporter" inventory group
# unless "systemctl status node-exporter" already succeeds there.
- hosts: node-exporter
  become: yes
  tasks:
    # A non-zero rc means the unit is missing or not running; the failure is
    # ignored so the play can continue and install it.
    - name: "Check if node-exporter is installed"
      command: systemctl status node-exporter
      register: node_exporter_check
      ignore_errors: true

    - name: "Print node-exporter status"
      debug:
        msg: "Node exporter is {{ 'running' if 'active (running)' in node_exporter_check.stdout else 'not running' }}"

    # The whole block is skipped when the status check above succeeded.
    - name: "Download and install node-exporter dir"
      block:
        # Upload the local node-exporter directory (binary + unit file),
        # located next to this playbook, into /tmp/ on the target.
        - name: "copy node-exporter"
          copy:
            src: "{{item.src}}"
            dest: "{{item.dest}}"
            mode: 0755
          with_items:
            - {src: "{{ playbook_dir }}/node-exporter", dest: "/tmp/"}

        # Put the binary where the unit file's ExecStart expects it.
        - name: "copy node-exporter"
          shell: sudo cp /tmp/node-exporter/node_exporter /usr/local/bin
          notify:
            - reload

        - name: "copy node-exporter.service"
          shell: sudo cp /tmp/node-exporter/node-exporter.service /usr/lib/systemd/system/
          notify:
            - reload

        - name: "start and enable server"
          shell: sudo systemctl start node-exporter.service && sudo systemctl enable node-exporter.service
      when: node_exporter_check.rc != 0

  handlers:
    # Makes systemd re-read unit files after they were changed.
    - name: reload
      shell: sudo systemctl daemon-reload
---
# Variant for all inventory hosts, processed in batches of 30:
#   1. removes a legacy /etc/systemd/system/node_exporter.service if present,
#   2. checks both unit spellings (node-exporter and node_exporter),
#   3. installs node-exporter only when neither status check succeeds.
- hosts: all
  become: yes
  serial: 30
  gather_facts: yes  # if you hit locale problems, fix them first or temporarily set this to no
  environment:
    LC_ALL: C  # or another suitable value, e.g. 'en_US.UTF-8'
    LANG: C    # keep consistent with LC_ALL to avoid conflicts
  tasks:
    - name: "Check if node_exporter.service file exists"
      stat:
        path: /etc/systemd/system/node_exporter.service
      register: node_exporter_file_check

    - name: "Check if node_exporter.service file exists"
      debug:
        msg: "node_exporter.service is exists."
      when: node_exporter_file_check.stat.exists

    # Disable, stop and delete the legacy unit file found above.
    - name: "Check if node_exporter.service file exists"
      shell: sudo systemctl disable node_exporter.service && sudo systemctl stop node_exporter.service && sudo rm -rf /etc/systemd/system/node_exporter.service
      notify:
        - reload
      when: node_exporter_file_check.stat.exists

    # Status checks for both spellings; failures are ignored so that the
    # return codes can drive the install condition below.
    - name: "Check if node-exporter is installed with node-exporter"
      command: systemctl status node-exporter
      register: node_exporter_check
      ignore_errors: true

    - name: "Check if node-exporter is installed with node_exporter"
      command: systemctl status node_exporter
      register: node_exporter_check_ext
      ignore_errors: true

    - name: "Print node-exporter status running"
      debug:
        msg: "Node exporter is {{ 'running' if ( 'active (running)' in node_exporter_check.stdout or 'active (running)' in node_exporter_check_ext.stdout ) else 'not running' }}"

    # Runs only when both status checks returned a non-zero rc.
    - name: "do install node-exporter"
      block:
        # Upload the local node-exporter directory (binary + unit file),
        # located next to this playbook, into /tmp/ on the target.
        - name: "copy node-exporter"
          copy:
            src: "{{item.src}}"
            dest: "{{item.dest}}"
            mode: 0755
          with_items:
            - {src: "{{ playbook_dir }}/node-exporter", dest: "/tmp/"}

        - name: "copy node-exporter"
          shell: sudo cp /tmp/node-exporter/node_exporter /usr/local/bin
          notify:
            - reload

        - name: "copy node-exporter.service"
          shell: sudo cp /tmp/node-exporter/node-exporter.service /usr/lib/systemd/system/
          notify:
            - reload

        - name: "start and enable server"
          shell: sudo systemctl start node-exporter.service && sudo systemctl enable node-exporter.service
      when: node_exporter_check.rc != 0 and node_exporter_check_ext.rc != 0

  handlers:
    # Makes systemd re-read unit files after they were changed or removed.
    - name: reload
      shell: sudo systemctl daemon-reload
部署node-exporter
#!/bin/bash
#
# Runs ansible-deploy.yaml against the inventory file "hosts" found in the
# current working directory and stores the playbook's standard output in a
# timestamped log file under ./logs (created on demand).
#
# Usage: run from the directory that contains "hosts" and
#        "ansible-deploy.yaml". The exit status is that of ansible-playbook.

# Treat references to unset variables as errors; failures of critical steps
# are checked explicitly below.
set -u

# ---------------------------------------------------------------------------
# Disabled: back up /etc/ansible/hosts and replace it with the local "hosts"
# file. Kept for reference.
# ---------------------------------------------------------------------------
#SOURCE_FILE="/etc/ansible/hosts"
## Directory where backup files are stored
#BACKUP_DIR="/etc/ansible/backup"
#
## Age (in days) after which backups are deleted
#DAYS=10
## Use find to locate and delete backup files older than 10 days
#find "$BACKUP_DIR" -type f -name "hosts_backup_*.bak" -mtime +$((DAYS-1)) -exec rm -f {} \;
#echo "Deleted backup files older than $DAYS days in $BACKUP_DIR"
#
## Check whether the directory exists
#if [ -d "$BACKUP_DIR" ]; then
#  echo "Directory exists."
#else
#  echo "Directory does not exist, create it."
#  # Make sure the backup directory exists: create it
#  mkdir -p "$BACKUP_DIR"
#fi
#
## Current timestamp, formatted as YYYYMMDD_HHMMSS
#TIMESTAMP=$(date +%Y%m%d_%H%M%S)
## Back up the file
#cp "$SOURCE_FILE" "${BACKUP_DIR}/hosts_backup_${TIMESTAMP}.bak"
#echo "Backup created with timestamp: ${TIMESTAMP}"
#rm -rf "$SOURCE_FILE"
#cp hosts "/etc/ansible"

# Current working directory
CURRENT_DIR=$(pwd)
# Current timestamp, formatted as YYYYMMDD_HHMMSS
TIMESTAMP=$(date +%Y%m%d_%H%M%S)

# Inventory file and log directory, both relative to the working directory
HOSTS_FILE="$CURRENT_DIR/hosts"
LOG_DIR="$CURRENT_DIR/logs"

# Make sure the log directory exists before redirecting output into it
if [[ -d "$LOG_DIR" ]]; then
  echo "Directory exists."
else
  echo "Directory does not exist, create it."
  mkdir -p "$LOG_DIR" || exit 1
fi

LOG_FILE="${LOG_DIR}/ansible_${TIMESTAMP}.log"

# Only stdout is written to the log; stderr is left untouched.
ansible-playbook -i "$HOSTS_FILE" ansible-deploy.yaml > "$LOG_FILE"
最终目录结构
ansible-deploy-node-exporter-x86
|-ansible-deploy.sh
|-ansible-deploy.yaml
|-hosts
|-node-exporter
  |-node-exporter.service
  |-node_exporter
构建ansible的docker镜像
# Alpine-based image with Ansible and SSH tooling that runs as a non-root
# user and uses the Asia/Shanghai time zone.
FROM swr.cn-north-4.myhuaweicloud.com/ddn-k8s/gcr.io/iguazio/alpine:3.17

# openssh-client/sshpass: SSH connections with password auth;
# shadow: provides groupadd/useradd; tzdata: time zone database.
RUN apk add --no-cache openssh-client ansible bash sshpass shadow tzdata

# Set the time zone to Asia/Shanghai
ENV TZ=Asia/Shanghai
# Create the local-time symlink
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

# Create a non-root user (name/uid/gid can be overridden with --build-arg)
ARG USERNAME=ansibleuser
ARG USER_UID=1000
ARG USER_GID=$USER_UID

# Add the group and the user
RUN groupadd --gid $USER_GID $USERNAME && \
    useradd --uid $USER_UID --gid $USER_GID -m -s /bin/bash $USERNAME

# Point Ansible at temp directories inside the new user's home
ENV ANSIBLE_LOCAL_TEMP="/home/$USERNAME/.ansible/tmp"
ENV ANSIBLE_REMOTE_TEMP="/home/$USERNAME/.ansible/tmp_remote"

# Create the directories and hand ownership to the new user
RUN mkdir -p $ANSIBLE_LOCAL_TEMP $ANSIBLE_REMOTE_TEMP && \
    chown -R $USERNAME:$USERNAME /home/$USERNAME/.ansible

# Switch to the newly created user
USER $USERNAME

# Sleep for an hour as the default command
CMD ["sh", "-c", "sleep 3600"]