Skip to content

Commit

Permalink
add deploy monitor
Browse files Browse the repository at this point in the history
  • Loading branch information
bai-charisu authored and YunhuiChen committed Aug 27, 2020
1 parent 7a99248 commit 22aaf39
Show file tree
Hide file tree
Showing 29 changed files with 1,042 additions and 49 deletions.
18 changes: 17 additions & 1 deletion curve-ansible/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,12 @@ ansible-playbook deploy_curve.yml -i server.ini --tags snapshotclone
ansible-playbook deploy_curve.yml -i server.ini --tags snapshotclone_nginx
```

#### 1.10 部署监控
一键部署监控命令:
```shell
ansible-playbook deploy_monitor.yml -i server.ini
```

### 2、集群升级
目前curve的升级流程为先升级mds,后升级chunkserver和快照克隆最后升级client。使用ansible需要指定一台主控机,我们规定主控机为mds节点之一。ansible-playbook同时需要yml文件和inventory文件,yml文件规定了要做哪些操作,inventory指定了机器列表并定义了一些变量。yml文件在curve仓库的curve-ansible目录中,inventory每个环境一份,由用户自行管理。

Expand Down Expand Up @@ -326,6 +332,7 @@ ansible-playbook stop_nebd_server.yml -i client.ini
├── common_tasks # 放置可以复用的代码(可以理解为各种函数)
│   ├── check_chunkserver.yml # 检查chunkserver机器配置
│   ├── check_cluster_healthy_status.yml # 检查集群健康状态
│   ├── check_docker_exists.yml # 检查是否安装了docker
│   ├── check_if_nbd_exists_in_kernel.yml # 检查内核是否有nbd模块
│   ├── check_mds.yml # 检查mds机器配置
│   ├── create_dir.yml # 创建目录
Expand All @@ -341,7 +348,6 @@ ansible-playbook stop_nebd_server.yml -i client.ini
│   ├── wait_until_server_down.yml # 等待直到server停掉
│   └── wait_until_server_up.yml # 等待直到server起来
├── group_vars # 组变量
│   ├── all.yml # 属于所有host的变量
│   ├── chunkservers.yml # 属于chunkserver的变量
│   └── mds.yml # 属于mds的变量
├── host_vars # 主机变量, 其中的文件名要和inventory中定义的主机名一致
Expand Down Expand Up @@ -394,6 +400,7 @@ ansible-playbook stop_nebd_server.yml -i client.ini
│   │   │   │   ├── copy_file_to_remote.yml # 将文件拷贝到远端
│   │   │   │   ├── install_curve-chunkserver.yml # 安装chunkserver
│   │   │   │   ├── install_curve-mds.yml # 安装mds
│   │   │   │   ├── install_curve-monitor.yml
│   │   │   │   ├── install_curve-nbd.yml # 安装nbd
│   │   │   │   ├── install_curve-sdk.yml # 安装curve-sdk
│   │   │   │   ├── install_curve-snapshotcloneserver-nginx.yml # 安装快照克隆使用的Nginx
Expand Down Expand Up @@ -434,17 +441,25 @@ ansible-playbook stop_nebd_server.yml -i client.ini
│   │   ├── templates # 配置文件的模板
│   │   │   ├── chunkserver.conf.j2
│   │   │   ├── client.conf.j2
│   │   │   ├── docker-compose.yml.j2 # docker config for curve monitor
│   │   │   ├── etcd.conf.yml.j2
│   │   │   ├── grafana.ini.j2
│   │   │   ├── mds.conf.j2
│   │   │   ├── nebd-client.conf.j2
│   │   │   ├── nebd-server.conf.j2
│   │   │   ├── nginx_config.lua.j2
│   │   │   ├── nginx.conf.j2
│   │   │   ├── prometheus.yml.j2
│   │   │   ├── s3.conf.j2
│   │   │   ├── snapshot_clone_server.conf.j2
│   │   │   ├── snapshot_tools.conf.j2
│   │   │   ├── tools.conf.j2
│   │   │   └── topo.json.j2
│   │   └── vars
│   │   └── main.yml
│   ├── grafana_settings # set grafana datasource and dashboard
│   │   └── tasks
│   │   └── main.yml
│   ├── stop_service # 停止服务的role
│   │   ├── tasks
│   │   │   ├── include
Expand Down Expand Up @@ -485,6 +500,7 @@ ansible-playbook stop_nebd_server.yml -i client.ini
├── deploy_curve.yml # 一键部署curve集群
├── deploy_etcd.yml # 部署etcd
├── deploy_mds.yml # 部署mds
├── deploy_monitor.yml # 部署监控服务
├── deploy_nbd.yml # 部署nbd
├── deploy_nebd.yml # 部署nebd
├── deploy_snapshotcloneserver_nginx.yml # 部署快照克隆所用Nginx
Expand Down
1 change: 1 addition & 0 deletions curve-ansible/client.ini
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ ansible_ssh_port=22
lib_install_prefix=/usr/local
bin_install_prefix=/usr
ansible_connection=local
wait_service_timeout=20
25 changes: 25 additions & 0 deletions curve-ansible/common_tasks/check_docker_exists.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
---
#
# Copyright (c) 2020 NetEase Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Probe for the docker CLI on the target host.
# This task is a read-only check: it never reports a change and never fails
# by itself, so the explicit check below can emit a readable error message.
- name: check docker exist
  shell: docker -v
  register: docker_res
  # Without this, a missing docker binary makes the shell return non-zero and
  # aborts here with a generic error before the next task gets a chance to run.
  failed_when: false
  changed_when: false

# Abort with an actionable message when the probe above did not succeed.
- name: fail when docker not exist
  fail:
    msg: "Docker should be installed on this machine!"
  when: docker_res.rc != 0
12 changes: 11 additions & 1 deletion curve-ansible/deploy_curve.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
tags: ["install_package", "install_tool_package"] }
- { role: generate_config, template_name: mds.conf, conf_path: "{{ mds_config_path }}",
tags: ["generate_config", "generage_mds_conf"] }
- { role: generate_config, template_name: tools.conf, conf_path: "{{ tool_config_path }}",
- { role: generate_config, template_name: tools.conf, conf_path: "{{ curve_ops_tool_config }}",
tags: ["generate_config", "generage_tools_conf"] }
- { role: generate_config, template_name: topo.json, conf_path: "{{ topo_file_path }}",
tags: ["generate_config", "generage_topo_json"] }
Expand Down Expand Up @@ -202,6 +202,16 @@


############################## deploy snapshotcloneserver_nginx ##############################
# Run the shared docker check on the snapshotclone_nginx hosts.
# The special "never" tag means this play is skipped unless one of its tags
# (e.g. snapshotclone_nginx) is requested explicitly via --tags.
- name: check docker exist
  hosts: snapshotclone_nginx
  gather_facts: false
  any_errors_fatal: true
  tags: ["snapshotclone_nginx", "never"]
  tasks:
    - include_tasks: common_tasks/check_docker_exists.yml

- name: prepare snapshotcloneserver_nginx
hosts: snapshotclone_nginx
any_errors_fatal: true
Expand Down
74 changes: 74 additions & 0 deletions curve-ansible/deploy_monitor.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
---
#
# Copyright (c) 2020 NetEase Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# The deploy playbook of Curve

# Import the ansible version check playbook. The special "always" tag keeps
# it running whichever --tags are selected (unless it is explicitly skipped).
- name: check_ansible_version
  import_playbook: check_ansible_version.yml
  tags: ["always"]

# Run the shared docker check on every host of the "monitor" group before
# anything is installed; a failure on any host stops the whole run.
- name: check docker exist
  hosts: monitor
  gather_facts: false
  any_errors_fatal: true
  tasks:
    - include_tasks: common_tasks/check_docker_exists.yml

# Install the monitor and tools packages, then render every configuration
# file the monitor needs (docker-compose, grafana, prometheus, tools.conf).
# All roles run as "sudo_user" on the hosts of the "monitor" group.
- name: prepare monitor
  hosts: monitor
  gather_facts: false
  any_errors_fatal: true
  become: true
  become_user: "{{ sudo_user }}"
  become_flags: "-iu {{ sudo_user }}"
  roles:
    # --- packages ---
    - role: install_package
      package_name: curve-monitor
      package_version: "{{ monitor_package_version }}"
      tags:
        - install_package
        - install_monitor_package
    - role: install_package
      package_name: curve-tools
      package_version: "{{ tool_package_version }}"
      tags:
        - install_package
        - install_tool_package
    # --- configuration files rendered from templates ---
    - role: generate_config
      template_name: docker-compose.yml
      conf_path: "{{ monitor_work_dir }}/docker-compose.yml"
      tags:
        - generate_config
        - generage_docker_compose
    - role: generate_config
      template_name: grafana.ini
      conf_path: "{{ monitor_work_dir }}/grafana/grafana.ini"
      tags:
        - generate_config
        - generage_grafana_ini
    - role: generate_config
      template_name: prometheus.yml
      conf_path: "{{ monitor_work_dir }}/prometheus/prometheus.yml"
      tags:
        - generate_config
        - generage_prometheus_yml
    - role: generate_config
      template_name: tools.conf
      conf_path: "{{ curve_ops_tool_config }}"
      tags:
        - generate_config
        - generage_tools_conf

# Start the monitor service through the start_service role, running as
# "sudo_user". Selectable on its own with --tags start.
- name: start monitor
  hosts: monitor
  gather_facts: false
  any_errors_fatal: true
  become: true
  become_user: "{{ sudo_user }}"
  become_flags: "-iu {{ sudo_user }}"
  tags: ["start"]
  roles:
    - role: start_service
      service_name: monitor

# Apply the grafana_settings role (data source and dashboards, per the play
# name) on the monitor hosts. Selectable on its own with --tags grafana_settings.
- name: set grafana data source and dashboards
  hosts: monitor
  gather_facts: false
  any_errors_fatal: true
  become: true
  become_user: "{{ sudo_user }}"
  become_flags: "-iu {{ sudo_user }}"
  tags: ["grafana_settings"]
  roles:
    - role: grafana_settings
2 changes: 1 addition & 1 deletion curve-ansible/roles/generate_config/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ chunkserver_register_timeout: 1000
chunkserver_heartbeat_interval: 10
chunkserver_heartbeat_timeout: 5000
chunkserver_stor_uri: local://./0/
chunkserver_meta_uri: local://./0/chunkserver_dat
chunkserver_meta_uri: local://./0/chunkserver.dat
chunkserver_disk_type: nvme
chunkserver_snapshot_throttle_throughput_bytes: 20971520
chunkserver_snapshot_throttle_check_cycles: 4
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# docker-compose template for the curve monitor stack: prometheus, grafana
# and a reporter container. Every service uses the host network, and all
# relative paths are resolved against the directory holding this file.
version: "2.0"

services:

  # Prometheus: configuration and TSDB data live under ./prometheus on the
  # host; retention limits and the listen port come from template variables.
  prometheus:
    image: prom/prometheus:latest
    network_mode: host
    volumes:
      - "./prometheus/:/etc/prometheus/:rw"
      - "./prometheus/data:/prometheus:rw"
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time={{ monitor_retention_time }}'
      - '--storage.tsdb.retention.size={{ monitor_retention_size }}'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
      - '--web.listen-address=:{{ prometheus_listen_port }}'

  # Grafana: started after prometheus; data directory and grafana.ini are
  # mounted from ./grafana, admin credentials come from template variables.
  grafana:
    image: grafana/grafana
    network_mode: host
    depends_on:
      - prometheus
    environment:
      - GF_INSTALL_PLUGINS=grafana-piechart-panel
      - GF_SECURITY_ADMIN_USER={{ grafana_username }}
      - GF_SECURITY_ADMIN_PASSWORD={{ grafana_password }}
    volumes:
      - "./grafana/data:/var/lib/grafana:rw"
      - "./grafana/grafana.ini:/etc/grafana/grafana.ini:rw"

  # Reporter: mounts the host time settings read-only and maps
  # ./grafana/report to /tmp/report inside the container.
  reporter:
    image: promoon/reporter:latest
    network_mode: host
    volumes:
      - "/etc/localtime:/etc/localtime:ro"
      - "/etc/timezone:/etc/timezone:ro"
      - "./grafana/report:/tmp/report:rw"
Loading

0 comments on commit 22aaf39

Please sign in to comment.