From 364f9cfc88fb20bfc0008e4145b8f3c0bc532617 Mon Sep 17 00:00:00 2001
From: Michael Friedrich
Date: Wed, 19 Mar 2014 18:54:02 +0100
Subject: [PATCH] Documentation: Add cluster scenarios.

Fixes #5443
---
 doc/6.04-cluster.md | 229 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 228 insertions(+), 1 deletion(-)

diff --git a/doc/6.04-cluster.md b/doc/6.04-cluster.md
index d720c75b8..d77a6d947 100644
--- a/doc/6.04-cluster.md
+++ b/doc/6.04-cluster.md
@@ -197,4 +197,231 @@
You can either set that variable as constant configuration definition in
[icinga2.conf](#icinga2-conf) or pass it as runtime variable to the
Icinga 2 daemon.

-    # icinga2 -c /etc/icinga2/node1/icinga2.conf -DIcingaLocalStateDir=/opt/node1/var
\ No newline at end of file
+    # icinga2 -c /etc/icinga2/node1/icinga2.conf -DIcingaLocalStateDir=/opt/node1/var


### Cluster Scenarios

#### Features in Cluster

Each cluster instance may use the available features. If you have multiple locations
or departments, they may write to their local database, or populate Graphite.
Furthermore, all commands are distributed (unless prohibited using [Domains](#domains)).

DB IDO on the left, Graphite on the right side - works.
Icinga Web 2 on the left, checker and notifications on the right side - works too.
Everything on both the left and the right side - make sure to deal with duplicated
notifications and automated check distribution.

#### Location Based Cluster

This scenario fits if your instances are spread over the globe and they all report
to a central instance. Their network connection only works towards the central master
(or the master is able to connect, depending on firewall policies), which means the
remote instances won't be able to see or connect to each other.

All events are synced to the central node, but the remote nodes can still run
local features such as a web interface, reporting, graphing, etc.

Imagine the following example with a central node in Nuremberg, and two remote DMZ
based instances in Berlin and Vienna. The configuration tree on the central instance
could look like this:

    conf.d/
        templates/
        germany/
            nuremberg/
                hosts.conf
            berlin/
                hosts.conf
        austria/
            vienna/
                hosts.conf

The configuration deployment should look like this:

* The node `nuremberg` sends `conf.d/germany/berlin` to the `berlin` node.
* The node `nuremberg` sends `conf.d/austria/vienna` to the `vienna` node.

`conf.d/templates` is shared between all nodes.

The endpoint configuration on the `nuremberg` node would look like this:

    object Endpoint "nuremberg" {
      host = "nuremberg.icinga.org",
      port = 8888,
    }

    object Endpoint "berlin" {
      host = "berlin.icinga.org",
      port = 8888,
      config_files_recursive = [ "/etc/icinga2/conf.d/templates",
                                 "/etc/icinga2/conf.d/germany/berlin" ]
    }

    object Endpoint "vienna" {
      host = "vienna.icinga.org",
      port = 8888,
      config_files_recursive = [ "/etc/icinga2/conf.d/templates",
                                 "/etc/icinga2/conf.d/austria/vienna" ]
    }

Each remote node will only peer with the central `nuremberg` node. Therefore
only two endpoints are required for the cluster connection. Furthermore the remote
node must include the configuration received via the cluster functionality.

Example for the configuration on the `berlin` node:

    object Endpoint "nuremberg" {
      host = "nuremberg.icinga.org",
      port = 8888,
    }

    object Endpoint "berlin" {
      host = "berlin.icinga.org",
      port = 8888,
      accept_config = [ "nuremberg" ]
    }

    include_recursive IcingaLocalStateDir + "/lib/icinga2/cluster/config"
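
The `vienna` node is set up analogously. The following is only a sketch mirroring
the `berlin` example, reusing the `vienna.icinga.org` host name from the endpoint
configuration above:

    # The central node this instance peers with.
    object Endpoint "nuremberg" {
      host = "nuremberg.icinga.org",
      port = 8888,
    }

    # Local endpoint, only accepting configuration deployed by "nuremberg".
    object Endpoint "vienna" {
      host = "vienna.icinga.org",
      port = 8888,
      accept_config = [ "nuremberg" ]
    }

    # Include the configuration received via the cluster functionality.
    include_recursive IcingaLocalStateDir + "/lib/icinga2/cluster/config"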

Depending on the network connectivity, the connections can be established either
by the remote node or by the central node.

Example for the `berlin` node connecting to the central `nuremberg` node:

    library "cluster"

    object ClusterListener "berlin-cluster" {
      ca_path = "/etc/icinga2/ca/ca.crt",
      cert_path = "/etc/icinga2/ca/berlin.crt",
      key_path = "/etc/icinga2/ca/berlin.key",
      bind_port = 8888,
      peers = [ "nuremberg" ]
    }

Example for the central `nuremberg` node connecting to the remote nodes:

    library "cluster"

    object ClusterListener "nuremberg-cluster" {
      ca_path = "/etc/icinga2/ca/ca.crt",
      cert_path = "/etc/icinga2/ca/nuremberg.crt",
      key_path = "/etc/icinga2/ca/nuremberg.key",
      bind_port = 8888,
      peers = [ "berlin", "vienna" ]
    }

The central node should not execute any checks by itself. There are two possibilities
to achieve that:

* Disable the `checker` feature.
* Pin the service object configuration to the remote endpoints using the
[authorities](#assign-services-to-cluster-nodes) attribute.


#### Load Distribution

If you are planning to off-load the checks to a defined set of remote workers,
you can achieve that by:

* Deploying the configuration on all nodes.
* Letting Icinga 2 distribute the load amongst all available nodes.

That way all remote check instances will receive the same configuration, but only
execute their part. The central instance can also execute checks, but you may also
disable its `checker` feature.

    conf.d/
        templates/
        many/

If you are planning to have some checks executed by a specific set of checker nodes,
just pin them using the [authorities](#assign-services-to-cluster-nodes) attribute.

Example on the `central` node:

    object Endpoint "central" {
      host = "central.icinga.org",
      port = 8888,
    }

    object Endpoint "checker1" {
      host = "checker1.icinga.org",
      port = 8888,
      config_files_recursive = [ "/etc/icinga2/conf.d" ]
    }

    object Endpoint "checker2" {
      host = "checker2.icinga.org",
      port = 8888,
      config_files_recursive = [ "/etc/icinga2/conf.d" ]
    }

    object ClusterListener "central-cluster" {
      ca_path = "/etc/icinga2/ca/ca.crt",
      cert_path = "/etc/icinga2/ca/central.crt",
      key_path = "/etc/icinga2/ca/central.key",
      bind_port = 8888,
      peers = [ "checker1", "checker2" ]
    }

Example on the `checker1` node:

    object Endpoint "central" {
      host = "central.icinga.org",
      port = 8888,
    }

    object Endpoint "checker1" {
      host = "checker1.icinga.org",
      port = 8888,
      accept_config = [ "central" ]
    }

    object Endpoint "checker2" {
      host = "checker2.icinga.org",
      port = 8888,
      accept_config = [ "central" ]
    }

    object ClusterListener "checker1-cluster" {
      ca_path = "/etc/icinga2/ca/ca.crt",
      cert_path = "/etc/icinga2/ca/checker1.crt",
      key_path = "/etc/icinga2/ca/checker1.key",
      bind_port = 8888
    }


#### High Availability

Two nodes in a high availability setup require an [initial cluster sync](#initial-cluster-sync).
Furthermore the active master node should deploy the configuration to the
second node, if your provisioning tool does not already take care of that. This
primarily depends on which features are enabled/used. A failover mechanism is still
required to detect, for example, which instance acts as the notification "master".
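
The following is only a minimal sketch of such a pair, reusing the attributes shown
in the examples above; the endpoint names `icinga2a`/`icinga2b` and their host names
are placeholders. The active master `icinga2a` deploys the configuration to
`icinga2b` and connects to it:

    library "cluster"

    # Hypothetical endpoint and host names - adjust them to your environment.
    object Endpoint "icinga2a" {
      host = "icinga2a.icinga.org",
      port = 8888,
    }

    # Second node, receiving the deployed configuration from the active master.
    object Endpoint "icinga2b" {
      host = "icinga2b.icinga.org",
      port = 8888,
      config_files_recursive = [ "/etc/icinga2/conf.d" ]
    }

    object ClusterListener "icinga2a-cluster" {
      ca_path = "/etc/icinga2/ca/ca.crt",
      cert_path = "/etc/icinga2/ca/icinga2a.crt",
      key_path = "/etc/icinga2/ca/icinga2a.key",
      bind_port = 8888,
      peers = [ "icinga2b" ]
    }

The `icinga2b` node would mirror this configuration with its own certificates and
`accept_config = [ "icinga2a" ]` on its local endpoint, just like the `berlin` node
shown earlier; depending on the connectivity it may also list `icinga2a` in its
`peers` attribute.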

#### Multiple Hierarchies

Your central instance collects all check results for reporting and graphing, and also
handles some additional notifications.
The customers have their own instances in their local DMZs. These are limited to
reading/writing only their own services, but replicate all events back to the central
instance.
Within each DMZ there are additional check instances which also serve interfaces for
the local departments. The customers' instances will collect all results, but also
send them back to your central instance.
Additionally, the customers' instance on the second level in the middle prohibits you
from sending commands to the department nodes below. You are only allowed to receive
their results, and a subset of each customer's configuration too.

Your central instance will generate global reports, aggregate alert notifications and
check additional dependencies (for example, the customers' internet uplink and
bandwidth usage).

The customers' instance will only check a subset of the local services and delegate
the rest to each department. It still acts as the configuration master, providing a
central dashboard where all departments manage their configuration tree, which is then
deployed to all department instances. Furthermore the central NOC is able to see
what's going on.

The instances in the departments will serve a local interface, and allow the
administrators to reschedule checks or acknowledge problems for their services.
\ No newline at end of file
--
2.40.0