Skip to content

Commit

Permalink
More addressing
Browse files Browse the repository at this point in the history
  • Loading branch information
zippolyte committed Jul 28, 2017
1 parent 5110f7f commit 4a15ce6
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 19 deletions.
30 changes: 24 additions & 6 deletions cassandra_nodetool/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class CassandraNodetoolCheck(AgentCheck):
datacenter_name_re = re.compile('^Datacenter: (.*)')
node_status_re = re.compile('^(?P<status>[UD])[NLJM] +(?P<address>\d+\.\d+\.\d+\.\d+) +'
'(?P<load>\d+\.\d*) (?P<load_unit>(K|M|G|T)?B) +\d+ +'
'(?P<owns>(\d+\.\d+%)|\?) +(?P<id>[a-fA-F0-9-]*) +(?P<rack>.*)')
'(?P<owns>(\d+\.\d+)|\?)%? +(?P<id>[a-fA-F0-9-]*) +(?P<rack>.*)')

def __init__(self, name, init_config, agentConfig, instances=None):
AgentCheck.__init__(self, name, init_config, agentConfig, instances)
Expand All @@ -43,6 +43,9 @@ def check(self, instance):
password = instance.get("password", "")
tags = instance.get("tags", [])

# Flag to send service checks only once and not for every keyspace
send_service_checks = True

for keyspace in keyspaces:
# Build the nodetool command
cmd = nodetool_cmd + ['-h', host, '-p', port]
Expand All @@ -58,23 +61,38 @@ def check(self, instance):

percent_up_by_dc = defaultdict(float)
percent_total_by_dc = defaultdict(float)
# Send the stats per node and compute the stats per datacenter
for node in nodes:
if node['status'] == 'U' and node['owns'] != '?':
percent_up_by_dc[node['datacenter']] += float(node['owns'][:-1])
percent_total_by_dc[node['datacenter']] += float(node['owns'][:-1])

node_tags = ['node_address:%s' % node['address'],
'node_id:%s' % node['id'],
'datacenter:%s' % node['datacenter'],
'rack:%s' % node['rack']]

# nodetool prints `?` when it can't compute the value of `owns` for certain keyspaces (e.g. system);
# don't send the metric in this case
if node['owns'] != '?':
owns = float(node['owns'])
if node['status'] == 'U':
percent_up_by_dc[node['datacenter']] += owns
percent_total_by_dc[node['datacenter']] += owns
self.gauge('cassandra.nodetool.status.owns', owns,
tags=tags + node_tags + ['keyspace:%s' % keyspace])

# Send service check only once for each node
if send_service_checks:
status = AgentCheck.OK if node['status'] == 'U' else AgentCheck.CRITICAL
self.service_check('cassandra.nodetool.node_up', status, tags + node_tags)

self.gauge('cassandra.nodetool.status.status', 1 if node['status'] == 'U' else 0,
tags=tags + node_tags)
self.gauge('cassandra.nodetool.status.load', float(node['load']) * TO_BYTES[node['load_unit']],
tags=tags + node_tags)
self.gauge('cassandra.nodetool.status.owns', float(node['owns'][:-1]),
tags=tags + node_tags)

# All service checks have been sent, don't resend
send_service_checks = False

# Send the stats per datacenter
for datacenter, percent_up in percent_up_by_dc.items():
self.gauge('cassandra.nodetool.status.replication_availability', percent_up,
tags=tags + ['keyspace:%s' % keyspace, 'datacenter:%s' % datacenter])
Expand Down
3 changes: 2 additions & 1 deletion cassandra_nodetool/ci/cassandra_nodetool.rake
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ namespace :ci do
sh %(docker start #{container_name})

sh %(docker create --name #{container_name2} \
-e CASSANDRA_SEEDS="$(docker inspect --format='{{ .NetworkSettings.IPAddress }}' #{container_name})" cassandra:#{cassandra_nodetool_version})
-e CASSANDRA_SEEDS="$(docker inspect --format='{{ .NetworkSettings.IPAddress }}' #{container_name})" \
cassandra:#{cassandra_nodetool_version})
sh %(docker start #{container_name2})
end

Expand Down
7 changes: 5 additions & 2 deletions cassandra_nodetool/metadata.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
metric_name,metric_type,interval,unit_name,per_unit_name,description,orientation,integration,short_name
cassandra.nodetool.status.replication_availability,gauge,,,,Percentage of data available per keyspace times replication factor,+1,cassandra_nodetool,available data
cassandra.nodetool.status.replication_factor,gauge,,,,Replication factor per keyspace,0,cassandra_nodetool,replication factor
cassandra.nodetool.status.replication_availability,gauge,,percent,,Percentage of data available per keyspace times replication factor,1,cassandra_nodetool,available data
cassandra.nodetool.status.replication_factor,gauge,,,,Replication factor per keyspace,0,cassandra_nodetool,replication factor
cassandra.nodetool.status.status,gauge,,,,Node status: up (1) or down (0),1,cassandra_nodetool,node status
cassandra.nodetool.status.owns,gauge,,percent,,Percentage of the data owned by the node per datacenter times the replication factor,0,cassandra_nodetool,owns
cassandra.nodetool.status.load,gauge,,byte,,Amount of file system data under the cassandra data directory without snapshot content,0,cassandra_nodetool,load
22 changes: 12 additions & 10 deletions cassandra_nodetool/test_cassandra_nodetool.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class TestCassandraNodetoolCheck(AgentCheckTest):
'instances': [
{
'nodetool': 'docker exec %s nodetool' % CASSANDRA_CONTAINER_NAME,
'keyspaces': ['test'],
'keyspaces': ['system', 'test'],
'username': 'controlRole',
'password': 'QED',
'tags': ['foo', 'bar']
Expand All @@ -37,6 +37,7 @@ def test_check(self, mock_output):

self.run_check(self.config)

# test per datacenter metrics
self.assertEquals(mock_output.call_args[0][0],
['docker', 'exec', CASSANDRA_CONTAINER_NAME, 'nodetool', '-h', 'localhost', '-p',
'7199', '-u', 'controlRole', '-pw', 'QED', 'status', '--', 'test'])
Expand All @@ -48,15 +49,14 @@ def test_check(self, mock_output):
tags=['keyspace:test', 'datacenter:dc1', 'foo', 'bar'])
self.assertMetric('cassandra.nodetool.status.replication_factor', value=2,
tags=['keyspace:test', 'datacenter:dc2', 'foo', 'bar'])
self.assertMetric('cassandra.nodetool.status.status', value=1,
tags=['datacenter:dc2', 'node_id:e521a2a4-39d3-4311-a195-667bf56450f4',
'node_address:172.21.0.4', 'rack:RAC1', 'foo', 'bar'])
self.assertMetric('cassandra.nodetool.status.owns', value=100,
tags=['datacenter:dc2', 'node_id:e521a2a4-39d3-4311-a195-667bf56450f4',
'node_address:172.21.0.4', 'rack:RAC1', 'foo', 'bar'])
self.assertMetric('cassandra.nodetool.status.load', value=223340,
tags=['datacenter:dc2', 'node_id:e521a2a4-39d3-4311-a195-667bf56450f4',
'node_address:172.21.0.4', 'rack:RAC1', 'foo', 'bar'])
# test per node metrics
tags = ['datacenter:dc2', 'node_id:e521a2a4-39d3-4311-a195-667bf56450f4',
'node_address:172.21.0.4', 'rack:RAC1', 'foo', 'bar']
self.assertMetric('cassandra.nodetool.status.status', value=1, tags=tags)
self.assertMetric('cassandra.nodetool.status.owns', value=100, tags=tags + ['keyspace:test'])
self.assertMetric('cassandra.nodetool.status.load', value=223340, tags=tags)
self.assertServiceCheckOK('cassandra.nodetool.node_up', count=4)
self.assertServiceCheckCritical('cassandra.nodetool.node_up', count=1)

@attr(requires='cassandra_nodetool')
def test_integration(self):
Expand All @@ -66,3 +66,5 @@ def test_integration(self):
tags=['keyspace:test', 'datacenter:datacenter1', 'foo', 'bar'])
self.assertMetric('cassandra.nodetool.status.replication_factor', value=2,
tags=['keyspace:test', 'datacenter:datacenter1', 'foo', 'bar'])
# We should get only 2 of those: none are sent for the `system` keyspace because nodetool reports `owns` as `?` there
self.assertMetric('cassandra.nodetool.status.owns', count=2)

0 comments on commit 4a15ce6

Please sign in to comment.