diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv
index df40a2af19556..da02d8dbac463 100644
--- a/LICENSE-3rdparty.csv
+++ b/LICENSE-3rdparty.csv
@@ -16,6 +16,7 @@ boto,PyPI,MIT,Mitch Garnaat
 boto3,PyPI,Apache-2.0,Amazon Web Services
 botocore,PyPI,Apache-2.0,Amazon Web Services
 cachetools,PyPI,MIT,Thomas Kemmer
+check-postgres,"https://github.com/bucardo/",BSD-2-Clause,Greg Sabino Mullane
 clickhouse-cityhash,PyPI,MIT,Alexander [Amper] Marshalov
 clickhouse-driver,PyPI,MIT,Konstantin Lebedev
 contextlib2,PyPI,PSF,Nick Coghlan
diff --git a/postgres/LICENSE-3rdparty-extra.csv b/postgres/LICENSE-3rdparty-extra.csv
new file mode 100644
index 0000000000000..d7821f1e2821c
--- /dev/null
+++ b/postgres/LICENSE-3rdparty-extra.csv
@@ -0,0 +1,2 @@
+Component,Origin,License,Copyright
+check-postgres,"https://github.com/bucardo/",BSD-2-Clause,Greg Sabino Mullane
diff --git a/postgres/datadog_checks/postgres/relationsmanager.py b/postgres/datadog_checks/postgres/relationsmanager.py
index 96445a3df11cc..60744f0fcf912 100644
--- a/postgres/datadog_checks/postgres/relationsmanager.py
+++ b/postgres/datadog_checks/postgres/relationsmanager.py
@@ -111,7 +111,6 @@
   {relations}""",
 }
 
-
 # The pg_statio_all_tables view will contain one row for each table in the current database,
 # showing statistics about I/O on that specific table. The pg_statio_user_tables views contain the same information,
 # but filtered to only show user tables.
@@ -136,8 +135,63 @@
     'relation': True,
 }
 
+# adapted from https://wiki.postgresql.org/wiki/Show_database_bloat and https://github.com/bucardo/check_postgres/
+BLOAT_QUERY = """
+SELECT
+    schemaname, relname, iname,
+    ROUND((CASE WHEN otta=0 THEN 0.0 ELSE sml.relpages::float/otta END)::numeric,1) AS tbloat
+FROM (
+    SELECT
+        schemaname, tablename, cc.relname as relname, cc.reltuples, cc.relpages, bs,
+        CEIL((cc.reltuples*((datahdr+ma-
+            (CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)) AS otta,
+        COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
+        COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols
+    FROM (
+        SELECT
+            ma,bs,schemaname,tablename,
+            (datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
+            (maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
+        FROM (
+            SELECT
+                schemaname, tablename, hdr, ma, bs,
+                SUM((1-null_frac)*avg_width) AS datawidth,
+                MAX(null_frac) AS maxfracsum,
+                hdr+(
+                    SELECT 1+count(*)/8
+                    FROM pg_stats s2
+                    WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
+                ) AS nullhdr
+            FROM pg_stats s, (
+                SELECT
+                    (SELECT current_setting('block_size')::numeric) AS bs,
+                    CASE WHEN substring(v,12,3) IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
+                    CASE WHEN v ~ 'mingw32' THEN 8 ELSE 4 END AS ma
+                FROM (SELECT version() AS v) AS foo
+            ) AS constants
+            GROUP BY 1,2,3,4,5
+        ) AS foo
+    ) AS rs
+    JOIN pg_class cc ON cc.relname = rs.tablename
+    JOIN pg_namespace nn ON cc.relnamespace = nn.oid
+        AND nn.nspname = rs.schemaname
+        AND nn.nspname <> 'information_schema'
+    LEFT JOIN pg_index i ON indrelid = cc.oid
+    LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid
+) AS sml WHERE {relations};
+"""
+
+# The estimated table bloat
+BLOAT_METRICS = {
+    'descriptors': [('schemaname', 'schema'), ('relname', 'table'), ('iname', 'index')],
+    'metrics': {
+        'tbloat': ('postgresql.table_bloat', AgentCheck.gauge),
+    },
+    'query': BLOAT_QUERY,
+    'relation': True,
+}
 
-RELATION_METRICS = [LOCK_METRICS, REL_METRICS, IDX_METRICS, SIZE_METRICS, STATIO_METRICS]
+RELATION_METRICS = [LOCK_METRICS, REL_METRICS, IDX_METRICS, SIZE_METRICS, STATIO_METRICS, BLOAT_METRICS]
 
 
 class RelationsManager(object):
diff --git a/postgres/metadata.csv b/postgres/metadata.csv
index 4d18ece928af2..5dc758745ce20 100644
--- a/postgres/metadata.csv
+++ b/postgres/metadata.csv
@@ -52,6 +52,7 @@ postgresql.heap_blocks_read,gauge,,block,second,The number of disk blocks read f
 postgresql.heap_blocks_hit,gauge,,hit,second,The number of buffer hits in this table.,0,postgres,heap blks hit
 postgresql.index_blocks_read,gauge,,block,second,The number of disk blocks read from all indexes on this table.,0,postgres,idx blks read
 postgresql.index_blocks_hit,gauge,,hit,second,The number of buffer hits in all indexes on this table.,0,postgres,idx blks hit
+postgresql.table_bloat,gauge,,percent,,The estimated percentage of table bloat.,0,postgres,tbloat
 postgresql.toast_blocks_read,gauge,,block,second,The number of disk blocks read from this table's TOAST table.,0,postgres,toast blks read
 postgresql.toast_blocks_hit,gauge,,hit,second,The number of buffer hits in this table's TOAST table.,0,postgres,toast blks hit
 postgresql.toast_index_blocks_read,gauge,,block,second,The number of disk blocks read from this table's TOAST table index.,0,postgres,toast idx blks read
diff --git a/postgres/tests/test_relations.py b/postgres/tests/test_relations.py
index 1c31ab559dde7..d7fa6960085b2 100644
--- a/postgres/tests/test_relations.py
+++ b/postgres/tests/test_relations.py
@@ -76,6 +76,26 @@ def test_relations_metrics(aggregator, integration_check, pg_instance):
         aggregator.assert_metric(name, count=1, tags=expected_size_tags)
 
 
+@pytest.mark.integration
+@pytest.mark.usefixtures('dd_environment')
+def test_bloat_metric(aggregator, integration_check, pg_instance):
+    pg_instance['relations'] = ['pg_index']
+
+    postgres_check = integration_check(pg_instance)
+    postgres_check.check(pg_instance)
+
+    expected_tags = pg_instance['tags'] + [
+        'server:{}'.format(pg_instance['host']),
+        'port:{}'.format(pg_instance['port']),
+        'db:%s' % pg_instance['dbname'],
+        'table:pg_index',
+        'schema:pg_catalog',
+        'index:pg_index_indrelid_index',
+    ]
+
+    aggregator.assert_metric('postgresql.table_bloat', count=1, tags=expected_tags)
+
+
 @pytest.mark.integration
 @pytest.mark.usefixtures('dd_environment')
 def test_relations_metrics_regex(aggregator, integration_check, pg_instance):
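
Note for reviewers: the sketch below is a minimal, hypothetical illustration of how a definition like `BLOAT_METRICS` gets turned into submitted gauges; it is not the check's actual code path. The `descriptors` list pairs leading query columns with tag keys, the `metrics` dict pairs the remaining columns with a metric name and submit method, and the `{relations}` placeholder in `BLOAT_QUERY` is presumably filled in by `RelationsManager` from the configured `relations` list (the new test configures `relations: ['pg_index']`). The helper `rows_to_gauges` and the sample row are made up for this example.

```python
# Hypothetical sketch only -- not the integration's code. It mimics how a
# relation-metric definition maps query rows to gauge submissions:
# descriptor columns become tags, the remaining columns become metric values.

BLOAT_METRICS = {
    'descriptors': [('schemaname', 'schema'), ('relname', 'table'), ('iname', 'index')],
    # AgentCheck.gauge is replaced by a plain string to keep the sketch self-contained.
    'metrics': {'tbloat': ('postgresql.table_bloat', 'gauge')},
    'relation': True,
}


def rows_to_gauges(metric_def, rows):
    """Yield (metric_name, value, tags) for each row returned by the query.

    Rows are assumed to be ordered as descriptor columns first and metric
    columns last, mirroring the SELECT list of BLOAT_QUERY.
    """
    descriptors = metric_def['descriptors']
    metric_cols = list(metric_def['metrics'])
    for row in rows:
        tags = ['{}:{}'.format(tag, value) for (_, tag), value in zip(descriptors, row)]
        for col, value in zip(metric_cols, row[len(descriptors):]):
            name, _submit_method = metric_def['metrics'][col]
            yield name, value, tags


# One row shaped like BLOAT_QUERY's output for the pg_index relation used in the test.
sample_row = ('pg_catalog', 'pg_index', 'pg_index_indrelid_index', 1.0)
for name, value, tags in rows_to_gauges(BLOAT_METRICS, [sample_row]):
    print(name, value, tags)
    # -> postgresql.table_bloat 1.0 ['schema:pg_catalog', 'table:pg_index', 'index:pg_index_indrelid_index']
```

The tags produced this way line up with the `table`, `schema`, and `index` entries asserted in `test_bloat_metric`; the `server`, `port`, and `db` tags in that test appear to come from the instance configuration rather than from the query itself.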