From d8e6faa8d26ecc4a48c9334feaeb752e1cd65278 Mon Sep 17 00:00:00 2001 From: Brendan Shaklovitz Date: Fri, 13 Jul 2018 11:15:51 -0500 Subject: [PATCH 1/2] Make cassandra reconnect down hosts. * Fix #767 by enabling gocql setting `ReconnectInterval` to reconnect to down Cassandra hosts at a regular interval. Signed-off-by: Brendan Shaklovitz --- pkg/cassandra/config/config.go | 5 ++++ plugin/storage/cassandra/options.go | 43 +++++++++++++++++------------ 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/pkg/cassandra/config/config.go b/pkg/cassandra/config/config.go index a6fb09299c2..f33a577c511 100644 --- a/pkg/cassandra/config/config.go +++ b/pkg/cassandra/config/config.go @@ -31,6 +31,7 @@ type Configuration struct { Keyspace string `validate:"nonzero"` ConnectionsPerHost int `validate:"min=1" yaml:"connections_per_host"` Timeout time.Duration `validate:"min=500"` + ReconnectInterval time.Duration `validate:"min=500" yaml:"reconnect_interval"` SocketKeepAlive time.Duration `validate:"min=0" yaml:"socket_keep_alive"` MaxRetryAttempts int `validate:"min=0" yaml:"max_retry_attempt"` ProtoVersion int `yaml:"proto_version"` @@ -74,6 +75,9 @@ func (c *Configuration) ApplyDefaults(source *Configuration) { if c.Timeout == 0 { c.Timeout = source.Timeout } + if c.ReconnectInterval == 0 { + c.ReconnectInterval = source.ReconnectInterval + } if c.Port == 0 { c.Port = source.Port } @@ -109,6 +113,7 @@ func (c *Configuration) NewCluster() *gocql.ClusterConfig { cluster.Keyspace = c.Keyspace cluster.NumConns = c.ConnectionsPerHost cluster.Timeout = c.Timeout + cluster.ReconnectInterval = c.ReconnectInterval cluster.SocketKeepalive = c.SocketKeepAlive if c.ProtoVersion > 0 { cluster.ProtoVersion = c.ProtoVersion diff --git a/plugin/storage/cassandra/options.go b/plugin/storage/cassandra/options.go index 3a99340d7d7..3409fe6a7c7 100644 --- a/plugin/storage/cassandra/options.go +++ b/plugin/storage/cassandra/options.go @@ -26,24 +26,25 @@ import ( const ( // session settings - suffixEnabled = ".enabled" - suffixConnPerHost = ".connections-per-host" - suffixMaxRetryAttempts = ".max-retry-attempts" - suffixTimeout = ".timeout" - suffixServers = ".servers" - suffixPort = ".port" - suffixKeyspace = ".keyspace" - suffixConsistency = ".consistency" - suffixProtoVer = ".proto-version" - suffixSocketKeepAlive = ".socket-keep-alive" - suffixUsername = ".username" - suffixPassword = ".password" - suffixTLS = ".tls" - suffixCert = ".tls.cert" - suffixKey = ".tls.key" - suffixCA = ".tls.ca" - suffixServerName = ".tls.server-name" - suffixVerifyHost = ".tls.verify-host" + suffixEnabled = ".enabled" + suffixConnPerHost = ".connections-per-host" + suffixMaxRetryAttempts = ".max-retry-attempts" + suffixTimeout = ".timeout" + suffixReconnectInterval = ".reconnect-interval" + suffixServers = ".servers" + suffixPort = ".port" + suffixKeyspace = ".keyspace" + suffixConsistency = ".consistency" + suffixProtoVer = ".proto-version" + suffixSocketKeepAlive = ".socket-keep-alive" + suffixUsername = ".username" + suffixPassword = ".password" + suffixTLS = ".tls" + suffixCert = ".tls.cert" + suffixKey = ".tls.key" + suffixCA = ".tls.ca" + suffixServerName = ".tls.server-name" + suffixVerifyHost = ".tls.verify-host" // common storage settings suffixSpanStoreWriteCacheTTL = ".span-store-write-cache-ttl" @@ -83,6 +84,7 @@ func NewOptions(primaryNamespace string, otherNamespaces ...string) *Options { Keyspace: "jaeger_v1_test", ProtoVersion: 4, ConnectionsPerHost: 2, + ReconnectInterval: 60 * time.Second, }, servers: "127.0.0.1", namespace: primaryNamespace, @@ -130,6 +132,10 @@ func addFlags(flagSet *flag.FlagSet, nsConfig *namespaceConfig) { nsConfig.namespace+suffixTimeout, nsConfig.Timeout, "Timeout used for queries") + flagSet.Duration( + nsConfig.namespace+suffixReconnectInterval, + nsConfig.ReconnectInterval, + "Reconnect interval to retry connecting to downed hosts") flagSet.String( nsConfig.namespace+suffixServers, nsConfig.servers, @@ -204,6 +210,7 @@ func (cfg *namespaceConfig) initFromViper(v *viper.Viper) { cfg.ConnectionsPerHost = v.GetInt(cfg.namespace + suffixConnPerHost) cfg.MaxRetryAttempts = v.GetInt(cfg.namespace + suffixMaxRetryAttempts) cfg.Timeout = v.GetDuration(cfg.namespace + suffixTimeout) + cfg.ReconnectInterval = v.GetDuration(cfg.namespace + suffixReconnectInterval) cfg.servers = v.GetString(cfg.namespace + suffixServers) cfg.Port = v.GetInt(cfg.namespace + suffixPort) cfg.Keyspace = v.GetString(cfg.namespace + suffixKeyspace) From 67352afacb33716d75e42a3f437605ce20ef0f75 Mon Sep 17 00:00:00 2001 From: Brendan Shaklovitz Date: Fri, 13 Jul 2018 12:07:15 -0500 Subject: [PATCH 2/2] Add cassandra `ReconnectInterval` test. Signed-off-by: Brendan Shaklovitz --- plugin/storage/cassandra/options_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/plugin/storage/cassandra/options_test.go b/plugin/storage/cassandra/options_test.go index 9e817808ad0..262172b4edc 100644 --- a/plugin/storage/cassandra/options_test.go +++ b/plugin/storage/cassandra/options_test.go @@ -41,6 +41,7 @@ func TestOptions(t *testing.T) { assert.Equal(t, primary.Keyspace, aux.Keyspace) assert.Equal(t, primary.Servers, aux.Servers) assert.Equal(t, primary.ConnectionsPerHost, aux.ConnectionsPerHost) + assert.Equal(t, primary.ReconnectInterval, aux.ReconnectInterval) } func TestOptionsWithFlags(t *testing.T) { @@ -50,6 +51,7 @@ func TestOptionsWithFlags(t *testing.T) { "--cas.keyspace=jaeger", "--cas.servers=1.1.1.1,2.2.2.2", "--cas.connections-per-host=42", + "--cas.reconnect-interval=42s", "--cas.max-retry-attempts=42", "--cas.timeout=42s", "--cas.port=4242", @@ -75,6 +77,7 @@ func TestOptionsWithFlags(t *testing.T) { assert.Equal(t, 42, aux.ConnectionsPerHost) assert.Equal(t, 42, aux.MaxRetryAttempts) assert.Equal(t, 42*time.Second, aux.Timeout) + assert.Equal(t, 42*time.Second, aux.ReconnectInterval) assert.Equal(t, 4242, aux.Port) assert.Equal(t, "", aux.Consistency, "aux storage does not inherit consistency from primary") assert.Equal(t, 3, aux.ProtoVersion)