Github user beobal commented on a diff in the pull request:
https://github.com/apache/cassandra-dtest/pull/37#discussion_r218336055
--- Diff: repair_tests/incremental_repair_test.py ---
@@ -918,3 +931,196 @@ def test_subrange(self):
self.assertRepairedAndUnrepaired(node1, 'ks')
self.assertRepairedAndUnrepaired(node2, 'ks')
self.assertRepairedAndUnrepaired(node3, 'ks')
+
+ @since('4.0')
+ def test_repaired_tracking_with_partition_deletes(self):
+ """
+ check that when tracking repaired data status following a digest mismatch,
+ repaired data mismatches are marked as unconfirmed, as we may skip sstables
+ after the partition delete is encountered.
+ @jira_ticket CASSANDRA-14145
+ """
+ session, node1, node2 = self.setup_for_repaired_data_tracking()
+ stmt = SimpleStatement("INSERT INTO ks.tbl (k, c, v) VALUES (%s, %s, %s)")
+ stmt.consistency_level = ConsistencyLevel.ALL
+ for i in range(10):
+ session.execute(stmt, (i, i, i))
+
+ for node in self.cluster.nodelist():
+ node.flush()
+ self.assertNoRepairedSSTables(node, 'ks')
+
+ node1.repair(options=['ks'])
+ node2.stop(wait_other_notice=True)
+
+ session.execute("delete from ks.tbl where k = 5")
+
+ node1.flush()
+ node2.start(wait_other_notice=True)
+
+ # expect unconfirmed inconsistencies as the partition deletes cause some sstables to be skipped
+ with JolokiaAgent(node1) as jmx:
+ self.query_and_check_repaired_mismatches(jmx, session, "SELECT * FROM ks.tbl
WHERE k = 5",
+ expect_unconfirmed_inconsistencies=True)
+ self.query_and_check_repaired_mismatches(jmx, session, "SELECT * FROM ks.tbl
WHERE k = 5 AND c = 5",
+ expect_unconfirmed_inconsistencies=True)
+ # no digest reads for range queries so blocking read repair metric isn't incremented
+ # *all* sstables are read for partition ranges too, and as the repaired set is still in sync there should
+ # be no inconsistencies
+ self.query_and_check_repaired_mismatches(jmx, session, "SELECT * FROM ks.tbl",
expect_read_repair=False)
+
+ @since('4.0')
+ def test_repaired_tracking_with_varying_sstable_sets(self):
+ """
+ verify that repaired data digests are computed over the merged data for each replica
+ and that the particular number of sstables on each doesn't affect the comparisons.
+ both replicas start with the same repaired set, comprising 2 sstables. node1's is
+ then compacted and additional unrepaired data added (which overwrites some in the
+ repaired set). We expect the repaired digests to still match as the tracking will
+ force all sstables containing the partitions to be read.
+ there are two variants of this, for single partition slice & names reads and range reads
+ @jira_ticket CASSANDRA-14145
+ """
+ session, node1, node2 = self.setup_for_repaired_data_tracking()
+ stmt = SimpleStatement("INSERT INTO ks.tbl (k, c, v) VALUES (%s, %s, %s)")
+ stmt.consistency_level = ConsistencyLevel.ALL
+ for i in range(10):
+ session.execute(stmt, (i, i, i))
+
+ for node in self.cluster.nodelist():
+ node.flush()
+
+ for i in range(10,20):
+ session.execute(stmt, (i, i, i))
+
+ for node in self.cluster.nodelist():
+ node.flush()
+ self.assertNoRepairedSSTables(node, 'ks')
+
+ node1.repair(options=['ks'])
+ node2.stop(wait_other_notice=True)
+
+ session.execute("insert into ks.tbl (k, c, v) values (5, 5, 55)")
+ session.execute("insert into ks.tbl (k, c, v) values (15, 15, 155)")
+ node1.flush()
+ node1.compact()
+ node1.compact()
+ node2.start(wait_other_notice=True)
+
+ # we don't expect any inconsistencies as all repaired data is read on both replicas
+ with JolokiaAgent(node1) as jmx:
+ self.query_and_check_repaired_mismatches(jmx, session, "SELECT * FROM ks.tbl
WHERE k = 5")
+ self.query_and_check_repaired_mismatches(jmx, session, "SELECT * FROM ks.tbl
WHERE k = 5 AND c = 5")
+ # no digest reads for range queries so read repair metric isn't incremented
+ self.query_and_check_repaired_mismatches(jmx, session, "SELECT * FROM ks.tbl",
expect_read_repair=False)
+
+ @since('4.0')
+ def test_repaired_tracking_with_mismatching_replicas(self):
+ """
+ there are two variants of this, for single partition slice & names reads and range reads
+ @jira_ticket CASSANDRA-14145
+ """
+ session, node1, node2 = self.setup_for_repaired_data_tracking()
+ stmt = SimpleStatement("INSERT INTO ks.tbl (k, c, v) VALUES (%s, %s, %s)")
+ stmt.consistency_level = ConsistencyLevel.ALL
+ for i in range(10):
+ session.execute(stmt, (i, i, i))
+
+ for node in self.cluster.nodelist():
+ node.flush()
+
+ for i in range(10,20):
+ session.execute(stmt, (i, i, i))
+
+ for node in self.cluster.nodelist():
+ node.flush()
+ self.assertNoRepairedSSTables(node, 'ks')
+
+ # stop node 2 and mark its sstables repaired
+ node2.stop(wait_other_notice=True)
+ node2.run_sstablerepairedset(keyspace='ks')
+ # before restarting node2 overwrite some data on node1 to trigger digest mismatches
+ session.execute("insert into ks.tbl (k, c, v) values (5, 5, 55)")
+ node2.start(wait_for_binary_proto=True)
+
+ out1 = node1.run_sstablemetadata(keyspace='ks').stdout
+ out2 = node2.run_sstablemetadata(keyspace='ks').stdout
+
+ # verify the repaired at times for the sstables on node1/node2
+ assert all(t == 0 for t in [int(x) for x in [y.split(' ')[0] for y in findall('(?<=Repaired
at: ).*', out1)]])
+ assert all(t > 0 for t in [int(x) for x in [y.split(' ')[0] for y in findall('(?<=Repaired
at: ).*', out2)]])
+
+ # we expect inconsistencies due to sstables being marked repaired on one replica only
+ # these are marked confirmed because no sessions are pending & all sstables are
+ # skipped due to partition deletes
+ with JolokiaAgent(node1) as jmx:
+ self.query_and_check_repaired_mismatches(jmx, session, "SELECT * FROM ks.tbl
WHERE k = 5",
+ expect_confirmed_inconsistencies=True)
+ self.query_and_check_repaired_mismatches(jmx, session, "SELECT * FROM ks.tbl
WHERE k = 5 AND c = 5",
+ expect_confirmed_inconsistencies=True)
+ # no digest reads for range queries so read repair metric isn't incremented
+ self.query_and_check_repaired_mismatches(jmx, session, "SELECT * FROM ks.tbl",
expect_read_repair=False)
+
+ def setup_for_repaired_data_tracking(self):
+ self.fixture_dtest_setup.setup_overrides.cluster_options = ImmutableMapping({'hinted_handoff_enabled':
'false',
+
'num_tokens': 1,
+
'commitlog_sync_period_in_ms': 500})
+ self.fixture_dtest_setup.init_default_config()
+ self.cluster.populate(2)
+ node1, node2 = self.cluster.nodelist()
+ remove_perf_disable_shared_mem(node1) # necessary for jmx
+ self.cluster.start()
+
+ session = self.patient_exclusive_cql_connection(node1)
+ session.execute("CREATE KEYSPACE ks WITH REPLICATION={'class':'SimpleStrategy',
'replication_factor': 2}")
+ session.execute("CREATE TABLE ks.tbl (k INT, c INT, v INT, PRIMARY KEY (k,c))
with read_repair='NONE'")
+ return session, node1, node2
+
+ def query_and_check_repaired_mismatches(self, jmx, session, query,
+ expect_read_repair=True,
+ expect_unconfirmed_inconsistencies=False,
+ expect_confirmed_inconsistencies=False):
+
+ rr_count = make_mbean('metrics', type='ReadRepair', name='ReconcileRead')
+ unconfirmed_count = make_mbean('metrics', type='Table,keyspace=ks', name='RepairedDataInconsistenciesUnconfirmed,scope=tbl')
+ confirmed_count = make_mbean('metrics', type='Table,keyspace=ks', name='RepairedDataInconsistenciesConfirmed,scope=tbl')
+
+ rr_before = self.get_attribute_count(jmx, rr_count)
+ uc_before = self.get_attribute_count(jmx, unconfirmed_count)
+ cc_before = self.get_attribute_count(jmx, confirmed_count)
+
+ stmt = SimpleStatement(query)
+ stmt.consistency_level = ConsistencyLevel.ALL
+ session.execute(stmt)
+
+ rr_after = self.get_attribute_count(jmx, rr_count)
+ uc_after = self.get_attribute_count(jmx, unconfirmed_count)
+ cc_after = self.get_attribute_count(jmx, confirmed_count)
+
+ logger.debug("RR: {before}, {after}".format(before=rr_before, after=rr_after))
+ logger.debug("UI: {before}, {after}".format(before=uc_before, after=uc_after))
+ logger.debug("CI: {before}, {after}".format(before=cc_before, after=cc_after))
+
+ if expect_read_repair:
+ assert rr_after > rr_before
+ else:
+ assert rr_after == rr_before
+
+ if expect_unconfirmed_inconsistencies:
+ assert uc_after > uc_before
+ else:
+ assert uc_after == uc_before
+
+ if expect_confirmed_inconsistencies:
+ assert cc_after > cc_before
+ else:
+ assert cc_after == cc_before
+
+ def get_attribute_count(self, jmx, bean):
+ # the MBean may not have been initialized, in which case Jolokia agent will return
+ # an HTTP 404 response. If we receive such, we know that the count can only be 0
+ if jmx.has_mbean(bean):
+ # expect 0 digest mismatches
--- End diff --
Removed; it was a hangover from an earlier version of the test where only the digest mismatch
count was read via JMX.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: pr-unsubscribe@cassandra.apache.org
For additional commands, e-mail: pr-help@cassandra.apache.org
|