From ed9ec657c76122bcab968d52b5bd7445d0398516 Mon Sep 17 00:00:00 2001 From: Aaron Mildenstein Date: Fri, 18 Jul 2014 13:28:33 -0500 Subject: [PATCH 1/6] Updating testing to work with new date formatting --- test_curator/integration/__init__.py | 12 ++++++++---- test_curator/test_curator.py | 12 ++++++------ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/test_curator/integration/__init__.py b/test_curator/integration/__init__.py index acb41589..56c82eee 100644 --- a/test_curator/integration/__init__.py +++ b/test_curator/integration/__init__.py @@ -83,10 +83,14 @@ def run_curator(self, **kwargs): def create_indices(self, count, unit=None): now = datetime.utcnow() unit = unit if unit else self.args['time_unit'] - if unit == 'days': - format = self.args['separator'].join(('%Y', '%m', '%d')) - else: - format = self.args['separator'].join(('%Y', '%m', '%d', '%H')) + if unit == 'months': + format = '%Y.%m' + elif unit == 'weeks': + format = '%Y.%U' + elif unit == 'days': + format = '%Y.%m.%d' + elif unit == 'hours': + format = '%Y.%m.%d.%H' step = timedelta(**{unit: 1}) for x in range(count): diff --git a/test_curator/test_curator.py b/test_curator/test_curator.py index aaea1a2f..086bf2ef 100644 --- a/test_curator/test_curator.py +++ b/test_curator/test_curator.py @@ -7,13 +7,13 @@ class TestUtils(TestCase): def test_get_index_time(self): - for text, sep, dt in [ - ('2014.01.19', '.', datetime(2014, 1, 19)), - ('2014-01-19', '-', datetime(2014, 1, 19)), - ('2010-12-29', '-', datetime(2010, 12, 29)), - ('2010.12.29.12', '.', datetime(2010, 12, 29, 12)), + for text, datestring, dt in [ + ('2014.01.19', '%Y.%m.%d', datetime(2014, 1, 19)), + ('2014-01-19', '%Y.%m.%d', datetime(2014, 1, 19)), + ('2010-12-29', '%Y.%m.%d', datetime(2010, 12, 29)), + ('2010.12.29.12', '%Y.%m.%d.%H', datetime(2010, 12, 29, 12)), ]: - self.assertEqual(dt, curator.get_index_time(text, sep)) + self.assertEqual(dt, curator.get_index_time(text, datestring)) class TestShowIndices(TestCase): def test_show_indices(self): From 8f907464a6c42235d830f1e2bc42182b6241e230 Mon Sep 17 00:00:00 2001 From: Aaron Mildenstein Date: Fri, 18 Jul 2014 14:18:23 -0500 Subject: [PATCH 2/6] Fixed tests to use new time formatting. --- test_curator/integration/test_snapshots.py | 8 ++++---- test_curator/integration/test_time_based.py | 14 +++++++------- test_curator/test_curator.py | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/test_curator/integration/test_snapshots.py b/test_curator/integration/test_snapshots.py index 651eefa4..11d53d8f 100644 --- a/test_curator/integration/test_snapshots.py +++ b/test_curator/integration/test_snapshots.py @@ -37,20 +37,20 @@ def test_snapshot_will_not_be_created_twice(self): def test_curator_will_create_and_delete_multiple_snapshots(self): self.create_indices(10) self.create_repository() - curator.command_loop(self.client, command='snapshot', older_than=3, delete_older_than=None, most_recent=None, repository=self.args['repository']) + curator.command_loop(self.client, command='snapshot', older_than=3, timestring='%Y.%m.%d', delete_older_than=None, most_recent=None, repository=self.args['repository']) result = curator.get_snapshot(self.client, self.args['repository'], '_all') self.assertEqual(7, len(result['snapshots'])) - curator.command_loop(self.client, command='snapshot', delete_older_than=6, repository=self.args['repository']) + curator.command_loop(self.client, command='snapshot', delete_older_than=6, timestring='%Y.%m.%d', repository=self.args['repository']) result = curator.get_snapshot(self.client, self.args['repository'], '_all') self.assertEqual(3, len(result['snapshots'])) def test_curator_will_manage_hourly_snapshots(self): self.create_indices(10, 'hours') self.create_repository() - curator.command_loop(self.client, time_unit='hours', command='snapshot', older_than=3, delete_older_than=None, most_recent=None, repository=self.args['repository']) + curator.command_loop(self.client, time_unit='hours', command='snapshot', older_than=3, timestring='%Y.%m.%d.%H', delete_older_than=None, most_recent=None, repository=self.args['repository']) result = curator.get_snapshot(self.client, self.args['repository'], '_all') self.assertEqual(7, len(result['snapshots'])) - curator.command_loop(self.client, time_unit='hours', command='snapshot', delete_older_than=6, repository=self.args['repository']) + curator.command_loop(self.client, time_unit='hours', command='snapshot', timestring='%Y.%m.%d.%H', delete_older_than=6, repository=self.args['repository']) result = curator.get_snapshot(self.client, self.args['repository'], '_all') self.assertEqual(3, len(result['snapshots'])) diff --git a/test_curator/integration/test_time_based.py b/test_curator/integration/test_time_based.py index 213081a7..a1df261c 100644 --- a/test_curator/integration/test_time_based.py +++ b/test_curator/integration/test_time_based.py @@ -7,13 +7,13 @@ class TestTimeBasedDeletion(CuratorTestCase): def test_curator_will_properly_delete_indices(self): self.create_indices(10) - curator.command_loop(self.client, command='delete', older_than=3) + curator.command_loop(self.client, command='delete', older_than=3, timestring='%Y.%m.%d') mtd = self.client.cluster.state(index=self.args['prefix'] + '*', metric='metadata') self.assertEquals(3, len(mtd['metadata']['indices'].keys())) def test_curator_will_properly_delete_hourly_indices(self): self.create_indices(10, 'hours') - curator.command_loop(self.client, command='delete', time_unit='hours', older_than=3) + curator.command_loop(self.client, command='delete', time_unit='hours', older_than=3, timestring='%Y.%m.%d.%H') mtd = self.client.cluster.state(index=self.args['prefix'] + '*', metric='metadata') self.assertEquals(3, len(mtd['metadata']['indices'].keys())) @@ -25,7 +25,7 @@ def test_find_closed_indices(self): # all indices should be expired index_list = curator.get_object_list(self.client, prefix='l-') - expired = list(curator.find_expired_data(self.client, time_unit='days', older_than=1, object_list=index_list, + expired = list(curator.find_expired_data(self.client, time_unit='days', older_than=1, timestring='%Y.%m.%d', object_list=index_list, utc_now=datetime(2014, 1, 8, 3, 45, 50), prefix='l-')) self.assertEquals( @@ -42,7 +42,7 @@ def test_find_indices_ignores_indices_with_different_prefix_or_time_unit(self): self.create_index('logstash-2012.01.01.00') index_list = curator.get_object_list(self.client, prefix=self.args['prefix']) - expired = list(curator.find_expired_data(self.client, time_unit='hours', older_than=1, object_list=index_list)) + expired = list(curator.find_expired_data(self.client, time_unit='hours', older_than=1, timestring='%Y.%m.%d.%H', object_list=index_list)) self.assertEquals(1, len(expired)) self.assertEquals('logstash-2012.01.01.00', expired[0][0]) @@ -53,7 +53,7 @@ def test_find_reports_correct_time_interval_from_cutoff(self): self.create_index('l-2014.01.03') index_list = curator.get_object_list(self.client, prefix='l-') - expired = list(curator.find_expired_data(self.client, time_unit='days', older_than=1, object_list=index_list, + expired = list(curator.find_expired_data(self.client, time_unit='days', older_than=1, timestring='%Y.%m.%d', object_list=index_list, utc_now=datetime(2014, 1, 4, 3, 45, 50), prefix='l-')) self.assertEquals( [ @@ -70,7 +70,7 @@ def test_curator_will_properly_alias_and_unalias_indices(self): self.create_index('dummy') self.client.indices.put_alias(index='dummy', name=alias) self.create_indices(10) - curator.alias_loop(self.client, prefix=self.args['prefix'], alias=alias, alias_older_than=3, unalias_older_than=None) + curator.alias_loop(self.client, prefix=self.args['prefix'], alias=alias, timestring='%Y.%m.%d', alias_older_than=3, unalias_older_than=None) self.assertEquals(8, len(self.client.indices.get_alias(name=alias))) - curator.alias_loop(self.client, prefix=self.args['prefix'], alias=alias, alias_older_than=None, unalias_older_than=3) + curator.alias_loop(self.client, prefix=self.args['prefix'], alias=alias, timestring='%Y.%m.%d', alias_older_than=None, unalias_older_than=3) self.assertEquals(1, len(self.client.indices.get_alias(name=alias))) diff --git a/test_curator/test_curator.py b/test_curator/test_curator.py index 086bf2ef..705b7fdc 100644 --- a/test_curator/test_curator.py +++ b/test_curator/test_curator.py @@ -9,8 +9,8 @@ class TestUtils(TestCase): def test_get_index_time(self): for text, datestring, dt in [ ('2014.01.19', '%Y.%m.%d', datetime(2014, 1, 19)), - ('2014-01-19', '%Y.%m.%d', datetime(2014, 1, 19)), - ('2010-12-29', '%Y.%m.%d', datetime(2010, 12, 29)), + ('2014-01-19', '%Y-%m-%d', datetime(2014, 1, 19)), + ('2010-12-29', '%Y-%m-%d', datetime(2010, 12, 29)), ('2010.12.29.12', '%Y.%m.%d.%H', datetime(2010, 12, 29, 12)), ]: self.assertEqual(dt, curator.get_index_time(text, datestring)) @@ -48,7 +48,7 @@ def test_all_daily_indices_found(self): 'prefix-2013.01.03.10': True, } index_list = curator.get_object_list(client, prefix='prefix-') - expired = curator.find_expired_data(client, object_list=index_list, time_unit='days', older_than=4, prefix='prefix-', utc_now=datetime(2014, 1, 3)) + expired = curator.find_expired_data(client, object_list=index_list, time_unit='days', older_than=4, prefix='prefix-', timestring='%Y.%m.%d', utc_now=datetime(2014, 1, 3)) expired = list(expired) From 3f079221c752e900b3e3521bd378c32b5fd38230 Mon Sep 17 00:00:00 2001 From: Aaron Mildenstein Date: Fri, 18 Jul 2014 17:02:40 -0500 Subject: [PATCH 3/6] Tentative fix for week of year date math --- curator/curator.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/curator/curator.py b/curator/curator.py index 3f6f4ca6..4a94b468 100755 --- a/curator/curator.py +++ b/curator/curator.py @@ -181,12 +181,31 @@ def show(client, **kwargs): print('{0}'.format(snapshot)) sys.exit(0) +def iso_year_start(iso_year): + "The gregorian calendar date of the first day of the given ISO year" + fourth_jan = datetime.date(iso_year, 1, 4) + delta = datetime.timedelta(fourth_jan.isoweekday()-1) + return fourth_jan - delta + +def iso_to_gregorian(iso_year, iso_week, iso_day): + "Gregorian calendar date for the given ISO year, week and day" + year_start = iso_year_start(iso_year) + return year_start + datetime.timedelta(days=iso_day-1, weeks=iso_week-1) + def get_index_time(index_timestamp, timestring): """ Gets the time of the index. :param index_timestamp: A string of the format timestring :return The creation time (datetime) of the index. """ + # Compensate for week of year by appending '%w' to the timestring + # and '1' (Monday) to index_timestamp + if '%W' in timestring: + timestring += '%w' + index_timestamp += '1' + elif '%U' in timestring: + timestring += '%w' + index_timestamp += '1' return datetime.strptime(index_timestamp, timestring) def get_indices(client, prefix='logstash-', exclude_pattern=None): From 0bd124530713fb9361dce5b2b9f725a4add2589a Mon Sep 17 00:00:00 2001 From: Aaron Mildenstein Date: Fri, 18 Jul 2014 18:16:07 -0500 Subject: [PATCH 4/6] Fixes and working tests Cleaned out months (very small percentage of users would use this) Hacked in a few work-arounds to make week math work Pruned unneeded code (iso date math) I dabbled with. --- curator/curator.py | 45 ++++++++------------- test_curator/integration/__init__.py | 6 +-- test_curator/integration/test_snapshots.py | 10 +++++ test_curator/integration/test_time_based.py | 24 +++++++++++ test_curator/test_curator.py | 1 + 5 files changed, 53 insertions(+), 33 deletions(-) diff --git a/curator/curator.py b/curator/curator.py index 4a94b468..40957efa 100755 --- a/curator/curator.py +++ b/curator/curator.py @@ -80,7 +80,7 @@ def make_parser(): parser_alias.set_defaults(func=alias_loop) parser_alias.add_argument('-p', '--prefix', help='Prefix for the indices. Indices that do not have this prefix are skipped. Default: logstash-', default=DEFAULT_ARGS['prefix']) parser_alias.add_argument('--timestring', help="Python strftime string to match your index definition, e.g. 2014.07.15 would be %%Y.%%m.%%d", type=str, default=None) - parser_alias.add_argument('-T', '--time-unit', dest='time_unit', action='store', help='Unit of time to reckon by: [hours|days|weeks|months] Default: days', + parser_alias.add_argument('-T', '--time-unit', dest='time_unit', action='store', help='Unit of time to reckon by: [hours|days|weeks] Default: days', default=DEFAULT_ARGS['time_unit'], type=str) parser_alias.add_argument('--exclude-pattern', help='Exclude indices matching provided pattern, e.g. 2014.06.08', type=str, default=None) parser_alias.add_argument('--alias', required=True, help='Alias name', type=str) @@ -93,7 +93,7 @@ def make_parser(): parser_allocation.set_defaults(func=command_loop) parser_allocation.add_argument('-p', '--prefix', help='Prefix for the indices. Indices that do not have this prefix are skipped. Default: logstash-', default=DEFAULT_ARGS['prefix']) parser_allocation.add_argument('--timestring', help="Python strftime string to match your index definition, e.g. 2014.07.15 would be %%Y.%%m.%%d", type=str, default=None) - parser_allocation.add_argument('-T', '--time-unit', dest='time_unit', action='store', help='Unit of time to reckon by: [hours|days|weeks|months] Default: days', default=DEFAULT_ARGS['time_unit'], type=str) + parser_allocation.add_argument('-T', '--time-unit', dest='time_unit', action='store', help='Unit of time to reckon by: [hours|days|weeks] Default: days', default=DEFAULT_ARGS['time_unit'], type=str) parser_allocation.add_argument('--older-than', required=True, help='Apply rule to indices older than n TIME_UNITs', type=int) parser_allocation.add_argument('--rule', required=True, help='Routing allocation rule to apply, e.g. tag=ssd', type=str) parser_allocation.add_argument('--exclude-pattern', help='Exclude indices matching provided pattern, e.g. 2014.06.08', type=str, default=None) @@ -103,7 +103,7 @@ def make_parser(): parser_bloom.set_defaults(func=command_loop) parser_bloom.add_argument('-p', '--prefix', help='Prefix for the indices. Indices that do not have this prefix are skipped. Default: logstash-', default=DEFAULT_ARGS['prefix']) parser_bloom.add_argument('--timestring', help="Python strftime string to match your index definition, e.g. 2014.07.15 would be %%Y.%%m.%%d", type=str, default=None) - parser_bloom.add_argument('-T', '--time-unit', dest='time_unit', action='store', help='Unit of time to reckon by: [hours|days|weeks|months] Default: days', default=DEFAULT_ARGS['time_unit'], type=str) + parser_bloom.add_argument('-T', '--time-unit', dest='time_unit', action='store', help='Unit of time to reckon by: [hours|days|weeks] Default: days', default=DEFAULT_ARGS['time_unit'], type=str) parser_bloom.add_argument('--older-than', required=True, help='Disable bloom filter cache for indices older than n TIME_UNITs', type=int) parser_bloom.add_argument('--exclude-pattern', help='Exclude indices matching provided pattern, e.g. 2014.06.08', type=str, default=None) @@ -112,7 +112,7 @@ def make_parser(): parser_close.set_defaults(func=command_loop) parser_close.add_argument('-p', '--prefix', help='Prefix for the indices. Indices that do not have this prefix are skipped. Default: logstash-', default=DEFAULT_ARGS['prefix']) parser_close.add_argument('--timestring', help="Python strftime string to match your index definition, e.g. 2014.07.15 would be %%Y.%%m.%%d", type=str, default=None) - parser_close.add_argument('-T', '--time-unit', dest='time_unit', action='store', help='Unit of time to reckon by: [hours|days|weeks|months] Default: days', default=DEFAULT_ARGS['time_unit'], type=str) + parser_close.add_argument('-T', '--time-unit', dest='time_unit', action='store', help='Unit of time to reckon by: [hours|days|weeks] Default: days', default=DEFAULT_ARGS['time_unit'], type=str) parser_close.add_argument('--older-than', required=True, help='Close indices older than n TIME_UNITs', type=int) parser_close.add_argument('--exclude-pattern', help='Exclude indices matching provided pattern, e.g. 2014.06.08', type=str, default=None) @@ -121,7 +121,7 @@ def make_parser(): parser_delete.set_defaults(func=command_loop) parser_delete.add_argument('-p', '--prefix', help='Prefix for the indices. Indices that do not have this prefix are skipped. Default: logstash-', default=DEFAULT_ARGS['prefix']) parser_delete.add_argument('--timestring', help="Python strftime string to match your index definition, e.g. 2014.07.15 would be %%Y.%%m.%%d", type=str, default=None) - parser_delete.add_argument('-T', '--time-unit', dest='time_unit', action='store', help='Unit of time to reckon by: [hours|days|weeks|months] Default: days', default=DEFAULT_ARGS['time_unit'], type=str) + parser_delete.add_argument('-T', '--time-unit', dest='time_unit', action='store', help='Unit of time to reckon by: [hours|days|weeks] Default: days', default=DEFAULT_ARGS['time_unit'], type=str) parser_delete.add_argument('--exclude-pattern', help='Exclude indices matching provided pattern, e.g. 2014.06.08', type=str, default=None) delete_group = parser_delete.add_mutually_exclusive_group() delete_group.add_argument('--older-than', help='Delete indices older than n TIME_UNITs', type=int) @@ -132,7 +132,7 @@ def make_parser(): parser_optimize.set_defaults(func=command_loop) parser_optimize.add_argument('-p', '--prefix', help='Prefix for the indices. Indices that do not have this prefix are skipped. Default: logstash-', default=DEFAULT_ARGS['prefix']) parser_optimize.add_argument('--timestring', help="Python strftime string to match your index definition, e.g. 2014.07.15 would be %%Y.%%m.%%d", type=str, default=None) - parser_optimize.add_argument('-T', '--time-unit', dest='time_unit', action='store', help='Unit of time to reckon by: [hours|days|weeks|months] Default: days', default=DEFAULT_ARGS['time_unit'], type=str) + parser_optimize.add_argument('-T', '--time-unit', dest='time_unit', action='store', help='Unit of time to reckon by: [hours|days|weeks] Default: days', default=DEFAULT_ARGS['time_unit'], type=str) parser_optimize.add_argument('--older-than', required=True, help='Optimize indices older than n TIME_UNITs', type=int) parser_optimize.add_argument('--max_num_segments', help='Optimize segment count to n segments per shard.', default=DEFAULT_ARGS['max_num_segments'], type=int) @@ -151,7 +151,7 @@ def make_parser(): parser_snapshot.set_defaults(func=command_loop) parser_snapshot.add_argument('-p', '--prefix', help='Prefix for the indices. Indices that do not have this prefix are skipped. Default: logstash-', default=DEFAULT_ARGS['prefix']) parser_snapshot.add_argument('--timestring', help="Python strftime string to match your index definition, e.g. 2014.07.15 would be %%Y.%%m.%%d", type=str, default=None) - parser_snapshot.add_argument('-T', '--time-unit', dest='time_unit', action='store', help='Unit of time to reckon by: [hours|days|weeks|months] Default: days', default=DEFAULT_ARGS['time_unit'], type=str) + parser_snapshot.add_argument('-T', '--time-unit', dest='time_unit', action='store', help='Unit of time to reckon by: [hours|days|weeks] Default: days', default=DEFAULT_ARGS['time_unit'], type=str) parser_snapshot.add_argument('--exclude-pattern', help='Exclude indices matching provided pattern, e.g. 2014.06.08', type=str, default=None) parser_snapshot.add_argument('--repository', required=True, type=str, help='Repository name') @@ -181,17 +181,6 @@ def show(client, **kwargs): print('{0}'.format(snapshot)) sys.exit(0) -def iso_year_start(iso_year): - "The gregorian calendar date of the first day of the given ISO year" - fourth_jan = datetime.date(iso_year, 1, 4) - delta = datetime.timedelta(fourth_jan.isoweekday()-1) - return fourth_jan - delta - -def iso_to_gregorian(iso_year, iso_week, iso_day): - "Gregorian calendar date for the given ISO year, week and day" - year_start = iso_year_start(iso_year) - return year_start + datetime.timedelta(days=iso_day-1, weeks=iso_week-1) - def get_index_time(index_timestamp, timestring): """ Gets the time of the index. @@ -291,11 +280,14 @@ def find_expired_data(client, object_list=[], utc_now=None, time_unit='days', ol if time_unit == 'days': utc_now = utc_now.replace(hour=0) - - if time_unit == 'months': # timedelta doesn't support months - cutoff = utc_now - timedelta(days=(older_than * 32)) # Prevent accidental deletion of current month - else: # This cutoff must be a multiple of time_units - cutoff = utc_now - timedelta(**{time_unit: (older_than - 1)}) + if time_unit == 'weeks': + # Since week math always uses Monday as the start of the week, + # this work-around resets utc_now to be Monday of the current week. + weeknow = utc_now.strftime('%Y-%W') + utc_now = get_index_time(weeknow, '%Y-%W') + + # This cutoff must be a multiple of time_units + cutoff = utc_now - timedelta(**{time_unit: (older_than - 1)}) for object_name in object_list: @@ -616,9 +608,6 @@ def validate_timestring(timestring, time_unit): fail = False elif '%U' in timestring: fail = False - elif time_unit == 'months': - if '%m' in timestring: - fail = False if fail: print('Timestring {0} does not match time unit {1}'.format(timestring, time_unit)) sys.exit(1) @@ -707,9 +696,7 @@ def main(): elif arguments.time_unit == 'days': arguments.timestring = '%Y.%m.%d' elif arguments.time_unit == 'weeks': - arguments.timestring = '%Y.%U' - elif arguments.time_unit == 'months': - arguments.timestring = '%Y.%m' + arguments.timestring = '%Y.%W' logging.debug("Setting default timestring for {0} to {1}".format(arguments.time_unit, arguments.timestring)) logging.debug("Matching indices with pattern: {0}{1}".format(arguments.prefix,arguments.timestring)) # Execute the command specified in the arguments diff --git a/test_curator/integration/__init__.py b/test_curator/integration/__init__.py index 56c82eee..1dba48a8 100644 --- a/test_curator/integration/__init__.py +++ b/test_curator/integration/__init__.py @@ -83,10 +83,8 @@ def run_curator(self, **kwargs): def create_indices(self, count, unit=None): now = datetime.utcnow() unit = unit if unit else self.args['time_unit'] - if unit == 'months': - format = '%Y.%m' - elif unit == 'weeks': - format = '%Y.%U' + if unit == 'weeks': + format = '%Y.%W' elif unit == 'days': format = '%Y.%m.%d' elif unit == 'hours': diff --git a/test_curator/integration/test_snapshots.py b/test_curator/integration/test_snapshots.py index 11d53d8f..aec2a405 100644 --- a/test_curator/integration/test_snapshots.py +++ b/test_curator/integration/test_snapshots.py @@ -54,6 +54,16 @@ def test_curator_will_manage_hourly_snapshots(self): result = curator.get_snapshot(self.client, self.args['repository'], '_all') self.assertEqual(3, len(result['snapshots'])) + def test_curator_will_manage_weekly_snapshots(self): + self.create_indices(10, 'weeks') + self.create_repository() + curator.command_loop(self.client, time_unit='weeks', command='snapshot', older_than=3, timestring='%Y.%W', delete_older_than=None, most_recent=None, repository=self.args['repository']) + result = curator.get_snapshot(self.client, self.args['repository'], '_all') + self.assertEqual(7, len(result['snapshots'])) + curator.command_loop(self.client, time_unit='weeks', command='snapshot', timestring='%Y.%W', delete_older_than=6, repository=self.args['repository']) + result = curator.get_snapshot(self.client, self.args['repository'], '_all') + self.assertEqual(3, len(result['snapshots'])) + def test_curator_will_snap_latest_n_indices(self): self.create_indices(10) self.create_repository() diff --git a/test_curator/integration/test_time_based.py b/test_curator/integration/test_time_based.py index a1df261c..196305a1 100644 --- a/test_curator/integration/test_time_based.py +++ b/test_curator/integration/test_time_based.py @@ -17,6 +17,12 @@ def test_curator_will_properly_delete_hourly_indices(self): mtd = self.client.cluster.state(index=self.args['prefix'] + '*', metric='metadata') self.assertEquals(3, len(mtd['metadata']['indices'].keys())) + def test_curator_will_properly_delete_weely_indices(self): + self.create_indices(10, 'weeks') + curator.command_loop(self.client, command='delete', time_unit='weeks', older_than=3, timestring='%Y.%W') + mtd = self.client.cluster.state(index=self.args['prefix'] + '*', metric='metadata') + self.assertEquals(3, len(mtd['metadata']['indices'].keys())) + class TestFindExpiredIndices(CuratorTestCase): def test_find_closed_indices(self): self.create_index('l-2014.01.03') @@ -36,6 +42,24 @@ def test_find_closed_indices(self): expired ) + def test_find_closed_weekly_indices(self): + self.create_index('l-2014.03') + self.client.indices.close(index='l-2014.03') + self.create_index('l-2014.04') + + # all indices should be expired + index_list = curator.get_object_list(self.client, prefix='l-') + expired = list(curator.find_expired_data(self.client, time_unit='weeks', older_than=1, timestring='%Y.%W', object_list=index_list, + utc_now=datetime(2014, 2, 4, 0, 0, 0), prefix='l-')) + + self.assertEquals( + [ + ('l-2014.03', timedelta(weeks=2)), + ('l-2014.04', timedelta(weeks=1)), + ], + expired + ) + def test_find_indices_ignores_indices_with_different_prefix_or_time_unit(self): self.create_index('logstash-2012.01.01') # wrong precision self.create_index('not-logstash-2012.01.01.00') # wrong prefix diff --git a/test_curator/test_curator.py b/test_curator/test_curator.py index 705b7fdc..3879c238 100644 --- a/test_curator/test_curator.py +++ b/test_curator/test_curator.py @@ -11,6 +11,7 @@ def test_get_index_time(self): ('2014.01.19', '%Y.%m.%d', datetime(2014, 1, 19)), ('2014-01-19', '%Y-%m-%d', datetime(2014, 1, 19)), ('2010-12-29', '%Y-%m-%d', datetime(2010, 12, 29)), + ('2014-28', '%Y-%W', datetime(2014, 7, 14)), ('2010.12.29.12', '%Y.%m.%d.%H', datetime(2010, 12, 29, 12)), ]: self.assertEqual(dt, curator.get_index_time(text, datestring)) From f5167a69ecbb66d4dbf24fa8cfa7dde465711c4e Mon Sep 17 00:00:00 2001 From: Aaron Mildenstein Date: Wed, 23 Jul 2014 17:39:16 -0500 Subject: [PATCH 5/6] =?UTF-8?q?Refactored=20all=20logging=20and=20find=5Fe?= =?UTF-8?q?xpired=5Fdata=20=E2=80=A6and=20all=20calls=20to=20`find=5Fexpir?= =?UTF-8?q?ed=5Fdata`=20in=20curator=20and=20the=20integration=20tests.=20?= =?UTF-8?q?Sending=20back=20a=20tuple=20with=20the=20expired=20index=20and?= =?UTF-8?q?=20the=20time=20difference=20was=20fancy,=20but=20not=20needed.?= =?UTF-8?q?=20=20It=20was=20causing=20issues,=20so=20I=20cleaned=20up=20lo?= =?UTF-8?q?gging,=20added=20a=20logstash=20log=20format,=20fixed=20dry=20r?= =?UTF-8?q?un=20reporting=20in=20logging=20so=20it's=20more=20clear=20that?= =?UTF-8?q?=20changes=20will=20not=20be=20made,=20and=20cleared=20up=20som?= =?UTF-8?q?e=20ambiguity=20reported=20in=20#112=20Tests=20were=20fixed=20t?= =?UTF-8?q?o=20reflect=20changes=20in=20code.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- curator/curator.py | 89 ++++++++++++--------- test_curator/integration/test_time_based.py | 18 ++--- test_curator/test_curator.py | 6 +- 3 files changed, 64 insertions(+), 49 deletions(-) diff --git a/curator/curator.py b/curator/curator.py index 40957efa..898a0ae8 100755 --- a/curator/curator.py +++ b/curator/curator.py @@ -1,5 +1,6 @@ #!/usr/bin/env python +import os import sys import time import logging @@ -39,6 +40,7 @@ def emit(self, record): 'dry_run': False, 'debug': False, 'log_level': 'INFO', + 'logformat': 'Default', 'show_indices': False, 'wait_for_completion': True, 'ignore_unavailable': False, @@ -69,6 +71,7 @@ def make_parser(): parser.add_argument('-D', '--debug', dest='debug', action='store_true', help='Debug mode', default=DEFAULT_ARGS['debug']) parser.add_argument('--loglevel', dest='log_level', action='store', help='Log level', default=DEFAULT_ARGS['log_level'], type=str) parser.add_argument('-l', '--logfile', dest='log_file', help='log file', type=str) + parser.add_argument('--logformat', dest='logformat', help='Log output format [default|logstash]. Default: default', default=DEFAULT_ARGS['logformat'], type=str) # Command sub_parsers subparsers = parser.add_subparsers( @@ -171,6 +174,13 @@ def make_parser(): return parser +class Whitelist(logging.Filter): + def __init__(self, *whitelist): + self.whitelist = [logging.Filter(name) for name in whitelist] + + def filter(self, record): + return any(f.filter(record) for f in self.whitelist) + def show(client, **kwargs): if kwargs['show_indices']: for index_name in get_indices(client, kwargs['prefix']): @@ -301,10 +311,9 @@ def find_expired_data(client, object_list=[], utc_now=None, time_unit='days', ol # if the index is older than the cutoff if object_time < cutoff: - yield object_name, cutoff-object_time - + yield object_name else: - logger.info('{0} is {1} above the cutoff.'.format(object_name, object_time-cutoff)) + logger.info('{0} is within the threshold period ({1} {2}).'.format(object_name, older_than, time_unit)) def find_overusage_indices(client, disk_space=2097152.0, prefix='logstash-', **kwargs): """ Generator that yields over usage indices. @@ -489,16 +498,15 @@ def snap_latest_indices(client, most_recent=0, prefix='logstash-', dry_run=False """Snapshot 'count' most recent indices matching prefix""" indices = [] # initialize... indices = get_indices(client, prefix) + prepend = "DRY RUN: " if dry_run else '' for index_name in indices[-most_recent:]: - if dry_run: - logger.info('Would have attempted creating snapshot for {0}.'.format(index_name)) - continue + if not index_closed(client, index_name): + logger.info(prepend + 'Attempting to create snapshot for {0}...'.format(index_name)) else: - if not index_closed(client, index_name): - logger.info('Attempting to create snapshot for {0}...'.format(index_name)) - else: - logger.warn('Unable to perform snapshot on closed index {0}'.format(index_name)) - continue + logger.warn(prepend + 'Unable to perform snapshot on closed index {0}'.format(index_name)) + continue + if dry_run: + continue # Don't do the work on a dry run skipped = _create_snapshot(client, index_name, prefix, **kwargs) @@ -506,10 +514,11 @@ def snap_latest_indices(client, most_recent=0, prefix='logstash-', dry_run=False continue # if no error was raised and we got here that means the operation succeeded logger.info('Snapshot operation for index {0} succeeded.'.format(index_name)) - logger.info('Snapshot \'latest\' {0} indices operations completed.'.format(most_recent)) + logger.info(prepend + 'Snapshot \'latest\' {0} indices operations completed.'.format(most_recent)) def alias_loop(client, dry_run=False, **kwargs): - logging.info("Beginning ALIAS operations...") + prepend = "DRY RUN: " if dry_run else '' + logging.info(prepend + "Beginning ALIAS operations...") if kwargs['alias_older_than']: kwargs['older_than'] = kwargs['alias_older_than'] op = _add_to_alias @@ -520,23 +529,22 @@ def alias_loop(client, dry_run=False, **kwargs): words = ['remove', 'from', 'removed'] index_list = get_object_list(client, **kwargs) expired_indices = find_expired_data(client, object_list=index_list, **kwargs) - for index_name, expiration in expired_indices: + for index_name in expired_indices: + logger.info(prepend + 'Attempting to {0} index {1} {2} alias {3}.'.format(words[0], index_name, words[1], kwargs['alias'])) if dry_run: - logger.info('Would have attempted to {0} index {1} {2} alias {3} because it is {4} older than the calculated cutoff.'.format(words[0], index_name, words[1], kwargs['alias'], expiration)) continue - else: - logger.info('Attempting to {0} index {1} {2} alias {3} because it is {4} older than cutoff.'.format(words[0], index_name, words[1], kwargs['alias'], expiration)) skipped = op(client, index_name, **kwargs) if skipped: continue # if no error was raised and we got here that means the operation succeeded logger.info('{0}: Successfully {1} {2} alias {3}.'.format(index_name, words[2], words[1], kwargs['alias'])) - logger.info('Index ALIAS operations completed.') + logger.info(prepend + 'Index ALIAS operations completed.') def command_loop(client, dry_run=False, **kwargs): + prepend = "DRY RUN: " if dry_run else '' command = kwargs['command'] - logging.info("Beginning {0} operations...".format(command.upper())) + logging.info(prepend + "Beginning {0} operations...".format(command.upper())) op, words = OP_MAP[command] by_space = kwargs['disk_space'] if 'disk_space' in kwargs else False if command == 'delete' and by_space: @@ -554,18 +562,13 @@ def command_loop(client, dry_run=False, **kwargs): index_list = get_object_list(client, **kwargs) expired_indices = find_expired_data(client, object_list=index_list, **kwargs) - for index_name, expiration in expired_indices: - if dry_run and not by_space: - logger.info('Would have attempted {0} index {1} because it is {2} older than the calculated cutoff.'.format(words['gerund'].lower(), index_name, expiration)) - continue - elif dry_run and by_space: - logger.info('Would have attempted {0} index {1} due to space constraints.'.format(words['gerund'].lower(), index_name)) - continue - + for index_name in expired_indices: if not by_space: - logger.info('Attempting to {0} index {1} because it is {2} older than cutoff.'.format(words['op'], index_name, expiration)) + logger.info(prepend + 'Attempting to {0} index {1}.'.format(words['op'], index_name)) else: - logger.info('Attempting to {0} index {1} due to space constraints.'.format(words['op'].lower(), index_name)) + logger.info(prepend + 'Attempting to {0} index {1} due to space constraints.'.format(words['op'].lower(), index_name)) + if dry_run: + continue # Don't act on dry run skipped = op(client, index_name, **kwargs) @@ -578,7 +581,7 @@ def command_loop(client, dry_run=False, **kwargs): w = words['op'][:-4] else: w = words['op'] - logger.info('{0} index operations completed.'.format(w.upper())) + logger.info(prepend + '{0} index operations completed.'.format(w.upper())) def get_segmentcount(client, index_name): """Return a list of shardcount, segmentcount""" @@ -637,7 +640,6 @@ def main(): if arguments.command == 'show': # Do not log and force dry-run if we opt to show indices or snapshots. - import os arguments.log_file = os.devnull arguments.dry_run = True if not arguments.show_indices and not arguments.show_snapshots: @@ -662,18 +664,33 @@ def main(): if not isinstance(numeric_log_level, int): raise ValueError('Invalid log level: %s' % arguments.log_level) + if arguments.logformat == 'logstash': + os.environ['TZ'] = 'UTC' + time.tzset() + format_string = '{"@timestamp":"%(asctime)s.%(msecs)03dZ", "loglevel":"%(levelname)s", "function":"%(funcName)s", "linenum":"%(lineno)d", "message":"%(message)s"}' + date_string = '%Y-%m-%dT%H:%M:%S' + else: + format_string = '%(asctime)s %(levelname)-9s %(funcName)22s:%(lineno)-4d %(message)s' + date_string = None logging.basicConfig(level=numeric_log_level, - format='%(asctime)s.%(msecs)03d %(levelname)-9s %(funcName)22s:%(lineno)-4d %(message)s', - datefmt="%Y-%m-%dT%H:%M:%S", + format=format_string, + datefmt=date_string, stream=open(arguments.log_file, 'a') if arguments.log_file else sys.stderr) - logging.info("Job starting...") - if arguments.dry_run: - logging.info("DRY RUN MODE. No changes will be made.") + + # Filter out logging from Elasticsearch and associated modules by default + if not arguments.debug: + for handler in logging.root.handlers: + handler.addFilter(Whitelist('root', '__main__')) # Setting up NullHandler to handle nested elasticsearch.trace Logger instance in elasticsearch python client logging.getLogger('elasticsearch.trace').addHandler(NullHandler()) + logging.info("Job starting...") + + if arguments.dry_run: + logging.info("DRY RUN MODE. No changes will be made.") + # Override the timestamp in case the end-user doesn't. if timeout_override and arguments.timeout == 30: logger.info('Default timeout of 30 seconds is too low for command {0}. Overriding to 21,600 seconds (6 hours).'.format(arguments.command.upper())) diff --git a/test_curator/integration/test_time_based.py b/test_curator/integration/test_time_based.py index 196305a1..60125966 100644 --- a/test_curator/integration/test_time_based.py +++ b/test_curator/integration/test_time_based.py @@ -36,8 +36,8 @@ def test_find_closed_indices(self): self.assertEquals( [ - ('l-2014.01.01', timedelta(7)), - ('l-2014.01.03', timedelta(5)), + 'l-2014.01.01', + 'l-2014.01.03', ], expired ) @@ -53,9 +53,8 @@ def test_find_closed_weekly_indices(self): utc_now=datetime(2014, 2, 4, 0, 0, 0), prefix='l-')) self.assertEquals( - [ - ('l-2014.03', timedelta(weeks=2)), - ('l-2014.04', timedelta(weeks=1)), + [ 'l-2014.03', + 'l-2014.04', ], expired ) @@ -68,7 +67,7 @@ def test_find_indices_ignores_indices_with_different_prefix_or_time_unit(self): index_list = curator.get_object_list(self.client, prefix=self.args['prefix']) expired = list(curator.find_expired_data(self.client, time_unit='hours', older_than=1, timestring='%Y.%m.%d.%H', object_list=index_list)) self.assertEquals(1, len(expired)) - self.assertEquals('logstash-2012.01.01.00', expired[0][0]) + self.assertEquals('logstash-2012.01.01.00', expired[0]) def test_find_reports_correct_time_interval_from_cutoff(self): self.create_index('l-2014.01.01') @@ -80,10 +79,9 @@ def test_find_reports_correct_time_interval_from_cutoff(self): expired = list(curator.find_expired_data(self.client, time_unit='days', older_than=1, timestring='%Y.%m.%d', object_list=index_list, utc_now=datetime(2014, 1, 4, 3, 45, 50), prefix='l-')) self.assertEquals( - [ - (u'l-2014.01.01', timedelta(3)), - (u'l-2014.01.02', timedelta(2)), - (u'l-2014.01.03', timedelta(1)), + [ 'l-2014.01.01', + 'l-2014.01.02', + 'l-2014.01.03', ], expired ) diff --git a/test_curator/test_curator.py b/test_curator/test_curator.py index 3879c238..f8517061 100644 --- a/test_curator/test_curator.py +++ b/test_curator/test_curator.py @@ -54,9 +54,9 @@ def test_all_daily_indices_found(self): expired = list(expired) self.assertEquals([ - ('prefix-2013.01.03', timedelta(days=362)), - ('prefix-2013.12.29', timedelta(days=2)), - ('prefix-2013.12.30', timedelta(days=1)), + 'prefix-2013.01.03', + 'prefix-2013.12.29', + 'prefix-2013.12.30', ], expired ) From 4f394942d9a935cf96998296e22899bf280dcb54 Mon Sep 17 00:00:00 2001 From: Aaron Mildenstein Date: Wed, 23 Jul 2014 17:58:43 -0500 Subject: [PATCH 6/6] Bump version to 1.2.0-dev Push logging changes made in curator to es_repo_mgr --- CONTRIBUTORS | 1 + VERSION | 2 +- curator/curator.py | 2 +- curator/es_repo_mgr.py | 31 ++++++++++++++++++++++++++++--- 4 files changed, 31 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index d542b6d8..b8fb941a 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -30,3 +30,4 @@ Contributors: * Kamil Essekkat (ekamil) * (gbutt) * Ben Buchacher (bbuchacher) +* Ehtesh Choudhury (shurane) diff --git a/VERSION b/VERSION index 738a8397..0c64b5c3 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.1.4-dev +1.2.0-dev diff --git a/curator/curator.py b/curator/curator.py index 898a0ae8..97d6c765 100755 --- a/curator/curator.py +++ b/curator/curator.py @@ -18,7 +18,7 @@ class NullHandler(Handler): def emit(self, record): pass -__version__ = '1.1.4-dev' +__version__ = '1.2.0-dev' # Elasticsearch versions supported version_max = (2, 0, 0) diff --git a/curator/es_repo_mgr.py b/curator/es_repo_mgr.py index d26d2a0a..6a759491 100755 --- a/curator/es_repo_mgr.py +++ b/curator/es_repo_mgr.py @@ -16,7 +16,7 @@ class NullHandler(Handler): def emit(self, record): pass -__version__ = '1.1.4-dev' +__version__ = '1.2.0-dev' # Elasticsearch versions supported version_max = (2, 0, 0) @@ -34,6 +34,7 @@ def emit(self, record): 'dry_run': False, 'debug': False, 'log_level': 'INFO', + 'logformat': 'default', } def make_parser(): @@ -57,6 +58,7 @@ def make_parser(): parser.add_argument('-D', '--debug', dest='debug', action='store_true', help='Debug mode', default=DEFAULT_ARGS['debug']) parser.add_argument('--loglevel', dest='log_level', action='store', help='Log level', default=DEFAULT_ARGS['log_level'], type=str) parser.add_argument('-l', '--logfile', dest='log_file', help='log file', type=str) + parser.add_argument('--logformat', dest='logformat', help='Log output format [default|logstash]. Default: default', default=DEFAULT_ARGS['logformat'], type=str) # Command sub_parsers subparsers = parser.add_subparsers(title='Commands', dest='command', description='Select one of the following commands:', @@ -116,6 +118,12 @@ def make_parser(): return parser +class Whitelist(logging.Filter): + def __init__(self, *whitelist): + self.whitelist = [logging.Filter(name) for name in whitelist] + + def filter(self, record): + return any(f.filter(record) for f in self.whitelist) def show(client, **kwargs): for repository in sorted(get_repository(client, '_all').keys()): @@ -150,10 +158,13 @@ def _create_repository(client, dry_run=False, **kwargs): try: repo_name = kwargs['repository'] body = create_repo_body(**kwargs) + logging.info("Checking if repository {0} already exists...".format(repo_name)) result = get_repository(client, repo_name) if not result: + logging.info("Repository {0} not found. Continuing...".format(repo_name)) client.snapshot.create_repository(repository=repo_name, body=body) elif result is not None and repo_name not in result and not kwargs['dry_run']: + logging.info("Repository {0} not found. Continuing...".format(repo_name)) client.snapshot.create_repository(repository=repo_name, body=body) else: logger.error("Unable to create repository {0}. A repository with that name already exists.".format(repo_name)) @@ -227,10 +238,24 @@ def main(): if not isinstance(numeric_log_level, int): raise ValueError('Invalid log level: %s' % arguments.log_level) + if arguments.logformat == 'logstash': + os.environ['TZ'] = 'UTC' + time.tzset() + format_string = '{"@timestamp":"%(asctime)s.%(msecs)03dZ", "loglevel":"%(levelname)s", "function":"%(funcName)s", "linenum":"%(lineno)d", "message":"%(message)s"}' + date_string = '%Y-%m-%dT%H:%M:%S' + else: + format_string = '%(asctime)s %(levelname)-9s %(funcName)22s:%(lineno)-4d %(message)s' + date_string = None logging.basicConfig(level=numeric_log_level, - format='%(asctime)s.%(msecs)03d %(levelname)-9s %(funcName)22s:%(lineno)-4d %(message)s', - datefmt="%Y-%m-%dT%H:%M:%S", + format=format_string, + datefmt=date_string, stream=open(arguments.log_file, 'a') if arguments.log_file else sys.stderr) + + + # Filter out logging from Elasticsearch and associated modules by default + if not arguments.debug: + for handler in logging.root.handlers: + handler.addFilter(Whitelist('root', '__main__')) logging.info("Job starting...") # Setting up NullHandler to handle nested elasticsearch.trace Logger instance in elasticsearch python client