summaryrefslogtreecommitdiff
blob: ff3b3eeb0b36c3c3e4dcd08b0cd068be38dd2b71 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
From 690c05ca495f1d55a469724c94e1551cbfa836f2 Mon Sep 17 00:00:00 2001
From: Rajesh Tailor <rajesh.tailor@nttdata.com>
Date: Wed, 4 Mar 2015 05:05:19 -0800
Subject: [PATCH] Delete orphaned instance files from compute nodes

While resizing/revert-resizing instance, if instance gets deleted
in between, then instance files remains either on the source or
destination compute node.

To address this issue, added a new periodic task
'_cleanup_incomplete_migrations' which takes care of deleting
instance files from source/destination compute nodes and then
mark migration record as failed so that it doesn't appear again
in the next periodic task run.

SecurityImpact

Closes-Bug: 1392527
Change-Id: I9866d8e32e99b9f907921f4b226edf7b62bd83a7
(cherry picked from commit 4655751cdd97a4b527a25c7c0a96044ba212cd19)
---
 nova/compute/manager.py                     | 61 ++++++++++++++++++++++--
 nova/tests/unit/compute/test_compute_mgr.py | 72 +++++++++++++++++++++++++++++
 2 files changed, 129 insertions(+), 4 deletions(-)

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index bf5585e..24a5811 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -267,15 +267,21 @@ def errors_out_migration(function):
     def decorated_function(self, context, *args, **kwargs):
         try:
             return function(self, context, *args, **kwargs)
-        except Exception:
+        except Exception as ex:
             with excutils.save_and_reraise_exception():
                 wrapped_func = utils.get_wrapped_function(function)
                 keyed_args = safe_utils.getcallargs(wrapped_func, context,
                                                     *args, **kwargs)
                 migration = keyed_args['migration']
-                status = migration.status
-                if status not in ['migrating', 'post-migrating']:
-                    return
+
+                # NOTE(rajesht): If InstanceNotFound error is thrown from
+                # decorated function, migration status should be set to
+                # 'error', without checking current migration status.
+                if not isinstance(ex, exception.InstanceNotFound):
+                    status = migration.status
+                    if status not in ['migrating', 'post-migrating']:
+                        return
+
                 migration.status = 'error'
                 try:
                     with migration.obj_as_admin():
@@ -3727,6 +3733,7 @@ class ComputeManager(manager.Manager):
     @wrap_exception()
     @reverts_task_state
     @wrap_instance_event
+    @errors_out_migration
     @wrap_instance_fault
     def revert_resize(self, context, instance, migration, reservations):
         """Destroys the new instance on the destination machine.
@@ -3783,6 +3790,7 @@ class ComputeManager(manager.Manager):
     @wrap_exception()
     @reverts_task_state
     @wrap_instance_event
+    @errors_out_migration
     @wrap_instance_fault
     def finish_revert_resize(self, context, instance, reservations, migration):
         """Finishes the second half of reverting a resize.
@@ -6578,6 +6586,51 @@ class ComputeManager(manager.Manager):
                 with utils.temporary_mutation(context, read_deleted='yes'):
                     instance.save()
 
+    @periodic_task.periodic_task(spacing=CONF.instance_delete_interval)
+    def _cleanup_incomplete_migrations(self, context):
+        """Delete instance files on failed resize/revert-resize operation
+
+        During resize/revert-resize operation, if that instance gets deleted
+        in-between then instance files might remain either on source or
+        destination compute node because of race condition.
+        """
+        LOG.debug('Cleaning up deleted instances with incomplete migration ')
+        migration_filters = {'host': CONF.host,
+                             'status': 'error'}
+        migrations = objects.MigrationList.get_by_filters(context,
+                                                          migration_filters)
+
+        if not migrations:
+            return
+
+        inst_uuid_from_migrations = set([migration.instance_uuid for migration
+                                         in migrations])
+
+        inst_filters = {'deleted': True, 'soft_deleted': False,
+                        'uuid': inst_uuid_from_migrations}
+        attrs = ['info_cache', 'security_groups', 'system_metadata']
+        with utils.temporary_mutation(context, read_deleted='yes'):
+            instances = objects.InstanceList.get_by_filters(
+                context, inst_filters, expected_attrs=attrs, use_slave=True)
+
+        for instance in instances:
+            if instance.host != CONF.host:
+                for migration in migrations:
+                    if instance.uuid == migration.instance_uuid:
+                        # Delete instance files if not cleanup properly either
+                        # from the source or destination compute nodes when
+                        # the instance is deleted during resizing.
+                        self.driver.delete_instance_files(instance)
+                        try:
+                            migration.status = 'failed'
+                            with migration.obj_as_admin():
+                                migration.save()
+                        except exception.MigrationNotFound:
+                            LOG.warning(_LW("Migration %s is not found."),
+                                        migration.id, context=context,
+                                        instance=instance)
+                        break
+
     @messaging.expected_exceptions(exception.InstanceQuiesceNotSupported,
                                    exception.NovaException,
                                    NotImplementedError)
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index 4b7234e..ee1ab47 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -1374,6 +1374,78 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
         self.assertFalse(c.cleaned)
         self.assertEqual('1', c.system_metadata['clean_attempts'])
 
+    @mock.patch.object(objects.Migration, 'obj_as_admin')
+    @mock.patch.object(objects.Migration, 'save')
+    @mock.patch.object(objects.MigrationList, 'get_by_filters')
+    @mock.patch.object(objects.InstanceList, 'get_by_filters')
+    def _test_cleanup_incomplete_migrations(self, inst_host,
+                                            mock_inst_get_by_filters,
+                                            mock_migration_get_by_filters,
+                                            mock_save, mock_obj_as_admin):
+        def fake_inst(context, uuid, host):
+            inst = objects.Instance(context)
+            inst.uuid = uuid
+            inst.host = host
+            return inst
+
+        def fake_migration(uuid, status, inst_uuid, src_host, dest_host):
+            migration = objects.Migration()
+            migration.uuid = uuid
+            migration.status = status
+            migration.instance_uuid = inst_uuid
+            migration.source_compute = src_host
+            migration.dest_compute = dest_host
+            return migration
+
+        fake_instances = [fake_inst(self.context, '111', inst_host),
+                          fake_inst(self.context, '222', inst_host)]
+
+        fake_migrations = [fake_migration('123', 'error', '111',
+                                          'fake-host', 'fake-mini'),
+                           fake_migration('456', 'error', '222',
+                                          'fake-host', 'fake-mini')]
+
+        mock_migration_get_by_filters.return_value = fake_migrations
+        mock_inst_get_by_filters.return_value = fake_instances
+
+        with mock.patch.object(self.compute.driver, 'delete_instance_files'):
+            self.compute._cleanup_incomplete_migrations(self.context)
+
+        # Ensure that migration status is set to 'failed' after instance
+        # files deletion for those instances whose instance.host is not
+        # same as compute host where periodic task is running.
+        for inst in fake_instances:
+            if inst.host != CONF.host:
+                for mig in fake_migrations:
+                    if inst.uuid == mig.instance_uuid:
+                        self.assertEqual('failed', mig.status)
+
+    def test_cleanup_incomplete_migrations_dest_node(self):
+        """Test to ensure instance files are deleted from destination node.
+
+        If instance gets deleted during resizing/revert-resizing operation,
+        in that case instance files gets deleted from instance.host (source
+        host here), but there is possibility that instance files could be
+        present on destination node.
+        This test ensures that `_cleanup_incomplete_migration` periodic
+        task deletes orphaned instance files from destination compute node.
+        """
+        self.flags(host='fake-mini')
+        self._test_cleanup_incomplete_migrations('fake-host')
+
+    def test_cleanup_incomplete_migrations_source_node(self):
+        """Test to ensure instance files are deleted from source node.
+
+        If instance gets deleted during resizing/revert-resizing operation,
+        in that case instance files gets deleted from instance.host (dest
+        host here), but there is possibility that instance files could be
+        present on source node.
+        This test ensures that `_cleanup_incomplete_migration` periodic
+        task deletes orphaned instance files from source compute node.
+        """
+        self.flags(host='fake-host')
+        self._test_cleanup_incomplete_migrations('fake-mini')
+
     def test_attach_interface_failure(self):
         # Test that the fault methods are invoked when an attach fails
         db_instance = fake_instance.fake_db_instance()
-- 
2.4.5