1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
|
From b74a5ea8399d1a0466c55332f557863acdae21b6 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Tue, 24 Sep 2024 14:34:30 +0200
Subject: [PATCH 20/35] x86/pv: Fix merging of new status bits into %dr6
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
All #DB exceptions result in an update of %dr6, but this isn't captured in
Xen's handling, and is buggy just about everywhere.
To begin resolving this issue, add a new pending_dbg field to x86_event
(unioned with cr2 to avoid taking any extra space, adjusting users to avoid
old-GCC bugs with anonymous unions), and introduce pv_inject_DB() to replace
the current callers using pv_inject_hw_exception().
Push the adjustment of v->arch.dr6 into pv_inject_event(), and use the new
x86_merge_dr6() rather than the current incorrect logic.
A key property is that pending_dbg is taken with positive polarity to deal
with RTM/BLD sensibly. Most callers pass in a constant, but callers passing
in a hardware %dr6 value need to XOR the value with X86_DR6_DEFAULT to flip to
positive polarity.
This fixes the behaviour of the breakpoint status bits; that any left pending
are generally discarded when a new #DB is raised. In principle it would fix
RTM/BLD too, except PV guests can't turn these capabilities on to start with.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
master commit: db39fa4b27ea470902d4625567cb6fa24030ddfa
master date: 2024-08-21 23:59:19 +0100
---
xen/arch/x86/include/asm/domain.h | 18 ++++++++++++++++--
xen/arch/x86/include/asm/hvm/hvm.h | 3 ++-
xen/arch/x86/pv/emul-priv-op.c | 5 +----
xen/arch/x86/pv/emulate.c | 9 +++++++--
xen/arch/x86/pv/ro-page-fault.c | 2 +-
xen/arch/x86/pv/traps.c | 16 ++++++++++++----
xen/arch/x86/traps.c | 2 +-
xen/arch/x86/x86_emulate/x86_emulate.h | 5 ++++-
8 files changed, 44 insertions(+), 16 deletions(-)
diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
index f5daeb182b..5d92891e6f 100644
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -731,15 +731,29 @@ static inline void pv_inject_hw_exception(unsigned int vector, int errcode)
pv_inject_event(&event);
}
+static inline void pv_inject_DB(unsigned long pending_dbg)
+{
+ struct x86_event event = {
+ .vector = X86_EXC_DB,
+ .type = X86_EVENTTYPE_HW_EXCEPTION,
+ .error_code = X86_EVENT_NO_EC,
+ };
+
+ event.pending_dbg = pending_dbg;
+
+ pv_inject_event(&event);
+}
+
static inline void pv_inject_page_fault(int errcode, unsigned long cr2)
{
- const struct x86_event event = {
+ struct x86_event event = {
.vector = X86_EXC_PF,
.type = X86_EVENTTYPE_HW_EXCEPTION,
.error_code = errcode,
- .cr2 = cr2,
};
+ event.cr2 = cr2;
+
pv_inject_event(&event);
}
diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
index 1c01e22c8e..238eece0cf 100644
--- a/xen/arch/x86/include/asm/hvm/hvm.h
+++ b/xen/arch/x86/include/asm/hvm/hvm.h
@@ -525,9 +525,10 @@ static inline void hvm_inject_page_fault(int errcode, unsigned long cr2)
.vector = X86_EXC_PF,
.type = X86_EVENTTYPE_HW_EXCEPTION,
.error_code = errcode,
- .cr2 = cr2,
};
+ event.cr2 = cr2;
+
hvm_inject_event(&event);
}
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index aa11ecadaa..15c83b9d23 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -1366,10 +1366,7 @@ int pv_emulate_privileged_op(struct cpu_user_regs *regs)
ctxt.bpmatch |= DR_STEP;
if ( ctxt.bpmatch )
- {
- curr->arch.dr6 |= ctxt.bpmatch | DR_STATUS_RESERVED_ONE;
- pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC);
- }
+ pv_inject_DB(ctxt.bpmatch);
/* fall through */
case X86EMUL_RETRY:
diff --git a/xen/arch/x86/pv/emulate.c b/xen/arch/x86/pv/emulate.c
index e7a1c0a2cc..8c44dea123 100644
--- a/xen/arch/x86/pv/emulate.c
+++ b/xen/arch/x86/pv/emulate.c
@@ -71,10 +71,15 @@ void pv_emul_instruction_done(struct cpu_user_regs *regs, unsigned long rip)
{
regs->rip = rip;
regs->eflags &= ~X86_EFLAGS_RF;
+
if ( regs->eflags & X86_EFLAGS_TF )
{
- current->arch.dr6 |= DR_STEP | DR_STATUS_RESERVED_ONE;
- pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC);
+ /*
+ * TODO: this should generally use TF from the start of the
+ * instruction. It's only a latent bug for now, as this path isn't
+ * used for any instruction which modifies eflags.
+ */
+ pv_inject_DB(X86_DR6_BS);
}
}
diff --git a/xen/arch/x86/pv/ro-page-fault.c b/xen/arch/x86/pv/ro-page-fault.c
index cad28ef928..d0fe07e3a1 100644
--- a/xen/arch/x86/pv/ro-page-fault.c
+++ b/xen/arch/x86/pv/ro-page-fault.c
@@ -390,7 +390,7 @@ int pv_ro_page_fault(unsigned long addr, struct cpu_user_regs *regs)
/* Fallthrough */
case X86EMUL_OKAY:
if ( ctxt.retire.singlestep )
- pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC);
+ pv_inject_DB(X86_DR6_BS);
/* Fallthrough */
case X86EMUL_RETRY:
diff --git a/xen/arch/x86/pv/traps.c b/xen/arch/x86/pv/traps.c
index 83e84e2762..5a7341abf0 100644
--- a/xen/arch/x86/pv/traps.c
+++ b/xen/arch/x86/pv/traps.c
@@ -12,6 +12,7 @@
#include <xen/lib.h>
#include <xen/softirq.h>
+#include <asm/debugreg.h>
#include <asm/pv/trace.h>
#include <asm/shared.h>
#include <asm/traps.h>
@@ -50,9 +51,9 @@ void pv_inject_event(const struct x86_event *event)
tb->cs = ti->cs;
tb->eip = ti->address;
- if ( event->type == X86_EVENTTYPE_HW_EXCEPTION &&
- vector == X86_EXC_PF )
+ switch ( vector | -(event->type == X86_EVENTTYPE_SW_INTERRUPT) )
{
+ case X86_EXC_PF:
curr->arch.pv.ctrlreg[2] = event->cr2;
arch_set_cr2(curr, event->cr2);
@@ -62,9 +63,16 @@ void pv_inject_event(const struct x86_event *event)
error_code |= PFEC_user_mode;
trace_pv_page_fault(event->cr2, error_code);
- }
- else
+ break;
+
+ case X86_EXC_DB:
+ curr->arch.dr6 = x86_merge_dr6(curr->domain->arch.cpu_policy,
+ curr->arch.dr6, event->pending_dbg);
+ fallthrough;
+ default:
trace_pv_trap(vector, regs->rip, use_error_code, error_code);
+ break;
+ }
if ( use_error_code )
{
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 78e83f6fc1..8e2df3e719 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2032,7 +2032,7 @@ void asmlinkage do_debug(struct cpu_user_regs *regs)
return;
}
- pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC);
+ pv_inject_DB(0 /* N/A, already merged */);
}
void asmlinkage do_entry_CP(struct cpu_user_regs *regs)
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h
index d92be69d84..e8a0e57228 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -78,7 +78,10 @@ struct x86_event {
uint8_t type; /* X86_EVENTTYPE_* */
uint8_t insn_len; /* Instruction length */
int32_t error_code; /* X86_EVENT_NO_EC if n/a */
- unsigned long cr2; /* Only for X86_EXC_PF h/w exception */
+ union {
+ unsigned long cr2; /* #PF */
+ unsigned long pending_dbg; /* #DB (new DR6 bits, positive polarity) */
+ };
};
/*
--
2.46.1
|