removed in the kernel source tree. Every entry should contain what
exactly is going away, why it is happening, and who is going to be doing
the work. When the feature is removed from the kernel, it should also
-be removed from this file.
+be removed from this file. The suggested deprecation period is 3 releases.
+
+---------------------------
+
+What: ddebug_query="query" boot cmdline param
+When: v3.8
+Why: obsoleted by dyndbg="query" and module.dyndbg="query"
+Who: Jim Cromie <jim.cromie@gmail.com>, Jason Baron <jbaron@redhat.com>
---------------------------
----------------------------
+What: Removing the pn544 raw driver.
+When: 3.6
+Why: With the introduction of the NFC HCI and SHDL kernel layers, pn544.c
+ is being replaced by pn544_hci.c which is accessible through the netlink
+ and socket NFC APIs. Moreover, pn544.c is outdated and does not seem to
+ work properly with the latest Android stacks.
+ Having 2 drivers for the same hardware is confusing and as such we
+ should only keep the one following the kernel NFC APIs.
+Who: Samuel Ortiz <sameo@linux.intel.com>
+
+----------------------------
+
What: setitimer accepts user NULL pointer (value)
When: 3.6
Why: setitimer is not returning -EFAULT if user pointer is NULL. This
----------------------------
+What: remove bogus DV presets V4L2_DV_1080I29_97, V4L2_DV_1080I30 and
+ V4L2_DV_1080I25
+When: 3.6
+Why: These HDTV formats do not exist and were added by a confused mind
+ (that was me, to be precise...)
+Who: Hans Verkuil <hans.verkuil@cisco.com>
+
+----------------------------
+
+What: V4L2_CID_HCENTER, V4L2_CID_VCENTER V4L2 controls
+When: 3.7
+Why: The V4L2_CID_VCENTER, V4L2_CID_HCENTER controls have been deprecated
+ for about 4 years and they are not used by any mainline driver.
+ There are newer controls (V4L2_CID_PAN*, V4L2_CID_TILT*) that provide
+ similar functionality.
+Who: Sylwester Nawrocki <sylvester.nawrocki@gmail.com>
+
+----------------------------
+
+What: cgroup option updates via remount
+When: March 2013
+Why: Remount currently allows changing bound subsystems and
+ release_agent. Rebinding is hardly useful as it only works
+ when the hierarchy is empty and release_agent itself should be
+ replaced with conventional fsnotify.
+
+----------------------------
++
+ What: KVM debugfs statistics
+ When: 2013
+ Why: KVM tracepoints provide mostly equivalent information in a much more
+ flexible fashion.
+
++----------------------------
#define H_MR_CONDITION -43
#define H_NOT_ENOUGH_RESOURCES -44
#define H_R_STATE -45
-#define H_RESCINDEND -46
-#define H_MULTI_THREADS_ACTIVE -9005
+#define H_RESCINDED -46
+#define H_P2 -55
+#define H_P3 -56
+#define H_P4 -57
+#define H_P5 -58
+#define H_P6 -59
+#define H_P7 -60
+#define H_P8 -61
+#define H_P9 -62
+#define H_TOO_BIG -64
+#define H_OVERLAP -68
+#define H_INTERRUPT -69
+#define H_BAD_DATA -70
+#define H_NOT_ACTIVE -71
+#define H_SG_LIST -72
+#define H_OP_MODE -73
+#define H_COP_HW -74
+#define H_UNSUPPORTED_FLAG_START -256
+#define H_UNSUPPORTED_FLAG_END -511
+#define H_MULTI_THREADS_ACTIVE -9005
+#define H_OUTSTANDING_COP_OPS -9006
/* Long Busy is a condition that can be returned by the firmware
#define H_PP1 (1UL<<(63-62))
#define H_PP2 (1UL<<(63-63))
+ /* Flags for H_REGISTER_VPA subfunction field */
+ #define H_VPA_FUNC_SHIFT (63-18) /* Bit posn of subfunction code */
+ #define H_VPA_FUNC_MASK 7UL
+ #define H_VPA_REG_VPA 1UL /* Register Virtual Processor Area */
+ #define H_VPA_REG_DTL 2UL /* Register Dispatch Trace Log */
+ #define H_VPA_REG_SLB 3UL /* Register SLB shadow buffer */
+ #define H_VPA_DEREG_VPA 5UL /* Deregister Virtual Processor Area */
+ #define H_VPA_DEREG_DTL 6UL /* Deregister Dispatch Trace Log */
+ #define H_VPA_DEREG_SLB 7UL /* Deregister SLB shadow buffer */
+
/* VASI States */
#define H_VASI_INVALID 0
#define H_VASI_ENABLED 1
#define H_GET_MPP 0x2D4
#define H_HOME_NODE_ASSOCIATIVITY 0x2EC
#define H_BEST_ENERGY 0x2F4
+#define H_RANDOM 0x300
+#define H_COP 0x304
#define H_GET_MPP_X 0x314
#define MAX_HCALL_OPCODE H_GET_MPP_X
u64 sdr1;
u64 hior;
u64 msr_mask;
- u64 vsid_next;
#ifdef CONFIG_PPC_BOOK3S_32
u32 vsid_pool[VSID_POOL_SIZE];
+ u32 vsid_next;
#else
- u64 vsid_first;
- u64 vsid_max;
+ u64 proto_vsid_first;
+ u64 proto_vsid_max;
+ u64 proto_vsid_next;
#endif
int context_id[SID_CONTEXTS];
#define INS_DCBZ 0x7c0007ec
+ /* LPIDs we support with this build -- runtime limit may be lower */
+ #define KVMPPC_NR_LPIDS (LPID_RSVD + 1)
+
#endif /* __ASM_KVM_BOOK3S_H__ */
void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp);
void release_thread(struct task_struct *);
-/* Prepare to copy thread state - unlazy all lazy status */
-extern void prepare_to_copy(struct task_struct *tsk);
-
/* Create a new kernel thread. */
extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
void* kvm_shadow_vcpu; /* KVM internal data */
#endif /* CONFIG_KVM_BOOK3S_32_HANDLER */
+ #if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
+ struct kvm_vcpu *kvm_vcpu;
+ #endif
#ifdef CONFIG_PPC64
unsigned long dscr;
int dscr_inherit;
enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
extern int powersave_nap; /* set if nap mode can be used in idle loop */
-void cpu_idle_wait(void);
#ifdef CONFIG_PSERIES_IDLE
extern void update_smt_snooze_delay(int snooze);
#ifndef __ASM_POWERPC_REG_BOOKE_H__
#define __ASM_POWERPC_REG_BOOKE_H__
-#ifdef CONFIG_BOOKE_WDT
-extern u32 booke_wdt_enabled;
-extern u32 booke_wdt_period;
-#endif /* CONFIG_BOOKE_WDT */
-
/* Machine State Register (MSR) Fields */
#define MSR_GS (1<<28) /* Guest state */
#define MSR_UCLE (1<<26) /* User-mode cache lock enable */
#define SPRN_SPRG7W 0x117 /* Special Purpose Register General 7 Write */
#define SPRN_EPCR 0x133 /* Embedded Processor Control Register */
#define SPRN_DBCR2 0x136 /* Debug Control Register 2 */
+ #define SPRN_MSRP 0x137 /* MSR Protect Register */
#define SPRN_IAC3 0x13A /* Instruction Address Compare 3 */
#define SPRN_IAC4 0x13B /* Instruction Address Compare 4 */
#define SPRN_DVC1 0x13E /* Data Value Compare Register 1 */
#define SPRN_DVC2 0x13F /* Data Value Compare Register 2 */
+ #define SPRN_LPID 0x152 /* Logical Partition ID */
#define SPRN_MAS8 0x155 /* MMU Assist Register 8 */
#define SPRN_TLB0PS 0x158 /* TLB 0 Page Size Register */
#define SPRN_TLB1PS 0x159 /* TLB 1 Page Size Register */
#define SPRN_MAS5_MAS6 0x15c /* MMU Assist Register 5 || 6 */
#define SPRN_MAS8_MAS1 0x15d /* MMU Assist Register 8 || 1 */
#define SPRN_EPTCFG 0x15e /* Embedded Page Table Config */
+ #define SPRN_GSPRG0 0x170 /* Guest SPRG0 */
+ #define SPRN_GSPRG1 0x171 /* Guest SPRG1 */
+ #define SPRN_GSPRG2 0x172 /* Guest SPRG2 */
+ #define SPRN_GSPRG3 0x173 /* Guest SPRG3 */
#define SPRN_MAS7_MAS3 0x174 /* MMU Assist Register 7 || 3 */
#define SPRN_MAS0_MAS1 0x175 /* MMU Assist Register 0 || 1 */
+ #define SPRN_GSRR0 0x17A /* Guest SRR0 */
+ #define SPRN_GSRR1 0x17B /* Guest SRR1 */
+ #define SPRN_GEPR 0x17C /* Guest EPR */
+ #define SPRN_GDEAR 0x17D /* Guest DEAR */
+ #define SPRN_GPIR 0x17E /* Guest PIR */
+ #define SPRN_GESR 0x17F /* Guest Exception Syndrome Register */
#define SPRN_IVOR0 0x190 /* Interrupt Vector Offset Register 0 */
#define SPRN_IVOR1 0x191 /* Interrupt Vector Offset Register 1 */
#define SPRN_IVOR2 0x192 /* Interrupt Vector Offset Register 2 */
#define SPRN_IVOR39 0x1B1 /* Interrupt Vector Offset Register 39 */
#define SPRN_IVOR40 0x1B2 /* Interrupt Vector Offset Register 40 */
#define SPRN_IVOR41 0x1B3 /* Interrupt Vector Offset Register 41 */
+ #define SPRN_GIVOR2 0x1B8 /* Guest IVOR2 */
+ #define SPRN_GIVOR3 0x1B9 /* Guest IVOR3 */
+ #define SPRN_GIVOR4 0x1BA /* Guest IVOR4 */
+ #define SPRN_GIVOR8 0x1BB /* Guest IVOR8 */
+ #define SPRN_GIVOR13 0x1BC /* Guest IVOR13 */
+ #define SPRN_GIVOR14 0x1BD /* Guest IVOR14 */
+ #define SPRN_GIVPR 0x1BF /* Guest IVPR */
#define SPRN_SPEFSCR 0x200 /* SPE & Embedded FP Status & Control */
#define SPRN_BBEAR 0x201 /* Branch Buffer Entry Address Register */
#define SPRN_BBTAR 0x202 /* Branch Buffer Target Address Register */
#define MCSR_LDG 0x00002000UL /* Guarded Load */
#define MCSR_TLBSYNC 0x00000002UL /* Multiple tlbsyncs detected */
#define MCSR_BSL2_ERR 0x00000001UL /* Backside L2 cache error */
+
+ #define MSRP_UCLEP 0x04000000 /* Protect MSR[UCLE] */
+ #define MSRP_DEP 0x00000200 /* Protect MSR[DE] */
+ #define MSRP_PMMP 0x00000004 /* Protect MSR[PMM] */
#endif
#ifdef CONFIG_E200
#define SPRN_EPCR_DMIUH 0x00400000 /* Disable MAS Interrupt updates
* for hypervisor */
+ /* Bit definitions for EPLC/EPSC */
+ #define EPC_EPR 0x80000000 /* 1 = user, 0 = kernel */
+ #define EPC_EPR_SHIFT 31
+ #define EPC_EAS 0x40000000 /* Address Space */
+ #define EPC_EAS_SHIFT 30
+ #define EPC_EGS 0x20000000 /* 1 = guest, 0 = hypervisor */
+ #define EPC_EGS_SHIFT 29
+ #define EPC_ELPID 0x00ff0000
+ #define EPC_ELPID_SHIFT 16
+ #define EPC_EPID 0x00003fff
+ #define EPC_EPID_SHIFT 0
/*
* The IBM-403 is an even more odd special case, as it is much
struct thread_struct *next);
extern void giveup_fpu(struct task_struct *);
+ extern void load_up_fpu(void);
extern void disable_kernel_fp(void);
extern void enable_kernel_fp(void);
extern void flush_fp_to_thread(struct task_struct *);
extern void enable_kernel_altivec(void);
-extern void giveup_altivec(struct task_struct *);
extern void load_up_altivec(struct task_struct *);
extern int emulate_altivec(struct pt_regs *);
extern void __giveup_vsx(struct task_struct *);
#ifdef CONFIG_ALTIVEC
extern void flush_altivec_to_thread(struct task_struct *);
+extern void giveup_altivec(struct task_struct *);
+extern void giveup_altivec_notask(void);
#else
static inline void flush_altivec_to_thread(struct task_struct *t)
{
}
+static inline void giveup_altivec(struct task_struct *t)
+{
+}
#endif
#ifdef CONFIG_VSX
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
#endif
+ #ifdef CONFIG_KVM_BOOKE_HV
+ DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu));
+ #endif
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
DEFINE(SLBSHADOW_STACKESID,
offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid));
DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area));
- DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
- DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
- DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int));
- DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int));
DEFINE(LPPACA_PMCINUSE, offsetof(struct lppaca, pmcregs_in_use));
DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx));
DEFINE(LPPACA_YIELDCOUNT, offsetof(struct lppaca, yield_count));
#ifdef CONFIG_KVM
DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
+ DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave));
DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr));
DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4));
DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6));
+ DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
+ DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
+
/* book3s */
#ifdef CONFIG_KVM_BOOK3S_64_HV
- DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
+ DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
#endif
#ifdef CONFIG_PPC_BOOK3S
- DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr));
DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr));
DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions));
DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded));
DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
- DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa));
DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
HSTATE_FIELD(HSTATE_NAPPING, napping);
#ifdef CONFIG_KVM_BOOK3S_64_HV
+ HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req);
+ HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr));
#endif
+ #ifdef CONFIG_KVM_BOOKE_HV
+ DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4));
+ DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6));
+ DEFINE(VCPU_EPLC, offsetof(struct kvm_vcpu, arch.eplc));
+ #endif
+
#ifdef CONFIG_KVM_EXIT_TIMING
DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
arch.timing_exit.tv32.tbu));
GET_PACA(r13)
#ifdef CONFIG_KVM_BOOK3S_64_HV
- lbz r0,PACAPROCSTART(r13)
- cmpwi r0,0x80
- bne 1f
- li r0,1
- stb r0,PACAPROCSTART(r13)
+ li r0,KVM_HWTHREAD_IN_KERNEL
+ stb r0,HSTATE_HWTHREAD_STATE(r13)
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ sync
+ lbz r0,HSTATE_HWTHREAD_REQ(r13)
+ cmpwi r0,0
+ beq 1f
b kvm_start_guest
1:
#endif
data_access_pSeries:
HMT_MEDIUM
SET_SCRATCH0(r13)
-#ifndef CONFIG_POWER4_ONLY
BEGIN_FTR_SECTION
b data_access_check_stab
data_access_not_stab:
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
-#endif
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD,
KVMTEST, 0x300)
EXC_STD, KVMTEST, 0x200)
KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200)
-#ifndef CONFIG_POWER4_ONLY
/* moved from 0x300 */
data_access_check_stab:
GET_PACA(r13)
GET_SCRATCH0(r10)
std r10,PACA_EXSLB+EX_R13(r13)
EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD)
-#endif /* CONFIG_POWER4_ONLY */
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
std r3,_DAR(r1)
std r4,_DSISR(r1)
bl .save_nvgprs
+ DISABLE_INTS
addi r3,r1,STACK_FRAME_OVERHEAD
- ENABLE_INTS
bl .alignment_exception
b .ret_from_except
interrupt_base:
/* Critical Input Interrupt */
- CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception)
+ CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception)
/* Machine Check Interrupt */
- CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
+ CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \
+ machine_check_exception)
MCHECK_EXCEPTION(0x0210, MachineCheckA, machine_check_exception)
/* Data Storage Interrupt */
INSTRUCTION_STORAGE_EXCEPTION
/* External Input Interrupt */
- EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, \
+ do_IRQ, EXC_XFER_LITE)
/* Alignment Interrupt */
ALIGNMENT_EXCEPTION
#ifdef CONFIG_PPC_FPU
FP_UNAVAILABLE_EXCEPTION
#else
- EXCEPTION(0x2010, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \
+ FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
#endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- NORMAL_EXCEPTION_PROLOG
+ NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL)
EXC_XFER_EE_LITE(0x0c00, DoSyscall)
/* Auxiliary Processor Unavailable Interrupt */
- EXCEPTION(0x2020, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
+ AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
- EXCEPTION(0x1010, FixedIntervalTimer, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \
+ unknown_exception, EXC_XFER_EE)
/* Watchdog Timer Interrupt */
/* TODO: Add watchdog support */
#ifdef CONFIG_BOOKE_WDT
- CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException)
+ CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, WatchdogException)
#else
- CRITICAL_EXCEPTION(0x1020, WatchdogTimer, unknown_exception)
+ CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, unknown_exception)
#endif
/* Data TLB Error Interrupt */
sync
blr
-/*
- * extern void giveup_altivec(struct task_struct *prev)
- *
- * The 44x core does not have an AltiVec unit.
- */
-_GLOBAL(giveup_altivec)
- blr
-
/*
* extern void giveup_fpu(struct task_struct *prev)
*
interrupt_base:
/* Critical Input Interrupt */
- CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception)
+ CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception)
/* Machine Check Interrupt */
#ifdef CONFIG_E200
/* no RFMCI, MCSRRs on E200 */
- CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
+ CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \
+ machine_check_exception)
#else
MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
#endif
/* Data Storage Interrupt */
START_EXCEPTION(DataStorage)
- NORMAL_EXCEPTION_PROLOG
+ NORMAL_EXCEPTION_PROLOG(DATA_STORAGE)
mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
stw r5,_ESR(r11)
mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
INSTRUCTION_STORAGE_EXCEPTION
/* External Input Interrupt */
- EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ, EXC_XFER_LITE)
/* Alignment Interrupt */
ALIGNMENT_EXCEPTION
#else
#ifdef CONFIG_E200
/* E200 treats 'normal' floating point instructions as FP Unavail exception */
- EXCEPTION(0x0800, FloatingPointUnavailable, program_check_exception, EXC_XFER_EE)
+ EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
+ program_check_exception, EXC_XFER_EE)
#else
- EXCEPTION(0x0800, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
+ unknown_exception, EXC_XFER_EE)
#endif
#endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- NORMAL_EXCEPTION_PROLOG
+ NORMAL_EXCEPTION_PROLOG(SYSCALL)
EXC_XFER_EE_LITE(0x0c00, DoSyscall)
/* Auxiliary Processor Unavailable Interrupt */
- EXCEPTION(0x2900, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
+ unknown_exception, EXC_XFER_EE)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
- EXCEPTION(0x3100, FixedIntervalTimer, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x3100, FIT, FixedIntervalTimer, \
+ unknown_exception, EXC_XFER_EE)
/* Watchdog Timer Interrupt */
#ifdef CONFIG_BOOKE_WDT
- CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException)
+ CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, WatchdogException)
#else
- CRITICAL_EXCEPTION(0x3200, WatchdogTimer, unknown_exception)
+ CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, unknown_exception)
#endif
/* Data TLB Error Interrupt */
mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
mfspr r10, SPRN_SPRG_THREAD
stw r11, THREAD_NORMSAVE(0)(r10)
+ #ifdef CONFIG_KVM_BOOKE_HV
+ BEGIN_FTR_SECTION
+ mfspr r11, SPRN_SRR1
+ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+ #endif
stw r12, THREAD_NORMSAVE(1)(r10)
stw r13, THREAD_NORMSAVE(2)(r10)
mfcr r13
stw r13, THREAD_NORMSAVE(3)(r10)
+ DO_KVM BOOKE_INTERRUPT_DTLB_MISS SPRN_SRR1
mfspr r10, SPRN_DEAR /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
mfspr r10, SPRN_SPRG_THREAD
stw r11, THREAD_NORMSAVE(0)(r10)
+ #ifdef CONFIG_KVM_BOOKE_HV
+ BEGIN_FTR_SECTION
+ mfspr r11, SPRN_SRR1
+ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+ #endif
stw r12, THREAD_NORMSAVE(1)(r10)
stw r13, THREAD_NORMSAVE(2)(r10)
mfcr r13
stw r13, THREAD_NORMSAVE(3)(r10)
+ DO_KVM BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR1
mfspr r10, SPRN_SRR0 /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
#ifdef CONFIG_SPE
/* SPE Unavailable */
START_EXCEPTION(SPEUnavailable)
- NORMAL_EXCEPTION_PROLOG
+ NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL)
bne load_up_spe
addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_EE_LITE(0x2010, KernelSPE)
#else
- EXCEPTION(0x2020, SPEUnavailable, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
+ unknown_exception, EXC_XFER_EE)
#endif /* CONFIG_SPE */
/* SPE Floating Point Data */
#ifdef CONFIG_SPE
- EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE);
+ EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, \
+ SPEFloatingPointException, EXC_XFER_EE);
/* SPE Floating Point Round */
- EXCEPTION(0x2050, SPEFloatingPointRound, SPEFloatingPointRoundException, EXC_XFER_EE)
+ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
+ SPEFloatingPointRoundException, EXC_XFER_EE)
#else
- EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2050, SPEFloatingPointRound, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, \
+ unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
+ unknown_exception, EXC_XFER_EE)
#endif /* CONFIG_SPE */
/* Performance Monitor */
- EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD)
+ EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \
+ performance_monitor_exception, EXC_XFER_STD)
- EXCEPTION(0x2070, Doorbell, doorbell_exception, EXC_XFER_STD)
+ EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception, EXC_XFER_STD)
- CRITICAL_EXCEPTION(0x2080, CriticalDoorbell, unknown_exception)
+ CRITICAL_EXCEPTION(0x2080, DOORBELL_CRITICAL, \
+ CriticalDoorbell, unknown_exception)
/* Debug Interrupt */
DEBUG_DEBUG_EXCEPTION
DEBUG_CRIT_EXCEPTION
+ GUEST_DOORBELL_EXCEPTION
+
+ CRITICAL_EXCEPTION(0, GUEST_DBELL_CRIT, CriticalGuestDoorbell, \
+ unknown_exception)
+
+ /* Hypercall */
+ EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_EE)
+
+ /* Embedded Hypervisor Privilege */
+ EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_EE)
+
/*
* Local functions
*/
mtspr SPRN_IVOR36,r3
li r3,CriticalDoorbell@l
mtspr SPRN_IVOR37,r3
+
+ /*
+ * We only want to touch IVOR38-41 if we're running on hardware
+ * that supports category E.HV. The architectural way to determine
+ * this is MMUCFG[LPIDSIZE].
+ */
+ mfspr r3, SPRN_MMUCFG
+ andis. r3, r3, MMUCFG_LPIDSIZE@h
+ beq no_hv
+ li r3,GuestDoorbell@l
+ mtspr SPRN_IVOR38,r3
+ li r3,CriticalGuestDoorbell@l
+ mtspr SPRN_IVOR39,r3
+ li r3,Hypercall@l
+ mtspr SPRN_IVOR40,r3
+ li r3,Ehvpriv@l
+ mtspr SPRN_IVOR41,r3
+ skip_hv_ivors:
sync
blr
+ no_hv:
+ lwz r3, CPU_SPEC_FEATURES(r5)
+ rlwinm r3, r3, 0, ~CPU_FTR_EMB_HV
+ stw r3, CPU_SPEC_FEATURES(r5)
+ b skip_hv_ivors
-/*
- * extern void giveup_altivec(struct task_struct *prev)
- *
- * The e500 core does not have an AltiVec unit.
- */
-_GLOBAL(giveup_altivec)
- blr
-
#ifdef CONFIG_SPE
/*
* extern void giveup_spe(struct task_struct *prev)
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
#define MAX_LPID_970 63
- #define NR_LPIDS (LPID_RSVD + 1)
- unsigned long lpid_inuse[BITS_TO_LONGS(NR_LPIDS)];
long kvmppc_alloc_hpt(struct kvm *kvm)
{
unsigned long hpt;
- unsigned long lpid;
+ long lpid;
struct revmap_entry *rev;
struct kvmppc_linear_info *li;
}
kvm->arch.revmap = rev;
- /* Allocate the guest's logical partition ID */
- do {
- lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS);
- if (lpid >= NR_LPIDS) {
- pr_err("kvm_alloc_hpt: No LPIDs free\n");
- goto out_freeboth;
- }
- } while (test_and_set_bit(lpid, lpid_inuse));
+ lpid = kvmppc_alloc_lpid();
+ if (lpid < 0)
+ goto out_freeboth;
kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18);
kvm->arch.lpid = lpid;
void kvmppc_free_hpt(struct kvm *kvm)
{
- clear_bit(kvm->arch.lpid, lpid_inuse);
+ kvmppc_free_lpid(kvm->arch.lpid);
vfree(kvm->arch.revmap);
if (kvm->arch.hpt_li)
kvm_release_hpt(kvm->arch.hpt_li);
if (!cpu_has_feature(CPU_FTR_HVMODE))
return -EINVAL;
- memset(lpid_inuse, 0, sizeof(lpid_inuse));
-
+ /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */
if (cpu_has_feature(CPU_FTR_ARCH_206)) {
host_lpid = mfspr(SPRN_LPID); /* POWER7 */
rsvd_lpid = LPID_RSVD;
rsvd_lpid = MAX_LPID_970;
}
- set_bit(host_lpid, lpid_inuse);
+ kvmppc_init_lpid(rsvd_lpid + 1);
+
+ kvmppc_claim_lpid(host_lpid);
/* rsvd_lpid is reserved for use in partition switching */
- set_bit(rsvd_lpid, lpid_inuse);
+ kvmppc_claim_lpid(rsvd_lpid);
return 0;
}
!(memslot->userspace_addr & (s - 1))) {
start &= ~(s - 1);
pgsize = s;
+ get_page(hpage);
+ put_page(page);
page = hpage;
}
}
err = 0;
out:
- if (got) {
- if (PageHuge(page))
- page = compound_head(page);
+ if (got)
put_page(page);
- }
return err;
up_err:
}
static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned long gpa, int is_store)
+ unsigned long gpa, gva_t ea, int is_store)
{
int ret;
u32 last_inst;
*/
vcpu->arch.paddr_accessed = gpa;
+ vcpu->arch.vaddr_accessed = ea;
return kvmppc_emulate_mmio(run, vcpu);
}
/* No memslot means it's an emulated MMIO region */
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1));
- return kvmppc_hv_emulate_mmio(run, vcpu, gpa,
+ return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
dsisr & DSISR_ISSTORE);
}
SetPageDirty(page);
out_put:
- if (page)
- put_page(page);
+ if (page) {
+ /*
+ * We drop pages[0] here, not page because page might
+ * have been set to the head page of a compound, but
+ * we have to drop the reference on the correct tail
+ * page to match the get inside gup()
+ */
+ put_page(pages[0]);
+ }
return ret;
out_unlock:
pa = *physp;
}
page = pfn_to_page(pa >> PAGE_SHIFT);
+ get_page(page);
} else {
hva = gfn_to_hva_memslot(memslot, gfn);
npages = get_user_pages_fast(hva, 1, 1, pages);
page = compound_head(page);
psize <<= compound_order(page);
}
- if (!kvm->arch.using_mmu_notifiers)
- get_page(page);
offset = gpa & (psize - 1);
if (nb_ret)
*nb_ret = psize - offset;
{
struct page *page = virt_to_page(va);
- page = compound_head(page);
put_page(page);
}
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
local_paca->kvm_hstate.kvm_vcpu = vcpu;
- local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore;
+ local_paca->kvm_hstate.kvm_vcore = vc;
+ if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+ vc->stolen_tb += mftb() - vc->preempt_tb;
}
void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+ vc->preempt_tb = mftb();
}
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
vpa->yield_count = 1;
}
+ /* Length for a per-processor buffer is passed in at offset 4 in the buffer */
+ struct reg_vpa {
+ u32 dummy;
+ union {
+ u16 hword;
+ u32 word;
+ } length;
+ };
+
+ static int vpa_is_registered(struct kvmppc_vpa *vpap)
+ {
+ if (vpap->update_pending)
+ return vpap->next_gpa != 0;
+ return vpap->pinned_addr != NULL;
+ }
+
static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
unsigned long flags,
unsigned long vcpuid, unsigned long vpa)
unsigned long len, nb;
void *va;
struct kvm_vcpu *tvcpu;
- int err = H_PARAMETER;
+ int err;
+ int subfunc;
+ struct kvmppc_vpa *vpap;
tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
if (!tvcpu)
return H_PARAMETER;
- flags >>= 63 - 18;
- flags &= 7;
- if (flags == 0 || flags == 4)
- return H_PARAMETER;
- if (flags < 4) {
- if (vpa & 0x7f)
+ subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
+ if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
+ subfunc == H_VPA_REG_SLB) {
+ /* Registering new area - address must be cache-line aligned */
+ if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa)
return H_PARAMETER;
- if (flags >= 2 && !tvcpu->arch.vpa)
- return H_RESOURCE;
- /* registering new area; convert logical addr to real */
+
+ /* convert logical addr to kernel addr and read length */
va = kvmppc_pin_guest_page(kvm, vpa, &nb);
if (va == NULL)
return H_PARAMETER;
- if (flags <= 1)
- len = *(unsigned short *)(va + 4);
+ if (subfunc == H_VPA_REG_VPA)
+ len = ((struct reg_vpa *)va)->length.hword;
else
- len = *(unsigned int *)(va + 4);
- if (len > nb)
- goto out_unpin;
- switch (flags) {
- case 1: /* register VPA */
- if (len < 640)
- goto out_unpin;
- if (tvcpu->arch.vpa)
- kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa);
- tvcpu->arch.vpa = va;
- init_vpa(vcpu, va);
- break;
- case 2: /* register DTL */
- if (len < 48)
- goto out_unpin;
- len -= len % 48;
- if (tvcpu->arch.dtl)
- kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl);
- tvcpu->arch.dtl = va;
- tvcpu->arch.dtl_end = va + len;
+ len = ((struct reg_vpa *)va)->length.word;
+ kvmppc_unpin_guest_page(kvm, va);
+
+ /* Check length */
+ if (len > nb || len < sizeof(struct reg_vpa))
+ return H_PARAMETER;
+ } else {
+ vpa = 0;
+ len = 0;
+ }
+
+ err = H_PARAMETER;
+ vpap = NULL;
+ spin_lock(&tvcpu->arch.vpa_update_lock);
+
+ switch (subfunc) {
+ case H_VPA_REG_VPA: /* register VPA */
+ if (len < sizeof(struct lppaca))
break;
- case 3: /* register SLB shadow buffer */
- if (len < 16)
- goto out_unpin;
- if (tvcpu->arch.slb_shadow)
- kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow);
- tvcpu->arch.slb_shadow = va;
+ vpap = &tvcpu->arch.vpa;
+ err = 0;
+ break;
+
+ case H_VPA_REG_DTL: /* register DTL */
+ if (len < sizeof(struct dtl_entry))
break;
- }
- } else {
- switch (flags) {
- case 5: /* unregister VPA */
- if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl)
- return H_RESOURCE;
- if (!tvcpu->arch.vpa)
- break;
- kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa);
- tvcpu->arch.vpa = NULL;
+ len -= len % sizeof(struct dtl_entry);
+
+ /* Check that they have previously registered a VPA */
+ err = H_RESOURCE;
+ if (!vpa_is_registered(&tvcpu->arch.vpa))
break;
- case 6: /* unregister DTL */
- if (!tvcpu->arch.dtl)
- break;
- kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl);
- tvcpu->arch.dtl = NULL;
+
+ vpap = &tvcpu->arch.dtl;
+ err = 0;
+ break;
+
+ case H_VPA_REG_SLB: /* register SLB shadow buffer */
+ /* Check that they have previously registered a VPA */
+ err = H_RESOURCE;
+ if (!vpa_is_registered(&tvcpu->arch.vpa))
break;
- case 7: /* unregister SLB shadow buffer */
- if (!tvcpu->arch.slb_shadow)
- break;
- kvmppc_unpin_guest_page(kvm, tvcpu->arch.slb_shadow);
- tvcpu->arch.slb_shadow = NULL;
+
+ vpap = &tvcpu->arch.slb_shadow;
+ err = 0;
+ break;
+
+ case H_VPA_DEREG_VPA: /* deregister VPA */
+ /* Check they don't still have a DTL or SLB buf registered */
+ err = H_RESOURCE;
+ if (vpa_is_registered(&tvcpu->arch.dtl) ||
+ vpa_is_registered(&tvcpu->arch.slb_shadow))
break;
- }
+
+ vpap = &tvcpu->arch.vpa;
+ err = 0;
+ break;
+
+ case H_VPA_DEREG_DTL: /* deregister DTL */
+ vpap = &tvcpu->arch.dtl;
+ err = 0;
+ break;
+
+ case H_VPA_DEREG_SLB: /* deregister SLB shadow buffer */
+ vpap = &tvcpu->arch.slb_shadow;
+ err = 0;
+ break;
+ }
+
+ if (vpap) {
+ vpap->next_gpa = vpa;
+ vpap->len = len;
+ vpap->update_pending = 1;
}
- return H_SUCCESS;
- out_unpin:
- kvmppc_unpin_guest_page(kvm, va);
+ spin_unlock(&tvcpu->arch.vpa_update_lock);
+
return err;
}
+ static void kvmppc_update_vpa(struct kvm *kvm, struct kvmppc_vpa *vpap)
+ {
+ void *va;
+ unsigned long nb;
+
+ vpap->update_pending = 0;
+ va = NULL;
+ if (vpap->next_gpa) {
+ va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
+ if (nb < vpap->len) {
+ /*
+ * If it's now too short, it must be that userspace
+ * has changed the mappings underlying guest memory,
+ * so unregister the region.
+ */
+ kvmppc_unpin_guest_page(kvm, va);
+ va = NULL;
+ }
+ }
+ if (vpap->pinned_addr)
+ kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
+ vpap->pinned_addr = va;
+ if (va)
+ vpap->pinned_end = va + vpap->len;
+ }
+
+ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
+ {
+ struct kvm *kvm = vcpu->kvm;
+
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ if (vcpu->arch.vpa.update_pending) {
+ kvmppc_update_vpa(kvm, &vcpu->arch.vpa);
+ init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
+ }
+ if (vcpu->arch.dtl.update_pending) {
+ kvmppc_update_vpa(kvm, &vcpu->arch.dtl);
+ vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
+ vcpu->arch.dtl_index = 0;
+ }
+ if (vcpu->arch.slb_shadow.update_pending)
+ kvmppc_update_vpa(kvm, &vcpu->arch.slb_shadow);
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ }
+
+ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
+ struct kvmppc_vcore *vc)
+ {
+ struct dtl_entry *dt;
+ struct lppaca *vpa;
+ unsigned long old_stolen;
+
+ dt = vcpu->arch.dtl_ptr;
+ vpa = vcpu->arch.vpa.pinned_addr;
+ old_stolen = vcpu->arch.stolen_logged;
+ vcpu->arch.stolen_logged = vc->stolen_tb;
+ if (!dt || !vpa)
+ return;
+ memset(dt, 0, sizeof(struct dtl_entry));
+ dt->dispatch_reason = 7;
+ dt->processor_id = vc->pcpu + vcpu->arch.ptid;
+ dt->timebase = mftb();
+ dt->enqueue_to_dispatch_time = vc->stolen_tb - old_stolen;
+ dt->srr0 = kvmppc_get_pc(vcpu);
+ dt->srr1 = vcpu->arch.shregs.msr;
+ ++dt;
+ if (dt == vcpu->arch.dtl.pinned_end)
+ dt = vcpu->arch.dtl.pinned_addr;
+ vcpu->arch.dtl_ptr = dt;
+ /* order writing *dt vs. writing vpa->dtl_idx */
+ smp_wmb();
+ vpa->dtl_idx = ++vcpu->arch.dtl_index;
+ }
+
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
unsigned long req = kvmppc_get_gpr(vcpu, 3);
/* default to host PVR, since we can't spoof it */
vcpu->arch.pvr = mfspr(SPRN_PVR);
kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
+ spin_lock_init(&vcpu->arch.vpa_update_lock);
kvmppc_mmu_book3s_hv_init(vcpu);
INIT_LIST_HEAD(&vcore->runnable_threads);
spin_lock_init(&vcore->lock);
init_waitqueue_head(&vcore->wq);
+ vcore->preempt_tb = mftb();
}
kvm->arch.vcores[core] = vcore;
}
++vcore->num_threads;
spin_unlock(&vcore->lock);
vcpu->arch.vcore = vcore;
+ vcpu->arch.stolen_logged = vcore->stolen_tb;
vcpu->arch.cpu_type = KVM_CPU_3S_64;
kvmppc_sanity_check(vcpu);
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.dtl)
- kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl);
- if (vcpu->arch.slb_shadow)
- kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow);
- if (vcpu->arch.vpa)
- kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa);
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ if (vcpu->arch.dtl.pinned_addr)
+ kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr);
+ if (vcpu->arch.slb_shadow.pinned_addr)
+ kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
+ if (vcpu->arch.vpa.pinned_addr)
+ kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
+ spin_unlock(&vcpu->arch.vpa_update_lock);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
list_del(&vcpu->arch.run_list);
}
+ static int kvmppc_grab_hwthread(int cpu)
+ {
+ struct paca_struct *tpaca;
+ long timeout = 1000;
+
+ tpaca = &paca[cpu];
+
+ /* Ensure the thread won't go into the kernel if it wakes */
+ tpaca->kvm_hstate.hwthread_req = 1;
+
+ /*
+ * If the thread is already executing in the kernel (e.g. handling
+ * a stray interrupt), wait for it to get back to nap mode.
+ * The smp_mb() is to ensure that our setting of hwthread_req
+ * is visible before we look at hwthread_state, so if this
+ * races with the code at system_reset_pSeries and the thread
+ * misses our setting of hwthread_req, we are sure to see its
+ * setting of hwthread_state, and vice versa.
+ */
+ smp_mb();
+ while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
+ if (--timeout <= 0) {
+ pr_err("KVM: couldn't grab cpu %d\n", cpu);
+ return -EBUSY;
+ }
+ udelay(1);
+ }
+ return 0;
+ }
+
+ static void kvmppc_release_hwthread(int cpu)
+ {
+ struct paca_struct *tpaca;
+
+ tpaca = &paca[cpu];
+ tpaca->kvm_hstate.hwthread_req = 0;
+ tpaca->kvm_hstate.kvm_vcpu = NULL;
+ }
+
static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
{
int cpu;
smp_wmb();
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
if (vcpu->arch.ptid) {
- tpaca->cpu_start = 0x80;
- wmb();
+ kvmppc_grab_hwthread(cpu);
xics_wake_cpu(cpu);
++vc->n_woken;
}
struct kvm_vcpu *vcpu, *vcpu0, *vnext;
long ret;
u64 now;
- int ptid;
+ int ptid, i;
/* don't start if any threads have a signal pending */
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
vc->nap_count = 0;
vc->entry_exit_count = 0;
vc->vcore_state = VCORE_RUNNING;
+ vc->stolen_tb += mftb() - vc->preempt_tb;
vc->in_guest = 0;
vc->pcpu = smp_processor_id();
vc->napping_threads = 0;
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
kvmppc_start_thread(vcpu);
+ if (vcpu->arch.vpa.update_pending ||
+ vcpu->arch.slb_shadow.update_pending ||
+ vcpu->arch.dtl.update_pending)
+ kvmppc_update_vpas(vcpu);
+ kvmppc_create_dtl_entry(vcpu, vc);
+ }
+ /* Grab any remaining hw threads so they can't go into the kernel */
+ for (i = ptid; i < threads_per_core; ++i)
+ kvmppc_grab_hwthread(vc->pcpu + i);
preempt_disable();
spin_unlock(&vc->lock);
kvm_guest_enter();
__kvmppc_vcore_entry(NULL, vcpu0);
+ for (i = 0; i < threads_per_core; ++i)
+ kvmppc_release_hwthread(vc->pcpu + i);
spin_lock(&vc->lock);
/* disable sending of IPIs on virtual external irqs */
spin_lock(&vc->lock);
out:
vc->vcore_state = VCORE_INACTIVE;
+ vc->preempt_tb = mftb();
list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
arch.run_list) {
if (vcpu->arch.ret != RESUME_GUEST) {
spin_lock(&vc->lock);
continue;
}
+ vc->runner = vcpu;
n_ceded = 0;
list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
n_ceded += v->arch.ceded;
wake_up(&v->arch.cpu_run);
}
}
+ vc->runner = NULL;
}
if (signal_pending(current)) {
return r;
}
- static long kvmppc_stt_npages(unsigned long window_size)
- {
- return ALIGN((window_size >> SPAPR_TCE_SHIFT)
- * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
- }
-
- static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
- {
- struct kvm *kvm = stt->kvm;
- int i;
-
- mutex_lock(&kvm->lock);
- list_del(&stt->list);
- for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
- __free_page(stt->pages[i]);
- kfree(stt);
- mutex_unlock(&kvm->lock);
-
- kvm_put_kvm(kvm);
- }
-
- static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
- {
- struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
- struct page *page;
-
- if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
- return VM_FAULT_SIGBUS;
-
- page = stt->pages[vmf->pgoff];
- get_page(page);
- vmf->page = page;
- return 0;
- }
-
- static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
- .fault = kvm_spapr_tce_fault,
- };
-
- static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
- {
- vma->vm_ops = &kvm_spapr_tce_vm_ops;
- return 0;
- }
-
- static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
- {
- struct kvmppc_spapr_tce_table *stt = filp->private_data;
-
- release_spapr_tce_table(stt);
- return 0;
- }
-
- static struct file_operations kvm_spapr_tce_fops = {
- .mmap = kvm_spapr_tce_mmap,
- .release = kvm_spapr_tce_release,
- };
-
- long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
- struct kvm_create_spapr_tce *args)
- {
- struct kvmppc_spapr_tce_table *stt = NULL;
- long npages;
- int ret = -ENOMEM;
- int i;
-
- /* Check this LIOBN hasn't been previously allocated */
- list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
- if (stt->liobn == args->liobn)
- return -EBUSY;
- }
-
- npages = kvmppc_stt_npages(args->window_size);
-
- stt = kzalloc(sizeof(*stt) + npages* sizeof(struct page *),
- GFP_KERNEL);
- if (!stt)
- goto fail;
-
- stt->liobn = args->liobn;
- stt->window_size = args->window_size;
- stt->kvm = kvm;
-
- for (i = 0; i < npages; i++) {
- stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!stt->pages[i])
- goto fail;
- }
-
- kvm_get_kvm(kvm);
-
- mutex_lock(&kvm->lock);
- list_add(&stt->list, &kvm->arch.spapr_tce_tables);
-
- mutex_unlock(&kvm->lock);
-
- return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
- stt, O_RDWR);
-
- fail:
- if (stt) {
- for (i = 0; i < npages; i++)
- if (stt->pages[i])
- __free_page(stt->pages[i]);
-
- kfree(stt);
- }
- return ret;
- }
/* Work out RMLS (real mode limit selector) field value for a given RMA size.
Assumes POWER7 or PPC970. */
return fd;
}
+ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
+ int linux_psize)
+ {
+ struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];
+
+ if (!def->shift)
+ return;
+ (*sps)->page_shift = def->shift;
+ (*sps)->slb_enc = def->sllp;
+ (*sps)->enc[0].page_shift = def->shift;
+ (*sps)->enc[0].pte_enc = def->penc;
+ (*sps)++;
+ }
+
+ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
+ {
+ struct kvm_ppc_one_seg_page_size *sps;
+
+ info->flags = KVM_PPC_PAGE_SIZES_REAL;
+ if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+ info->flags |= KVM_PPC_1T_SEGMENTS;
+ info->slb_size = mmu_slb_size;
+
+ /* We only support these sizes for now, and no muti-size segments */
+ sps = &info->sps[0];
+ kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
+ kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
+ kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);
+
+ return 0;
+ }
+
/*
* Get (and clear) the dirty memory log for a memory slot.
*/
continue;
pfn = physp[j] >> PAGE_SHIFT;
page = pfn_to_page(pfn);
- if (PageHuge(page))
- page = compound_head(page);
SetPageDirty(page);
put_page(page);
}
return EMULATE_FAIL;
}
- int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
return EMULATE_FAIL;
}
- int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
return EMULATE_FAIL;
}
/* First clear RI in our current MSR value */
li r0, MSR_RI
andc r6, r6, r0
- MTMSR_EERI(r6)
- mtsrr0 r9
- mtsrr1 r4
PPC_LL r0, SVCPU_R0(r3)
PPC_LL r1, SVCPU_R1(r3)
PPC_LL r2, SVCPU_R2(r3)
- PPC_LL r4, SVCPU_R4(r3)
PPC_LL r5, SVCPU_R5(r3)
- PPC_LL r6, SVCPU_R6(r3)
PPC_LL r7, SVCPU_R7(r3)
PPC_LL r8, SVCPU_R8(r3)
- PPC_LL r9, SVCPU_R9(r3)
PPC_LL r10, SVCPU_R10(r3)
PPC_LL r11, SVCPU_R11(r3)
PPC_LL r12, SVCPU_R12(r3)
PPC_LL r13, SVCPU_R13(r3)
+ MTMSR_EERI(r6)
+ mtsrr0 r9
+ mtsrr1 r4
+
+ PPC_LL r4, SVCPU_R4(r3)
+ PPC_LL r6, SVCPU_R6(r3)
+ PPC_LL r9, SVCPU_R9(r3)
PPC_LL r3, (SVCPU_R3)(r3)
RFI
/* Save guest PC and MSR */
#ifdef CONFIG_PPC64
BEGIN_FTR_SECTION
- mr r10, r12
- andi. r0,r12,0x2
+ andi. r0, r12, 0x2
+ cmpwi cr1, r0, 0
beq 1f
mfspr r3,SPRN_HSRR0
mfspr r4,SPRN_HSRR1
beq ld_last_prev_inst
cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT
beq- ld_last_inst
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+ cmpwi r12, BOOK3S_INTERRUPT_H_EMUL_ASSIST
+ beq- ld_last_inst
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+#endif
b no_ld_last_inst
#ifdef CONFIG_PPC64
BEGIN_FTR_SECTION
- andi. r0,r10,0x2
- beq 1f
+ beq cr1, 1f
mtspr SPRN_HSRR1, r6
mtspr SPRN_HSRR0, r8
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/msr-index.h>
+#include <asm/asm.h>
#define KVM_MAX_VCPUS 254
#define KVM_SOFT_MAX_VCPUS 160
#define DR7_FIXED_1 0x00000400
#define DR7_VOLATILE 0xffff23ff
+ /* apic attention bits */
+ #define KVM_APIC_CHECK_VAPIC 0
+
/*
* We don't want allocation failures within the mmu code, so we preallocate
* enough memory for a single page fault in a cache.
#endif
int write_flooding_count;
-
- struct rcu_head rcu;
};
struct kvm_pio_request {
u64 efer;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
+ unsigned long apic_attention;
int32_t apic_arb_prio;
int mp_state;
int sipi_vector;
u64 hv_guest_os_id;
u64 hv_hypercall;
- atomic_t reader_counter;
-
#ifdef CONFIG_KVM_MMU_AUDIT
int audit_point;
#endif
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
- int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
- struct kvm_memory_slot *slot);
+ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ gfn_t gfn_offset, unsigned long mask);
void kvm_mmu_zap_all(struct kvm *kvm);
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
__ASM_SIZE(push) " $666b \n\t" \
"call kvm_spurious_fault \n\t" \
".popsection \n\t" \
- ".pushsection __ex_table, \"a\" \n\t" \
- _ASM_PTR " 666b, 667b \n\t" \
- ".popsection"
+ _ASM_EXTABLE(666b, 667b)
#define __kvm_handle_fault_on_reboot(insn) \
____kvm_handle_fault_on_reboot(insn, "")
extern void kvmclock_init(void);
extern int kvm_register_clock(char *txt);
+ #ifdef CONFIG_KVM_CLOCK
+ bool kvm_check_and_clear_guest_paused(void);
+ #else
+ static inline bool kvm_check_and_clear_guest_paused(void)
+ {
+ return false;
+ }
+ #endif /* CONFIG_KVMCLOCK */
/* This instruction is vmcall. On non-VT architectures, it will generate a
* trap that we will then rewrite to the appropriate instruction.
unsigned int eax, ebx, ecx, edx;
char signature[13];
- cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
- memcpy(signature + 0, &ebx, 4);
- memcpy(signature + 4, &ecx, 4);
- memcpy(signature + 8, &edx, 4);
- signature[12] = 0;
+ if (boot_cpu_data.cpuid_level < 0)
+ return 0; /* So we don't blow up on old processors */
+
+ if (cpu_has_hypervisor) {
+ cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
+ memcpy(signature + 0, &ebx, 4);
+ memcpy(signature + 4, &ecx, 4);
+ memcpy(signature + 8, &edx, 4);
+ signature[12] = 0;
- if (strcmp(signature, "KVMKVMKVM") == 0)
- return 1;
+ if (strcmp(signature, "KVMKVMKVM") == 0)
+ return 1;
+ }
return 0;
}
case KVM_CAP_ASYNC_PF:
case KVM_CAP_GET_TSC_KHZ:
case KVM_CAP_PCI_2_3:
+ case KVM_CAP_KVMCLOCK_CTRL:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
return r;
}
+ /*
+ * kvm_set_guest_paused() indicates to the guest kernel that it has been
+ * stopped by the hypervisor. This function will be called from the host only.
+ * EINVAL is returned when the host attempts to set the flag for a guest that
+ * does not support pv clocks.
+ */
+ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
+ {
+ struct pvclock_vcpu_time_info *src = &vcpu->arch.hv_clock;
+ if (!vcpu->arch.time_page)
+ return -EINVAL;
+ src->flags |= PVCLOCK_GUEST_STOPPED;
+ mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT);
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+ return 0;
+ }
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
r = vcpu->arch.virtual_tsc_khz;
goto out;
}
+ case KVM_KVMCLOCK_CTRL: {
+ r = kvm_set_guest_paused(vcpu);
+ goto out;
+ }
default:
r = -EINVAL;
}
}
/**
- * write_protect_slot - write protect a slot for dirty logging
- * @kvm: the kvm instance
- * @memslot: the slot we protect
- * @dirty_bitmap: the bitmap indicating which pages are dirty
- * @nr_dirty_pages: the number of dirty pages
+ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+ * @kvm: kvm instance
+ * @log: slot id and address to which we copy the log
*
- * We have two ways to find all sptes to protect:
- * 1. Use kvm_mmu_slot_remove_write_access() which walks all shadow pages and
- * checks ones that have a spte mapping a page in the slot.
- * 2. Use kvm_mmu_rmap_write_protect() for each gfn found in the bitmap.
+ * We need to keep it in mind that VCPU threads can write to the bitmap
+ * concurrently. So, to avoid losing data, we keep the following order for
+ * each bit:
*
- * Generally speaking, if there are not so many dirty pages compared to the
- * number of shadow pages, we should use the latter.
+ * 1. Take a snapshot of the bit and clear it if needed.
+ * 2. Write protect the corresponding page.
+ * 3. Flush TLB's if needed.
+ * 4. Copy the snapshot to the userspace.
*
- * Note that letting others write into a page marked dirty in the old bitmap
- * by using the remaining tlb entry is not a problem. That page will become
- * write protected again when we flush the tlb and then be reported dirty to
- * the user space by copying the old bitmap.
- */
- static void write_protect_slot(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- unsigned long *dirty_bitmap,
- unsigned long nr_dirty_pages)
- {
- spin_lock(&kvm->mmu_lock);
-
- /* Not many dirty pages compared to # of shadow pages. */
- if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
- unsigned long gfn_offset;
-
- for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) {
- unsigned long gfn = memslot->base_gfn + gfn_offset;
-
- kvm_mmu_rmap_write_protect(kvm, gfn, memslot);
- }
- kvm_flush_remote_tlbs(kvm);
- } else
- kvm_mmu_slot_remove_write_access(kvm, memslot->id);
-
- spin_unlock(&kvm->mmu_lock);
- }
-
- /*
- * Get (and clear) the dirty memory log for a memory slot.
+ * Between 2 and 3, the guest may write to the page using the remaining TLB
+ * entry. This is not a problem because the page will be reported dirty at
+ * step 4 using the snapshot taken before and step 3 ensures that successive
+ * writes will be logged for the next call.
*/
- int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
- struct kvm_dirty_log *log)
+ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
int r;
struct kvm_memory_slot *memslot;
- unsigned long n, nr_dirty_pages;
+ unsigned long n, i;
+ unsigned long *dirty_bitmap;
+ unsigned long *dirty_bitmap_buffer;
+ bool is_dirty = false;
mutex_lock(&kvm->slots_lock);
goto out;
memslot = id_to_memslot(kvm->memslots, log->slot);
+
+ dirty_bitmap = memslot->dirty_bitmap;
r = -ENOENT;
- if (!memslot->dirty_bitmap)
+ if (!dirty_bitmap)
goto out;
n = kvm_dirty_bitmap_bytes(memslot);
- nr_dirty_pages = memslot->nr_dirty_pages;
- /* If nothing is dirty, don't bother messing with page tables. */
- if (nr_dirty_pages) {
- struct kvm_memslots *slots, *old_slots;
- unsigned long *dirty_bitmap, *dirty_bitmap_head;
+ dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+ memset(dirty_bitmap_buffer, 0, n);
- dirty_bitmap = memslot->dirty_bitmap;
- dirty_bitmap_head = memslot->dirty_bitmap_head;
- if (dirty_bitmap == dirty_bitmap_head)
- dirty_bitmap_head += n / sizeof(long);
- memset(dirty_bitmap_head, 0, n);
+ spin_lock(&kvm->mmu_lock);
- r = -ENOMEM;
- slots = kmemdup(kvm->memslots, sizeof(*kvm->memslots), GFP_KERNEL);
- if (!slots)
- goto out;
+ for (i = 0; i < n / sizeof(long); i++) {
+ unsigned long mask;
+ gfn_t offset;
- memslot = id_to_memslot(slots, log->slot);
- memslot->nr_dirty_pages = 0;
- memslot->dirty_bitmap = dirty_bitmap_head;
- update_memslots(slots, NULL);
+ if (!dirty_bitmap[i])
+ continue;
- old_slots = kvm->memslots;
- rcu_assign_pointer(kvm->memslots, slots);
- synchronize_srcu_expedited(&kvm->srcu);
- kfree(old_slots);
+ is_dirty = true;
- write_protect_slot(kvm, memslot, dirty_bitmap, nr_dirty_pages);
+ mask = xchg(&dirty_bitmap[i], 0);
+ dirty_bitmap_buffer[i] = mask;
- r = -EFAULT;
- if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
- goto out;
- } else {
- r = -EFAULT;
- if (clear_user(log->dirty_bitmap, n))
- goto out;
+ offset = i * BITS_PER_LONG;
+ kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
}
+ if (is_dirty)
+ kvm_flush_remote_tlbs(kvm);
+
+ spin_unlock(&kvm->mmu_lock);
+
+ r = -EFAULT;
+ if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+ goto out;
r = 0;
out:
static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
{
if (vcpu->mmio_read_completed) {
- memcpy(val, vcpu->mmio_data, bytes);
trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
- vcpu->mmio_phys_addr, *(u64 *)val);
+ vcpu->mmio_fragments[0].gpa, *(u64 *)val);
vcpu->mmio_read_completed = 0;
return 1;
}
static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes)
{
- memcpy(vcpu->mmio_data, val, bytes);
- memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
+ struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
+
+ memcpy(vcpu->run->mmio.data, frag->data, frag->len);
return X86EMUL_CONTINUE;
}
gpa_t gpa;
int handled, ret;
bool write = ops->write;
-
- if (ops->read_write_prepare &&
- ops->read_write_prepare(vcpu, val, bytes))
- return X86EMUL_CONTINUE;
+ struct kvm_mmio_fragment *frag;
ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
bytes -= handled;
val += handled;
- vcpu->mmio_needed = 1;
- vcpu->run->exit_reason = KVM_EXIT_MMIO;
- vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
- vcpu->mmio_size = bytes;
- vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
- vcpu->run->mmio.is_write = vcpu->mmio_is_write = write;
- vcpu->mmio_index = 0;
+ while (bytes) {
+ unsigned now = min(bytes, 8U);
- return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
+ frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
+ frag->gpa = gpa;
+ frag->data = val;
+ frag->len = now;
+
+ gpa += now;
+ val += now;
+ bytes -= now;
+ }
+ return X86EMUL_CONTINUE;
}
int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
struct read_write_emulator_ops *ops)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+ gpa_t gpa;
+ int rc;
+
+ if (ops->read_write_prepare &&
+ ops->read_write_prepare(vcpu, val, bytes))
+ return X86EMUL_CONTINUE;
+
+ vcpu->mmio_nr_fragments = 0;
/* Crossing a page boundary? */
if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
- int rc, now;
+ int now;
now = -addr & ~PAGE_MASK;
rc = emulator_read_write_onepage(addr, val, now, exception,
bytes -= now;
}
- return emulator_read_write_onepage(addr, val, bytes, exception,
- vcpu, ops);
+ rc = emulator_read_write_onepage(addr, val, bytes, exception,
+ vcpu, ops);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ if (!vcpu->mmio_nr_fragments)
+ return rc;
+
+ gpa = vcpu->mmio_fragments[0].gpa;
+
+ vcpu->mmio_needed = 1;
+ vcpu->mmio_cur_fragment = 0;
+
+ vcpu->run->mmio.len = vcpu->mmio_fragments[0].len;
+ vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
+ vcpu->run->exit_reason = KVM_EXIT_MMIO;
+ vcpu->run->mmio.phys_addr = gpa;
+
+ return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
}
static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
kvm_deliver_pmi(vcpu);
}
- r = kvm_mmu_reload(vcpu);
- if (unlikely(r))
- goto out;
-
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
inject_pending_event(vcpu);
}
}
+ r = kvm_mmu_reload(vcpu);
+ if (unlikely(r)) {
+ kvm_x86_ops->cancel_injection(vcpu);
+ goto out;
+ }
+
preempt_disable();
kvm_x86_ops->prepare_guest_switch(vcpu);
return r;
}
+ /*
+ * Implements the following, as a state machine:
+ *
+ * read:
+ * for each fragment
+ * write gpa, len
+ * exit
+ * copy data
+ * execute insn
+ *
+ * write:
+ * for each fragment
+ * write gpa, len
+ * copy data
+ * exit
+ */
static int complete_mmio(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
+ struct kvm_mmio_fragment *frag;
int r;
if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
return 1;
if (vcpu->mmio_needed) {
- vcpu->mmio_needed = 0;
+ /* Complete previous fragment */
+ frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
if (!vcpu->mmio_is_write)
- memcpy(vcpu->mmio_data + vcpu->mmio_index,
- run->mmio.data, 8);
- vcpu->mmio_index += 8;
- if (vcpu->mmio_index < vcpu->mmio_size) {
- run->exit_reason = KVM_EXIT_MMIO;
- run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index;
- memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8);
- run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8);
- run->mmio.is_write = vcpu->mmio_is_write;
- vcpu->mmio_needed = 1;
- return 0;
+ memcpy(frag->data, run->mmio.data, frag->len);
+ if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
+ vcpu->mmio_needed = 0;
+ if (vcpu->mmio_is_write)
+ return 1;
+ vcpu->mmio_read_completed = 1;
+ goto done;
}
+ /* Initiate next fragment */
+ ++frag;
+ run->exit_reason = KVM_EXIT_MMIO;
+ run->mmio.phys_addr = frag->gpa;
if (vcpu->mmio_is_write)
- return 1;
- vcpu->mmio_read_completed = 1;
+ memcpy(run->mmio.data, frag->data, frag->len);
+ run->mmio.len = frag->len;
+ run->mmio.is_write = vcpu->mmio_is_write;
+ return 0;
+
}
+ done:
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (npages && !old.rmap) {
unsigned long userspace_addr;
- down_write(¤t->mm->mmap_sem);
- userspace_addr = do_mmap(NULL, 0,
+ userspace_addr = vm_mmap(NULL, 0,
npages * PAGE_SIZE,
PROT_READ | PROT_WRITE,
map_flags,
0);
- up_write(¤t->mm->mmap_sem);
if (IS_ERR((void *)userspace_addr))
return PTR_ERR((void *)userspace_addr);
if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
int ret;
- down_write(¤t->mm->mmap_sem);
- ret = do_munmap(current->mm, old.userspace_addr,
+ ret = vm_munmap(old.userspace_addr,
old.npages * PAGE_SIZE);
- up_write(¤t->mm->mmap_sem);
if (ret < 0)
printk(KERN_WARNING
"kvm_vm_ioctl_set_memory_region: "
kvm_cpu_has_interrupt(vcpu));
}
- void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
- int me;
- int cpu = vcpu->cpu;
-
- if (waitqueue_active(&vcpu->wq)) {
- wake_up_interruptible(&vcpu->wq);
- ++vcpu->stat.halt_wakeup;
- }
-
- me = get_cpu();
- if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
- if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE)
- smp_send_reschedule(cpu);
- put_cpu();
+ return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
kvm_inject_page_fault(vcpu, &fault);
}
vcpu->arch.apf.halted = false;
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/memory.h>
+ #include <linux/module.h>
#include <linux/platform_device.h>
#include <asm/chpid.h>
#include <asm/sclp.h>
u64 facilities; /* 48-55 */
u8 _reserved2[84 - 56]; /* 56-83 */
u8 fac84; /* 84 */
- u8 _reserved3[91 - 85]; /* 85-90 */
+ u8 fac85; /* 85 */
+ u8 _reserved3[91 - 86]; /* 86-90 */
u8 flags; /* 91 */
u8 _reserved4[100 - 92]; /* 92-99 */
u32 rnsize2; /* 100-103 */
u64 sclp_facilities;
static u8 sclp_fac84;
+ static u8 sclp_fac85;
static unsigned long long rzm;
static unsigned long long rnmax;
sccb = &early_read_info_sccb;
sclp_facilities = sccb->facilities;
sclp_fac84 = sccb->fac84;
+ sclp_fac85 = sccb->fac85;
rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2;
rzm <<= 20;
return rzm;
}
+ u8 sclp_get_fac85(void)
+ {
+ return sclp_fac85;
+ }
+ EXPORT_SYMBOL_GPL(sclp_get_fac85);
+
/*
* This function will be called after sclp_facilities_detect(), which gets
* called from early.c code. Therefore the sccb should have valid contents.
static int sclp_assign_storage(u16 rn)
{
- return do_assign_storage(0x000d0001, rn);
+ unsigned long long start, address;
+ int rc;
+
+ rc = do_assign_storage(0x000d0001, rn);
+ if (rc)
+ goto out;
+ start = address = rn2addr(rn);
+ for (; address < start + rzm; address += PAGE_SIZE)
+ page_set_storage_key(address, PAGE_DEFAULT_KEY, 0);
+out:
+ return rc;
}
static int sclp_unassign_storage(u16 rn)