Merge branch 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author     Linus Torvalds <torvalds@linux-foundation.org>
           Thu, 24 May 2012 23:17:30 +0000 (16:17 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Thu, 24 May 2012 23:17:30 +0000 (16:17 -0700)
Pull KVM changes from Avi Kivity:
 "Changes include additional instruction emulation, page-crossing MMIO,
  faster dirty logging, preventing the watchdog from killing a stopped
  guest, module autoload, a new MSI ABI, and some minor optimizations
  and fixes.  Outside x86 we have a small s390 and a very large ppc
  update.

  Regarding the new (for kvm) rebaseless workflow, some of the patches
  that were merged before we switched trees had to be rebased, while
  others are true pulls.  In either case the signoffs should be correct
  now."

Fix up trivial conflicts in Documentation/feature-removal-schedule.txt,
arch/powerpc/kvm/book3s_segment.S and arch/x86/include/asm/kvm_para.h.

I suspect the kvm_para.h resolution ends up doing the "do I have cpuid"
check effectively twice (it was done differently in two different
commits), but better safe than sorry ;)
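
For reference, a minimal sketch of the kind of doubled guard being described,
assuming the usual kvm_para_available() shape in kvm_para.h (illustrative
only -- not the actual merged resolution):

  static inline bool kvm_para_available(void)
  {
          unsigned int eax, ebx, ecx, edx;
          char signature[13];

          if (boot_cpu_data.cpuid_level < 0)      /* guard 1: CPUID present? */
                  return false;

          if (cpu_has_hypervisor) {               /* guard 2: hypervisor bit */
                  cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
                  memcpy(signature + 0, &ebx, 4);
                  memcpy(signature + 4, &ecx, 4);
                  memcpy(signature + 8, &edx, 4);
                  signature[12] = 0;

                  if (!strcmp(signature, "KVMKVMKVM"))
                          return true;
          }

          return false;
  }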

* 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (125 commits)
  KVM: make asm-generic/kvm_para.h have an ifdef __KERNEL__ block
  KVM: s390: onereg for timer related registers
  KVM: s390: epoch difference and TOD programmable field
  KVM: s390: KVM_GET/SET_ONEREG for s390
  KVM: s390: add capability indicating COW support
  KVM: Fix mmu_reload() clash with nested vmx event injection
  KVM: MMU: Don't use RCU for lockless shadow walking
  KVM: VMX: Optimize %ds, %es reload
  KVM: VMX: Fix %ds/%es clobber
  KVM: x86 emulator: convert bsf/bsr instructions to emulate_2op_SrcV_nobyte()
  KVM: VMX: unlike vmcs on fail path
  KVM: PPC: Emulator: clean up SPR reads and writes
  KVM: PPC: Emulator: clean up instruction parsing
  kvm/powerpc: Add new ioctl to retreive server MMU infos
  kvm/book3s: Make kernel emulated H_PUT_TCE available for "PR" KVM
  KVM: PPC: bookehv: Fix r8/r13 storing in level exception handler
  KVM: PPC: Book3S: Enable IRQs during exit handling
  KVM: PPC: Fix PR KVM on POWER7 bare metal
  KVM: PPC: Fix stbux emulation
  KVM: PPC: bookehv: Use lwz/stw instead of PPC_LL/PPC_STL for 32-bit fields
  ...

17 files changed:
Documentation/feature-removal-schedule.txt
arch/powerpc/include/asm/hvcall.h
arch/powerpc/include/asm/kvm_book3s.h
arch/powerpc/include/asm/processor.h
arch/powerpc/include/asm/reg_booke.h
arch/powerpc/include/asm/switch_to.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/head_44x.S
arch/powerpc/kernel/head_fsl_booke.S
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_segment.S
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/kvm_para.h
arch/x86/kvm/x86.c
drivers/s390/char/sclp_cmd.c

index 50d82ae09e2a685bf54146123c1d02dbfc3ed886,223fe9fcfd94eceb57c398b70642a4f6ab521ff7..4ba1eb7590a70d489a131fc879bac5c7c5f6d5da
@@@ -2,14 -2,7 +2,14 @@@ The following is a list of files and fe
  removed in the kernel source tree.  Every entry should contain what
  exactly is going away, why it is happening, and who is going to be doing
  the work.  When the feature is removed from the kernel, it should also
 -be removed from this file.
 +be removed from this file.  The suggested deprecation period is 3 releases.
 +
 +---------------------------
 +
 +What: ddebug_query="query" boot cmdline param
 +When: v3.8
 +Why:  obsoleted by dyndbg="query" and module.dyndbg="query"
 +Who:  Jim Cromie <jim.cromie@gmail.com>, Jason Baron <jbaron@redhat.com>
  
  ---------------------------
  
@@@ -541,18 -534,6 +541,18 @@@ Who:     Kees Cook <keescook@chromium.org
  
  ----------------------------
  
 +What: Removing the pn544 raw driver.
 +When: 3.6
 +Why:  With the introduction of the NFC HCI and SHDL kernel layers, pn544.c
 +      is being replaced by pn544_hci.c which is accessible through the netlink
 +      and socket NFC APIs. Moreover, pn544.c is outdated and does not seem to
 +      work properly with the latest Android stacks.
 +      Having 2 drivers for the same hardware is confusing and as such we
 +      should only keep the one following the kernel NFC APIs.
 +Who:  Samuel Ortiz <sameo@linux.intel.com>
 +
 +----------------------------
 +
  What: setitimer accepts user NULL pointer (value)
  When: 3.6
  Why:  setitimer is not returning -EFAULT if user pointer is NULL. This
@@@ -561,30 -542,8 +561,37 @@@ Who:     Sasikantha Babu <sasikanth.v19@gma
  
  ----------------------------
  
 +What: remove bogus DV presets V4L2_DV_1080I29_97, V4L2_DV_1080I30 and
 +      V4L2_DV_1080I25
 +When: 3.6
 +Why:  These HDTV formats do not exist and were added by a confused mind
 +      (that was me, to be precise...)
 +Who:  Hans Verkuil <hans.verkuil@cisco.com>
 +
 +----------------------------
 +
 +What: V4L2_CID_HCENTER, V4L2_CID_VCENTER V4L2 controls
 +When: 3.7
 +Why:  The V4L2_CID_VCENTER, V4L2_CID_HCENTER controls have been deprecated
 +      for about 4 years and they are not used by any mainline driver.
 +      There are newer controls (V4L2_CID_PAN*, V4L2_CID_TILT*) that provide
 +      similar functionality.
 +Who:  Sylwester Nawrocki <sylvester.nawrocki@gmail.com>
 +
 +----------------------------
 +
 +What: cgroup option updates via remount
 +When: March 2013
 +Why:  Remount currently allows changing bound subsystems and
 +      release_agent.  Rebinding is hardly useful as it only works
 +      when the hierarchy is empty and release_agent itself should be
 +      replaced with conventional fsnotify.
 +
 +----------------------------
++
+ What: KVM debugfs statistics
+ When: 2013
+ Why:  KVM tracepoints provide mostly equivalent information in a much more
+         flexible fashion.
++----------------------------
index 612252388190ccfafeab132ba93e84d8f60b632e,318bac9f8752bbef3145811fcf04356ff8f2573d..423cf9eaf4a4fddf35811e62de33dc16363812e5
  #define H_MR_CONDITION  -43
  #define H_NOT_ENOUGH_RESOURCES -44
  #define H_R_STATE       -45
 -#define H_RESCINDEND    -46
 -#define H_MULTI_THREADS_ACTIVE -9005
 +#define H_RESCINDED     -46
 +#define H_P2          -55
 +#define H_P3          -56
 +#define H_P4          -57
 +#define H_P5          -58
 +#define H_P6          -59
 +#define H_P7          -60
 +#define H_P8          -61
 +#define H_P9          -62
 +#define H_TOO_BIG     -64
 +#define H_OVERLAP     -68
 +#define H_INTERRUPT   -69
 +#define H_BAD_DATA    -70
 +#define H_NOT_ACTIVE  -71
 +#define H_SG_LIST     -72
 +#define H_OP_MODE     -73
 +#define H_COP_HW      -74
 +#define H_UNSUPPORTED_FLAG_START      -256
 +#define H_UNSUPPORTED_FLAG_END                -511
 +#define H_MULTI_THREADS_ACTIVE        -9005
 +#define H_OUTSTANDING_COP_OPS -9006
  
  
  /* Long Busy is a condition that can be returned by the firmware
  #define H_PP1                 (1UL<<(63-62))
  #define H_PP2                 (1UL<<(63-63))
  
+ /* Flags for H_REGISTER_VPA subfunction field */
+ #define H_VPA_FUNC_SHIFT      (63-18) /* Bit posn of subfunction code */
+ #define H_VPA_FUNC_MASK               7UL
+ #define H_VPA_REG_VPA         1UL     /* Register Virtual Processor Area */
+ #define H_VPA_REG_DTL         2UL     /* Register Dispatch Trace Log */
+ #define H_VPA_REG_SLB         3UL     /* Register SLB shadow buffer */
+ #define H_VPA_DEREG_VPA               5UL     /* Deregister Virtual Processor Area */
+ #define H_VPA_DEREG_DTL               6UL     /* Deregister Dispatch Trace Log */
+ #define H_VPA_DEREG_SLB               7UL     /* Deregister SLB shadow buffer */
  /* VASI States */
  #define H_VASI_INVALID          0
  #define H_VASI_ENABLED          1
  #define H_GET_MPP             0x2D4
  #define H_HOME_NODE_ASSOCIATIVITY 0x2EC
  #define H_BEST_ENERGY         0x2F4
 +#define H_RANDOM              0x300
 +#define H_COP                 0x304
  #define H_GET_MPP_X           0x314
  #define MAX_HCALL_OPCODE      H_GET_MPP_X
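
As a minimal sketch (names mirror the macros above and the do_h_register_vpa()
hunk later in this diff; not a complete handler), H_VPA_FUNC_SHIFT evaluates
to 63 - 18 = 45, so the 3-bit subfunction code is extracted from the flags
argument as:

  /* e.g. a flags word carrying 2 in the subfunction field selects H_VPA_REG_DTL */
  static inline int h_vpa_subfunc(unsigned long flags)
  {
          return (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
  }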
  
index fd07f43d66224c42b367b6d911dda9ddb8a33bd4,046041ff847f62837602553253b6338ec7805f76..f0e0c6a66d973fdb833138e2433b54be860dc4f3
@@@ -81,13 -81,12 +81,13 @@@ struct kvmppc_vcpu_book3s 
        u64 sdr1;
        u64 hior;
        u64 msr_mask;
 -      u64 vsid_next;
  #ifdef CONFIG_PPC_BOOK3S_32
        u32 vsid_pool[VSID_POOL_SIZE];
 +      u32 vsid_next;
  #else
 -      u64 vsid_first;
 -      u64 vsid_max;
 +      u64 proto_vsid_first;
 +      u64 proto_vsid_max;
 +      u64 proto_vsid_next;
  #endif
        int context_id[SID_CONTEXTS];
  
@@@ -453,4 -452,7 +453,7 @@@ static inline bool kvmppc_critical_sect
  
  #define INS_DCBZ                      0x7c0007ec
  
+ /* LPIDs we support with this build -- runtime limit may be lower */
+ #define KVMPPC_NR_LPIDS                       (LPID_RSVD + 1)
  #endif /* __ASM_KVM_BOOK3S_H__ */
index 55e85631c42e3be5ab150ce48e2d3dbf767e9c6a,2a25ab0f58960d28a68417e613469567ff47f8d2..413a5eaef56c94340d16451a0f65e56453c5b38d
@@@ -74,6 -74,9 +74,6 @@@ struct task_struct
  void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp);
  void release_thread(struct task_struct *);
  
 -/* Prepare to copy thread state - unlazy all lazy status */
 -extern void prepare_to_copy(struct task_struct *tsk);
 -
  /* Create a new kernel thread. */
  extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
  
@@@ -240,6 -243,9 +240,9 @@@ struct thread_struct 
  #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
        void*           kvm_shadow_vcpu; /* KVM internal data */
  #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */
+ #if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
+       struct kvm_vcpu *kvm_vcpu;
+ #endif
  #ifdef CONFIG_PPC64
        unsigned long   dscr;
        int             dscr_inherit;
@@@ -383,6 -389,7 +386,6 @@@ extern unsigned long cpuidle_disable
  enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
  
  extern int powersave_nap;     /* set if nap mode can be used in idle loop */
 -void cpu_idle_wait(void);
  
  #ifdef CONFIG_PSERIES_IDLE
  extern void update_smt_snooze_delay(int snooze);
index 8a97aa7289d36b155a1e01df4211a261a7a110e8,815e404f8c1820b840510a565c6a1250754a06cb..2d916c4982c5136a71b10f917606757aa743792f
  #ifndef __ASM_POWERPC_REG_BOOKE_H__
  #define __ASM_POWERPC_REG_BOOKE_H__
  
 -#ifdef CONFIG_BOOKE_WDT
 -extern u32 booke_wdt_enabled;
 -extern u32 booke_wdt_period;
 -#endif /* CONFIG_BOOKE_WDT */
 -
  /* Machine State Register (MSR) Fields */
  #define MSR_GS                (1<<28) /* Guest state */
  #define MSR_UCLE      (1<<26) /* User-mode cache lock enable */
  #define SPRN_SPRG7W   0x117   /* Special Purpose Register General 7 Write */
  #define SPRN_EPCR     0x133   /* Embedded Processor Control Register */
  #define SPRN_DBCR2    0x136   /* Debug Control Register 2 */
+ #define SPRN_MSRP     0x137   /* MSR Protect Register */
  #define SPRN_IAC3     0x13A   /* Instruction Address Compare 3 */
  #define SPRN_IAC4     0x13B   /* Instruction Address Compare 4 */
  #define SPRN_DVC1     0x13E   /* Data Value Compare Register 1 */
  #define SPRN_DVC2     0x13F   /* Data Value Compare Register 2 */
+ #define SPRN_LPID     0x152   /* Logical Partition ID */
  #define SPRN_MAS8     0x155   /* MMU Assist Register 8 */
  #define SPRN_TLB0PS   0x158   /* TLB 0 Page Size Register */
  #define SPRN_TLB1PS   0x159   /* TLB 1 Page Size Register */
  #define SPRN_MAS5_MAS6        0x15c   /* MMU Assist Register 5 || 6 */
  #define SPRN_MAS8_MAS1        0x15d   /* MMU Assist Register 8 || 1 */
  #define SPRN_EPTCFG   0x15e   /* Embedded Page Table Config */
+ #define SPRN_GSPRG0   0x170   /* Guest SPRG0 */
+ #define SPRN_GSPRG1   0x171   /* Guest SPRG1 */
+ #define SPRN_GSPRG2   0x172   /* Guest SPRG2 */
+ #define SPRN_GSPRG3   0x173   /* Guest SPRG3 */
  #define SPRN_MAS7_MAS3        0x174   /* MMU Assist Register 7 || 3 */
  #define SPRN_MAS0_MAS1        0x175   /* MMU Assist Register 0 || 1 */
+ #define SPRN_GSRR0    0x17A   /* Guest SRR0 */
+ #define SPRN_GSRR1    0x17B   /* Guest SRR1 */
+ #define SPRN_GEPR     0x17C   /* Guest EPR */
+ #define SPRN_GDEAR    0x17D   /* Guest DEAR */
+ #define SPRN_GPIR     0x17E   /* Guest PIR */
+ #define SPRN_GESR     0x17F   /* Guest Exception Syndrome Register */
  #define SPRN_IVOR0    0x190   /* Interrupt Vector Offset Register 0 */
  #define SPRN_IVOR1    0x191   /* Interrupt Vector Offset Register 1 */
  #define SPRN_IVOR2    0x192   /* Interrupt Vector Offset Register 2 */
  #define SPRN_IVOR39   0x1B1   /* Interrupt Vector Offset Register 39 */
  #define SPRN_IVOR40   0x1B2   /* Interrupt Vector Offset Register 40 */
  #define SPRN_IVOR41   0x1B3   /* Interrupt Vector Offset Register 41 */
+ #define SPRN_GIVOR2   0x1B8   /* Guest IVOR2 */
+ #define SPRN_GIVOR3   0x1B9   /* Guest IVOR3 */
+ #define SPRN_GIVOR4   0x1BA   /* Guest IVOR4 */
+ #define SPRN_GIVOR8   0x1BB   /* Guest IVOR8 */
+ #define SPRN_GIVOR13  0x1BC   /* Guest IVOR13 */
+ #define SPRN_GIVOR14  0x1BD   /* Guest IVOR14 */
+ #define SPRN_GIVPR    0x1BF   /* Guest IVPR */
  #define SPRN_SPEFSCR  0x200   /* SPE & Embedded FP Status & Control */
  #define SPRN_BBEAR    0x201   /* Branch Buffer Entry Address Register */
  #define SPRN_BBTAR    0x202   /* Branch Buffer Target Address Register */
  #define MCSR_LDG      0x00002000UL /* Guarded Load */
  #define MCSR_TLBSYNC  0x00000002UL /* Multiple tlbsyncs detected */
  #define MCSR_BSL2_ERR 0x00000001UL /* Backside L2 cache error */
+ #define MSRP_UCLEP    0x04000000 /* Protect MSR[UCLE] */
+ #define MSRP_DEP      0x00000200 /* Protect MSR[DE] */
+ #define MSRP_PMMP     0x00000004 /* Protect MSR[PMM] */
  #endif
  
  #ifdef CONFIG_E200
  #define SPRN_EPCR_DMIUH               0x00400000      /* Disable MAS Interrupt updates
                                                 * for hypervisor */
  
+ /* Bit definitions for EPLC/EPSC */
+ #define EPC_EPR               0x80000000 /* 1 = user, 0 = kernel */
+ #define EPC_EPR_SHIFT 31
+ #define EPC_EAS               0x40000000 /* Address Space */
+ #define EPC_EAS_SHIFT 30
+ #define EPC_EGS               0x20000000 /* 1 = guest, 0 = hypervisor */
+ #define EPC_EGS_SHIFT 29
+ #define EPC_ELPID     0x00ff0000
+ #define EPC_ELPID_SHIFT       16
+ #define EPC_EPID      0x00003fff
+ #define EPC_EPID_SHIFT        0
  
  /*
   * The IBM-403 is an even more odd special case, as it is much
index 1a6320290d2616d971c2c09ca6da2281e5b766fb,1622c356ba90cc6160e466006e0266c97aba89a6..200d763a0a6708b16674eaa92df29a3fcf57fce9
@@@ -17,10 -17,12 +17,11 @@@ extern struct task_struct *_switch(stru
                                   struct thread_struct *next);
  
  extern void giveup_fpu(struct task_struct *);
+ extern void load_up_fpu(void);
  extern void disable_kernel_fp(void);
  extern void enable_kernel_fp(void);
  extern void flush_fp_to_thread(struct task_struct *);
  extern void enable_kernel_altivec(void);
 -extern void giveup_altivec(struct task_struct *);
  extern void load_up_altivec(struct task_struct *);
  extern int emulate_altivec(struct pt_regs *);
  extern void __giveup_vsx(struct task_struct *);
@@@ -39,15 -41,10 +40,15 @@@ static inline void discard_lazy_cpu_sta
  
  #ifdef CONFIG_ALTIVEC
  extern void flush_altivec_to_thread(struct task_struct *);
 +extern void giveup_altivec(struct task_struct *);
 +extern void giveup_altivec_notask(void);
  #else
  static inline void flush_altivec_to_thread(struct task_struct *t)
  {
  }
 +static inline void giveup_altivec(struct task_struct *t)
 +{
 +}
  #endif
  
  #ifdef CONFIG_VSX
index 4554dc2fe857262af77c26fa500bd669514460ee,694af3ebb0e43c92c9379c87321b07c31e78760c..52c7ad78242ebd95c16d950424552d6a4741018e
@@@ -116,6 -116,9 +116,9 @@@ int main(void
  #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
        DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
  #endif
+ #ifdef CONFIG_KVM_BOOKE_HV
+       DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu));
+ #endif
  
        DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
        DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
        DEFINE(SLBSHADOW_STACKESID,
               offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid));
        DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area));
 -      DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
 -      DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
 -      DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int));
 -      DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int));
        DEFINE(LPPACA_PMCINUSE, offsetof(struct lppaca, pmcregs_in_use));
        DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx));
        DEFINE(LPPACA_YIELDCOUNT, offsetof(struct lppaca, yield_count));
  #ifdef CONFIG_KVM
        DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
        DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
+       DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid));
        DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
        DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave));
        DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr));
        DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4));
        DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6));
  
+       DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
+       DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
        /* book3s */
  #ifdef CONFIG_KVM_BOOK3S_64_HV
-       DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
        DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
        DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
        DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
        DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
        DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
        DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
+       DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
  #endif
  #ifdef CONFIG_PPC_BOOK3S
-       DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
        DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
        DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr));
        DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr));
        DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions));
        DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded));
        DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
-       DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa));
        DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
        DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
        DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
        HSTATE_FIELD(HSTATE_NAPPING, napping);
  
  #ifdef CONFIG_KVM_BOOK3S_64_HV
+       HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req);
+       HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
        HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
        HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
        HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
        DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr));
  #endif
  
+ #ifdef CONFIG_KVM_BOOKE_HV
+       DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4));
+       DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6));
+       DEFINE(VCPU_EPLC, offsetof(struct kvm_vcpu, arch.eplc));
+ #endif
  #ifdef CONFIG_KVM_EXIT_TIMING
        DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
                                                arch.timing_exit.tv32.tbu));
index f7bed44ee165594abd3c37bf3387dec2c29d9dff,8829b1095f7fe45105e11d49b7155fed28fb1ccb..1c06d297154532473527de1ee7a26081714f4fa0
@@@ -63,11 -63,13 +63,13 @@@ BEGIN_FTR_SECTIO
        GET_PACA(r13)
  
  #ifdef CONFIG_KVM_BOOK3S_64_HV
-       lbz     r0,PACAPROCSTART(r13)
-       cmpwi   r0,0x80
-       bne     1f
-       li      r0,1
-       stb     r0,PACAPROCSTART(r13)
+       li      r0,KVM_HWTHREAD_IN_KERNEL
+       stb     r0,HSTATE_HWTHREAD_STATE(r13)
+       /* Order setting hwthread_state vs. testing hwthread_req */
+       sync
+       lbz     r0,HSTATE_HWTHREAD_REQ(r13)
+       cmpwi   r0,0
+       beq     1f
        b       kvm_start_guest
  1:
  #endif
@@@ -94,10 -96,12 +96,10 @@@ machine_check_pSeries_1
  data_access_pSeries:
        HMT_MEDIUM
        SET_SCRATCH0(r13)
 -#ifndef CONFIG_POWER4_ONLY
  BEGIN_FTR_SECTION
        b       data_access_check_stab
  data_access_not_stab:
  END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
 -#endif
        EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD,
                                 KVMTEST, 0x300)
  
@@@ -299,6 -303,7 +301,6 @@@ machine_check_fwnmi
                                 EXC_STD, KVMTEST, 0x200)
        KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200)
  
 -#ifndef CONFIG_POWER4_ONLY
        /* moved from 0x300 */
  data_access_check_stab:
        GET_PACA(r13)
@@@ -325,6 -330,7 +327,6 @@@ do_stab_bolted_pSeries
        GET_SCRATCH0(r10)
        std     r10,PACA_EXSLB+EX_R13(r13)
        EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD)
 -#endif /* CONFIG_POWER4_ONLY */
  
        KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
        KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
@@@ -764,8 -770,8 +766,8 @@@ alignment_common
        std     r3,_DAR(r1)
        std     r4,_DSISR(r1)
        bl      .save_nvgprs
 +      DISABLE_INTS
        addi    r3,r1,STACK_FRAME_OVERHEAD
 -      ENABLE_INTS
        bl      .alignment_exception
        b       .ret_from_except
  
index 22d608e8bb7d258cdda03a64aba858addd7c81fd,d1192c577ed31682e5a18833ad50524fecfe4842..7a2e5e421abfd2f6da2331844471c9eedc3b7847
@@@ -248,10 -248,11 +248,11 @@@ _ENTRY(_start)
  
  interrupt_base:
        /* Critical Input Interrupt */
-       CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception)
+       CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception)
  
        /* Machine Check Interrupt */
-       CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
+       CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \
+                          machine_check_exception)
        MCHECK_EXCEPTION(0x0210, MachineCheckA, machine_check_exception)
  
        /* Data Storage Interrupt */
        INSTRUCTION_STORAGE_EXCEPTION
  
        /* External Input Interrupt */
-       EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE)
+       EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, \
+                 do_IRQ, EXC_XFER_LITE)
  
        /* Alignment Interrupt */
        ALIGNMENT_EXCEPTION
  #ifdef CONFIG_PPC_FPU
        FP_UNAVAILABLE_EXCEPTION
  #else
-       EXCEPTION(0x2010, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
+       EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \
+                 FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
  #endif
        /* System Call Interrupt */
        START_EXCEPTION(SystemCall)
-       NORMAL_EXCEPTION_PROLOG
+       NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL)
        EXC_XFER_EE_LITE(0x0c00, DoSyscall)
  
        /* Auxiliary Processor Unavailable Interrupt */
-       EXCEPTION(0x2020, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
+       EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
+                 AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
  
        /* Decrementer Interrupt */
        DECREMENTER_EXCEPTION
  
        /* Fixed Internal Timer Interrupt */
        /* TODO: Add FIT support */
-       EXCEPTION(0x1010, FixedIntervalTimer, unknown_exception, EXC_XFER_EE)
+       EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \
+                 unknown_exception, EXC_XFER_EE)
  
        /* Watchdog Timer Interrupt */
        /* TODO: Add watchdog support */
  #ifdef CONFIG_BOOKE_WDT
-       CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException)
+       CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, WatchdogException)
  #else
-       CRITICAL_EXCEPTION(0x1020, WatchdogTimer, unknown_exception)
+       CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, unknown_exception)
  #endif
  
        /* Data TLB Error Interrupt */
@@@ -777,6 -782,14 +782,6 @@@ _GLOBAL(__fixup_440A_mcheck
        sync
        blr
  
 -/*
 - * extern void giveup_altivec(struct task_struct *prev)
 - *
 - * The 44x core does not have an AltiVec unit.
 - */
 -_GLOBAL(giveup_altivec)
 -      blr
 -
  /*
   * extern void giveup_fpu(struct task_struct *prev)
   *
index de80e0f9a2bded4d802bdaf9c4f71a2e29add915,89c6d6f36785412a77c1b4ca50f606bfbacb5716..1f4434a3860885bc9fa33c821359731be7b7398d
@@@ -301,19 -301,20 +301,20 @@@ _ENTRY(__early_start
  
  interrupt_base:
        /* Critical Input Interrupt */
-       CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception)
+       CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception)
  
        /* Machine Check Interrupt */
  #ifdef CONFIG_E200
        /* no RFMCI, MCSRRs on E200 */
-       CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
+       CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \
+                          machine_check_exception)
  #else
        MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
  #endif
  
        /* Data Storage Interrupt */
        START_EXCEPTION(DataStorage)
-       NORMAL_EXCEPTION_PROLOG
+       NORMAL_EXCEPTION_PROLOG(DATA_STORAGE)
        mfspr   r5,SPRN_ESR             /* Grab the ESR, save it, pass arg3 */
        stw     r5,_ESR(r11)
        mfspr   r4,SPRN_DEAR            /* Grab the DEAR, save it, pass arg2 */
        INSTRUCTION_STORAGE_EXCEPTION
  
        /* External Input Interrupt */
-       EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE)
+       EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ, EXC_XFER_LITE)
  
        /* Alignment Interrupt */
        ALIGNMENT_EXCEPTION
  #else
  #ifdef CONFIG_E200
        /* E200 treats 'normal' floating point instructions as FP Unavail exception */
-       EXCEPTION(0x0800, FloatingPointUnavailable, program_check_exception, EXC_XFER_EE)
+       EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
+                 program_check_exception, EXC_XFER_EE)
  #else
-       EXCEPTION(0x0800, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
+       EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
+                 unknown_exception, EXC_XFER_EE)
  #endif
  #endif
  
        /* System Call Interrupt */
        START_EXCEPTION(SystemCall)
-       NORMAL_EXCEPTION_PROLOG
+       NORMAL_EXCEPTION_PROLOG(SYSCALL)
        EXC_XFER_EE_LITE(0x0c00, DoSyscall)
  
        /* Auxiliary Processor Unavailable Interrupt */
-       EXCEPTION(0x2900, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
+       EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
+                 unknown_exception, EXC_XFER_EE)
  
        /* Decrementer Interrupt */
        DECREMENTER_EXCEPTION
  
        /* Fixed Internal Timer Interrupt */
        /* TODO: Add FIT support */
-       EXCEPTION(0x3100, FixedIntervalTimer, unknown_exception, EXC_XFER_EE)
+       EXCEPTION(0x3100, FIT, FixedIntervalTimer, \
+                 unknown_exception, EXC_XFER_EE)
  
        /* Watchdog Timer Interrupt */
  #ifdef CONFIG_BOOKE_WDT
-       CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException)
+       CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, WatchdogException)
  #else
-       CRITICAL_EXCEPTION(0x3200, WatchdogTimer, unknown_exception)
+       CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, unknown_exception)
  #endif
  
        /* Data TLB Error Interrupt */
        mtspr   SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
        mfspr   r10, SPRN_SPRG_THREAD
        stw     r11, THREAD_NORMSAVE(0)(r10)
+ #ifdef CONFIG_KVM_BOOKE_HV
+ BEGIN_FTR_SECTION
+       mfspr   r11, SPRN_SRR1
+ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+ #endif
        stw     r12, THREAD_NORMSAVE(1)(r10)
        stw     r13, THREAD_NORMSAVE(2)(r10)
        mfcr    r13
        stw     r13, THREAD_NORMSAVE(3)(r10)
+       DO_KVM  BOOKE_INTERRUPT_DTLB_MISS SPRN_SRR1
        mfspr   r10, SPRN_DEAR          /* Get faulting address */
  
        /* If we are faulting a kernel address, we have to use the
        mtspr   SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
        mfspr   r10, SPRN_SPRG_THREAD
        stw     r11, THREAD_NORMSAVE(0)(r10)
+ #ifdef CONFIG_KVM_BOOKE_HV
+ BEGIN_FTR_SECTION
+       mfspr   r11, SPRN_SRR1
+ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+ #endif
        stw     r12, THREAD_NORMSAVE(1)(r10)
        stw     r13, THREAD_NORMSAVE(2)(r10)
        mfcr    r13
        stw     r13, THREAD_NORMSAVE(3)(r10)
+       DO_KVM  BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR1
        mfspr   r10, SPRN_SRR0          /* Get faulting address */
  
        /* If we are faulting a kernel address, we have to use the
  #ifdef CONFIG_SPE
        /* SPE Unavailable */
        START_EXCEPTION(SPEUnavailable)
-       NORMAL_EXCEPTION_PROLOG
+       NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL)
        bne     load_up_spe
        addi    r3,r1,STACK_FRAME_OVERHEAD
        EXC_XFER_EE_LITE(0x2010, KernelSPE)
  #else
-       EXCEPTION(0x2020, SPEUnavailable, unknown_exception, EXC_XFER_EE)
+       EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
+                 unknown_exception, EXC_XFER_EE)
  #endif /* CONFIG_SPE */
  
        /* SPE Floating Point Data */
  #ifdef CONFIG_SPE
-       EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE);
+       EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, \
+                 SPEFloatingPointException, EXC_XFER_EE);
  
        /* SPE Floating Point Round */
-       EXCEPTION(0x2050, SPEFloatingPointRound, SPEFloatingPointRoundException, EXC_XFER_EE)
+       EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
+                 SPEFloatingPointRoundException, EXC_XFER_EE)
  #else
-       EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception, EXC_XFER_EE)
-       EXCEPTION(0x2050, SPEFloatingPointRound, unknown_exception, EXC_XFER_EE)
+       EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, \
+                 unknown_exception, EXC_XFER_EE)
+       EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
+                 unknown_exception, EXC_XFER_EE)
  #endif /* CONFIG_SPE */
  
        /* Performance Monitor */
-       EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD)
+       EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \
+                 performance_monitor_exception, EXC_XFER_STD)
  
-       EXCEPTION(0x2070, Doorbell, doorbell_exception, EXC_XFER_STD)
+       EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception, EXC_XFER_STD)
  
-       CRITICAL_EXCEPTION(0x2080, CriticalDoorbell, unknown_exception)
+       CRITICAL_EXCEPTION(0x2080, DOORBELL_CRITICAL, \
+                          CriticalDoorbell, unknown_exception)
  
        /* Debug Interrupt */
        DEBUG_DEBUG_EXCEPTION
        DEBUG_CRIT_EXCEPTION
  
+       GUEST_DOORBELL_EXCEPTION
+       CRITICAL_EXCEPTION(0, GUEST_DBELL_CRIT, CriticalGuestDoorbell, \
+                          unknown_exception)
+       /* Hypercall */
+       EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_EE)
+       /* Embedded Hypervisor Privilege */
+       EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_EE)
  /*
   * Local functions
   */
@@@ -871,9 -906,40 +906,32 @@@ _GLOBAL(__setup_e500mc_ivors
        mtspr   SPRN_IVOR36,r3
        li      r3,CriticalDoorbell@l
        mtspr   SPRN_IVOR37,r3
+       /*
+        * We only want to touch IVOR38-41 if we're running on hardware
+        * that supports category E.HV.  The architectural way to determine
+        * this is MMUCFG[LPIDSIZE].
+        */
+       mfspr   r3, SPRN_MMUCFG
+       andis.  r3, r3, MMUCFG_LPIDSIZE@h
+       beq     no_hv
+       li      r3,GuestDoorbell@l
+       mtspr   SPRN_IVOR38,r3
+       li      r3,CriticalGuestDoorbell@l
+       mtspr   SPRN_IVOR39,r3
+       li      r3,Hypercall@l
+       mtspr   SPRN_IVOR40,r3
+       li      r3,Ehvpriv@l
+       mtspr   SPRN_IVOR41,r3
+ skip_hv_ivors:
        sync
        blr
+ no_hv:
+       lwz     r3, CPU_SPEC_FEATURES(r5)
+       rlwinm  r3, r3, 0, ~CPU_FTR_EMB_HV
+       stw     r3, CPU_SPEC_FEATURES(r5)
+       b       skip_hv_ivors
  
 -/*
 - * extern void giveup_altivec(struct task_struct *prev)
 - *
 - * The e500 core does not have an AltiVec unit.
 - */
 -_GLOBAL(giveup_altivec)
 -      blr
 -
  #ifdef CONFIG_SPE
  /*
   * extern void giveup_spe(struct task_struct *prev)
index c3beaeef3f60b41013c723197b2b898cf47f3d1b,8e6401f2c16fd89421a29584410f47700d621470..80a57751758444a427797ecaff2a6f2e07a1ba03
  
  /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
  #define MAX_LPID_970  63
- #define NR_LPIDS      (LPID_RSVD + 1)
- unsigned long lpid_inuse[BITS_TO_LONGS(NR_LPIDS)];
  
  long kvmppc_alloc_hpt(struct kvm *kvm)
  {
        unsigned long hpt;
-       unsigned long lpid;
+       long lpid;
        struct revmap_entry *rev;
        struct kvmppc_linear_info *li;
  
        }
        kvm->arch.revmap = rev;
  
-       /* Allocate the guest's logical partition ID */
-       do {
-               lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS);
-               if (lpid >= NR_LPIDS) {
-                       pr_err("kvm_alloc_hpt: No LPIDs free\n");
-                       goto out_freeboth;
-               }
-       } while (test_and_set_bit(lpid, lpid_inuse));
+       lpid = kvmppc_alloc_lpid();
+       if (lpid < 0)
+               goto out_freeboth;
  
        kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18);
        kvm->arch.lpid = lpid;
@@@ -96,7 -89,7 +89,7 @@@
  
  void kvmppc_free_hpt(struct kvm *kvm)
  {
-       clear_bit(kvm->arch.lpid, lpid_inuse);
+       kvmppc_free_lpid(kvm->arch.lpid);
        vfree(kvm->arch.revmap);
        if (kvm->arch.hpt_li)
                kvm_release_hpt(kvm->arch.hpt_li);
@@@ -171,8 -164,7 +164,7 @@@ int kvmppc_mmu_hv_init(void
        if (!cpu_has_feature(CPU_FTR_HVMODE))
                return -EINVAL;
  
-       memset(lpid_inuse, 0, sizeof(lpid_inuse));
+       /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */
        if (cpu_has_feature(CPU_FTR_ARCH_206)) {
                host_lpid = mfspr(SPRN_LPID);   /* POWER7 */
                rsvd_lpid = LPID_RSVD;
                rsvd_lpid = MAX_LPID_970;
        }
  
-       set_bit(host_lpid, lpid_inuse);
+       kvmppc_init_lpid(rsvd_lpid + 1);
+       kvmppc_claim_lpid(host_lpid);
        /* rsvd_lpid is reserved for use in partition switching */
-       set_bit(rsvd_lpid, lpid_inuse);
+       kvmppc_claim_lpid(rsvd_lpid);
  
        return 0;
  }
@@@ -258,8 -252,6 +252,8 @@@ static long kvmppc_get_guest_page(struc
                            !(memslot->userspace_addr & (s - 1))) {
                                start &= ~(s - 1);
                                pgsize = s;
 +                              get_page(hpage);
 +                              put_page(page);
                                page = hpage;
                        }
                }
        err = 0;
  
   out:
 -      if (got) {
 -              if (PageHuge(page))
 -                      page = compound_head(page);
 +      if (got)
                put_page(page);
 -      }
        return err;
  
   up_err:
@@@ -452,7 -447,7 +446,7 @@@ static int instruction_is_store(unsigne
  }
  
  static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                                 unsigned long gpa, int is_store)
+                                 unsigned long gpa, gva_t ea, int is_store)
  {
        int ret;
        u32 last_inst;
         */
  
        vcpu->arch.paddr_accessed = gpa;
+       vcpu->arch.vaddr_accessed = ea;
        return kvmppc_emulate_mmio(run, vcpu);
  }
  
@@@ -552,7 -548,7 +547,7 @@@ int kvmppc_book3s_hv_page_fault(struct 
        /* No memslot means it's an emulated MMIO region */
        if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
                unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1));
-               return kvmppc_hv_emulate_mmio(run, vcpu, gpa,
+               return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
                                              dsisr & DSISR_ISSTORE);
        }
  
                SetPageDirty(page);
  
   out_put:
 -      if (page)
 -              put_page(page);
 +      if (page) {
 +              /*
 +               * We drop pages[0] here, not page because page might
 +               * have been set to the head page of a compound, but
 +               * we have to drop the reference on the correct tail
 +               * page to match the get inside gup()
 +               */
 +              put_page(pages[0]);
 +      }
        return ret;
  
   out_unlock:
@@@ -985,7 -974,6 +980,7 @@@ void *kvmppc_pin_guest_page(struct kvm 
                        pa = *physp;
                }
                page = pfn_to_page(pa >> PAGE_SHIFT);
 +              get_page(page);
        } else {
                hva = gfn_to_hva_memslot(memslot, gfn);
                npages = get_user_pages_fast(hva, 1, 1, pages);
                page = compound_head(page);
                psize <<= compound_order(page);
        }
 -      if (!kvm->arch.using_mmu_notifiers)
 -              get_page(page);
        offset = gpa & (psize - 1);
        if (nb_ret)
                *nb_ret = psize - offset;
@@@ -1008,6 -998,7 +1003,6 @@@ void kvmppc_unpin_guest_page(struct kv
  {
        struct page *page = virt_to_page(va);
  
 -      page = compound_head(page);
        put_page(page);
  }
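
The kvmppc_alloc_lpid()/kvmppc_claim_lpid()/kvmppc_free_lpid() helpers used
above replace the local lpid_inuse bitmap; their implementation is not part
of this diff.  A minimal sketch of a bitmap-backed allocator behind them,
mirroring the removed code above (assumed shape only):

  static DECLARE_BITMAP(lpid_inuse, KVMPPC_NR_LPIDS);
  static unsigned long nr_lpids;

  void kvmppc_init_lpid(unsigned long nr_lpids_param)
  {
          /* runtime limit may be lower than the build-time maximum */
          nr_lpids = min_t(unsigned long, nr_lpids_param, KVMPPC_NR_LPIDS);
          memset(lpid_inuse, 0, sizeof(lpid_inuse));
  }

  void kvmppc_claim_lpid(long lpid)
  {
          set_bit(lpid, lpid_inuse);      /* e.g. host or reserved LPID */
  }

  long kvmppc_alloc_lpid(void)
  {
          long lpid;

          do {
                  lpid = find_first_zero_bit(lpid_inuse, KVMPPC_NR_LPIDS);
                  if (lpid >= nr_lpids) {
                          pr_err("%s: No LPIDs free\n", __func__);
                          return -ENOMEM;
                  }
          } while (test_and_set_bit(lpid, lpid_inuse));

          return lpid;
  }

  void kvmppc_free_lpid(long lpid)
  {
          clear_bit(lpid, lpid_inuse);
  }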
  
index 108d1f580177b5e0b860c02b221ef888d1c63676,db36598a90d79cb45b6624f243c356be8f678c3f..c6af1d6238395947725a2e53ff0fbd6d6614b2e7
@@@ -60,12 -60,20 +60,20 @@@ static int kvmppc_hv_setup_rma(struct k
  
  void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
  {
+       struct kvmppc_vcore *vc = vcpu->arch.vcore;
        local_paca->kvm_hstate.kvm_vcpu = vcpu;
-       local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore;
+       local_paca->kvm_hstate.kvm_vcore = vc;
+       if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+               vc->stolen_tb += mftb() - vc->preempt_tb;
  }
  
  void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
  {
+       struct kvmppc_vcore *vc = vcpu->arch.vcore;
+       if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+               vc->preempt_tb = mftb();
  }
  
  void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
@@@ -134,6 -142,22 +142,22 @@@ static void init_vpa(struct kvm_vcpu *v
        vpa->yield_count = 1;
  }
  
+ /* Length for a per-processor buffer is passed in at offset 4 in the buffer */
+ struct reg_vpa {
+       u32 dummy;
+       union {
+               u16 hword;
+               u32 word;
+       } length;
+ };
+ static int vpa_is_registered(struct kvmppc_vpa *vpap)
+ {
+       if (vpap->update_pending)
+               return vpap->next_gpa != 0;
+       return vpap->pinned_addr != NULL;
+ }
  static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
                                       unsigned long flags,
                                       unsigned long vcpuid, unsigned long vpa)
        unsigned long len, nb;
        void *va;
        struct kvm_vcpu *tvcpu;
-       int err = H_PARAMETER;
+       int err;
+       int subfunc;
+       struct kvmppc_vpa *vpap;
  
        tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
        if (!tvcpu)
                return H_PARAMETER;
  
-       flags >>= 63 - 18;
-       flags &= 7;
-       if (flags == 0 || flags == 4)
-               return H_PARAMETER;
-       if (flags < 4) {
-               if (vpa & 0x7f)
+       subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
+       if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
+           subfunc == H_VPA_REG_SLB) {
+               /* Registering new area - address must be cache-line aligned */
+               if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa)
                        return H_PARAMETER;
-               if (flags >= 2 && !tvcpu->arch.vpa)
-                       return H_RESOURCE;
-               /* registering new area; convert logical addr to real */
+               /* convert logical addr to kernel addr and read length */
                va = kvmppc_pin_guest_page(kvm, vpa, &nb);
                if (va == NULL)
                        return H_PARAMETER;
-               if (flags <= 1)
-                       len = *(unsigned short *)(va + 4);
+               if (subfunc == H_VPA_REG_VPA)
+                       len = ((struct reg_vpa *)va)->length.hword;
                else
-                       len = *(unsigned int *)(va + 4);
-               if (len > nb)
-                       goto out_unpin;
-               switch (flags) {
-               case 1:         /* register VPA */
-                       if (len < 640)
-                               goto out_unpin;
-                       if (tvcpu->arch.vpa)
-                               kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa);
-                       tvcpu->arch.vpa = va;
-                       init_vpa(vcpu, va);
-                       break;
-               case 2:         /* register DTL */
-                       if (len < 48)
-                               goto out_unpin;
-                       len -= len % 48;
-                       if (tvcpu->arch.dtl)
-                               kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl);
-                       tvcpu->arch.dtl = va;
-                       tvcpu->arch.dtl_end = va + len;
+                       len = ((struct reg_vpa *)va)->length.word;
+               kvmppc_unpin_guest_page(kvm, va);
+               /* Check length */
+               if (len > nb || len < sizeof(struct reg_vpa))
+                       return H_PARAMETER;
+       } else {
+               vpa = 0;
+               len = 0;
+       }
+       err = H_PARAMETER;
+       vpap = NULL;
+       spin_lock(&tvcpu->arch.vpa_update_lock);
+       switch (subfunc) {
+       case H_VPA_REG_VPA:             /* register VPA */
+               if (len < sizeof(struct lppaca))
                        break;
-               case 3:         /* register SLB shadow buffer */
-                       if (len < 16)
-                               goto out_unpin;
-                       if (tvcpu->arch.slb_shadow)
-                               kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow);
-                       tvcpu->arch.slb_shadow = va;
+               vpap = &tvcpu->arch.vpa;
+               err = 0;
+               break;
+       case H_VPA_REG_DTL:             /* register DTL */
+               if (len < sizeof(struct dtl_entry))
                        break;
-               }
-       } else {
-               switch (flags) {
-               case 5:         /* unregister VPA */
-                       if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl)
-                               return H_RESOURCE;
-                       if (!tvcpu->arch.vpa)
-                               break;
-                       kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa);
-                       tvcpu->arch.vpa = NULL;
+               len -= len % sizeof(struct dtl_entry);
+               /* Check that they have previously registered a VPA */
+               err = H_RESOURCE;
+               if (!vpa_is_registered(&tvcpu->arch.vpa))
                        break;
-               case 6:         /* unregister DTL */
-                       if (!tvcpu->arch.dtl)
-                               break;
-                       kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl);
-                       tvcpu->arch.dtl = NULL;
+               vpap = &tvcpu->arch.dtl;
+               err = 0;
+               break;
+       case H_VPA_REG_SLB:             /* register SLB shadow buffer */
+               /* Check that they have previously registered a VPA */
+               err = H_RESOURCE;
+               if (!vpa_is_registered(&tvcpu->arch.vpa))
                        break;
-               case 7:         /* unregister SLB shadow buffer */
-                       if (!tvcpu->arch.slb_shadow)
-                               break;
-                       kvmppc_unpin_guest_page(kvm, tvcpu->arch.slb_shadow);
-                       tvcpu->arch.slb_shadow = NULL;
+               vpap = &tvcpu->arch.slb_shadow;
+               err = 0;
+               break;
+       case H_VPA_DEREG_VPA:           /* deregister VPA */
+               /* Check they don't still have a DTL or SLB buf registered */
+               err = H_RESOURCE;
+               if (vpa_is_registered(&tvcpu->arch.dtl) ||
+                   vpa_is_registered(&tvcpu->arch.slb_shadow))
                        break;
-               }
+               vpap = &tvcpu->arch.vpa;
+               err = 0;
+               break;
+       case H_VPA_DEREG_DTL:           /* deregister DTL */
+               vpap = &tvcpu->arch.dtl;
+               err = 0;
+               break;
+       case H_VPA_DEREG_SLB:           /* deregister SLB shadow buffer */
+               vpap = &tvcpu->arch.slb_shadow;
+               err = 0;
+               break;
+       }
+       if (vpap) {
+               vpap->next_gpa = vpa;
+               vpap->len = len;
+               vpap->update_pending = 1;
        }
-       return H_SUCCESS;
  
-  out_unpin:
-       kvmppc_unpin_guest_page(kvm, va);
+       spin_unlock(&tvcpu->arch.vpa_update_lock);
        return err;
  }
  
+ static void kvmppc_update_vpa(struct kvm *kvm, struct kvmppc_vpa *vpap)
+ {
+       void *va;
+       unsigned long nb;
+       vpap->update_pending = 0;
+       va = NULL;
+       if (vpap->next_gpa) {
+               va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
+               if (nb < vpap->len) {
+                       /*
+                        * If it's now too short, it must be that userspace
+                        * has changed the mappings underlying guest memory,
+                        * so unregister the region.
+                        */
+                       kvmppc_unpin_guest_page(kvm, va);
+                       va = NULL;
+               }
+       }
+       if (vpap->pinned_addr)
+               kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
+       vpap->pinned_addr = va;
+       if (va)
+               vpap->pinned_end = va + vpap->len;
+ }
+ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
+ {
+       struct kvm *kvm = vcpu->kvm;
+       spin_lock(&vcpu->arch.vpa_update_lock);
+       if (vcpu->arch.vpa.update_pending) {
+               kvmppc_update_vpa(kvm, &vcpu->arch.vpa);
+               init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
+       }
+       if (vcpu->arch.dtl.update_pending) {
+               kvmppc_update_vpa(kvm, &vcpu->arch.dtl);
+               vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
+               vcpu->arch.dtl_index = 0;
+       }
+       if (vcpu->arch.slb_shadow.update_pending)
+               kvmppc_update_vpa(kvm, &vcpu->arch.slb_shadow);
+       spin_unlock(&vcpu->arch.vpa_update_lock);
+ }
+ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
+                                   struct kvmppc_vcore *vc)
+ {
+       struct dtl_entry *dt;
+       struct lppaca *vpa;
+       unsigned long old_stolen;
+       dt = vcpu->arch.dtl_ptr;
+       vpa = vcpu->arch.vpa.pinned_addr;
+       old_stolen = vcpu->arch.stolen_logged;
+       vcpu->arch.stolen_logged = vc->stolen_tb;
+       if (!dt || !vpa)
+               return;
+       memset(dt, 0, sizeof(struct dtl_entry));
+       dt->dispatch_reason = 7;
+       dt->processor_id = vc->pcpu + vcpu->arch.ptid;
+       dt->timebase = mftb();
+       dt->enqueue_to_dispatch_time = vc->stolen_tb - old_stolen;
+       dt->srr0 = kvmppc_get_pc(vcpu);
+       dt->srr1 = vcpu->arch.shregs.msr;
+       ++dt;
+       if (dt == vcpu->arch.dtl.pinned_end)
+               dt = vcpu->arch.dtl.pinned_addr;
+       vcpu->arch.dtl_ptr = dt;
+       /* order writing *dt vs. writing vpa->dtl_idx */
+       smp_wmb();
+       vpa->dtl_idx = ++vcpu->arch.dtl_index;
+ }
  int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
  {
        unsigned long req = kvmppc_get_gpr(vcpu, 3);
@@@ -468,6 -586,7 +586,7 @@@ struct kvm_vcpu *kvmppc_core_vcpu_creat
        /* default to host PVR, since we can't spoof it */
        vcpu->arch.pvr = mfspr(SPRN_PVR);
        kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
+       spin_lock_init(&vcpu->arch.vpa_update_lock);
  
        kvmppc_mmu_book3s_hv_init(vcpu);
  
                        INIT_LIST_HEAD(&vcore->runnable_threads);
                        spin_lock_init(&vcore->lock);
                        init_waitqueue_head(&vcore->wq);
+                       vcore->preempt_tb = mftb();
                }
                kvm->arch.vcores[core] = vcore;
        }
        ++vcore->num_threads;
        spin_unlock(&vcore->lock);
        vcpu->arch.vcore = vcore;
+       vcpu->arch.stolen_logged = vcore->stolen_tb;
  
        vcpu->arch.cpu_type = KVM_CPU_3S_64;
        kvmppc_sanity_check(vcpu);
@@@ -512,12 -633,14 +633,14 @@@ out
  
  void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
  {
-       if (vcpu->arch.dtl)
-               kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl);
-       if (vcpu->arch.slb_shadow)
-               kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow);
-       if (vcpu->arch.vpa)
-               kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa);
+       spin_lock(&vcpu->arch.vpa_update_lock);
+       if (vcpu->arch.dtl.pinned_addr)
+               kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr);
+       if (vcpu->arch.slb_shadow.pinned_addr)
+               kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
+       if (vcpu->arch.vpa.pinned_addr)
+               kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
+       spin_unlock(&vcpu->arch.vpa_update_lock);
        kvm_vcpu_uninit(vcpu);
        kmem_cache_free(kvm_vcpu_cache, vcpu);
  }
@@@ -569,6 -692,45 +692,45 @@@ static void kvmppc_remove_runnable(stru
        list_del(&vcpu->arch.run_list);
  }
  
+ static int kvmppc_grab_hwthread(int cpu)
+ {
+       struct paca_struct *tpaca;
+       long timeout = 1000;
+       tpaca = &paca[cpu];
+       /* Ensure the thread won't go into the kernel if it wakes */
+       tpaca->kvm_hstate.hwthread_req = 1;
+       /*
+        * If the thread is already executing in the kernel (e.g. handling
+        * a stray interrupt), wait for it to get back to nap mode.
+        * The smp_mb() is to ensure that our setting of hwthread_req
+        * is visible before we look at hwthread_state, so if this
+        * races with the code at system_reset_pSeries and the thread
+        * misses our setting of hwthread_req, we are sure to see its
+        * setting of hwthread_state, and vice versa.
+        */
+       smp_mb();
+       while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
+               if (--timeout <= 0) {
+                       pr_err("KVM: couldn't grab cpu %d\n", cpu);
+                       return -EBUSY;
+               }
+               udelay(1);
+       }
+       return 0;
+ }
+ static void kvmppc_release_hwthread(int cpu)
+ {
+       struct paca_struct *tpaca;
+       tpaca = &paca[cpu];
+       tpaca->kvm_hstate.hwthread_req = 0;
+       tpaca->kvm_hstate.kvm_vcpu = NULL;
+ }
  static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
  {
        int cpu;
        smp_wmb();
  #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
        if (vcpu->arch.ptid) {
-               tpaca->cpu_start = 0x80;
-               wmb();
+               kvmppc_grab_hwthread(cpu);
                xics_wake_cpu(cpu);
                ++vc->n_woken;
        }
@@@ -639,7 -800,7 +800,7 @@@ static int kvmppc_run_core(struct kvmpp
        struct kvm_vcpu *vcpu, *vcpu0, *vnext;
        long ret;
        u64 now;
-       int ptid;
+       int ptid, i;
  
        /* don't start if any threads have a signal pending */
        list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
        vc->nap_count = 0;
        vc->entry_exit_count = 0;
        vc->vcore_state = VCORE_RUNNING;
+       vc->stolen_tb += mftb() - vc->preempt_tb;
        vc->in_guest = 0;
        vc->pcpu = smp_processor_id();
        vc->napping_threads = 0;
-       list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+       list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
                kvmppc_start_thread(vcpu);
+               if (vcpu->arch.vpa.update_pending ||
+                   vcpu->arch.slb_shadow.update_pending ||
+                   vcpu->arch.dtl.update_pending)
+                       kvmppc_update_vpas(vcpu);
+               kvmppc_create_dtl_entry(vcpu, vc);
+       }
+       /* Grab any remaining hw threads so they can't go into the kernel */
+       for (i = ptid; i < threads_per_core; ++i)
+               kvmppc_grab_hwthread(vc->pcpu + i);
  
        preempt_disable();
        spin_unlock(&vc->lock);
  
        kvm_guest_enter();
        __kvmppc_vcore_entry(NULL, vcpu0);
+       for (i = 0; i < threads_per_core; ++i)
+               kvmppc_release_hwthread(vc->pcpu + i);
  
        spin_lock(&vc->lock);
        /* disable sending of IPIs on virtual external irqs */
        spin_lock(&vc->lock);
   out:
        vc->vcore_state = VCORE_INACTIVE;
+       vc->preempt_tb = mftb();
        list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
                                 arch.run_list) {
                if (vcpu->arch.ret != RESUME_GUEST) {
@@@ -835,6 -1009,7 +1009,7 @@@ static int kvmppc_run_vcpu(struct kvm_r
                        spin_lock(&vc->lock);
                        continue;
                }
+               vc->runner = vcpu;
                n_ceded = 0;
                list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
                        n_ceded += v->arch.ceded;
                                wake_up(&v->arch.cpu_run);
                        }
                }
+               vc->runner = NULL;
        }
  
        if (signal_pending(current)) {
@@@ -917,115 -1093,6 +1093,6 @@@ int kvmppc_vcpu_run(struct kvm_run *run
        return r;
  }
  
- static long kvmppc_stt_npages(unsigned long window_size)
- {
-       return ALIGN((window_size >> SPAPR_TCE_SHIFT)
-                    * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
- }
- static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
- {
-       struct kvm *kvm = stt->kvm;
-       int i;
-       mutex_lock(&kvm->lock);
-       list_del(&stt->list);
-       for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
-               __free_page(stt->pages[i]);
-       kfree(stt);
-       mutex_unlock(&kvm->lock);
-       kvm_put_kvm(kvm);
- }
- static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
- {
-       struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
-       struct page *page;
-       if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
-               return VM_FAULT_SIGBUS;
-       page = stt->pages[vmf->pgoff];
-       get_page(page);
-       vmf->page = page;
-       return 0;
- }
- static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
-       .fault = kvm_spapr_tce_fault,
- };
- static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
- {
-       vma->vm_ops = &kvm_spapr_tce_vm_ops;
-       return 0;
- }
- static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
- {
-       struct kvmppc_spapr_tce_table *stt = filp->private_data;
-       release_spapr_tce_table(stt);
-       return 0;
- }
- static struct file_operations kvm_spapr_tce_fops = {
-       .mmap           = kvm_spapr_tce_mmap,
-       .release        = kvm_spapr_tce_release,
- };
- long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
-                                  struct kvm_create_spapr_tce *args)
- {
-       struct kvmppc_spapr_tce_table *stt = NULL;
-       long npages;
-       int ret = -ENOMEM;
-       int i;
-       /* Check this LIOBN hasn't been previously allocated */
-       list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
-               if (stt->liobn == args->liobn)
-                       return -EBUSY;
-       }
-       npages = kvmppc_stt_npages(args->window_size);
-       stt = kzalloc(sizeof(*stt) + npages* sizeof(struct page *),
-                     GFP_KERNEL);
-       if (!stt)
-               goto fail;
-       stt->liobn = args->liobn;
-       stt->window_size = args->window_size;
-       stt->kvm = kvm;
-       for (i = 0; i < npages; i++) {
-               stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
-               if (!stt->pages[i])
-                       goto fail;
-       }
-       kvm_get_kvm(kvm);
-       mutex_lock(&kvm->lock);
-       list_add(&stt->list, &kvm->arch.spapr_tce_tables);
-       mutex_unlock(&kvm->lock);
-       return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
-                               stt, O_RDWR);
- fail:
-       if (stt) {
-               for (i = 0; i < npages; i++)
-                       if (stt->pages[i])
-                               __free_page(stt->pages[i]);
-               kfree(stt);
-       }
-       return ret;
- }
  
  /* Work out RMLS (real mode limit selector) field value for a given RMA size.
     Assumes POWER7 or PPC970. */
@@@ -1108,6 -1175,38 +1175,38 @@@ long kvm_vm_ioctl_allocate_rma(struct k
        return fd;
  }
  
+ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
+                                    int linux_psize)
+ {
+       struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];
+       if (!def->shift)
+               return;
+       (*sps)->page_shift = def->shift;
+       (*sps)->slb_enc = def->sllp;
+       (*sps)->enc[0].page_shift = def->shift;
+       (*sps)->enc[0].pte_enc = def->penc;
+       (*sps)++;
+ }
+ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
+ {
+       struct kvm_ppc_one_seg_page_size *sps;
+       info->flags = KVM_PPC_PAGE_SIZES_REAL;
+       if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+               info->flags |= KVM_PPC_1T_SEGMENTS;
+       info->slb_size = mmu_slb_size;
+       /* We only support these sizes for now, and no multi-size segments */
+       sps = &info->sps[0];
+       kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
+       kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
+       kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);
+       return 0;
+ }
  /*
   * Get (and clear) the dirty memory log for a memory slot.
   */
@@@ -1192,6 -1291,8 +1291,6 @@@ static void unpin_slot(struct kvm *kvm
                                continue;
                        pfn = physp[j] >> PAGE_SHIFT;
                        page = pfn_to_page(pfn);
 -                      if (PageHuge(page))
 -                              page = compound_head(page);
                        SetPageDirty(page);
                        put_page(page);
                }
@@@ -1404,12 -1505,12 +1503,12 @@@ int kvmppc_core_emulate_op(struct kvm_r
        return EMULATE_FAIL;
  }
  
- int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
  {
        return EMULATE_FAIL;
  }
  
- int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
  {
        return EMULATE_FAIL;
  }
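
A side note on the kvm_vm_ioctl_get_smmu_info() handler added above: the following is a minimal userspace sketch, not part of this diff, of how a VMM might query the reported segment page sizes. It assumes the uapi names introduced elsewhere in this series (the KVM_PPC_GET_SMMU_INFO vm ioctl, struct kvm_ppc_smmu_info, KVM_PPC_PAGE_SIZES_MAX_SZ) and a hypothetical, already-open VM file descriptor vm_fd.

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Print the segment page sizes the host MMU supports for this VM. */
    static void dump_smmu_info(int vm_fd)
    {
            struct kvm_ppc_smmu_info info;
            int i;

            if (ioctl(vm_fd, KVM_PPC_GET_SMMU_INFO, &info) < 0) {
                    perror("KVM_PPC_GET_SMMU_INFO");
                    return;
            }
            printf("slb_size=%u flags=%#llx\n", info.slb_size,
                   (unsigned long long)info.flags);
            for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
                    if (!info.sps[i].page_shift)
                            continue;       /* unused slot */
                    printf("  segment page shift %u, slb_enc %#x\n",
                           info.sps[i].page_shift, info.sps[i].slb_enc);
            }
    }
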
diff --combined arch/powerpc/kvm/book3s_segment.S
index 6e6e9cef34a8977235020904936f244a8d777a02,8b2fc66a30665df0533d0024d0424384a719f0c9..798491a268b3f6d0da8e4b9c96fbbcca64d44dd2
@@@ -128,24 -128,25 +128,25 @@@ no_dcbz32_on
        /* First clear RI in our current MSR value */
        li      r0, MSR_RI
        andc    r6, r6, r0
-       MTMSR_EERI(r6)
-       mtsrr0  r9
-       mtsrr1  r4
  
        PPC_LL  r0, SVCPU_R0(r3)
        PPC_LL  r1, SVCPU_R1(r3)
        PPC_LL  r2, SVCPU_R2(r3)
-       PPC_LL  r4, SVCPU_R4(r3)
        PPC_LL  r5, SVCPU_R5(r3)
-       PPC_LL  r6, SVCPU_R6(r3)
        PPC_LL  r7, SVCPU_R7(r3)
        PPC_LL  r8, SVCPU_R8(r3)
-       PPC_LL  r9, SVCPU_R9(r3)
        PPC_LL  r10, SVCPU_R10(r3)
        PPC_LL  r11, SVCPU_R11(r3)
        PPC_LL  r12, SVCPU_R12(r3)
        PPC_LL  r13, SVCPU_R13(r3)
  
+       MTMSR_EERI(r6)
+       mtsrr0  r9
+       mtsrr1  r4
+       PPC_LL  r4, SVCPU_R4(r3)
+       PPC_LL  r6, SVCPU_R6(r3)
+       PPC_LL  r9, SVCPU_R9(r3)
        PPC_LL  r3, (SVCPU_R3)(r3)
  
        RFI
@@@ -197,8 -198,8 +198,8 @@@ kvmppc_interrupt
        /* Save guest PC and MSR */
  #ifdef CONFIG_PPC64
  BEGIN_FTR_SECTION
 -      mr      r10, r12
 -      andi.   r0,r12,0x2
 +      andi.   r0, r12, 0x2
 +      cmpwi   cr1, r0, 0
        beq     1f
        mfspr   r3,SPRN_HSRR0
        mfspr   r4,SPRN_HSRR1
@@@ -251,12 -252,6 +252,12 @@@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE
        beq     ld_last_prev_inst
        cmpwi   r12, BOOK3S_INTERRUPT_ALIGNMENT
        beq-    ld_last_inst
 +#ifdef CONFIG_PPC64
 +BEGIN_FTR_SECTION
 +      cmpwi   r12, BOOK3S_INTERRUPT_H_EMUL_ASSIST
 +      beq-    ld_last_inst
 +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 +#endif
  
        b       no_ld_last_inst
  
@@@ -345,7 -340,8 +346,7 @@@ no_dcbz32_off
  
  #ifdef CONFIG_PPC64
  BEGIN_FTR_SECTION
 -      andi.   r0,r10,0x2
 -      beq     1f
 +      beq     cr1, 1f
        mtspr   SPRN_HSRR1, r6
        mtspr   SPRN_HSRR0, r8
  END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
diff --combined arch/x86/include/asm/kvm_host.h
index e5b97be12d2a6798aadfb4a67c4839ed01aafe31,64c8989263f626779160b3c439551a065da1a6b9..db7c1f2709a270a03429ee1c9089209942647ff5
@@@ -27,7 -27,6 +27,7 @@@
  #include <asm/desc.h>
  #include <asm/mtrr.h>
  #include <asm/msr-index.h>
 +#include <asm/asm.h>
  
  #define KVM_MAX_VCPUS 254
  #define KVM_SOFT_MAX_VCPUS 160
@@@ -173,6 -172,9 +173,9 @@@ enum 
  #define DR7_FIXED_1   0x00000400
  #define DR7_VOLATILE  0xffff23ff
  
+ /* apic attention bits */
+ #define KVM_APIC_CHECK_VAPIC  0
  /*
   * We don't want allocation failures within the mmu code, so we preallocate
   * enough memory for a single page fault in a cache.
@@@ -238,8 -240,6 +241,6 @@@ struct kvm_mmu_page 
  #endif
  
        int write_flooding_count;
-       struct rcu_head rcu;
  };
  
  struct kvm_pio_request {
@@@ -338,6 -338,7 +339,7 @@@ struct kvm_vcpu_arch 
        u64 efer;
        u64 apic_base;
        struct kvm_lapic *apic;    /* kernel irqchip context */
+       unsigned long apic_attention;
        int32_t apic_arb_prio;
        int mp_state;
        int sipi_vector;
@@@ -537,8 -538,6 +539,6 @@@ struct kvm_arch 
        u64 hv_guest_os_id;
        u64 hv_hypercall;
  
-       atomic_t reader_counter;
        #ifdef CONFIG_KVM_MMU_AUDIT
        int audit_point;
        #endif
@@@ -713,8 -712,9 +713,9 @@@ void kvm_mmu_set_mask_ptes(u64 user_mas
  
  int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
  void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
- int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
-                              struct kvm_memory_slot *slot);
+ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+                                    struct kvm_memory_slot *slot,
+                                    gfn_t gfn_offset, unsigned long mask);
  void kvm_mmu_zap_all(struct kvm *kvm);
  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
  void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
@@@ -922,7 -922,9 +923,7 @@@ extern bool kvm_rebooting
        __ASM_SIZE(push) " $666b \n\t"        \
        "call kvm_spurious_fault \n\t"        \
        ".popsection \n\t" \
 -      ".pushsection __ex_table, \"a\" \n\t" \
 -      _ASM_PTR " 666b, 667b \n\t" \
 -      ".popsection"
 +      _ASM_EXTABLE(666b, 667b)
  
  #define __kvm_handle_fault_on_reboot(insn)            \
        ____kvm_handle_fault_on_reboot(insn, "")
diff --combined arch/x86/include/asm/kvm_para.h
index 183922e13de1aa83ead2959144378e96934d4608,a7a7a94b94ce9e69e135b15b8ad5d324178bfb40..63ab1661d00eb0401eba1a379eb49409933332fa
@@@ -95,6 -95,14 +95,14 @@@ struct kvm_vcpu_pv_apf_data 
  extern void kvmclock_init(void);
  extern int kvm_register_clock(char *txt);
  
+ #ifdef CONFIG_KVM_CLOCK
+ bool kvm_check_and_clear_guest_paused(void);
+ #else
+ static inline bool kvm_check_and_clear_guest_paused(void)
+ {
+       return false;
+ }
+ #endif /* CONFIG_KVM_CLOCK */
  
  /* This instruction is vmcall.  On non-VT architectures, it will generate a
   * trap that we will then rewrite to the appropriate instruction.
@@@ -170,17 -178,16 +178,19 @@@ static inline int kvm_para_available(vo
        unsigned int eax, ebx, ecx, edx;
        char signature[13];
  
-       cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
-       memcpy(signature + 0, &ebx, 4);
-       memcpy(signature + 4, &ecx, 4);
-       memcpy(signature + 8, &edx, 4);
-       signature[12] = 0;
 +      if (boot_cpu_data.cpuid_level < 0)
 +              return 0;       /* So we don't blow up on old processors */
 +
+       if (cpu_has_hypervisor) {
+               cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
+               memcpy(signature + 0, &ebx, 4);
+               memcpy(signature + 4, &ecx, 4);
+               memcpy(signature + 8, &edx, 4);
+               signature[12] = 0;
  
-       if (strcmp(signature, "KVMKVMKVM") == 0)
-               return 1;
+               if (strcmp(signature, "KVMKVMKVM") == 0)
+                       return 1;
+       }
  
        return 0;
  }
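
For context on the kvm_check_and_clear_guest_paused() hook declared above: a rough guest-side sketch, not taken from this diff, of how a lockup detector might consume the PVCLOCK_GUEST_STOPPED flag. Only the helper name comes from this pull; the function below, its caller and the expiry handling are hypothetical.

    #include <linux/kvm_para.h>

    /* Sketch: decide whether an expired watchdog really indicates a stall. */
    static bool guest_really_stalled(bool watchdog_expired)
    {
            /*
             * If the hypervisor had this guest stopped (it set
             * PVCLOCK_GUEST_STOPPED via KVM_KVMCLOCK_CTRL), the lost time is
             * not a lockup: consume the flag and ignore this expiry.
             */
            if (watchdog_expired && kvm_check_and_clear_guest_paused())
                    return false;

            return watchdog_expired;
    }
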
diff --combined arch/x86/kvm/x86.c
index 185a2b823a2dbbceedab23b4dcfd8db4395ceda9,b78f89d34242c249842479fd946360e0cc91d462..be6d54929fa7d661c31f65d076c0daad8086d785
@@@ -2147,6 -2147,7 +2147,7 @@@ int kvm_dev_ioctl_check_extension(long 
        case KVM_CAP_ASYNC_PF:
        case KVM_CAP_GET_TSC_KHZ:
        case KVM_CAP_PCI_2_3:
+       case KVM_CAP_KVMCLOCK_CTRL:
                r = 1;
                break;
        case KVM_CAP_COALESCED_MMIO:
@@@ -2597,6 -2598,23 +2598,23 @@@ static int kvm_vcpu_ioctl_x86_set_xcrs(
        return r;
  }
  
+ /*
+  * kvm_set_guest_paused() indicates to the guest kernel that it has been
+  * stopped by the hypervisor.  This function will be called from the host only.
+  * EINVAL is returned when the host attempts to set the flag for a guest that
+  * does not support pv clocks.
+  */
+ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
+ {
+       struct pvclock_vcpu_time_info *src = &vcpu->arch.hv_clock;
+       if (!vcpu->arch.time_page)
+               return -EINVAL;
+       src->flags |= PVCLOCK_GUEST_STOPPED;
+       mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT);
+       kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+       return 0;
+ }
  long kvm_arch_vcpu_ioctl(struct file *filp,
                         unsigned int ioctl, unsigned long arg)
  {
                r = vcpu->arch.virtual_tsc_khz;
                goto out;
        }
+       case KVM_KVMCLOCK_CTRL: {
+               r = kvm_set_guest_paused(vcpu);
+               goto out;
+       }
        default:
                r = -EINVAL;
        }
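
The host-side counterpart of the KVM_KVMCLOCK_CTRL vcpu ioctl handled above could look roughly like the sketch below; this is illustrative only and not part of the diff, and vcpu_fd is a hypothetical, already-open KVM vCPU file descriptor.

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /*
     * Tell the guest's pv clock that it was paused by the host (e.g. while a
     * debugger held the vCPU), so guest watchdogs do not fire on resume.
     * Fails with EINVAL if the guest never registered a pv clock page.
     */
    static int notify_guest_paused(int vcpu_fd)
    {
            return ioctl(vcpu_fd, KVM_KVMCLOCK_CTRL, 0);
    }
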
@@@ -3045,57 -3067,32 +3067,32 @@@ static int kvm_vm_ioctl_reinject(struc
  }
  
  /**
-  * write_protect_slot - write protect a slot for dirty logging
-  * @kvm: the kvm instance
-  * @memslot: the slot we protect
-  * @dirty_bitmap: the bitmap indicating which pages are dirty
-  * @nr_dirty_pages: the number of dirty pages
+  * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+  * @kvm: kvm instance
+  * @log: slot id and address to which we copy the log
   *
-  * We have two ways to find all sptes to protect:
-  * 1. Use kvm_mmu_slot_remove_write_access() which walks all shadow pages and
-  *    checks ones that have a spte mapping a page in the slot.
-  * 2. Use kvm_mmu_rmap_write_protect() for each gfn found in the bitmap.
+  * We need to keep it in mind that VCPU threads can write to the bitmap
+  * concurrently.  So, to avoid losing data, we keep the following order for
+  * each bit:
   *
-  * Generally speaking, if there are not so many dirty pages compared to the
-  * number of shadow pages, we should use the latter.
+  *   1. Take a snapshot of the bit and clear it if needed.
+  *   2. Write protect the corresponding page.
+  *   3. Flush TLB's if needed.
+  *   4. Copy the snapshot to the userspace.
   *
-  * Note that letting others write into a page marked dirty in the old bitmap
-  * by using the remaining tlb entry is not a problem.  That page will become
-  * write protected again when we flush the tlb and then be reported dirty to
-  * the user space by copying the old bitmap.
-  */
- static void write_protect_slot(struct kvm *kvm,
-                              struct kvm_memory_slot *memslot,
-                              unsigned long *dirty_bitmap,
-                              unsigned long nr_dirty_pages)
- {
-       spin_lock(&kvm->mmu_lock);
-       /* Not many dirty pages compared to # of shadow pages. */
-       if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
-               unsigned long gfn_offset;
-               for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) {
-                       unsigned long gfn = memslot->base_gfn + gfn_offset;
-                       kvm_mmu_rmap_write_protect(kvm, gfn, memslot);
-               }
-               kvm_flush_remote_tlbs(kvm);
-       } else
-               kvm_mmu_slot_remove_write_access(kvm, memslot->id);
-       spin_unlock(&kvm->mmu_lock);
- }
- /*
-  * Get (and clear) the dirty memory log for a memory slot.
+  * Between 2 and 3, the guest may write to the page using the remaining TLB
+  * entry.  This is not a problem because the page will be reported dirty at
+  * step 4 using the snapshot taken before and step 3 ensures that successive
+  * writes will be logged for the next call.
   */
- int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
-                                     struct kvm_dirty_log *log)
+ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
  {
        int r;
        struct kvm_memory_slot *memslot;
-       unsigned long n, nr_dirty_pages;
+       unsigned long n, i;
+       unsigned long *dirty_bitmap;
+       unsigned long *dirty_bitmap_buffer;
+       bool is_dirty = false;
  
        mutex_lock(&kvm->slots_lock);
  
                goto out;
  
        memslot = id_to_memslot(kvm->memslots, log->slot);
+       dirty_bitmap = memslot->dirty_bitmap;
        r = -ENOENT;
-       if (!memslot->dirty_bitmap)
+       if (!dirty_bitmap)
                goto out;
  
        n = kvm_dirty_bitmap_bytes(memslot);
-       nr_dirty_pages = memslot->nr_dirty_pages;
  
-       /* If nothing is dirty, don't bother messing with page tables. */
-       if (nr_dirty_pages) {
-               struct kvm_memslots *slots, *old_slots;
-               unsigned long *dirty_bitmap, *dirty_bitmap_head;
+       dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+       memset(dirty_bitmap_buffer, 0, n);
  
-               dirty_bitmap = memslot->dirty_bitmap;
-               dirty_bitmap_head = memslot->dirty_bitmap_head;
-               if (dirty_bitmap == dirty_bitmap_head)
-                       dirty_bitmap_head += n / sizeof(long);
-               memset(dirty_bitmap_head, 0, n);
+       spin_lock(&kvm->mmu_lock);
  
-               r = -ENOMEM;
-               slots = kmemdup(kvm->memslots, sizeof(*kvm->memslots), GFP_KERNEL);
-               if (!slots)
-                       goto out;
+       for (i = 0; i < n / sizeof(long); i++) {
+               unsigned long mask;
+               gfn_t offset;
  
-               memslot = id_to_memslot(slots, log->slot);
-               memslot->nr_dirty_pages = 0;
-               memslot->dirty_bitmap = dirty_bitmap_head;
-               update_memslots(slots, NULL);
+               if (!dirty_bitmap[i])
+                       continue;
  
-               old_slots = kvm->memslots;
-               rcu_assign_pointer(kvm->memslots, slots);
-               synchronize_srcu_expedited(&kvm->srcu);
-               kfree(old_slots);
+               is_dirty = true;
  
-               write_protect_slot(kvm, memslot, dirty_bitmap, nr_dirty_pages);
+               mask = xchg(&dirty_bitmap[i], 0);
+               dirty_bitmap_buffer[i] = mask;
  
-               r = -EFAULT;
-               if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
-                       goto out;
-       } else {
-               r = -EFAULT;
-               if (clear_user(log->dirty_bitmap, n))
-                       goto out;
+               offset = i * BITS_PER_LONG;
+               kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
        }
+       if (is_dirty)
+               kvm_flush_remote_tlbs(kvm);
+       spin_unlock(&kvm->mmu_lock);
+       r = -EFAULT;
+       if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+               goto out;
  
        r = 0;
  out:
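
The rewritten kvm_vm_ioctl_get_dirty_log() above keeps the userspace contract unchanged; as a reminder of what that contract looks like, here is a small VMM-side sketch, not part of this diff, of fetching the bitmap for one slot. The slot number, page count and helper name are hypothetical.

    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Fetch (and clear) the dirty bitmap for one memory slot: one bit per page. */
    static unsigned long *get_dirty_bitmap(int vm_fd, unsigned int slot,
                                           unsigned long npages)
    {
            size_t bytes = ((npages + 63) / 64) * 8;   /* round up to u64 words */
            unsigned long *bitmap = calloc(1, bytes);
            struct kvm_dirty_log log = {
                    .slot = slot,
                    .dirty_bitmap = bitmap,
            };

            if (!bitmap)
                    return NULL;
            if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0) {
                    free(bitmap);
                    return NULL;
            }
            return bitmap;
    }
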
@@@ -3728,9 -3718,8 +3718,8 @@@ struct read_write_emulator_ops 
  static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
  {
        if (vcpu->mmio_read_completed) {
-               memcpy(val, vcpu->mmio_data, bytes);
                trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
-                              vcpu->mmio_phys_addr, *(u64 *)val);
+                              vcpu->mmio_fragments[0].gpa, *(u64 *)val);
                vcpu->mmio_read_completed = 0;
                return 1;
        }
@@@ -3766,8 -3755,9 +3755,9 @@@ static int read_exit_mmio(struct kvm_vc
  static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
                           void *val, int bytes)
  {
-       memcpy(vcpu->mmio_data, val, bytes);
-       memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
+       struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
+       memcpy(vcpu->run->mmio.data, frag->data, frag->len);
        return X86EMUL_CONTINUE;
  }
  
@@@ -3794,10 -3784,7 +3784,7 @@@ static int emulator_read_write_onepage(
        gpa_t gpa;
        int handled, ret;
        bool write = ops->write;
-       if (ops->read_write_prepare &&
-                 ops->read_write_prepare(vcpu, val, bytes))
-               return X86EMUL_CONTINUE;
+       struct kvm_mmio_fragment *frag;
  
        ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
  
@@@ -3823,15 -3810,19 +3810,19 @@@ mmio
        bytes -= handled;
        val += handled;
  
-       vcpu->mmio_needed = 1;
-       vcpu->run->exit_reason = KVM_EXIT_MMIO;
-       vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
-       vcpu->mmio_size = bytes;
-       vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
-       vcpu->run->mmio.is_write = vcpu->mmio_is_write = write;
-       vcpu->mmio_index = 0;
+       while (bytes) {
+               unsigned now = min(bytes, 8U);
  
-       return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
+               frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
+               frag->gpa = gpa;
+               frag->data = val;
+               frag->len = now;
+               gpa += now;
+               val += now;
+               bytes -= now;
+       }
+       return X86EMUL_CONTINUE;
  }
  
  int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
                        struct read_write_emulator_ops *ops)
  {
        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+       gpa_t gpa;
+       int rc;
+       if (ops->read_write_prepare &&
+                 ops->read_write_prepare(vcpu, val, bytes))
+               return X86EMUL_CONTINUE;
+       vcpu->mmio_nr_fragments = 0;
  
        /* Crossing a page boundary? */
        if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
-               int rc, now;
+               int now;
  
                now = -addr & ~PAGE_MASK;
                rc = emulator_read_write_onepage(addr, val, now, exception,
                bytes -= now;
        }
  
-       return emulator_read_write_onepage(addr, val, bytes, exception,
-                                          vcpu, ops);
+       rc = emulator_read_write_onepage(addr, val, bytes, exception,
+                                        vcpu, ops);
+       if (rc != X86EMUL_CONTINUE)
+               return rc;
+       if (!vcpu->mmio_nr_fragments)
+               return rc;
+       gpa = vcpu->mmio_fragments[0].gpa;
+       vcpu->mmio_needed = 1;
+       vcpu->mmio_cur_fragment = 0;
+       vcpu->run->mmio.len = vcpu->mmio_fragments[0].len;
+       vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
+       vcpu->run->exit_reason = KVM_EXIT_MMIO;
+       vcpu->run->mmio.phys_addr = gpa;
+       return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
  }
  
  static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
@@@ -5263,10 -5279,6 +5279,6 @@@ static int vcpu_enter_guest(struct kvm_
                        kvm_deliver_pmi(vcpu);
        }
  
-       r = kvm_mmu_reload(vcpu);
-       if (unlikely(r))
-               goto out;
        if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
                inject_pending_event(vcpu);
  
                }
        }
  
+       r = kvm_mmu_reload(vcpu);
+       if (unlikely(r)) {
+               kvm_x86_ops->cancel_injection(vcpu);
+               goto out;
+       }
        preempt_disable();
  
        kvm_x86_ops->prepare_guest_switch(vcpu);
@@@ -5456,33 -5474,55 +5474,55 @@@ static int __vcpu_run(struct kvm_vcpu *
        return r;
  }
  
+ /*
+  * Implements the following, as a state machine:
+  *
+  * read:
+  *   for each fragment
+  *     write gpa, len
+  *     exit
+  *     copy data
+  *   execute insn
+  *
+  * write:
+  *   for each fragment
+  *      write gpa, len
+  *      copy data
+  *      exit
+  */
  static int complete_mmio(struct kvm_vcpu *vcpu)
  {
        struct kvm_run *run = vcpu->run;
+       struct kvm_mmio_fragment *frag;
        int r;
  
        if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
                return 1;
  
        if (vcpu->mmio_needed) {
-               vcpu->mmio_needed = 0;
+               /* Complete previous fragment */
+               frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
                if (!vcpu->mmio_is_write)
-                       memcpy(vcpu->mmio_data + vcpu->mmio_index,
-                              run->mmio.data, 8);
-               vcpu->mmio_index += 8;
-               if (vcpu->mmio_index < vcpu->mmio_size) {
-                       run->exit_reason = KVM_EXIT_MMIO;
-                       run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index;
-                       memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8);
-                       run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8);
-                       run->mmio.is_write = vcpu->mmio_is_write;
-                       vcpu->mmio_needed = 1;
-                       return 0;
+                       memcpy(frag->data, run->mmio.data, frag->len);
+               if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
+                       vcpu->mmio_needed = 0;
+                       if (vcpu->mmio_is_write)
+                               return 1;
+                       vcpu->mmio_read_completed = 1;
+                       goto done;
                }
+               /* Initiate next fragment */
+               ++frag;
+               run->exit_reason = KVM_EXIT_MMIO;
+               run->mmio.phys_addr = frag->gpa;
                if (vcpu->mmio_is_write)
-                       return 1;
-               vcpu->mmio_read_completed = 1;
+                       memcpy(run->mmio.data, frag->data, frag->len);
+               run->mmio.len = frag->len;
+               run->mmio.is_write = vcpu->mmio_is_write;
+               return 0;
        }
+ done:
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
        r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
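
The mmio_fragments rework above only changes how the kernel splits a page-crossing access; a VMM's exit loop still sees ordinary KVM_EXIT_MMIO exits, just one per fragment. A sketch, not from this diff, with a made-up 16-byte device as the backing store:

    #include <stdint.h>
    #include <string.h>
    #include <linux/kvm.h>

    static uint8_t fake_device_regs[16];    /* hypothetical device state */

    static void handle_mmio_exit(struct kvm_run *run, uint64_t dev_base)
    {
            uint64_t off = run->mmio.phys_addr - dev_base;

            if (off + run->mmio.len > sizeof(fake_device_regs))
                    return; /* a real VMM would route by address instead */

            /* A page-crossing guest access arrives as two consecutive exits. */
            if (run->mmio.is_write)
                    memcpy(fake_device_regs + off, run->mmio.data, run->mmio.len);
            else
                    memcpy(run->mmio.data, fake_device_regs + off, run->mmio.len);
    }
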
@@@ -6336,11 -6376,13 +6376,11 @@@ int kvm_arch_prepare_memory_region(stru
                if (npages && !old.rmap) {
                        unsigned long userspace_addr;
  
 -                      down_write(&current->mm->mmap_sem);
 -                      userspace_addr = do_mmap(NULL, 0,
 +                      userspace_addr = vm_mmap(NULL, 0,
                                                 npages * PAGE_SIZE,
                                                 PROT_READ | PROT_WRITE,
                                                 map_flags,
                                                 0);
 -                      up_write(&current->mm->mmap_sem);
  
                        if (IS_ERR((void *)userspace_addr))
                                return PTR_ERR((void *)userspace_addr);
@@@ -6364,8 -6406,10 +6404,8 @@@ void kvm_arch_commit_memory_region(stru
        if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
                int ret;
  
 -              down_write(&current->mm->mmap_sem);
 -              ret = do_munmap(current->mm, old.userspace_addr,
 +              ret = vm_munmap(old.userspace_addr,
                                old.npages * PAGE_SIZE);
 -              up_write(&current->mm->mmap_sem);
                if (ret < 0)
                        printk(KERN_WARNING
                               "kvm_vm_ioctl_set_memory_region: "
@@@ -6399,21 -6443,9 +6439,9 @@@ int kvm_arch_vcpu_runnable(struct kvm_v
                 kvm_cpu_has_interrupt(vcpu));
  }
  
- void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
  {
-       int me;
-       int cpu = vcpu->cpu;
-       if (waitqueue_active(&vcpu->wq)) {
-               wake_up_interruptible(&vcpu->wq);
-               ++vcpu->stat.halt_wakeup;
-       }
-       me = get_cpu();
-       if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
-               if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE)
-                       smp_send_reschedule(cpu);
-       put_cpu();
+       return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
  }
  
  int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
@@@ -6581,7 -6613,6 +6609,7 @@@ void kvm_arch_async_page_present(struc
                kvm_inject_page_fault(vcpu, &fault);
        }
        vcpu->arch.apf.halted = false;
 +      vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
  }
  
  bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
diff --combined drivers/s390/char/sclp_cmd.c
index 36506366158daa719c5c66419c0000c233a09d87,032171e335e9df5789a2ab1a1413d534dc20130f..766cb7b19b403fedadc2fecee3d9c24cc7bee273
@@@ -17,6 -17,7 +17,7 @@@
  #include <linux/mm.h>
  #include <linux/mmzone.h>
  #include <linux/memory.h>
+ #include <linux/module.h>
  #include <linux/platform_device.h>
  #include <asm/chpid.h>
  #include <asm/sclp.h>
@@@ -38,7 -39,8 +39,8 @@@ struct read_info_sccb 
        u64     facilities;             /* 48-55 */
        u8      _reserved2[84 - 56];    /* 56-83 */
        u8      fac84;                  /* 84 */
-       u8      _reserved3[91 - 85];    /* 85-90 */
+       u8      fac85;                  /* 85 */
+       u8      _reserved3[91 - 86];    /* 86-90 */
        u8      flags;                  /* 91 */
        u8      _reserved4[100 - 92];   /* 92-99 */
        u32     rnsize2;                /* 100-103 */
@@@ -51,6 -53,7 +53,7 @@@ static int __initdata early_read_info_s
  
  u64 sclp_facilities;
  static u8 sclp_fac84;
+ static u8 sclp_fac85;
  static unsigned long long rzm;
  static unsigned long long rnmax;
  
@@@ -112,6 -115,7 +115,7 @@@ void __init sclp_facilities_detect(void
        sccb = &early_read_info_sccb;
        sclp_facilities = sccb->facilities;
        sclp_fac84 = sccb->fac84;
+       sclp_fac85 = sccb->fac85;
        rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
        rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2;
        rzm <<= 20;
@@@ -127,6 -131,12 +131,12 @@@ unsigned long long sclp_get_rzm(void
        return rzm;
  }
  
+ u8 sclp_get_fac85(void)
+ {
+       return sclp_fac85;
+ }
+ EXPORT_SYMBOL_GPL(sclp_get_fac85);
  /*
   * This function will be called after sclp_facilities_detect(), which gets
   * called from early.c code. Therefore the sccb should have valid contents.
@@@ -352,17 -362,7 +362,17 @@@ out
  
  static int sclp_assign_storage(u16 rn)
  {
 -      return do_assign_storage(0x000d0001, rn);
 +      unsigned long long start, address;
 +      int rc;
 +
 +      rc = do_assign_storage(0x000d0001, rn);
 +      if (rc)
 +              goto out;
 +      start = address = rn2addr(rn);
 +      for (; address < start + rzm; address += PAGE_SIZE)
 +              page_set_storage_key(address, PAGE_DEFAULT_KEY, 0);
 +out:
 +      return rc;
  }
  
  static int sclp_unassign_storage(u16 rn)