diff --git a/default-configs/ppc-softmmu.mak b/default-configs/ppc-softmmu.mak
index 33f8d84e68..d725b23312 100644
--- a/default-configs/ppc-softmmu.mak
+++ b/default-configs/ppc-softmmu.mak
@@ -45,8 +45,8 @@ CONFIG_PREP=y
 CONFIG_MAC=y
 CONFIG_E500=y
 CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
+CONFIG_ETSEC=y
+CONFIG_LIBDECNUMBER=y
 # For PReP
 CONFIG_MC146818RTC=y
-CONFIG_ETSEC=y
 CONFIG_ISA_TESTDEV=y
-CONFIG_LIBDECNUMBER=y
diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
index 37a15b7ce2..bd30d69347 100644
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -46,6 +46,8 @@ CONFIG_PREP=y
 CONFIG_MAC=y
 CONFIG_E500=y
 CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
+CONFIG_ETSEC=y
+CONFIG_LIBDECNUMBER=y
 # For pSeries
 CONFIG_XICS=$(CONFIG_PSERIES)
 CONFIG_XICS_KVM=$(and $(CONFIG_PSERIES),$(CONFIG_KVM))
@@ -58,4 +60,3 @@ CONFIG_I82374=y
 CONFIG_I8257=y
 CONFIG_MC146818RTC=y
 CONFIG_ISA_TESTDEV=y
-CONFIG_LIBDECNUMBER=y
diff --git a/hw/core/loader.c b/hw/core/loader.c
index 193f0f8400..597b117db3 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -89,6 +89,27 @@ int load_image(const char *filename, uint8_t *addr)
     return size;
 }
 
+/* return the size or -1 if error */
+ssize_t load_image_size(const char *filename, void *addr, size_t size)
+{
+    int fd;
+    ssize_t actsize;
+
+    fd = open(filename, O_RDONLY | O_BINARY);
+    if (fd < 0) {
+        return -1;
+    }
+
+    actsize = read(fd, addr, size);
+    if (actsize < 0) {
+        close(fd);
+        return -1;
+    }
+    close(fd);
+
+    return actsize;
+}
+
 /* read()-like version */
 ssize_t read_targphys(const char *name,
                       int fd, hwaddr dst_addr, size_t nbytes)
diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c
index ff6051defe..b4273aa171 100644
--- a/hw/misc/macio/cuda.c
+++ b/hw/misc/macio/cuda.c
@@ -123,13 +123,22 @@ static void cuda_update_irq(CUDAState *s)
     }
 }
 
+static uint64_t get_tb(uint64_t freq)
+{
+    return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+                    freq, get_ticks_per_sec());
+}
+
 static unsigned int get_counter(CUDATimer *s)
 {
     int64_t d;
     unsigned int counter;
+    uint64_t tb_diff;
+
+    /* Reverse of the tb calculation algorithm that Mac OS X uses on bootup. */
+    tb_diff = get_tb(s->frequency) - s->load_time;
+    d = (tb_diff * 0xBF401675E5DULL) / (s->frequency << 24);
 
-    d = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - s->load_time,
-                 CUDA_TIMER_FREQ, get_ticks_per_sec());
     if (s->index == 0) {
         /* the timer goes down from latch to -1 (period of latch + 2) */
         if (d <= (s->counter_value + 1)) {
@@ -147,7 +156,7 @@ static unsigned int get_counter(CUDATimer *s)
 static void set_counter(CUDAState *s, CUDATimer *ti, unsigned int val)
 {
     CUDA_DPRINTF("T%d.counter=%d\n", 1 + (ti->timer == NULL), val);
-    ti->load_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    ti->load_time = get_tb(s->frequency);
     ti->counter_value = val;
     cuda_timer_update(s, ti, ti->load_time);
 }
@@ -688,6 +697,8 @@ static void cuda_realizefn(DeviceState *dev, Error **errp)
     struct tm tm;
 
     s->timers[0].timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, cuda_timer1, s);
+    s->timers[0].frequency = s->frequency;
+    s->timers[1].frequency = s->frequency;
 
     qemu_get_timedate(&tm, 0);
     s->tick_offset = (uint32_t)mktimegm(&tm) + RTC_OFFSET;
@@ -713,6 +724,11 @@ static void cuda_initfn(Object *obj)
                         DEVICE(obj), "adb.0");
 }
 
+static Property cuda_properties[] = {
+    DEFINE_PROP_UINT64("frequency", CUDAState, frequency, 0),
+    DEFINE_PROP_END_OF_LIST()
+};
+
 static void cuda_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
@@ -720,6 +736,7 @@ static void cuda_class_init(ObjectClass *oc, void *data)
     dc->realize = cuda_realizefn;
     dc->reset = cuda_reset;
     dc->vmsd = &vmstate_cuda;
+    dc->props = cuda_properties;
 }
 
 static const TypeInfo cuda_type_info = {
diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c
index 47f45f5d2c..e0f1e88f54 100644
--- a/hw/misc/macio/macio.c
+++ b/hw/misc/macio/macio.c
@@ -42,6 +42,7 @@ typedef struct MacIOState
     void *dbdma;
     MemoryRegion *pic_mem;
     MemoryRegion *escc_mem;
+    uint64_t frequency;
 } MacIOState;
 
 #define OLDWORLD_MACIO(obj) \
@@ -243,13 +244,18 @@ static void timer_write(void *opaque, hwaddr addr, uint64_t value,
 static uint64_t timer_read(void *opaque, hwaddr addr, unsigned size)
 {
     uint32_t value = 0;
+    uint64_t systime = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    uint64_t kltime;
+
+    kltime = muldiv64(systime, 4194300, get_ticks_per_sec() * 4);
+    kltime = muldiv64(kltime, 18432000, 1048575);
 
     switch (addr) {
     case 0x38:
-        value = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+        value = kltime;
         break;
     case 0x3c:
-        value = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) >> 32;
+        value = kltime >> 32;
         break;
     }
 
@@ -346,12 +352,19 @@ static void macio_newworld_class_init(ObjectClass *oc, void *data)
     pdc->device_id = PCI_DEVICE_ID_APPLE_UNI_N_KEYL;
 }
 
+static Property macio_properties[] = {
+    DEFINE_PROP_UINT64("frequency", MacIOState, frequency, 0),
+    DEFINE_PROP_END_OF_LIST()
+};
+
 static void macio_class_init(ObjectClass *klass, void *data)
 {
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     k->vendor_id = PCI_VENDOR_ID_APPLE;
     k->class_id = PCI_CLASS_OTHERS << 8;
+    dc->props = macio_properties;
 }
 
 static const TypeInfo macio_oldworld_type_info = {
@@ -398,6 +411,8 @@ void macio_init(PCIDevice *d,
     macio_state->escc_mem = escc_mem;
     /* Note: this code is strongly inspirated from the corresponding code
        in PearPC */
+    qdev_prop_set_uint64(DEVICE(&macio_state->cuda), "frequency",
+                         macio_state->frequency);
 
     qdev_init_nofail(DEVICE(d));
 }
diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c
index 2d47df6930..23c47d397c 100644
--- a/hw/net/spapr_llan.c
+++ b/hw/net/spapr_llan.c
@@ -72,7 +72,14 @@ typedef uint64_t vlan_bd_t;
 #define VLAN_RXQ_BD_OFF      0
 #define VLAN_FILTER_BD_OFF   8
 #define VLAN_RX_BDS_OFF      16
-#define VLAN_MAX_BUFS        ((SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF) / 8)
+/*
+ * The final 8 bytes of the buffer list is a counter of frames dropped
+ * because there was not a buffer in the buffer list capable of holding
+ * the frame. We must avoid it, or the operating system will report garbage
+ * for this statistic.
+ */
+#define VLAN_RX_BDS_LEN      (SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF - 8)
+#define VLAN_MAX_BUFS        (VLAN_RX_BDS_LEN / 8)
 
 #define TYPE_VIO_SPAPR_VLAN_DEVICE "spapr-vlan"
 #define VIO_SPAPR_VLAN_DEVICE(obj) \
@@ -119,7 +126,7 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf,
 
     do {
         buf_ptr += 8;
-        if (buf_ptr >= SPAPR_TCE_PAGE_SIZE) {
+        if (buf_ptr >= (VLAN_RX_BDS_LEN + VLAN_RX_BDS_OFF)) {
             buf_ptr = VLAN_RX_BDS_OFF;
         }
 
@@ -397,7 +404,7 @@ static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu,
 
     do {
         dev->add_buf_ptr += 8;
-        if (dev->add_buf_ptr >= SPAPR_TCE_PAGE_SIZE) {
+        if (dev->add_buf_ptr >= (VLAN_RX_BDS_LEN + VLAN_RX_BDS_OFF)) {
             dev->add_buf_ptr = VLAN_RX_BDS_OFF;
         }
 
diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c
index 170b10b766..d35f8a3121 100644
--- a/hw/nvram/mac_nvram.c
+++ b/hw/nvram/mac_nvram.c
@@ -26,6 +26,7 @@
 #include "hw/nvram/openbios_firmware_abi.h"
 #include "sysemu/sysemu.h"
 #include "hw/ppc/mac.h"
+#include <zlib.h>
 
 /* debug NVR */
 //#define DEBUG_NVR
@@ -39,29 +40,6 @@
 
 #define DEF_SYSTEM_SIZE 0xc10
 
-/* Direct access to NVRAM */
-uint8_t macio_nvram_read(MacIONVRAMState *s, uint32_t addr)
-{
-    uint32_t ret;
-
-    if (addr < s->size) {
-        ret = s->data[addr];
-    } else {
-        ret = -1;
-    }
-    NVR_DPRINTF("read addr %04" PRIx32 " val %" PRIx8 "\n", addr, ret);
-
-    return ret;
-}
-
-void macio_nvram_write(MacIONVRAMState *s, uint32_t addr, uint8_t val)
-{
-    NVR_DPRINTF("write addr %04" PRIx32 " val %" PRIx8 "\n", addr, val);
-    if (addr < s->size) {
-        s->data[addr] = val;
-    }
-}
-
 /* macio style NVRAM device */
 static void macio_nvram_writeb(void *opaque, hwaddr addr,
                                uint64_t value, unsigned size)
@@ -89,6 +67,10 @@ static uint64_t macio_nvram_readb(void *opaque, hwaddr addr,
 static const MemoryRegionOps macio_nvram_ops = {
     .read = macio_nvram_readb,
     .write = macio_nvram_writeb,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 1,
     .endianness = DEVICE_BIG_ENDIAN,
 };
 
@@ -156,15 +138,16 @@ static void macio_nvram_register_types(void)
 }
 
 /* Set up a system OpenBIOS NVRAM partition */
-void pmac_format_nvram_partition (MacIONVRAMState *nvr, int len)
+static void pmac_format_nvram_partition_of(MacIONVRAMState *nvr, int off,
+                                           int len)
 {
     unsigned int i;
-    uint32_t start = 0, end;
+    uint32_t start = off, end;
     struct OpenBIOS_nvpart_v1 *part_header;
 
     // OpenBIOS nvram variables
     // Variable partition
-    part_header = (struct OpenBIOS_nvpart_v1 *)nvr->data;
+    part_header = (struct OpenBIOS_nvpart_v1 *)&nvr->data[start];
     part_header->signature = OPENBIOS_PART_SYSTEM;
     pstrcpy(part_header->name, sizeof(part_header->name), "system");
 
@@ -192,4 +175,39 @@ void pmac_format_nvram_partition (MacIONVRAMState *nvr, int len)
     OpenBIOS_finish_partition(part_header, end - start);
 }
 
+#define OSX_NVRAM_SIGNATURE     (0x5A)
+
+/* Set up a Mac OS X NVRAM partition */
+static void pmac_format_nvram_partition_osx(MacIONVRAMState *nvr, int off,
+                                            int len)
+{
+    uint32_t start = off;
+    struct OpenBIOS_nvpart_v1 *part_header;
+    unsigned char *data = &nvr->data[start];
+
+    /* empty partition */
+    part_header = (struct OpenBIOS_nvpart_v1 *)data;
+    part_header->signature = OSX_NVRAM_SIGNATURE;
+    pstrcpy(part_header->name, sizeof(part_header->name), "wwwwwwwwwwww");
+
+    OpenBIOS_finish_partition(part_header, len);
+
+    /* Generation */
+    stl_be_p(&data[20], 2);
+
+    /* Adler32 checksum */
+    stl_be_p(&data[16], adler32(0, &data[20], len - 20));
+}
+
+/* Set up NVRAM with OF and OSX partitions */
+void pmac_format_nvram_partition(MacIONVRAMState *nvr, int len)
+{
+    /*
+     * Mac OS X expects side "B" of the flash at the second half of NVRAM,
+     * so we use half of the chip for OF and the other half for a free OSX
+     * partition.
+     */
+    pmac_format_nvram_partition_of(nvr, 0, len / 2);
+    pmac_format_nvram_partition_osx(nvr, len / 2, len / 2);
+}
 type_init(macio_nvram_register_types)
diff --git a/hw/ppc/mac.h b/hw/ppc/mac.h
index c1faf9ce27..aff2b9a566 100644
--- a/hw/ppc/mac.h
+++ b/hw/ppc/mac.h
@@ -57,6 +57,7 @@ typedef struct CUDATimer {
     uint16_t counter_value;
     int64_t load_time;
     int64_t next_irq_time;
+    uint64_t frequency;
     QEMUTimer *timer;
 } CUDATimer;
 
@@ -97,6 +98,7 @@ typedef struct CUDAState {
     CUDATimer timers[2];
 
     uint32_t tick_offset;
+    uint64_t frequency;
 
     uint8_t last_b;
     uint8_t last_acr;
@@ -178,6 +180,4 @@ typedef struct MacIONVRAMState {
 } MacIONVRAMState;
 
 void pmac_format_nvram_partition (MacIONVRAMState *nvr, int len);
-uint8_t macio_nvram_read(MacIONVRAMState *s, uint32_t addr);
-void macio_nvram_write(MacIONVRAMState *s, uint32_t addr, uint8_t val);
 #endif /* !defined(__PPC_MAC_H__) */
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 1ec4bb490b..8453bfa284 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -176,6 +176,8 @@ static void ppc_core99_init(MachineState *machine)
     SysBusDevice *s;
     DeviceState *dev;
     int *token = g_new(int, 1);
+    hwaddr nvram_addr = 0xFFF04000;
+    uint64_t tbfreq;
 
     linux_boot = (kernel_filename != NULL);
 
@@ -372,6 +374,14 @@ static void ppc_core99_init(MachineState *machine)
         pci_bus = pci_pmac_init(pic, get_system_memory(), get_system_io());
         machine_arch = ARCH_MAC99;
     }
+
+    /* Timebase Frequency */
+    if (kvm_enabled()) {
+        tbfreq = kvmppc_get_tbfreq();
+    } else {
+        tbfreq = TBFREQ;
+    }
+
     /* init basic PC hardware */
     escc_mem = escc_init(0, pic[0x25], pic[0x24],
                          serial_hds[0], serial_hds[1], ESCC_CLOCK, 4);
@@ -385,6 +395,7 @@ static void ppc_core99_init(MachineState *machine)
     qdev_connect_gpio_out(dev, 2, pic[0x02]); /* IDE DMA */
     qdev_connect_gpio_out(dev, 3, pic[0x0e]); /* IDE */
     qdev_connect_gpio_out(dev, 4, pic[0x03]); /* IDE DMA */
+    qdev_prop_set_uint64(dev, "frequency", tbfreq);
     macio_init(macio, pic_mem, escc_bar);
 
     /* We only emulate 2 out of 3 IDE controllers for now */
@@ -426,11 +437,18 @@ static void ppc_core99_init(MachineState *machine)
     }
 
     /* The NewWorld NVRAM is not located in the MacIO device */
+#ifdef CONFIG_KVM
+    if (kvm_enabled() && getpagesize() > 4096) {
+        /* We can't combine read-write and read-only in a single page, so
+           move the NVRAM out of ROM again for KVM */
+        nvram_addr = 0xFFE00000;
+    }
+#endif
     dev = qdev_create(NULL, TYPE_MACIO_NVRAM);
     qdev_prop_set_uint32(dev, "size", 0x2000);
     qdev_prop_set_uint32(dev, "it_shift", 1);
     qdev_init_nofail(dev);
-    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0xFFF04000);
+    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, nvram_addr);
     nvr = MACIO_NVRAM(dev);
     pmac_format_nvram_partition(nvr, 0x2000);
     /* No PCI init: the BIOS will do it */
@@ -461,28 +479,34 @@ static void ppc_core99_init(MachineState *machine)
 #ifdef CONFIG_KVM
         uint8_t *hypercall;
 
-        fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, kvmppc_get_tbfreq());
         hypercall = g_malloc(16);
         kvmppc_get_hypercall(env, hypercall, 16);
         fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16);
         fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid());
 #endif
-    } else {
-        fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, TBFREQ);
     }
+    fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, tbfreq);
     /* Mac OS X requires a "known good" clock-frequency value; pass it one. */
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ);
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ);
+    fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_NVRAM_ADDR, nvram_addr);
 
     qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
 }
 
+static int core99_kvm_type(const char *arg)
+{
+    /* Always force PR KVM */
+    return 2;
+}
+
 static QEMUMachine core99_machine = {
     .name = "mac99",
     .desc = "Mac99 based PowerMAC",
     .init = ppc_core99_init,
     .max_cpus = MAX_CPUS,
     .default_boot_order = "cd",
+    .kvm_type = core99_kvm_type,
 };
 
 static void core99_machine_init(void)
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index cd9bdbc53e..630a9f9c36 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -103,6 +103,7 @@ static void ppc_heathrow_init(MachineState *machine)
     uint16_t ppc_boot_device;
     DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
     void *fw_cfg;
+    uint64_t tbfreq;
 
     linux_boot = (kernel_filename != NULL);
 
@@ -250,6 +251,13 @@ static void ppc_heathrow_init(MachineState *machine)
         }
     }
 
+    /* Timebase Frequency */
+    if (kvm_enabled()) {
+        tbfreq = kvmppc_get_tbfreq();
+    } else {
+        tbfreq = TBFREQ;
+    }
+
     /* init basic PC hardware */
     if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) {
         hw_error("Only 6xx bus is supported on heathrow machine\n");
@@ -278,6 +286,7 @@ static void ppc_heathrow_init(MachineState *machine)
     qdev_connect_gpio_out(dev, 2, pic[0x02]); /* IDE-0 DMA */
     qdev_connect_gpio_out(dev, 3, pic[0x0E]); /* IDE-1 */
     qdev_connect_gpio_out(dev, 4, pic[0x03]); /* IDE-1 DMA */
+    qdev_prop_set_uint64(dev, "frequency", tbfreq);
     macio_init(macio, pic_mem, escc_bar);
 
     macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio),
@@ -330,15 +339,13 @@ static void ppc_heathrow_init(MachineState *machine)
 #ifdef CONFIG_KVM
         uint8_t *hypercall;
 
-        fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, kvmppc_get_tbfreq());
         hypercall = g_malloc(16);
         kvmppc_get_hypercall(env, hypercall, 16);
         fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16);
         fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid());
 #endif
-    } else {
-        fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, TBFREQ);
     }
+    fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, tbfreq);
     /* Mac OS X requires a "known good" clock-frequency value; pass it one. */
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ);
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ);
@@ -346,6 +353,12 @@ static void ppc_heathrow_init(MachineState *machine)
     qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
 }
 
+static int heathrow_kvm_type(const char *arg)
+{
+    /* Always force PR KVM */
+    return 2;
+}
+
 static QEMUMachine heathrow_machine = {
     .name = "g3beige",
     .desc = "Heathrow based PowerMAC",
@@ -355,6 +368,7 @@ static QEMUMachine heathrow_machine = {
     .is_default = 1,
 #endif
     .default_boot_order = "cd", /* TOFIX "cad" when Mac floppy is implemented */
+    .kvm_type = heathrow_kvm_type,
 };
 
 static void heathrow_machine_init(void)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 5cb452f234..2ab4460f04 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -71,6 +71,7 @@
  */
 #define FDT_MAX_SIZE            0x40000
 #define RTAS_MAX_SIZE           0x10000
+#define RTAS_MAX_ADDR           0x80000000 /* RTAS must stay below that */
 #define FW_MAX_SIZE             0x400000
 #define FW_FILE_NAME            "slof.bin"
 #define FW_OVERHEAD             0x2800000
@@ -80,7 +81,7 @@
 
 #define TIMEBASE_FREQ           512000000ULL
 
-#define MAX_CPUS                256
+#define MAX_CPUS                255
 
 #define PHANDLE_XICP            0x00001111
 
@@ -283,6 +284,19 @@ static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
     return (p - prop) * sizeof(uint32_t);
 }
 
+static hwaddr spapr_node0_size(void)
+{
+    if (nb_numa_nodes) {
+        int i;
+        for (i = 0; i < nb_numa_nodes; ++i) {
+            if (numa_info[i].node_mem) {
+                return MIN(pow2floor(numa_info[i].node_mem), ram_size);
+            }
+        }
+    }
+    return ram_size;
+}
+
 #define _FDT(exp) \
     do { \
         int ret = (exp);                                           \
@@ -319,6 +333,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
     QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
     unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
     uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
+    char *buf;
 
     add_str(hypertas, "hcall-pft");
     add_str(hypertas, "hcall-term");
@@ -348,6 +363,29 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
     _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
     _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries")));
 
+    /*
+     * Add info to guest to indentify which host is it being run on
+     * and what is the uuid of the guest
+     */
+    if (kvmppc_get_host_model(&buf)) {
+        _FDT((fdt_property_string(fdt, "host-model", buf)));
+        g_free(buf);
+    }
+    if (kvmppc_get_host_serial(&buf)) {
+        _FDT((fdt_property_string(fdt, "host-serial", buf)));
+        g_free(buf);
+    }
+
+    buf = g_strdup_printf(UUID_FMT, qemu_uuid[0], qemu_uuid[1],
+                          qemu_uuid[2], qemu_uuid[3], qemu_uuid[4],
+                          qemu_uuid[5], qemu_uuid[6], qemu_uuid[7],
+                          qemu_uuid[8], qemu_uuid[9], qemu_uuid[10],
+                          qemu_uuid[11], qemu_uuid[12], qemu_uuid[13],
+                          qemu_uuid[14], qemu_uuid[15]);
+
+    _FDT((fdt_property_string(fdt, "vm,uuid", buf)));
+    g_free(buf);
+
     _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
     _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));
 
@@ -502,6 +540,15 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
 
     _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));
 
+    /*
+     * According to PAPR, rtas ibm,os-term, does not gaurantee a return
+     * back to the guest cpu.
+     *
+     * While an additional ibm,extended-os-term property indicates that
+     * rtas call return will always occur. Set this property.
+     */
+    _FDT((fdt_property(fdt, "ibm,extended-os-term", NULL, 0)));
+
     _FDT((fdt_end_node(fdt)));
 
     /* interrupt controller */
@@ -597,72 +644,75 @@ int spapr_h_cas_compose_response(target_ulong addr, target_ulong size)
     return 0;
 }
 
-static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
+static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
+                                       hwaddr size)
 {
-    uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
-                                cpu_to_be32(0x0), cpu_to_be32(0x0),
-                                cpu_to_be32(0x0)};
+    uint32_t associativity[] = {
+        cpu_to_be32(0x4), /* length */
+        cpu_to_be32(0x0), cpu_to_be32(0x0),
+        cpu_to_be32(0x0), cpu_to_be32(nodeid)
+    };
     char mem_name[32];
-    hwaddr node0_size, mem_start, node_size;
     uint64_t mem_reg_property[2];
-    int i, off;
+    int off;
 
-    /* memory node(s) */
-    if (nb_numa_nodes > 1 && numa_info[0].node_mem < ram_size) {
-        node0_size = numa_info[0].node_mem;
-    } else {
-        node0_size = ram_size;
-    }
+    mem_reg_property[0] = cpu_to_be64(start);
+    mem_reg_property[1] = cpu_to_be64(size);
 
-    /* RMA */
-    mem_reg_property[0] = 0;
-    mem_reg_property[1] = cpu_to_be64(spapr->rma_size);
-    off = fdt_add_subnode(fdt, 0, "memory@0");
+    sprintf(mem_name, "memory@" TARGET_FMT_lx, start);
+    off = fdt_add_subnode(fdt, 0, mem_name);
     _FDT(off);
     _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
     _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
                       sizeof(mem_reg_property))));
     _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
                       sizeof(associativity))));
+}
 
-    /* RAM: Node 0 */
-    if (node0_size > spapr->rma_size) {
-        mem_reg_property[0] = cpu_to_be64(spapr->rma_size);
-        mem_reg_property[1] = cpu_to_be64(node0_size - spapr->rma_size);
+static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
+{
+    hwaddr mem_start, node_size;
+    int i, nb_nodes = nb_numa_nodes;
+    NodeInfo *nodes = numa_info;
+    NodeInfo ramnode;
 
-        sprintf(mem_name, "memory@" TARGET_FMT_lx, spapr->rma_size);
-        off = fdt_add_subnode(fdt, 0, mem_name);
-        _FDT(off);
-        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
-        _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
-                          sizeof(mem_reg_property))));
-        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
-                          sizeof(associativity))));
+    /* No NUMA nodes, assume there is just one node with whole RAM */
+    if (!nb_numa_nodes) {
+        nb_nodes = 1;
+        ramnode.node_mem = ram_size;
+        nodes = &ramnode;
     }
 
-    /* RAM: Node 1 and beyond */
-    mem_start = node0_size;
-    for (i = 1; i < nb_numa_nodes; i++) {
-        mem_reg_property[0] = cpu_to_be64(mem_start);
+    for (i = 0, mem_start = 0; i < nb_nodes; ++i) {
+        if (!nodes[i].node_mem) {
+            continue;
+        }
         if (mem_start >= ram_size) {
             node_size = 0;
         } else {
-            node_size = numa_info[i].node_mem;
+            node_size = nodes[i].node_mem;
             if (node_size > ram_size - mem_start) {
                 node_size = ram_size - mem_start;
             }
         }
-        mem_reg_property[1] = cpu_to_be64(node_size);
-        associativity[3] = associativity[4] = cpu_to_be32(i);
-        sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start);
-        off = fdt_add_subnode(fdt, 0, mem_name);
-        _FDT(off);
-        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
-        _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
-                          sizeof(mem_reg_property))));
-        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
-                          sizeof(associativity))));
-        mem_start += node_size;
+        if (!mem_start) {
+            /* ppc_spapr_init() checks for rma_size <= node0_size already */
+            spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
+            mem_start += spapr->rma_size;
+            node_size -= spapr->rma_size;
+        }
+        for ( ; node_size; ) {
+            hwaddr sizetmp = pow2floor(node_size);
+
+            /* mem_start != 0 here */
+            if (ctzl(mem_start) < ctzl(sizetmp)) {
+                sizetmp = 1ULL << ctzl(mem_start);
+            }
+
+            spapr_populate_memory_node(fdt, i, mem_start, sizetmp);
+            node_size -= sizetmp;
+            mem_start += sizetmp;
+        }
     }
 
     return 0;
@@ -746,6 +796,7 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
 
     cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
 
+    g_free(bootlist);
     g_free(fdt);
 }
 
@@ -792,25 +843,38 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
 
     /* Update the RMA size if necessary */
     if (spapr->vrma_adjust) {
-        hwaddr node0_size = (nb_numa_nodes > 1) ?
-            numa_info[0].node_mem : ram_size;
-        spapr->rma_size = kvmppc_rma_size(node0_size, spapr->htab_shift);
+        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
+                                          spapr->htab_shift);
     }
 }
 
 static void ppc_spapr_reset(void)
 {
     PowerPCCPU *first_ppc_cpu;
+    uint32_t rtas_limit;
 
     /* Reset the hash table & recalc the RMA */
     spapr_reset_htab(spapr);
 
     qemu_devices_reset();
 
+    /*
+     * We place the device tree and RTAS just below either the top of the RMA,
+     * or just below 2GB, whichever is lowere, so that it can be
+     * processed with 32-bit real mode code if necessary
+     */
+    rtas_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR);
+    spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
+    spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
+
     /* Load the fdt */
     spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
                        spapr->rtas_size);
 
+    /* Copy RTAS over */
+    cpu_physical_memory_write(spapr->rtas_addr, spapr->rtas_blob,
+                              spapr->rtas_size);
+
     /* Set up the entry state */
     first_ppc_cpu = POWERPC_CPU(first_cpu);
     first_ppc_cpu->env.gpr[3] = spapr->fdt_addr;
@@ -1227,10 +1291,10 @@ static void ppc_spapr_init(MachineState *machine)
     MemoryRegion *rma_region;
     void *rma = NULL;
     hwaddr rma_alloc_size;
-    hwaddr node0_size = (nb_numa_nodes > 1) ? numa_info[0].node_mem : ram_size;
+    hwaddr node0_size = spapr_node0_size();
     uint32_t initrd_base = 0;
     long kernel_size = 0, initrd_size = 0;
-    long load_limit, rtas_limit, fw_size;
+    long load_limit, fw_size;
     bool kernel_le = false;
     char *filename;
 
@@ -1275,13 +1339,8 @@ static void ppc_spapr_init(MachineState *machine)
         exit(1);
     }
 
-    /* We place the device tree and RTAS just below either the top of the RMA,
-     * or just below 2GB, whichever is lowere, so that it can be
-     * processed with 32-bit real mode code if necessary */
-    rtas_limit = MIN(spapr->rma_size, 0x80000000);
-    spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
-    spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
-    load_limit = spapr->fdt_addr - FW_OVERHEAD;
+    /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */
+    load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
 
     /* We aim for a hash table of size 1/128 the size of RAM.  The
      * normal rule of thumb is 1/64 the size of RAM, but that's much
@@ -1349,14 +1408,14 @@ static void ppc_spapr_init(MachineState *machine)
     }
 
     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
-    spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
-                                           rtas_limit - spapr->rtas_addr);
-    if (spapr->rtas_size < 0) {
+    spapr->rtas_size = get_image_size(filename);
+    spapr->rtas_blob = g_malloc(spapr->rtas_size);
+    if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
         hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
         exit(1);
     }
     if (spapr->rtas_size > RTAS_MAX_SIZE) {
-        hw_error("RTAS too big ! 0x%lx bytes (max is 0x%x)\n",
+        hw_error("RTAS too big ! 0x%zx bytes (max is 0x%x)\n",
                  spapr->rtas_size, RTAS_MAX_SIZE);
         exit(1);
     }
@@ -1378,7 +1437,6 @@ static void ppc_spapr_init(MachineState *machine)
     spapr_create_nvram(spapr);
 
     /* Set up PCI */
-    spapr_pci_msi_init(spapr, SPAPR_PCI_MSI_WINDOW);
     spapr_pci_rtas_init();
 
     phb = spapr_create_phb(spapr, 0);
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 467858ce05..86514472fe 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -712,10 +712,10 @@ static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     return H_SUCCESS;
 }
 
-static target_ulong h_set_mode_resouce_le(PowerPCCPU *cpu,
-                                          target_ulong mflags,
-                                          target_ulong value1,
-                                          target_ulong value2)
+static target_ulong h_set_mode_resource_le(PowerPCCPU *cpu,
+                                           target_ulong mflags,
+                                           target_ulong value1,
+                                           target_ulong value2)
 {
     CPUState *cs;
 
@@ -743,10 +743,10 @@ static target_ulong h_set_mode_resouce_le(PowerPCCPU *cpu,
     return H_UNSUPPORTED_FLAG;
 }
 
-static target_ulong h_set_mode_resouce_addr_trans_mode(PowerPCCPU *cpu,
-                                                       target_ulong mflags,
-                                                       target_ulong value1,
-                                                       target_ulong value2)
+static target_ulong h_set_mode_resource_addr_trans_mode(PowerPCCPU *cpu,
+                                                        target_ulong mflags,
+                                                        target_ulong value1,
+                                                        target_ulong value2)
 {
     CPUState *cs;
     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
@@ -794,11 +794,11 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr,
 
     switch (resource) {
     case H_SET_MODE_RESOURCE_LE:
-        ret = h_set_mode_resouce_le(cpu, args[0], args[2], args[3]);
+        ret = h_set_mode_resource_le(cpu, args[0], args[2], args[3]);
         break;
     case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
-        ret = h_set_mode_resouce_addr_trans_mode(cpu, args[0],
-                                                 args[2], args[3]);
+        ret = h_set_mode_resource_addr_trans_mode(cpu, args[0],
+                                                  args[2], args[3]);
         break;
     }
 
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 9ed39a93b7..ad0da7fdc4 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -262,7 +262,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     unsigned int irq, max_irqs = 0, num = 0;
     sPAPRPHBState *phb = NULL;
     PCIDevice *pdev = NULL;
-    bool msix = false;
     spapr_pci_msi *msi;
     int *config_addr_key;
 
@@ -300,7 +299,12 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
         }
 
         xics_free(spapr->icp, msi->first_irq, msi->num);
-        spapr_msi_setmsg(pdev, 0, msix, 0, num);
+        if (msi_present(pdev)) {
+            spapr_msi_setmsg(pdev, 0, false, 0, num);
+        }
+        if (msix_present(pdev)) {
+            spapr_msi_setmsg(pdev, 0, true, 0, num);
+        }
         g_hash_table_remove(phb->msi, &config_addr);
 
         trace_spapr_pci_msi("Released MSIs", config_addr);
@@ -341,7 +345,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     }
 
     /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
-    spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == RTAS_TYPE_MSIX,
+    spapr_msi_setmsg(pdev, SPAPR_PCI_MSI_WINDOW, ret_intr_type == RTAS_TYPE_MSIX,
                      irq, req_num);
 
     /* Add MSI device to cache */
@@ -465,34 +469,6 @@ static const MemoryRegionOps spapr_msi_ops = {
     .endianness = DEVICE_LITTLE_ENDIAN
 };
 
-void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr)
-{
-    uint64_t window_size = 4096;
-
-    /*
-     * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
-     * we need to allocate some memory to catch those writes coming
-     * from msi_notify()/msix_notify().
-     * As MSIMessage:addr is going to be the same and MSIMessage:data
-     * is going to be a VIRQ number, 4 bytes of the MSI MR will only
-     * be used.
-     *
-     * For KVM we want to ensure that this memory is a full page so that
-     * our memory slot is of page size granularity.
-     */
-#ifdef CONFIG_KVM
-    if (kvm_enabled()) {
-        window_size = getpagesize();
-    }
-#endif
-
-    spapr->msi_win_addr = addr;
-    memory_region_init_io(&spapr->msiwindow, NULL, &spapr_msi_ops, spapr,
-                          "msi", window_size);
-    memory_region_add_subregion(get_system_memory(), spapr->msi_win_addr,
-                                &spapr->msiwindow);
-}
-
 /*
  * PHB PCI device
  */
@@ -512,6 +488,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     char *namebuf;
     int i;
     PCIBus *bus;
+    uint64_t msi_window_size = 4096;
 
     if (sphb->index != -1) {
         hwaddr windows_base;
@@ -604,6 +581,28 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     address_space_init(&sphb->iommu_as, &sphb->iommu_root,
                        sphb->dtbusname);
 
+    /*
+     * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
+     * we need to allocate some memory to catch those writes coming
+     * from msi_notify()/msix_notify().
+     * As MSIMessage:addr is going to be the same and MSIMessage:data
+     * is going to be a VIRQ number, 4 bytes of the MSI MR will only
+     * be used.
+     *
+     * For KVM we want to ensure that this memory is a full page so that
+     * our memory slot is of page size granularity.
+     */
+#ifdef CONFIG_KVM
+    if (kvm_enabled()) {
+        msi_window_size = getpagesize();
+    }
+#endif
+
+    memory_region_init_io(&sphb->msiwindow, NULL, &spapr_msi_ops, spapr,
+                          "msi", msi_window_size);
+    memory_region_add_subregion(&sphb->iommu_root, SPAPR_PCI_MSI_WINDOW,
+                                &sphb->msiwindow);
+
     pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
 
     pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 9ba1ba69f9..2ec2a8e4d1 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -277,6 +277,19 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu,
     rtas_st(rets, 0, ret);
 }
 
+static void rtas_ibm_os_term(PowerPCCPU *cpu,
+                            sPAPREnvironment *spapr,
+                            uint32_t token, uint32_t nargs,
+                            target_ulong args,
+                            uint32_t nret, target_ulong rets)
+{
+    target_ulong ret = 0;
+
+    qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, &error_abort);
+
+    rtas_st(rets, 0, ret);
+}
+
 static struct rtas_call {
     const char *name;
     spapr_rtas_fn fn;
@@ -404,6 +417,8 @@ static void core_rtas_register_types(void)
     spapr_rtas_register(RTAS_IBM_SET_SYSTEM_PARAMETER,
                         "ibm,set-system-parameter",
                         rtas_ibm_set_system_parameter);
+    spapr_rtas_register(RTAS_IBM_OS_TERM, "ibm,os-term",
+                        rtas_ibm_os_term);
 }
 
 type_init(core_rtas_register_types)
diff --git a/include/hw/loader.h b/include/hw/loader.h
index 00c9117ee0..9190387b8e 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -13,6 +13,7 @@
  */
 int get_image_size(const char *filename);
 int load_image(const char *filename, uint8_t *addr); /* deprecated */
+ssize_t load_image_size(const char *filename, void *addr, size_t size);
 int load_image_targphys(const char *filename, hwaddr,
                         uint64_t max_sz);
 int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz);
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 32f0aa7d10..4ea2a0d14a 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -70,7 +70,7 @@ struct sPAPRPHBState {
 
     MemoryRegion memspace, iospace;
     hwaddr mem_win_addr, mem_win_size, io_win_addr, io_win_size;
-    MemoryRegion memwindow, iowindow;
+    MemoryRegion memwindow, iowindow, msiwindow;
 
     uint32_t dma_liobn;
     AddressSpace iommu_as;
diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h
index 7e16e2e06c..14efd0ca31 100644
--- a/include/hw/ppc/ppc.h
+++ b/include/hw/ppc/ppc.h
@@ -92,7 +92,7 @@ enum {
 #define FW_CFG_PPC_IS_KVM       (FW_CFG_ARCH_LOCAL + 0x05)
 #define FW_CFG_PPC_KVM_HC       (FW_CFG_ARCH_LOCAL + 0x06)
 #define FW_CFG_PPC_KVM_PID      (FW_CFG_ARCH_LOCAL + 0x07)
-/* OpenBIOS has FW_CFG_PPC_NVRAM_ADDR as +0x08 */
+#define FW_CFG_PPC_NVRAM_ADDR   (FW_CFG_ARCH_LOCAL + 0x08)
 #define FW_CFG_PPC_BUSFREQ      (FW_CFG_ARCH_LOCAL + 0x09)
 
 #define PPC_SERIAL_MM_BAUDBASE 399193
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index bbba51a703..749daf4dd7 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -13,8 +13,6 @@ struct sPAPRNVRAM;
 typedef struct sPAPREnvironment {
     struct VIOsPAPRBus *vio_bus;
     QLIST_HEAD(, sPAPRPHBState) phbs;
-    hwaddr msi_win_addr;
-    MemoryRegion msiwindow;
     struct sPAPRNVRAM *nvram;
     XICSState *icp;
 
@@ -24,7 +22,8 @@ typedef struct sPAPREnvironment {
     hwaddr rma_size;
     int vrma_adjust;
     hwaddr fdt_addr, rtas_addr;
-    long rtas_size;
+    ssize_t rtas_size;
+    void *rtas_blob;
     void *fdt_skel;
     target_ulong entry_point;
     uint64_t rtc_offset;
@@ -382,9 +381,8 @@ int spapr_allocate_irq_block(int num, bool lsi, bool msi);
 #define RTAS_GET_SENSOR_STATE                   (RTAS_TOKEN_BASE + 0x1D)
 #define RTAS_IBM_CONFIGURE_CONNECTOR            (RTAS_TOKEN_BASE + 0x1E)
 #define RTAS_IBM_OS_TERM                        (RTAS_TOKEN_BASE + 0x1F)
-#define RTAS_IBM_EXTENDED_OS_TERM               (RTAS_TOKEN_BASE + 0x20)
 
-#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x21)
+#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x20)
 
 /* RTAS ibm,get-system-parameter token values */
 #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 174ea36afa..d2000af9c3 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -303,6 +303,8 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cpu);
 
 int kvm_check_extension(KVMState *s, unsigned int extension);
 
+int kvm_vm_check_extension(KVMState *s, unsigned int extension);
+
 #define kvm_vm_enable_cap(s, capability, cap_flags, ...)             \
     ({                                                               \
         struct kvm_enable_cap cap = {                                \
diff --git a/kvm-all.c b/kvm-all.c
index b240bf87a9..b1cf703f9e 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -493,6 +493,19 @@ int kvm_check_extension(KVMState *s, unsigned int extension)
     return ret;
 }
 
+int kvm_vm_check_extension(KVMState *s, unsigned int extension)
+{
+    int ret;
+
+    ret = kvm_vm_ioctl(s, KVM_CHECK_EXTENSION, extension);
+    if (ret < 0) {
+        /* VM wide version not implemented, use global one instead */
+        ret = kvm_check_extension(s, extension);
+    }
+
+    return ret;
+}
+
 static int kvm_set_ioeventfd_mmio(int fd, hwaddr addr, uint32_t val,
                                   bool assign, uint32_t size, bool datamatch)
 {
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 26929c59de..e11b20887e 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -4325,15 +4325,7 @@ badframe:
     return 0;
 }
 
-#elif defined(TARGET_PPC) && !defined(TARGET_PPC64)
-
-/* FIXME: Many of the structures are defined for both PPC and PPC64, but
-   the signal handling is different enough that we haven't implemented
-   support for PPC64 yet.  Hence the restriction above.
-
-   There are various #if'd blocks for code for TARGET_PPC64.  These
-   blocks should go away so that we can successfully run 32-bit and
-   64-bit binaries on a QEMU configured for PPC64.  */
+#elif defined(TARGET_PPC)
 
 /* Size of dummy stack frame allocated when calling signal handler.
    See arch/powerpc/include/asm/ptrace.h.  */
@@ -4343,6 +4335,33 @@ badframe:
 #define SIGNAL_FRAMESIZE 64
 #endif
 
+/* See arch/powerpc/include/asm/ucontext.h.  Only used for 32-bit PPC;
+   on 64-bit PPC, sigcontext and mcontext are one and the same.  */
+struct target_mcontext {
+    target_ulong mc_gregs[48];
+    /* Includes fpscr.  */
+    uint64_t mc_fregs[33];
+    target_ulong mc_pad[2];
+    /* We need to handle Altivec and SPE at the same time, which no
+       kernel needs to do.  Fortunately, the kernel defines this bit to
+       be Altivec-register-large all the time, rather than trying to
+       twiddle it based on the specific platform.  */
+    union {
+        /* SPE vector registers.  One extra for SPEFSCR.  */
+        uint32_t spe[33];
+        /* Altivec vector registers.  The packing of VSCR and VRSAVE
+           varies depending on whether we're PPC64 or not: PPC64 splits
+           them apart; PPC32 stuffs them together.  */
+#if defined(TARGET_PPC64)
+#define QEMU_NVRREG 34
+#else
+#define QEMU_NVRREG 33
+#endif
+        ppc_avr_t altivec[QEMU_NVRREG];
+#undef QEMU_NVRREG
+    } mc_vregs __attribute__((__aligned__(16)));
+};
+
 /* See arch/powerpc/include/asm/sigcontext.h.  */
 struct target_sigcontext {
     target_ulong _unused[4];
@@ -4353,7 +4372,9 @@ struct target_sigcontext {
     target_ulong handler;
     target_ulong oldmask;
     target_ulong regs;      /* struct pt_regs __user * */
-    /* TODO: PPC64 includes extra bits here.  */
+#if defined(TARGET_PPC64)
+    struct target_mcontext mcontext;
+#endif
 };
 
 /* Indices for target_mcontext.mc_gregs, below.
@@ -4408,32 +4429,6 @@ enum {
     TARGET_PT_REGS_COUNT = 44
 };
 
-/* See arch/powerpc/include/asm/ucontext.h.  Only used for 32-bit PPC;
-   on 64-bit PPC, sigcontext and mcontext are one and the same.  */
-struct target_mcontext {
-    target_ulong mc_gregs[48];
-    /* Includes fpscr.  */
-    uint64_t mc_fregs[33];
-    target_ulong mc_pad[2];
-    /* We need to handle Altivec and SPE at the same time, which no
-       kernel needs to do.  Fortunately, the kernel defines this bit to
-       be Altivec-register-large all the time, rather than trying to
-       twiddle it based on the specific platform.  */
-    union {
-        /* SPE vector registers.  One extra for SPEFSCR.  */
-        uint32_t spe[33];
-        /* Altivec vector registers.  The packing of VSCR and VRSAVE
-           varies depending on whether we're PPC64 or not: PPC64 splits
-           them apart; PPC32 stuffs them together.  */
-#if defined(TARGET_PPC64)
-#define QEMU_NVRREG 34
-#else
-#define QEMU_NVRREG 33
-#endif
-        ppc_avr_t altivec[QEMU_NVRREG];
-#undef QEMU_NVRREG
-    } mc_vregs __attribute__((__aligned__(16)));
-};
 
 struct target_ucontext {
     target_ulong tuc_flags;
@@ -4447,7 +4442,7 @@ struct target_ucontext {
     target_sigset_t tuc_sigmask;
 #if defined(TARGET_PPC64)
     target_sigset_t unused[15]; /* Allow for uc_sigmask growth */
-    struct target_sigcontext tuc_mcontext;
+    struct target_sigcontext tuc_sigcontext;
 #else
     int32_t tuc_maskext[30];
     int32_t tuc_pad2[3];
@@ -4462,12 +4457,41 @@ struct target_sigframe {
     int32_t abigap[56];
 };
 
+#if defined(TARGET_PPC64)
+
+#define TARGET_TRAMP_SIZE 6
+
+struct target_rt_sigframe {
+        /* sys_rt_sigreturn requires the ucontext be the first field */
+        struct target_ucontext uc;
+        target_ulong  _unused[2];
+        uint32_t trampoline[TARGET_TRAMP_SIZE];
+        target_ulong pinfo; /* struct siginfo __user * */
+        target_ulong puc; /* void __user * */
+        struct target_siginfo info;
+        /* 64 bit ABI allows for 288 bytes below sp before decrementing it. */
+        char abigap[288];
+} __attribute__((aligned(16)));
+
+#else
+
 struct target_rt_sigframe {
     struct target_siginfo info;
     struct target_ucontext uc;
     int32_t abigap[56];
 };
 
+#endif
+
+#if defined(TARGET_PPC64)
+
+struct target_func_ptr {
+    target_ulong entry;
+    target_ulong toc;
+};
+
+#endif
+
 /* We use the mc_pad field for the signal return trampoline.  */
 #define tramp mc_pad
 
@@ -4491,8 +4515,7 @@ static target_ulong get_sigframe(struct target_sigaction *ka,
     return newsp;
 }
 
-static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame,
-                          int sigret)
+static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame)
 {
     target_ulong msr = env->msr;
     int i;
@@ -4559,11 +4582,14 @@ static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame,
 
     /* Store MSR.  */
     __put_user(msr, &frame->mc_gregs[TARGET_PT_MSR]);
+}
 
+static void encode_trampoline(int sigret, uint32_t *tramp)
+{
     /* Set up the sigreturn trampoline: li r0,sigret; sc.  */
     if (sigret) {
-        __put_user(0x38000000UL | sigret, &frame->tramp[0]);
-        __put_user(0x44000002UL, &frame->tramp[1]);
+        __put_user(0x38000000 | sigret, &tramp[0]);
+        __put_user(0x44000002, &tramp[1]);
     }
 }
 
@@ -4655,6 +4681,9 @@ static void setup_frame(int sig, struct target_sigaction *ka,
     target_ulong frame_addr, newsp;
     int err = 0;
     int signal;
+#if defined(TARGET_PPC64)
+    struct image_info *image = ((TaskState *)thread_cpu->opaque)->info;
+#endif
 
     frame_addr = get_sigframe(ka, env, sizeof(*frame));
     if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 1))
@@ -4665,7 +4694,7 @@ static void setup_frame(int sig, struct target_sigaction *ka,
 
     __put_user(ka->_sa_handler, &sc->handler);
     __put_user(set->sig[0], &sc->oldmask);
-#if defined(TARGET_PPC64)
+#if TARGET_ABI_BITS == 64
     __put_user(set->sig[0] >> 32, &sc->_unused[3]);
 #else
     __put_user(set->sig[1], &sc->_unused[3]);
@@ -4674,7 +4703,10 @@ static void setup_frame(int sig, struct target_sigaction *ka,
     __put_user(sig, &sc->signal);
 
     /* Save user regs.  */
-    save_user_regs(env, &frame->mctx, TARGET_NR_sigreturn);
+    save_user_regs(env, &frame->mctx);
+
+    /* Construct the trampoline code on the stack. */
+    encode_trampoline(TARGET_NR_sigreturn, (uint32_t *)&frame->mctx.tramp);
 
     /* The kernel checks for the presence of a VDSO here.  We don't
        emulate a vdso, so use a sigreturn system call.  */
@@ -4694,7 +4726,24 @@ static void setup_frame(int sig, struct target_sigaction *ka,
     env->gpr[1] = newsp;
     env->gpr[3] = signal;
     env->gpr[4] = frame_addr + offsetof(struct target_sigframe, sctx);
+
+#if defined(TARGET_PPC64)
+    if (get_ppc64_abi(image) < 2) {
+        /* ELFv1 PPC64 function pointers are pointers to OPD entries. */
+        struct target_func_ptr *handler =
+            (struct target_func_ptr *)g2h(ka->_sa_handler);
+        env->nip = tswapl(handler->entry);
+        env->gpr[2] = tswapl(handler->toc);
+    } else {
+        /* ELFv2 PPC64 function pointers are entry points, but R12
+         * must also be set */
+        env->nip = tswapl((target_ulong) ka->_sa_handler);
+        env->gpr[12] = env->nip;
+    }
+#else
     env->nip = (target_ulong) ka->_sa_handler;
+#endif
+
     /* Signal handlers are entered in big-endian mode.  */
     env->msr &= ~MSR_LE;
 
@@ -4712,10 +4761,14 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
                            target_sigset_t *set, CPUPPCState *env)
 {
     struct target_rt_sigframe *rt_sf;
-    struct target_mcontext *frame;
+    uint32_t *trampptr = 0;
+    struct target_mcontext *mctx = 0;
     target_ulong rt_sf_addr, newsp = 0;
     int i, err = 0;
     int signal;
+#if defined(TARGET_PPC64)
+    struct image_info *image = ((TaskState *)thread_cpu->opaque)->info;
+#endif
 
     rt_sf_addr = get_sigframe(ka, env, sizeof(*rt_sf));
     if (!lock_user_struct(VERIFY_WRITE, rt_sf, rt_sf_addr, 1))
@@ -4733,25 +4786,35 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
                &rt_sf->uc.tuc_stack.ss_flags);
     __put_user(target_sigaltstack_used.ss_size,
                &rt_sf->uc.tuc_stack.ss_size);
+#if !defined(TARGET_PPC64)
     __put_user(h2g (&rt_sf->uc.tuc_mcontext),
                &rt_sf->uc.tuc_regs);
+#endif
     for(i = 0; i < TARGET_NSIG_WORDS; i++) {
         __put_user(set->sig[i], &rt_sf->uc.tuc_sigmask.sig[i]);
     }
 
-    frame = &rt_sf->uc.tuc_mcontext;
-    save_user_regs(env, frame, TARGET_NR_rt_sigreturn);
+#if defined(TARGET_PPC64)
+    mctx = &rt_sf->uc.tuc_sigcontext.mcontext;
+    trampptr = &rt_sf->trampoline[0];
+#else
+    mctx = &rt_sf->uc.tuc_mcontext;
+    trampptr = (uint32_t *)&rt_sf->uc.tuc_mcontext.tramp;
+#endif
+
+    save_user_regs(env, mctx);
+    encode_trampoline(TARGET_NR_rt_sigreturn, trampptr);
 
     /* The kernel checks for the presence of a VDSO here.  We don't
        emulate a vdso, so use a sigreturn system call.  */
-    env->lr = (target_ulong) h2g(frame->tramp);
+    env->lr = (target_ulong) h2g(trampptr);
 
     /* Turn off all fp exceptions.  */
     env->fpscr = 0;
 
     /* Create a stack frame for the caller of the handler.  */
     newsp = rt_sf_addr - (SIGNAL_FRAMESIZE + 16);
-    __put_user(env->gpr[1], (target_ulong *)(uintptr_t) newsp);
+    err |= put_user(env->gpr[1], newsp, target_ulong);
 
     if (err)
         goto sigsegv;
@@ -4762,7 +4825,24 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
     env->gpr[4] = (target_ulong) h2g(&rt_sf->info);
     env->gpr[5] = (target_ulong) h2g(&rt_sf->uc);
     env->gpr[6] = (target_ulong) h2g(rt_sf);
+
+#if defined(TARGET_PPC64)
+    if (get_ppc64_abi(image) < 2) {
+        /* ELFv1 PPC64 function pointers are pointers to OPD entries. */
+        struct target_func_ptr *handler =
+            (struct target_func_ptr *)g2h(ka->_sa_handler);
+        env->nip = tswapl(handler->entry);
+        env->gpr[2] = tswapl(handler->toc);
+    } else {
+        /* ELFv2 PPC64 function pointers are entry points, but R12
+         * must also be set */
+        env->nip = tswapl((target_ulong) ka->_sa_handler);
+        env->gpr[12] = env->nip;
+    }
+#else
     env->nip = (target_ulong) ka->_sa_handler;
+#endif
+
     /* Signal handlers are entered in big-endian mode.  */
     env->msr &= ~MSR_LE;
 
@@ -4789,7 +4869,7 @@ long do_sigreturn(CPUPPCState *env)
         goto sigsegv;
 
 #if defined(TARGET_PPC64)
-    set.sig[0] = sc->oldmask + ((long)(sc->_unused[3]) << 32);
+    set.sig[0] = sc->oldmask + ((uint64_t)(sc->_unused[3]) << 32);
 #else
     __get_user(set.sig[0], &sc->oldmask);
     __get_user(set.sig[1], &sc->_unused[3]);
@@ -4827,10 +4907,11 @@ static int do_setcontext(struct target_ucontext *ucp, CPUPPCState *env, int sig)
         return 1;
 
 #if defined(TARGET_PPC64)
-    fprintf (stderr, "do_setcontext: not implemented\n");
-    return 0;
+    mcp_addr = h2g(ucp) +
+        offsetof(struct target_ucontext, tuc_sigcontext.mcontext);
 #else
     __get_user(mcp_addr, &ucp->tuc_regs);
+#endif
 
     if (!lock_user_struct(VERIFY_READ, mcp, mcp_addr, 1))
         return 1;
@@ -4841,7 +4922,6 @@ static int do_setcontext(struct target_ucontext *ucp, CPUPPCState *env, int sig)
 
     unlock_user_struct(mcp, mcp_addr, 1);
     return 0;
-#endif
 }
 
 long do_rt_sigreturn(CPUPPCState *env)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 509eae52ff..0cfdc8ab8f 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -28,7 +28,6 @@ DEF_HELPER_2(icbi, void, env, tl)
 DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32)
 
 #if defined(TARGET_PPC64)
-DEF_HELPER_3(mulldo, i64, env, i64, i64)
 DEF_HELPER_4(divdeu, i64, env, i64, i64, i32)
 DEF_HELPER_4(divde, i64, env, i64, i64, i32)
 #endif
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index f6e8846707..713d777076 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -24,23 +24,6 @@
 #include "helper_regs.h"
 /*****************************************************************************/
 /* Fixed point operations helpers */
-#if defined(TARGET_PPC64)
-
-uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
-{
-    int64_t th;
-    uint64_t tl;
-
-    muls64(&tl, (uint64_t *)&th, arg1, arg2);
-    /* If th != 0 && th != -1, then we had an overflow */
-    if (likely((uint64_t)(th + 1) <= 1)) {
-        env->ov = 0;
-    } else {
-        env->so = env->ov = 1;
-    }
-    return (int64_t)tl;
-}
-#endif
 
 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                            uint32_t oe)
@@ -238,7 +221,7 @@ target_ulong helper_srad(CPUPPCState *env, target_ulong value,
         if (likely((uint64_t)shift != 0)) {
             shift &= 0x3f;
             ret = (int64_t)value >> shift;
-            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
+            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                 env->ca = 0;
             } else {
                 env->ca = 1;
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 42718f77ae..9c23c6ba0d 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -38,6 +38,7 @@
 #include "hw/ppc/ppc.h"
 #include "sysemu/watchdog.h"
 #include "trace.h"
+#include "exec/gdbstub.h"
 
 //#define DEBUG_KVM
 
@@ -72,6 +73,8 @@ static int cap_papr;
 static int cap_htab_fd;
 static int cap_fixup_hcalls;
 
+static uint32_t debug_inst_opcode;
+
 /* XXX We have a race condition where we actually have a level triggered
  *     interrupt, but the infrastructure can't expose that yet, so the guest
  *     takes but ignores it, goes to sleep and never gets notified that there's
@@ -410,6 +413,38 @@ unsigned long kvm_arch_vcpu_id(CPUState *cpu)
     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 }
 
+/* e500 supports 2 h/w breakpoint and 2 watchpoint.
+ * book3s supports only 1 watchpoint, so array size
+ * of 4 is sufficient for now.
+ */
+#define MAX_HW_BKPTS 4
+
+static struct HWBreakpoint {
+    target_ulong addr;
+    int type;
+} hw_debug_points[MAX_HW_BKPTS];
+
+static CPUWatchpoint hw_watchpoint;
+
+/* Default there is no breakpoint and watchpoint supported */
+static int max_hw_breakpoint;
+static int max_hw_watchpoint;
+static int nb_hw_breakpoint;
+static int nb_hw_watchpoint;
+
+static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
+{
+    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
+        max_hw_breakpoint = 2;
+        max_hw_watchpoint = 2;
+    }
+
+    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
+        fprintf(stderr, "Error initializing h/w breakpoints\n");
+        return;
+    }
+}
+
 int kvm_arch_init_vcpu(CPUState *cs)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -436,6 +471,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
         break;
     }
 
+    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
+    kvmppc_hw_debug_points_init(cenv);
+
     return ret;
 }
 
@@ -899,6 +937,11 @@ int kvm_arch_put_registers(CPUState *cs, int level)
     return ret;
 }
 
+static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
+{
+     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
+}
+
 int kvm_arch_get_registers(CPUState *cs)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -981,35 +1024,57 @@ int kvm_arch_get_registers(CPUState *cs)
 
         if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
             env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
+            kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
             env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
+            kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
             env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
+            kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
             env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
+            kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
             env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
+            kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
             env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
+            kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
             env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
+            kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
             env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
+            kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
             env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
+            kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
             env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
+            kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
             env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
+            kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
             env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
+            kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
             env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
+            kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
             env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
+            kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
             env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
+            kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
             env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
+            kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
 
             if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
+                kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
                 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
+                kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
                 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
+                kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
             }
 
             if (sregs.u.e.features & KVM_SREGS_E_PM) {
                 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
+                kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
             }
 
             if (sregs.u.e.features & KVM_SREGS_E_PC) {
                 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
+                kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
                 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
+                kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
             }
         }
 
@@ -1244,6 +1309,259 @@ static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t dat
     return 0;
 }
 
+int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
+{
+    /* Mixed endian case is not handled */
+    uint32_t sc = debug_inst_opcode;
+
+    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
+                            sizeof(sc), 0) ||
+        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
+{
+    uint32_t sc;
+
+    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
+        sc != debug_inst_opcode ||
+        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
+                            sizeof(sc), 1)) {
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+static int find_hw_breakpoint(target_ulong addr, int type)
+{
+    int n;
+
+    assert((nb_hw_breakpoint + nb_hw_watchpoint)
+           <= ARRAY_SIZE(hw_debug_points));
+
+    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
+        if (hw_debug_points[n].addr == addr &&
+             hw_debug_points[n].type == type) {
+            return n;
+        }
+    }
+
+    return -1;
+}
+
+static int find_hw_watchpoint(target_ulong addr, int *flag)
+{
+    int n;
+
+    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
+    if (n >= 0) {
+        *flag = BP_MEM_ACCESS;
+        return n;
+    }
+
+    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
+    if (n >= 0) {
+        *flag = BP_MEM_WRITE;
+        return n;
+    }
+
+    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
+    if (n >= 0) {
+        *flag = BP_MEM_READ;
+        return n;
+    }
+
+    return -1;
+}
+
+int kvm_arch_insert_hw_breakpoint(target_ulong addr,
+                                  target_ulong len, int type)
+{
+    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
+        return -ENOBUFS;
+    }
+
+    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
+    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
+
+    switch (type) {
+    case GDB_BREAKPOINT_HW:
+        if (nb_hw_breakpoint >= max_hw_breakpoint) {
+            return -ENOBUFS;
+        }
+
+        if (find_hw_breakpoint(addr, type) >= 0) {
+            return -EEXIST;
+        }
+
+        nb_hw_breakpoint++;
+        break;
+
+    case GDB_WATCHPOINT_WRITE:
+    case GDB_WATCHPOINT_READ:
+    case GDB_WATCHPOINT_ACCESS:
+        if (nb_hw_watchpoint >= max_hw_watchpoint) {
+            return -ENOBUFS;
+        }
+
+        if (find_hw_breakpoint(addr, type) >= 0) {
+            return -EEXIST;
+        }
+
+        nb_hw_watchpoint++;
+        break;
+
+    default:
+        return -ENOSYS;
+    }
+
+    return 0;
+}
+
+int kvm_arch_remove_hw_breakpoint(target_ulong addr,
+                                  target_ulong len, int type)
+{
+    int n;
+
+    n = find_hw_breakpoint(addr, type);
+    if (n < 0) {
+        return -ENOENT;
+    }
+
+    switch (type) {
+    case GDB_BREAKPOINT_HW:
+        nb_hw_breakpoint--;
+        break;
+
+    case GDB_WATCHPOINT_WRITE:
+    case GDB_WATCHPOINT_READ:
+    case GDB_WATCHPOINT_ACCESS:
+        nb_hw_watchpoint--;
+        break;
+
+    default:
+        return -ENOSYS;
+    }
+    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
+
+    return 0;
+}
+
+void kvm_arch_remove_all_hw_breakpoints(void)
+{
+    nb_hw_breakpoint = nb_hw_watchpoint = 0;
+}
+
+void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
+{
+    int n;
+
+    /* Software Breakpoint updates */
+    if (kvm_sw_breakpoints_active(cs)) {
+        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
+    }
+
+    assert((nb_hw_breakpoint + nb_hw_watchpoint)
+           <= ARRAY_SIZE(hw_debug_points));
+    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
+
+    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
+        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
+        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
+            switch (hw_debug_points[n].type) {
+            case GDB_BREAKPOINT_HW:
+                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
+                break;
+            case GDB_WATCHPOINT_WRITE:
+                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
+                break;
+            case GDB_WATCHPOINT_READ:
+                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
+                break;
+            case GDB_WATCHPOINT_ACCESS:
+                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
+                                        KVMPPC_DEBUG_WATCH_READ;
+                break;
+            default:
+                cpu_abort(cs, "Unsupported breakpoint type\n");
+            }
+            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
+        }
+    }
+}
+
+static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
+{
+    CPUState *cs = CPU(cpu);
+    CPUPPCState *env = &cpu->env;
+    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
+    int handle = 0;
+    int n;
+    int flag = 0;
+
+    if (cs->singlestep_enabled) {
+        handle = 1;
+    } else if (arch_info->status) {
+        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
+            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
+                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
+                if (n >= 0) {
+                    handle = 1;
+                }
+            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
+                                            KVMPPC_DEBUG_WATCH_WRITE)) {
+                n = find_hw_watchpoint(arch_info->address,  &flag);
+                if (n >= 0) {
+                    handle = 1;
+                    cs->watchpoint_hit = &hw_watchpoint;
+                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
+                    hw_watchpoint.flags = flag;
+                }
+            }
+        }
+    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
+        handle = 1;
+    } else {
+        /* QEMU is not able to handle debug exception, so inject
+         * program exception to guest;
+         * Yes program exception NOT debug exception !!
+         * When QEMU is using debug resources then debug exception must
+         * be always set. To achieve this we set MSR_DE and also set
+         * MSRP_DEP so guest cannot change MSR_DE.
+         * When emulating debug resource for guest we want guest
+         * to control MSR_DE (enable/disable debug interrupt on need).
+         * Supporting both configurations are NOT possible.
+         * So the result is that we cannot share debug resources
+         * between QEMU and Guest on BOOKE architecture.
+         * In the current design QEMU gets the priority over guest,
+         * this means that if QEMU is using debug resources then guest
+         * cannot use them;
+         * For software breakpoint QEMU uses a privileged instruction;
+         * So there cannot be any reason that we are here for guest
+         * set debug exception, only possibility is guest executed a
+         * privileged / illegal instruction and that's why we are
+         * injecting a program interrupt.
+         */
+
+        cpu_synchronize_state(cs);
+        /* env->nip is PC, so increment this by 4 to use
+         * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
+         */
+        env->nip += 4;
+        cs->exception_index = POWERPC_EXCP_PROGRAM;
+        env->error_code = POWERPC_EXCP_INVAL;
+        ppc_cpu_do_interrupt(cs);
+    }
+
+    return handle;
+}
+
 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -1284,6 +1602,16 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
         ret = 0;
         break;
 
+    case KVM_EXIT_DEBUG:
+        DPRINTF("handle debug exception\n");
+        if (kvm_handle_debug(cpu, run)) {
+            ret = EXCP_DEBUG;
+            break;
+        }
+        /* re-enter, this exception was guest-internal */
+        ret = 0;
+        break;
+
     default:
         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
         ret = -1;
@@ -1369,7 +1697,7 @@ static int read_cpuinfo(const char *field, char *value, int len)
     }
 
     do {
-        if(!fgets(line, sizeof(line), f)) {
+        if (!fgets(line, sizeof(line), f)) {
             break;
         }
         if (!strncmp(line, field, field_len)) {
@@ -1404,6 +1732,17 @@ uint32_t kvmppc_get_tbfreq(void)
     return retval;
 }
 
+bool kvmppc_get_host_serial(char **value)
+{
+    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
+                               NULL);
+}
+
+bool kvmppc_get_host_model(char **value)
+{
+    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
+}
+
 /* Try to find a device tree node for a CPU with clock-frequency property */
 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
 {
@@ -1496,7 +1835,7 @@ static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
      PowerPCCPU *cpu = ppc_env_get_cpu(env);
      CPUState *cs = CPU(cpu);
 
-    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
+    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
         return 0;
     }
@@ -1965,34 +2304,6 @@ void kvm_arch_init_irq_routing(KVMState *s)
 {
 }
 
-int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
-{
-    return -EINVAL;
-}
-
-int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
-{
-    return -EINVAL;
-}
-
-int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
-{
-    return -EINVAL;
-}
-
-int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
-{
-    return -EINVAL;
-}
-
-void kvm_arch_remove_all_hw_breakpoints(void)
-{
-}
-
-void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
-{
-}
-
 struct kvm_get_htab_buf {
     struct kvm_get_htab_header header;
     /*
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index d9516e73ef..2e0224c6af 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -19,6 +19,8 @@ uint32_t kvmppc_get_tbfreq(void);
 uint64_t kvmppc_get_clockfreq(void);
 uint32_t kvmppc_get_vmx(void);
 uint32_t kvmppc_get_dfp(void);
+bool kvmppc_get_host_model(char **buf);
+bool kvmppc_get_host_serial(char **buf);
 int kvmppc_get_hasidle(CPUPPCState *env);
 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len);
 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level);
@@ -60,6 +62,16 @@ static inline uint32_t kvmppc_get_tbfreq(void)
     return 0;
 }
 
+static inline bool kvmppc_get_host_model(char **buf)
+{
+    return false;
+}
+
+static inline bool kvmppc_get_host_serial(char **buf)
+{
+    return false;
+}
+
 static inline uint64_t kvmppc_get_clockfreq(void)
 {
     return 0;
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index c07bb01a7a..d03daeaa48 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1128,9 +1128,19 @@ static void gen_mulhwu(DisasContext *ctx)
 /* mullw  mullw. */
 static void gen_mullw(DisasContext *ctx)
 {
-    tcg_gen_mul_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
-                   cpu_gpr[rB(ctx->opcode)]);
-    tcg_gen_ext32s_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rD(ctx->opcode)]);
+#if defined(TARGET_PPC64)
+    TCGv_i64 t0, t1;
+    t0 = tcg_temp_new_i64();
+    t1 = tcg_temp_new_i64();
+    tcg_gen_ext32s_tl(t0, cpu_gpr[rA(ctx->opcode)]);
+    tcg_gen_ext32s_tl(t1, cpu_gpr[rB(ctx->opcode)]);
+    tcg_gen_mul_i64(cpu_gpr[rD(ctx->opcode)], t0, t1);
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+#else
+    tcg_gen_mul_i32(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
+                    cpu_gpr[rB(ctx->opcode)]);
+#endif
     if (unlikely(Rc(ctx->opcode) != 0))
         gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
 }
@@ -1144,7 +1154,11 @@ static void gen_mullwo(DisasContext *ctx)
     tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
     tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
     tcg_gen_muls2_i32(t0, t1, t0, t1);
-    tcg_gen_ext_i32_tl(cpu_gpr[rD(ctx->opcode)], t0);
+#if defined(TARGET_PPC64)
+    tcg_gen_concat_i32_i64(cpu_gpr[rD(ctx->opcode)], t0, t1);
+#else
+    tcg_gen_mov_i32(cpu_gpr[rD(ctx->opcode)], t0);
+#endif
 
     tcg_gen_sari_i32(t0, t0, 31);
     tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);
@@ -1201,8 +1215,20 @@ static void gen_mulld(DisasContext *ctx)
 /* mulldo  mulldo. */
 static void gen_mulldo(DisasContext *ctx)
 {
-    gen_helper_mulldo(cpu_gpr[rD(ctx->opcode)], cpu_env,
-                      cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+
+    tcg_gen_muls2_i64(t0, t1, cpu_gpr[rA(ctx->opcode)],
+                      cpu_gpr[rB(ctx->opcode)]);
+    tcg_gen_mov_i64(cpu_gpr[rD(ctx->opcode)], t0);
+
+    tcg_gen_sari_i64(t0, t0, 63);
+    tcg_gen_setcond_i64(TCG_COND_NE, cpu_ov, t0, t1);
+    tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
+
+    tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
+
     if (unlikely(Rc(ctx->opcode) != 0)) {
         gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
     }
@@ -1616,18 +1642,17 @@ static void gen_rlwimi(DisasContext *ctx)
     mb = MB(ctx->opcode);
     me = ME(ctx->opcode);
     sh = SH(ctx->opcode);
-    if (likely(sh == 0 && mb == 0 && me == 31)) {
-        tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
+    if (likely(sh == (31-me) && mb <= me)) {
+        tcg_gen_deposit_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
+                           cpu_gpr[rS(ctx->opcode)], sh, me - mb + 1);
     } else {
         target_ulong mask;
         TCGv t1;
         TCGv t0 = tcg_temp_new();
 #if defined(TARGET_PPC64)
-        TCGv_i32 t2 = tcg_temp_new_i32();
-        tcg_gen_trunc_i64_i32(t2, cpu_gpr[rS(ctx->opcode)]);
-        tcg_gen_rotli_i32(t2, t2, sh);
-        tcg_gen_extu_i32_i64(t0, t2);
-        tcg_temp_free_i32(t2);
+        tcg_gen_deposit_i64(t0, cpu_gpr[rS(ctx->opcode)],
+            cpu_gpr[rS(ctx->opcode)], 32, 32);
+        tcg_gen_rotli_i64(t0, t0, sh);
 #else
         tcg_gen_rotli_i32(t0, cpu_gpr[rS(ctx->opcode)], sh);
 #endif
@@ -1672,14 +1697,18 @@ static void gen_rlwinm(DisasContext *ctx)
         tcg_gen_shri_tl(t0, t0, mb);
         tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], t0);
         tcg_temp_free(t0);
+    } else if (likely(mb == 0 && me == 31)) {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        tcg_gen_trunc_tl_i32(t0, cpu_gpr[rS(ctx->opcode)]);
+        tcg_gen_rotli_i32(t0, t0, sh);
+        tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t0);
+        tcg_temp_free_i32(t0);
     } else {
         TCGv t0 = tcg_temp_new();
 #if defined(TARGET_PPC64)
-        TCGv_i32 t1 = tcg_temp_new_i32();
-        tcg_gen_trunc_i64_i32(t1, cpu_gpr[rS(ctx->opcode)]);
-        tcg_gen_rotli_i32(t1, t1, sh);
-        tcg_gen_extu_i32_i64(t0, t1);
-        tcg_temp_free_i32(t1);
+        tcg_gen_deposit_i64(t0, cpu_gpr[rS(ctx->opcode)],
+            cpu_gpr[rS(ctx->opcode)], 32, 32);
+        tcg_gen_rotli_i64(t0, t0, sh);
 #else
         tcg_gen_rotli_i32(t0, cpu_gpr[rS(ctx->opcode)], sh);
 #endif
@@ -1698,37 +1727,49 @@ static void gen_rlwinm(DisasContext *ctx)
 static void gen_rlwnm(DisasContext *ctx)
 {
     uint32_t mb, me;
-    TCGv t0;
-#if defined(TARGET_PPC64)
-    TCGv_i32 t1, t2;
-#endif
-
     mb = MB(ctx->opcode);
     me = ME(ctx->opcode);
-    t0 = tcg_temp_new();
-    tcg_gen_andi_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x1f);
-#if defined(TARGET_PPC64)
-    t1 = tcg_temp_new_i32();
-    t2 = tcg_temp_new_i32();
-    tcg_gen_trunc_i64_i32(t1, cpu_gpr[rS(ctx->opcode)]);
-    tcg_gen_trunc_i64_i32(t2, t0);
-    tcg_gen_rotl_i32(t1, t1, t2);
-    tcg_gen_extu_i32_i64(t0, t1);
-    tcg_temp_free_i32(t1);
-    tcg_temp_free_i32(t2);
-#else
-    tcg_gen_rotl_i32(t0, cpu_gpr[rS(ctx->opcode)], t0);
-#endif
-    if (unlikely(mb != 0 || me != 31)) {
-#if defined(TARGET_PPC64)
-        mb += 32;
-        me += 32;
-#endif
-        tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb, me));
+
+    if (likely(mb == 0 && me == 31)) {
+        TCGv_i32 t0, t1;
+        t0 = tcg_temp_new_i32();
+        t1 = tcg_temp_new_i32();
+        tcg_gen_trunc_tl_i32(t0, cpu_gpr[rB(ctx->opcode)]);
+        tcg_gen_trunc_tl_i32(t1, cpu_gpr[rS(ctx->opcode)]);
+        tcg_gen_andi_i32(t0, t0, 0x1f);
+        tcg_gen_rotl_i32(t1, t1, t0);
+        tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t1);
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
     } else {
-        tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], t0);
+        TCGv t0;
+#if defined(TARGET_PPC64)
+        TCGv t1;
+#endif
+
+        t0 = tcg_temp_new();
+        tcg_gen_andi_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x1f);
+#if defined(TARGET_PPC64)
+        t1 = tcg_temp_new_i64();
+        tcg_gen_deposit_i64(t1, cpu_gpr[rS(ctx->opcode)],
+                            cpu_gpr[rS(ctx->opcode)], 32, 32);
+        tcg_gen_rotl_i64(t0, t1, t0);
+        tcg_temp_free_i64(t1);
+#else
+        tcg_gen_rotl_i32(t0, cpu_gpr[rS(ctx->opcode)], t0);
+#endif
+        if (unlikely(mb != 0 || me != 31)) {
+#if defined(TARGET_PPC64)
+            mb += 32;
+            me += 32;
+#endif
+            tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb, me));
+        } else {
+            tcg_gen_andi_tl(t0, t0, MASK(32, 63));
+            tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], t0);
+        }
+        tcg_temp_free(t0);
     }
-    tcg_temp_free(t0);
     if (unlikely(Rc(ctx->opcode) != 0))
         gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
 }
@@ -1924,7 +1965,7 @@ static void gen_srawi(DisasContext *ctx)
     TCGv dst = cpu_gpr[rA(ctx->opcode)];
     TCGv src = cpu_gpr[rS(ctx->opcode)];
     if (sh == 0) {
-        tcg_gen_mov_tl(dst, src);
+        tcg_gen_ext32s_tl(dst, src);
         tcg_gen_movi_tl(cpu_ca, 0);
     } else {
         TCGv t0;