/* SPDX-License-Identifier: GPL-2.0 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kvm_host.h>
#include "x86.h"
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include "smm.h"
#include "cpuid.h"
#include "trace.h"

#define CHECK_SMRAM32_OFFSET(field, offset) \
        ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00)

#define CHECK_SMRAM64_OFFSET(field, offset) \
        ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00)

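/*
 * Compile-time layout checks for the SMRAM state-save structures.  The
 * architectural offsets are relative to SMBASE; KVM's structures cover only
 * the state-save area, which starts at SMBASE + 0xFE00, hence the
 * subtraction in the macros above.
 */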
static void check_smram_offsets(void)
{
        /* 32 bit SMRAM image */
        CHECK_SMRAM32_OFFSET(reserved1,                 0xFE00);
        CHECK_SMRAM32_OFFSET(smbase,                    0xFEF8);
        CHECK_SMRAM32_OFFSET(smm_revision,              0xFEFC);
        CHECK_SMRAM32_OFFSET(io_inst_restart,           0xFF00);
        CHECK_SMRAM32_OFFSET(auto_hlt_restart,          0xFF02);
        CHECK_SMRAM32_OFFSET(io_restart_rdi,            0xFF04);
        CHECK_SMRAM32_OFFSET(io_restart_rcx,            0xFF08);
        CHECK_SMRAM32_OFFSET(io_restart_rsi,            0xFF0C);
        CHECK_SMRAM32_OFFSET(io_restart_rip,            0xFF10);
        CHECK_SMRAM32_OFFSET(cr4,                       0xFF14);
        CHECK_SMRAM32_OFFSET(reserved2,                 0xFF18);
        CHECK_SMRAM32_OFFSET(int_shadow,                0xFF1A);
        CHECK_SMRAM32_OFFSET(reserved3,                 0xFF1B);
        CHECK_SMRAM32_OFFSET(ds,                        0xFF2C);
        CHECK_SMRAM32_OFFSET(fs,                        0xFF38);
        CHECK_SMRAM32_OFFSET(gs,                        0xFF44);
        CHECK_SMRAM32_OFFSET(idtr,                      0xFF50);
        CHECK_SMRAM32_OFFSET(tr,                        0xFF5C);
        CHECK_SMRAM32_OFFSET(gdtr,                      0xFF6C);
        CHECK_SMRAM32_OFFSET(ldtr,                      0xFF78);
        CHECK_SMRAM32_OFFSET(es,                        0xFF84);
        CHECK_SMRAM32_OFFSET(cs,                        0xFF90);
        CHECK_SMRAM32_OFFSET(ss,                        0xFF9C);
        CHECK_SMRAM32_OFFSET(es_sel,                    0xFFA8);
        CHECK_SMRAM32_OFFSET(cs_sel,                    0xFFAC);
        CHECK_SMRAM32_OFFSET(ss_sel,                    0xFFB0);
        CHECK_SMRAM32_OFFSET(ds_sel,                    0xFFB4);
        CHECK_SMRAM32_OFFSET(fs_sel,                    0xFFB8);
        CHECK_SMRAM32_OFFSET(gs_sel,                    0xFFBC);
        CHECK_SMRAM32_OFFSET(ldtr_sel,                  0xFFC0);
        CHECK_SMRAM32_OFFSET(tr_sel,                    0xFFC4);
        CHECK_SMRAM32_OFFSET(dr7,                       0xFFC8);
        CHECK_SMRAM32_OFFSET(dr6,                       0xFFCC);
        CHECK_SMRAM32_OFFSET(gprs,                      0xFFD0);
        CHECK_SMRAM32_OFFSET(eip,                       0xFFF0);
        CHECK_SMRAM32_OFFSET(eflags,                    0xFFF4);
        CHECK_SMRAM32_OFFSET(cr3,                       0xFFF8);
        CHECK_SMRAM32_OFFSET(cr0,                       0xFFFC);

        /* 64 bit SMRAM image */
        CHECK_SMRAM64_OFFSET(es,                        0xFE00);
        CHECK_SMRAM64_OFFSET(cs,                        0xFE10);
        CHECK_SMRAM64_OFFSET(ss,                        0xFE20);
        CHECK_SMRAM64_OFFSET(ds,                        0xFE30);
        CHECK_SMRAM64_OFFSET(fs,                        0xFE40);
        CHECK_SMRAM64_OFFSET(gs,                        0xFE50);
        CHECK_SMRAM64_OFFSET(gdtr,                      0xFE60);
        CHECK_SMRAM64_OFFSET(ldtr,                      0xFE70);
        CHECK_SMRAM64_OFFSET(idtr,                      0xFE80);
        CHECK_SMRAM64_OFFSET(tr,                        0xFE90);
        CHECK_SMRAM64_OFFSET(io_restart_rip,            0xFEA0);
        CHECK_SMRAM64_OFFSET(io_restart_rcx,            0xFEA8);
        CHECK_SMRAM64_OFFSET(io_restart_rsi,            0xFEB0);
        CHECK_SMRAM64_OFFSET(io_restart_rdi,            0xFEB8);
        CHECK_SMRAM64_OFFSET(io_restart_dword,          0xFEC0);
        CHECK_SMRAM64_OFFSET(reserved1,                 0xFEC4);
        CHECK_SMRAM64_OFFSET(io_inst_restart,           0xFEC8);
        CHECK_SMRAM64_OFFSET(auto_hlt_restart,          0xFEC9);
        CHECK_SMRAM64_OFFSET(amd_nmi_mask,              0xFECA);
        CHECK_SMRAM64_OFFSET(int_shadow,                0xFECB);
        CHECK_SMRAM64_OFFSET(reserved2,                 0xFECC);
        CHECK_SMRAM64_OFFSET(efer,                      0xFED0);
        CHECK_SMRAM64_OFFSET(svm_guest_flag,            0xFED8);
        CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa,        0xFEE0);
        CHECK_SMRAM64_OFFSET(svm_guest_virtual_int,     0xFEE8);
        CHECK_SMRAM64_OFFSET(reserved3,                 0xFEF0);
        CHECK_SMRAM64_OFFSET(smm_revison,               0xFEFC);
        CHECK_SMRAM64_OFFSET(smbase,                    0xFF00);
        CHECK_SMRAM64_OFFSET(reserved4,                 0xFF04);
        CHECK_SMRAM64_OFFSET(ssp,                       0xFF18);
        CHECK_SMRAM64_OFFSET(svm_guest_pat,             0xFF20);
        CHECK_SMRAM64_OFFSET(svm_host_efer,             0xFF28);
        CHECK_SMRAM64_OFFSET(svm_host_cr4,              0xFF30);
        CHECK_SMRAM64_OFFSET(svm_host_cr3,              0xFF38);
        CHECK_SMRAM64_OFFSET(svm_host_cr0,              0xFF40);
        CHECK_SMRAM64_OFFSET(cr4,                       0xFF48);
        CHECK_SMRAM64_OFFSET(cr3,                       0xFF50);
        CHECK_SMRAM64_OFFSET(cr0,                       0xFF58);
        CHECK_SMRAM64_OFFSET(dr7,                       0xFF60);
        CHECK_SMRAM64_OFFSET(dr6,                       0xFF68);
        CHECK_SMRAM64_OFFSET(rflags,                    0xFF70);
        CHECK_SMRAM64_OFFSET(rip,                       0xFF78);
        CHECK_SMRAM64_OFFSET(gprs,                      0xFF80);

        BUILD_BUG_ON(sizeof(union kvm_smram) != 512);
}

#undef CHECK_SMRAM64_OFFSET
#undef CHECK_SMRAM32_OFFSET

void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
{
        trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);

        if (entering_smm) {
                vcpu->arch.hflags |= HF_SMM_MASK;
        } else {
                vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);

                /* Process a latched INIT or SMI, if any.  */
                kvm_make_request(KVM_REQ_EVENT, vcpu);

                /*
                 * Even if KVM_SET_SREGS2 loaded the PDPTRs out of band, they
                 * still need to be reloaded from guest memory on SMM exit.
                 */
                vcpu->arch.pdptrs_from_userspace = false;
        }

        kvm_mmu_reset_context(vcpu);
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_smm_changed);

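/* Pend an SMI; actual entry into SMM happens when pending events are processed. */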
void process_smi(struct kvm_vcpu *vcpu)
{
        vcpu->arch.smi_pending = true;
        kvm_make_request(KVM_REQ_EVENT, vcpu);
}

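/*
 * Pack segment attributes into the SMRAM "flags" format, which mirrors the
 * attribute bits in the upper dword of a segment descriptor (type at bit 8
 * through the granularity bit at bit 23).
 */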
static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
{
        u32 flags = 0;
        flags |= seg->g       << 23;
        flags |= seg->db      << 22;
        flags |= seg->l       << 21;
        flags |= seg->avl     << 20;
        flags |= seg->present << 15;
        flags |= seg->dpl     << 13;
        flags |= seg->s       << 12;
        flags |= seg->type    << 8;
        return flags;
}

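/* Save a segment register into the 32-bit SMRAM state-save image. */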
static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
                                  struct kvm_smm_seg_state_32 *state,
                                  u32 *selector, int n)
{
        struct kvm_segment seg;

        kvm_get_segment(vcpu, &seg, n);
        *selector = seg.selector;
        state->base = seg.base;
        state->limit = seg.limit;
        state->flags = enter_smm_get_segment_flags(&seg);
}

#ifdef CONFIG_X86_64
static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
                                  struct kvm_smm_seg_state_64 *state,
                                  int n)
{
        struct kvm_segment seg;

        kvm_get_segment(vcpu, &seg, n);
        state->selector = seg.selector;
        state->attributes = enter_smm_get_segment_flags(&seg) >> 8;
        state->limit = seg.limit;
        state->base = seg.base;
}
#endif

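/* Fill the 32-bit SMRAM state-save image from the current vCPU state. */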
static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
                                    struct kvm_smram_state_32 *smram)
{
        struct desc_ptr dt;
        int i;

        smram->cr0     = kvm_read_cr0(vcpu);
        smram->cr3     = kvm_read_cr3(vcpu);
        smram->eflags  = kvm_get_rflags(vcpu);
        smram->eip     = kvm_rip_read(vcpu);

        for (i = 0; i < 8; i++)
                smram->gprs[i] = kvm_register_read_raw(vcpu, i);

        smram->dr6     = (u32)vcpu->arch.dr6;
        smram->dr7     = (u32)vcpu->arch.dr7;

        enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
        enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);

        kvm_x86_call(get_gdt)(vcpu, &dt);
        smram->gdtr.base = dt.address;
        smram->gdtr.limit = dt.size;

        kvm_x86_call(get_idt)(vcpu, &dt);
        smram->idtr.base = dt.address;
        smram->idtr.limit = dt.size;

        enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
        enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
        enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);

        enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
        enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
        enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);

        smram->cr4 = kvm_read_cr4(vcpu);
        smram->smm_revision = 0x00020000;
        smram->smbase = vcpu->arch.smbase;

        smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);
}

#ifdef CONFIG_X86_64
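/* Fill the 64-bit SMRAM state-save image from the current vCPU state. */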
static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
                                    struct kvm_smram_state_64 *smram)
{
        struct desc_ptr dt;
        int i;

        for (i = 0; i < 16; i++)
                smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i);

        smram->rip    = kvm_rip_read(vcpu);
        smram->rflags = kvm_get_rflags(vcpu);

        smram->dr6 = vcpu->arch.dr6;
        smram->dr7 = vcpu->arch.dr7;

        smram->cr0 = kvm_read_cr0(vcpu);
        smram->cr3 = kvm_read_cr3(vcpu);
        smram->cr4 = kvm_read_cr4(vcpu);

        smram->smbase = vcpu->arch.smbase;
        smram->smm_revison = 0x00020064;

        smram->efer = vcpu->arch.efer;

        enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);

        kvm_x86_call(get_idt)(vcpu, &dt);
        smram->idtr.limit = dt.size;
        smram->idtr.base = dt.address;

        enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);

        kvm_x86_call(get_gdt)(vcpu, &dt);
        smram->gdtr.limit = dt.size;
        smram->gdtr.base = dt.address;

        enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
        enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
        enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
        enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
        enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
        enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);

        smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);

        if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
            kvm_msr_read(vcpu, MSR_KVM_INTERNAL_GUEST_SSP, &smram->ssp))
                kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
}
#endif

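/*
 * Emulate entry into System Management Mode: save the vCPU state into the
 * state-save area at SMBASE + 0xFE00 and switch to the architectural SMM
 * entry state, i.e. a real-mode-like environment with flat 4GiB segments,
 * CS based at SMBASE, RIP = 0x8000, and paging/interrupts disabled.
 */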
void enter_smm(struct kvm_vcpu *vcpu)
{
        struct kvm_segment cs, ds;
        struct desc_ptr dt;
        unsigned long cr0;
        union kvm_smram smram;

        check_smram_offsets();

        memset(smram.bytes, 0, sizeof(smram.bytes));

#ifdef CONFIG_X86_64
        if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
                enter_smm_save_state_64(vcpu, &smram.smram64);
        else
#endif
                enter_smm_save_state_32(vcpu, &smram.smram32);

        /*
         * Give the vendor's .enter_smm() callback a chance to make
         * ISA-specific changes to the vCPU state (e.g. leave guest mode)
         * after the state has been saved into the SMM state-save area.
         *
         * Kill the VM in the unlikely case of failure, as the vCPU may be
         * left in an undefined state.
         */
        if (kvm_x86_call(enter_smm)(vcpu, &smram))
                goto error;

        kvm_smm_changed(vcpu, true);

        if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram)))
                goto error;

        if (kvm_x86_call(get_nmi_mask)(vcpu))
                vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
        else
                kvm_x86_call(set_nmi_mask)(vcpu, true);

        kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
        kvm_rip_write(vcpu, 0x8000);

        kvm_x86_call(set_interrupt_shadow)(vcpu, 0);

        cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
        kvm_x86_call(set_cr0)(vcpu, cr0);

        kvm_x86_call(set_cr4)(vcpu, 0);

        /* Undocumented: IDT limit is set to zero on entry to SMM.  */
        dt.address = dt.size = 0;
        kvm_x86_call(set_idt)(vcpu, &dt);

        if (WARN_ON_ONCE(kvm_set_dr(vcpu, 7, DR7_FIXED_1)))
                goto error;

        cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
        cs.base = vcpu->arch.smbase;

        ds.selector = 0;
        ds.base = 0;

        cs.limit    = ds.limit = 0xffffffff;
        cs.type     = ds.type = 0x3;
        cs.dpl      = ds.dpl = 0;
        cs.db       = ds.db = 0;
        cs.s        = ds.s = 1;
        cs.l        = ds.l = 0;
        cs.g        = ds.g = 1;
        cs.avl      = ds.avl = 0;
        cs.present  = ds.present = 1;
        cs.unusable = ds.unusable = 0;
        cs.padding  = ds.padding = 0;

        kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);

#ifdef CONFIG_X86_64
        if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
                if (kvm_x86_call(set_efer)(vcpu, 0))
                        goto error;
#endif

        vcpu->arch.cpuid_dynamic_bits_dirty = true;
        kvm_mmu_reset_context(vcpu);
        return;
error:
        kvm_vm_dead(vcpu->kvm);
}

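/* Inverse of enter_smm_get_segment_flags(): unpack SMRAM segment attributes. */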
static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
{
        desc->g    = (flags >> 23) & 1;
        desc->db   = (flags >> 22) & 1;
        desc->l    = (flags >> 21) & 1;
        desc->avl  = (flags >> 20) & 1;
        desc->present = (flags >> 15) & 1;
        desc->dpl  = (flags >> 13) & 3;
        desc->s    = (flags >> 12) & 1;
        desc->type = (flags >>  8) & 15;

        desc->unusable = !desc->present;
        desc->padding = 0;
}

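/* Load a segment register from the 32-bit SMRAM state-save image. */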
static int rsm_load_seg_32(struct kvm_vcpu *vcpu,
                           const struct kvm_smm_seg_state_32 *state,
                           u16 selector, int n)
{
        struct kvm_segment desc;

        desc.selector =           selector;
        desc.base =               state->base;
        desc.limit =              state->limit;
        rsm_set_desc_flags(&desc, state->flags);
        kvm_set_segment(vcpu, &desc, n);
        return X86EMUL_CONTINUE;
}

#ifdef CONFIG_X86_64

static int rsm_load_seg_64(struct kvm_vcpu *vcpu,
                           const struct kvm_smm_seg_state_64 *state,
                           int n)
{
        struct kvm_segment desc;

        desc.selector =           state->selector;
        rsm_set_desc_flags(&desc, state->attributes << 8);
        desc.limit =              state->limit;
        desc.base =               state->base;
        kvm_set_segment(vcpu, &desc, n);
        return X86EMUL_CONTINUE;
}
#endif

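/*
 * Load CR0/CR3/CR4 from the SMRAM image, honoring the architectural ordering
 * constraints: PAE before PG for long mode, and PCIDE only once long mode is
 * fully active (see below).
 */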
static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
                                    u64 cr0, u64 cr3, u64 cr4)
{
        int bad;
        u64 pcid;

        /* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
        pcid = 0;
        if (cr4 & X86_CR4_PCIDE) {
                pcid = cr3 & 0xfff;
                cr3 &= ~0xfff;
        }

        bad = kvm_set_cr3(vcpu, cr3);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        /*
         * First enable PAE, long mode needs it before CR0.PG = 1 is set.
         * Then enable protected mode.  However, PCID cannot be enabled
         * if EFER.LMA=0, so set it separately.
         */
        bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        bad = kvm_set_cr0(vcpu, cr0);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        if (cr4 & X86_CR4_PCIDE) {
                bad = kvm_set_cr4(vcpu, cr4);
                if (bad)
                        return X86EMUL_UNHANDLEABLE;
                if (pcid) {
                        bad = kvm_set_cr3(vcpu, cr3 | pcid);
                        if (bad)
                                return X86EMUL_UNHANDLEABLE;
                }

        }

        return X86EMUL_CONTINUE;
}

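/* Restore vCPU state from a 32-bit SMRAM state-save image on RSM. */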
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
                             const struct kvm_smram_state_32 *smstate)
{
        struct kvm_vcpu *vcpu = ctxt->vcpu;
        struct desc_ptr dt;
        int i, r;

        ctxt->eflags =  smstate->eflags | X86_EFLAGS_FIXED;
        ctxt->_eip =  smstate->eip;

        for (i = 0; i < 8; i++)
                *reg_write(ctxt, i) = smstate->gprs[i];

        if (kvm_set_dr(vcpu, 6, smstate->dr6))
                return X86EMUL_UNHANDLEABLE;
        if (kvm_set_dr(vcpu, 7, smstate->dr7))
                return X86EMUL_UNHANDLEABLE;

        rsm_load_seg_32(vcpu, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
        rsm_load_seg_32(vcpu, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);

        dt.address =               smstate->gdtr.base;
        dt.size =                  smstate->gdtr.limit;
        kvm_x86_call(set_gdt)(vcpu, &dt);

        dt.address =               smstate->idtr.base;
        dt.size =                  smstate->idtr.limit;
        kvm_x86_call(set_idt)(vcpu, &dt);

        rsm_load_seg_32(vcpu, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
        rsm_load_seg_32(vcpu, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
        rsm_load_seg_32(vcpu, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);

        rsm_load_seg_32(vcpu, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
        rsm_load_seg_32(vcpu, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
        rsm_load_seg_32(vcpu, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);

        vcpu->arch.smbase = smstate->smbase;

        r = rsm_enter_protected_mode(vcpu, smstate->cr0,
                                        smstate->cr3, smstate->cr4);

        if (r != X86EMUL_CONTINUE)
                return r;

        kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
        ctxt->interruptibility = (u8)smstate->int_shadow;

        return r;
}

#ifdef CONFIG_X86_64
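/* Restore vCPU state from a 64-bit SMRAM state-save image on RSM. */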
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
                             const struct kvm_smram_state_64 *smstate)
{
        struct kvm_vcpu *vcpu = ctxt->vcpu;
        struct desc_ptr dt;
        int i, r;

        for (i = 0; i < 16; i++)
                *reg_write(ctxt, i) = smstate->gprs[15 - i];

        ctxt->_eip   = smstate->rip;
        ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;

        if (kvm_set_dr(vcpu, 6, smstate->dr6))
                return X86EMUL_UNHANDLEABLE;
        if (kvm_set_dr(vcpu, 7, smstate->dr7))
                return X86EMUL_UNHANDLEABLE;

        vcpu->arch.smbase =         smstate->smbase;

        if (__kvm_emulate_msr_write(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
                return X86EMUL_UNHANDLEABLE;

        rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR);

        dt.size =                   smstate->idtr.limit;
        dt.address =                smstate->idtr.base;
        kvm_x86_call(set_idt)(vcpu, &dt);

        rsm_load_seg_64(vcpu, &smstate->ldtr, VCPU_SREG_LDTR);

        dt.size =                   smstate->gdtr.limit;
        dt.address =                smstate->gdtr.base;
        kvm_x86_call(set_gdt)(vcpu, &dt);

        r = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3, smstate->cr4);
        if (r != X86EMUL_CONTINUE)
                return r;

        rsm_load_seg_64(vcpu, &smstate->es, VCPU_SREG_ES);
        rsm_load_seg_64(vcpu, &smstate->cs, VCPU_SREG_CS);
        rsm_load_seg_64(vcpu, &smstate->ss, VCPU_SREG_SS);
        rsm_load_seg_64(vcpu, &smstate->ds, VCPU_SREG_DS);
        rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS);
        rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS);

        kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
        ctxt->interruptibility = (u8)smstate->int_shadow;

        if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
            kvm_msr_write(vcpu, MSR_KVM_INTERNAL_GUEST_SSP, smstate->ssp))
                return X86EMUL_UNHANDLEABLE;

        return X86EMUL_CONTINUE;
}
#endif

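/*
 * Emulate RSM: read the state-save area from SMRAM, leave SMM, force the vCPU
 * into a state from which CR0/CR3/CR4/EFER can be safely reloaded, and then
 * restore the saved state.
 */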
int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
{
        struct kvm_vcpu *vcpu = ctxt->vcpu;
        unsigned long cr0;
        union kvm_smram smram;
        u64 smbase;
        int ret;

        smbase = vcpu->arch.smbase;

        ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, smram.bytes, sizeof(smram));
        if (ret < 0)
                return X86EMUL_UNHANDLEABLE;

        if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
                kvm_x86_call(set_nmi_mask)(vcpu, false);

        kvm_smm_changed(vcpu, false);

        /*
         * Get back to real mode, to prepare a safe state in which to load
         * CR0/CR3/CR4/EFER.  This is more involved if the vCPU supports long
         * mode, as EFER.LMA/LME must also be cleared along the way.
         */
#ifdef CONFIG_X86_64
        if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) {
                struct kvm_segment cs_desc;
                unsigned long cr4;

                /* Zero CR4.PCIDE before CR0.PG.  */
                cr4 = kvm_read_cr4(vcpu);
                if (cr4 & X86_CR4_PCIDE)
                        kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);

                /* A 32-bit code segment is required to clear EFER.LMA.  */
                memset(&cs_desc, 0, sizeof(cs_desc));
                cs_desc.type = 0xb;
                cs_desc.s = cs_desc.g = cs_desc.present = 1;
                kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
        }
#endif

        /* For the 64-bit case, this will clear EFER.LMA.  */
        cr0 = kvm_read_cr0(vcpu);
        if (cr0 & X86_CR0_PE)
                kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));

#ifdef CONFIG_X86_64
        if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) {
                unsigned long cr4, efer;

                /* Clear CR4.PAE before clearing EFER.LME. */
                cr4 = kvm_read_cr4(vcpu);
                if (cr4 & X86_CR4_PAE)
                        kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE);

                /* And finally go back to 32-bit mode.  */
                efer = 0;
                __kvm_emulate_msr_write(vcpu, MSR_EFER, efer);
        }
#endif

        /*
         * FIXME: When resuming L2 (a.k.a. guest mode), the transition to guest
         * mode should happen _after_ loading state from SMRAM.  However, KVM
         * piggybacks the nested VM-Enter flows (which is wrong for many other
         * reasons), and so nSVM/nVMX would clobber state that is loaded from
         * SMRAM and from the VMCS/VMCB.
         */
        if (kvm_x86_call(leave_smm)(vcpu, &smram))
                return X86EMUL_UNHANDLEABLE;

#ifdef CONFIG_X86_64
        if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
                ret = rsm_load_state_64(ctxt, &smram.smram64);
        else
#endif
                ret = rsm_load_state_32(ctxt, &smram.smram32);

        /*
         * If RSM fails and triggers shutdown, architecturally the shutdown
         * occurs *before* the transition to guest mode.  But due to KVM's
         * flawed handling of RSM to L2 (see above), the vCPU may already be
         * in_guest_mode().  Force the vCPU out of guest mode before delivering
         * the shutdown, so that L1 enters shutdown instead of seeing a VM-Exit
         * that architecturally shouldn't be possible.
         */
        if (ret != X86EMUL_CONTINUE && is_guest_mode(vcpu))
                kvm_leave_nested(vcpu);
        return ret;
}