Skip to content
Snippets Groups Projects
Commit bad4d883 authored by liubo's avatar liubo Committed by Cheng Jian
Browse files

etmem: add etmem-scan feature


euleros inclusion
category: feature
feature: etmem
bugzilla: 49889

-------------------------------------------------

etmem is a memory vertical-expansion technology: it combines DRAM with
new high-performance storage media to form multi-level memory storage.
By grading stored data as hot or cold, etmem migrates the data classified
as cold from memory to the high-performance storage medium, which expands
usable memory capacity and reduces memory cost.

The etmem feature is mainly composed of two parts: etmem_scan and
etmem_swap.

This patch is mainly used to generate etmem_scan.ko.
etmem_scan.ko is used to scan the virtual address of the target process
and return the address access information to
the user mode for grading cold and hot pages.

Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: yanxiaodan <yanxiaodan@huawei.com>
Signed-off-by: Feilong Lin <linfeilong@huawei.com>
Signed-off-by: geruijun <geruijun@huawei.com>
Signed-off-by: liubo <liubo254@huawei.com>
Acked-by: Xie XiuQi <xiexiuqi@huawei.com>
Reviewed-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
parent e78c1ed5
No related branches found
No related tags found
No related merge requests found
......@@ -33,3 +33,4 @@ proc-$(CONFIG_PROC_KCORE) += kcore.o
proc-$(CONFIG_PROC_VMCORE) += vmcore.o
proc-$(CONFIG_PRINTK) += kmsg.o
proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o
obj-$(CONFIG_ETMEM_SCAN) += etmem_scan.o
......@@ -2988,6 +2988,7 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("smaps", S_IRUGO, proc_pid_smaps_operations),
REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
REG("pagemap", S_IRUSR, proc_pagemap_operations),
REG("idle_pages", S_IRUSR|S_IWUSR, proc_mm_idle_operations),
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
......@@ -3373,6 +3374,7 @@ static const struct pid_entry tid_base_stuff[] = {
REG("smaps", S_IRUGO, proc_pid_smaps_operations),
REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
REG("pagemap", S_IRUSR, proc_pagemap_operations),
REG("idle_pages", S_IRUSR|S_IWUSR, proc_mm_idle_operations),
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
......
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PAGE_IDLE_H
#define _PAGE_IDLE_H
/*
 * Scan-mode flags. Each one is an alias for an existing open(2) flag
 * constant (O_NONBLOCK, O_NOFOLLOW, O_NOATIME) rather than a new bit.
 * NOTE(review): presumably user space selects a mode through the open
 * flags of the idle_pages file -- confirm against the scan module.
 */
#define SCAN_HUGE_PAGE O_NONBLOCK /* only huge page */
#define SCAN_SKIM_IDLE O_NOFOLLOW /* stop on PMD_IDLE_PTES */
#define SCAN_DIRTY_PAGE O_NOATIME /* report pte/pmd dirty bit */
/*
 * Page classification codes.
 *
 * PIP_COMPOSE() shifts one of these into bits 7..4 and PIP_TYPE() reads it
 * back through a 0xf mask, so every code has to fit in four bits. The
 * numeric values are written out so that inserting or reordering an entry
 * cannot silently renumber the others.
 */
enum ProcIdlePageType {
	PTE_ACCESSED		= 0,	/* 4k page */
	PMD_ACCESSED		= 1,	/* 2M page */
	PUD_PRESENT		= 2,	/* 1G page */

	PTE_DIRTY_M		= 3,
	PMD_DIRTY_M		= 4,

	PTE_IDLE		= 5,
	PMD_IDLE		= 6,
	PMD_IDLE_PTES		= 7,	/* all PTE idle */

	PTE_HOLE		= 8,
	PMD_HOLE		= 9,

	PIP_CMD			= 10,

	IDLE_PAGE_TYPE_MAX	= 11	/* number of codes above; not a code itself */
};
/*
 * Packing helpers for a "PIP" value: a type code in bits 7..4 and a 4-bit
 * size/count in bits 3..0.
 *
 *   PIP_TYPE(a)           - extract the type code (bits 7..4) from a
 *   PIP_SIZE(a)           - extract the low nibble (bits 3..0) from a
 *   PIP_COMPOSE(type, nr) - build a value from a type code and a low nibble;
 *                           nr is not masked, so callers must keep it <= 0xf
 *   PIP_CMD_SET_HVA       - the PIP_CMD type with a zero low nibble
 *
 * Every macro argument is parenthesized so that compound expressions such
 * as PIP_TYPE(x | y) or PIP_COMPOSE(a | b, n) are evaluated as a unit
 * instead of being split by the precedence of >>, << and &.
 */
#define PIP_TYPE(a)		(0xf & ((a) >> 4))
#define PIP_SIZE(a)		(0xf & (a))
#define PIP_COMPOSE(type, nr)	(((type) << 4) | (nr))
#define PIP_CMD_SET_HVA		PIP_COMPOSE(PIP_CMD, 0)
/*
 * All-ones unsigned long. Only defined here when no earlier header has
 * already provided INVALID_PAGE.
 */
#if !defined(INVALID_PAGE)
#define INVALID_PAGE	(~0UL)
#endif
/*
 * Bit index of the "accessed" flag in a page-table entry.
 * On arm64 the index is given literally as 10; every other configuration
 * reuses the architecture's _PAGE_BIT_ACCESSED.
 * NOTE(review): 10 presumably matches the arm64 access flag (PTE_AF) --
 * confirm against the arm64 pgtable definitions.
 */
#ifdef CONFIG_ARM64
#define _PAGE_MM_BIT_ACCESSED 10
#else
#define _PAGE_MM_BIT_ACCESSED _PAGE_BIT_ACCESSED
#endif
#ifdef CONFIG_X86_64
/*
 * Bit layout used when testing EPT entries: "accessed" is bit 8, "dirty" is
 * bit 9, and an entry counts as present when any of its low three bits
 * (mask 7) is set.
 * NOTE(review): the low three bits are presumably the EPT read/write/execute
 * permissions -- confirm against the Intel SDM.
 */
#define _PAGE_BIT_EPT_ACCESSED 8
#define _PAGE_BIT_EPT_DIRTY 9
#define _PAGE_EPT_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_EPT_ACCESSED)
#define _PAGE_EPT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_EPT_DIRTY)
#define _PAGE_EPT_PRESENT (_AT(pteval_t, 7))

/*
 * ept_<level>_present(): one helper per page-table level.
 * Each returns the entry's flags masked with _PAGE_EPT_PRESENT, i.e. a
 * non-zero value (not normalised to 1) when the entry is present.
 */
static inline int ept_pte_present(pte_t a)
{
	return (int)(pte_flags(a) & _PAGE_EPT_PRESENT);
}

static inline int ept_pmd_present(pmd_t a)
{
	return (int)(pmd_flags(a) & _PAGE_EPT_PRESENT);
}

static inline int ept_pud_present(pud_t a)
{
	return (int)(pud_flags(a) & _PAGE_EPT_PRESENT);
}

static inline int ept_p4d_present(p4d_t a)
{
	return (int)(p4d_flags(a) & _PAGE_EPT_PRESENT);
}

static inline int ept_pgd_present(pgd_t a)
{
	return (int)(pgd_flags(a) & _PAGE_EPT_PRESENT);
}

/*
 * ept_<level>_accessed(): one helper per page-table level.
 * Each returns the entry's flags masked with _PAGE_EPT_ACCESSED, i.e.
 * non-zero (bit 8) when the accessed bit is set.
 */
static inline int ept_pte_accessed(pte_t a)
{
	return (int)(pte_flags(a) & _PAGE_EPT_ACCESSED);
}

static inline int ept_pmd_accessed(pmd_t a)
{
	return (int)(pmd_flags(a) & _PAGE_EPT_ACCESSED);
}

static inline int ept_pud_accessed(pud_t a)
{
	return (int)(pud_flags(a) & _PAGE_EPT_ACCESSED);
}

static inline int ept_p4d_accessed(p4d_t a)
{
	return (int)(p4d_flags(a) & _PAGE_EPT_ACCESSED);
}

static inline int ept_pgd_accessed(pgd_t a)
{
	return (int)(pgd_flags(a) & _PAGE_EPT_ACCESSED);
}
#endif
/*
 * Hook table that the proc idle_pages handlers forward to. It is defined
 * (and exported) outside this header; declared here so users of this
 * header can reference it.
 */
extern struct file_operations proc_page_scan_operations;
/*
 * NOTE(review): PAGE_IDLE_KBUF_FULL / PAGE_IDLE_BUF_FULL look like status
 * codes for "kernel buffer full" and "user buffer full" -- confirm against
 * the scan code, which is not visible here.
 */
#define PAGE_IDLE_KBUF_FULL 1
#define PAGE_IDLE_BUF_FULL 2
/* Two uint64_t values plus three bytes; presumably the smallest usable read size -- TODO confirm. */
#define PAGE_IDLE_BUF_MIN (sizeof(uint64_t) * 2 + 3)
/* Size in bytes of the kpie[] staging array in struct page_idle_ctrl. */
#define PAGE_IDLE_KBUF_SIZE 8000
/*
 * Per-scan control state.
 * NOTE(review): the field descriptions below are inferred from names and
 * types only; the code that uses them is not visible here -- confirm.
 */
struct page_idle_ctrl {
/* address space being scanned */
struct mm_struct *mm;
/* associated KVM instance, if any */
struct kvm *kvm;
/* kernel-side byte buffer of PAGE_IDLE_KBUF_SIZE entries */
uint8_t kpie[PAGE_IDLE_KBUF_SIZE];
/* presumably the current fill level of kpie[] and its limit */
int pie_read;
int pie_read_max;
/* user-space destination buffer, its size, and bytes copied so far (presumed) */
void __user *buf;
int buf_size;
int bytes_copied;
unsigned long next_hva; /* GPA for EPT; VA for PT */
/* presumably the offset used to translate a guest physical address to a host virtual address */
unsigned long gpa_to_hva;
/* presumably where to resume a guest-physical scan */
unsigned long restart_gpa;
/* presumably the last virtual address reported */
unsigned long last_va;
/* scan flags; presumably the SCAN_* values */
unsigned int flags;
};
#endif
......@@ -299,6 +299,7 @@ extern const struct file_operations proc_pid_smaps_operations;
extern const struct file_operations proc_pid_smaps_rollup_operations;
extern const struct file_operations proc_clear_refs_operations;
extern const struct file_operations proc_pagemap_operations;
extern const struct file_operations proc_mm_idle_operations;
extern unsigned long task_vsize(struct mm_struct *);
extern unsigned long task_statm(struct mm_struct *,
......
......@@ -1598,6 +1598,72 @@ const struct file_operations proc_pagemap_operations = {
.open = pagemap_open,
.release = pagemap_release,
};
/*
 * Hook table for the page-scan module.
 *
 * It starts out empty: mm_idle_read(), mm_idle_open() and mm_idle_release()
 * each check the matching member and only forward the call when it is
 * non-NULL. According to the original note, the members are filled in when
 * the kvm_ept_idle module loads (the commit message names the module
 * etmem_scan.ko). The table is deliberately not const and is exported
 * GPL-only so that a module can write to it.
 */
struct file_operations proc_page_scan_operations = {
};
EXPORT_SYMBOL_GPL(proc_page_scan_operations);
/*
 * mm_idle_read - read handler of the per-task "idle_pages" proc file.
 * @file:  open file; ->private_data holds the mm stored by mm_idle_open()
 * @buf:   user-space destination buffer
 * @count: size of @buf in bytes
 * @ppos:  file position, passed through to the scan hook
 *
 * Takes a temporary mm_users reference for the duration of the call and
 * forwards the read to proc_page_scan_operations.read when that hook is set.
 *
 * Return: whatever the hook returns; 0 when no hook is installed; -ESRCH
 * when there is no mm or its user count has already dropped to zero.
 */
static ssize_t mm_idle_read(struct file *file, char __user *buf,
			    size_t count, loff_t *ppos)
{
	struct mm_struct *mm = file->private_data;
	/* ssize_t rather than int: the hook returns ssize_t and must not be narrowed. */
	ssize_t ret = 0;

	if (!mm || !mmget_not_zero(mm))
		return -ESRCH;

	if (proc_page_scan_operations.read)
		ret = proc_page_scan_operations.read(file, buf, count, ppos);

	mmput(mm);
	return ret;
}
/*
 * mm_idle_open - open handler of the per-task "idle_pages" proc file.
 * @inode: proc inode of the target task
 * @file:  file being opened; ->private_data receives the task's mm
 *
 * Only openers holding CAP_SYS_ADMIN in the initial user namespace are
 * admitted. The mm obtained from proc_mem_open() is stored in
 * @file->private_data and later released by mm_idle_release() via mmdrop().
 * If the scan module's ->open hook is installed it is called last.
 *
 * Return: 0 on success, -EPERM without the capability, the error from
 * proc_mem_open(), or the error returned by the hook.
 */
static int mm_idle_open(struct inode *inode, struct file *file)
{
	struct mm_struct *mm;
	int ret;

	if (!file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	mm = proc_mem_open(inode, PTRACE_MODE_READ);
	if (IS_ERR(mm))
		return PTR_ERR(mm);

	file->private_data = mm;

	if (!proc_page_scan_operations.open)
		return 0;

	ret = proc_page_scan_operations.open(inode, file);
	if (ret) {
		/*
		 * ->release is not invoked for a failed open, so the mm
		 * reference taken above must be dropped here or it leaks.
		 */
		file->private_data = NULL;
		if (mm)
			mmdrop(mm);
	}

	return ret;
}
/*
 * mm_idle_release - release handler of the per-task "idle_pages" proc file.
 * @inode: proc inode of the target task
 * @file:  file being closed; ->private_data holds the mm from mm_idle_open()
 *
 * For an mm with no KVM instance attached, the TLB is flushed for that mm.
 * The scan module's ->release hook (if installed) then runs, and only
 * afterwards is the mm reference dropped: the hook is handed @file, whose
 * ->private_data still points at the mm, so dropping the reference first
 * could leave the hook with a dangling pointer.
 *
 * Return: the hook's return value, or 0 when no hook is installed.
 */
static int mm_idle_release(struct inode *inode, struct file *file)
{
	struct mm_struct *mm = file->private_data;
	int ret = 0;

	if (mm && !mm_kvm(mm))
		flush_tlb_mm(mm);

	if (proc_page_scan_operations.release)
		ret = proc_page_scan_operations.release(inode, file);

	/* Drop our reference last, once nothing can reach the mm via @file. */
	if (mm)
		mmdrop(mm);

	return ret;
}
/*
 * File operations for the per-task "idle_pages" proc entry. The handlers
 * are thin wrappers that forward to proc_page_scan_operations.
 */
const struct file_operations proc_mm_idle_operations = {
	.open		= mm_idle_open,
	.read		= mm_idle_read,
	.llseek		= mem_lseek,	/* borrowed; no seek handler of our own */
	.release	= mm_idle_release,
};
#endif /* CONFIG_PROC_PAGE_MONITOR */
#ifdef CONFIG_NUMA
......
......@@ -28,6 +28,7 @@ typedef int vm_fault_t;
struct address_space;
struct mem_cgroup;
struct hmm;
struct kvm;
/*
* Each physical page in the system has a struct page associated with
......@@ -513,7 +514,12 @@ struct mm_struct {
#endif
} __randomize_layout;
#if IS_ENABLED(CONFIG_KVM) && !defined(__GENKSYMS__)
struct kvm *kvm;
#else
KABI_RESERVE(1)
#endif
KABI_RESERVE(2)
KABI_RESERVE(3)
KABI_RESERVE(4)
......@@ -531,6 +537,18 @@ struct mm_struct {
extern struct mm_struct init_mm;
/*
 * mm_kvm - look up the KVM instance recorded in an address space.
 * @mm: address space to query
 *
 * Return: mm->kvm when KVM support is enabled (built in or modular);
 * always NULL otherwise.
 */
static inline struct kvm *mm_kvm(struct mm_struct *mm)
{
#if IS_ENABLED(CONFIG_KVM)
	return mm->kvm;
#else
	return NULL;
#endif
}
/* Pointer magic because the dynamic array size confuses some compilers. */
static inline void mm_init_cpumask(struct mm_struct *mm)
{
......
......@@ -599,6 +599,12 @@ config PARMAN
config PRIME_NUMBERS
tristate
config ETMEM_SCAN
tristate "module: etmem page scan for etmem support"
help
Page scan support for etmem.
Scans the virtual address space of a target process and reports
page access information to user space.
config STRING_SELFTEST
tristate "Test string functions"
......
......@@ -338,6 +338,7 @@ int walk_page_range(unsigned long start, unsigned long end,
} while (start = next, start < end);
return err;
}
EXPORT_SYMBOL_GPL(walk_page_range);
int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk)
{
......
......@@ -778,6 +778,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
struct mm_struct *mm = kvm->mm;
kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
#if IS_ENABLED(CONFIG_KVM)
mm->kvm = NULL;
#endif
kvm_destroy_vm_debugfs(kvm);
kvm_arch_sync_events(kvm);
mutex_lock(&kvm_lock);
......@@ -3298,6 +3301,9 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
fput(file);
return -ENOMEM;
}
#if IS_ENABLED(CONFIG_KVM)
kvm->mm->kvm = kvm;
#endif
kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm);
fd_install(r, file);
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment