linux的nvme驱动参数调优
nvme的設備,可以調優的參數比較少,相關的代碼如下:
block/blk-sysfs.c（注意内核源码文件名是 blk-sysfs.c，用连字符）
/*
 * sysfs attribute tables for the block-layer request queue
 * (files under /sys/block/<dev>/queue/).
 *
 * Each entry binds a sysfs file name to its show (read) handler and,
 * for writable attributes, a store (write) handler.
 * Mode S_IRUGO makes the file world-readable; S_IWUSR additionally
 * allows writes by the owner (root). Entries without a .store handler
 * are read-only and cannot be tuned through /sys.
 */

/* nr_requests (RW): request-queue depth. */
static struct queue_sysfs_entry queue_requests_entry = {
.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
.show = queue_requests_show,
.store = queue_requests_store,
};
/* read_ahead_kb (RW): readahead window size in KiB. */
static struct queue_sysfs_entry queue_ra_entry = {
.attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
.show = queue_ra_show,
.store = queue_ra_store,
};
/* max_sectors_kb (RW): current max I/O size per request, in KiB. */
static struct queue_sysfs_entry queue_max_sectors_entry = {
.attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },
.show = queue_max_sectors_show,
.store = queue_max_sectors_store,
};
/* max_hw_sectors_kb (RO): hardware limit on I/O size. */
static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
.attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },
.show = queue_max_hw_sectors_show,
};
/* max_segments (RO). */
static struct queue_sysfs_entry queue_max_segments_entry = {
.attr = {.name = "max_segments", .mode = S_IRUGO },
.show = queue_max_segments_show,
};
/* max_integrity_segments (RO). */
static struct queue_sysfs_entry queue_max_integrity_segments_entry = {
.attr = {.name = "max_integrity_segments", .mode = S_IRUGO },
.show = queue_max_integrity_segments_show,
};
/* max_segment_size (RO). */
static struct queue_sysfs_entry queue_max_segment_size_entry = {
.attr = {.name = "max_segment_size", .mode = S_IRUGO },
.show = queue_max_segment_size_show,
};
/* scheduler (RW): I/O elevator selection, handled by elevator code. */
static struct queue_sysfs_entry queue_iosched_entry = {
.attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
.show = elv_iosched_show,
.store = elv_iosched_store,
};
/*
 * hw_sector_size (RO): legacy name; note it reuses
 * queue_logical_block_size_show, i.e. it reports the logical block size.
 */
static struct queue_sysfs_entry queue_hw_sector_size_entry = {
.attr = {.name = "hw_sector_size", .mode = S_IRUGO },
.show = queue_logical_block_size_show,
};
/* logical_block_size (RO). */
static struct queue_sysfs_entry queue_logical_block_size_entry = {
.attr = {.name = "logical_block_size", .mode = S_IRUGO },
.show = queue_logical_block_size_show,
};
/* physical_block_size (RO). */
static struct queue_sysfs_entry queue_physical_block_size_entry = {
.attr = {.name = "physical_block_size", .mode = S_IRUGO },
.show = queue_physical_block_size_show,
};
/* minimum_io_size (RO). */
static struct queue_sysfs_entry queue_io_min_entry = {
.attr = {.name = "minimum_io_size", .mode = S_IRUGO },
.show = queue_io_min_show,
};
/* optimal_io_size (RO). */
static struct queue_sysfs_entry queue_io_opt_entry = {
.attr = {.name = "optimal_io_size", .mode = S_IRUGO },
.show = queue_io_opt_show,
};
/* discard_granularity (RO). */
static struct queue_sysfs_entry queue_discard_granularity_entry = {
.attr = {.name = "discard_granularity", .mode = S_IRUGO },
.show = queue_discard_granularity_show,
};
/* discard_max_bytes (RO). */
static struct queue_sysfs_entry queue_discard_max_entry = {
.attr = {.name = "discard_max_bytes", .mode = S_IRUGO },
.show = queue_discard_max_show,
};
/* discard_zeroes_data (RO). */
static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
.attr = {.name = "discard_zeroes_data", .mode = S_IRUGO },
.show = queue_discard_zeroes_data_show,
};
/* write_same_max_bytes (RO). */
static struct queue_sysfs_entry queue_write_same_max_entry = {
.attr = {.name = "write_same_max_bytes", .mode = S_IRUGO },
.show = queue_write_same_max_show,
};
/* rotational (RW): 0 = non-rotational (SSD/NVMe), 1 = rotational. */
static struct queue_sysfs_entry queue_nonrot_entry = {
.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_nonrot,
.store = queue_store_nonrot,
};
/* unpriv_sgio (RW). */
static struct queue_sysfs_entry queue_unpriv_sgio_entry = {
.attr = {.name = "unpriv_sgio", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_unpriv_sgio,
.store = queue_store_unpriv_sgio,
};
/* nomerges (RW): controls block-layer request merging (see below). */
static struct queue_sysfs_entry queue_nomerges_entry = {
.attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR },
.show = queue_nomerges_show,
.store = queue_nomerges_store,
};
/* rq_affinity (RW): where request completions run (see below). */
static struct queue_sysfs_entry queue_rq_affinity_entry = {
.attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
.show = queue_rq_affinity_show,
.store = queue_rq_affinity_store,
};
/* iostats (RW): enable/disable I/O statistics accounting. */
static struct queue_sysfs_entry queue_iostats_entry = {
.attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_iostats,
.store = queue_store_iostats,
};
/* add_random (RW): whether disk activity feeds the entropy pool. */
static struct queue_sysfs_entry queue_random_entry = {
.attr = {.name = "add_random", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_random,
.store = queue_store_random,
};
參數列表如下:
[root@localhost queue]# ls -alrt *
-rw-r--r-- 1 root root 4096 Dec 18 19:58 read_ahead_kb
-rw-r--r-- 1 root root 4096 Dec 18 20:01 nomerges
-rw-r--r-- 1 root root 4096 Dec 18 20:34 rq_affinity
-rw-r--r-- 1 root root 4096 Dec 19 08:39 max_sectors_kb
-rw-r--r-- 1 root root 4096 Dec 19 08:47 nr_requests
-rw-r--r-- 1 root root 4096 Dec 19 08:54 iostats
-r--r--r-- 1 root root 4096 Dec 19 08:54 write_same_max_bytes
-rw-r--r-- 1 root root 4096 Dec 19 08:54 unpriv_sgio
-rw-r--r-- 1 root root 4096 Dec 19 08:54 scheduler
-rw-r--r-- 1 root root 4096 Dec 19 08:54 rotational
-r--r--r-- 1 root root 4096 Dec 19 08:54 physical_block_size
-r--r--r-- 1 root root 4096 Dec 19 08:54 optimal_io_size
-r--r--r-- 1 root root 4096 Dec 19 08:54 minimum_io_size
-r--r--r-- 1 root root 4096 Dec 19 08:54 max_segments
-r--r--r-- 1 root root 4096 Dec 19 08:54 max_segment_size
-r--r--r-- 1 root root 4096 Dec 19 08:54 max_integrity_segments
-r--r--r-- 1 root root 4096 Dec 19 08:54 max_hw_sectors_kb
-r--r--r-- 1 root root 4096 Dec 19 08:54 logical_block_size
-r--r--r-- 1 root root 4096 Dec 19 08:54 hw_sector_size
-r--r--r-- 1 root root 4096 Dec 19 08:54 discard_zeroes_data
-r--r--r-- 1 root root 4096 Dec 19 08:54 discard_max_bytes
-r--r--r-- 1 root root 4096 Dec 19 08:54 discard_granularity
-rw-r--r-- 1 root root 4096 Dec 19 08:54 add_random
其中屬性為只讀的,肯定直接通過/sys/沒法修改,有的硬編碼的跟驅動相關,可以嘗試修改驅動。其余可以嘗試調優的參數如下:
1.nomerges (RW)
------------- This enables the user to disable the lookup logic involved with IO merging requests in the block layer. By default (0) all merges are enabled. When set to 1 only simple one-hit merges will be tried. When set to 2 no merge algorithms will be tried (including one-hit or more complex tree/hash lookups).
這個根據打開的統計,發現iostat里面前面的兩列關于merge的,都一直為0,所以干脆設置為不要merge,可以減少一段代碼邏輯,代碼中會判斷queue的merge屬性,
當其值為0,說明QUEUE_FLAG_NOXMERGES 和QUEUE_FLAG_NOMERGES 都沒有設置。這個設置為2,表示不需要merge,機械盤一般設置為需要merge,相關代碼如下:
/*
 * Store handler for /sys/block/<dev>/queue/nomerges.
 *
 * Parses the user-supplied value and updates the queue's merge flags:
 *   0 - both flags cleared: all merging enabled (default)
 *   1 - QUEUE_FLAG_NOXMERGES set: only simple one-hit merges are tried
 *   2 - QUEUE_FLAG_NOMERGES set: no merging at all
 *
 * @q:     request queue being configured
 * @page:  user-supplied buffer containing the value
 * @count: length of @page
 *
 * Returns the number of bytes consumed, or a negative value from
 * queue_var_store() on parse failure.
 */
static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
				    size_t count)
{
	unsigned long nm;
	ssize_t ret = queue_var_store(&nm, page, count);

	if (ret < 0)
		return ret;

	/* Flag updates are serialized under the queue lock. */
	spin_lock_irq(q->queue_lock);
	/* Reset to the default (all merges enabled), then apply the new mode. */
	queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
	queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
	if (nm == 2)
		queue_flag_set(QUEUE_FLAG_NOMERGES, q);	/* 2: disable merging entirely */
	else if (nm)
		queue_flag_set(QUEUE_FLAG_NOXMERGES, q);	/* non-zero: one-hit merges only */
	spin_unlock_irq(q->queue_lock);

	return ret;
}
2.rq_affinity (RW)
---------------- If this option is '1', the block layer will migrate request completions to the cpu "group" that originally submitted the request. For some workloads this provides a significant reduction in CPU cycles due to caching effects.
For storage configurations that need to maximize distribution of completion processing setting this option to '2' forces the completion to run on the requesting cpu (bypassing the "group" aggregation logic).
設置為2時,請求的完成處理被強制在發起請求的那個cpu上運行(繞過cpu group的聚合邏輯),緩存命中率更高,可以減少cpu使用。
3.add_random (RW)
---------------- This file allows to turn off the disk entropy contribution. Default value of this file is '1'(on).
這個最好設置為0,可以減少一點點性能消耗。
readahead參數,是如何影響nvme的性能的?一開始以為這個對機械盤影響較大,后來根據追蹤代碼,發現對io的及時性還是有較大的提高。
下面的代碼描述了初始化階段設置的大小,這個如果可以的話,建議設置大一些,比如1M。
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
struct request_queue *q;
struct queue_limits_aux *limits_aux = NULL;
int err;
q = kmem_cache_alloc_node(blk_requestq_cachep,
gfp_mask | __GFP_ZERO, node_id);
if (!q)
return NULL;
q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
if (q->id < 0)
goto fail_q;
q->backing_dev_info.ra_pages =
(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;//初始化預讀參數,默認為128k
4.中斷綁核
我們知道,nvme的隊列名稱,其實是根據核數來編號的,因為admin的隊列和io隊列的第一個是共享一個中斷的,所以他倆的中斷數會相對比其他io隊列多一些,由于隊列默認就是跟隨
cpu號而綁定的,所以中斷號,最好送到指定的cpu上去,因為中斷上下文畢竟是要訪問內存的,具體怎么綁,可以參照如下:
查看/proc/interrupts,中斷名稱是nvme0q0,當然類似的nvme1q0也是,以此類推,這個肯定是admin隊列。
io隊列就是nvme0q1----nvme0qx,其中x就是cpu的核數。
nvme0q1這個對列,其實默認就是在cpu0上,那么對應的中斷,最好也綁在cpu0上。
nvme0q30這個隊列,默認在cpu29上,那么對應的中斷,最好也綁在cpu29上。以此類推。
水平有限,如果有錯誤,請幫忙提醒我。如果您覺得本文對您有幫助,可以點擊下面的 推薦 支持一下我。版權所有,需要轉發請帶上本文源地址,博客一直在更新,歡迎 關注 。
總結
以上是生活随笔為你收集整理的linux的nvme驱动参数调优的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: scrapy的allowed_domai
- 下一篇: Lettuce命令延迟测量(Comman