NVMe-oF 设备文件实现

基于 RHEL/CentOS 8.4, 4.18.0-305.19.1 内核代码。

# nvme list -v
NVM Express Subsystems

Subsystem        Subsystem-NQN                                                                                    Controllers
---------------- ------------------------------------------------------------------------------------------------ ----------------
nvme-subsys0     nqn.2014.08.org.nvmexpress:80868086CVCQ652400851P2JGN  INTEL SSDPEDMW012T4                       nvme0
nvme-subsys1     nqn.2014-08.org.nvmexpress:NVMf:uuid:7763c073-d4f6-42b0-bd3d-70826704856e                        nvme1

NVM Express Controllers

Device   SN                   MN                                       FR       TxPort Address        Subsystem    Namespaces      
-------- -------------------- ---------------------------------------- -------- ------ -------------- ------------ ----------------
nvme0    CVCQ652400851P2JGN   INTEL SSDPEDMW012T4                      8EV10171 pcie   0000:05:00.0   nvme-subsys0 nvme0n1
nvme1    a2e89bbaa386e18e     Linux                                    4.18.0-3 tcp    traddr=192.168.1.70 trsvcid=3261 nvme-subsys1 nvme1n1

NVM Express Namespaces

Device       NSID     Usage                      Format           Controllers     
------------ -------- -------------------------- ---------------- ----------------
nvme0n1      1          1.20  TB /   1.20  TB    512   B +  0 B   nvme0
nvme1n1      1          1.07  GB /   1.07  GB      4 KiB +  0 B   nvme1
$ ls /dev/nvme*
/dev/nvme0  /dev/nvme0n1  /dev/nvme1  /dev/nvme1n1  /dev/nvme-fabrics

nvme fabrics

# tree -lL 1 /sys/class/misc/nvme-fabrics/
/sys/class/misc/nvme-fabrics/
├── dev
├── power
├── subsystem -> ../../../../class/misc
└── uevent

# tree -lL 2 /sys/class/nvme-fabrics/
/sys/class/nvme-fabrics/
└── ctl -> ../../devices/virtual/nvme-fabrics/ctl
    ├── nvme1
    ├── power
    ├── subsystem -> ../../../../class/nvme-fabrics  [recursive, not followed]
    └── uevent
nvmf_init
  nvmf_class = class_create(THIS_MODULE, "nvme-fabrics")
  // /sys/class/nvme-fabrics/ctl
  nvmf_device = device_create(nvmf_class)
  // /sys/class/misc/nvme-fabrics
  // /dev/nvme-fabrics
  misc_register(&nvmf_misc)

nvme ctrl & nvme-subsystem device

nvme_core_init
  alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme")
  nvme_class = class_create(THIS_MODULE, "nvme")
  nvme_subsys_class = class_create(THIS_MODULE, "nvme-subsystem")

nvme controller, char device

# tree -lL 2 /sys/class/nvme/
/sys/class/nvme/
├── nvme0 -> ../../devices/pci0000:00/0000:00:02.0/0000:05:00.0/nvme/nvme0
│   ├── address
│   ├── cntlid
│   ├── dev
│   ├── device -> ../../../0000:05:00.0
│   ├── firmware_rev
│   ├── model
│   ├── numa_node
│   ├── nvme0n1
│   ├── power
│   ├── queue_count
│   ├── rescan_controller
│   ├── reset_controller
│   ├── serial
│   ├── sqsize
│   ├── state
│   ├── subsysnqn
│   ├── subsystem -> ../../../../../../class/nvme  [recursive, not followed]
│   ├── transport
│   └── uevent
└── nvme1 -> ../../devices/virtual/nvme-fabrics/ctl/nvme1
    ├── address
    ├── cntlid
    ├── ctrl_loss_tmo
    ├── delete_controller
    ├── dev
    ├── device -> ../../ctl
    ├── firmware_rev
    ├── hostid
    ├── hostnqn
    ├── model
    ├── numa_node
    ├── nvme1c1n1
    ├── power
    ├── queue_count
    ├── reconnect_delay
    ├── rescan_controller
    ├── reset_controller
    ├── serial
    ├── sqsize
    ├── state
    ├── subsysnqn
    ├── subsystem -> ../../../../../class/nvme  [recursive, not followed]
    ├── transport
    └── uevent
nvmf_create_ctrl(nvmf_device)
  nvme_tcp_create_ctrl
    nvme_init_ctrl
      ctrl->dev = dev
      ctrl->device->class = nvme_class
      ctrl->device->parent = ctrl->dev
      dev_set_name(ctrl->device, "nvme%d", ctrl->instance)
      // /sys/class/nvme-fabrics/ctl/nvmeX
      // /sys/class/nvme/nvmeX
      // /dev/nvmeX
      cdev_device_add(&ctrl->cdev, ctrl->device)
    nvme_tcp_setup_ctrl

nvme subsystem

# tree -lL 2 /sys/class/nvme-subsystem
/sys/class/nvme-subsystem
├── nvme-subsys0 -> ../../devices/virtual/nvme-subsystem/nvme-subsys0
│   ├── firmware_rev
│   ├── iopolicy
│   ├── model
│   ├── nvme0 -> ../../../pci0000:00/0000:00:02.0/0000:05:00.0/nvme/nvme0
│   ├── power
│   ├── serial
│   ├── subsysnqn
│   ├── subsystem -> ../../../../class/nvme-subsystem  [recursive, not followed]
│   └── uevent
└── nvme-subsys1 -> ../../devices/virtual/nvme-subsystem/nvme-subsys1
    ├── firmware_rev
    ├── iopolicy
    ├── model
    ├── nvme1 -> ../../nvme-fabrics/ctl/nvme1
    ├── nvme1n1
    ├── power
    ├── serial
    ├── subsysnqn
    ├── subsystem -> ../../../../class/nvme-subsystem  [recursive, not followed]
    └── uevent
nvme_init_subsystem
  subsys->dev.class = nvme_subsys_class
  dev_set_name(&subsys->dev, "nvme-subsys%d", ctrl->instance)
  // /sys/class/nvme-subsystem/nvme-subsysA
  device_add(&subsys->dev)
  // /sys/class/nvme-subsystem/nvme-subsysA/nvmeX ->
  // /sys/class/nvme-fabrics/ctl/nvmeX
  sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
          dev_name(ctrl->device))

nvme namespace, block device

# tree -L 1 /sys/class/block
/sys/class/block
├── nvme0n1 -> ../../devices/pci0000:00/0000:00:02.0/0000:05:00.0/nvme/nvme0/nvme0n1
├── nvme1c1n1 -> ../../devices/virtual/nvme-fabrics/ctl/nvme1/nvme1c1n1
└── nvme1n1 -> ../../devices/virtual/nvme-subsystem/nvme-subsys1/nvme1n1
nvme_alloc_ns
  nvme_init_ns_head
    nvme_alloc_ns_head
      nvme_mpath_alloc_disk
        if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath)
          return 0;
        head->disk = alloc_disk(0)
        sprintf(head->disk->disk_name, "nvme%dn%d",
                ctrl->subsys->instance, head->instance);
  nvme_set_disk_name(disk_name, ns, ctrl, &flags)
    if (!multipath) {
      sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
    } else if (ns->head->disk) {
      sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
              ctrl->instance, ns->head->instance);
      *flags = GENHD_FL_HIDDEN;
    } else {
      sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance,
              ns->head->instance);
    }
  disk = alloc_disk_node(0, node)
  memcpy(disk->disk_name, disk_name, DISK_NAME_LEN)
  ns->disk = disk
  // /sys/class/nvme-fabrics/ctl/nvmeA/nvmeXcYnZ
  // /dev/nvmeXcYnZ -- hidden if multipathing enabled
  device_add_disk(ctrl->device, ns->disk)
  sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
          &nvme_ns_id_attr_group)
  nvme_mpath_add_disk(ns, id)
    nvme_mpath_set_live
      // /sys/class/nvme-subsystem/nvme-subsysA/nvmeXnY
      // /dev/nvmeXnY
      device_add_disk(&head->subsys->dev, head->disk)
      sysfs_create_group(&disk_to_dev(head->disk)->kobj,
              &nvme_ns_id_attr_group)

最后修改于 2021-10-17