mirror of
https://github.com/AuxXxilium/redpill-lkm5.git
synced 2024-11-23 23:11:02 +07:00
576 lines
32 KiB
C
576 lines
32 KiB
C
/*
|
|
* This file is a SIMPLE (yes, this IS simple) software emulation layer for PCI devices.
|
|
*
|
|
* Before you even start reading it you need to get familiar with references listed below. As the kernel people put it
|
|
* mildly "The world of PCI is vast and full of (mostly unpleasant) surprises.". This module tries to abstract hardware
|
|
* space emulation into highest level API possible.
|
|
*
|
|
*
|
|
* QUICK INTRODUCTION
|
|
* ------------------
|
|
* To use it you need to supply a descriptor (e.g. struct pci_dev_descriptor) and give it domain-unique combination of
|
|
* {bus#, device#, function#}. The domain for most (all?) physical devices is usually 0x0000. This module uses 0x0001 to
|
|
* avoid conflicts.
|
|
* Fast PCI facts (read this to add devices):
|
|
* - every device in the system has a BDF location (256 buses max, 32 devices/bus, 8 functions/device = 65536)
|
|
* - every device MUST contain a function 0 (and may contain 1-7)
|
|
* - function is kind-of a subdevice (e.g. a quad-port network card will usually have functions 0-3)
|
|
* - you should (but you don't HAVE to) set "master bus" (.command |= PCI_COMMAND_MASTER) for every function 0 device
|
|
* instance
|
|
* - every device MUST have a valid VID/DEV. None of the fields can be 0x0000 or 0xFFFF (they have special meanings)
|
|
* - this module does NOT have any support for capabilities (CAPs) as they're variable length and we don't want to
|
|
* force every device struct to take 256K of memory (todo if needed?)
|
|
* - there are three types of headers: PCI device, PCI-PCI bridge, PCI-CardBus bridge. Only the first one was tested.
|
|
* The second one allows for more levels of the tree and should work if configured properly (see struct
|
|
* pci_pci_bridge_descriptor) but it wasn't needed yet. The third one is practically a bitrot now.
|
|
* - EVERYTHING IN PCI IS LITTLE ENDIAN no matter what your CPU says. Triple check if you're setting values correctly.
|
|
* Then you realize you set them incorrectly.
|
|
* - "devfn" in Linux terminology does NOT mean "device function" but rather "device# and function#". It is described
|
|
* in drivers/pci/search.c as "encodes number of PCI slot in which the desired PCI device resides and the logical
|
|
* device number within that slot in case of multi-function devices".
|
|
* You can use macros PCI_SLOT() and PCI_FUNC() to get dev# and fn# from that field.
|
|
* - Linux provides class & subclass constants (PCI_CLASS_* in include/linux/pci_ids.h). However they're defined as
|
|
* either:
|
|
* - 8 bit class
|
|
* - e.g. PCI_BASE_CLASS_SERIAL [0x0c]
|
|
* - can be put into pci_dev_descriptor.class directly
|
|
* - 16 bit class+subclass
|
|
* - e.g. PCI_CLASS_SERIAL_USB [0x0c03]
|
|
* - use U16_CLASS_TO_U8_CLASS(PCI_CLASS_SERIAL_USB) for pci_dev_descriptor.class [0x0c]
|
|
* - use U16_CLASS_TO_U8_SUBCLASS(PCI_CLASS_SERIAL_USB) for pci_dev_descriptor.subclass [0x03]
|
|
* - 24 bit (sic!) class+subclass+prog_if
|
|
* - e.g. PCI_CLASS_SERIAL_USB_EHCI (0x0c0320)
|
|
* - use U24_CLASS_TO_U8_CLASS(PCI_CLASS_SERIAL_USB_EHCI) for pci_dev_descriptor.class [0x0c]
|
|
* - use U24_CLASS_TO_U8_SUBCLASS(PCI_CLASS_SERIAL_USB_EHCI) for pci_dev_descriptor.subclass [0x03]
|
|
* - use U24_CLASS_TO_U8_PROGIF(PCI_CLASS_SERIAL_USB_EHCI) for pci_dev_descriptor.prog_if [0x20]
|
|
* - "pci_dev_conf_default_normal_dev" provides a sane-default device where you need to only set: vid, dev, class,
|
|
* and subclass.
|
|
*
|
|
*
|
|
* DEBUGGING DEVICES
|
|
* -----------------
|
|
* To see the tree you can use "lspci -tvnn". Here's a quick cheat-sheet from the output format:
|
|
* 0001:0a:00.0 Class 0000: Device 1b4b:9235 (rev ff)
|
|
* ^ ^ ^ ^ ^ ^ ^ ^
|
|
* | | | | | | | |_______ pci_dev_descriptor.class_revision (lower 8 bits)
|
|
* | | | | | | |_________________ pci_dev_descriptor.dev (device ID)
|
|
* | | | | | |______________________ pci_dev_descriptor.vid (vendor ID)
|
|
* | | | | |___________________________________ pci_dev_descriptor.class_revision (higher 24 bits)
|
|
* | | | |___________________________________________ PCI device function
|
|
* | | |______________________________________________ device num on the bus
|
|
* | |_________________________________________________ PCI bus no
|
|
* |_____________________________________________________ PCIBUS_VIRTUAL_DOMAIN
|
|
*
|
|
*
|
|
* To debug the Linux PCI subsystem side of things these will be useful:
|
|
* echo 'file probe.c +p' > /sys/kernel/debug/dynamic_debug/control
|
|
* echo 'file search.c +p' > /sys/kernel/debug/dynamic_debug/control
|
|
* echo 'file delete.c +p' > /sys/kernel/debug/dynamic_debug/control
|
|
*
|
|
*
|
|
* INTERNAL STRUCTURE
|
|
* ------------------
|
|
* The module emulates PCI on the lowest possible level - it literally fakes the otherwise-physical memory of
|
|
* configuration registries.
|
|
*
|
|
* The two header types are memory-mapped as follows: (PCI-CardBus isn't shown as nobody uses that)
|
|
* HEADER TYPE 0x00 (Normal Device) HEADER TYPE 0x01 (PCI-PCI Bridge)
|
|
* 31 16 15 0 hh 31 16 15 0 hh
|
|
* ╠══════════╩══════════╬══════════╩═══════════╬════ ╠═════════╩══════════╬═══════════╩═══════════╬════
|
|
* ║ Device ID ║ Vendor ID ║ 00 ║ Device ID ║ Vendor ID ║ 00
|
|
* ╠═════════════════════╬══════════════════════╬════ ╠════════════════════╬═══════════════════════╬════
|
|
* ║ Status ║ Command ║ 04 ║ Status ║ Command ║ 04
|
|
* ╠══════════╦══════════╬══════════╦═══════════╬════ ╠═════════╦══════════╬═══════════╦═══════════╬════
|
|
* ║ Class ║ Subclass ║ ProgIF ║ Rev. ID ║ 08 ║ Class ║ Subclass ║ ProgIF ║ Rev. ID ║ 08
|
|
* ╠══════════╬══════════╬══════════╬═══════════╬════ ╠═════════╬══════════╬═══════════╬═══════════╬════
|
|
* ║ BIST ║ HeaderT ║ Lat.Tmr. ║ Cache LS ║ 0c ║ BIST ║ HeaderT ║ Lat.Tmr. ║ Cache LS ║ 0c
|
|
* ╠══════════╩══════════╩══════════╩═══════════╬════ ╠═════════╩══════════╩═══════════╩═══════════╬════
|
|
* ║ BAR0 ║ 10 ║ BAR0 ║ 10
|
|
* ╠════════════════════════════════════════════╬════ ╠════════════════════════════════════════════╬════
|
|
* ║ BAR1 ║ 14 ║ BAR1 ║ 14
|
|
* ╠════════════════════════════════════════════╬════ ╠═════════╦══════════╦═══════════╦═══════════╬════
|
|
* ║ BAR2 ║ 18 ║ SecLatT ║ SubordB# ║ SecBus# ║ PriBus# ║ 18
|
|
* ╠════════════════════════════════════════════╬════ ╠═════════╩══════════╬═══════════╬═══════════╬════
|
|
* ║ BAR3 ║ 1c ║ Secondary Status ║ I/O Limit ║ I/O Base ║ 1c
|
|
* ╠════════════════════════════════════════════╬════ ╠════════════════════╬═══════════╩═══════════╬════
|
|
* ║ BAR4 ║ 20 ║ Memory limit ║ Memory base ║ 20
|
|
* ╠════════════════════════════════════════════╬════ ╠════════════════════╬═══════════════════════╬════
|
|
* ║ BAR5 ║ 24 ║ Prefetch. Mem. L. ║ Prefetch. Mem. B. ║ 24
|
|
* ╠════════════════════════════════════════════╬════ ╠════════════════════╩═══════════════════════╬════
|
|
* ║ Cardbus CIS ptr ║ 28 ║ Prefetchable Base Upper 32 bit ║ 28
|
|
* ╠═════════════════════╦══════════════════════╬════ ╠════════════════════════════════════════════╬════
|
|
* ║ Subsys ID ║ Subsys VID ║ 2c ║ Prefetchable Limit Upper 32 bit ║ 2c
|
|
* ╠═════════════════════╩══════════════════════╬════ ╠════════════════════╦═══════════════════════╬════
|
|
* ║ Exp. ROM Base Addr. ║ 30 ║ I/O Lim. Up. 16b ║ I/O Base Up. 16b ║ 30
|
|
* ╠════════════════════════════════╦═══════════╬════ ╠════════════════════╩═══════════╦═══════════╬════
|
|
* ║ *RSV* ║ Cap. ptr ║ 34 ║ *RSV* ║ Cap. ptr ║ 34
|
|
* ╠════════════════════════════════╩═══════════╬════ ╠════════════════════════════════╩═══════════╬════
|
|
* ║ *RSV* ║ 38 ║ Exp. ROM Base Addr. ║ 38
|
|
* ╠══════════╦══════════╦══════════╦═══════════╬════ ╠════════════════════╦═══════════╦═══════════╬════
|
|
* ║ Max Lat. ║ Min Gnt. ║ Int. pin ║ Int. lin. ║ 3c ║ Bridge Control ║ Int. pin ║ Int. lin. ║ 3c
|
|
* ╠══════════╩══════════╩══════════╩═══════════╬═══════ ╠════════════════════╩═══════════╩═══════════╬═══════
|
|
* ║ Optional Dev.-Dep. Config (192 bytes) ║ 40-100 ║ Optional Dev.-Dep. Config (192 bytes) ║ 40-100
|
|
* ╚════════════════════════════════════════════╩═══════ ╚════════════════════════════════════════════╩═══════
|
|
*
|
|
*
|
|
* LINUX PCI SUBSYSTEM SCANNING ROUTINE
|
|
* ------------------------------------
|
|
* The kernel has a surprisingly readable code for the PCI scanning. We recommend starting from drivers/pci/probe.c and
|
|
* "struct pci_bus *pci_scan_bus()" function.
|
|
* In a big simplification it goes something like this:
|
|
* probe.c
|
|
* pci_scan_bus()
|
|
* => pci_scan_child_bus
|
|
* => loop pci_scan_slot(bus, devfn) with devfn=<0,0x100> every 8 bytes
|
|
* => pci_scan_single_device
|
|
* => pci_get_slot to check if device already exists
|
|
* => pci_scan_device to probe the device
|
|
* => pci_bus_read_dev_vendor_id
|
|
* => .... [and others]
|
|
* => pci_device_add if device probe succeeded
|
|
*
|
|
* THE ACPI SAGA
|
|
* -------------
|
|
* If you were thinking PCI is hard you haven't heard about ACPI. Kernels starting from v3.13 require ACPI companion
|
|
* for PCI devices when the system was configured to run on an ACPI-compliant x86 platform. This isn't an unusual
|
|
* assumption. Before v3.13 the struct x86_sysdata contained a simple ACPI handle, which could be NULL. Now it should
|
|
* contain a structure. However it still PROBABLY can be NULL.
|
|
* See https://github.com/torvalds/linux/commit/7b1998116bbb2f3e5dd6cb9a8ee6db479b0b50a9 for details of that change.
|
|
*
|
|
* When the structure (=ACPI data) is NULL the error "ACPI: \: failed to evaluate _DSM (0x1001)" will be logged upon
|
|
* scanning. However it seems to be harmless. There are two ways to get rid of this error: 1) Implement a proper ACPI
|
|
* _DSM [no, just NO], or 2) use override_symbol() for acpi_evaluate_dsm() with a function doing the following (for the
|
|
* time of scanning ONLY):
|
|
* union acpi_object *obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL);
|
|
* obj->type = ACPI_TYPE_INTEGER;
|
|
* obj->integer.value = 1;
|
|
* return obj;
|
|
*
|
|
* x86 BUS SCANNING BUG (>=v4.1)
|
|
* -----------------------------
|
|
* Since v4.1 adding a new bus under a different domain will cause devices on the bus to not be fully populated. See the
|
|
* comment in "vpci_add_device()" here for details & a simple fix.
|
|
*
|
|
* KNOWN BUGS
|
|
* ----------
|
|
* Under Linux v3.10 once bus is added it cannot be fully removed (or we didn't find the correct way). When you do the
|
|
* initial add and scan everything works correctly. You can later even remove that bus BUT the kernel leaves some sysfs
|
|
* stuff behind in /sys/devices (while /sys/bus/pci/devices are cleaned up). This means that if you try to re-register
|
|
* the same bus it explodes with sysfs duplication errors.
|
|
* As of now we have no idea how to go around that.
|
|
*
|
|
*
|
|
* References:
|
|
* - https://stackoverflow.com/a/31465293 (how PCI subsystem works)
|
|
* - https://docs.oracle.com/cd/E19120-01/open.solaris/819-3196/hwovr-25/index.html (PCI working theory)
|
|
* - https://elixir.bootlin.com/linux/v3.10.108/source/include/uapi/linux/pci_regs.h (Linux PCI registers)
|
|
* - https://elixir.bootlin.com/linux/v3.10.108/source/drivers/pci/probe.c (PCI scanning code; very readable)
|
|
* - https://blog.csdn.net/moon146/article/details/18988849 (scanning process)
|
|
* - https://wiki.osdev.org/PCI (details regarding flags & commands)
|
|
*/
|
|
#include "virtual_pci.h"
|
|
#include "../common.h"
|
|
#include "../config/vpci_types.h" //MAX_VPCI_BUSES
|
|
#include <linux/pci.h>
|
|
#include <linux/pci_regs.h> //PCI device header constants
|
|
#include <linux/pci_ids.h> //Constants for vendors, classes, and other
|
|
#include <linux/list.h> //list_for_each
|
|
#include <linux/device.h> //device_del
|
|
|
|
//PCI domain used for all emulated buses; normal PC buses are (always?) on domain 0, this is just the next one
#define PCIBUS_VIRTUAL_DOMAIN 0x0001
//Value returned for the VID+DID dword when no device is present (per PCI spec)
#define PCI_DEVICE_NOT_FOUND_VID_DID 0xFFFFFFFF

/*
 * As per PCI spec:
 *   If a single function device is detected (i.e., bit 7 in the Header Type register of function 0 is 0), no more
 *   functions for that Device Number will be checked. If a multi-function device is detected (i.e., bit 7 in the
 *   Header Type register of function 0 is 1), then all remaining Function Numbers will be checked.
 *
 * PCI_HEADER_TO_MULTI() sets bit 7 on a header type value, IS_PCI_HEADER_MULTI() evaluates to 1 when bit 7 is set
 * and to 0 otherwise.
 */
#define PCI_HEADER_TO_MULTI(x) ((x) | 0x80)
#define IS_PCI_HEADER_MULTI(x) (((x) & 0x80) != 0)
|
|
|
|
//Model of a default config for a device
|
|
const struct pci_dev_descriptor pci_dev_conf_default_normal_dev = {
|
|
.vid = 0xDEAD, //set me!
|
|
.dev = 0xBEEF, //set me!
|
|
|
|
.command = 0x0000,
|
|
.status = 0x0000,
|
|
|
|
.rev_id = PCI_DSC_REV_NONE,
|
|
.prog_if = PCI_DSC_PROGIF_NONE,
|
|
.subclass = U16_CLASS_TO_U8_CLASS(PCI_CLASS_NOT_DEFINED), //set me!
|
|
.class = U16_CLASS_TO_U8_CLASS(PCI_CLASS_NOT_DEFINED), //set me!
|
|
|
|
.cache_line_size = 0x00,
|
|
.latency_timer = 0x00,
|
|
.header_type = PCI_HEADER_TYPE_NORMAL,
|
|
.bist = PCI_DSC_BIST_NONE, //Built-In Self Test
|
|
|
|
.bar0 = PCI_DSC_NULL_BAR,
|
|
.bar1 = PCI_DSC_NULL_BAR,
|
|
.bar2 = PCI_DSC_NULL_BAR,
|
|
.bar3 = PCI_DSC_NULL_BAR,
|
|
.bar4 = PCI_DSC_NULL_BAR,
|
|
.bar5 = PCI_DSC_NULL_BAR,
|
|
|
|
.cardbus_cis = 0x00000000,
|
|
|
|
.subsys_vid = 0x0000, //you probably want to set this
|
|
.subsys_id = 0x0000, //you probably want to set this
|
|
|
|
.exp_rom_base_addr = 0x00000000,
|
|
|
|
.cap_ptr = PCI_DSC_NULL_CAP,
|
|
.reserved_34_8_15 = PCI_DSC_RSV8,
|
|
.reserved_34_16_31 = PCI_DSC_RSV16,
|
|
|
|
.reserved_38h = 0x00000000,
|
|
|
|
.interrupt_line = PCI_DSC_NO_INT_LINE,
|
|
.interrupt_pin = PCI_DSC_NO_INT_PIN,
|
|
.min_gnt = PCI_DSC_ZERO_BURST,
|
|
.max_lat = PCI_DSC_INF_LATENCY,
|
|
};
|
|
|
|
//Book-keeping entry for a single emulated PCI function registered with this module
struct virtual_device {
    //Points at the bus number; equals bus->number but is also usable before the bus struct exists (e.g. during the
    // initial scan which creates the bus)
    unsigned char *bus_no;
    unsigned char dev_no; //device# on the bus
    unsigned char fn_no;  //function# within the device
    struct pci_bus *bus;  //kernel bus the device sits on
    void *descriptor;     //raw config-space image (pci_dev_descriptor or pci_pci_bridge_descriptor)
};
|
|
static unsigned int free_bus_idx = 0; //Used to find next free bus and for indexing other arrays
|
|
static struct pci_bus *buses[MAX_VPCI_BUSES] = { NULL }; //All virtual buses
|
|
|
|
static unsigned int free_dev_idx = 0; //Used to find next free bus and for indexing other arrays
|
|
static struct virtual_device *devices[MAX_VPCI_DEVS] = { NULL }; //All virtual devices
|
|
|
|
//Macros to easily iterate over lists above
|
|
#define for_each_bus_idx() for (int i = 0, last_bus_idx = free_bus_idx-1; i <= last_bus_idx; i++)
|
|
#define for_each_dev_idx() for (int i = 0, last_dev_idx = free_dev_idx-1; i <= last_dev_idx; i++)
|
|
|
|
/**
|
|
* Prints pci_dev_descriptor or pci_pci_bridge_descriptor
|
|
*/
|
|
void print_pci_descriptor(void *test_dev)
|
|
{
|
|
pr_loc_dbg("Printing PCI descriptor @ %p", test_dev);
|
|
pr_loc_dbg_raw("\n31***********0***ADDR*******************\n");
|
|
u8 *ptr = (u8 *)test_dev;
|
|
DBG_ALLOW_UNUSED(*ptr);
|
|
|
|
for (int row = 3; row < 64; row += 4) {
|
|
for (int byte = 0; byte > -4; byte--) {
|
|
pr_loc_dbg_raw("%02x ", *(ptr + row + byte));
|
|
if (byte == -1) pr_loc_dbg_raw(" ");
|
|
}
|
|
|
|
pr_loc_dbg_raw(" | 0x%02X\n", row - 3);
|
|
}
|
|
//The following format will be useful when/if CAPs are implemented
|
|
// printk("\n--------------Device Private--------------\n");
|
|
// printk("00000000 00000000 00000000 00000000 | xxx\n");
|
|
// printk("******************************************\n");
|
|
}
|
|
|
|
/**
|
|
* @param bus The bus (may be under first scan so only its number may be present in virtual_device)
|
|
* @param devfn Device AND its function; it's a 0-256 number allowing for 32 devices with 8 functions each
|
|
* @param where Offset in the device structure to read
|
|
* @param size How many BYTES (not bits) to read
|
|
* @param val Pointer to save read bytes
|
|
* @return PCIBIOS_*
|
|
*/
|
|
static int pci_read_cfg(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val)
|
|
{
|
|
//devfn is a combination of device number on bus and function number (Bus/Device/Function addressing)
|
|
//Each device which exists MUST implement function 0. So every 8th value of devfn we have a new device.
|
|
unsigned char vdev_no = PCI_SLOT(devfn);
|
|
unsigned char vdev_fn = PCI_FUNC(devfn);
|
|
|
|
void *pci_descriptor = NULL;
|
|
|
|
//Very noisy!
|
|
//pr_loc_dbg("Read SYN wh=0x%d sz=%d B / %d for vDEV @ bus=%02x dev=%02x fn=%02x", where, size, size * 8,
|
|
// bus->number, vdev_no, vdev_fn);
|
|
for_each_dev_idx() {
|
|
//Very noisy!
|
|
//pr_loc_dbg("Checking vDEV @ bus=%02x dev=%02x fn=%02x", *devices[i]->bus_no, devices[i]->dev_no,
|
|
// devices[i]->fn_no);
|
|
|
|
//We cannot use devices[i]->bus->number during scan as the bus may just being created and no ->bus is available
|
|
if(*devices[i]->bus_no == bus->number && devices[i]->dev_no == vdev_no && devices[i]->fn_no == vdev_fn) {
|
|
//Very noisy!
|
|
//pr_loc_dbg("Found matching vDEV @ bus=%02x dev=%02x fn=%02x => vidx=%d mf=%d", bus->number, vdev_no,
|
|
// vdev_fn, i,
|
|
// IS_PCI_HEADER_MULTI(((struct pci_dev_descriptor *) devices[i]->descriptor)->header_type) ? 1:0);
|
|
pci_descriptor = devices[i]->descriptor;
|
|
break;
|
|
}
|
|
};
|
|
|
|
if (!pci_descriptor) { //This is not a hack - this is per PCI spec to return special "not found pid/vid"
|
|
if (where == PCI_VENDOR_ID || where == PCI_DEVICE_ID)
|
|
*val = PCI_DEVICE_NOT_FOUND_VID_DID;
|
|
|
|
//Very noisy!
|
|
//pr_loc_dbg("Read NAK wh=0x%d sz=%d B / %d for vDEV @ bus=%02x dev=%02x fn=%02x", where, size, size * 8, bus->number,
|
|
// vdev_no, vdev_fn);
|
|
return PCIBIOS_DEVICE_NOT_FOUND;
|
|
}
|
|
|
|
//Very noisy!
|
|
//pr_loc_dbg("Read ACK wh=0x%d sz=%d B / %d for vDEV @ bus=%02x dev=%02x fn=%02x", where, size, size * 8, bus->number,
|
|
// vdev_no, vdev_fn);
|
|
memcpy(val, (u8 *)pci_descriptor + where, size);
|
|
|
|
return PCIBIOS_SUCCESSFUL;
|
|
}
|
|
|
|
/**
 * Config-space write callback (see pci_shim_ops); the emulated config space is read-only
 *
 * All parameters are ignored.
 *
 * @return always PCIBIOS_SET_FAILED
 */
static int pci_write_cfg(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val)
{
    return PCIBIOS_SET_FAILED;
}
|
|
|
|
//Definition of callbacks the PCI subsystem uses to query the root bus
|
|
static struct pci_ops pci_shim_ops = {
|
|
.read = pci_read_cfg,
|
|
.write = pci_write_cfg
|
|
};
|
|
|
|
//x86-specific sysdata which is expected to be present while running on x86 (if it's not you will get a KP)
|
|
static struct pci_sysdata x86_sysdata = {
|
|
.domain = PCIBUS_VIRTUAL_DOMAIN,
|
|
#ifdef CONFIG_ACPI
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,13,0)
|
|
.companion = NULL, //See https://github.com/torvalds/linux/commit/7b1998116bbb2f3e5dd6cb9a8ee6db479b0b50a9
|
|
#else
|
|
.acpi = NULL,
|
|
#endif //LINUX_VERSION_CODE
|
|
#endif //CONFIG_ACPI
|
|
.iommu = NULL
|
|
};
|
|
|
|
//_NO => number according to the PCI spec
//_IDX => index in arrays (internal to this emulation layer only)
#define BUS_NO_VALID(x) ((x) >= 0 && (x) <= 0xFF) //Check if a given bus# is valid according to the PCI spec
//Check if a given dev# is valid according to the PCI spec: 32 devices per bus means dev# 0-31 (devfn holds only 5 bits
// of it, so a dev# of 32 could never be matched during a scan)
#define DEV_NO_VALID(x) ((x) >= 0 && (x) <= 0x1F)
#define FN_NO_VALID(x) ((x) >= 0 && (x) <= 7) //Check if a given function# is valid according to the PCI spec
//Index checks: arrays have MAX_* elements so indexes 0..MAX_*-1 are all usable
#define VBUS_IDX_VALID(x) ((x) >= 0 && (x) < MAX_VPCI_BUSES) //Check if virtual bus INDEX is valid for this emulator
#define VBUS_IDX_USED(x) ((x) >= 0 && (x) < free_bus_idx) //Check if a given bus index is used now in the emulator
#define VDEV_IDX_VALID(x) ((x) >= 0 && (x) < MAX_VPCI_DEVS) //Check if virtual device INDEX is valid for this emulator
#define VDEV_IDX_USED(x) ((x) >= 0 && (x) < free_dev_idx) //Check if a given device index is used now in the emulator
|
|
|
|
static inline int validate_bdf(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no)
|
|
{
|
|
if (unlikely(!BUS_NO_VALID(bus_no))) {
|
|
pr_loc_err("%02x is not a valid PCI bus number", bus_no);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (unlikely(!DEV_NO_VALID(dev_no))) {
|
|
pr_loc_err("%02x is not a valid PCI device number", dev_no);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (unlikely(!FN_NO_VALID(fn_no))) {
|
|
pr_loc_err("%02x is not a valid PCI device function number", fn_no);
|
|
return -EINVAL;
|
|
}
|
|
|
|
//if the free device index is not valid it means we're out of free IDs for devices
|
|
if (unlikely(!VDEV_IDX_VALID(free_dev_idx))) {
|
|
pr_loc_bug("No more device indexes are available (max devs: %d)", MAX_VPCI_DEVS);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
//If the device has the same B/D/F address it is a duplicate
|
|
for_each_dev_idx() {
|
|
if (
|
|
likely(*devices[i]->bus_no == bus_no) &&
|
|
unlikely(devices[i]->dev_no == dev_no && devices[i]->fn_no == fn_no)
|
|
) {
|
|
pr_loc_err("Device bus=%02x dev=%02x fn=%02x already exists in vidx=%d", bus_no, dev_no, fn_no, i);
|
|
return -EEXIST;
|
|
}
|
|
};
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline struct pci_bus *get_vbus_by_number(unsigned char bus_no)
|
|
{
|
|
for_each_bus_idx() { //Determine whether we need to rescan existing bus after adding a device OR scan a new root bus
|
|
if (buses[i]->number == bus_no) {
|
|
pr_loc_dbg("Found existing bus_no=%d @ bidx=%d", bus_no, i);
|
|
return buses[i];
|
|
break;
|
|
}
|
|
};
|
|
|
|
return NULL;
|
|
}
|
|
|
|
const __must_check struct virtual_device *
|
|
vpci_add_device(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, void *descriptor)
|
|
{
|
|
pr_loc_dbg("Attempting to add vPCI device [printed below] @ bus=%02x dev=%02x fn=%02x", bus_no, dev_no, fn_no);
|
|
print_pci_descriptor(descriptor);
|
|
|
|
int error = validate_bdf(bus_no, dev_no, fn_no);
|
|
if (error != 0)
|
|
return ERR_PTR(error);
|
|
|
|
struct pci_bus *bus = get_vbus_by_number(bus_no);
|
|
|
|
//At this point we know the device can be added either to a new or existing bus so we have to populate their struct
|
|
struct virtual_device *device;
|
|
kmalloc_or_exit_ptr(device, sizeof(struct virtual_device));
|
|
|
|
device->dev_no = dev_no;
|
|
device->fn_no = fn_no;
|
|
device->descriptor = descriptor;
|
|
|
|
if (bus) { //We have an existing bus to use
|
|
device->bus_no = &bus->number;
|
|
devices[free_dev_idx++] = device;
|
|
|
|
//We cannot use "pci_scan_single_device" here in case there are mf devices
|
|
pci_rescan_bus(bus); //this cannot fail - it simply return max device num
|
|
|
|
pr_loc_err("Added device with existing bus @ bus=%02x dev=%02x fn=%02x", *device->bus_no, device->dev_no,
|
|
device->fn_no);
|
|
return device;
|
|
}
|
|
|
|
//No existing bus - check if we can add a new one
|
|
//if the free bus index is not valid it means we're out of free IDs for buses
|
|
if (unlikely(!VBUS_IDX_VALID(free_bus_idx))) {
|
|
pr_loc_bug("No more bus indexes are available (max buses: %d)", MAX_VPCI_BUSES);
|
|
return ERR_PTR(-ENOMEM);
|
|
}
|
|
|
|
//Since we don't have a bus so we need to add the device with a mock dev_no and trigger scanning (which actually
|
|
// creates the bus). While it sounds counter-intuitive it is how the PCI subsystem works.
|
|
unsigned char tmp_bus_no = bus_no; //It will be valid for the time of initial scan
|
|
device->bus_no = &tmp_bus_no;
|
|
devices[free_dev_idx++] = device;
|
|
|
|
bus = pci_scan_bus(*device->bus_no, &pci_shim_ops, &x86_sysdata);
|
|
if (!bus) {
|
|
pr_loc_err("pci_scan_bus failed - cannot add new bus");
|
|
devices[free_dev_idx--] = NULL; //Reverse adding & ensure idx is still free
|
|
kfree(device); //Free memory for the device itself
|
|
return ERR_PTR(-EIO);
|
|
}
|
|
|
|
device->bus_no = &bus->number; //Replace temp bus number pointer with the actual bus struct pointer
|
|
device->bus = bus;
|
|
buses[free_bus_idx++] = bus;
|
|
|
|
/*
|
|
* There was a commit in v4.1 which made "subtle" change aimed to "cleanup control flow" by moving
|
|
* pci_bus_add_devices(bus) from drivers/pci/probe.c:pci_scan_bus() to a higher order
|
|
* arch/x86/pci/common.c:pcibios_scan_root().
|
|
* However this means that adding a bus with a domain different than 0 as used on x86 with BIOS/ACPI causes some
|
|
* resources to not be created (e.g. /sys/bus/pci/devices/..../config) which in turn breaks a ton of tools (lspci
|
|
* included). This is because pci_bus_add_devices() calls pci_create_sysfs_dev_files().
|
|
* It's important to mention that this is broken only for new buses - pci_rescan_bus() calls pci_bus_add_devices().
|
|
*
|
|
* Don't even fucking ask how long we looked for that...
|
|
*
|
|
* See https://github.com/torvalds/linux/commit/8e795840e4d89df3d594e736989212ee8a4a1fca#
|
|
*/
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0)
|
|
pr_loc_dbg("Linux >=v4.1 quirk: calling pci_bus_add_devices(bus) manually");
|
|
pci_bus_add_devices(bus);
|
|
#endif
|
|
|
|
pr_loc_inf("Added device with new bus @ bus=%02x dev=%02x fn=%02x", *device->bus_no, device->dev_no, device->fn_no);
|
|
return device;
|
|
}
|
|
|
|
const struct virtual_device *
|
|
vpci_add_single_device(unsigned char bus_no, unsigned char dev_no, struct pci_dev_descriptor *descriptor)
|
|
{
|
|
if (unlikely(IS_PCI_HEADER_MULTI(descriptor->header_type))) {
|
|
pr_loc_bug("Attempted to use %s() to add multifunction device."
|
|
"Did you mean to use vpci_add_multifunction_device()?", __FUNCTION__);
|
|
return ERR_PTR(-EINVAL);
|
|
}
|
|
|
|
return vpci_add_device(bus_no, dev_no, 0x00, descriptor);
|
|
}
|
|
|
|
const struct virtual_device *
|
|
vpci_add_multifunction_device(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no,
|
|
struct pci_dev_descriptor *descriptor)
|
|
{
|
|
descriptor->header_type = PCI_HEADER_TO_MULTI(descriptor->header_type);
|
|
|
|
return vpci_add_device(bus_no, dev_no, fn_no, descriptor);
|
|
}
|
|
|
|
const struct virtual_device *
|
|
vpci_add_single_bridge(unsigned char bus_no, unsigned char dev_no, struct pci_pci_bridge_descriptor *descriptor)
|
|
{
|
|
if (unlikely(IS_PCI_HEADER_MULTI(descriptor->header_type))) {
|
|
pr_loc_bug("Attempted to use %s() to add multifunction device."
|
|
"Did you mean to use vpci_add_multifunction_device()?", __FUNCTION__);
|
|
return ERR_PTR(-EINVAL);
|
|
}
|
|
|
|
return vpci_add_device(bus_no, dev_no, 0x00, descriptor);
|
|
}
|
|
|
|
const struct virtual_device *
|
|
vpci_add_multifunction_bridge(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no,
|
|
struct pci_pci_bridge_descriptor *descriptor)
|
|
{
|
|
descriptor->header_type = PCI_HEADER_TO_MULTI(descriptor->header_type);
|
|
|
|
return vpci_add_device(bus_no, dev_no, fn_no, descriptor);
|
|
}
|
|
|
|
int vpci_remove_all_devices_and_buses(void)
|
|
{
|
|
//The order here is crucial - kernel WILL NOT remove references to devices on bus removal (and cause a KP)
|
|
//Doing this in any other order will cause an instant KP when PCI subsys tries to access its structures (e.g. lspci)
|
|
//However, this is still leaving dangling things in /sys/devices which cannot be removed (kernel bug?)
|
|
|
|
struct pci_dev *pci_dev, *pci_dev_n;
|
|
for_each_bus_idx() {
|
|
list_for_each_entry_safe(pci_dev, pci_dev_n, &buses[i]->devices, bus_list) {
|
|
pr_loc_dbg("Detaching vDEV dev=%02x fn=%02x from bus=%02x [add=%d]", PCI_SLOT(pci_dev->devfn),
|
|
#if LINUX_VERSION_CODE <= KERNEL_VERSION(5,0,0)
|
|
PCI_FUNC(pci_dev->devfn), buses[i]->number, pci_dev->is_added);
|
|
#else
|
|
PCI_FUNC(pci_dev->devfn), buses[i]->number, 0); // Not found a replacement for pci_dev->is_added
|
|
#endif
|
|
pci_stop_and_remove_bus_device(pci_dev);
|
|
}
|
|
}
|
|
|
|
for_each_dev_idx() {
|
|
pr_loc_dbg("Removing PCI vDEV @ didx %d", i);
|
|
kfree(devices[i]);
|
|
devices[i] = NULL;
|
|
};
|
|
free_dev_idx = 0;
|
|
|
|
for_each_bus_idx() {
|
|
pr_loc_dbg("Removing child PCI vBUS @ bidx %d", i);
|
|
pci_rescan_bus(buses[i]);
|
|
pci_remove_bus(buses[i]);
|
|
buses[i] = NULL;
|
|
}
|
|
free_bus_idx = 0;
|
|
|
|
pr_loc_inf("All vPCI devices and buses removed");
|
|
|
|
return -EIO; //This is hardcoded to return an error as there's a known bug (see "KNOWN BUGS" in the file header)
|
|
}
|