diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..53cf23a
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,783 @@
+# This CMakeLists file is for usage with CLion (and maybe other) IDEs ONLY. Do NOT attempt to build the project with
+# CMake as it will fail (kernel build process is tailored for Makefile while CLion's support for Makefile is... meh)
+
+cmake_minimum_required(VERSION 3.0...3.10) # range form: still accepts 3.0, but newer CMake no longer rejects/warns about pre-3.5 policies
+project(redpill C)
+
+set(CMAKE_C_STANDARD 11)
+include_directories(compat/toolkit/include)
+
+add_definitions(-DLINUX_VERSION_CODE=199273) # (3 << 16) + (10 << 8) + 105, i.e. KERNEL_VERSION(3, 10, 105)
+include_directories(../linux-3.10.x-bromolow-25426/include)
+include_directories(../linux-3.10.x-bromolow-25426/arch/x86/include)
+include_directories(../linux-3.10.x-bromolow-25426/arch/x86/include/uapi)
+add_definitions(-DCONFIG_SYNO_X86_SERIAL_PORT_SWAP)
+
+#add_definitions(-DLINUX_VERSION_CODE=263227) # (4 << 16) + (4 << 8) + 59, i.e. KERNEL_VERSION(4, 4, 59)
+#include_directories(../linux-4.4.x-apollolake-25426/include)
+#include_directories(../linux-4.4.x-apollolake-25426/arch/x86/include)
+#include_directories(../linux-4.4.x-apollolake-25426/arch/x86/include/uapi)
+
+# Custom options in our makefile
+add_definitions(-DDBG_EXECVE)
+
+# RP custom definitions
+add_definitions(-DRP_MODULE_TARGET_VER=6)
+
+# Generic options
+add_definitions(-D__KERNEL__)
+add_definitions(-DMODULE)
+add_definitions(-DKBUILD_MODNAME=\"dummy\")
+
+# bromolow + broadwell ones (minus CONFIG_SYNO_* which are replaced with "MY_ABC_HERE" in the kernel anyway)
+add_definitions(-DCONFIG_64BIT)
+add_definitions(-DCONFIG_X86_64)
+add_definitions(-DCONFIG_X86)
+add_definitions(-DCONFIG_INSTRUCTION_DECODER)
+add_definitions(-DCONFIG_LOCKDEP_SUPPORT)
+add_definitions(-DCONFIG_STACKTRACE_SUPPORT)
+add_definitions(-DCONFIG_HAVE_LATENCYTOP_SUPPORT)
+add_definitions(-DCONFIG_MMU)
+add_definitions(-DCONFIG_NEED_DMA_MAP_STATE)
+add_definitions(-DCONFIG_NEED_SG_DMA_LENGTH)
+add_definitions(-DCONFIG_GENERIC_ISA_DMA)
+add_definitions(-DCONFIG_GENERIC_BUG)
+add_definitions(-DCONFIG_GENERIC_BUG_RELATIVE_POINTERS)
+add_definitions(-DCONFIG_GENERIC_HWEIGHT
+    -DCONFIG_ARCH_MAY_HAVE_PC_FDC
+    -DCONFIG_RWSEM_XCHGADD_ALGORITHM
+    -DCONFIG_GENERIC_CALIBRATE_DELAY
+    -DCONFIG_ARCH_HAS_CPU_RELAX
+    -DCONFIG_ARCH_HAS_CACHE_LINE_SIZE
+    -DCONFIG_ARCH_HAS_CPU_AUTOPROBE
+    -DCONFIG_HAVE_SETUP_PER_CPU_AREA
+    -DCONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK
+    -DCONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+    -DCONFIG_ARCH_HIBERNATION_POSSIBLE
+    -DCONFIG_ARCH_SUSPEND_POSSIBLE
+    -DCONFIG_ZONE_DMA32
+    -DCONFIG_AUDIT_ARCH
+    -DCONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING
+    -DCONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC
+    -DCONFIG_HAVE_INTEL_TXT
+    -DCONFIG_X86_64_SMP
+    -DCONFIG_X86_HT
+    -DCONFIG_ARCH_CPU_PROBE_RELEASE
+    -DCONFIG_ARCH_SUPPORTS_UPROBES
+    -DCONFIG_IRQ_WORK
+    -DCONFIG_BUILDTIME_EXTABLE_SORT
+    -DCONFIG_HAVE_KERNEL_GZIP
+    -DCONFIG_HAVE_KERNEL_BZIP2
+    -DCONFIG_HAVE_KERNEL_LZMA
+    -DCONFIG_HAVE_KERNEL_XZ
+    -DCONFIG_HAVE_KERNEL_LZO
+    -DCONFIG_KERNEL_LZMA
+    -DCONFIG_SWAP
+    -DCONFIG_SYSVIPC
+    -DCONFIG_SYSVIPC_SYSCTL
+    -DCONFIG_POSIX_MQUEUE
+    -DCONFIG_POSIX_MQUEUE_SYSCTL
+    -DCONFIG_AUDIT
+    -DCONFIG_HAVE_GENERIC_HARDIRQS
+    -DCONFIG_GENERIC_HARDIRQS
+    -DCONFIG_GENERIC_IRQ_PROBE
+    -DCONFIG_GENERIC_IRQ_SHOW
+    -DCONFIG_GENERIC_PENDING_IRQ
+    -DCONFIG_IRQ_DOMAIN
+    -DCONFIG_IRQ_FORCED_THREADING
+    -DCONFIG_SPARSE_IRQ
+    -DCONFIG_CLOCKSOURCE_WATCHDOG)
+add_definitions(-DCONFIG_ARCH_CLOCKSOURCE_DATA
+    -DCONFIG_GENERIC_TIME_VSYSCALL
+    -DCONFIG_GENERIC_CLOCKEVENTS
+    -DCONFIG_GENERIC_CLOCKEVENTS_BUILD
+    -DCONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+    -DCONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST
+    -DCONFIG_GENERIC_CMOS_UPDATE
+    -DCONFIG_TICK_ONESHOT
+    -DCONFIG_NO_HZ_COMMON
+    -DCONFIG_NO_HZ_IDLE
+    -DCONFIG_NO_HZ
+    -DCONFIG_HIGH_RES_TIMERS
+    -DCONFIG_TICK_CPU_ACCOUNTING
+    -DCONFIG_TASKSTATS
+    -DCONFIG_TASK_DELAY_ACCT
+    -DCONFIG_TASK_XACCT
+    -DCONFIG_TASK_IO_ACCOUNTING
+    -DCONFIG_TREE_RCU
+    -DCONFIG_RCU_STALL_COMMON
+    -DCONFIG_RCU_FAST_NO_HZ
+    -DCONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+    -DCONFIG_ARCH_SUPPORTS_NUMA_BALANCING
+    -DCONFIG_ARCH_WANTS_PROT_NUMA_PROT_NONE
+    -DCONFIG_CGROUPS
+    -DCONFIG_CGROUP_FREEZER
+    -DCONFIG_CGROUP_DEVICE
+    -DCONFIG_CPUSETS
+    -DCONFIG_CGROUP_CPUACCT
+    -DCONFIG_RESOURCE_COUNTERS
+    -DCONFIG_MEMCG
+    -DCONFIG_MEMCG_SWAP
+    -DCONFIG_MEMCG_SWAP_ENABLED
+    -DCONFIG_CGROUP_SCHED
+    -DCONFIG_FAIR_GROUP_SCHED
+    -DCONFIG_BLK_CGROUP
+    -DCONFIG_NAMESPACES
+    -DCONFIG_UTS_NS
+    -DCONFIG_IPC_NS
+    -DCONFIG_PID_NS
+    -DCONFIG_NET_NS
+    -DCONFIG_UIDGID_CONVERTED
+    -DCONFIG_MM_OWNER
+    -DCONFIG_BLK_DEV_INITRD
+    -DCONFIG_RD_GZIP
+    -DCONFIG_RD_LZMA
+    -DCONFIG_SYSCTL
+    -DCONFIG_ANON_INODES)
+add_definitions(-DCONFIG_HAVE_UID16
+    -DCONFIG_SYSCTL_EXCEPTION_TRACE
+    -DCONFIG_HOTPLUG
+    -DCONFIG_HAVE_PCSPKR_PLATFORM
+    -DCONFIG_EXPERT
+    -DCONFIG_UID16
+    -DCONFIG_SYSCTL_SYSCALL
+    -DCONFIG_KALLSYMS
+    -DCONFIG_PRINTK
+    -DCONFIG_BUG
+    -DCONFIG_ELF_CORE
+    -DCONFIG_BASE_FULL
+    -DCONFIG_FUTEX
+    -DCONFIG_EPOLL
+    -DCONFIG_SIGNALFD
+    -DCONFIG_TIMERFD
+    -DCONFIG_EVENTFD
+    -DCONFIG_SHMEM
+    -DCONFIG_AIO
+    -DCONFIG_PCI_QUIRKS
+    -DCONFIG_EMBEDDED
+    -DCONFIG_HAVE_PERF_EVENTS
+    -DCONFIG_PERF_EVENTS
+    -DCONFIG_VM_EVENT_COUNTERS
+    -DCONFIG_SLAB
+    -DCONFIG_HAVE_OPROFILE
+    -DCONFIG_OPROFILE_NMI_TIMER
+    -DCONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+    -DCONFIG_ARCH_USE_BUILTIN_BSWAP
+    -DCONFIG_USER_RETURN_NOTIFIER
+    -DCONFIG_HAVE_IOREMAP_PROT
+    -DCONFIG_HAVE_KPROBES
+    -DCONFIG_HAVE_KRETPROBES
+    -DCONFIG_HAVE_OPTPROBES
+    -DCONFIG_HAVE_KPROBES_ON_FTRACE
+    -DCONFIG_HAVE_ARCH_TRACEHOOK
+    -DCONFIG_HAVE_DMA_ATTRS
+    -DCONFIG_USE_GENERIC_SMP_HELPERS
+    -DCONFIG_GENERIC_SMP_IDLE_THREAD
+    -DCONFIG_HAVE_REGS_AND_STACK_ACCESS_API
+    -DCONFIG_HAVE_DMA_API_DEBUG
+    -DCONFIG_HAVE_HW_BREAKPOINT
+    -DCONFIG_HAVE_MIXED_BREAKPOINTS_REGS
+    -DCONFIG_HAVE_USER_RETURN_NOTIFIER
+    -DCONFIG_HAVE_PERF_EVENTS_NMI
+    -DCONFIG_HAVE_PERF_REGS
+    -DCONFIG_HAVE_PERF_USER_STACK_DUMP)
+add_definitions(-DCONFIG_HAVE_ARCH_JUMP_LABEL
+    -DCONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+    -DCONFIG_HAVE_CMPXCHG_LOCAL
+    -DCONFIG_HAVE_CMPXCHG_DOUBLE
+    -DCONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION
+    -DCONFIG_ARCH_WANT_OLD_COMPAT_IPC
+    -DCONFIG_HAVE_ARCH_SECCOMP_FILTER
+    -DCONFIG_HAVE_CONTEXT_TRACKING
+    -DCONFIG_HAVE_IRQ_TIME_ACCOUNTING
+    -DCONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE
+    -DCONFIG_MODULES_USE_ELF_RELA
+    -DCONFIG_OLD_SIGSUSPEND3
+    -DCONFIG_COMPAT_OLD_SIGACTION
+    -DCONFIG_SLABINFO
+    -DCONFIG_RT_MUTEXES
+    -DCONFIG_MODULES
+    -DCONFIG_MODULE_UNLOAD
+    -DCONFIG_X86_X2APIC # broadwell only
+    -DCONFIG_MODULE_FORCE_UNLOAD
+    -DCONFIG_MODULE_SIG
+    -DCONFIG_MODULE_SIG_SHA384
+    -DCONFIG_STOP_MACHINE
+    -DCONFIG_BLOCK
+    -DCONFIG_BLK_DEV_BSG
+    -DCONFIG_BLK_DEV_BSGLIB
+    -DCONFIG_BLK_DEV_INTEGRITY
+    -DCONFIG_PARTITION_ADVANCED
+    -DCONFIG_MAC_PARTITION
+    -DCONFIG_MSDOS_PARTITION
+    -DCONFIG_EFI_PARTITION
+    -DCONFIG_BLOCK_COMPAT
+    -DCONFIG_IOSCHED_NOOP
+    -DCONFIG_IOSCHED_DEADLINE
+    -DCONFIG_IOSCHED_CFQ
+    -DCONFIG_DEFAULT_CFQ
+    -DCONFIG_PREEMPT_NOTIFIERS
+    -DCONFIG_ASN1
+    -DCONFIG_INLINE_SPIN_UNLOCK_IRQ
+    -DCONFIG_INLINE_READ_UNLOCK
+    -DCONFIG_INLINE_READ_UNLOCK_IRQ
+    -DCONFIG_INLINE_WRITE_UNLOCK
+    -DCONFIG_INLINE_WRITE_UNLOCK_IRQ
+    -DCONFIG_ARCH_SUPPORTS_ATOMIC_RMW
+    -DCONFIG_MUTEX_SPIN_ON_OWNER)
+add_definitions(-DCONFIG_FREEZER
+    -DCONFIG_ZONE_DMA
+    -DCONFIG_SMP
+    -DCONFIG_X86_MPPARSE
+    -DCONFIG_RETPOLINE
+    -DCONFIG_X86_SUPPORTS_MEMORY_FAILURE
+    -DCONFIG_NO_BOOTMEM
+    -DCONFIG_GENERIC_CPU
+    -DCONFIG_X86_TSC
+    -DCONFIG_X86_CMPXCHG64
+    -DCONFIG_X86_CMOV
+    -DCONFIG_X86_DEBUGCTLMSR
+    -DCONFIG_PROCESSOR_SELECT
+    -DCONFIG_CPU_SUP_INTEL
+    -DCONFIG_HPET_TIMER
+    -DCONFIG_HPET_EMULATE_RTC
+    -DCONFIG_DMI
+    -DCONFIG_SWIOTLB
+    -DCONFIG_IOMMU_HELPER
+    -DCONFIG_SCHED_SMT
+    -DCONFIG_SCHED_MC
+    -DCONFIG_PREEMPT_NONE
+    -DCONFIG_X86_LOCAL_APIC
+    -DCONFIG_X86_IO_APIC
+    -DCONFIG_X86_MCE
+    -DCONFIG_X86_MCE_INTEL
+    -DCONFIG_X86_MCE_THRESHOLD
+    -DCONFIG_X86_THERMAL_VECTOR
+    -DCONFIG_X86_MSR
+    -DCONFIG_X86_CPUID
+    -DCONFIG_ARCH_PHYS_ADDR_T_64BIT
+    -DCONFIG_ARCH_DMA_ADDR_T_64BIT
+    -DCONFIG_DIRECT_GBPAGES
+    -DCONFIG_ARCH_SPARSEMEM_ENABLE
+    -DCONFIG_ARCH_SPARSEMEM_DEFAULT
+    -DCONFIG_ARCH_SELECT_MEMORY_MODEL
+    -DCONFIG_ARCH_PROC_KCORE_TEXT
+    -DCONFIG_SELECT_MEMORY_MODEL
+    -DCONFIG_SPARSEMEM_MANUAL
+    -DCONFIG_SPARSEMEM
+    -DCONFIG_HAVE_MEMORY_PRESENT
+    -DCONFIG_SPARSEMEM_EXTREME
+    -DCONFIG_SPARSEMEM_VMEMMAP_ENABLE
+    -DCONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
+    -DCONFIG_SPARSEMEM_VMEMMAP
+    -DCONFIG_HAVE_MEMBLOCK
+    -DCONFIG_HAVE_MEMBLOCK_NODE_MAP)
+add_definitions(-DCONFIG_ARCH_DISCARD_MEMBLOCK
+    -DCONFIG_PAGEFLAGS_EXTENDED
+    -DCONFIG_COMPACTION
+    -DCONFIG_MIGRATION
+    -DCONFIG_PHYS_ADDR_T_64BIT
+    -DCONFIG_BOUNCE
+    -DCONFIG_NEED_BOUNCE_POOL
+    -DCONFIG_VIRT_TO_BUS
+    -DCONFIG_MMU_NOTIFIER
+    -DCONFIG_KSM
+    -DCONFIG_ARCH_SUPPORTS_MEMORY_FAILURE
+    -DCONFIG_CROSS_MEMORY_ATTACH
+    -DCONFIG_MTRR
+    -DCONFIG_ARCH_RANDOM
+    -DCONFIG_X86_SMAP
+    -DCONFIG_EFI
+    -DCONFIG_HZ_1000
+    -DCONFIG_HZ=1000
+    -DCONFIG_SCHED_HRTICK
+    -DCONFIG_KEXEC
+    -DCONFIG_CRASH_DUMP
+    -DCONFIG_RELOCATABLE
+    -DCONFIG_HOTPLUG_CPU
+    -DCONFIG_ARCH_ENABLE_MEMORY_HOTPLUG
+    -DCONFIG_ARCH_HIBERNATION_HEADER
+    -DCONFIG_HIBERNATE_CALLBACKS
+    -DCONFIG_HIBERNATION
+    -DCONFIG_PM_SLEEP
+    -DCONFIG_PM_SLEEP_SMP
+    -DCONFIG_PM
+    -DCONFIG_ACPI
+    -DCONFIG_ACPI_SLEEP
+    -DCONFIG_ACPI_DOCK
+    -DCONFIG_ACPI_I2C
+    -DCONFIG_ACPI_HOTPLUG_CPU
+    -DCONFIG_X86_PM_TIMER
+    -DCONFIG_ACPI_CONTAINER
+    -DCONFIG_CPU_FREQ
+    -DCONFIG_CPU_FREQ_GOV_COMMON
+    -DCONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE
+    -DCONFIG_CPU_FREQ_GOV_USERSPACE
+    -DCONFIG_CPU_IDLE
+    -DCONFIG_CPU_IDLE_GOV_LADDER
+    -DCONFIG_CPU_IDLE_GOV_MENU
+    -DCONFIG_PCI
+    -DCONFIG_PCI_DIRECT
+    -DCONFIG_PCI_MMCONFIG
+    -DCONFIG_PCI_DOMAINS
+    -DCONFIG_PCIEPORTBUS)
+add_definitions(-DCONFIG_PCIEAER
+    -DCONFIG_PCIE_ECRC
+    -DCONFIG_PCIEASPM
+    -DCONFIG_PCIEASPM_PERFORMANCE
+    -DCONFIG_ARCH_SUPPORTS_MSI
+    -DCONFIG_PCI_MSI
+    -DCONFIG_HT_IRQ
+    -DCONFIG_PCI_ATS
+    -DCONFIG_PCI_IOV
+    -DCONFIG_PCI_IOAPIC
+    -DCONFIG_PCI_LABEL
+    -DCONFIG_ISA_DMA_API
+    -DCONFIG_BINFMT_ELF
+    -DCONFIG_COMPAT_BINFMT_ELF
+    -DCONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
+    -DCONFIG_BINFMT_SCRIPT
+    -DCONFIG_BINFMT_MISC
+    -DCONFIG_COREDUMP
+    -DCONFIG_IA32_EMULATION
+    -DCONFIG_IA32_AOUT
+    -DCONFIG_COMPAT
+    -DCONFIG_COMPAT_FOR_U64_ALIGNMENT
+    -DCONFIG_SYSVIPC_COMPAT
+    -DCONFIG_KEYS_COMPAT
+    -DCONFIG_HAVE_TEXT_POKE_SMP
+    -DCONFIG_X86_DEV_DMA_OPS
+    -DCONFIG_NET
+    -DCONFIG_COMPAT_NETLINK_MESSAGES
+    -DCONFIG_PACKET
+    -DCONFIG_UNIX
+    -DCONFIG_XFRM
+    -DCONFIG_INET
+    -DCONFIG_IP_MULTICAST
+    -DCONFIG_IP_ADVANCED_ROUTER
+    -DCONFIG_IP_MULTIPLE_TABLES
+    -DCONFIG_IP_PNP
+    -DCONFIG_IP_PNP_DHCP
+    -DCONFIG_NET_IPGRE_BROADCAST
+    -DCONFIG_SYN_COOKIES
+    -DCONFIG_INET_LRO
+    -DCONFIG_INET_DIAG
+    -DCONFIG_INET_TCP_DIAG
+    -DCONFIG_TCP_CONG_CUBIC
+    -DCONFIG_IPV6_PRIVACY
+    -DCONFIG_IPV6_ROUTER_PREF
+    -DCONFIG_IPV6_OPTIMISTIC_DAD
+    -DCONFIG_IPV6_SIT_6RD
+    -DCONFIG_IPV6_NDISC_NODETYPE
+    -DCONFIG_IPV6_MULTIPLE_TABLES)
+add_definitions(-DCONFIG_IPV6_MROUTE
+    -DCONFIG_IPV6_PIMSM_V2
+    -DCONFIG_NETFILTER
+    -DCONFIG_NETFILTER_ADVANCED
+    -DCONFIG_BRIDGE_NETFILTER
+    -DCONFIG_NF_CONNTRACK_MARK
+    -DCONFIG_NF_CONNTRACK_PROCFS
+    -DCONFIG_NF_NAT_NEEDED
+    -DCONFIG_NF_CONNTRACK_PROC_COMPAT
+    -DCONFIG_HAVE_NET_DSA
+    -DCONFIG_NET_SCHED
+    -DCONFIG_NET_CLS
+    -DCONFIG_NET_SCH_FIFO
+    -DCONFIG_DNS_RESOLVER
+    -DCONFIG_RPS
+    -DCONFIG_RFS_ACCEL
+    -DCONFIG_XPS
+    -DCONFIG_BQL
+    -DCONFIG_FIB_RULES
+    -DCONFIG_WIRELESS
+    -DCONFIG_WIRELESS_EXT
+    -DCONFIG_WEXT_CORE
+    -DCONFIG_WEXT_PROC
+    -DCONFIG_WEXT_PRIV
+    -DCONFIG_HAVE_BPF_JIT
+    -DCONFIG_DEVTMPFS
+    -DCONFIG_STANDALONE
+    -DCONFIG_PREVENT_FIRMWARE_BUILD
+    -DCONFIG_FW_LOADER
+    -DCONFIG_FW_LOADER_USER_HELPER
+    -DCONFIG_GENERIC_CPU_VULNERABILITIES
+    -DCONFIG_REGMAP
+    -DCONFIG_REGMAP_I2C
+    -DCONFIG_PNP
+    -DCONFIG_PNPACPI
+    -DCONFIG_BLK_DEV
+    -DCONFIG_BLK_DEV_RAM
+    -DCONFIG_ENCLOSURE_SERVICES
+    -DCONFIG_HAVE_IDE
+    -DCONFIG_SCSI_MOD
+    -DCONFIG_RAID_ATTRS
+    -DCONFIG_SCSI
+    -DCONFIG_SCSI_DMA
+    -DCONFIG_SCSI_PROC_FS
+    -DCONFIG_BLK_DEV_SD
+    -DCONFIG_SCSI_ENCLOSURE
+    -DCONFIG_SCSI_MULTI_LUN
+    -DCONFIG_SCSI_ISCSI_ATTRS
+    -DCONFIG_SCSI_SAS_ATTRS
+    -DCONFIG_SCSI_SAS_LIBSAS)
+add_definitions(-DCONFIG_SCSI_SAS_ATA
+    -DCONFIG_SCSI_SAS_HOST_SMP
+    -DCONFIG_SCSI_LOWLEVEL
+    -DCONFIG_SCSI_DH
+    -DCONFIG_SCSI_DH_RDAC
+    -DCONFIG_ATA
+    -DCONFIG_ATA_VERBOSE_ERROR
+    -DCONFIG_ATA_ACPI
+    -DCONFIG_SATA_PMP
+    -DCONFIG_SATA_AHCI
+    -DCONFIG_SATA_SIL24
+    -DCONFIG_ATA_SFF
+    -DCONFIG_ATA_BMDMA
+    -DCONFIG_SATA_MV
+    -DCONFIG_MD
+    -DCONFIG_BLK_DEV_MD
+    -DCONFIG_MD_AUTODETECT
+    -DCONFIG_MD_LINEAR
+    -DCONFIG_MD_RAID0
+    -DCONFIG_MD_RAID1
+    -DCONFIG_MD_RAID10
+    -DCONFIG_MD_RAID456
+    -DCONFIG_BLK_DEV_DM_BUILTIN
+    -DCONFIG_BLK_DEV_DM
+    -DCONFIG_NETDEVICES
+    -DCONFIG_NET_CORE
+    -DCONFIG_ETHERNET
+    -DCONFIG_NET_VENDOR_3COM
+    -DCONFIG_NET_VENDOR_ADAPTEC
+    -DCONFIG_NET_VENDOR_ALTEON
+    -DCONFIG_NET_VENDOR_AMD
+    -DCONFIG_NET_VENDOR_ATHEROS
+    -DCONFIG_NET_CADENCE
+    -DCONFIG_NET_VENDOR_BROADCOM
+    -DCONFIG_BNX2X_SRIOV
+    -DCONFIG_NET_VENDOR_BROCADE
+    -DCONFIG_NET_VENDOR_CHELSIO
+    -DCONFIG_NET_VENDOR_CISCO
+    -DCONFIG_NET_VENDOR_DEC
+    -DCONFIG_NET_VENDOR_DLINK
+    -DCONFIG_NET_VENDOR_EMULEX
+    -DCONFIG_NET_VENDOR_EXAR
+    -DCONFIG_NET_VENDOR_HP
+    -DCONFIG_NET_VENDOR_INTEL
+    -DCONFIG_IGB_DCA
+    -DCONFIG_NET_VENDOR_I825XX
+    -DCONFIG_NET_VENDOR_MARVELL
+    -DCONFIG_NET_VENDOR_MELLANOX
+    -DCONFIG_NET_VENDOR_MICREL)
+add_definitions(-DCONFIG_NET_VENDOR_MICROCHIP
+    -DCONFIG_NET_VENDOR_MYRI
+    -DCONFIG_NET_VENDOR_NATSEMI
+    -DCONFIG_NET_VENDOR_8390
+    -DCONFIG_NET_VENDOR_NVIDIA
+    -DCONFIG_NET_VENDOR_OKI
+    -DCONFIG_NET_VENDOR_QLOGIC
+    -DCONFIG_NET_VENDOR_REALTEK
+    -DCONFIG_NET_VENDOR_RDC
+    -DCONFIG_NET_VENDOR_SEEQ
+    -DCONFIG_NET_VENDOR_SILAN
+    -DCONFIG_NET_VENDOR_SIS
+    -DCONFIG_NET_VENDOR_SMSC
+    -DCONFIG_NET_VENDOR_STMICRO
+    -DCONFIG_NET_VENDOR_SUN
+    -DCONFIG_NET_VENDOR_TEHUTI
+    -DCONFIG_NET_VENDOR_TI
+    -DCONFIG_NET_VENDOR_VIA
+    -DCONFIG_NET_VENDOR_WIZNET
+    -DCONFIG_WLAN
+    -DCONFIG_INPUT
+    -DCONFIG_SERIO
+    -DCONFIG_SERIO_I8042
+    -DCONFIG_SERIO_SERPORT
+    -DCONFIG_SERIO_LIBPS2
+    -DCONFIG_TTY
+    -DCONFIG_VT
+    -DCONFIG_CONSOLE_TRANSLATIONS
+    -DCONFIG_VT_CONSOLE
+    -DCONFIG_VT_CONSOLE_SLEEP
+    -DCONFIG_HW_CONSOLE
+    -DCONFIG_UNIX98_PTYS
+    -DCONFIG_DEVPTS_MULTIPLE_INSTANCES
+    -DCONFIG_LEGACY_PTYS
+    -DCONFIG_SERIAL_NONSTANDARD
+    -DCONFIG_DEVKMEM
+    -DCONFIG_SERIAL_8250
+    -DCONFIG_SERIAL_8250_DEPRECATED_OPTIONS
+    -DCONFIG_SERIAL_8250_CONSOLE
+    -DCONFIG_FIX_EARLYCON_MEM
+    -DCONFIG_SERIAL_8250_DMA
+    -DCONFIG_SERIAL_8250_PCI
+    -DCONFIG_SERIAL_CORE
+    -DCONFIG_SERIAL_CORE_CONSOLE
+    -DCONFIG_DEVPORT
+    -DCONFIG_I2C
+    -DCONFIG_I2C_BOARDINFO
+    -DCONFIG_I2C_COMPAT)
+add_definitions(-DCONFIG_I2C_CHARDEV
+    -DCONFIG_I2C_MUX
+    -DCONFIG_I2C_HELPER_AUTO
+    -DCONFIG_SPI
+    -DCONFIG_SPI_MASTER
+    -DCONFIG_SPI_BITBANG
+    -DCONFIG_SPI_SPIDEV
+    -DCONFIG_PPS
+    -DCONFIG_PTP_1588_CLOCK
+    -DCONFIG_ARCH_WANT_OPTIONAL_GPIOLIB
+    -DCONFIG_GPIO_DEVRES
+    -DCONFIG_HWMON
+    -DCONFIG_HWMON_VID
+    -DCONFIG_SENSORS_CORETEMP
+    -DCONFIG_SENSORS_IT87
+    -DCONFIG_THERMAL_HWMON
+    -DCONFIG_THERMAL_DEFAULT_GOV_STEP_WISE
+    -DCONFIG_THERMAL_GOV_STEP_WISE
+    -DCONFIG_SSB_POSSIBLE
+    -DCONFIG_SSB_SPROM
+    -DCONFIG_SSB_PCIHOST_POSSIBLE
+    -DCONFIG_SSB_PCIHOST
+    -DCONFIG_SSB_DRIVER_PCICORE_POSSIBLE
+    -DCONFIG_BCMA_POSSIBLE
+    -DCONFIG_MFD_CORE
+    -DCONFIG_LPC_ICH
+    -DCONFIG_VGA_ARB
+    -DCONFIG_BACKLIGHT_LCD_SUPPORT
+    -DCONFIG_DUMMY_CONSOLE
+    -DCONFIG_SOUND_OSS_CORE
+    -DCONFIG_SOUND_OSS_CORE_PRECLAIM
+    -DCONFIG_SND_OSSEMUL
+    -DCONFIG_SND_SUPPORT_OLD_API
+    -DCONFIG_SND_DMA_SGBUF
+    -DCONFIG_SND_USB
+    -DCONFIG_USB_HIDDEV
+    -DCONFIG_USB_ARCH_HAS_OHCI
+    -DCONFIG_USB_ARCH_HAS_EHCI
+    -DCONFIG_USB_ARCH_HAS_XHCI
+    -DCONFIG_USB_SUPPORT
+    -DCONFIG_USB_ARCH_HAS_HCD
+    -DCONFIG_USB_DEFAULT_PERSIST
+    -DCONFIG_USB_ETRON_HCD_DEBUGGING
+    -DCONFIG_USB_ETRON_HUB
+    -DCONFIG_USB_EHCI_ROOT_HUB_TT
+    -DCONFIG_USB_EHCI_TT_NEWSCHED
+    -DCONFIG_NEW_LEDS)
+add_definitions(-DCONFIG_LEDS_CLASS
+    -DCONFIG_LEDS_TRIGGERS
+    -DCONFIG_LEDS_TRIGGER_TIMER
+    -DCONFIG_LEDS_TRIGGER_HEARTBEAT
+    -DCONFIG_RTC_LIB
+    -DCONFIG_RTC_CLASS
+    -DCONFIG_RTC_HCTOSYS
+    -DCONFIG_RTC_SYSTOHC
+    -DCONFIG_RTC_INTF_SYSFS
+    -DCONFIG_RTC_INTF_PROC
+    -DCONFIG_RTC_INTF_DEV
+    -DCONFIG_RTC_DRV_CMOS
+    -DCONFIG_DMADEVICES
+    -DCONFIG_DMA_ENGINE
+    -DCONFIG_DMA_ACPI
+    -DCONFIG_ASYNC_TX_DMA
+    -DCONFIG_STAGING
+    -DCONFIG_USBIP_DEBUG
+    -DCONFIG_ZSMALLOC
+    -DCONFIG_NET_VENDOR_SILICOM
+    -DCONFIG_X86_PLATFORM_DEVICES
+    -DCONFIG_CLKEVT_I8253
+    -DCONFIG_CLKBLD_I8253
+    -DCONFIG_IOMMU_API
+    -DCONFIG_IOMMU_SUPPORT
+    -DCONFIG_DMAR_TABLE
+    -DCONFIG_INTEL_IOMMU
+    -DCONFIG_INTEL_IOMMU_DEFAULT_ON
+    -DCONFIG_INTEL_IOMMU_FLOPPY_WA
+    -DCONFIG_IRQ_REMAP
+    -DCONFIG_FIRMWARE_MEMMAP
+    -DCONFIG_DMIID
+    -DCONFIG_EFI_VARS
+    -DCONFIG_EFI_VARS_PSTORE
+    -DCONFIG_DCACHE_WORD_ACCESS
+    -DCONFIG_EXT2_FS
+    -DCONFIG_EXT2_FS_XATTR
+    -DCONFIG_EXT3_FS
+    -DCONFIG_EXT3_DEFAULTS_TO_ORDERED
+    -DCONFIG_EXT3_FS_XATTR
+    -DCONFIG_EXT4_FS
+    -DCONFIG_JBD
+    -DCONFIG_JBD2
+    -DCONFIG_FS_MBCACHE
+    -DCONFIG_FS_POSIX_ACL
+    -DCONFIG_FILE_LOCKING
+    -DCONFIG_FSNOTIFY
+    -DCONFIG_DNOTIFY
+    -DCONFIG_INOTIFY_USER
+    -DCONFIG_QUOTA)
+add_definitions(-DCONFIG_QUOTACTL
+    -DCONFIG_QUOTACTL_COMPAT
+    -DCONFIG_JOLIET
+    -DCONFIG_ZISOFS
+    -DCONFIG_UDF_NLS
+    -DCONFIG_PROC_FS
+    -DCONFIG_PROC_KCORE
+    -DCONFIG_PROC_VMCORE
+    -DCONFIG_PROC_SYSCTL
+    -DCONFIG_PROC_PAGE_MONITOR
+    -DCONFIG_SYSFS
+    -DCONFIG_TMPFS
+    -DCONFIG_CONFIGFS_FS
+    -DCONFIG_MISC_FILESYSTEMS
+    -DCONFIG_PSTORE
+    -DCONFIG_AUFS_BRANCH_MAX_1023
+    -DCONFIG_AUFS_SBILIST
+    -DCONFIG_AUFS_FHSM
+    -DCONFIG_AUFS_BR_RAMFS
+    -DCONFIG_AUFS_BR_HFSPLUS
+    -DCONFIG_AUFS_BDEV_LOOP
+    -DCONFIG_NETWORK_FILESYSTEMS
+    -DCONFIG_NFS_FS
+    -DCONFIG_NFS_V2
+    -DCONFIG_NFS_V3
+    -DCONFIG_NFS_V4
+    -DCONFIG_NFS_USE_KERNEL_DNS
+    -DCONFIG_NFS_DEBUG
+    -DCONFIG_NFSD_V3
+    -DCONFIG_NFSD_V4
+    -DCONFIG_LOCKD
+    -DCONFIG_LOCKD_V4
+    -DCONFIG_NFS_COMMON
+    -DCONFIG_SUNRPC
+    -DCONFIG_SUNRPC_GSS
+    -DCONFIG_SUNRPC_DEBUG
+    -DCONFIG_CIFS_DEBUG
+    -DCONFIG_CIFS_SMB2
+    -DCONFIG_NLS
+    -DCONFIG_NLS_UTF8
+    -DCONFIG_TRACE_IRQFLAGS_SUPPORT
+    -DCONFIG_PRINTK_TIME
+    -DCONFIG_ENABLE_WARN_DEPRECATED
+    -DCONFIG_ENABLE_MUST_CHECK
+    -DCONFIG_MAGIC_SYSRQ
+    -DCONFIG_DEBUG_FS
+    -DCONFIG_DEBUG_KERNEL
+    -DCONFIG_LOCKUP_DETECTOR
+    -DCONFIG_HARDLOCKUP_DETECTOR
+    -DCONFIG_BOOTPARAM_HARDLOCKUP_PANIC
+    -DCONFIG_DETECT_HUNG_TASK)
+add_definitions(-DCONFIG_SCHED_DEBUG
+    -DCONFIG_SCHEDSTATS
+    -DCONFIG_HAVE_DEBUG_KMEMLEAK
+    -DCONFIG_STACKTRACE
+    -DCONFIG_DEBUG_BUGVERBOSE
+    -DCONFIG_ARCH_WANT_FRAME_POINTERS
+    -DCONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
+    -DCONFIG_USER_STACKTRACE_SUPPORT
+    -DCONFIG_HAVE_FUNCTION_TRACER
+    -DCONFIG_HAVE_FUNCTION_GRAPH_TRACER
+    -DCONFIG_HAVE_FUNCTION_GRAPH_FP_TEST
+    -DCONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+    -DCONFIG_HAVE_DYNAMIC_FTRACE
+    -DCONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS
+    -DCONFIG_HAVE_FTRACE_MCOUNT_RECORD
+    -DCONFIG_HAVE_SYSCALL_TRACEPOINTS
+    -DCONFIG_HAVE_FENTRY
+    -DCONFIG_HAVE_C_RECORDMCOUNT
+    -DCONFIG_TRACING_SUPPORT
+    -DCONFIG_DYNAMIC_DEBUG # bromolow only
+    -DCONFIG_HAVE_ARCH_KGDB
+    -DCONFIG_HAVE_ARCH_KMEMCHECK
+    -DCONFIG_X86_VERBOSE_BOOTUP
+    -DCONFIG_EARLY_PRINTK
+    -DCONFIG_DEBUG_RODATA
+    -DCONFIG_DEBUG_RODATA_TEST
+    -DCONFIG_HAVE_MMIOTRACE_SUPPORT
+    -DCONFIG_IO_DELAY_0X80
+    -DCONFIG_OPTIMIZE_INLINING
+    -DCONFIG_KEYS
+    -DCONFIG_KEYS_DEBUG_PROC_KEYS
+    -DCONFIG_SECURITY
+    -DCONFIG_SECURITYFS
+    -DCONFIG_SECURITY_NETWORK
+    -DCONFIG_KAISER
+    -DCONFIG_SECURITY_PATH
+    -DCONFIG_SECURITY_APPARMOR
+    -DCONFIG_SECURITY_APPARMOR_COMPAT_24
+    -DCONFIG_DEFAULT_SECURITY_APPARMOR
+    -DCONFIG_XOR_BLOCKS
+    -DCONFIG_ASYNC_CORE
+    -DCONFIG_ASYNC_MEMCPY
+    -DCONFIG_ASYNC_XOR
+    -DCONFIG_ASYNC_PQ)
+add_definitions(-DCONFIG_ASYNC_RAID6_RECOV
+    -DCONFIG_CRYPTO
+    -DCONFIG_CRYPTO_ALGAPI
+    -DCONFIG_CRYPTO_ALGAPI2
+    -DCONFIG_CRYPTO_AEAD2
+    -DCONFIG_CRYPTO_BLKCIPHER2
+    -DCONFIG_CRYPTO_HASH
+    -DCONFIG_CRYPTO_HASH2
+    -DCONFIG_CRYPTO_RNG2
+    -DCONFIG_CRYPTO_PCOMP2
+    -DCONFIG_CRYPTO_MANAGER2
+    -DCONFIG_CRYPTO_MANAGER_DISABLE_TESTS
+    -DCONFIG_CRYPTO_WORKQUEUE
+    -DCONFIG_CRYPTO_CRC32C
+    -DCONFIG_CRYPTO_CRCT10DIF
+    -DCONFIG_CRYPTO_SHA512
+    -DCONFIG_CRYPTO_AES
+    -DCONFIG_ASYMMETRIC_KEY_TYPE
+    -DCONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE
+    -DCONFIG_PUBLIC_KEY_ALGO_RSA
+    -DCONFIG_X509_CERTIFICATE_PARSER
+    -DCONFIG_HAVE_KVM
+    -DCONFIG_HAVE_KVM_IRQCHIP
+    -DCONFIG_HAVE_KVM_IRQ_ROUTING
+    -DCONFIG_HAVE_KVM_EVENTFD
+    -DCONFIG_KVM_APIC_ARCHITECTURE
+    -DCONFIG_KVM_MMIO
+    -DCONFIG_KVM_ASYNC_PF
+    -DCONFIG_HAVE_KVM_MSI
+    -DCONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
+    -DCONFIG_VIRTUALIZATION
+    -DCONFIG_KVM_DEVICE_ASSIGNMENT
+    -DCONFIG_RAID6_PQ
+    -DCONFIG_BITREVERSE
+    -DCONFIG_GENERIC_STRNCPY_FROM_USER
+    -DCONFIG_GENERIC_STRNLEN_USER
+    -DCONFIG_GENERIC_FIND_FIRST_BIT
+    -DCONFIG_GENERIC_PCI_IOMAP
+    -DCONFIG_GENERIC_IOMAP
+    -DCONFIG_GENERIC_IO
+    -DCONFIG_CRC16
+    -DCONFIG_CRC_T10DIF
+    -DCONFIG_CRC32
+    -DCONFIG_CRC32_SLICEBY8
+    -DCONFIG_ZLIB_INFLATE
+    -DCONFIG_LZO_COMPRESS)
+add_definitions(-DCONFIG_LZO_DECOMPRESS)
+add_definitions(-DCONFIG_DECOMPRESS_GZIP)
+add_definitions(-DCONFIG_DECOMPRESS_LZMA)
+add_definitions(-DCONFIG_GENERIC_ALLOCATOR)
+add_definitions(-DCONFIG_HAS_IOMEM)
+add_definitions(-DCONFIG_HAS_IOPORT)
+add_definitions(-DCONFIG_HAS_DMA)
+add_definitions(-DCONFIG_CHECK_SIGNATURE)
+add_definitions(-DCONFIG_CPU_RMAP)
+add_definitions(-DCONFIG_DQL)
+add_definitions(-DCONFIG_NLATTR)
+add_definitions(-DCONFIG_ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE)
+add_definitions(-DCONFIG_AVERAGE)
+add_definitions(-DCONFIG_CLZ_TAB)
+add_definitions(-DCONFIG_MPILIB)
+add_definitions(-DCONFIG_OID_REGISTRY)
+add_definitions(-DCONFIG_UCS2_STRING)
+add_definitions(-DCONFIG_SERIAL_8250_NR_UARTS=4)
+add_definitions(-DCONFIG_SYNO_BOOT_SATA_DOM) # only some platforms support that, notably 3615xs while 918+ doesn't
+
+# SATA DOM-related configs (only models with it, e.g. 3615xs but not 918+)
+add_definitions(-DCONFIG_SYNO_SATA_DOM_VENDOR=\"DUMMY_VENDOR\")
+add_definitions(-DCONFIG_SYNO_SATA_DOM_MODEL=\"DUMMY_MODEL\")
+
+add_executable(redpill # single target listing the module's sources and headers; every file appears exactly once
+        redpill_main.c redpill_main.h internal/call_protected.c internal/call_protected.h common.h config/cmdline_delegate.c config/cmdline_delegate.h shim/boot_device_shim.c shim/boot_device_shim.h internal/stealth.c internal/stealth.h config/runtime_config.c config/runtime_config.h test.c shim/bios_shim.c shim/bios_shim.h internal/override/override_symbol.c internal/override/override_symbol.h shim/bios/bios_shims_collection.c shim/bios/bios_shims_collection.h shim/block_fw_update_shim.c shim/block_fw_update_shim.h internal/intercept_execve.c internal/intercept_execve.h shim/disable_exectutables.c shim/disable_exectutables.h debug/debug_execve.c debug/debug_execve.h compat/string_compat.c compat/string_compat.h internal/stealth/sanitize_cmdline.c internal/stealth/sanitize_cmdline.h internal/virtual_pci.c internal/virtual_pci.h shim/pci_shim.c shim/pci_shim.h shim/bios/rtc_proxy.c shim/bios/rtc_proxy.h internal/uart/virtual_uart.c internal/uart/virtual_uart.h shim/uart_fixer.c shim/uart_fixer.h config/uart_defs.h debug/debug_vuart.h internal/uart/vuart_virtual_irq.c internal/uart/vuart_virtual_irq.h internal/uart/vuart_internal.h shim/boot_dev/usb_boot_shim.c shim/boot_dev/usb_boot_shim.h shim/boot_dev/native_sata_boot_shim.c shim/boot_dev/native_sata_boot_shim.h internal/uart/uart_swapper.c internal/uart/uart_swapper.h shim/pmu_shim.c shim/pmu_shim.h internal/intercept_driver_register.c internal/intercept_driver_register.h shim/shim_base.h shim/storage/sata_port_shim.c shim/storage/sata_port_shim.h internal/scsi/scsi_notifier.c internal/scsi/scsi_notifier.h internal/notifier_base.h internal/scsi/scsi_toolbox.c internal/scsi/scsi_toolbox.h internal/scsi/scsi_notifier_list.c internal/scsi/scsi_notifier_list.h shim/storage/smart_shim.c shim/storage/smart_shim.h internal/helper/memory_helper.c internal/helper/memory_helper.h internal/scsi/hdparam.h internal/scsi/scsiparam.h internal/helper/symbol_helper.c internal/helper/symbol_helper.h compat/toolkit/drivers/usb/storage/usb.h shim/boot_dev/fake_sata_boot_shim.c shim/boot_dev/fake_sata_boot_shim.h shim/boot_dev/boot_shim_base.c shim/boot_dev/boot_shim_base.h config/cmdline_opts.h internal/ioscheduler_fixer.c internal/ioscheduler_fixer.h shim/bios/bios_hwcap_shim.c shim/bios/bios_hwcap_shim.h internal/helper/math_helper.c internal/helper/math_helper.h config/hwmon_defs.h config/platform_types.h shim/bios/bios_hwmon_shim.c shim/bios/bios_hwmon_shim.h config/vpci_types.h internal/override/override_syscall.c internal/override/override_syscall.h)
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..f288702
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,674 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. 
+ + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. 
+ + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f86e9f8 --- /dev/null +++ b/Makefile @@ -0,0 +1,97 @@ +PWD := $(shell pwd) + +ifeq ($(LINUX_SRC),) +LINUX_SRC := "$(PWD)/../linux-3.10.x-bromolow-25426" +endif + +SRCS-$(DBG_EXECVE) += debug/debug_execve.c +ccflags-$(DBG_EXECVE) += -DRPDBG_EXECVE +SRCS-y += compat/string_compat.c \ + \ + internal/helper/math_helper.c internal/helper/memory_helper.c internal/helper/symbol_helper.c \ + internal/scsi/scsi_toolbox.c internal/scsi/scsi_notifier_list.c internal/scsi/scsi_notifier.c \ + internal/override/override_symbol.c internal/override/override_syscall.c internal/intercept_execve.c \ + internal/call_protected.c internal/intercept_driver_register.c internal/stealth/sanitize_cmdline.c \ + internal/stealth.c internal/virtual_pci.c internal/uart/uart_swapper.c internal/uart/vuart_virtual_irq.c \ + internal/uart/virtual_uart.c internal/ioscheduler_fixer.c \ + \ + config/cmdline_delegate.c config/runtime_config.c \ + \ + shim/boot_dev/boot_shim_base.c shim/boot_dev/usb_boot_shim.c shim/boot_dev/fake_sata_boot_shim.c \ + shim/boot_dev/native_sata_boot_shim.c shim/boot_device_shim.c \ + \ + shim/storage/smart_shim.c shim/storage/sata_port_shim.c \ + shim/bios/bios_hwcap_shim.c shim/bios/bios_hwmon_shim.c shim/bios/rtc_proxy.c \ + shim/bios/bios_shims_collection.c shim/bios_shim.c \ + shim/block_fw_update_shim.c shim/disable_exectutables.c shim/pci_shim.c shim/pmu_shim.c shim/uart_fixer.c \ + \ + redpill_main.c +OBJS = $(SRCS-y:.c=.o) +#this module name CAN NEVER be the same as the main file (or it will get weird ;)) and the main file has to be included +# in object file. 
So here we say the module file(s) which will create .ko(s) is "redpill.o" and that other objects which +# must be linked (redpill-objs variable) +obj-m += redpill.o +redpill-objs := $(OBJS) +ccflags-y += -std=gnu99 -fgnu89-inline -Wno-declaration-after-statement +ccflags-y += -I$(src)/compat/toolkit/include + +ifndef RP_VERSION_POSTFIX +RP_VERSION_POSTFIX := $(shell git rev-parse --is-inside-work-tree 1>/dev/null 2>/dev/null && echo -n "git-" && git log -1 --pretty='%h' 2>/dev/null || date '+at-%Y_%m_%d-%H_%M_%S') +endif +ccflags-y += -DRP_VERSION_POSTFIX="\"$(RP_VERSION_POSTFIX)\"" + +# Optimization settings per-target. Since LKM makefiles are evaluated twice (first with the specified target and second +# time with target "modules") we need to set the custom target variable during first parsing and based on that variable +# set additional CC-flags when the makefile is parsed for the second time +ifdef RP_MODULE_TARGET +ccflags-dev = -g -fno-inline -DDEBUG +ccflags-test = -O3 +ccflags-prod = -O3 +ccflags-y += -DRP_MODULE_TARGET_VER=${RP_MODULE_TARGET_VER} # this is assumed to be defined when target is specified + +$(info RP-TARGET SPECIFIED AS ${RP_MODULE_TARGET} v${RP_MODULE_TARGET_VER}) + +# stealth mode can always be overridden but there are sane per-target defaults (see below) +ifneq ($(STEALTH_MODE),) +$(info STEALTH MODE OVERRIDE: ${STEALTH_MODE}) +ccflags-y += -DSTEALTH_MODE=$(STEALTH_MODE) +else +ccflags-dev += -DSTEALTH_MODE=1 +ccflags-test += -DSTEALTH_MODE=2 +ccflags-prod += -DSTEALTH_MODE=3 +endif + +ccflags-y += ${ccflags-${RP_MODULE_TARGET}} +else +# during the first read of the makefile we don't get the RP_MODULE_TARGET - if for some reason we didn't get it during +# the actual build phase it should explode (and it will if an unknown GCC flag is specified). We cannot use makefile +# error here as we don't know if the file is parsed for the first time or the second time.
Just Kbuild peculiarities ;) +ccflags-y = --bogus-flag-which-should-not-be-called-NO_RP_MODULE_TARGER_SPECIFIED +endif + +# this MUST be last after all other options to force GNU89 for the file being a workaround for GCC bug #275674 +# see internal/scsi/scsi_notifier_list.h for detailed explanation +CFLAGS_scsi_notifier_list.o += -std=gnu89 + +# do NOT move this target - make <3.80 doesn't have a way to specify default target and takes the first one found +default_error: + $(error You need to specify one of the following targets: dev-v6, dev-v7, test-v6, test-v7, prod-v6, prod-v7, clean) + +# All v6 targets +dev-v6: # kernel running in v6.2+ OS, all symbols included, debug messages included + $(MAKE) -C $(LINUX_SRC) M=$(PWD) RP_MODULE_TARGET="dev" RP_MODULE_TARGET_VER="6" modules +test-v6: # kernel running in v6.2+ OS, fully stripped with only warning & above (no debugs or info) + $(MAKE) -C $(LINUX_SRC) M=$(PWD) RP_MODULE_TARGET="test" RP_MODULE_TARGET_VER="6" modules +prod-v6: # kernel running in v6.2+ OS, fully stripped with no debug messages + $(MAKE) -C $(LINUX_SRC) M=$(PWD) RP_MODULE_TARGET="prod" RP_MODULE_TARGET_VER="6" modules + +# All v7 targets +dev-v7: # kernel running in v6.2+ OS, all symbols included, debug messages included + $(MAKE) -C $(LINUX_SRC) M=$(PWD) RP_MODULE_TARGET="dev" RP_MODULE_TARGET_VER="7" modules +test-v7: # kernel running in v6.2+ OS, fully stripped with only warning & above (no debugs or info) + $(MAKE) -C $(LINUX_SRC) M=$(PWD) RP_MODULE_TARGET="test" RP_MODULE_TARGET_VER="7" modules +prod-v7: # kernel running in v6.2+ OS, fully stripped with no debug messages + $(MAKE) -C $(LINUX_SRC) M=$(PWD) RP_MODULE_TARGET="prod" RP_MODULE_TARGET_VER="7" modules + +clean: + $(MAKE) -C $(LINUX_SRC) M=$(PWD) clean diff --git a/PLATFORMS b/PLATFORMS new file mode 100644 index 0000000..5173e00 --- /dev/null +++ b/PLATFORMS @@ -0,0 +1,8 @@ +bromolow 3.10.108 +apollolake 4.4.180 +broadwell 4.4.180 +broadwellnk 4.4.180 +denverton 4.4.180 +geminilake 
4.4.180 +v1000 4.4.180 +r1000 4.4.180 diff --git a/common.h b/common.h new file mode 100644 index 0000000..e1eb5e0 --- /dev/null +++ b/common.h @@ -0,0 +1,128 @@ +#ifndef REDPILLLKM_COMMON_H +#define REDPILLLKM_COMMON_H + +/******************************************** Available whole-module flags ********************************************/ +//This (shameful) flag disables shims which cannot be properly unloaded to make debugging of other things easier +//#define DBG_DISABLE_UNLOADABLE + +//disabled uart unswapping even if needed (useful for hand-loading while running) +//#define DBG_DISABLE_UART_SWAP_FIX + +//Whether to cause a kernel panic when module fails to load internally (which should be normally done on production) +#define KP_ON_LOAD_ERROR + +//Print A LOT of vUART debug messages +//#define VUART_DEBUG_LOG + +//Enabled printing of all ioctl() calls (hooked or not) +//#define DBG_SMART_PRINT_ALL_IOCTL + +//Normally GetHwCapability calls (checking what hardware supports) are responded internally. Setting this DBG adds log +// of all requests & responses for hardware capabilities (and there're frquent but not overwhelming). Additionally this +// option turns on additional calls to the original GetHwCapability and logs compared values. Some values are ALWAYS +// proxied to the original GetHwCapability +//#define DBG_HWCAP + +//Debug all hardware monitoring features (shim/bios/bios_hwmon_shim.c) +//#define DBG_HWMON +/**********************************************************************************************************************/ + +#include "internal/stealth.h" +#include +#include +#include +#include +#include //kmalloc +#include +#include "compat/string_compat.h" +#include //bool & others + +/************************************************** Strings handling **************************************************/ +#define get_static_name(variable) #variable +#define __FILENAME__ (strrchr(__FILE__, '/') ? 
strrchr(__FILE__, '/') + 1 : __FILE__) +#define strlen_static(param) (sizeof(param)-1) //gets the size of a string minus trailing nullbyte (useful for partial matches) +#define strlen_to_size(len) (sizeof(char) * ((len)+1)) //useful for static strings, use strsize() for dynamic ones +#define strsize(param) strlen_to_size(strlen(param)) //strlen including NULLbyte; useful for kmalloc-ing +/**********************************************************************************************************************/ + +/****************************************** Dynamic memory allocation helpers *****************************************/ +//[internal] Cleans up & provides standard reporting and return +#define __kalloc_err_report_clean(variable, size, exit) \ + (variable) = NULL; \ + pr_loc_crt("kernel memory alloc failure - tried to allocate %ld bytes for %s", \ + (long)(size), get_static_name(variable)); \ + return exit; + +//Use these if you need to do a manual malloc with some extra checks but want to return a consistant message +#define kalloc_error_int(variable, size) do { __kalloc_err_report_clean(variable, size, -ENOMEM); } while(0) +#define kalloc_error_ptr(variable, size) do { __kalloc_err_report_clean(variable, size, ERR_PTR(-ENOMEM)); } while(0) + +//[internal] Reserves memory & checks result +#define __kalloc_or_exit(type, variable, size, exit_type) \ + (variable) = (type)(size, GFP_KERNEL); \ + if (unlikely(!(variable))) { kalloc_error_ ## exit_type (variable, size); } + +//Use these to do a standard malloc with error reporting +#define kmalloc_or_exit_int(variable, size) do { __kalloc_or_exit(kmalloc, variable, size, int); } while(0) +#define kmalloc_or_exit_ptr(variable, size) do { __kalloc_or_exit(kmalloc, variable, size, ptr); } while(0) +#define kzalloc_or_exit_int(variable, size) do { __kalloc_or_exit(kzalloc, variable, size, int); } while(0) +#define kzalloc_or_exit_ptr(variable, size) do { __kalloc_or_exit(kzalloc, variable, size, ptr); } while(0) 
+#define try_kfree(variable) do { if(variable) { kfree(variable); } } while(0) +/**********************************************************************************************************************/ + +/****************************************************** Logging *******************************************************/ +#define _pr_loc_crt(fmt, ...) pr_crit( "<%s/%s:%d> " pr_fmt(fmt) "\n", KBUILD_MODNAME, __FILENAME__, __LINE__, ##__VA_ARGS__) +#define _pr_loc_err(fmt, ...) pr_err ( "<%s/%s:%d> " pr_fmt(fmt) "\n", KBUILD_MODNAME, __FILENAME__, __LINE__, ##__VA_ARGS__) +#define _pr_loc_wrn(fmt, ...) pr_warn( "<%s/%s:%d> " pr_fmt(fmt) "\n", KBUILD_MODNAME, __FILENAME__, __LINE__, ##__VA_ARGS__) +#define _pr_loc_inf(fmt, ...) pr_info( "<%s/%s:%d> " pr_fmt(fmt) "\n", KBUILD_MODNAME, __FILENAME__, __LINE__, ##__VA_ARGS__) +#define _pr_loc_dbg(fmt, ...) pr_info( "<%s/%s:%d> " pr_fmt(fmt) "\n", KBUILD_MODNAME, __FILENAME__, __LINE__, ##__VA_ARGS__) +#define _pr_loc_dbg_raw(fmt, ...) printk(fmt, ##__VA_ARGS__) +#define _pr_loc_bug(fmt, ...) \ + do { \ + pr_err("<%s/%s:%d> !!BUG!! " pr_fmt(fmt) "\n", KBUILD_MODNAME, __FILENAME__, __LINE__, ##__VA_ARGS__); \ + WARN(1, "BUG log triggered"); \ + } while(0) + +#if STEALTH_MODE >= STEALTH_MODE_FULL //all logs will be disabled in full +#define pr_loc_crt(fmt, ...) +#define pr_loc_err(fmt, ...) +#define pr_loc_wrn(fmt, ...) +#define pr_loc_inf(fmt, ...) +#define pr_loc_dbg(fmt, ...) +#define pr_loc_dbg_raw(fmt, ...) +#define pr_loc_bug(fmt, ...) +#define DBG_ALLOW_UNUSED(var) ((void)var) //in debug modes some variables are seen as unused (as they're only for dbg) + +#elif STEALTH_MODE >= STEALTH_MODE_NORMAL //in normal mode we only warnings/errors/etc. +#define pr_loc_crt _pr_loc_crt +#define pr_loc_err _pr_loc_err +#define pr_loc_wrn _pr_loc_wrn +#define pr_loc_inf(fmt, ...) +#define pr_loc_dbg(fmt, ...) +#define pr_loc_dbg_raw(fmt, ...) 
+#define pr_loc_bug _pr_loc_bug +#define DBG_ALLOW_UNUSED(var) ((void)var) //in debug modes some variables are seen as unused (as they're only for dbg) + +#else +#define pr_loc_crt _pr_loc_crt +#define pr_loc_err _pr_loc_err +#define pr_loc_inf _pr_loc_inf +#define pr_loc_wrn _pr_loc_wrn +#define pr_loc_dbg _pr_loc_dbg +#define pr_loc_dbg_raw _pr_loc_dbg_raw +#define pr_loc_bug _pr_loc_bug +#define DBG_ALLOW_UNUSED(var) //when debug logs are enables we don't silence unused variables warnings + +#endif //STEALTH_MODE +/**********************************************************************************************************************/ + +#ifndef RP_MODULE_TARGET_VER +#error "The RP_MODULE_TARGET_VER is not defined - it is required to properly set VTKs" +#endif + +//Before you change that you need to go and check all usages of RP_MODULE_TARGET_VER +#if RP_MODULE_TARGET_VER != 6 && RP_MODULE_TARGET_VER != 7 +#error "The RP_MODULE_TARGET_VER value is invalid" +#endif + +#endif //REDPILLLKM_COMMON_H diff --git a/compat/string_compat.c b/compat/string_compat.c new file mode 100644 index 0000000..c3425e9 --- /dev/null +++ b/compat/string_compat.c @@ -0,0 +1,95 @@ +/* + * linux/lib/string.c + * Modified to take care of kernel versions + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ +#include "string_compat.h" +#include //E2BIG + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(4,3,0) +#include +#include +#include //PAGE_SIZE + +/** + * strscpy - Copy a C-string into a sized buffer + * @dest: Where to copy the string to + * @src: Where to copy the string from + * @count: Size of destination buffer + * + * Copy the string, or as much of it as fits, into the dest buffer. + * The routine returns the number of characters copied (not including + * the trailing NUL) or -E2BIG if the destination buffer wasn't big enough. + * The behavior is undefined if the string buffers overlap. + * The destination buffer is always NUL terminated, unless it's zero-sized. 
+ * + * Preferred to strlcpy() since the API doesn't require reading memory + * from the src string beyond the specified "count" bytes, and since + * the return value is easier to error-check than strlcpy()'s. + * In addition, the implementation is robust to the string changing out + * from underneath it, unlike the current strlcpy() implementation. + * + * Preferred to strncpy() since it always returns a valid string, and + * doesn't unnecessarily force the tail of the destination buffer to be + * zeroed. If the zeroing is desired, it's likely cleaner to use strscpy() + * with an overflow test, then just memset() the tail of the dest buffer. + */ +ssize_t strscpy(char *dest, const char *src, size_t count) +{ + const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; + size_t max = count; + long res = 0; + + if (count == 0) + return -E2BIG; + +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + /* + * If src is unaligned, don't cross a page boundary, + * since we don't know if the next page is mapped. + */ + if ((long)src & (sizeof(long) - 1)) { + size_t limit = PAGE_SIZE - ((long)src & (PAGE_SIZE - 1)); + if (limit < max) + max = limit; + } +#else + /* If src or dest is unaligned, don't do word-at-a-time. */ + if (((long) dest | (long) src) & (sizeof(long) - 1)) + max = 0; +#endif + + while (max >= sizeof(unsigned long)) { + unsigned long c, data; + + c = *(unsigned long *)(src+res); + *(unsigned long *)(dest+res) = c; + if (has_zero(c, &data, &constants)) { + data = prep_zero_mask(c, data, &constants); + data = create_zero_mask(data); + return res + find_zero(data); + } + res += sizeof(unsigned long); + count -= sizeof(unsigned long); + max -= sizeof(unsigned long); + } + + while (count) { + char c; + + c = src[res]; + dest[res] = c; + if (!c) + return res; + res++; + count--; + } + + /* Hit buffer length without finding a NUL; force NUL-termination. 
*/ + if (res) + dest[res-1] = '\0'; + + return -E2BIG; +} +#endif //kernel 4.3.0 \ No newline at end of file diff --git a/compat/string_compat.h b/compat/string_compat.h new file mode 100644 index 0000000..ba85bae --- /dev/null +++ b/compat/string_compat.h @@ -0,0 +1,11 @@ +#ifndef REDPILL_STRING_COMPAT_H +#define REDPILL_STRING_COMPAT_H + +#include <linux/version.h> //KERNEL_VERSION() +#include <linux/types.h> //ssize_t + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(4,3,0) +ssize_t __must_check strscpy(char *, const char *, size_t); +#endif + +#endif //REDPILL_STRING_COMPAT_H diff --git a/compat/toolkit/drivers/usb/storage/usb.h b/compat/toolkit/drivers/usb/storage/usb.h new file mode 100644 index 0000000..a55108e --- /dev/null +++ b/compat/toolkit/drivers/usb/storage/usb.h @@ -0,0 +1,135 @@ +/** + * Cherry-picked USB.h internal structures from Linux v4.4.x. If possible avoid using anything from this file like fire. + * + * ORIGINAL FILE HEADER PRESERVED BELOW + * ------------------------------------ + * Driver for USB Mass Storage compliant devices + * Main Header File + * + * Current development and maintenance by: + * (c) 1999-2002 Matthew Dharm (mdharm-usb@one-eyed-alien.net) + * + * Initial work by: + * (c) 1999 Michael Gee (michael@linuxspecific.com) + * + * This driver is based on the 'USB Mass Storage Class' document. This + * describes in detail the protocol used to communicate with such + * devices. Clearly, the designers had SCSI and ATAPI commands in + * mind when they created this document. The commands are all very + * similar to commands in the SCSI-II and ATAPI specifications. + * + * It is important to note that in a number of cases this class + * exhibits class-specific exemptions from the USB specification. + * Notably the usage of NAK, STALL and ACK differs from the norm, in + * that they are used to communicate wait, failed and OK on commands. + * + * Also, for certain devices, the interrupt endpoint is used to convey + * status of a command.
+ * + * Please see http://www.one-eyed-alien.net/~mdharm/linux-usb for more + * information about this driver. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef REDPILL_USB_H +#define REDPILL_USB_H + +#warning "Using compatibility file for drivers/usb/storage/usb.h - if possible do NOT compile using toolkit" + +//This structure didn't change substantially since v2.6 days; 5.14 is simply the newest one we checked - it will +// probably remain unchanged for years to come +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0) && LINUX_VERSION_CODE < KERNEL_VERSION(5,14,0) //v3.3 - v5.14 +#include //struct usb_sg_request + +struct us_data; +typedef int (*trans_cmnd)(struct scsi_cmnd *, struct us_data*); +typedef int (*trans_reset)(struct us_data*); +typedef void (*proto_cmnd)(struct scsi_cmnd*, struct us_data*); +typedef void (*extra_data_destructor)(void *); /* extra data destructor */ +typedef void (*pm_hook)(struct us_data *, int); /* power management hook */ + +struct us_data { + /* The device we're working with + * It's important to note: + * (o) you must hold dev_mutex to change pusb_dev + */ + struct mutex dev_mutex; /* protect pusb_dev */ + struct usb_device *pusb_dev; /* this usb_device */ + struct usb_interface *pusb_intf; /* this interface */ + struct us_unusual_dev *unusual_dev; /* device-filter entry */ + 
unsigned long fflags; /* fixed flags from filter */ + unsigned long dflags; /* dynamic atomic bitflags */ + unsigned int send_bulk_pipe; /* cached pipe values */ + unsigned int recv_bulk_pipe; + unsigned int send_ctrl_pipe; + unsigned int recv_ctrl_pipe; + unsigned int recv_intr_pipe; + + /* information about the device */ + char *transport_name; + char *protocol_name; + __le32 bcs_signature; + u8 subclass; + u8 protocol; + u8 max_lun; + + u8 ifnum; /* interface number */ + u8 ep_bInterval; /* interrupt interval */ + + /* function pointers for this device */ + trans_cmnd transport; /* transport function */ + trans_reset transport_reset; /* transport device reset */ + proto_cmnd proto_handler; /* protocol handler */ + + /* SCSI interfaces */ + struct scsi_cmnd *srb; /* current srb */ + unsigned int tag; /* current dCBWTag */ + char scsi_name[32]; /* scsi_host name */ + + /* control and bulk communications data */ + struct urb *current_urb; /* USB requests */ + struct usb_ctrlrequest *cr; /* control requests */ + struct usb_sg_request current_sg; /* scatter-gather req. 
*/ + unsigned char *iobuf; /* I/O buffer */ + dma_addr_t iobuf_dma; /* buffer DMA addresses */ + struct task_struct *ctl_thread; /* the control thread */ + + /* mutual exclusion and synchronization structures */ + struct completion cmnd_ready; /* to sleep thread on */ + struct completion notify; /* thread begin/end */ + wait_queue_head_t delay_wait; /* wait during reset */ + struct delayed_work scan_dwork; /* for async scanning */ + + /* subdriver information */ + void *extra; /* Any extra data */ + extra_data_destructor extra_destructor;/* extra data destructor */ +#ifdef CONFIG_PM + pm_hook suspend_resume_hook; +#endif + + /* hacks for READ CAPACITY bug handling */ + int use_last_sector_hacks; + int last_sector_retries; +}; +#endif //LINUX_VERSION_CODE check + + +struct Scsi_Host; +static inline struct us_data *host_to_us(struct Scsi_Host *host) { + return (struct us_data *) host->hostdata; +} + +#endif //REDPILL_USB_H \ No newline at end of file diff --git a/compat/toolkit/fs/proc/internal.h b/compat/toolkit/fs/proc/internal.h new file mode 100644 index 0000000..2ed324f --- /dev/null +++ b/compat/toolkit/fs/proc/internal.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Internal procfs definitions + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This file contains curated definitions from original fs/proc/internal.h file which we (unfortunately) need to access. + * As the internal.h, as the name implies, is meant for in-tree use only official toolkits lack it. + * Kernel version constrains here are taken from the real kernel source tree to prevent including wrong structs + * definitions. Please keep it neat as mismatch will cause very hard to debug problems. 
+ */ + +#warning "Using compatibility file for fs/proc/internal.h - if possible do NOT compile using toolkit" + +#include +#include +#include +#include + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0) && LINUX_VERSION_CODE < KERNEL_VERSION(3,19,0) //v3.10 - v3.18 +struct proc_dir_entry { + unsigned int low_ino; + umode_t mode; + nlink_t nlink; + kuid_t uid; + kgid_t gid; + loff_t size; + const struct inode_operations *proc_iops; + const struct file_operations *proc_fops; + struct proc_dir_entry *next, *parent, *subdir; + void *data; + atomic_t count; /* use count */ + atomic_t in_use; /* number of callers into module in progress; */ + /* negative -> it's going away RSN */ + struct completion *pde_unload_completion; + struct list_head pde_openers; /* who did ->open, but not ->release */ + spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ + u8 namelen; + char name[]; +}; + +union proc_op { + int (*proc_get_link)(struct dentry *, struct path *); + int (*proc_read)(struct task_struct *task, char *page); + int (*proc_show)(struct seq_file *m, + struct pid_namespace *ns, struct pid *pid, + struct task_struct *task); +}; + +struct proc_inode { + struct pid *pid; + int fd; + union proc_op op; + struct proc_dir_entry *pde; + struct ctl_table_header *sysctl; + struct ctl_table *sysctl_entry; + struct proc_ns ns; + struct inode vfs_inode; +}; + +//See https://github.com/torvalds/linux/commit/771187d61bb3cbaf62c492ec3b8b789933f7691e +//v3.19 - it's going away RSN */ + struct completion *pde_unload_completion; + struct list_head pde_openers; /* who did ->open, but not ->release */ + spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ + u8 namelen; + char name[]; +}; + +union proc_op { + int (*proc_get_link)(struct dentry *, struct path *); + int (*proc_show)(struct seq_file *m, + struct pid_namespace *ns, struct pid *pid, + struct task_struct *task); +}; + +struct proc_inode { + struct pid *pid; + int fd; + union proc_op op; + struct 
proc_dir_entry *pde; + struct ctl_table_header *sysctl; + struct ctl_table *sysctl_entry; + const struct proc_ns_operations *ns_ops; + struct inode vfs_inode; +}; +#endif + +//These methods are the same forever +static inline struct proc_inode *PROC_I(const struct inode *inode) +{ + return container_of(inode, struct proc_inode, vfs_inode); +} + +static inline struct proc_dir_entry *PDE(const struct inode *inode) +{ + return PROC_I(inode)->pde; +} \ No newline at end of file diff --git a/compat/toolkit/include/.gitkeep b/compat/toolkit/include/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/compile-lkms.sh b/compile-lkms.sh new file mode 100644 index 0000000..3675160 --- /dev/null +++ b/compile-lkms.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +set -e + +TMP_PATH="/tmp" +DEST_PATH="output" +TOOLKIT_VER="7.1" + +mkdir -p "${DEST_PATH}" + +if [ -f ../arpl/PLATFORMS ]; then + cp ../arpl/PLATFORMS PLATFORMS +else + curl -fsLO "https://github.com/fbelavenuto/arpl/raw/main/PLATFORMS" +fi + +function compileLkm() { + PLATFORM=$1 + KVER=$2 + OUT_PATH="${TMP_PATH}/${PLATFORM}" + mkdir -p "${OUT_PATH}" + # Compile using docker + docker run --rm -t -v "${OUT_PATH}":/output -v "${PWD}":/input \ + fbelavenuto/syno-toolkit:${PLATFORM}-${TOOLKIT_VER} compile-lkm + mv "${OUT_PATH}/redpill-dev.ko" "${DEST_PATH}/rp-${PLATFORM}-${KVER}-dev.ko" + rm -f "${DEST_PATH}/rp-${PLATFORM}-${KVER}-dev.ko.gz" + gzip "${DEST_PATH}/rp-${PLATFORM}-${KVER}-dev.ko" + mv "${OUT_PATH}/redpill-prod.ko" "${DEST_PATH}/rp-${PLATFORM}-${KVER}-prod.ko" + rm -f "${DEST_PATH}/rp-${PLATFORM}-${KVER}-prod.ko.gz" + gzip "${DEST_PATH}/rp-${PLATFORM}-${KVER}-prod.ko" + rm -rf "${OUT_PATH}" +} + +# Main: build all platforms in parallel, then wait for each job so a failed build fails the script (set -e) +while read -r PLATFORM KVER; do + docker pull "fbelavenuto/syno-toolkit:${PLATFORM}-${TOOLKIT_VER}" + compileLkm "${PLATFORM}" "${KVER}" & PIDS+=("$!") +done < PLATFORMS +for PID in "${PIDS[@]}"; do wait "${PID}"; done \ No newline at end of file diff --git a/config/.platforms.h.swp b/config/.platforms.h.swp new file mode 100644 index 0000000..e615aa2 Binary
files /dev/null and b/config/.platforms.h.swp differ diff --git a/config/cmdline_delegate.c b/config/cmdline_delegate.c new file mode 100644 index 0000000..755f771 --- /dev/null +++ b/config/cmdline_delegate.c @@ -0,0 +1,455 @@ +#include "cmdline_delegate.h" +#include "../common.h" //commonly used headers in this module +#include "../internal/call_protected.h" //used to call cmdline_proc_show() +#include <linux/seq_file.h> //struct seq_file + +#define ensure_cmdline_param(cmdline_param) \ + if (strncmp(param_pointer, cmdline_param, strlen_static(cmdline_param)) != 0) { return false; } + +#define ensure_cmdline_token(cmdline_param) \ + if (strncmp(param_pointer, cmdline_param, sizeof(cmdline_param)) != 0) { return false; } + +/** + * Extracts device model (syno_hw_version=) from kernel cmd line + * + * @param model pointer to save model to + * @param param_pointer currently processed token + * @return true on match, false if param didn't match + */ +static bool extract_hw(syno_hw *model, const char *param_pointer) +{ + ensure_cmdline_param(CMDLINE_KT_HW); + + if (strscpy((char *)model, param_pointer + strlen_static(CMDLINE_KT_HW), sizeof(syno_hw)) < 0) + pr_loc_wrn("HW version truncated to %zu", sizeof(syno_hw)-1); + + pr_loc_dbg("HW version set to: %s", (char *)model); + + return true; +} + +/** + * Extracts serial number (sn=) from kernel cmd line + * + * @param sn pointer to save s/n to + * @param param_pointer currently processed token + * @return true on match, false if param didn't match + */ +static bool extract_sn(serial_no *sn, const char *param_pointer) +{ + ensure_cmdline_param(CMDLINE_KT_SN); + + if(strscpy((char *)sn, param_pointer + strlen_static(CMDLINE_KT_SN), sizeof(serial_no)) < 0) + pr_loc_wrn("S/N truncated to %zu", sizeof(serial_no)-1); + + pr_loc_dbg("S/N set to: %s", (char *)sn); + + return true; +} + +static bool extract_boot_media_type(struct boot_media *boot_media, const char *param_pointer) +{ + ensure_cmdline_param(CMDLINE_KT_SATADOM); + + char value =
param_pointer[strlen_static(CMDLINE_KT_SATADOM)]; + + switch (value) { + case CMDLINE_KT_SATADOM_NATIVE: + boot_media->type = BOOT_MEDIA_SATA_DOM; + pr_loc_dbg("Boot media SATADOM (native) requested"); + break; + + case CMDLINE_KT_SATADOM_FAKE: + boot_media->type = BOOT_MEDIA_SATA_DISK; + pr_loc_dbg("Boot media SATADISK (fake) requested"); + break; + + case CMDLINE_KT_SATADOM_DISABLED: + //There's no point to set that option but it's not an error + pr_loc_wrn("SATA-based boot media disabled (default will be used, %s0 is a noop)", CMDLINE_KT_SATADOM); + break; + + default: + pr_loc_err("Option \"%s%c\" is invalid (value should be 0/1/2)", CMDLINE_KT_SATADOM, value); + } + + return true; +} + +/** + * Extracts VID override (vid=) from kernel cmd line + * + * @param user_vid pointer to save VID + * @param param_pointer currently processed token + * @return true on match, false if param didn't match + */ +static bool extract_vid(device_id *user_vid, const char *param_pointer) +{ + ensure_cmdline_param(CMDLINE_CT_VID); + + long long numeric_param; + int tmp_call_res = kstrtoll(param_pointer + strlen_static(CMDLINE_CT_VID), 0, &numeric_param); + if (unlikely(tmp_call_res != 0)) { + pr_loc_err("Call to %s() failed => %d", "kstrtoll", tmp_call_res); + return true; + } + + if (unlikely(numeric_param > VID_PID_MAX)) { + pr_loc_err("Cmdline %s is invalid (value larger than %d)", CMDLINE_CT_VID, VID_PID_MAX); + return true; + } + + if (unlikely(*user_vid) != 0) + pr_loc_wrn( + "VID was already set to 0x%04x by a previous instance of %s - it will be changed now to 0x%04x", + *user_vid, CMDLINE_CT_VID, (unsigned int)numeric_param); + + *user_vid = (unsigned int)numeric_param; + pr_loc_dbg("VID override: 0x%04x", *user_vid); + + return true; +} + +/** + * Extracts PID override (pid=) from kernel cmd line + * + * @param user_pid pointer to save PID + * @param param_pointer currently processed token + * @return true on match, false if param didn't match + */ +static bool 
extract_pid(device_id *user_pid, const char *param_pointer) +{ + ensure_cmdline_param(CMDLINE_CT_PID); + + long long numeric_param; + int tmp_call_res = kstrtoll(param_pointer + strlen_static(CMDLINE_CT_PID), 0, &numeric_param); + if (unlikely(tmp_call_res != 0)) { + pr_loc_err("Call to %s() failed => %d", "kstrtoll", tmp_call_res); + return true; + } + + if (unlikely(numeric_param > VID_PID_MAX)) { + pr_loc_err("Cmdline %s is invalid (value larger than %d)", CMDLINE_CT_PID, VID_PID_MAX); + return true; + } + + if (unlikely(*user_pid) != 0) + pr_loc_wrn( + "PID was already set to 0x%04x by a previous instance of %s - it will be changed now to 0x%04x", + *user_pid, CMDLINE_CT_PID, (unsigned int)numeric_param); + + *user_pid = (unsigned int)numeric_param; + pr_loc_dbg("PID override: 0x%04x", *user_pid); + + return true; +} + +/** + * Extracts MFG mode enable switch (mfg) from kernel cmd line + * + * @param is_mfg_boot pointer to flag + * @param param_pointer currently processed token + * @return true on match, false if param didn't match + */ +static bool extract_mfg(bool *is_mfg_boot, const char *param_pointer) +{ + ensure_cmdline_token(CMDLINE_CT_MFG); + + *is_mfg_boot = true; + pr_loc_dbg("MFG boot requested"); + + return true; +} + +/** + * Extracts maximum size of SATA DOM (dom_szmax=) from kernel cmd line + */ +static bool extract_dom_max_size(struct boot_media *boot_media, const char *param_pointer) +{ + ensure_cmdline_param(CMDLINE_CT_DOM_SZMAX); + + long size_mib = simple_strtol(param_pointer + strlen_static(CMDLINE_CT_DOM_SZMAX), NULL, 10); + if (size_mib <= 0) { + pr_loc_err("Invalid maximum size of SATA DoM (\"%s=%ld\")", CMDLINE_CT_DOM_SZMAX, size_mib); + return true; + } + + boot_media->dom_size_mib = size_mib; + pr_loc_dbg("Set maximum SATA DoM to %ld", size_mib); + + return true; +} + +/** + * Extracts MFG mode enable switch (syno_port_thaw=<1|0>) from kernel cmd line + * + * @param port_thaw pointer to flag + * @param param_pointer currently 
processed token + * @return true on match, false if param didn't match + */ +static bool extract_port_thaw(bool *port_thaw, const char *param_pointer) +{ + ensure_cmdline_param(CMDLINE_KT_THAW); + + short value = param_pointer[strlen_static(CMDLINE_KT_THAW)]; + + if (value == '0') { + *port_thaw = false; + goto out_found; + } + if (value == '1') { + *port_thaw = true; + goto out_found; + } + + if (value == '\0') { + pr_loc_err("Option \"%s%d\" is invalid (value should be 0 or 1)", CMDLINE_KT_THAW, value); + return true; + } + + out_found: + pr_loc_dbg("Port thaw set to: %d", port_thaw?1:0); + return true; +} + +/** + * Extracts number of expected network interfaces (netif_num=) from kernel cmd line + * + * @param netif_num pointer to save number + * @param param_pointer currently processed token + * @return true on match, false if param didn't match + */ +static bool extract_netif_num(unsigned short *netif_num, const char *param_pointer) +{ + ensure_cmdline_param(CMDLINE_KT_NETIF_NUM); + + short value = *(param_pointer + strlen_static(CMDLINE_KT_NETIF_NUM)) - 48; //ASCII: 0=48 and 9=57 + + if (value == 0) { + pr_loc_wrn("You specified no network interfaces (\"%s=0\")", CMDLINE_KT_NETIF_NUM); + return true; + } + + if (value < 1 || value > 9) { + pr_loc_err("Invalid number of network interfaces set (\"%s%d\")", CMDLINE_KT_NETIF_NUM, value); + return true; + } + + *netif_num = value; + pr_loc_dbg("Declared network ifaces # as %d", value); + + return true; +} + +/** + * Extracts network interfaces MAC addresses (mac1...mac4= **OR** macs=) + * + * Note: mixing two notations may lead to undefined behaviors + * + * @param macs pointer to save macs + * @param param_pointer currently processed token + * @return true on match, false if param didn't match + */ +static bool extract_netif_macs(mac_address *macs[MAX_NET_IFACES], const char *param_pointer) +{ + if (strncmp(param_pointer, CMDLINE_KT_MACS, strlen_static(CMDLINE_KT_MACS)) == 0) { + unsigned short i = 0; + const 
char *pBegin = param_pointer + strlen_static(CMDLINE_KT_MACS); + char *pEnd = strchr(pBegin, ','); + + while (NULL != pEnd && MAX_NET_IFACES > i) { + *pEnd = '\0'; + macs[i] = kmalloc(sizeof(mac_address), GFP_KERNEL); + if (unlikely(!macs[i])) { + pr_loc_crt("kernel memory alloc failure - tried to allocate %lu bytes for macs[%d]", sizeof(mac_address), + i); + goto out_found; + } + if(strscpy((char *)macs[i], pBegin, sizeof(mac_address)) < 0) + pr_loc_wrn("MAC #%d truncated to %zu", i+1, sizeof(mac_address)-1); + pr_loc_dbg("Set MAC #%d: %s", i+1, (char *)macs[i]); + pBegin = pEnd + 1; + pEnd = strchr(pBegin, ','); + i++; + } + + if ('\0' != *pBegin && MAX_NET_IFACES > i) { + macs[i] = kmalloc(sizeof(mac_address), GFP_KERNEL); + if (unlikely(!macs[i])) { + pr_loc_crt("kernel memory alloc failure - tried to allocate %lu bytes for macs[%d]", sizeof(mac_address), + i); + goto out_found; + } + if(strscpy((char *)macs[i], pBegin, sizeof(mac_address)) < 0) + pr_loc_wrn("MAC #%d truncated to %zu", i+1, sizeof(mac_address)-1); + pr_loc_dbg("Set MAC #%d: %s", i+1, (char *)macs[i]); + } + + goto out_found; + } + + //mac1=...mac4= are valid options. ASCII for 1 is 49, ASCII for 4 is 52 + if (strncmp(param_pointer, "mac", 3) != 0 || *(param_pointer + 4) != '=' || *(param_pointer + 3) < 49 || + *(param_pointer + 3) > 52) + return false; + + //Find free spot + unsigned short i = 0; + for (; i < MAX_NET_IFACES; i++) { + if (macs[i]) + continue; + + macs[i] = kmalloc(sizeof(mac_address), GFP_KERNEL); + if (unlikely(!macs[i])) { + pr_loc_crt("kernel memory alloc failure - tried to allocate %lu bytes for macs[%d]", sizeof(mac_address), + i); + goto out_found; + } + + if(strscpy((char *)macs[i], param_pointer + strlen_static(CMDLINE_KT_MAC1), sizeof(mac_address)) < 0) + pr_loc_wrn("MAC #%d truncated to %zu", i+1, sizeof(mac_address)-1); + + pr_loc_dbg("Set MAC #%d: %s", i+1, (char *)macs[i]); + goto out_found; + } + + pr_loc_err("You set more than MAC addresses! 
Only first %d will be honored.", MAX_NET_IFACES); + + out_found: + return true; +} + +static bool report_unrecognized_option(const char *param_pointer) +{ + pr_loc_dbg("Option \"%s\" not recognized - ignoring", param_pointer); + + return true; +} + +/************************************************* End of extractors **************************************************/ + +static char cmdline_cache[CMDLINE_MAX] = { '\0' }; +/** + * Extracts the cmdline from kernel and caches it for later use + * + * The method we use here may seem weird but it is, believe or not, the most direct one available. Kernel cmdline + * internally is stored in a "saved_command_line" variable (and few derivatives) which isn't exported for modules in + * any way (at least on x86). The only semi-direct way to get it is to call the method responsible for /proc/cmdline) + */ +static int extract_kernel_cmdline(void) +{ + struct seq_file cmdline_itr = { + .buf = cmdline_cache, + .size = CMDLINE_MAX + }; + + int out = _cmdline_proc_show(&cmdline_itr, 0); + if (out != 0) + return out; + + pr_loc_dbg("Cmdline count: %d", (unsigned int)cmdline_itr.count); + if (unlikely(cmdline_itr.count == CMDLINE_MAX)) //if the kernel line is >1K + pr_loc_wrn("Cmdline may have been truncated to %d", CMDLINE_MAX); + + return 0; +} + +/** + * Returns kernel cmdline up to the length specified by maxlen + * + * @param cmdline_out + * @param maxlen + * @return 0 on success, -E2BIG if buffer was too small (but the operation succeeded up to maxlen), -E for error + */ +long get_kernel_cmdline(char *cmdline_out, size_t maxlen) +{ + if (unlikely(cmdline_cache[0] == '\0')) { + int out = extract_kernel_cmdline(); + if (out != 0) { + pr_loc_err("Failed to extract kernel cmdline"); + return out; + } + } + + if (unlikely(maxlen > CMDLINE_MAX)) + maxlen = CMDLINE_MAX; + + return strscpy(cmdline_out, cmdline_cache, maxlen); +} + +#define ADD_BLACKLIST_ENTRY(idx, token) kmalloc_or_exit_int(cmdline_blacklist[idx], sizeof(token)); \ + 
strcpy((char *)cmdline_blacklist[idx], token); \ + pr_loc_dbg("Add cmdline blacklist \"%s\" @ %d", \ + (char *)cmdline_blacklist[idx], idx); +int populate_cmdline_blacklist(cmdline_token *cmdline_blacklist[MAX_BLACKLISTED_CMDLINE_TOKENS], syno_hw *model) +{ + //Currently, this list is static. However, it's prepared to be dynamic based on the model + //Make sure you don't go over MAX_BLACKLISTED_CMDLINE_TOKENS (and if so adjust it) + ADD_BLACKLIST_ENTRY(0, CMDLINE_CT_VID); + ADD_BLACKLIST_ENTRY(1, CMDLINE_CT_PID); + ADD_BLACKLIST_ENTRY(2, CMDLINE_CT_MFG); + ADD_BLACKLIST_ENTRY(3, CMDLINE_CT_DOM_SZMAX); + ADD_BLACKLIST_ENTRY(4, CMDLINE_KT_ELEVATOR); + ADD_BLACKLIST_ENTRY(5, CMDLINE_KT_LOGLEVEL); + ADD_BLACKLIST_ENTRY(6, CMDLINE_KT_PK_BUFFER); + ADD_BLACKLIST_ENTRY(7, CMDLINE_KT_EARLY_PK); + ADD_BLACKLIST_ENTRY(8, CMDLINE_KT_THAW); + +#ifndef NATIVE_SATA_DOM_SUPPORTED //on kernels without SATA DOM support we shouldn't reveal that it's a SATA DOM-boot + ADD_BLACKLIST_ENTRY(9, CMDLINE_KT_SATADOM); +#endif + + return 0; +} + +int extract_config_from_cmdline(struct runtime_config *config) +{ + int out = 0; + char *cmdline_txt; + kzalloc_or_exit_int(cmdline_txt, strlen_to_size(CMDLINE_MAX)); + + if(get_kernel_cmdline(cmdline_txt, CMDLINE_MAX) <= 0) { + pr_loc_crt("Failed to extract cmdline"); + out = -EIO; + goto exit_free; + } + + pr_loc_dbg("Cmdline: %s", cmdline_txt); + + /** + * Temporary variables + */ + unsigned int param_counter = 0; + char *single_param_chunk; //Pointer to the beginning of the cmdline token + DBG_ALLOW_UNUSED(param_counter); + + while ((single_param_chunk = strsep(&cmdline_txt, CMDLINE_SEP)) != NULL ) { + if (unlikely(single_param_chunk[0] == '\0')) //Skip empty params (e.g. 
last one) + continue; + pr_loc_dbg("Param #%d: |%s|", param_counter++, single_param_chunk); + + //Stop after the first one matches + extract_hw(&config->hw, single_param_chunk) || + extract_sn(&config->sn, single_param_chunk) || + extract_boot_media_type(&config->boot_media, single_param_chunk) || + extract_vid(&config->boot_media.vid, single_param_chunk) || + extract_pid(&config->boot_media.pid, single_param_chunk) || + extract_dom_max_size(&config->boot_media, single_param_chunk) || + extract_mfg(&config->boot_media.mfg_mode, single_param_chunk) || + extract_port_thaw(&config->port_thaw, single_param_chunk) || + extract_netif_num(&config->netif_num, single_param_chunk) || + extract_netif_macs(config->macs, single_param_chunk) || + report_unrecognized_option(single_param_chunk) ; + } + + if (populate_cmdline_blacklist(config->cmdline_blacklist, &config->hw) != 0) { + out = -EIO; + goto exit_free; + } + + pr_loc_inf("CmdLine processed successfully, tokens=%d", param_counter); + + exit_free: + kfree(cmdline_txt); + return out; +} diff --git a/config/cmdline_delegate.h b/config/cmdline_delegate.h new file mode 100644 index 0000000..42d5510 --- /dev/null +++ b/config/cmdline_delegate.h @@ -0,0 +1,31 @@ +#ifndef REDPILLLKM_CMDLINE_DELEGATE_H +#define REDPILLLKM_CMDLINE_DELEGATE_H + +#include "runtime_config.h" +#include "cmdline_opts.h" + +/** + * Provides an easy access to kernel cmdline + * + * Internally in the kernel code it is available as "saved_command_line". However that variable is not accessible for + * modules. This function populates a char buffer with the cmdline extracted using other methods. + * + * WARNING: if something (e.g. sanitize cmdline) overrides the cmdline this method will return the overridden one! + * However, this method caches the cmdline, so if you call it once it will cache the original one internally. 
+ * + * @param cmdline_out A pointer to your buffer to save the cmdline + * @param maxlen Your buffer space (in general you should use CMDLINE_MAX) + * @return cmdline length on success or -E on error + */ +long get_kernel_cmdline(char *cmdline_out, unsigned long maxlen); + +/** + * Extracts & processes parameters from kernel cmdline + * + * Note: it's not guaranteed that the config will be valid. Check runtime_config.h. + * + * @param config pointer to save configuration + */ +int extract_config_from_cmdline(struct runtime_config *config); + +#endif //REDPILLLKM_CMDLINE_DELEGATE_H diff --git a/config/cmdline_opts.h b/config/cmdline_opts.h new file mode 100644 index 0000000..77e5683 --- /dev/null +++ b/config/cmdline_opts.h @@ -0,0 +1,44 @@ +#ifndef REDPILL_CMDLINE_OPTS_H +#define REDPILL_CMDLINE_OPTS_H + +#define CMDLINE_MAX 1024 //Max length of cmdline expected/processed; if longer a warning will be emitted +#define CMDLINE_SEP "\t\n " + +/** + * Kernel command line tokens. For clarity keep them separated. + * CT = custom token + * KT = kernel token (default or syno) + * + * All should be defined in the .h to allow accessing outside for hints in errors. 
+ */ +#define CMDLINE_CT_VID "vid=" //Boot media Vendor ID override +#define CMDLINE_CT_PID "pid=" //Boot media Product ID override +#define CMDLINE_CT_MFG "mfg" //VID & PID override will use force-reinstall VID/PID combo +#define CMDLINE_CT_DOM_SZMAX "dom_szmax=" //Max size of SATA device (MiB) to be considered a DOM (usually you should NOT use this) + +//Standard Linux cmdline tokens +#define CMDLINE_KT_ELEVATOR "elevator=" //Sets I/O scheduler (we use it to load RP LKM earlier than normally possible) +#define CMDLINE_KT_LOGLEVEL "loglevel=" +#define CMDLINE_KT_PK_BUFFER "log_buf_len=" //Length of the printk ring buffer (should usually be increased for debug) +#define CMDLINE_KT_EARLY_PK "earlyprintk" + +//Syno-specific cmdline tokens +#define CMDLINE_KT_HW "syno_hw_version=" +#define CMDLINE_KT_THAW "syno_port_thaw=" //?? + +//0|1 - whether to use native SATA Disk-on-Module for boot drive (syno); 2 - use fake/emulated SATA DOM (rp) +#define CMDLINE_KT_SATADOM "synoboot_satadom=" +# define CMDLINE_KT_SATADOM_DISABLED '0' +# define CMDLINE_KT_SATADOM_NATIVE '1' +# define CMDLINE_KT_SATADOM_FAKE '2' + +#define CMDLINE_KT_SN "sn=" +#define CMDLINE_KT_NETIF_NUM "netif_num=" +#define CMDLINE_KT_MACS "macs=" +//You CANNOT simply add more macN= - DSM kernel only uses 4. 
If they ever support >4 you need to modify cmdline handling +#define CMDLINE_KT_MAC1 "mac1=" +#define CMDLINE_KT_MAC2 "mac2=" +#define CMDLINE_KT_MAC3 "mac3=" +#define CMDLINE_KT_MAC4 "mac4=" + +#endif //REDPILL_CMDLINE_OPTS_H diff --git a/config/platform_types.h b/config/platform_types.h new file mode 100644 index 0000000..da15f6e --- /dev/null +++ b/config/platform_types.h @@ -0,0 +1,126 @@ +#ifndef REDPILL_PLATFORM_TYPES_H +#define REDPILL_PLATFORM_TYPES_H + +#include "vpci_types.h" //vpci_device_stub, MAX_VPCI_DEVS + +#ifndef RP_MODULE_TARGET_VER +#error "The RP_MODULE_TARGET_VER is not defined - it is required to properly set VTKs" +#endif + +//All HWMON_SYS enums defined here are for internal RP use only. Normally these have long names but duplicating names +// across multiple platforms is wasteful (and causes platforms.h compilation unit to grow) +//While adding new constants here MAKE SURE TO NOT CONFLICT with existing ones defining names in synobios.h (here we +// postfixed everything with _ID) +enum hwmon_sys_thermal_zone_id { + //i.e. "non-existent zone" so that we don't need another flag/number to indicated # of supported zones + HWMON_SYS_TZONE_NULL_ID = 0, + HWMON_SYS_TZONE_REMOTE1_ID, + HWMON_SYS_TZONE_REMOTE2_ID, + HWMON_SYS_TZONE_LOCAL_ID, + HWMON_SYS_TZONE_SYSTEM_ID, + HWMON_SYS_TZONE_ADT1_LOC_ID, + HWMON_SYS_TZONE_ADT2_LOC_ID, +}; +#define HWMON_SYS_THERMAL_ZONE_IDS 5 //number of thermal zones minus the fake NULL_ID + +enum hwmon_sys_voltage_sensor_id { + //i.e. "non-existent sensor type" so that we don't need another flag/number to indicated # of supported ones + HWMON_SYS_VSENS_NULL_ID = 0, + HWMON_SYS_VSENS_VCC_ID, + HWMON_SYS_VSENS_VPP_ID, + HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, + HWMON_SYS_VSENS_V12_ID, + HWMON_SYS_VSENS_ADT1_V33_ID, + HWMON_SYS_VSENS_ADT2_V33_ID, +}; +#define HWMON_SYS_VOLTAGE_SENSOR_IDS 7 //number of voltage sensors minus the fake NULL_ID + +enum hwmon_sys_fan_rpm_id { + //i.e. 
"non-existent fan" so that we don't need another flag/number to indicated # of supported fans + HWMON_SYS_FAN_NULL_ID = 0, + HWMON_SYS_FAN1_ID, + HWMON_SYS_FAN2_ID, + HWMON_SYS_FAN3_ID, + HWMON_SYS_FAN4_ID, +}; +#define HWMON_SYS_FAN_RPM_IDS 4 + +enum hwmon_sys_hdd_bp_id { + //i.e. "non-existent backplane sensor" so that we don't need another flag/number to indicated # of supported ones + HWMON_SYS_HDD_BP_NULL_ID = 0, + HWMON_SYS_HDD_BP_DETECT_ID, + HWMON_SYS_HDD_BP_ENABLE_ID, +}; +#define HWMON_SYS_HDD_BP_IDS 2 //number of HDD backplane sensors minus the fake NULL_ID + +enum hw_psu_sensor_id { + //i.e. "non-existent PSU sensor" so that we don't need another flag/number to indicated # of supported ones + HWMON_PSU_NULL_ID = 0, + HWMON_PSU_PWR_IN_ID, + HWMON_PSU_PWR_OUT_ID, +#if RP_MODULE_TARGET_VER == 6 + HWMON_PSU_TEMP_ID, +#elif RP_MODULE_TARGET_VER == 7 + HWMON_PSU_TEMP1_ID, + HWMON_PSU_TEMP2_ID, + HWMON_PSU_TEMP3_ID, + HWMON_PSU_FAN_VOLT, +#endif + HWMON_PSU_FAN_RPM_ID, + HWMON_PSU_STATUS_ID, +}; +#if RP_MODULE_TARGET_VER == 6 +#define HWMON_PSU_SENSOR_IDS 2 //number of power supply sensors minus the fake NULL_ID +#elif RP_MODULE_TARGET_VER == 7 +#define HWMON_PSU_SENSOR_IDS 8 //number of power supply sensors minus the fake NULL_ID +#else +#error "Unknown RP_MODULE_TARGET_VER version specified" +#endif + +enum hwmon_sys_current_id { + //i.e. 
"non-existent current sensor" so that we don't need another flag/number to indicated # of supported ones + HWMON_SYS_CURR_NULL_ID = 0, + HWMON_SYS_CURR_ADC_ID, +}; +#define HWMON_SYS_CURRENT_IDS 1 //number of current sensors minus the fake NULL_ID + +struct hw_config { + const char *name; //the longest so far is "RR36015xs+++" (12+1) + + const struct vpci_device_stub pci_stubs[MAX_VPCI_DEVS]; + + //All custom flags + const bool emulate_rtc:1; + const bool swap_serial:1; //Whether ttyS0 and ttyS1 are swapped (reverses CONFIG_SYNO_X86_SERIAL_PORT_SWAP) + const bool reinit_ttyS0:1; //Should the ttyS0 be forcefully re-initialized after module loads + const bool fix_disk_led_ctrl:1; //Disabled libata-scsi bespoke disk led control (which often crashes some v4 platforms) + + //See SYNO_HWMON_SUPPORT_ID in include/linux/synobios.h GPLed sources - it defines which ones are possible + //These define which parts of ACPI HWMON should be emulated + //For those with GetHwCapability() note enable DBG_HWCAP which will force bios_hwcap_shim to print original values. + // Unless there's a good reason to diverge from the platform-defined values you should not. + //Supported hwmon sensors; order of sensors within type IS IMPORTANT to be accurate with a real hardware. The number + // of sensors is derived from the enums defining their types. 
Internally the absolute maximum number is determined + // by MAX_SENSOR_NUM defined in include/linux/synobios.h + const bool has_cpu_temp:1; //GetHwCapability(id = CAPABILITY_CPU_TEMP) + // Device-tree models + const bool is_dt:1; + const struct hw_config_hwmon { + enum hwmon_sys_thermal_zone_id sys_thermal[HWMON_SYS_THERMAL_ZONE_IDS]; //GetHwCapability(id = CAPABILITY_THERMAL) + enum hwmon_sys_voltage_sensor_id sys_voltage[HWMON_SYS_VOLTAGE_SENSOR_IDS]; + enum hwmon_sys_fan_rpm_id sys_fan_speed_rpm[HWMON_SYS_FAN_RPM_IDS]; //GetHwCapability(id = CAPABILITY_FAN_RPM_RPT) + enum hwmon_sys_hdd_bp_id hdd_backplane[HWMON_SYS_HDD_BP_IDS]; + enum hw_psu_sensor_id psu_status[HWMON_PSU_SENSOR_IDS]; + enum hwmon_sys_current_id sys_current[HWMON_SYS_CURRENT_IDS]; + } hwmon; +}; + +#define platform_has_hwmon_thermal(hw_config_ptr) ((hw_config_ptr)->hwmon.sys_thermal[0] != HWMON_SYS_TZONE_NULL_ID) +#define platform_has_hwmon_voltage(hw_config_ptr) ((hw_config_ptr)->hwmon.sys_voltage[0] != HWMON_SYS_VSENS_NULL_ID) +#define platform_has_hwmon_fan_rpm(hw_config_ptr) ((hw_config_ptr)->hwmon.sys_fan_speed_rpm[0] != HWMON_SYS_FAN_NULL_ID) +#define platform_has_hwmon_hdd_bpl(hw_config_ptr) ((hw_config_ptr)->hwmon.hdd_backplane[0] != HWMON_SYS_HDD_BP_NULL_ID) +#define platform_has_hwmon_psu_status(hw_config_ptr) ((hw_config_ptr)->hwmon.psu_status[0] != HWMON_PSU_NULL_ID) +#define platform_has_hwmon_current_sens(hw_config_ptr) ((hw_config_ptr)->hwmon.sys_current[0] != HWMON_SYS_CURR_NULL_ID) + +#endif //REDPILL_PLATFORM_TYPES_H diff --git a/config/platforms.h b/config/platforms.h new file mode 100644 index 0000000..2eecfd2 --- /dev/null +++ b/config/platforms.h @@ -0,0 +1,462 @@ +/* + * DO NOT include this file anywhere besides runtime_config.c - its format is meant to be internal to the configuration + * parsing. 
+ */ +#ifndef REDPILLLKM_PLATFORMS_H +#define REDPILLLKM_PLATFORMS_H + +#include "../shim/pci_shim.h" +#include "platform_types.h" +const struct hw_config supported_platforms[] = { + { + .name = "DS918+", + .pci_stubs = { + { .type = VPD_MARVELL_88SE9215, .bus = 0x01, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_INTEL_I211, .bus = 0x02, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_INTEL_I211, .bus = 0x03, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_INTEL_CPU_AHCI_CTRL, .bus = 0x00, .dev = 0x12, .fn = 0x00, .multifunction = false }, + { .type = VPD_INTEL_CPU_PCIE_PA, .bus = 0x00, .dev = 0x13, .fn = 0x00, .multifunction = false }, + { .type = VPD_INTEL_CPU_PCIE_PB, .bus = 0x00, .dev = 0x14, .fn = 0x00, .multifunction = false }, + { .type = VPD_INTEL_CPU_USB_XHCI, .bus = 0x00, .dev = 0x15, .fn = 0x00, .multifunction = false }, + { .type = VPD_INTEL_CPU_I2C, .bus = 0x00, .dev = 0x16, .fn = 0x00, .multifunction = false }, + { .type = VPD_INTEL_CPU_HSUART, .bus = 0x00, .dev = 0x18, .fn = 0x00, .multifunction = false }, + { .type = VPD_INTEL_CPU_SPI, .bus = 0x00, .dev = 0x19, .fn = 0x02, .multifunction = true }, + { .type = VPD_INTEL_CPU_SPI, .bus = 0x00, .dev = 0x19, .fn = 0x00, .multifunction = true }, + { .type = VPD_INTEL_CPU_SMBUS, .bus = 0x00, .dev = 0x1f, .fn = 0x01, .multifunction = true }, + { .type = VPD_INTEL_CPU_SMBUS, .bus = 0x00, .dev = 0x1f, .fn = 0x00, .multifunction = true }, + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = true, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = true, + .has_cpu_temp = true, + .is_dt = false, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_NULL_ID }, + .sys_voltage = { HWMON_SYS_VSENS_NULL_ID }, + .sys_fan_speed_rpm = { HWMON_SYS_FAN_NULL_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_DETECT_ID, HWMON_SYS_HDD_BP_ENABLE_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = 
"DS920+", + .pci_stubs = { + { .type = VPD_MARVELL_88SE9235, .bus = 0x01, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = true, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = true, + .has_cpu_temp = true, + .is_dt = true, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_NULL_ID }, + .sys_voltage = { HWMON_SYS_VSENS_NULL_ID }, + .sys_fan_speed_rpm = { HWMON_SYS_FAN_NULL_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_DETECT_ID, HWMON_SYS_HDD_BP_ENABLE_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "DS923+", + .pci_stubs = { + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = true, + .has_cpu_temp = true, + .is_dt = true, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = {HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "DS1520+", + .pci_stubs = { + { .type = VPD_MARVELL_88SE9235, .bus = 0x01, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = true, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = true, + .has_cpu_temp = true, + .is_dt = true, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_NULL_ID }, + .sys_voltage = { HWMON_SYS_VSENS_NULL_ID }, + .sys_fan_speed_rpm = { HWMON_SYS_FAN_NULL_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_DETECT_ID, HWMON_SYS_HDD_BP_ENABLE_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "DS1621+", + .pci_stubs 
= { + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = true, + .has_cpu_temp = true, + .is_dt = true, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = {HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "DS923+", + .pci_stubs = { + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = true, + .has_cpu_temp = true, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = {HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "DS1621xs+", + .pci_stubs = { + { .type = VPD_MARVELL_88SE9235, .bus = 0x09, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_MARVELL_88SE9235, .bus = 0x0c, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = true, + .has_cpu_temp = true, + .is_dt = false, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + 
.sys_fan_speed_rpm = { HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "DS2422+", + .pci_stubs = { + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = true, + .has_cpu_temp = true, + .is_dt = true, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = { HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "DS3615xs", + .pci_stubs = { + { .type = VPD_MARVELL_88SE9235, .bus = 0x07, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_MARVELL_88SE9235, .bus = 0x08, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_MARVELL_88SE9235, .bus = 0x09, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_MARVELL_88SE9235, .bus = 0x0a, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = true, + .reinit_ttyS0 = false, + .fix_disk_led_ctrl = false, + .has_cpu_temp = true, + .is_dt = false, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = { HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "DS3617xs", 
+ .pci_stubs = { + { .type = VPD_MARVELL_88SE9215, .bus = 0x01, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_MARVELL_88SE9215, .bus = 0x02, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_MARVELL_88SE9235, .bus = 0x08, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = false, + .has_cpu_temp = true, + .is_dt = false, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = { HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "DS3622xs+", + .pci_stubs = { + { .type = VPD_MARVELL_88SE9235, .bus = 0x09, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_MARVELL_88SE9235, .bus = 0x0c, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = true, + .has_cpu_temp = true, + .is_dt = false, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = { HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "DVA1622", + .pci_stubs = { + { .type = VPD_MARVELL_88SE9235, .bus = 0x01, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = 
__VPD_TERMINATOR__ } + }, + .emulate_rtc = true, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = true, + .has_cpu_temp = true, + .is_dt = true, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_NULL_ID }, + .sys_voltage = { HWMON_SYS_VSENS_NULL_ID }, + .sys_fan_speed_rpm = { HWMON_SYS_FAN_NULL_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_DETECT_ID, HWMON_SYS_HDD_BP_ENABLE_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "DVA3219", + .pci_stubs = { + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = true, + .has_cpu_temp = true, + .is_dt = false, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = { HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "DVA3221", + .pci_stubs = { + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = true, + .has_cpu_temp = true, + .is_dt = false, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = { HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "FS2500", + .pci_stubs = { + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = false, + 
.reinit_ttyS0 = true, + .fix_disk_led_ctrl = true, + .has_cpu_temp = true, + .is_dt = true, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = { HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "FS6400", + .pci_stubs = { + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = false, + .has_cpu_temp = true, + .is_dt = false, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = { HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "RS3413xs+", + .pci_stubs = { + { .type = VPD_MARVELL_88SE9235, .bus = 0x07, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_MARVELL_88SE9235, .bus = 0x08, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_MARVELL_88SE9235, .bus = 0x09, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_MARVELL_88SE9235, .bus = 0x0a, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = true, + .reinit_ttyS0 = false, + .fix_disk_led_ctrl = false, + .has_cpu_temp = true, + .is_dt = false, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { 
HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = { HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "RS3618xs", + .pci_stubs = { + { .type = VPD_MARVELL_88SE9215, .bus = 0x01, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_MARVELL_88SE9215, .bus = 0x02, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_MARVELL_88SE9235, .bus = 0x08, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = false, + .has_cpu_temp = true, + .is_dt = false, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = { HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "RS4021xs+", + .pci_stubs = { + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = false, + .has_cpu_temp = true, + .is_dt = false, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = { HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } 
+ }, + { + .name = "SA6400", + .pci_stubs = { + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = true, + .has_cpu_temp = true, + .is_dt = true, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = {HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + } +}; + +#endif //REDPILLLKM_PLATFORMS_H diff --git a/config/platforms.h.bak b/config/platforms.h.bak new file mode 100644 index 0000000..c8e430d --- /dev/null +++ b/config/platforms.h.bak @@ -0,0 +1,156 @@ +/* + * DO NOT include this file anywhere besides runtime_config.c - its format is meant to be internal to the configuration + * parsing. 
+ */ +#ifndef REDPILLLKM_PLATFORMS_H +#define REDPILLLKM_PLATFORMS_H + +#include "../shim/pci_shim.h" +#include "platform_types.h" +const struct hw_config supported_platforms[] = { + { + .name = "DS3615xs", + .pci_stubs = { + { .type = VPD_MARVELL_88SE9235, .bus = 0x07, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_MARVELL_88SE9235, .bus = 0x08, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_MARVELL_88SE9235, .bus = 0x09, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_MARVELL_88SE9235, .bus = 0x0a, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = true, + .reinit_ttyS0 = false, + .fix_disk_led_ctrl = false, + .has_cpu_temp = true, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = {HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "DS3617xs", + .pci_stubs = { + { .type = VPD_MARVELL_88SE9215, .bus = 0x01, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_MARVELL_88SE9215, .bus = 0x02, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_MARVELL_88SE9235, .bus = 0x08, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = false, + .has_cpu_temp = true, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID 
}, + .sys_fan_speed_rpm = {HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "FS6400", + .pci_stubs = { + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = false, + .has_cpu_temp = true, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = {HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + + { + .name = "DS3622xs+", + .pci_stubs = { + { .type = VPD_MARVELL_88SE9235, .bus = 0x09, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_MARVELL_88SE9235, .bus = 0x0c, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = false, + .has_cpu_temp = true, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = {HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "RS4021xs+", + .pci_stubs = { + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = false, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = false, + .has_cpu_temp = true, + .hwmon = { + .sys_thermal = { 
HWMON_SYS_TZONE_REMOTE1_ID, HWMON_SYS_TZONE_LOCAL_ID, HWMON_SYS_TZONE_REMOTE2_ID }, + .sys_voltage = { HWMON_SYS_VSENS_VCC_ID, HWMON_SYS_VSENS_VPP_ID, HWMON_SYS_VSENS_V33_ID, + HWMON_SYS_VSENS_V5_ID, HWMON_SYS_VSENS_V12_ID }, + .sys_fan_speed_rpm = {HWMON_SYS_FAN1_ID, HWMON_SYS_FAN2_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_NULL_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, + { + .name = "DS918+", + .pci_stubs = { + { .type = VPD_MARVELL_88SE9215, .bus = 0x01, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_INTEL_I211, .bus = 0x02, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_INTEL_I211, .bus = 0x03, .dev = 0x00, .fn = 0x00, .multifunction = false }, + { .type = VPD_INTEL_CPU_AHCI_CTRL, .bus = 0x00, .dev = 0x12, .fn = 0x00, .multifunction = false }, + { .type = VPD_INTEL_CPU_PCIE_PA, .bus = 0x00, .dev = 0x13, .fn = 0x00, .multifunction = false }, + { .type = VPD_INTEL_CPU_PCIE_PB, .bus = 0x00, .dev = 0x14, .fn = 0x00, .multifunction = false }, + { .type = VPD_INTEL_CPU_USB_XHCI, .bus = 0x00, .dev = 0x15, .fn = 0x00, .multifunction = false }, + { .type = VPD_INTEL_CPU_I2C, .bus = 0x00, .dev = 0x16, .fn = 0x00, .multifunction = false }, + { .type = VPD_INTEL_CPU_HSUART, .bus = 0x00, .dev = 0x18, .fn = 0x00, .multifunction = false }, + { .type = VPD_INTEL_CPU_SPI, .bus = 0x00, .dev = 0x19, .fn = 0x02, .multifunction = true }, + { .type = VPD_INTEL_CPU_SPI, .bus = 0x00, .dev = 0x19, .fn = 0x00, .multifunction = true }, + { .type = VPD_INTEL_CPU_SMBUS, .bus = 0x00, .dev = 0x1f, .fn = 0x01, .multifunction = true }, + { .type = VPD_INTEL_CPU_SMBUS, .bus = 0x00, .dev = 0x1f, .fn = 0x00, .multifunction = true }, + + { .type = __VPD_TERMINATOR__ } + }, + .emulate_rtc = true, + .swap_serial = false, + .reinit_ttyS0 = true, + .fix_disk_led_ctrl = true, + .has_cpu_temp = true, + .hwmon = { + .sys_thermal = { HWMON_SYS_TZONE_NULL_ID }, + .sys_voltage = { HWMON_SYS_VSENS_NULL_ID }, + 
.sys_fan_speed_rpm = { HWMON_SYS_FAN_NULL_ID }, + .hdd_backplane = { HWMON_SYS_HDD_BP_DETECT_ID, HWMON_SYS_HDD_BP_ENABLE_ID }, + .psu_status = { HWMON_PSU_NULL_ID }, + .sys_current = { HWMON_SYS_CURR_NULL_ID }, + } + }, +}; + +#endif //REDPILLLKM_PLATFORMS_H diff --git a/config/runtime_config.c b/config/runtime_config.c new file mode 100644 index 0000000..31400a6 --- /dev/null +++ b/config/runtime_config.c @@ -0,0 +1,246 @@ +#include "runtime_config.h" +#include "platforms.h" +#include "../common.h" +#include "cmdline_delegate.h" +#include "uart_defs.h" + +struct runtime_config current_config = { + .hw = { '\0' }, + .sn = { '\0' }, + .boot_media = { + .type = BOOT_MEDIA_USB, + .mfg_mode = false, + .vid = VID_PID_EMPTY, + .pid = VID_PID_EMPTY, + .dom_size_mib = 1024, //usually the image will be used with ESXi and thus it will be ~100MB anyway + }, + .port_thaw = true, + .netif_num = 0, + .macs = { '\0' }, + .cmdline_blacklist = { '\0' }, + .hw_config = NULL, +}; + +static inline bool validate_sn(const serial_no *sn) { + if (*sn[0] == '\0') { + pr_loc_err("Serial number is empty"); + return false; + } + + //TODO: add more validation here, probably w/model? + + return true; +} + +static __always_inline bool validate_boot_dev_usb(const struct boot_media *boot) +{ + if (boot->vid == VID_PID_EMPTY && boot->pid == VID_PID_EMPTY) { + pr_loc_wrn("Empty/no \"%s\" and \"%s\" specified - first USB storage device will be used", CMDLINE_CT_VID, + CMDLINE_CT_PID); + return true; //this isn't necessarily an error (e.g. 
running under a VM with only a single USB port) + } + + if (boot->vid == VID_PID_EMPTY) { //PID=0 is valid, but the VID is not + pr_loc_err("Empty/no \"%s\" specified", CMDLINE_CT_VID); + return false; + } + + pr_loc_dbg("Configured boot device type to USB"); + return true; + //not checking for >VID_PID_MAX as vid type is already ushort +} + +/** + * Validates boot media options for native SATA DoM boot + * + * Fails when NATIVE_SATA_DOM_SUPPORTED is not defined or when mfg mode is requested. VID/PID values which are set only + * trigger a warning as they are ignored in this mode. + */ +static __always_inline bool validate_boot_dev_sata_dom(const struct boot_media *boot) +{ +#ifndef NATIVE_SATA_DOM_SUPPORTED + pr_loc_err("The kernel you are running was built without SATA DoM support, you cannot use %s%c. " + "You can try booting with %s%c to enable experimental fake-SATA DoM.", + CMDLINE_KT_SATADOM, CMDLINE_KT_SATADOM_NATIVE, + CMDLINE_KT_SATADOM, CMDLINE_KT_SATADOM_FAKE); + return false; +#endif + + if (boot->vid != VID_PID_EMPTY || boot->pid != VID_PID_EMPTY) + pr_loc_wrn("Using native SATA-DoM boot - %s and %s parameter values will be ignored", + CMDLINE_CT_VID, CMDLINE_CT_PID); + + //this config is impossible as there's no equivalent for force-reinstall boot on SATA, so it's better to detect + //that rather than causing WTFs for someone falsely assuming that it's possible + //However, it does work with fake-SATA boot (as it emulates USB disk anyway) + if (boot->mfg_mode) { + pr_loc_err("You cannot combine %s%c with %s - the OS supports force-reinstall on USB and fake SATA disk only", + CMDLINE_KT_SATADOM, CMDLINE_KT_SATADOM_NATIVE, CMDLINE_CT_MFG); + return false; + } + + //this is the native variant - the fake-SATA one is reported by validate_boot_dev_sata_disk() + pr_loc_dbg("Configured boot device type to native SATA DOM"); + return true; +} + +static __always_inline bool validate_boot_dev_sata_disk(const struct boot_media *boot) +{ +#ifdef NATIVE_SATA_DOM_SUPPORTED + pr_loc_wrn("The kernel you are running supports native SATA DoM (%s%c). 
You're currently using an experimental " + "fake-SATA DoM (%s%c) - consider switching to native SATA DoM (%s%c) for more stable operation.", + CMDLINE_KT_SATADOM, CMDLINE_KT_SATADOM_NATIVE, + CMDLINE_KT_SATADOM, CMDLINE_KT_SATADOM_FAKE, + CMDLINE_KT_SATADOM, CMDLINE_KT_SATADOM_NATIVE); +#endif + + if (boot->vid != VID_PID_EMPTY || boot->pid != VID_PID_EMPTY) + pr_loc_wrn("Using fake SATA disk boot - %s and %s parameter values will be ignored", + CMDLINE_CT_VID, CMDLINE_CT_PID); + + pr_loc_dbg("Configured boot device type to fake-SATA DOM"); + return true; +} + +static inline bool validate_boot_dev(const struct boot_media *boot) +{ + switch (boot->type) { + case BOOT_MEDIA_USB: + return validate_boot_dev_usb(boot); + case BOOT_MEDIA_SATA_DOM: + return validate_boot_dev_sata_dom(boot); + case BOOT_MEDIA_SATA_DISK: + return validate_boot_dev_sata_disk(boot); + default: + pr_loc_bug("Got unknown boot type - did you forget to update %s after changing cmdline parsing?", + __FUNCTION__); + return false; + + } +} + +static inline bool validate_nets(const unsigned short if_num, mac_address * const macs[MAX_NET_IFACES]) +{ + size_t mac_len; + unsigned short macs_num = 0; + bool valid = true; + for (; macs_num < MAX_NET_IFACES; macs_num++) { + if (!macs[macs_num]) + break; //You cannot have gaps in macs array + + mac_len = strlen(*macs[macs_num]); + if (mac_len != MAC_ADDR_LEN) { + pr_loc_err("MAC address \"%s\" is invalid (expected %d characters, found %zu)", *macs[macs_num], MAC_ADDR_LEN, + mac_len); + valid = false; + } //else if validate if the MAC is actually semi-valid + } + + if (if_num == 0) { + pr_loc_wrn("Number of defined interfaces (\"%s\") is not specified or empty", CMDLINE_KT_NETIF_NUM); + } + + if (macs_num == 0) { + pr_loc_wrn("No MAC addressed are specified - use \"%s\" or \"%s\"...\"%s\" to set them", CMDLINE_KT_MACS, + CMDLINE_KT_MAC1, CMDLINE_KT_MAC4); + } + + if (if_num != macs_num) { + pr_loc_err("Number of defined interfaces (\"%s%d\") is not equal to 
the number of MAC addresses found (%d)", + CMDLINE_KT_NETIF_NUM, if_num, macs_num); + valid = false; + } + + return valid; +} + +/** + * This function validates consistency of the currently loaded platform config with the current environment + * + * Some options don't make sense unless the kernel was built with some specific configuration. This function aims to + * detect common pitfalls in platforms configuration. This doesn't so much validate the platform definition per se + * (but partially too) but the match between platform config chosen vs. kernel currently attempting to run that + * platform. + */ +static inline bool validate_platform_config(const struct hw_config *hw) +{ +#ifdef UART_BUG_SWAPPED + const bool kernel_serial_swapped = true; +#else + const bool kernel_serial_swapped = false; +#endif + + //This will not prevent the code from working, so it's not an error state by itself + if (unlikely(hw->swap_serial && !kernel_serial_swapped)) + pr_loc_bug("Your kernel indicates COM1 & COM2 ARE NOT swapped but your platform specifies swapping"); + else if(unlikely(!hw->swap_serial && kernel_serial_swapped)) + pr_loc_bug("Your kernel indicates COM1 & COM2 ARE swapped but your platform specifies NO swapping"); + + return true; +} + +static int populate_hw_config(struct runtime_config *config) +{ + //We cannot run with empty model or model which didn't match + if (config->hw[0] == '\0') { + pr_loc_crt("Empty model, please set \"%s\" parameter", CMDLINE_KT_HW); + return -ENOENT; + } + + for (int i = 0; i < ARRAY_SIZE(supported_platforms); i++) { + if (strcmp(supported_platforms[i].name, (char *)config->hw) != 0) + continue; + + pr_loc_dbg("Found platform definition for \"%s\"", config->hw); + config->hw_config = &supported_platforms[i]; + return 0; + } + + pr_loc_crt("The model set using \"%s%s\" is not valid", CMDLINE_KT_HW, config->hw); + return -EINVAL; +} + +static bool validate_runtime_config(const struct runtime_config *config) +{ + pr_loc_dbg("Validating 
runtime config..."); + bool valid = true; + + valid &= validate_sn(&config->sn); + valid &= validate_boot_dev(&config->boot_media); + valid &= validate_nets(config->netif_num, config->macs); + valid &= validate_platform_config(config->hw_config); + + pr_loc_dbg("Config validation resulted in %s", valid ? "OK" : "ERR"); + if (valid) { + return 0; + } else { + pr_loc_err("Config validation FAILED"); + return -EINVAL; + } +} + +int populate_runtime_config(struct runtime_config *config) +{ + int out = 0; + + if ((out = populate_hw_config(config)) != 0 || (out = validate_runtime_config(config)) != 0) { + pr_loc_err("Failed to populate runtime config!"); + return out; + } + + pr_loc_inf("Runtime config populated"); + + return out; +} + +void free_runtime_config(struct runtime_config *config) +{ + for (int i = 0; i < MAX_NET_IFACES; i++) { + if (config->macs[i]) { + pr_loc_dbg("Free MAC%d @ %p", i, config->macs[i]); + kfree(config->macs[i]); + } + } + + for (int i = 0; i < MAX_BLACKLISTED_CMDLINE_TOKENS; i++) { + if (config->cmdline_blacklist[i]) { + pr_loc_dbg("Free cmdline blacklist entry %d @ %p", i, config->cmdline_blacklist[i]); + kfree(config->cmdline_blacklist[i]); + } + } + + pr_loc_inf("Runtime config freed"); +} diff --git a/config/runtime_config.h b/config/runtime_config.h new file mode 100644 index 0000000..04080e1 --- /dev/null +++ b/config/runtime_config.h @@ -0,0 +1,71 @@ +#ifndef REDPILLLKM_RUNTIME_CONFIG_H +#define REDPILLLKM_RUNTIME_CONFIG_H + +#include "uart_defs.h" //UART config values +#include //bool + +//These below are currently known runtime limitations +#define MAX_NET_IFACES 8 +#define MAC_ADDR_LEN 12 +#define MAX_BLACKLISTED_CMDLINE_TOKENS 10 + +#ifdef CONFIG_SYNO_BOOT_SATA_DOM +#define NATIVE_SATA_DOM_SUPPORTED //whether SCSI sd.c driver supports native SATA DOM +#endif + +//UART-related constants were moved to uart_defs.h, to allow subcomponents to importa a smaller subset than this header +#define MODEL_MAX_LENGTH 10 +#define SN_MAX_LENGTH 
13 + +#define VID_PID_EMPTY 0x0000 +#define VID_PID_MAX 0xFFFF + +typedef unsigned short device_id; +typedef char syno_hw[MODEL_MAX_LENGTH + 1]; +typedef char mac_address[MAC_ADDR_LEN + 1]; +typedef char serial_no[SN_MAX_LENGTH + 1]; +typedef char cmdline_token[]; + +enum boot_media_type { + BOOT_MEDIA_USB, + BOOT_MEDIA_SATA_DOM, + BOOT_MEDIA_SATA_DISK, +}; + +struct boot_media { + enum boot_media_type type; // Default: BOOT_MEDIA_USB + + //USB only options + bool mfg_mode; //emulate mfg mode (valid for USB boot only). Default: false + device_id vid; //Vendor ID of device containing the loader. Default: empty + device_id pid; //Product ID of device containing the loader. Default: empty + + //SATA only options + unsigned long dom_size_mib; //Max size of SATA DOM Default: 1024 +}; + +struct hw_config; +struct runtime_config { + syno_hw hw; //used to determine quirks. Default: empty + serial_no sn; //Used to validate it and warn the user. Default: empty + struct boot_media boot_media; + bool port_thaw; //Currently unknown. Default: true + unsigned short netif_num; //Number of eth interfaces. Default: 0 + mac_address *macs[MAX_NET_IFACES]; //MAC addresses of eth interfaces. Default: [] + cmdline_token *cmdline_blacklist[MAX_BLACKLISTED_CMDLINE_TOKENS];// Default: [] + const struct hw_config *hw_config; +}; +extern struct runtime_config current_config; + +/** + * Takes a raw extracted config and "shakes it a little bit" by validating things & constructing dependent structures + * + * Warning: if this function returns false YOU MUST NOT trust the config structure. Other code WILL break as it assumes + * the config is valid (e.g. doesn't have null ptrs which this function generates). + * Also, after you call this function you should call free_runtime_config() to clear up memory reservations. 
+ */ +int populate_runtime_config(struct runtime_config *config); + +void free_runtime_config(struct runtime_config *config); + +#endif //REDPILLLKM_RUNTIME_CONFIG_H diff --git a/config/uart_defs.h b/config/uart_defs.h new file mode 100644 index 0000000..fa8a444 --- /dev/null +++ b/config/uart_defs.h @@ -0,0 +1,50 @@ +/** + * This file is meant to be small and portable. It can be included by other parts of the module wishing to get some info + * about UARTs. It should not contain any extensive definitions or static structures reservation. It is mostly + * extracting information buried in the Linux serial subsystem into usable constants. + */ +#ifndef REDPILL_UART_DEFS_H +#define REDPILL_UART_DEFS_H + +#include //flags for pc_com* +#include //struct uart_port +#include //KERNEL_VERSION() + +//These definitions are taken from asm/serial.h for a normal (i.e. non-swapped) UART1/COM1 port on an x86 PC +#define STD_COM1_IOBASE 0x3f8 +#define STD_COM1_IRQ 4 +#define STD_COM2_IOBASE 0x2f8 +#define STD_COM2_IRQ 3 +#define STD_COM3_IOBASE 0x3e8 +#define STD_COM3_IRQ 4 +#define STD_COM4_IOBASE 0x2e8 +#define STD_COM4_IRQ 3 + +//They changed name of flags const: https://github.com/torvalds/linux/commit/196cf358422517b3ff3779c46a1f3e26fb084172 +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) +#define STD_COMX_FLAGS STD_COM_FLAGS +#endif + +#define STD_COMX_BAUD BASE_BAUD + +#define STD_COMX_DEV_NAME "ttyS" +#define SRD_COMX_BAUD_OPTS "115200n8" + +#define UART_NR CONFIG_SERIAL_8250_NR_UARTS +#define SERIAL8250_LAST_ISA_LINE (UART_NR-1) //max valid index of ttyS +#define SERIAL8250_SOFT_IRQ 0 //a special IRQ value which, if set on a port, will force 8250 driver to use timers + + +#ifdef CONFIG_SYNO_X86_SERIAL_PORT_SWAP +#define UART_BUG_SWAPPED //indicates that first two UARTs are swapped (sic!). Yes, we do consider it a fucking bug. 
+#endif + +// CONFIG_SYNO_FIX_TTYS_FUNCTIONS=y + // CONFIG_SYNO_TTYS_FUN_NUM=2 +#ifdef CONFIG_SYNO_TTYS_FUN_NUM +#define UART_SYNO_TTYS_FUN_NUM CONFIG_SYNO_TTYS_FUN_NUM // Looks like the number of ports reserved exclusively for Synology; skipped outright during emulation +#else +#define UART_SYNO_TTYS_FUN_NUM 0 +#endif + +#endif //REDPILL_UART_DEFS_H diff --git a/config/vpci_types.h b/config/vpci_types.h new file mode 100644 index 0000000..41bf24c --- /dev/null +++ b/config/vpci_types.h @@ -0,0 +1,18 @@ +#ifndef REDPILL_VPCI_LIMITS_H +#define REDPILL_VPCI_LIMITS_H + +#include "../shim/pci_shim.h" //pci_shim_device_type + +//Defines below are experimentally determined to be sufficient but can often be changed +#define MAX_VPCI_BUSES 8 //adjust if needed, max 256 +#define MAX_VPCI_DEVS 16 //adjust if needed, max 256*32=8192 + +//Describes a single virtual PCI device entry: its shim device type and its bus/dev/fn address +struct vpci_device_stub { + enum pci_shim_device_type type; + u8 bus; + u8 dev; + u8 fn; + bool multifunction:1; +}; + +#endif //REDPILL_VPCI_LIMITS_H diff --git a/debug/debug_execve.c b/debug/debug_execve.c new file mode 100644 index 0000000..c7652a7 --- /dev/null +++ b/debug/debug_execve.c @@ -0,0 +1,135 @@ +#include "debug_execve.h" +#include "../common.h" +#include //task_struct +#include //get_user +#include //compat_uptr_t +#include //MAX_ARG_STRINGS + +/* + * Struct copied 1:1 from: + * + * linux/fs/exec.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ +struct user_arg_ptr { +#ifdef CONFIG_COMPAT + bool is_compat; +#endif + union { + const char __user *const __user *native; +#ifdef CONFIG_COMPAT + const compat_uptr_t __user *compat; +#endif + } ptr; +}; + +/* + * Function copied 1:1 from: + * + * linux/fs/exec.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ +static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr) +{ + const char __user *native; + +#ifdef CONFIG_COMPAT + if (unlikely(argv.is_compat)) { + compat_uptr_t compat; + + if (get_user(compat, argv.ptr.compat + nr)) + return ERR_PTR(-EFAULT); + + return compat_ptr(compat); + } +#endif + + if (get_user(native, argv.ptr.native + 
nr)) + return ERR_PTR(-EFAULT); + + return native; +} + +/* + * Modified for simplicity from count() in: + * + * linux/fs/exec.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ +static int count_args(struct user_arg_ptr argv) +{ + if (argv.ptr.native == NULL) + return 0; + + int i = 0; + + for (;;) { + const char __user *p = get_user_arg_ptr(argv, i); + + if (!p) + break; + + if (IS_ERR(p) || i >= MAX_ARG_STRINGS) + return -EFAULT; + + ++i; + + if (fatal_signal_pending(current)) + return -ERESTARTNOHAND; + cond_resched(); + } + + return i; +} + +static void inline fixup_arg_str(char *arg_ptr, int cur_argc, const char *what) +{ + pr_loc_wrn("Failed to copy %d arg - %s failed", cur_argc, what); + memcpy(arg_ptr, "..?\0", 4); +} + +/** + * Logs an execve() call: the calling task, the target filename and the space-joined argv + * + * Arguments are copied from user space into a single MAX_ARG_STRLEN-sized buffer. When an argument cannot be read or + * no longer fits into the buffer a "..?" marker is appended and whatever was collected so far is printed. + */ +void RPDBG_print_execve_call(const char *filename, const char __user *const __user *argv) +{ + //Use "current" rather than get_cpu_var(current_task): get_cpu_var() disables preemption until a matching + //put_cpu_var(), which was never called here while the code below may sleep (GFP_KERNEL, user memory access) + struct task_struct *caller = current; + + struct user_arg_ptr argv_up = { .ptr.native = argv }; + int argc = count_args(argv_up); + + char *arg_str = kzalloc(MAX_ARG_STRLEN, GFP_KERNEL); + if (unlikely(!arg_str)) { + pr_loc_crt("kzalloc failed"); + return; + } + + char *arg_ptr = &arg_str[0]; + for (int i = 0; i < argc; i++) { + const char __user *p = get_user_arg_ptr(argv_up, i); + if (IS_ERR(p)) { + fixup_arg_str(arg_ptr, i, "get_user_arg_ptr"); + goto out_free; + } + + int len = strnlen_user(p, MAX_ARG_STRLEN); //includes nullbyte + if (!len) { + fixup_arg_str(arg_ptr, i, "strnlen_user"); + goto out_free; + } + --len; //we want to copy without nullbyte as we handle it ourselves while attaching to arg_ptr + + //All arguments share one MAX_ARG_STRLEN buffer: make sure this one fits together with its separator/nullbyte + //while always leaving 4 bytes of room for the marker written by fixup_arg_str() + if ((size_t)len + 1 + 4 > MAX_ARG_STRLEN - (size_t)(arg_ptr - arg_str)) { + fixup_arg_str(arg_ptr, i, "arg buffer space check"); + goto out_free; + } + + if (copy_from_user(arg_ptr, p, len)) { + fixup_arg_str(arg_ptr, i, "copy_from_user"); + goto out_free; + } + + arg_ptr += len; + *arg_ptr = (i + 1 == argc) ? 
'\0' : ' '; //separate by spaces UNLESS it's the last argument + ++arg_ptr; + } + + out_free: + pr_loc_dbg("execve@cpu%d: %s[%d]=>%s[%d] {%s}", caller->on_cpu, caller->comm, caller->pid, filename, argc, arg_str); + kfree(arg_str); +} \ No newline at end of file diff --git a/debug/debug_execve.h b/debug/debug_execve.h new file mode 100644 index 0000000..09aea02 --- /dev/null +++ b/debug/debug_execve.h @@ -0,0 +1,6 @@ +#ifndef REDPILL_DEBUG_EXECVE_H +#define REDPILL_DEBUG_EXECVE_H + +void RPDBG_print_execve_call(const char *filename, const char *const *argv); + +#endif //REDPILL_DEBUG_EXECVE_H diff --git a/debug/debug_vuart.h b/debug/debug_vuart.h new file mode 100644 index 0000000..a0e66a7 --- /dev/null +++ b/debug/debug_vuart.h @@ -0,0 +1,77 @@ +#ifndef REDPILL_DEBUG_VUART_H +#define REDPILL_DEBUG_VUART_H + +//Whether the code will print all internal state changes +#ifdef VUART_DEBUG_LOG +//Main print macro used everywhere below +#define uart_prdbg(f, ...) pr_loc_dbg(f, ##__VA_ARGS__) + +#define reg_read(rN) uart_prdbg("Reading " rN " registry"); +#define reg_write(rN) uart_prdbg("Writing " rN " registry"); +#define reg_read_dump(d, rF, rN) reg_read(rN); dump_##rF(d); +#define reg_write_dump(d, rF, rN) reg_write(rN); dump_##rF(d); +#define dri(vdev, reg, flag) ((vdev)->reg&(flag)) ? 1:0 //Dump Register as 1-0 Integer +#define diiri(vdev, flag) (((vdev)->iir&UART_IIR_ID) == (flag)) ? 
1:0 //Dump IIR Interrupt type as 1-0 integer +#define dump_ier(d) \ + uart_prdbg("IER[0x%02x]: DR_int=%d | THRe_int=%d | RLS_int=%d | " \ + "MS_int=%d", \ + (d)->ier, dri(d,ier,UART_IER_RDI), dri(d,ier,UART_IER_THRI), dri(d,ier,UART_IER_RLSI), \ + dri(d,ier,UART_IER_MSI)); +//Be careful interpreting the result of this macro - no_int_pend means "no interrupts pending" (so 0 if there are +// pending interrupts and 1 if there are no interrupts pending); see Table 3-5 in TI doc +//Also FIFO flags are slightly weird (it's 2 bit, see IIR table in https://en.wikibooks.org/wiki/Serial_Programming/8250_UART_Programming) +// so fifoen=0_0 means "FIFO disabled", fifoen=1_1 means "FIFO enabled", and fifoen=0_1 means "FIFO enabled & broken" +//Also, since MSI is 0-0-0 it's a special-ish case: it's only considered enabled when int is pending and all bits are 0 +#define dump_iir(d) \ + uart_prdbg("IIR/ISR[0x%02x]: no_int_pend=%d | int_MS=%d | " \ + "int_THRe=%d | int_DR=%d | int_RLS=%d | " \ + "fifoen=%d_%d", \ + (d)->iir, dri(d,iir,UART_IIR_NO_INT), (!((d)->iir&UART_IIR_NO_INT)&&((d)->iir&UART_IIR_ID)==UART_IIR_MSI)?1:0, \ + diiri(d,UART_IIR_THRI), diiri(d,UART_IIR_RDI), diiri(d,UART_IIR_RLSI), \ + (((d)->iir & UART_IIR_FIFEN_B6)?1:0), (((d)->iir & UART_IIR_FIFEN_B7)?1:0)); +#define dump_fcr(d) \ + uart_prdbg("FCR[0x%02x]: FIFOon=%d | RxFIFOrst=%d | " \ + "TxFIFOrst=%d | EnDMAend=%d", \ + (d)->fcr, dri(d,fcr,UART_FCR_ENABLE_FIFO), dri(d,fcr,UART_FCR_CLEAR_RCVR), \ + dri(d,fcr,UART_FCR_CLEAR_XMIT), dri(d,fcr,UART_FCR_DMA_SELECT)); +#define dump_lcr(d) \ + uart_prdbg("LCR[0x%02x]: Stop=%d | PairEN=%d | EvenP=%d | " \ + "ForcPair=%d | SetBrk=%d | DLAB=%d", \ + (d)->lcr, dri(d,lcr,UART_LCR_STOP), dri(d,lcr,UART_LCR_PARITY), dri(d,lcr,UART_LCR_EPAR), \ + dri(d,lcr,UART_LCR_SPAR), dri(d,lcr,UART_LCR_SBC), dri(d,lcr,UART_LCR_DLAB)); +#define dump_mcr(d) \ + uart_prdbg("MCR[0x%02x]: DTR=%d | RTS=%d | Out1=%d | " \ + "Out2/IntE=%d | Loop=%d", \ + (d)->mcr, dri(d,mcr,UART_MCR_DTR), 
dri(d,mcr,UART_MCR_RTS), dri(d,mcr,UART_MCR_OUT1), \ + dri(d,mcr,UART_MCR_OUT2), dri(d,mcr,UART_MCR_LOOP)); +#define dump_lsr(d) \ + uart_prdbg("LSR[0x%02x]: data_ready=%d | ovrunE=%d | pairE=%d | " \ + "frE=%d | break_req=%d | THRemp=%d | TransEMP=%d | " \ + "FIFOdE=%d", \ + (d)->lsr, dri(d,lsr,UART_LSR_DR), dri(d,lsr,UART_LSR_OE), dri(d,lsr,UART_LSR_PE), \ + dri(d,lsr,UART_LSR_FE), dri(d,lsr,UART_LSR_BI), dri(d,lsr,UART_LSR_THRE), dri(d,lsr,UART_LSR_TEMT), \ + dri(d,lsr,UART_LSR_FIFOE)); +#define dump_msr(d) \ + uart_prdbg("MSR[0x%02x]: delCTS=%d | delDSR=%d | trEdgRI=%d | " \ + "delCD=%d | CTS=%d | DSR=%d | RI=%d | " \ + "DCD=%d", \ + (d)->msr, dri(d,msr,UART_MSR_DCTS), dri(d,msr,UART_MSR_DDSR), dri(d,msr,UART_MSR_TERI), \ + dri(d,msr,UART_MSR_DDCD), dri(d,msr,UART_MSR_CTS), dri(d,msr,UART_MSR_DSR), dri(d,msr,UART_MSR_RI), \ + dri(d,msr,UART_MSR_DCD)); + +#else //VUART_DEBUG_LOG disabled \/ +#define uart_prdbg(f, ...) { /* noop */ } +#define reg_read(rN) { /* noop */ } +#define reg_write(rN) { /* noop */ } +#define reg_read_dump(d, rF, rN) { /* noop */ } +#define reg_write_dump(d, rF, rN) { /* noop */ } +#define dump_ier(d) { /* noop */ } +#define dump_iir(d) { /* noop */ } +#define dump_fcr(d) { /* noop */ } +#define dump_lcr(d) { /* noop */ } +#define dump_mcr(d) { /* noop */ } +#define dump_lsr(d) { /* noop */ } +#define dump_msr(d) { /* noop */ } +#endif //VUART_DEBUG_LOG + +#endif //REDPILL_DEBUG_VUART_H diff --git a/internal/call_protected.c b/internal/call_protected.c new file mode 100644 index 0000000..3926692 --- /dev/null +++ b/internal/call_protected.c @@ -0,0 +1,113 @@ +#include "call_protected.h" +#include "../common.h" +#include //common exit codes +//#include //kallsyms_lookup_name() +#include "helper/symbol_helper.h" //kln_func +#include //symbol_get()/put + +//This will eventually stop working (since Linux >=5.7.0 has the kallsyms_lookup_name() removed) +//Workaround will be needed: https://github.com/xcellerator/linux_kernel_hacking/issues/3 + 
+#define __VOID_RETURN__
+//This macro should be used to export symbols which aren't normally EXPORT_SYMBOL/EXPORT_SYMBOL_GPL in the kernel but
+// they exist within the kernel (and not a loadable module!). Keep in mind that most of the time "static" cannot be
+// reexported using this trick.
+//All re-exported functions will have _ prefix (e.g. foo() becomes _foo())
+#define DEFINE_UNEXPORTED_SHIM(return_type, org_function_name, call_args, call_vars, fail_return) \
+    extern asmlinkage return_type org_function_name(call_args); \
+    typedef typeof(org_function_name) *org_function_name##__ret; \
+    static unsigned long org_function_name##__addr = 0; \
+    return_type _##org_function_name(call_args) \
+    { \
+        if (unlikely(org_function_name##__addr == 0)) { \
+            org_function_name##__addr = kln_func(#org_function_name); \
+            if (org_function_name##__addr == 0) { \
+                pr_loc_bug("Failed to fetch %s() syscall address", #org_function_name); \
+                return fail_return; \
+            } \
+            pr_loc_dbg("Got addr %lx for %s", org_function_name##__addr, #org_function_name); \
+        } \
+        \
+        return ((org_function_name##__ret)org_function_name##__addr)(call_vars); \
+    }
+
+//This macro should be used to export symbols which aren't normally EXPORT_SYMBOL/EXPORT_SYMBOL_GPL in the kernel but
+// they exist within the kernel and are defined as __init. These symbols can only be called when the system is still
+// booting (i.e. before init user-space binary was called). After that calling such functions is a lottery - the memory
+// of them is freed by free_initmem() [called in main.c:kernel_init()]. That's why we skip any caching here as these are
+// called mostly as a one-off during boot process when this module was loaded as an I/O scheduler.
+//All re-exported functions will have _ prefix (e.g. foo() becomes _foo())
+#define DEFINE_UNEXPORTED_INIT_SHIM(return_type, org_function_name, call_args, call_vars, fail_return) \
+    extern asmlinkage return_type org_function_name(call_args); \
+    typedef typeof(org_function_name) *org_function_name##__ret; \
+    return_type _##org_function_name(call_args) \
+    { \
+        unsigned long org_function_name##__addr = 0; \
+        if (unlikely(!is_system_booting())) { \
+            pr_loc_bug("Attempted to call %s() when the system is already booted (state=%d)", \
+                       #org_function_name, system_state); \
+            return fail_return; \
+        } \
+        org_function_name##__addr = kln_func(#org_function_name); \
+        if (org_function_name##__addr == 0) { \
+            pr_loc_bug("Failed to fetch %s() syscall address", #org_function_name); \
+            return fail_return; \
+        } \
+        pr_loc_dbg("Got addr %lx for %s", org_function_name##__addr, #org_function_name); \
+        \
+        return ((org_function_name##__ret)org_function_name##__addr)(call_vars); \
+    }
+
+//This macro should be used to export symbols which are normally exported by modules in situations where this module
+// must be loaded before such module exporting the symbol.
+//Normally if symbol for module "X" is used in "Y" the kernel will complain that "X" must be loaded before "Y".
+//All re-exported functions will have _ prefix (e.g. foo() becomes _foo())
+#define DEFINE_DYNAMIC_SHIM(return_type, org_function_name, call_args, call_vars, fail_return) \
+    extern asmlinkage return_type org_function_name(call_args); \
+    typedef typeof(org_function_name) *org_function_name##__ret; \
+    return_type _##org_function_name(call_args) \
+    { \
+        org_function_name##__ret org_function_name##__ptr = (org_function_name##__ret)__symbol_get(#org_function_name); \
+        if (!org_function_name##__ptr) { \
+            pr_loc_bug("Failed to fetch %s() symbol (is that module loaded?)", #org_function_name); \
+            return fail_return; \
+        } \
+        pr_loc_dbg("Got ptr %p for %s", org_function_name##__ptr, #org_function_name); \
+        /*Doing this BEFORE the call makes a TINY window where the symbol can "escape" but it protects from deadlock*/\
+        __symbol_put(#org_function_name); \
+        \
+        return ((org_function_name##__ret)org_function_name##__ptr)(call_vars); \
+    }
+//********************************************************************************************************************//
+
+DEFINE_UNEXPORTED_SHIM(int, cmdline_proc_show, CP_LIST(struct seq_file *m, void *v), CP_LIST(m, v), -EFAULT);
+DEFINE_UNEXPORTED_SHIM(void, flush_tlb_all, CP_LIST(void), CP_LIST(), __VOID_RETURN__);
+
+//See header file for detailed explanation what's going on here as it's more complex than a single commit
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0)
+DEFINE_UNEXPORTED_SHIM(int, do_execve, CP_LIST(const char *filename,
+                                               const char __user *const __user *__argv,
+                                               const char __user *const __user *__envp), CP_LIST(filename, __argv, __envp), -EINTR);
+
+#ifndef CONFIG_AUDITSYSCALL
+DEFINE_UNEXPORTED_SHIM(void, final_putname, CP_LIST(struct filename *name), CP_LIST(name), __VOID_RETURN__);
+#else
+DEFINE_UNEXPORTED_SHIM(void, putname, CP_LIST(struct filename *name), CP_LIST(name), __VOID_RETURN__);
+#endif
+#else
+DEFINE_UNEXPORTED_SHIM(int, do_execve, CP_LIST(struct filename *filename,
+                                               const char __user *const __user *__argv,
+                                               const char __user *const __user *__envp), CP_LIST(filename, __argv, __envp), -EINTR);
+DEFINE_UNEXPORTED_SHIM(struct filename *, getname, CP_LIST(const char __user *name), CP_LIST(name), ERR_PTR(-EFAULT));
+#endif
+
+DEFINE_UNEXPORTED_SHIM(int, scsi_scan_host_selected, CP_LIST(struct Scsi_Host *shost, unsigned int channel, unsigned int id, u64 lun, int rescan), CP_LIST(shost, channel, id, lun, rescan), -EIO);
+DEFINE_UNEXPORTED_SHIM(int, ida_pre_get, CP_LIST(struct ida *ida, gfp_t gfp_mask), CP_LIST(ida, gfp_mask), -EINVAL);
+
+DEFINE_UNEXPORTED_SHIM(int, early_serial_setup, CP_LIST(struct uart_port *port), port, -EIO);
+DEFINE_UNEXPORTED_SHIM(int, serial8250_find_port, CP_LIST(struct uart_port *p), CP_LIST(p), -EIO);
+
+DEFINE_UNEXPORTED_INIT_SHIM(int, elevator_setup, CP_LIST(char *str), CP_LIST(str), -EINVAL);
+
+DEFINE_DYNAMIC_SHIM(void, usb_register_notify, CP_LIST(struct notifier_block *nb), CP_LIST(nb), __VOID_RETURN__);
+DEFINE_DYNAMIC_SHIM(void, usb_unregister_notify, CP_LIST(struct notifier_block *nb), CP_LIST(nb), __VOID_RETURN__);
diff --git a/internal/call_protected.h b/internal/call_protected.h
new file mode 100644
index 0000000..88a4221
--- /dev/null
+++ b/internal/call_protected.h
@@ -0,0 +1,77 @@
+#ifndef REDPILLLKM_CALL_PROTECTED_H
+#define REDPILLLKM_CALL_PROTECTED_H
+
+#include //LINUX_VERSION_CODE, KERNEL_VERSION
+#include //bool
+#include //system_states & system_state
+
+// *************************************** Useful macros *************************************** //
+//Check if the system is still in booting stage (useful when you want to call __init functions as they're deleted)
+#define is_system_booting() (system_state == SYSTEM_BOOTING)
+
+// ************************** Exports of normally protected functions ************************** //
+
+//Usual macros to make defining them easier & consistent with the .c implementation
+#define CP_LIST(...)
__VA_ARGS__ //used to pass a list of arguments as a single argument
+#define CP_DECLARE_SHIM(return_type, org_function_name, call_args) return_type _##org_function_name(call_args);
+
+struct seq_file;
+CP_DECLARE_SHIM(int, cmdline_proc_show, CP_LIST(struct seq_file *m, void *v)); //extracts kernel cmdline
+CP_DECLARE_SHIM(void, flush_tlb_all, CP_LIST(void)); //used to flush caches in memory.c operations
+
+/* Thanks Jeff... https://groups.google.com/g/kernel-meetup-bangalore/c/rvQccTl_3kc/m/BJCnnXGCAgAJ
+ * In case the link disappears: Jeff Layton from RedHat decided to just nuke the getname() API after 7 years of it being
+ * exposed in the kernel. So in practice we need to use kallsyms to get it on kernels >=3.14 (up to current 5.14)
+ * See https://github.com/torvalds/linux/commit/9115eac2c788c17b57c9256cb322fa7371972ddf
+ * Another unrelated change which happened in v3.14 was that when "struct filename*" is passed the callee is responsible
+ * for freeing it (using putname()). However, in older versions we (the caller) need to free it
+ * See https://github.com/torvalds/linux/commit/c4ad8f98bef77c7356aa6a9ad9188a6acc6b849d
+ *
+ * This whole block deals with functions needed for execve() shimming
+ */
+struct filename;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0)
+CP_DECLARE_SHIM(int, do_execve, CP_LIST(const char *filename,
+                                        const char __user *const __user *__argv,
+                                        const char __user *const __user *__envp));
+
+#include
+#define _getname(...) getname(__VA_ARGS__)
+
+//If syscall audit is disabled putname is an alias to final_putname(), see include/linux/fs.h; later on this was changed
+// but this branch needs to handle only <3.14 as we don't need (and shouldn't use!) putname() in >=3.14
+#ifndef CONFIG_AUDITSYSCALL //if CONFIG_AUDITSYSCALL is disabled we unexport final_putname and add a putname define like fs.h
+#define _putname(name) _final_putname(name)
+CP_DECLARE_SHIM(void, final_putname, CP_LIST(struct filename *name));
+#else //if the CONFIG_AUDITSYSCALL is enabled we need to proxy to traced putname to make sure references are counted
+CP_DECLARE_SHIM(void, putname, CP_LIST(struct filename *name));
+#endif
+#else
+CP_DECLARE_SHIM(int, do_execve, CP_LIST(struct filename *filename,
+                                        const char __user *const __user *__argv,
+                                        const char __user *const __user *__envp));
+CP_DECLARE_SHIM(struct filename *, getname, CP_LIST(const char __user *name));
+#endif
+
+//The following functions are used by vUART and uart_fixer
+typedef struct uart_port *uart_port_p;
+CP_DECLARE_SHIM(int, early_serial_setup, CP_LIST(struct uart_port *port));
+CP_DECLARE_SHIM(int, serial8250_find_port, CP_LIST(struct uart_port *p));
+
+//Exported so that we can forcefully rescan the SCSI host in scsi_toolbox. This operation is normally available in
+// userland when you're a root, but somehow they missed an export for kernel code (which according to kernel rules is a
+// bug, but probably nobody asked before)
+struct Scsi_Host;
+CP_DECLARE_SHIM(int, scsi_scan_host_selected,
+                CP_LIST(struct Scsi_Host *shost, unsigned int channel, unsigned int id, u64 lun, int rescan));
+
+struct ida;
+CP_DECLARE_SHIM(int, ida_pre_get, CP_LIST(struct ida *ida, gfp_t gfp_mask));
+
+//Used for fixing I/O scheduler if module was loaded using elevator= and broke it
+CP_DECLARE_SHIM(int, elevator_setup, CP_LIST(char *str));
+
+struct notifier_block;
+CP_DECLARE_SHIM(void, usb_register_notify, CP_LIST(struct notifier_block *nb));
+CP_DECLARE_SHIM(void, usb_unregister_notify, CP_LIST(struct notifier_block *nb));
+#endif //REDPILLLKM_CALL_PROTECTED_H
diff --git a/internal/helper/math_helper.c b/internal/helper/math_helper.c
new file mode 100644
index 0000000..19387e9
--- /dev/null
+++ b/internal/helper/math_helper.c
@@ -0,0 +1,16 @@
+#include "math_helper.h"
+#include //prandom_u32()
+
+int prandom_int_range_stable(int *cur_val, int dev, int min, int max)
+{
+    if (likely(*cur_val != 0)) {
+        int new_min = (*cur_val) - dev;
+        int new_max = (*cur_val) + dev;
+        min = new_min < min ? min : new_min;
+        max = new_max > max ?
max : new_max;
+    }
+
+    *cur_val = prandom_int_range(min, max);
+
+    return *cur_val;
+}
\ No newline at end of file
diff --git a/internal/helper/math_helper.h b/internal/helper/math_helper.h
new file mode 100644
index 0000000..4338a91
--- /dev/null
+++ b/internal/helper/math_helper.h
@@ -0,0 +1,29 @@
+#ifndef REDPILL_MATH_HELPER_H
+#define REDPILL_MATH_HELPER_H
+
+/**
+ * Generates pseudo-random integer in a range specified
+ *
+ * @param min Lower boundary integer
+ * @param max Higher boundary integer
+ *
+ * @return pseudorandom integer up to 32 bits in length
+ */
+#define prandom_int_range(min, max) ({ \
+    int _rand = (prandom_u32() % ((max) + 1 - (min)) + (min)); \
+    _rand; \
+})
+
+/**
+ * Generates temporally stable pseudo-random integer in a range specified
+ *
+ * @param cur_val Pointer to store/read current value; set its value to 0 initially to generate setpoint automatically
+ * @param dev Max deviation from the current value
+ * @param min Lower boundary integer
+ * @param max Higher boundary integer
+ *
+ * @return pseudorandom integer up to 32 bits in length
+ */
+int prandom_int_range_stable(int *cur_val, int dev, int min, int max);
+
+#endif //REDPILL_MATH_HELPER_H
diff --git a/internal/helper/memory_helper.c b/internal/helper/memory_helper.c
new file mode 100644
index 0000000..7f24dda
--- /dev/null
+++ b/internal/helper/memory_helper.c
@@ -0,0 +1,62 @@
+/**
+ * TODO: look into override_symbol to check if there's any docs
+ */
+#include "memory_helper.h"
+#include "../../common.h"
+#include "../call_protected.h" //_flush_tlb_all()
+#include //PAGE_ALIGN
+#include //PAGE_SIZE
+#include //_PAGE_RW
+
+//Rounds the address down to the start of its page (already-aligned addresses stay where they are)
+#define PAGE_ALIGN_BOTTOM(addr) ((addr) & PAGE_MASK)
+//Number of pages touched by the inclusive address range low..high
+#define NUM_PAGES_BETWEEN(low, high) (((PAGE_ALIGN_BOTTOM(high) - PAGE_ALIGN_BOTTOM(low)) / PAGE_SIZE) + 1)
+//Address of the last byte of a vaddr+len region; len=0 is treated like len=1 so that the page of vaddr is always covered
+#define LAST_BYTE_OF(vaddr, len) ((vaddr) + ((len) ? (len) - 1 : 0))
+
+/**
+ * Sets or clears _PAGE_RW in the page table entry of every page overlapping the vaddr+len region, then flushes the TLB
+ *
+ * @param vaddr Address of the first byte of the region
+ * @param len Length of the region in bytes
+ * @param writable Non-zero to allow writes, zero to write-protect
+ */
+static void set_mem_addr_writable(const unsigned long vaddr, unsigned long len, int writable)
+{
+    unsigned long addr = PAGE_ALIGN_BOTTOM(vaddr);
+    unsigned long pages_left = NUM_PAGES_BETWEEN(vaddr, LAST_BYTE_OF(vaddr, len));
+    unsigned int level;
+
+    //theoretically this should use set_pte_atomic() but we're touching pages that will not be modified by anything else
+    for (; pages_left > 0; --pages_left, addr += PAGE_SIZE) {
+        pte_t *pte = lookup_address(addr, &level);
+        if (unlikely(!pte)) { //lookup_address() gives NULL when it finds no entry for the address
+            pr_loc_err("No page table entry for %p - cannot change its protection", (void *) addr);
+            continue;
+        }
+
+        if (writable)
+            pte->pte |= _PAGE_RW;
+        else
+            pte->pte &= ~_PAGE_RW;
+    }
+
+    _flush_tlb_all();
+}
+
+void set_mem_addr_rw(const unsigned long vaddr, unsigned long len)
+{
+    pr_loc_dbg("Disabling memory protection for page(s) at %p+%lu/%u (<<%p)", (void *) vaddr, len,
+               (unsigned int) NUM_PAGES_BETWEEN(vaddr, LAST_BYTE_OF(vaddr, len)), (void *) PAGE_ALIGN_BOTTOM(vaddr));
+
+    set_mem_addr_writable(vaddr, len, 1);
+}
+
+void set_mem_addr_ro(const unsigned long vaddr, unsigned long len)
+{
+    pr_loc_dbg("Enabling memory protection for page(s) at %p+%lu/%u (<<%p)", (void *) vaddr, len,
+               (unsigned int) NUM_PAGES_BETWEEN(vaddr, LAST_BYTE_OF(vaddr, len)), (void *) PAGE_ALIGN_BOTTOM(vaddr));
+
+    set_mem_addr_writable(vaddr, len, 0);
+}
\ No newline at end of file
diff --git a/internal/helper/memory_helper.h b/internal/helper/memory_helper.h
new file mode 100644
index 0000000..e505305
--- /dev/null
+++ b/internal/helper/memory_helper.h
@@ -0,0 +1,37 @@
+#ifndef REDPILL_MEMORY_HELPER_H
+#define REDPILL_MEMORY_HELPER_H
+
+#define WITH_MEM_UNLOCKED(vaddr, size, code) \
+    do { \
+        set_mem_addr_rw((unsigned long)(vaddr), size); \
+        ({code}); \
+        set_mem_addr_ro((unsigned long)(vaddr), size); \
+    } while(0)
+
+/**
+ * Disables write-protection for the memory where symbol resides
+ *
+ * There are a million different methods of circumventing the memory protection in Linux. The reason being the kernel
+ * people make it harder and harder to modify syscall table (& others in the process), which in general is a great idea.
+ * There are two core methods people use: 1) disabling CR0 WP bit, and 2) setting memory page(s) as R/W.
+ * The 1) is a flag, present on x86 CPUs, which when cleared configures the MMU to *ignore* write protection set on
+ * memory regions. However, this flag is per-core (=synchronization problems) and it works as all-or-none. We don't
+ * want to leave such a big thing disabled (especially for long time).
+ * The second mechanism disables memory protection on per-page basis. Normally the kernel contains set_memory_rw() which
+ * does what it says - sets the address (which should be lower-page aligned) to R/W. However, this function is evil for
+ * some time (~2.6?). In its course it calls static_protections() which REMOVES the R/W flag from the request
+ * (effectively making the call a noop) while still returning 0. Guess how long we debugged that... additionally, that
+ * function is removed in newer kernels.
+ * The easiest way is to just lookup the page table entry for a given address, modify the R/W attribute directly and
+ * dump CPU caches. This will work as there's no middle-man to mess with our request.
+ */
+void set_mem_addr_rw(const unsigned long vaddr, unsigned long len);
+
+/**
+ * Reverses set_mem_addr_rw()
+ *
+ * See set_mem_addr_rw() for details
+ */
+void set_mem_addr_ro(const unsigned long vaddr, unsigned long len);
+
+#endif //REDPILL_MEMORY_HELPER_H
diff --git a/internal/helper/symbol_helper.c b/internal/helper/symbol_helper.c
new file mode 100644
index 0000000..67dca75
--- /dev/null
+++ b/internal/helper/symbol_helper.c
@@ -0,0 +1,120 @@
+#include "symbol_helper.h" //kln_func
+#include //__symbol_get(), __symbol_put()
+#include //kallsyms_lookup_name
+#include "../../common.h" //pr_loc_*
+
+unsigned long (*kln_func)(const char* name) = NULL;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5,7,0)
+int get_kln_p(void)
+{
+    kln_func = kallsyms_lookup_name;
+    return 0;
+}
+#else
+/*
+ * In kernel version 5.7, kallsyms_lookup_name() was unexported, so we can't use it anymore.
+ * The alternative method below is slower (but not really noticeably), and works by brute-forcing
+ * possible addresses for the function name by starting at the kernel base address and using
+ * sprint_symbol() (which is still exported) to check if the symbol name at each address
+ * matches the one we want.
+ *
+ * https://github.com/xcellerator/linux_kernel_hacking/blob/446789fd152d2663cd2c7d7f8a5aaae873a92a30/3_RootkitTechniques/3.3_set_root/ftrace_helper.h
+ */
+static unsigned long kaddr_lookup_name(const char *fname_raw)
+{
+    int i;
+    unsigned long kaddr;
+    size_t fname_len;
+    char *fname_lookup, *fname;
+
+    //sprint_symbol() requires a buffer of at least KSYM_SYMBOL_LEN bytes
+    fname_lookup = kzalloc(KSYM_SYMBOL_LEN, GFP_KERNEL);
+    if (!fname_lookup)
+        return 0;
+
+    //sizeof() of the literal accounts for both the "+0x0" suffix and the terminating NUL
+    fname = kzalloc(strlen(fname_raw) + sizeof("+0x0"), GFP_KERNEL);
+    if (!fname) {
+        kfree(fname_lookup);
+        return 0;
+    }
+
+    /*
+     * We have to add "+0x0" to the end of our function name
+     * because that's the format that sprint_symbol() returns
+     * to us. If we don't do this, then our search can stop
+     * prematurely and give us the wrong function address!
+     */
+    strcpy(fname, fname_raw);
+    strcat(fname, "+0x0");
+    fname_len = strlen(fname);
+
+    /*
+     * Get the kernel base address:
+     * sprint_symbol() is less than 0x100000 from the start of the kernel, so
+     * we can just AND-out the last 3 bytes from its address to get the base
+     * address.
+     * There might be a better symbol-name to use?
+     */
+    kaddr = (unsigned long) &sprint_symbol;
+    kaddr &= 0xffffffffff000000;
+
+    /*
+     * All the syscalls (and all interesting kernel functions I've seen so far)
+     * are within the first 0x100000 bytes of the base address. However, the kernel
+     * functions are all aligned so that the final nibble is 0x0, so we only
+     * have to check every 16th address.
+     */
+    for ( i = 0x0 ; i < 0x100000 ; i++ )
+    {
+        /*
+         * Lookup the name ascribed to the current kernel address
+         */
+        sprint_symbol(fname_lookup, kaddr);
+
+        /*
+         * Compare the looked-up name to the one we want
+         */
+        if ( strncmp(fname_lookup, fname, fname_len) == 0 )
+        {
+            /*
+             * Clean up and return the found address
+             */
+            kfree(fname);
+            kfree(fname_lookup);
+            return kaddr;
+        }
+        /*
+         * Jump 16 addresses to next possible address
+         */
+        kaddr += 0x10;
+    }
+    /*
+     * We didn't find the name, so clean up and return 0
+     */
+    kfree(fname);
+    kfree(fname_lookup);
+    return 0;
+}
+
+int get_kln_p(void)
+{
+    kln_func = (long unsigned int (*)(const char *))kaddr_lookup_name("kallsyms_lookup_name");
+    if (kln_func == 0) {
+        pr_loc_err("Error searching kallsyms_lookup_name address!");
+        return -1;
+    }
+    pr_loc_dbg("kallsyms_lookup_name address = 0x%lx\n", (long unsigned int)kln_func);
+    return 0;
+}
+#endif
+
+bool kernel_has_symbol(const char *name) {
+    if (__symbol_get(name)) { //search for public symbols
+        __symbol_put(name);
+
+        return true;
+    }
+    return kln_func(name) != 0;
+}
\ No newline at end of file
diff --git a/internal/helper/symbol_helper.h b/internal/helper/symbol_helper.h
new file mode 100644
index 0000000..d38c58e
--- /dev/null
+++ b/internal/helper/symbol_helper.h
@@ -0,0 +1,22 @@
+#ifndef REDPILL_SYMBOL_HELPER_H
+#define REDPILL_SYMBOL_HELPER_H
+
+#include //bool
+
+/**
+ * Workaround for kallsyms_lookup_name in kernels >= 5.7
+ * https://github.com/xcellerator/linux_kernel_hacking/issues/3
+ */
+extern unsigned long (*kln_func)(const char*);
+int get_kln_p(void);
+
+/**
+ * Check if a given symbol exists
+ *
+ * This function will return true for both public and private kernel symbols
+ *
+ * @param name name of the symbol
+ */
+bool kernel_has_symbol(const char *name);
+
+#endif //REDPILL_SYMBOL_HELPER_H
diff --git a/internal/intercept_driver_register.c b/internal/intercept_driver_register.c
new file mode 100644
index 0000000..64aa7bb
--- /dev/null
+++
b/internal/intercept_driver_register.c
@@ -0,0 +1,275 @@
+#include "intercept_driver_register.h"
+#include "../common.h"
+#include "override/override_symbol.h"
+#include //platform_bus_type
+
+#define MAX_WATCHERS 5 //can be increased as-needed
+#define WATCH_FUNCTION "driver_register"
+
+struct driver_watcher_instance {
+    watch_dr_callback *cb;
+    bool notify_coming:1;
+    bool notify_live:1;
+    char name[];
+};
+
+static override_symbol_inst *ov_driver_register = NULL;
+static driver_watcher_instance *watchers[MAX_WATCHERS] = { NULL };
+
+/**
+ * Finds a registered watcher based on the driver name
+ *
+ * @return pointer to the spot on the list containing driver_watcher_instance or NULL if not found
+ */
+static driver_watcher_instance **match_watcher(const char *name)
+{
+    for (int i=0; i < MAX_WATCHERS; ++i) {
+        if (!watchers[i])
+            continue; //there could be "holes" due to unwatch calls
+
+        if(strcmp(name, watchers[i]->name) == 0)
+            return &watchers[i];
+    }
+
+    return NULL;
+}
+
+/**
+ * Finds an empty spot in the watchers list
+ *
+ * @return pointer to the spot on the list which is empty or NULL if not found
+ */
+static driver_watcher_instance **watcher_list_spot(void)
+{
+    for (int i=0; i < MAX_WATCHERS; ++i) {
+        if (!watchers[i])
+            return &watchers[i];
+    }
+
+    return NULL;
+}
+
+/**
+ * Checks if there are any watchers registered (to determine if it makes sense to still shim the driver_register())
+ */
+static bool has_any_watchers(void)
+{
+    for (int i=0; i < MAX_WATCHERS; ++i) {
+        if (watchers[i])
+            return true;
+    }
+
+    return false;
+}
+
+/**
+ * Calls the original driver_register() with error handling
+ *
+ * @return 0 on success, -E on error
+ */
+static int call_original_driver_register(struct device_driver *drv)
+{
+    int driver_register_out, org_call_out;
+    org_call_out = call_overridden_symbol(driver_register_out, ov_driver_register, drv);
+
+    if (unlikely(org_call_out != 0)) {
+        pr_loc_err("Failed to call original %s (error=%d)", WATCH_FUNCTION, org_call_out);
+        return org_call_out;
+    }
+
+    return driver_register_out;
+}
+
+/**
+ * Replacement for driver_register(), executing registered hooks
+ */
+static int driver_register_shim(struct device_driver *drv)
+{
+    driver_watcher_instance **watcher_lptr = match_watcher(drv->name);
+    int driver_load_result;
+    bool driver_register_fulfilled = false;
+
+    if (unlikely(!watcher_lptr)) {
+        pr_loc_dbg("%s() interception active - no handler observing \"%s\" found, calling original %s()",
+                   WATCH_FUNCTION, drv->name, WATCH_FUNCTION);
+        return call_original_driver_register(drv);
+    }
+
+    pr_loc_dbg("%s() interception active - calling handler %pF<%p> for \"%s\"", WATCH_FUNCTION, (*watcher_lptr)->cb,
+               (*watcher_lptr)->cb, drv->name);
+
+    if ((*watcher_lptr)->notify_coming) {
+        pr_loc_dbg("Calling for DWATCH_STATE_COMING");
+        switch ((*watcher_lptr)->cb(drv, DWATCH_STATE_COMING)) {
+            //CONTINUE and DONE cannot use fall-through as we cannot unregister watcher before calling it (as if this is the
+            // last watcher the whole override will be stopped
+            case DWATCH_NOTIFY_CONTINUE:
+                pr_loc_dbg("Calling original %s() & leaving watcher active", WATCH_FUNCTION);
+                driver_load_result = call_original_driver_register(drv);
+                driver_register_fulfilled = true;
+                break;
+            case DWATCH_NOTIFY_DONE:
+                pr_loc_dbg("Calling original %s() & removing watcher", WATCH_FUNCTION);
+                driver_load_result = call_original_driver_register(drv);
+                unwatch_driver_register(*watcher_lptr); //regardless of the call result we unregister
+                return driver_load_result; //we return here as the watcher doesn't want to be bothered anymore
+            case DWATCH_NOTIFY_ABORT_OK:
+                pr_loc_dbg("Faking OK return of %s() per callback request", WATCH_FUNCTION);
+                driver_load_result = 0;
+                driver_register_fulfilled = true;
+                break;
+            case DWATCH_NOTIFY_ABORT_BUSY:
+                pr_loc_dbg("Faking BUSY return of %s() per callback request", WATCH_FUNCTION);
+                driver_load_result = -EBUSY;
+                driver_register_fulfilled = true;
+                break;
+            default: //This should never happen if the callback is correct
+                pr_loc_bug("%s callback %pF<%p> returned invalid status value during DWATCH_STATE_COMING",
+                           WATCH_FUNCTION, (*watcher_lptr)->cb, (*watcher_lptr)->cb);
+        }
+    }
+
+    if (!driver_register_fulfilled)
+        driver_load_result = call_original_driver_register(drv);
+
+    if (driver_load_result != 0) {
+        pr_loc_err("%s driver failed to load - not triggering STATE_LIVE callbacks", drv->name);
+        return driver_load_result;
+    }
+
+    if ((*watcher_lptr)->notify_live) {
+        pr_loc_dbg("Calling for DWATCH_STATE_LIVE");
+        if ((*watcher_lptr)->cb(drv, DWATCH_STATE_LIVE) == DWATCH_NOTIFY_DONE)
+            unwatch_driver_register(*watcher_lptr);
+    }
+
+    return driver_load_result;
+}
+
+/**
+ * Enables override of driver_register() to watch for new drivers registration
+ *
+ * @return 0 on success, or -E on error
+ */
+static int start_watching(void)
+{
+    if (unlikely(ov_driver_register)) {
+        pr_loc_bug("Watching is already enabled!");
+        return 0;
+    }
+
+    pr_loc_dbg("Starting intercept of %s()", WATCH_FUNCTION);
+    ov_driver_register = override_symbol(WATCH_FUNCTION, driver_register_shim);
+    if (unlikely(IS_ERR(ov_driver_register))) {
+        pr_loc_err("Failed to intercept %s() - error=%ld", WATCH_FUNCTION, PTR_ERR(ov_driver_register));
+        ov_driver_register = NULL;
+        return -EINVAL;
+    }
+    pr_loc_dbg("%s() is now intercepted", WATCH_FUNCTION);
+
+    return 0;
+}
+
+/**
+ * Disables override of driver_register(), started by start_watching()
+ *
+ * @return 0 on success, or -E on error
+ */
+static int stop_watching(void)
+{
+    if (unlikely(!ov_driver_register)) {
+        pr_loc_bug("Watching is NOT enabled");
+        return 0;
+    }
+
+    pr_loc_dbg("Stopping intercept of %s()", WATCH_FUNCTION);
+    int out = restore_symbol(ov_driver_register);
+    if (unlikely(out != 0)) {
+        pr_loc_err("Failed to restore %s() - error=%d", WATCH_FUNCTION, out);
+        return out;
+    }
+    //Forget the restored override so that start_watching() installs a fresh one when a new watcher is registered
+    ov_driver_register = NULL;
+    pr_loc_dbg("Intercept of %s() stopped", WATCH_FUNCTION);
+
+    return 0;
+}
+
+driver_watcher_instance *watch_driver_register(const char *name, watch_dr_callback *cb, int event_mask)
+{
+    driver_watcher_instance **watcher_lptr = match_watcher(name);
+    if (unlikely(watcher_lptr)) {
+        pr_loc_err("Watcher for %s already exists (callback=%pF<%p>)", name, (*watcher_lptr)->cb, (*watcher_lptr)->cb);
+        return ERR_PTR(-EEXIST);
+    }
+
+    watcher_lptr = watcher_list_spot();
+    if (unlikely(!watcher_lptr)) {
+        pr_loc_bug("There are no free spots for a new watcher");
+        return ERR_PTR(-ENOSPC);
+    }
+
+    kmalloc_or_exit_ptr(*watcher_lptr, sizeof(driver_watcher_instance) + strsize(name));
+    strcpy((*watcher_lptr)->name, name);
+    (*watcher_lptr)->cb = cb;
+    (*watcher_lptr)->notify_coming = ((event_mask & DWATCH_STATE_COMING) == DWATCH_STATE_COMING);
+    (*watcher_lptr)->notify_live = ((event_mask & DWATCH_STATE_LIVE) == DWATCH_STATE_LIVE);
+    pr_loc_dbg("Registered %s() watcher for \"%s\" driver (coming=%d, live=%d)", WATCH_FUNCTION, name,
+               (*watcher_lptr)->notify_coming ? 1 : 0, (*watcher_lptr)->notify_live ? 1 : 0);
+
+    if (!ov_driver_register) {
+        pr_loc_dbg("Registered the first driver_register watcher - starting watching");
+        int out = start_watching();
+        if (unlikely(out != 0)) {
+            //Roll back: the caller gets no handle to unwatch with, so the watcher must not stay registered (or leak)
+            kfree(*watcher_lptr);
+            *watcher_lptr = NULL;
+            return ERR_PTR(out);
+        }
+    }
+
+    return *watcher_lptr;
+}
+
+int unwatch_driver_register(driver_watcher_instance *instance)
+{
+    driver_watcher_instance **matched_lptr = match_watcher(instance->name);
+    if (unlikely(!matched_lptr)) {
+        //This means it could be a double-unwatch situation and this will prevent a double-kfree (but the lack of crash
+        // is not guaranteed as match_watcher() already touched the memory)
+        pr_loc_bug("Watcher %p for %s couldn't be found in the watchers list", instance, instance->name);
+        return -ENOENT;
+    }
+
+    if (unlikely(*matched_lptr != instance)) {
+        pr_loc_bug("Watcher %p for %s was found but the instance on the list %p (@%p) isn't the same (?!)", instance,
+                   instance->name, *matched_lptr, matched_lptr);
+        return -EINVAL;
+    }
+
+    pr_loc_dbg("Removed %pF<%p> subscriber for \"%s\" driver", (*matched_lptr)->cb, (*matched_lptr)->cb,
+               (*matched_lptr)->name);
+    kfree(*matched_lptr);
+    *matched_lptr = NULL;
+
+    if (!has_any_watchers()) {
+        pr_loc_dbg("Removed last %s() subscriber - unshimming %s()", WATCH_FUNCTION, WATCH_FUNCTION);
+        int out;
+        if ((out = stop_watching()) != 0)
+            return out;
+    }
+
+    return 0;
+}
+
+int is_driver_registered(const char *name, struct bus_type *bus)
+{
+    if (!bus)
+        bus = &platform_bus_type;
+
+    struct device_driver *drv = driver_find(name, bus);
+    if (IS_ERR(drv))
+        return PTR_ERR(drv);
+
+    return drv ?
1:0;
+}
\ No newline at end of file
diff --git a/internal/intercept_driver_register.h b/internal/intercept_driver_register.h
new file mode 100644
index 0000000..fbd8c86
--- /dev/null
+++ b/internal/intercept_driver_register.h
@@ -0,0 +1,59 @@
+#ifndef REDPILL_DRIVER_WATCHER_H
+#define REDPILL_DRIVER_WATCHER_H
+
+#include //struct device_driver, driver_find (in .c)
+
+/**
+ * Codes which the callback called on watch can return
+ */
+typedef enum {
+    DWATCH_NOTIFY_CONTINUE, //callback processed the data and allows for the chain to continue
+    DWATCH_NOTIFY_DONE, //callback processed the data, allows for the chain to continue but wants to unregister
+    DWATCH_NOTIFY_ABORT_OK, //callback processed the data and determined that fake-OK should be returned to the original caller (DWATCH_STATE_COMING only)
+    DWATCH_NOTIFY_ABORT_BUSY, //callback processed the data and determined that fake-EBUSY should be returned to the original caller (DWATCH_STATE_COMING only)
+} driver_watch_notify_result;
+
+/**
+ * Controls when the callback for loaded driver is called
+ */
+typedef enum {
+    DWATCH_STATE_COMING = 0b100, //driver is loading, you can intercept the process using (DWATCH_NOTIFY_ABORT_*) and change data
+    DWATCH_STATE_LIVE = 0b010, //driver just loaded
+} driver_watch_notify_state;
+
+typedef struct driver_watcher_instance driver_watcher_instance;
+typedef driver_watch_notify_result (watch_dr_callback)(struct device_driver *drv, driver_watch_notify_state event);
+
+/**
+ * Start watching for a driver registration
+ *
+ * Note: if the driver is already loaded this will do nothing, unless the driver is removed and re-registers. You should
+ * probably call is_driver_registered() first.
+ *
+ * @param name Name of the driver you want to observe
+ * @param cb Callback called on an event
+ * @param event_mask ORed driver_watch_notify_state flags selecting when the callback is called
+ *
+ * @return watcher instance (to be passed to unwatch_driver_register()) on success, ERR_PTR(-E) on error
+ */
+driver_watcher_instance *watch_driver_register(const char *name, watch_dr_callback *cb, int event_mask);
+
+/**
+ * Undoes what watch_driver_register() did
+ *
+ * @return 0 on success, -E on error
+ */
+int unwatch_driver_register(driver_watcher_instance *instance);
+
+/**
+ * Checks if a given driver exists
+ *
+ * Usually if the driver exists already it doesn't make sense to watch for it as the event will never be triggered
+ * (unless the driver unregisters and registers again). If the bus is not specified here (NULL) a platform-driver will
+ * be looked up (aka legacy driver).
+ *
+ * @return 0 if the driver is not registered, 1 if the driver is registered, -E on lookup error
+ */
+int is_driver_registered(const char *name, struct bus_type *bus);
+
+#endif //REDPILL_DRIVER_WATCHER_H
diff --git a/internal/intercept_execve.c b/internal/intercept_execve.c
new file mode 100644
index 0000000..5df155e
--- /dev/null
+++ b/internal/intercept_execve.c
@@ -0,0 +1,145 @@
+/*
+ * Submodule used to hook the execve() syscall, used by the userland to execute binaries.
+ *
+ * This submodule can currently block calls to specific binaries and fake a successful return of the execution. In the
+ * future, if needed, an option to fake certain response and/or execute a different binary instead can be easily added
+ * here.
+ *
+ * execve() is a rather special syscall. This submodule utilizes override_symbol.c:override_syscall() to do the actual
+ * ground work of replacing the call. However some syscalls (execve, fork, etc.) use ASM stubs with a non-GCC call
+ * convention. Up until Linux v3.18 it wasn't a problem as long as the stub was called back. However, since v3.18 the
+ * stub was changed in such a way that calling it using a normal convention (i.e. from the shim here) will cause
+ * IRET imbalance and a crash. This is worked around by skipping the whole stub and calling do_execve() with a filename
+ * struct directly. This requires re-exported versions of these functions, so it may be marginally slower.
+ * Because of that this trick is only utilized on Linux >v3.18 and older ones call the stub as normal.
+ *
+ * References:
+ * - https://github.com/torvalds/linux/commit/b645af2d5905c4e32399005b867987919cbfc3ae
+ * - https://my.oschina.net/macwe/blog/603583
+ * - https://stackoverflow.com/questions/8372912/hooking-sys-execve-on-linux-3-x
+ */
+#include "intercept_execve.h"
+#include "../common.h"
+#include
+#include //struct filename
+#include "override/override_syscall.h" //SYSCALL_SHIM_DEFINE3, override_symbol
+#include "call_protected.h" //do_execve(), getname(), putname()
+
+#ifdef RPDBG_EXECVE
+#include "../debug/debug_execve.h"
+#endif
+
+#define MAX_INTERCEPTED_FILES 10
+
+static char * intercepted_filenames[MAX_INTERCEPTED_FILES] = { NULL };
+
+int add_blocked_execve_filename(const char *filename)
+{
+    if (unlikely(strlen(filename) > PATH_MAX))
+        return -ENAMETOOLONG;
+
+    unsigned int idx = 0;
+    while (likely(intercepted_filenames[idx])) { //Find free spot
+        if (unlikely(strcmp(filename, intercepted_filenames[idx]) == 0)) { //Does it exist already?
+            pr_loc_bug("File %s was already added at %d", filename, idx);
+            return -EEXIST;
+        }
+
+        if(unlikely(++idx >= MAX_INTERCEPTED_FILES)) { //Are we out of indexes?
+            pr_loc_bug("Tried to add %d intercepted filename (max=%d)", idx, MAX_INTERCEPTED_FILES);
+            return -ENOMEM;
+        }
+    }
+
+    kmalloc_or_exit_int(intercepted_filenames[idx], strsize(filename));
+    strcpy(intercepted_filenames[idx], filename); //Size checked above
+
+    pr_loc_inf("Filename %s will be blocked from execution", filename);
+    return 0;
+}
+
+SYSCALL_SHIM_DEFINE3(execve,
+                     const char __user *, filename,
+                     const char __user *const __user *, argv,
+                     const char __user *const __user *, envp)
+{
+    struct filename *path = _getname(filename);
+
+    //this is essentially what do_execve() (or SYSCALL_DEFINE3 on older kernels) will do if the getname ptr is invalid
+    if (IS_ERR(path))
+        return PTR_ERR(path);
+
+    const char *pathname = path->name;
+#ifdef RPDBG_EXECVE
+    RPDBG_print_execve_call(pathname, argv);
+#endif
+
+    for (int i = 0; i < MAX_INTERCEPTED_FILES; i++) {
+        if (!intercepted_filenames[i])
+            break;
+
+        if (unlikely(strcmp(pathname, intercepted_filenames[i]) == 0)) {
+            pr_loc_inf("Blocked %s from running", pathname);
+            //We cannot just return 0 here - execve() *does NOT* return on success, but replaces the current process ctx
+            do_exit(0);
+        }
+    }
+
+//Depending on the version of the kernel do_execve() accepts bare filename (old) or the full struct filename (newer)
+//Additionally in older kernels we need to take care of the path lifetime and put it back (it's automatic in newer)
+//See: https://github.com/torvalds/linux/commit/c4ad8f98bef77c7356aa6a9ad9188a6acc6b849d
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0)
+    int out = _do_execve(pathname, argv, envp);
+    _putname(path);
+    return out;
+#else
+    return _do_execve(path, argv, envp);
+#endif
+}
+
+static override_symbol_inst *sys_execve_ovs = NULL;
+int register_execve_interceptor()
+{
+    pr_loc_dbg("Registering execve() interceptor");
+
+    if (sys_execve_ovs) {
+        pr_loc_bug("Called %s() while execve() interceptor is already registered", __FUNCTION__);
+        return -EEXIST;
+    }
+
+#if LINUX_VERSION_CODE <
KERNEL_VERSION(5,10,0) + override_symbol_or_exit_int(sys_execve_ovs, "SyS_execve", SyS_execve_shim); +#else + // TODO there is another __ia32_sys_execve, maybe need to override. + override_symbol_or_exit_int(sys_execve_ovs, "__x64_sys_execve", SyS_execve_shim); +#endif + + pr_loc_inf("execve() interceptor registered"); + return 0; +} + +int unregister_execve_interceptor() +{ + pr_loc_dbg("Unregistering execve() interceptor"); + + if (!sys_execve_ovs) { + pr_loc_bug("Called %s() while execve() interceptor is not registered (yet?)", __FUNCTION__); + return -ENXIO; + } + + int out = restore_symbol(sys_execve_ovs); + if (out != 0) + return out; + sys_execve_ovs = NULL; + + //Free all strings duplicated in add_blocked_execve_filename() + unsigned int idx = 0; + while (idx < MAX_INTERCEPTED_FILES-1 && intercepted_filenames[idx]) { + kfree(intercepted_filenames[idx]); + intercepted_filenames[idx] = NULL; + idx++; + } + + pr_loc_inf("execve() interceptor unregistered"); + return 0; +} diff --git a/internal/intercept_execve.h b/internal/intercept_execve.h new file mode 100644 index 0000000..8e901f1 --- /dev/null +++ b/internal/intercept_execve.h @@ -0,0 +1,9 @@ +#ifndef REDPILL_INTERCEPT_EXECVE_H +#define REDPILL_INTERCEPT_EXECVE_H + +//There's no remove_ as this requires rearranging the list etc and is not needed for now +int add_blocked_execve_filename(const char * filename); +int register_execve_interceptor(void); +int unregister_execve_interceptor(void); + +#endif //REDPILL_INTERCEPT_EXECVE_H diff --git a/internal/ioscheduler_fixer.c b/internal/ioscheduler_fixer.c new file mode 100644 index 0000000..6c1405e --- /dev/null +++ b/internal/ioscheduler_fixer.c @@ -0,0 +1,26 @@ +/** + * This very simple submodule which prevents kernel log from being flooded with "I/O scheduler elevator not found" + * + * When this shim is loaded as a I/O scheduler (to load very early) it is being set as a I/O scheduler. 
As we later + * remove the module file the system will constantly try to load now non-existing module "elevator-iosched". By + * resetting the "chosen_elevator" using the same function called by "elevator=" handler we can pretend no custom + * I/O scheduler was ever set (so that the system uses default one and stops complaining) + */ +#include "ioscheduler_fixer.h" +#include "../common.h" +#include "call_protected.h" //is_system_booting(), elevator_setup() +#include //system_state + +#define SHIM_NAME "I/O scheduler fixer" + +int reset_elevator(void) +{ + if (!is_system_booting()) { + pr_loc_wrn("Cannot reset I/O scheduler / elevator= set - system is past booting stage (state=%d)", + system_state); + return 0; //This is not an error technically speaking + } + + pr_loc_dbg("Resetting I/O scheduler to default"); + return _elevator_setup("") == 1 ? 0 : -EINVAL; +} \ No newline at end of file diff --git a/internal/ioscheduler_fixer.h b/internal/ioscheduler_fixer.h new file mode 100644 index 0000000..728f789 --- /dev/null +++ b/internal/ioscheduler_fixer.h @@ -0,0 +1,6 @@ +#ifndef REDPILL_IOSCHEDULER_FIXER_H +#define REDPILL_IOSCHEDULER_FIXER_H + +int reset_elevator(void); + +#endif //REDPILL_IOSCHEDULER_FIXER_H diff --git a/internal/notifier_base.h b/internal/notifier_base.h new file mode 100644 index 0000000..57e2fab --- /dev/null +++ b/internal/notifier_base.h @@ -0,0 +1,13 @@ +#ifndef REDPILL_NOTIFIER_BASE_H +#define REDPILL_NOTIFIER_BASE_H + +#define notifier_reg_in() pr_loc_dbg("Registering %s notifier", NOTIFIER_NAME); +#define notifier_reg_ok() pr_loc_inf("Successfully registered %s notifier", NOTIFIER_NAME); +#define notifier_ureg_in() pr_loc_dbg("Unregistering %s notifier", NOTIFIER_NAME); +#define notifier_ureg_ok() pr_loc_inf("Successfully unregistered %s notifier", NOTIFIER_NAME); +#define notifier_sub(nb) \ + pr_loc_dbg("%pF (priority=%d) subscribed to %s events", (nb)->notifier_call, (nb)->priority, NOTIFIER_NAME); +#define notifier_unsub(nb) \ + 
pr_loc_dbg("%pF (priority=%d) unsubscribed from %s events", (nb)->notifier_call, (nb)->priority, NOTIFIER_NAME); + +#endif //REDPILL_NOTIFIER_BASE_H diff --git a/internal/override/override_symbol.c b/internal/override/override_symbol.c new file mode 100644 index 0000000..8a1bae5 --- /dev/null +++ b/internal/override/override_symbol.c @@ -0,0 +1,307 @@ +/** + * This little (and dangerous) utility allows for replacement of any arbitrary kernel symbols with your own + * + * Since we're in the kernel we can do anything we want. This also included manipulating the actual code of functions + * executed by the kernel. So, if someone calls printk() it normally goes to the correct place. However this place can + * be override with a snippet of ASM which jumps to another place - a place we specify. It doesn't require a genius to + * understand the power and implication of this ;) + * + * HOW IT WORKS? + * See header file for example of usage. In short this code does the following: + * 0. Kernel protects .text pages (r/o by default, so you don't override the code by accident): they need to be unlocked + * 1. Find where our symbol-to-be-replaced is located + * 2. Disable memory pages write protection (as symbols are in .text which is ) + * 4. Make memory page containing symbol-to-be-replaced r/w + * 5. Generate jump code ASM containing the address of new symbol specified by the caller + * 6. Mark the memory page from [4] r/o again + * 7. [optional] Process is fully reversible + * + * SYSCALL SPECIAL CASE + * There's also a variant made for syscalls specifically. It differs by the fact that override_symbol() makes + * the original code unusable (as the first bytes are replaced with a jump) yet allows you to replace ANY function. The + * overridden_syscall() in the other hand changes a pointer in the syscalls table. That way you CAN use the original + * pointer to call back the original Linux syscall code you replaced. It works roughly like so: + * 0. 
Kernel keeps syscalls table in .data section which is marked as r/o: it needs to be found & unlocked (#2-4 above) + * 1. replace pointer in the table with custom one + * 2. relock memory + * + * CALLING THE ORIGINAL CODE PROBLEM + * When we wrote this code initially it was meant to be a temporary stop-gap until we have time to write a proper + * rerouting using kernel's "insn" framework. However, this approach only looks simple on the surface. In theory we just + * need to override the function preamble with a simple trampoline consisting of MOV+JMP to our new code. This is rather + * simple and works well. However, attempting to call the original code without restoring the full function to its + * original shape opens a huge can of worms with many edge-cases. Again, in *theory* we can simply grab the original + * preamble, attach a JMP to the original function just after the preamble, and execute it... it will work MOST of the + * time, but: + * - we must ensure we round copied preamble to full instruction + * - trampoline must be automatically padded with NOP-sled + * - if function has ANY arguments (and most do) we need to take care of fixing the stack or saving preamble with all + * pushes? + * - overridden function may be shorter than trampoline (ok, we don't handle this even now, but it's unlikely) + * - and the biggest one: RIP. Some instructions are execution-point addressed (e.g. jump -5 bytes from here). + * Detecting that for the preamble and blocking override of such function (or EVEN fixing that RIP/EIP addressing) is + * rather possible. However, what becomes a problem are backward jumps in the code following the preamble. If the + * code after the preamble jumps backwards and lands in our trampoline instead of the original preamble it will + * either jump into a "random" place or jump to the replacement function. This is not that unlikely if the function + * is a short loop and nothing else.
Trying to find such a bug would be a nightmare and we don't see a sane way of + * scanning the whole function and determining if it has any negative RIPs/EIPs and if they happen to fall within + * preamble. It's a mess with maaaany traps. While kernel has kprobes and fprobes we cannot use them as they're not + * enabled in syno kernels. + * + * We decided to compromise. The code offers a special call_overridden_symbol(). It follows a very similar process to + * restore_symbol()+override_symbol(). Normally the restoration [R] + override [O] process chain will look like so: + * 1. [R] Disable CR0 + * 2. [R] Unlock memory page(s) where trampoline lies + * 3. [R] Copy original preamble over trampoline + * 4. [R] Lock memory page(s) with preamble + * 5. [R] Enable CR0 + * 6. Call original + * 7. [O] Disable CR0 + * 8. [O] Unlock memory page(s) where we want to copy the trampoline + * 9. [O] Copy original trampoline over original preamble + * 10. [O] Lock memory page(s) with trampoline + * 11. [O] Enable CR0 + * + * The call_overridden_symbol() obviously has to disable CR0 and unlock memory but it LEAVES the memory unlocked for any + * subsequent calls. While it's less safe (as an accidental overwrite of that memory will not be reported) it shortens + * the call path for 2nd and beyond calls to the original: + * 1. Check if memory needs to be unlocked + * 2. [R] Copy original preamble over trampoline + * 3. Call original + * 4. [O] Copy original trampoline over original preamble + * + * Using call_overridden_symbol() thus has huge advantages over override+restore if you plan to call the original + * function more than once. If you want to call it only once the call_overridden_symbol() is an equivalent of restore+ + * override. That's why, for readability reasons and DRY of checking code it's preferred to use call_overridden_symbol() + * even if you call the original method only once. + * There's a third method: utilizing forceful breakpoints like kprobe does.
However, this is a rather complex system and + * also contains many traps. Additionally, its overhead is no smaller than the current call_overridden_symbol() + * implementation. The kernel uses breakpoints for more safety and to detect possible interactions between different + * subsystems utilizing breakpoints. This isn't our concern here. + * + * References: + * - https://www.cs.uaf.edu/2016/fall/cs301/lecture/09_28_machinecode.html + * - http://www.watson.org/%7Erobert/2007woot/2007usenixwoot-exploitingconcurrency.pdf + * - https://stackoverflow.com/a/5711253 + * - https://www.kernel.org/doc/Documentation/kprobes.txt + * - https://stackoverflow.com/a/6742086 + */ + +#include "override_symbol.h" +#include "../../common.h" +#include "../helper/memory_helper.h" //set_mem_addr_ro(), set_mem_addr_rw() +#include "../helper/symbol_helper.h" //kln_func +#include //memcpy() + +#define JUMP_ADDR_POS 2 //JUMP starts at [2] in the jump template below +#define OVERRIDE_JUMP_SIZE 1 + 1 + 8 + 1 + 1 //MOVQ + %rax + $vaddr + JMP + *%rax +static const unsigned char jump_tpl[OVERRIDE_JUMP_SIZE] = + "\x48\xb8" "\x00\x00\x00\x00\x00\x00\x00\x00" /* MOVQ 64-bit-vaddr, %rax */ + "\xff\xe0" /* JMP *%rax */ +; + +#define WITH_OVS_LOCK(__sym, code) \ + do { \ + pr_loc_dbg("Obtaining lock for <%pF/%p>", (__sym)->org_sym_ptr, (__sym)->org_sym_ptr); \ + spin_lock_irqsave(&(__sym)->lock, (__sym)->lock_irq); \ + ({code}); \ + spin_unlock_irqrestore(&(__sym)->lock, (__sym)->lock_irq); \ + pr_loc_dbg("Released lock for <%p>", (__sym)->org_sym_ptr); \ + } while(0) + +struct override_symbol_inst { + void *org_sym_ptr; + const void *new_sym_ptr; + char org_sym_code[OVERRIDE_JUMP_SIZE]; + char trampoline[OVERRIDE_JUMP_SIZE]; + spinlock_t lock; + unsigned long lock_irq; + bool installed:1; //whether the symbol is currently overrode (=has trampoline installed) + bool has_trampoline:1; //does this structure contain a valid trampoline code already? 
+ bool mem_protected:1; //is the trampoline installation site memory-protected? + char name[]; +}; + +/** + * Wrapper for set_mem_addr_rw() which works with symbols + */ +static void __always_inline set_symbol_rw(struct override_symbol_inst *sym) +{ + set_mem_addr_rw((unsigned long)sym->org_sym_ptr, OVERRIDE_JUMP_SIZE); + sym->mem_protected = false; +} + +/** + * Wrapper for set_mem_addr_ro() which works with symbols + */ +static void __always_inline set_symbol_ro(struct override_symbol_inst *sym) +{ + set_mem_addr_ro((unsigned long)sym->org_sym_ptr, OVERRIDE_JUMP_SIZE); + sym->mem_protected = true; +} + +void put_overridden_symbol(struct override_symbol_inst *sym) +{ + pr_loc_dbg("Freeing OVS for %s", sym->name); + kfree(sym); +} + +/** + * Initializes new "override symbol instance" structure + * + * @return ptr to struct override_symbol_inst or ERR_PTR(-E) on error + */ +static struct override_symbol_inst* get_ov_symbol_instance(const char *symbol_name, const void *new_sym_ptr) +{ + struct override_symbol_inst *sym; + kmalloc_or_exit_ptr(sym, sizeof(struct override_symbol_inst) + strsize(symbol_name)); + + sym->new_sym_ptr = new_sym_ptr; + spin_lock_init(&sym->lock); + sym->installed = false; + sym->has_trampoline = false; + sym->mem_protected = true; + strcpy(sym->name, symbol_name); + sym->org_sym_ptr = (void *)kln_func(sym->name); + if (unlikely(sym->org_sym_ptr == 0)) { //header file: "Lookup the address for a symbol. Returns 0 if not found." 
+ pr_loc_err("Failed to locate vaddr for %s()", sym->name); + put_overridden_symbol(sym); + return ERR_PTR(-EFAULT); + } + pr_loc_dbg("Saved %s() ptr <%p>", sym->name, sym->org_sym_ptr); + + return sym; +} + +/** + * Generates trampoline code to jump from old symbol to the new symbol location and saves the original code + */ +static inline void prepare_trampoline(struct override_symbol_inst *sym) +{ + pr_loc_dbg("Generating trampoline"); + + //First generate jump/trampoline to new_sym_ptr + memcpy(sym->trampoline, jump_tpl, OVERRIDE_JUMP_SIZE); //copy "empty" trampoline + *(long *)&sym->trampoline[JUMP_ADDR_POS] = (long)sym->new_sym_ptr; //paste new addr into trampoline + pr_loc_dbg("Generated trampoline to %pF<%p> for %s<%p>: ", sym->new_sym_ptr, sym->new_sym_ptr, sym->name, + sym->org_sym_ptr); + + memcpy(sym->org_sym_code, sym->org_sym_ptr, OVERRIDE_JUMP_SIZE); //Backup old code + sym->has_trampoline = true; +} + +/** + * Enables (previously disabled) symbol override, disabling memory barriers if needed & leaving them disabled upon exit + * + * Warning: this function is exported only to make universal call original macros working. You should NOT use it outside + * of this submodule + * + * @return 0 on success, -E on error + */ +int __enable_symbol_override(struct override_symbol_inst *sym) +{ + if (sym->mem_protected) + set_symbol_rw(sym); + + WITH_OVS_LOCK(sym, + if (likely(!sym->installed)) { + if (!sym->has_trampoline) + prepare_trampoline(sym); + + //after we got the lock need to re-check the memory protection - this shouldn't be changed within spinlock + //since it generates a warning... 
but sometimes we have no choice + if (sym->mem_protected) + set_symbol_rw(sym); + + pr_loc_dbg("Writing trampoline code to <%p>", sym->org_sym_ptr); + memcpy(sym->org_sym_ptr, sym->trampoline, OVERRIDE_JUMP_SIZE); + sym->installed = true; + } + ); + + return 0; +} + +/** + * Disables (previously enables) symbol override, disabling memory barriers if needed & leaving them disabled upon exit + * + * Warning: this function is exported only to make universal call original macros working. You should NOT use it outside + * of this submodule + * + * @return 0 on success, -E on error + */ +int __disable_symbol_override(struct override_symbol_inst *sym) +{ + if (sym->mem_protected) + set_symbol_rw(sym); + + WITH_OVS_LOCK(sym, + if (likely(sym->installed)) { + //after we got the lock need to re-check the memory protection - this shouldn't be changed within spinlock + //since it generates a warning... but sometimes we have no choice + if (sym->mem_protected) + set_symbol_rw(sym); + + pr_loc_dbg("Writing original code to <%p>", sym->org_sym_ptr); + memcpy(sym->org_sym_ptr, sym->org_sym_code, OVERRIDE_JUMP_SIZE); + sym->installed = false; + } + ); + + return 0; +} + +struct override_symbol_inst* __must_check override_symbol(const char *name, const void *new_sym_ptr) +{ + pr_loc_dbg("Overriding %s() with %pf()<%p>", name, new_sym_ptr, new_sym_ptr); + + int out; + struct override_symbol_inst *sym = get_ov_symbol_instance(name, new_sym_ptr); + if (unlikely(IS_ERR(sym))) + return sym; + + if ((out = __enable_symbol_override(sym)) != 0) + goto error_out; + + set_symbol_ro(sym); //by design standard override leaves the memory protected + + pr_loc_dbg("Successfully overrode %s() with trampoline to %pF<%p>", sym->name, sym->new_sym_ptr, sym->new_sym_ptr); + return sym; + + error_out: + put_overridden_symbol(sym); + return ERR_PTR(out); +} + +int restore_symbol(struct override_symbol_inst *sym) +{ + pr_loc_dbg("Restoring %s<%p> to original code", sym->name, sym->org_sym_ptr); + + int 
/**
 * Calls the original symbol, returning nothing, that was previously overridden
 *
 * The override is disabled for the duration of the call and enabled again afterwards, but only if it was enabled when
 * the macro was entered. The macro is a statement expression without a trailing semicolon, so it can be used wherever
 * an int expression is allowed (e.g. inside if()).
 *
 * @param sym pointer to a override_symbol_inst
 * @param ... any arguments to the original function
 *
 * @return 0 if the execution succeeded, -E if it didn't
 */
#define call_overridden_symbol_void(sym, ...) ({                  \
    int __ret;                                                    \
    bool __was_installed = symbol_is_overridden(sym);             \
    _Pragma("GCC diagnostic push")                                \
    _Pragma("GCC diagnostic ignored \"-Wstrict-prototypes\"")     \
    void (*__ptr)() = __get_org_ptr(sym);                         \
    _Pragma("GCC diagnostic pop")                                 \
    __ret = __disable_symbol_override(sym);                       \
    if (likely(__ret == 0)) {                                     \
        __ptr(__VA_ARGS__);                                       \
        if (likely(__was_installed)) {                            \
            __ret = __enable_symbol_override(sym);                \
        }                                                         \
    }                                                             \
    __ret;                                                        \
})

/**
 * Calls the original symbol, returning a value, that was previously overridden
 *
 * The override is disabled for the duration of the call and enabled again afterwards, but only if it was enabled when
 * the macro was entered. out_var is assigned only when disabling the override succeeded. The macro is a statement
 * expression without a trailing semicolon, so it can be used wherever an int expression is allowed.
 *
 * @param out_var name of the variable where original function return value should be placed
 * @param sym pointer to a override_symbol_inst
 * @param ... any arguments to the original function
 *
 * @return 0 if the execution succeeded, -E if it didn't
 */
#define call_overridden_symbol(out_var, sym, ...) ({              \
    int __ret;                                                    \
    bool __was_installed = symbol_is_overridden(sym);             \
    _Pragma("GCC diagnostic push")                                \
    _Pragma("GCC diagnostic ignored \"-Wstrict-prototypes\"")     \
    typeof (out_var) (*__ptr)() = __get_org_ptr(sym);             \
    _Pragma("GCC diagnostic pop")                                 \
    __ret = __disable_symbol_override(sym);                       \
    if (likely(__ret == 0)) {                                     \
        out_var = __ptr(__VA_ARGS__);                             \
        if (likely(__was_installed)) {                            \
            __ret = __enable_symbol_override(sym);                \
        }                                                         \
    }                                                             \
    __ret;                                                        \
})
+ * + * @param ptr_var Variable to store ovs pointer + */ +#define override_symbol_or_exit_int(ptr_var, name, new_sym_ptr) \ + (ptr_var) = override_symbol(name, new_sym_ptr); \ + if (unlikely(IS_ERR((ptr_var)))) { \ + int _err = PTR_ERR((ptr_var)); \ + pr_loc_err("Failed to override %s - error=%d", name, _err); \ + (ptr_var) = NULL; \ + return _err; \ + } \ + +/** + * Overrides a kernel symbol with something else of your choice + * + * @param name Name of the kernel symbol (function) to override + * @param new_sym_ptr An address/pointer to a new function + * + * @return Instance of override_symbol_inst struct pointer on success, ERR_PTR(-E) on error + * + * @example + * struct override_symbol_inst *ovi; + * int null_printk() { + * int print_res; + * call_overridden_symbol(print_res, ovi, "No print for you!"); + * return print_res; + * } + * ovi = override_symbol("printk", null_printk); + * if (IS_ERR(ovi)) { ... handle error ... } + * ... + * restore_symbol(backup_addr, backup_code); //restore backed-up copy of printk() + * + * @todo: This should be rewritten using INSN without inline ASM wizardy, but this is much more complex + */ +struct override_symbol_inst* __must_check override_symbol(const char *name, const void *new_sym_ptr); + +/** + * Restores symbol overridden by override_symbol() + * + * For details see override_symbol() docblock + * + * @return 0 on success, -E on error + */ +int restore_symbol(struct override_symbol_inst *sym); + +/** + * Frees the symbol previously reserved by get_ov_symbol_instance() (e.g. via override_symbol) + * + * !! READ THIS SERIOUS WARNING BELOW CAREFULLY !! + * STOP! DO NOT USE THIS if you don't understand what it does and why it exists. This function should be called from + * outside of this submodule ONLY if the overridden code disappeared from memory. This practically can happen only when + * you override a symbol inside of a loadable module and the module is unloaded. 
In ANY other case you must call + * restore_symbol() to actually restore the original code. This function simply "forgets" about the override and frees + * memory (as if external module has been unloaded we are NOT allowed to touch that memory anymore as it may be freed). + * It is explicitly NOT necessary to call this function after restore_symbol() as it does so internally. + */ +void put_overridden_symbol(struct override_symbol_inst *sym); + +/** + * Check if the given symbol override is currently active + */ +bool symbol_is_overridden(struct override_symbol_inst *sym); + + +/****************** Private helpers (should not be used directly by any code outside of this unit!) *******************/ +#include +int __enable_symbol_override(override_symbol_inst *sym); +int __disable_symbol_override(override_symbol_inst *sym); +void * __get_org_ptr(struct override_symbol_inst *sym); + +#endif //REDPILLLKM_OVERRIDE_KFUNC_H diff --git a/internal/override/override_syscall.c b/internal/override/override_syscall.c new file mode 100644 index 0000000..3c1f3b8 --- /dev/null +++ b/internal/override/override_syscall.c @@ -0,0 +1,168 @@ +#include "override_syscall.h" +#include "../../common.h" +#include "../helper/memory_helper.h" //set_mem_addr_ro(), set_mem_addr_rw() +#include //__NR_syscall_max & NR_syscalls +#include //syscalls numbers (e.g. 
__NR_read) +#include "../helper/symbol_helper.h" //kln_func + +static unsigned long *syscall_table_ptr = NULL; +static void print_syscall_table(unsigned int from, unsigned to) +{ + if (unlikely(!syscall_table_ptr)) { + pr_loc_dbg("Cannot print - no syscall_table_ptr address"); + return; + } + + if (unlikely(from < 0 || to > __NR_syscall_max || from > to)) { + pr_loc_bug("%s called with from=%d to=%d which are invalid", __FUNCTION__, from, to); + return; + } + + pr_loc_dbg("Printing syscall table %d-%d @ %p containing %d elements", from, to, (void *)syscall_table_ptr, NR_syscalls); + for (unsigned int i = from; i < to; i++) { + pr_loc_dbg("#%03d\t%pS", i, (void *)syscall_table_ptr[i]); + } +} + +static int find_sys_call_table(void) +{ + syscall_table_ptr = (unsigned long *)kln_func("sys_call_table"); + if (syscall_table_ptr != 0) { + pr_loc_dbg("Found sys_call_table @ <%p> using kallsyms", syscall_table_ptr); + return 0; + } + + //See https://kernelnewbies.kernelnewbies.narkive.com/L1uH0n8P/ + //In essence some systems will have it and some will not - finding it using kallsyms is the easiest and fastest + pr_loc_dbg("Failed to locate vaddr for sys_call_table using kallsyms - falling back to memory search"); + + /* + There's also the bruteforce way - scan through the memory until you find it :D + We know numbers for syscalls (e.g. __NR_close, __NR_write, __NR_read, etc.) which are essentially fixed positions + in the sys_call_table. We also know addresses of functions handling these calls (sys_close/sys_write/sys_read + etc.). This lets us scan the memory for one syscall address reference and when found confirm if this is really + a place of sys_call_table by verifying other 2-3 places to make sure other syscalls are where they should be + The huge downside of this method is it is slow as potentially the amount of memory to search may be large. 
+ */ + unsigned long sys_close_ptr = kln_func("sys_close"); + unsigned long sys_open_ptr = kln_func("sys_open"); + unsigned long sys_read_ptr = kln_func("sys_read"); + unsigned long sys_write_ptr = kln_func("sys_write"); + if (sys_close_ptr == 0 || sys_open_ptr == 0 || sys_read_ptr == 0 || sys_write_ptr == 0) { + pr_loc_bug( + "One or more syscall handler addresses cannot be located: " + "sys_close<%p>, sys_open<%p>, sys_read<%p>, sys_write<%p>", + (void *)sys_close_ptr, (void *)sys_open_ptr, (void *)sys_read_ptr, (void *)sys_write_ptr); + return -EFAULT; + } + + /* + To speed up things a bit we search from a known syscall which was loaded early into the memory. To be safe we pick + the earliest address and go from there. It can be nicely visualized on a system which DO export sys_call_table + by running grep -E ' (__x64_)?sys_(close|open|read|write|call_table)$' /proc/kallsyms | sort + You will get something like that: + ffffffff860c18b0 T __x64_sys_close + ffffffff860c37a0 T __x64_sys_open + ffffffff860c7a80 T __x64_sys_read + ffffffff860c7ba0 T __x64_sys_write + ffffffff86e013a0 R sys_call_table <= it's way below any of the syscalls but not too far (~13,892,336 bytes) + */ + unsigned long i = sys_close_ptr; + if (sys_open_ptr < i) i = sys_open_ptr; + if (sys_read_ptr < i) i = sys_read_ptr; + if (sys_write_ptr < i) i = sys_write_ptr; + + //If everything goes well it should take ~1-2ms tops (which is slow in the kernel sense but it's not bad) + pr_loc_dbg("Scanning memory for sys_call_table starting at %p", (void *)i); + for (; i < ULONG_MAX; i += sizeof(void *)) { + syscall_table_ptr = (unsigned long *)i; + + if (unlikely( + syscall_table_ptr[__NR_close] == sys_close_ptr && + syscall_table_ptr[__NR_open] == sys_open_ptr && + syscall_table_ptr[__NR_read] == sys_read_ptr && + syscall_table_ptr[__NR_write] == sys_write_ptr + )) { + pr_loc_dbg("Found sys_call_table @ %p", (void *)syscall_table_ptr); + return 0; + } + } + + pr_loc_bug("Failed to find sys call table"); 
+ syscall_table_ptr = NULL; + return -EFAULT; +} + +static unsigned long *overridden_syscall[NR_syscalls] = { NULL }; //@todo this should be alloced dynamically +int override_syscall(unsigned int syscall_num, const void *new_sysc_ptr, void * *org_sysc_ptr) +{ + pr_loc_dbg("Overriding syscall #%d with %pf()<%p>", syscall_num, new_sysc_ptr, new_sysc_ptr); + + int out = 0; + if (unlikely(!syscall_table_ptr)) { + out = find_sys_call_table(); + if (unlikely(out != 0)) + return out; + } + + if (unlikely(syscall_num > __NR_syscall_max)) { + pr_loc_bug("Invalid syscall number: %d > %d", syscall_num, __NR_syscall_max); + return -EINVAL; + } + + print_syscall_table(syscall_num-5, syscall_num+5); + + if (unlikely(overridden_syscall[syscall_num])) { + pr_loc_bug("Syscall %d is already overridden - will be replaced (bug?)", syscall_num); + } else { + //Only save original-original entry (not the override one) + overridden_syscall[syscall_num] = (unsigned long *)syscall_table_ptr[syscall_num]; + } + + if (org_sysc_ptr != 0) + *org_sysc_ptr = overridden_syscall[syscall_num]; + + set_mem_addr_rw((long)&syscall_table_ptr[syscall_num], sizeof(unsigned long)); + pr_loc_dbg("syscall #%d originally %ps<%p> will now be %ps<%p> @ %d", syscall_num, + (void *) overridden_syscall[syscall_num], (void *) overridden_syscall[syscall_num], new_sysc_ptr, + new_sysc_ptr, smp_processor_id()); + syscall_table_ptr[syscall_num] = (unsigned long) new_sysc_ptr; + set_mem_addr_ro((long)&syscall_table_ptr[syscall_num], sizeof(unsigned long)); + + print_syscall_table(syscall_num-5, syscall_num+5); + + return out; +} + +int restore_syscall(unsigned int syscall_num) +{ + pr_loc_dbg("Restoring syscall #%d", syscall_num); + + if (unlikely(!syscall_table_ptr)) { + pr_loc_bug("Syscall table not found in %s ?!", __FUNCTION__); + return -EFAULT; + } + + if (unlikely(syscall_num > __NR_syscall_max)) { + pr_loc_bug("Invalid syscall number: %d > %d", syscall_num, __NR_syscall_max); + return -EINVAL; + } + + if 
(unlikely(overridden_syscall[syscall_num] == 0)) { + pr_loc_bug("Syscall #%d cannot be restored - it was never overridden", syscall_num); + return -EINVAL; + } + + print_syscall_table(syscall_num-5, syscall_num+5); + + set_mem_addr_rw((long)&syscall_table_ptr[syscall_num], sizeof(unsigned long)); + pr_loc_dbg("Restoring syscall #%d from %ps<%p> to original %ps<%p>", syscall_num, + (void *) syscall_table_ptr[syscall_num], (void *) syscall_table_ptr[syscall_num], + (void *) overridden_syscall[syscall_num], (void *) overridden_syscall[syscall_num]); + syscall_table_ptr[syscall_num] = (unsigned long)overridden_syscall[syscall_num]; //NOTE(review): saved slot is not cleared + set_mem_addr_ro((long)&syscall_table_ptr[syscall_num], sizeof(unsigned long)); //re-protect entry, as override_syscall() + + print_syscall_table(syscall_num-5, syscall_num+5); + + return 0; +} \ No newline at end of file diff --git a/internal/override/override_syscall.h b/internal/override/override_syscall.h new file mode 100644 index 0000000..4d5c06c --- /dev/null +++ b/internal/override/override_syscall.h @@ -0,0 +1,59 @@ +#ifndef REDPILL_OVERRIDE_SYSCALL_H +#define REDPILL_OVERRIDE_SYSCALL_H + +#include "override_symbol.h" +#include + +//Modified syscall defines for shims based on native Linux syscalls (defined in linux/syscalls.h) +#define SYSCALL_SHIM_DEFINE1(name, ...) SYSCALL_SHIM_DEFINEx(1, _##name##_shim, __VA_ARGS__) +#define SYSCALL_SHIM_DEFINE2(name, ...) SYSCALL_SHIM_DEFINEx(2, _##name##_shim, __VA_ARGS__) +#define SYSCALL_SHIM_DEFINE3(name, ...) SYSCALL_SHIM_DEFINEx(3, _##name##_shim, __VA_ARGS__) +#define SYSCALL_SHIM_DEFINE4(name, ...) SYSCALL_SHIM_DEFINEx(4, _##name##_shim, __VA_ARGS__) +#define SYSCALL_SHIM_DEFINE5(name, ...) SYSCALL_SHIM_DEFINEx(5, _##name##_shim, __VA_ARGS__) +#define SYSCALL_SHIM_DEFINE6(name, ...) SYSCALL_SHIM_DEFINEx(6, _##name##_shim, __VA_ARGS__) +#define SYSCALL_SHIM_DEFINEx(x, name, ...)
\ + static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + static asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + { \ + long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ + return ret; \ + } \ + static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)) + +/** + * Non-destructively overrides a syscall + * + * This produces an effect similar to override_symbol(). However, it should be faster, safer, and most importantly + * allows calling the original syscall in the override. + * + * Warning: DO NOT use this method to override stubbed syscalls. These are syscall which aren't named "sys_foo" (e.g. + * sys_execve or SyS_execve [alias]) but are handled by ASM stubs in arch/x86/kernel/entry_64.S (and visible as e.g. + * stub_execve). If you do override such call with a normal function in the syscall table things will start breaking + * unexpectedly as the registries will be modified in an unexpected way (stubs don't use cdecl)! + * In such cases you need to override the actual sys_* (or even better: SyS_*) function with a jump using + * override_symbol(). Either way you should use SYSCALL_SHIM_DEFINE#() to define the new target/shim. + * Make sure to read https://lwn.net/Articles/604287/ and https://lwn.net/Articles/604406/ + * + * @param syscall_num Number of the syscall to override (e.g. 
open) + * You can find them as __NR_* defines in arch/x86/include/generated/uapi/asm/unistd_64.h + * @param new_sysc_ptr An address/pointer to a new function + * @param org_sysc_ptr Pointer to some space to save address of the original syscall (warning: it's a pointer-pointer); + * You can pass a null-ptr if you don't care about the original syscall and the function will not + * touch it + * + * @return 0 on success, -E on error + */ +int override_syscall(unsigned int syscall_num, const void *new_sysc_ptr, void * *org_sysc_ptr); + +/** + * Restores the syscall previously replaced by override_syscall() + * + * For details see override_syscall() docblock. + * + * @return 0 on success, -E on error + */ +int restore_syscall(unsigned int syscall_num); + +#endif //REDPILL_OVERRIDE_SYSCALL_H diff --git a/internal/scsi/hdparam.h b/internal/scsi/hdparam.h new file mode 100644 index 0000000..c93e681 --- /dev/null +++ b/internal/scsi/hdparam.h @@ -0,0 +1,350 @@ +/** + * This file serves a similar role to include/uapi/linux/hdreg.h, combining constants & macros for dealing with ATA + */ +#include //HDIO_* +#include //ATA_ID_*, ATA_CMD_*, ATA_SECT_SIZE + +#ifndef REDPILL_HDPARAM_H +#define REDPILL_HDPARAM_H + +/********************************** Parameters related to HDIO_DRIVE_CMD ioctl call ***********************************/ +//the following constants contain size/offsets/indexes of "struct hd_drive_cmd_hdr" fields from user API +// also see HDIO_DRIVE_CMD in https://www.kernel.org/doc/Documentation/ioctl/hdio.txt +#define HDIO_DRIVE_CMD_HDR_OFFSET 4 //same as HDIO_DRIVE_CMD_HDR_SIZE but in sectors +#define HDIO_DRIVE_CMD_HDR_CMD 0 //command +#define HDIO_DRIVE_CMD_HDR_SEC_NUM 1 //sector number +#define HDIO_DRIVE_CMD_HDR_FEATURE 2 //feature +#define HDIO_DRIVE_CMD_HDR_SEC_CNT 3 //sector count + +#define HDIO_DRIVE_CMD_RET_STATUS 0 +#define HDIO_DRIVE_CMD_RET_ERROR 1 +#define HDIO_DRIVE_CMD_RET_SEC_CNT 2 //sector count + +//convert sector size of data section of an ATA 
(sub)command to the full buffer size with header +#define ata_ioctl_buf_size(data_sectors) (u16)(HDIO_DRIVE_CMD_HDR_SIZE+((data_sectors)*(ATA_SECT_SIZE*sizeof(u8)))) + +//Expected sizes of various commands/subcommands data sections +#define ATA_CMD_ID_ATA_SECTORS 1 +#define ATA_SMART_READ_VALUES_SECTORS 1 //subcommand of ATA_CMD_SMART +#define ATA_SMART_READ_THRESHOLDS_SECTORS 1 //subcommand of ATA_CMD_SMART +#define ATA_WIN_SMART_READ_LOG_SECTORS 1 //subcommand of ATA_CMD_SMART +#define ATA_WIN_SMART_EXEC_TEST 1 //subcommand of ATA_CMD_SMART + +/********************************** Parameters related to HDIO_DRIVE_TASK ioctl call **********************************/ +//another method of calling some ioctls +// see https://github.com/mirror/smartmontools/blob/b63206bc12efb2ae543040b9008f42c037eb1f04/os_linux.cpp#L379 +// also see HDIO_DRIVE_TASK in https://www.kernel.org/doc/Documentation/ioctl/hdio.txt +#define HDIO_DRIVE_TASK_HDR_OFFSET 7 +#define HDIO_DRIVE_TASK_HDR_CMD 0 //command code +#define HDIO_DRIVE_TASK_HDR_FEATURE 1 //feature +#define HDIO_DRIVE_TASK_HDR_SEC_CNT 2 //sector count +#define HDIO_DRIVE_TASK_HDR_SEC_NUM 3 //sector number +#define HDIO_DRIVE_TASK_HDR_LCYL 4 //CYL LO +#define HDIO_DRIVE_TASK_HDR_HCYL 5 //CYL HI +#define HDIO_DRIVE_TASK_HDR_SEL 6 //device head + +#define HDIO_DRIVE_TASK_RET_STATUS 0 +#define HDIO_DRIVE_TASK_RET_ERROR 1 +#define HDIO_DRIVE_TASK_RET_SEC_CNT 2 //sector count +#define HDIO_DRIVE_TASK_RET_SEC_NUM 3 //sector number +#define HDIO_DRIVE_TASK_RET_LCYL 4 //CYL LO +#define HDIO_DRIVE_TASK_RET_HCYL 5 //CYL HI +#define HDIO_DRIVE_TASK_RET_SEL 6 //device head + +//all WIN_FT_* entries are defined under "WIN_SMART sub-commands" in hdreg.h +#define WIN_CMD_SMART 0xb0 //defined in full linux headers as WIN_SMART in hdreg.h +#define WIN_FT_SMART_IMMEDIATE_OFFLINE 0xd4 +#define WIN_FT_SMART_READ_LOG_SECTOR 0xd5 +#define WIN_FT_SMART_STATUS 0xda +#define WIN_FT_SMART_AUTOSAVE 0xd2 //this is not a typo (AUTOSAVE and AUTO_OFFLINE are 
spelled differently in ATA spec) +#define WIN_FT_SMART_AUTO_OFFLINE 0xdb + +/*************************************** Params related to ATA IDENTIFY command ***************************************/ +//Word numbers for the ATA IDENTIFY command response fields & bits in them (described in "struct hd_driveid") +#define ATA_ID_COMMAND_SET_1_SMART 0x01 //first bit of command set #1 contains SMART supported flag +#define ATA_ID_COMMAND_SET_2_VALID 0x4000 //14th bit with should always be 1 when disk supports cmd set 2 +#define ATA_ID_CFS_ENABLE_1_SMART 0x01 //first bit of command set #1 contains SMART enable flag +#define ATA_ID_CSF_DEFAULT_VALID 0x4000 //14th bit with should always be 1 when disk supports that + +/************************************************* ATA IDENTIFY macros ************************************************/ +//These can be used with ATA IDENTIFY data returned by HDIO_GET_IDENTITY or HDIO_DRIVE_CMD=>ATA_CMD_IDENTIFY_DEV with +// when buffer is corrected by HDIO_DRIVE_CMD_HDR_OFFSET (to move over the header data) +#define ata_is_smart_supported(id_data) (((id_data)[ATA_ID_COMMAND_SET_2] & ATA_ID_COMMAND_SET_2_VALID) && \ + ((id_data)[ATA_ID_COMMAND_SET_1] & ATA_ID_COMMAND_SET_1_SMART)) + +#define ata_is_smart_enabled(id_data) (((id_data)[ATA_ID_CSF_DEFAULT] & ATA_ID_CSF_DEFAULT_VALID) && \ + ((id_data)[ATA_ID_CFS_ENABLE_1] & ATA_ID_CFS_ENABLE_1_SMART)) + +//set_/unset_ are deliberately asymmetrical here - we don't want to invalidate whole word when disabling SMART +#define ata_set_smart_supported(id_data) \ + do { \ + (id_data)[ATA_ID_COMMAND_SET_2] |= ATA_ID_COMMAND_SET_2_VALID; \ + (id_data)[ATA_ID_COMMAND_SET_1] |= ATA_ID_COMMAND_SET_1_SMART; \ + } while(0) +#define ata_reset_smart_supported(id_data) \ + do { \ + (id_data)[ATA_ID_COMMAND_SET_1] &= ~ATA_ID_COMMAND_SET_1_SMART; \ + } while(0) + +#define ata_set_smart_enabled(id_data) \ + do { \ + (id_data)[ATA_ID_CSF_DEFAULT] |= ATA_ID_CSF_DEFAULT_VALID; \ + (id_data)[ATA_ID_CFS_ENABLE_1] |= 
ATA_ID_CFS_ENABLE_1_SMART; \ + } while(0) +#define ata_reset_smart_enabled(id_data) \ + do { \ + (id_data)[ATA_ID_CFS_ENABLE_1] &= ~ATA_ID_CFS_ENABLE_1_SMART; \ + } while(0) + +/*********************************************** Miscellaneous constants **********************************************/ +#define ATA_SMART_RECORD_LEN 12 //length of the SMART snapshot data row in bytes, defined + +//Modified for kernel use - it's the "hd_driveid" struct from Linux include/uapi/linux/hdreg.h which represents a +// response to HDIO_GET_IDENTITY. See "Table 26 − IDENTIFY DEVICE information" in ATA/ATAPI-6 spec for details. +struct rp_hd_driveid { + u16 config; /* lots of obsolete bit flags */ + u16 cyls; /* Obsolete, "physical" cyls */ + u16 reserved2; /* reserved (word 2) */ + u16 heads; /* Obsolete, "physical" heads */ + u16 track_bytes; /* unformatted bytes per track */ + u16 sector_bytes; /* unformatted bytes per sector */ + u16 sectors; /* Obsolete, "physical" sectors per track */ + u16 vendor0; /* vendor unique */ + u16 vendor1; /* vendor unique */ + u16 vendor2; /* Retired vendor unique */ + u8 serial_no[20]; /* 0 = not_specified */ + u16 buf_type; /* Retired */ + u16 buf_size; /* Retired, 512 byte increments + * 0 = not_specified + */ + u16 ecc_bytes; /* for r/w long cmds; 0 = not_specified */ + u8 fw_rev[8]; /* 0 = not_specified */ + u8 model[40]; /* 0 = not_specified */ + u8 max_multsect; /* 0=not_implemented */ + u8 vendor3; /* vendor unique */ + u16 dword_io; /* 0=not_implemented; 1=implemented */ + u8 vendor4; /* vendor unique */ + u8 capability; /* (upper byte of word 49) + * 3: IORDYsup + * 2: IORDYsw + * 1: LBA + * 0: DMA + */ + u16 reserved50; /* reserved (word 50) */ + u8 vendor5; /* Obsolete, vendor unique */ + u8 tPIO; /* Obsolete, 0=slow, 1=medium, 2=fast */ + u8 vendor6; /* Obsolete, vendor unique */ + u8 tDMA; /* Obsolete, 0=slow, 1=medium, 2=fast */ + u16 field_valid; /* (word 53) + * 2: ultra_ok word 88 + * 1: eide_ok words 64-70 + * 0: cur_ok words 
54-58 + */ + u16 cur_cyls; /* Obsolete, logical cylinders */ + u16 cur_heads; /* Obsolete, l heads */ + u16 cur_sectors; /* Obsolete, l sectors per track */ + u16 cur_capacity0; /* Obsolete, l total sectors on drive */ + u16 cur_capacity1; /* Obsolete, (2 words, misaligned int) */ + u8 multsect; /* current multiple sector count */ + u8 multsect_valid; /* when (bit0==1) multsect is ok */ + unsigned int lba_capacity; /* Obsolete, total number of sectors */ + u16 dma_1word; /* Obsolete, single-word dma info */ + u16 dma_mword; /* multiple-word dma info */ + u16 eide_pio_modes; /* bits 0:mode3 1:mode4 */ + u16 eide_dma_min; /* min mword dma cycle time (ns) */ + u16 eide_dma_time; /* recommended mword dma cycle time (ns) */ + u16 eide_pio; /* min cycle time (ns), no IORDY */ + u16 eide_pio_iordy; /* min cycle time (ns), with IORDY */ + u16 words69_70[2]; /* reserved words 69-70 + * future command overlap and queuing + */ + u16 words71_74[4]; /* reserved words 71-74 + * for IDENTIFY PACKET DEVICE command + */ + u16 queue_depth; /* (word 75) + * 15:5 reserved + * 4:0 Maximum queue depth -1 + */ + u16 words76_79[4]; /* reserved words 76-79 */ + u16 major_rev_num; /* (word 80) */ + u16 minor_rev_num; /* (word 81) */ + u16 command_set_1; /* (word 82) supported + * 15: Obsolete + * 14: NOP command + * 13: READ_BUFFER + * 12: WRITE_BUFFER + * 11: Obsolete + * 10: Host Protected Area + * 9: DEVICE Reset + * 8: SERVICE Interrupt + * 7: Release Interrupt + * 6: look-ahead + * 5: write cache + * 4: PACKET Command + * 3: Power Management Feature Set + * 2: Removable Feature Set + * 1: Security Feature Set + * 0: SMART Feature Set + */ + u16 command_set_2; /* (word 83) + * 15: Shall be ZERO + * 14: Shall be ONE + * 13: FLUSH CACHE EXT + * 12: FLUSH CACHE + * 11: Device Configuration Overlay + * 10: 48-bit Address Feature Set + * 9: Automatic Acoustic Management + * 8: SET MAX security + * 7: reserved 1407DT PARTIES + * 6: SetF sub-command Power-Up + * 5: Power-Up in Standby Feature 
Set + * 4: Removable Media Notification + * 3: APM Feature Set + * 2: CFA Feature Set + * 1: READ/WRITE DMA QUEUED + * 0: Download MicroCode + */ + u16 cfsse; /* (word 84) + * cmd set-feature supported extensions + * 15: Shall be ZERO + * 14: Shall be ONE + * 13:6 reserved + * 5: General Purpose Logging + * 4: Streaming Feature Set + * 3: Media Card Pass Through + * 2: Media Serial Number Valid + * 1: SMART selt-test supported + * 0: SMART error logging + */ + u16 cfs_enable_1; /* (word 85) + * command set-feature enabled + * 15: Obsolete + * 14: NOP command + * 13: READ_BUFFER + * 12: WRITE_BUFFER + * 11: Obsolete + * 10: Host Protected Area + * 9: DEVICE Reset + * 8: SERVICE Interrupt + * 7: Release Interrupt + * 6: look-ahead + * 5: write cache + * 4: PACKET Command + * 3: Power Management Feature Set + * 2: Removable Feature Set + * 1: Security Feature Set + * 0: SMART Feature Set + */ + u16 cfs_enable_2; /* (word 86) + * command set-feature enabled + * 15: Shall be ZERO + * 14: Shall be ONE + * 13: FLUSH CACHE EXT + * 12: FLUSH CACHE + * 11: Device Configuration Overlay + * 10: 48-bit Address Feature Set + * 9: Automatic Acoustic Management + * 8: SET MAX security + * 7: reserved 1407DT PARTIES + * 6: SetF sub-command Power-Up + * 5: Power-Up in Standby Feature Set + * 4: Removable Media Notification + * 3: APM Feature Set + * 2: CFA Feature Set + * 1: READ/WRITE DMA QUEUED + * 0: Download MicroCode + */ + u16 csf_default; /* (word 87) + * command set-feature default + * 15: Shall be ZERO + * 14: Shall be ONE + * 13:6 reserved + * 5: General Purpose Logging enabled + * 4: Valid CONFIGURE STREAM executed + * 3: Media Card Pass Through enabled + * 2: Media Serial Number Valid + * 1: SMART selt-test supported + * 0: SMART error logging + */ + u16 dma_ultra; /* (word 88) */ + u16 trseuc; /* time required for security erase */ + u16 trsEuc; /* time required for enhanced erase */ + u16 CurAPMvalues; /* current APM values */ + u16 mprc; /* master password revision 
code */ + u16 hw_config; /* hardware config (word 93) + * 15: Shall be ZERO + * 14: Shall be ONE + * 13: + * 12: + * 11: + * 10: + * 9: + * 8: + * 7: + * 6: + * 5: + * 4: + * 3: + * 2: + * 1: + * 0: Shall be ONE + */ + u16 acoustic; /* (word 94) + * 15:8 Vendor's recommended value + * 7:0 current value + */ + u16 msrqs; /* min stream request size */ + u16 sxfert; /* stream transfer time */ + u16 sal; /* stream access latency */ + unsigned int spg; /* stream performance granularity */ + unsigned long long lba_capacity_2;/* 48-bit total number of sectors */ + u16 words104_125[22];/* reserved words 104-125 */ + u16 last_lun; /* (word 126) */ + u16 word127; /* (word 127) Feature Set + * Removable Media Notification + * 15:2 reserved + * 1:0 00 = not supported + * 01 = supported + * 10 = reserved + * 11 = reserved + */ + u16 dlf; /* (word 128) + * device lock function + * 15:9 reserved + * 8 security level 1:max 0:high + * 7:6 reserved + * 5 enhanced erase + * 4 expire + * 3 frozen + * 2 locked + * 1 en/disabled + * 0 capability + */ + u16 csfo; /* (word 129) + * current set features options + * 15:4 reserved + * 3: auto reassign + * 2: reverting + * 1: read-look-ahead + * 0: write cache + */ + u16 words130_155[26];/* reserved vendor words 130-155 */ + u16 word156; /* reserved vendor word 156 */ + u16 words157_159[3];/* reserved vendor words 157-159 */ + u16 cfa_power; /* (word 160) CFA Power Mode + * 15 word 160 supported + * 14 reserved + * 13 + * 12 + * 11:0 + */ + u16 words161_175[15];/* Reserved for CFA */ + u16 words176_205[30];/* Current Media Serial Number */ + u16 words206_254[49];/* reserved words 206-254 */ + u16 integrity_word; /* (word 255) + * 15:8 Checksum + * 7:0 Signature + */ +} __packed; + +#endif //REDPILL_HDPARAM_H diff --git a/internal/scsi/scsi_notifier.c b/internal/scsi/scsi_notifier.c new file mode 100644 index 0000000..933277a --- /dev/null +++ b/internal/scsi/scsi_notifier.c @@ -0,0 +1,239 @@ +/** + * Notification chain implementation for SCSI 
devices + * + * Linux kernel contains a subsystem responsible for delivering notifications about asynchronous events. It implements a + * pub/sub model. As many subsystems predate existence of the so-called Notification Chains these subsystems usually + * lack any pub/sub functionality. SCSI is no exception. SCSI layer/driver is ancient and huge. It does not have any way + * of delivering events to other parts of the system. This submodule retrofits notification chains to the SCSI layer to + * notify about new devices being added to the system. It can be easily extended to notify about removed devices as + * well. + * + * Before using this submodule you should read the notice below + the gitbooks article if you have never worked with + * Linux notification chains. + * + * !! READ ME - THIS IS IMPORTANT !! + * The core notifier.h contains the following return constants: + * - NOTIFY_DONE: "don't care about that event really" + * - NOTIFY_OK: "good, processed" (really the same as DONE; semantic is defined by a particular publisher[!]) + * - NOTIFY_BAD: "stop the notification chain! I veto that action!" + * - NOTIFY_STOP: "stop the notification chain. It's all good." + * This SCSI notifier defines them as such: + * - NOTIFY_DONE, NOTIFY_OK: processed, continue calling other + * - NOTIFY_BAD: + * scsi_event=SCSI_EVT_DEV_PROBING: stop sd_probe() with EBUSY error; subscribers with lower priority will not exec + * scsi_event=SCSI_EVT_DEV_PROBED_OK: subscribers with lower priority will not exec + * scsi_event=SCSI_EVT_DEV_PROBED_ERR: subscribers with lower priority will not exec + * - NOTIFY_STOP: + * scsi_event=SCSI_EVT_DEV_PROBING: stop sd_probe() with 0 err-code; subscribers with lower priority will not exec + * scsi_event=SCSI_EVT_DEV_PROBED_OK: subscribers with lower priority will not exec + * scsi_event=SCSI_EVT_DEV_PROBED_ERR: subscribers with lower priority will not exec + * + * SUPPORTED DEVICES + * Currently only SCSI disks are supported. 
This isn't a technical limitation but rather a practical one - we don't want + * to trigger notifications for all-all SCSI devices (which include hosts, buses, etc). If needed a new set of functions + * subscribe_.../unsubscribe_... can easily be added which don't filter by type. + * + * TODO + * This notifier does not support notifying about disconnection of the device. It should as we need to know if device + * disappeared (e.g. while processing shimming of boot devices). + * + * ADDITIONAL TOOLS + * It is highly recommended to use scsi_toolbox when subscribing to notifications from the SCSI subsystem. + * + * References: + * - https://0xax.gitbooks.io/linux-insides/content/Concepts/linux-cpu-4.html (about notification chains subsystem) + */ +#include "scsi_notifier.h" +#include "../../common.h" +#include "../notifier_base.h" //notifier_*() +#include "scsi_notifier_list.h" +#include "scsi_toolbox.h" +#include "../intercept_driver_register.h" //watching for sd driver loading +#include //to_scsi_device() + +#define NOTIFIER_NAME "SCSI device" + +/*********************************** Interacting with an active/loaded SCSI driver ************************************/ +static driver_watcher_instance *driver_watcher = NULL; +static int (*org_sd_probe) (struct device *dev) = NULL; //set during register + +/** + * Main notification routine hooking sd_probe(): NOTIFY_STOP from a subscriber fakes probe-ok, NOTIFY_BAD fails it (-EIO) + */ +static int sd_probe_shim(struct device *dev) +{ + pr_loc_dbg("Probing SCSI device using %s", __FUNCTION__); + if (!is_scsi_leaf(dev)) { + pr_loc_dbg("%s: new SCSI device connected - not a leaf, ignoring", __FUNCTION__); + return org_sd_probe(dev); + } + + struct scsi_device *sdp = to_scsi_device(dev); + if (!is_scsi_disk(sdp)) { + pr_loc_dbg("%s: new SCSI device connected - not a disk, ignoring", __FUNCTION__); + return org_sd_probe(dev); + } + + pr_loc_dbg("Triggering SCSI_EVT_DEV_PROBING notifications"); + int out = blocking_notifier_call_chain(&rp_scsi_notify_list, SCSI_EVT_DEV_PROBING,
sdp); + if (unlikely(out == NOTIFY_STOP)) { //raw chain result is compared - notifier_to_errno() would strip the stop bit + pr_loc_dbg("After SCSI_EVT_DEV_PROBING a callee stopped chain with non-error condition. Faking probe-ok."); + return 0; + } else if (unlikely(out == NOTIFY_BAD)) { + pr_loc_dbg("After SCSI_EVT_DEV_PROBING a callee stopped chain with error condition. Failing probe."); + return -EIO; //some generic error + } + + pr_loc_dbg("Calling original sd_probe()"); + out = org_sd_probe(dev); + scsi_event evt = (out == 0) ? SCSI_EVT_DEV_PROBED_OK : SCSI_EVT_DEV_PROBED_ERR; + + pr_loc_dbg("Triggering SCSI_EVT_DEV_PROBED notifications - sd_probe() exit=%d", out); + blocking_notifier_call_chain(&rp_scsi_notify_list, evt, sdp); + + return out; +} + +/** + * Overrides sd_probe() to provide notifications via sd_probe_shim() + * + * @param drv "sd" driver instance + */ +static inline void install_sd_probe_shim(struct device_driver *drv) +{ + pr_loc_dbg("Overriding %pf()<%p> with %pf()<%p>", drv->probe, drv->probe, sd_probe_shim, sd_probe_shim); + org_sd_probe = drv->probe; + drv->probe = sd_probe_shim; +} + +/** + * Removes override of sd_probe(), installed by install_sd_probe_shim() + * + * @param drv "sd" driver instance + */ +static inline void uninstall_sd_probe_shim(struct device_driver *drv) +{ + if (unlikely(!org_sd_probe)) { + pr_loc_wrn( + "Cannot %s - original drv->probe is not saved. It was either never installed or it's a bug. " + "The current drv->probe is %pf()<%p>", + __FUNCTION__, drv->probe, drv->probe); + return; + } + + pr_loc_dbg("Restoring %pf()<%p> to %pf()<%p>", drv->probe, drv->probe, org_sd_probe, org_sd_probe); + drv->probe = org_sd_probe; + org_sd_probe = NULL; +} + +/** + * Watches for the sd driver to load in order to shim it. The driver registration is modified before the driver loads.
+ */ +static driver_watch_notify_result sd_load_watcher(struct device_driver *drv, driver_watch_notify_state event) +{ + if (unlikely(event != DWATCH_STATE_COMING)) + return DWATCH_NOTIFY_CONTINUE; + + pr_loc_dbg("%s driver loaded - triggering sd_probe shim installation", SCSI_DRV_NAME); + install_sd_probe_shim(drv); + + driver_watcher = NULL; //returning DWATCH_NOTIFY_DONE causes automatic unwatching + return DWATCH_NOTIFY_DONE; +} + +/******************************************** Public API of the notifier **********************************************/ +extern struct bus_type scsi_bus_type; + +int subscribe_scsi_disk_events(struct notifier_block *nb) +{ + notifier_sub(nb); + return blocking_notifier_chain_register(&rp_scsi_notify_list, nb); +} + +int unsubscribe_scsi_disk_events(struct notifier_block *nb) +{ + notifier_unsub(nb); + return blocking_notifier_chain_unregister(&rp_scsi_notify_list, nb); +} + +// We need an additional flag as depending on which method of sd_probe override (watcher vs. 
existing driver find & +// switch) +static bool notifier_registered = false; +int register_scsi_notifier(void) +{ + notifier_reg_in(); + + if (unlikely(notifier_registered)) { + pr_loc_bug("%s notifier is already registered", NOTIFIER_NAME); + return -EEXIST; + } + + struct device_driver *drv = find_scsi_driver(); + + if(unlikely(drv < 0)) { //some error occurred while looking for the driver + return PTR_ERR(drv); //find_scsi_driver() should already log what went wrong + } else if(drv) { //the driver is already loaded - driver watcher cannot help us + pr_loc_wrn( + "The %s driver was already loaded when %s notifier registered - some devices may already be registered", + SCSI_DRV_NAME, NOTIFIER_NAME); + install_sd_probe_shim(drv); + } else { //driver not yet loaded - driver watcher will trigger sd_probe_shim installation when driver loads + pr_loc_dbg("The %s driver is not ready to dispatch %s notifier events - awaiting driver", SCSI_DRV_NAME, + NOTIFIER_NAME); + driver_watcher = watch_scsi_driver_register(sd_load_watcher, DWATCH_STATE_COMING); + if (unlikely(IS_ERR(driver_watcher))) { + pr_loc_err("Failed to register driver watcher for driver %s", SCSI_DRV_NAME); + return PTR_ERR(driver_watcher); + } + } + + notifier_registered = true; + + notifier_reg_ok(); + return 0; +} + +int unregister_scsi_notifier(void) +{ + notifier_ureg_in(); + + if (unlikely(!notifier_registered)) { + pr_loc_bug("%s notifier is not registered", NOTIFIER_NAME); + return -ENOENT; + } + + bool is_error = false; + int out = -EINVAL; + + //Check if we're watching sd driver (i.e. 
SCSI notifier was registered and is now being unregistered before the + // driver had a chance to load) + if (unlikely(driver_watcher)) { + pr_loc_dbg("%s notifier is still observing %s driver - stopping observer", NOTIFIER_NAME, SCSI_DRV_NAME); + out = unwatch_driver_register(driver_watcher); + if (unlikely(out != 0)) { + pr_loc_err("Failed to unregister driver watcher - error=%d", out); + is_error = true; + } + } + + //sd_probe() was replaced either after watching for the driver or on-the-spot after the driver was already loaded + if (likely(org_sd_probe)) { + struct device_driver *drv = find_scsi_driver(); + if (unlikely(IS_ERR(drv))) { + return PTR_ERR(drv); //find_scsi_driver() should already log what went wrong + } else if(likely(drv)) { + uninstall_sd_probe_shim(drv); + } else { //that is almost impossible as sd is built-in, but if it happens there's nothing to recover + pr_loc_wrn("%s driver went away (?!)", SCSI_DRV_NAME); + is_error = true; + } + } + + notifier_registered = false; + if (unlikely(is_error)) { + return out; + } else { + notifier_ureg_ok(); + return 0; + } +} diff --git a/internal/scsi/scsi_notifier.h b/internal/scsi/scsi_notifier.h new file mode 100644 index 0000000..66b1142 --- /dev/null +++ b/internal/scsi/scsi_notifier.h @@ -0,0 +1,29 @@ +#ifndef REDPILL_SCSI_NOTIFIER_H +#define REDPILL_SCSI_NOTIFIER_H + +#include //Everything including scsi_notifier.h needs linux/notifier.h anyway + +typedef enum { + SCSI_EVT_DEV_PROBING, //device is being probed; it can be modified or outright ignored + SCSI_EVT_DEV_PROBED_OK, //device is probed and ready + SCSI_EVT_DEV_PROBED_ERR, //device was probed but it failed +} scsi_event; + +/** + * Callback signature: int (*f)(struct notifier_block *self, unsigned long state, void *data), where: + * unsigned long state => scsi_event event + * void *data => struct scsi_device *sdp + * + * Currently these methods are DELIBERATELY limited to SCSI TYPE_DISK scope.
If you need other SCSI devices watching + * add another set of methods (subscribe scsi_device_events() and such, do NOT extend the scope of these methods as + * other parts of the code rely on pre-filtered events as in most cases listening for ALL devices is a lot of noise). + * + * @return + */ +int subscribe_scsi_disk_events(struct notifier_block *nb); +int unsubscribe_scsi_disk_events(struct notifier_block *nb); + +int register_scsi_notifier(void); +int unregister_scsi_notifier(void); + +#endif //REDPILL_SCSI_NOTIFIER_H diff --git a/internal/scsi/scsi_notifier_list.c b/internal/scsi/scsi_notifier_list.c new file mode 100644 index 0000000..dd4cd62 --- /dev/null +++ b/internal/scsi/scsi_notifier_list.c @@ -0,0 +1,4 @@ +#include "scsi_notifier_list.h" +#include + +BLOCKING_NOTIFIER_HEAD(rp_scsi_notify_list); diff --git a/internal/scsi/scsi_notifier_list.h b/internal/scsi/scsi_notifier_list.h new file mode 100644 index 0000000..32de773 --- /dev/null +++ b/internal/scsi/scsi_notifier_list.h @@ -0,0 +1,27 @@ +/** + * This file exists solely as a workaround for GCC bug #275674 - static structures are misdirected as dynamic + * + * Linux contains many clever idioms. One of them is a complex initialization of heads for notifier chains + * (include/linux/notifier.h). They do contain an embedded cast to a struct. GCC <5 detects that as a dynamic allocation + * and refuses to initialize it statically. This breaks all the macros for notifier (e.g. BLOCKING_NOTIFIER_INIT). Old + * kernels (i.e. <3.18) cannot be compiled with GCC >4.9 so... we cannot use a newer GCC but we cannot use older due to + * a bug. One of the solutions would be to convert the whole code of this module to GNU89 but this is painful to use. + * + * Such structures are working in GNU89 mode as well as when defined as a heap variable in a function. However, GCC is + * smart enough to release the memory from within a function (so we cannot just wrap it in a function and return a ptr). 
+ * Due to the complex nature of the struct we didn't want to hardcode it here as they change between kernel version. + * As a workaround we created a separate compilation unit containing just the struct and compile it in GNU89 mode, while + * rest of the project stays at GNU99. + * + * Resources + * - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63567 (bug report) + * - https://unix.stackexchange.com/a/275674 (kernel v3.18 restriction) + * - https://stackoverflow.com/a/49119902 (linking files compiled with different language standard in GCC) + * - https://www.kernel.org/doc/Documentation/kbuild/makefiles.txt (compilation option per file in Kbuild; sect. 3.7) + */ +#ifndef REDPILL_SCSI_NOTIFIER_LIST_H +#define REDPILL_SCSI_NOTIFIER_LIST_H + +extern struct blocking_notifier_head rp_scsi_notify_list; + +#endif //REDPILL_SCSI_NOTIFIER_LIST_H diff --git a/internal/scsi/scsi_toolbox.c b/internal/scsi/scsi_toolbox.c new file mode 100644 index 0000000..4f22d87 --- /dev/null +++ b/internal/scsi/scsi_toolbox.c @@ -0,0 +1,246 @@ +#include "scsi_toolbox.h" +#include "scsiparam.h" //SCSI_* +#include "../../common.h" +#include "../../internal/call_protected.h" //scsi_scan_host_selected() +#include //DMA_FROM_DEVICE +#include //get_unaligned_be32() +#include //msleep +#include //cmd consts (e.g. SERVICE_ACTION_IN), SCAN_WILD_CARD, and TYPE_DISK +#include //struct scsi_sense_hdr, scsi_sense_valid() +#include //struct Scsi_Host, SYNO_PORT_TYPE_SATA +#include //struct scsi_transport_template +#include //struct scsi_device, scsi_execute_req(), scsi_is_sdev_device() + +extern struct bus_type scsi_bus_type; //SCSI bus type for driver scanning + +/** + * Issues SCSI "READ CAPACITY (16)" command + * Make sure you read what this function returns! 
+ * + * @param sdp + * @param buffer Pointer to a buffer of size SCSI_BUF_SIZE + * @param sshdr Sense header + * @return 0 on command success, >0 if command failed; if the command failed it MAY be repeated + */ +static int scsi_read_cap16(struct scsi_device *sdp, unsigned char *buffer, struct scsi_sense_hdr *sshdr) +{ + unsigned char cmd[16]; + memset(cmd, 0, 16); + cmd[0] = SCSI_SERVICE_ACTION_IN_16; + cmd[1] = SAI_READ_CAPACITY_16; + cmd[13] = SCSI_RC16_LEN; + memset(buffer, 0, SCSI_RC16_LEN); + + return scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE, buffer, SCSI_RC16_LEN, sshdr, SCSI_CMD_TIMEOUT, + SCSI_CMD_MAX_RETRIES, NULL); +} + +/** + * Issues SCSI "READ CAPACITY (10)" command + * Make sure you read what this function returns! + * + * @param sdp + * @param buffer Pointer to a buffer of size SCSI_BUF_SIZE + * @param sshdr Sense header + * @return 0 on command success, >0 if command failed; if the command failed it MAY be repeated + */ +static int scsi_read_cap10(struct scsi_device *sdp, unsigned char *buffer, struct scsi_sense_hdr *sshdr) +{ + unsigned char cmd[16]; + cmd[0] = READ_CAPACITY; + memset(&cmd[1], 0, 9); + memset(buffer, 0, 8); + + return scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE, buffer, 8, sshdr, SCSI_CMD_TIMEOUT, SCSI_CMD_MAX_RETRIES, NULL); +} + +long long opportunistic_read_capacity(struct scsi_device *sdp) +{ + //some drives work only with the 16 version but older ones can only accept the older variant + //to prevent false-positive "command failed" we need to try both + bool use_cap16 = true; + + unsigned char *buffer = NULL; + kmalloc_or_exit_int(buffer, SCSI_BUF_SIZE); + + int out; + int sense_valid = 0; + struct scsi_sense_hdr sshdr; + int read_retry = SCSI_CAP_MAX_RETRIES; + do { + //It can return 0 or a positive integer; 0 means immediate success where 1 means an error. Depending on the error + //the command may be repeated. + out = (use_cap16) ? 
scsi_read_cap16(sdp, buffer, &sshdr) : scsi_read_cap10(sdp, buffer, &sshdr); + if (out == 0) + break; //command just succeeded + + if (unlikely(out > 0)) { //it's technically an error but we may be able to recover + if (use_cap16) { //if we previously used CAP(16) and it failed we can try older CAP(10) [even on hard-fail] + use_cap16 = false; + continue; + } + + //Some failures are hard-failure (e.g. drive doesn't support the cmd), some are soft-failures + //In soft failures some are known to take more time (e.g. spinning rust is spinning up) and some should be + //fast-repeat. We really only distinguish hard from soft and just wait some time for others + //In a normal scenario this path will be cold as the drive will respond to CAP(16) or CAP(10) right away. + + sense_valid = scsi_sense_valid(&sshdr); + if (!sense_valid) { + pr_loc_dbg("Invalid sense - trying again"); + continue; //Sense invalid, this can be repeated right away + } + + //Drive deliberately rejected the request and indicated that this situtation will not change + if (sshdr.sense_key == ILLEGAL_REQUEST && (sshdr.asc == 0x20 || sshdr.asc == 0x24) && sshdr.ascq == 0x00) { + pr_loc_err("Drive refused to provide capacity"); + kfree(buffer); + return -EINVAL; + } + + //Drive is busy - wait for some time + if (sshdr.sense_key == UNIT_ATTENTION && sshdr.asc == 0x29 && sshdr.ascq == 0x00) { + pr_loc_dbg("Drive busy during capacity pre-read (%d attempts left), trying again", read_retry-1); + msleep(500); //if it's a spinning rust over USB we may need to wait + continue; + } + } + } while (--read_retry); + + if (out != 0) { + pr_loc_err("Failed to pre-read capacity of the drive after %d attempts due to SCSI errors", + (SCSI_CAP_MAX_RETRIES - read_retry)); + kfree(buffer); + return -EIO; + } + + unsigned sector_size = get_unaligned_be32(&buffer[8]); + unsigned long long lba = get_unaligned_be64(&buffer[0]); + + //Good up to 8192000000 pebibytes - good luck overflowing that :D + long long size_mb = ((lba+1) * 
sector_size) / 1024 / 1024; //sectors * sector size = size in bytes + + kfree(buffer); + return size_mb; +} + +bool is_scsi_disk(struct scsi_device *sdp) +{ + return (likely(sdp) && (sdp)->type == TYPE_DISK); +} + +bool is_sata_disk(struct device *dev) +{ + //from the kernel's pov SCSI devices include SCSI hosts, "leaf" devices, and others - this filters real SCSI devices + if (!is_scsi_leaf(dev)) + return false; + + struct scsi_device *sdp = to_scsi_device(dev); + + //end/leaf devices can be disks or other things - filter only real disks + //more than that use syno's private property (hey! not all of their kernel mods are bad ;)) to determine port which + //a given device uses (vanilla kernel doesn't care about silly ports - SCSI is SCSI) + if (!is_scsi_disk(sdp) || sdp->host->hostt->syno_port_type != SYNO_PORT_TYPE_SATA) + return false; + + return true; +} + +int scsi_force_replug(scsi_device *sdp) +{ + if (unlikely(!is_scsi_leaf(&sdp->sdev_gendev))) { + pr_loc_bug("%s expected SCSI leaf - got something else", __FUNCTION__); + return -EINVAL; + } + + struct Scsi_Host *host = sdp->host; + pr_loc_dbg("Removing device from host%d", host->host_no); + scsi_remove_device(sdp); //this will do locking for remove + + //See drivers/scsi/scsi_sysfs.c:scsi_scan() for details + if (unlikely(host->transportt->user_scan)) { + pr_loc_dbg("Triggering template-based rescan of host%d", host->host_no); + return host->transportt->user_scan(host, SCAN_WILD_CARD, SCAN_WILD_CARD, SCAN_WILD_CARD); + } else { + pr_loc_dbg("Triggering generic rescan of host%d", host->host_no); + //this is unfortunately defined in scsi_scan.c, it can be emulated because it's just bunch of loops, but why? + //This will also most likely never be used anyway + return _scsi_scan_host_selected(host, SCAN_WILD_CARD, SCAN_WILD_CARD, SCAN_WILD_CARD, 1); + } +} + +//We assume that if the sd was loaded once it will never unload (as on most kernels it's built in). 
+//If this assumption changes the cache can simply be removed +bool sd_driver_loaded = false; +struct device_driver *find_scsi_driver(void) +{ + struct device_driver *drv = driver_find("sd", &scsi_bus_type); + if (IS_ERR(drv)) { + pr_loc_err("Failed to query sd driver status - error=%ld", PTR_ERR(drv)); + return drv; + } + + if (drv) { + sd_driver_loaded = true; + return drv; + } + + return NULL; +} + +int is_scsi_driver_loaded(void) +{ + if (likely(sd_driver_loaded)) + return true; + + struct device_driver *drv = find_scsi_driver(); + if (IS_ERR(drv)) //get_scsi_driver() will already print an error message + return PTR_ERR(drv); + + return drv ? SCSI_DRV_LOADED : SCSI_DRV_NOT_LOADED; +} + +/** + * Filters out all SCSI leafs and calls the callback prescribed + */ +static int for_each_scsi_leaf_filter(struct device *dev, on_scsi_device_cb cb) +{ + if (!is_scsi_leaf(dev)) + return 0; + + return (cb)(to_scsi_device(dev)); +} + +/** + * Filters out all SCSI disks and calls the callback prescribed + */ +static int for_each_scsi_disk_filter(struct device *dev, on_scsi_device_cb cb) +{ + if (!is_scsi_leaf(dev)) + return 0; + + struct scsi_device *sdp = to_scsi_device(dev); + if (!is_scsi_disk(sdp)) + return 0; + + return (cb)(to_scsi_device(dev)); +} + +static int inline for_each_scsi_x(on_scsi_device_cb *cb, int (*filter)(struct device *dev, on_scsi_device_cb cb)) +{ + if (!is_scsi_driver_loaded()) + return -ENXIO; + + int code = bus_for_each_dev(&scsi_bus_type, NULL, cb, (int (*)(struct device *, void *))filter); + return unlikely(code == -ENXIO) ? 
-EIO : code; +} + +int for_each_scsi_leaf(on_scsi_device_cb *cb) +{ + return for_each_scsi_x(cb, for_each_scsi_leaf_filter); +} + +int for_each_scsi_disk(on_scsi_device_cb *cb) +{ + return for_each_scsi_x(cb, for_each_scsi_disk_filter); +} \ No newline at end of file diff --git a/internal/scsi/scsi_toolbox.h b/internal/scsi/scsi_toolbox.h new file mode 100644 index 0000000..f43687e --- /dev/null +++ b/internal/scsi/scsi_toolbox.h @@ -0,0 +1,100 @@ +#ifndef REDPILL_SCSI_TOOLBOX_H +#define REDPILL_SCSI_TOOLBOX_H + +#include //bool + +typedef struct device device; +typedef struct scsi_device scsi_device; +typedef int (on_scsi_device_cb)(struct scsi_device *sdp); + +#define SCSI_DRV_NAME "sd" //useful for triggering watchers +//To use this one import intercept_driver_register.h header (it's not imported here to avoid pollution) +#define watch_scsi_driver_register(callback, event_mask) \ + watch_driver_register(SCSI_DRV_NAME, (callback), (event_mask)) + +#define IS_SCSI_DRIVER_ERROR(state) (unlikely((state) < 0)) +typedef enum { + SCSI_DRV_NOT_LOADED = 0, + SCSI_DRV_LOADED = 1, +} scsi_driver_state; + +/** + * From the kernel's pov SCSI devices include SCSI hosts, "leaf" devices, and others - this filters real SCSI devices + * + * This is simply an alias for scsi_is_sdev_device() which is more descriptive for people who aren't SCSI wizards. + * + * @param dev struct device* + */ +#define is_scsi_leaf(dev) scsi_is_sdev_device(dev) + +/** + * Attempts to read capacity of a device assuming reasonably modern pathway + * + * This function (along with scsi_read_cap{10|16}) is loosely based on drivers/scsi/sd.c:sd_read_capacity(). However, + * this method cuts some corners to be faster as we're expecting rather modern hardware. Additionally, functions from + * sd.c cannot be used as they're static. Even that some of them can be called using kallsyms they aren't stateless and + * will cause a KP later on (as they modify the device passed to them). 
+ * Thus this function should be seen as a way to quickly estimate (as it reports full mebibytes rounded down) the + * capacity without causing side effects. + * + * @param sdp + * @return capacity in full mebibytes, or -E on error + */ +long long opportunistic_read_capacity(struct scsi_device *sdp); + +/** + * Checks if a SCSI device is a SCSI-complain disk (e.g. SATA, SAS, iSCSI etc) + * + * To be 101% sure and proper you should probably call is_scsi_leaf() first + */ +bool is_scsi_disk(struct scsi_device *sdp); + +/** + * Checks if a given generic device is an SCSI disk connected to a SATA port/host controller + * + * Every SATA disk, by definition, will also be an SCSI disk (as SATA is a connector carrying SCSI commands) + */ +bool is_sata_disk(struct device *dev); + +/** + * Triggers a re-probe of SCSI leaf device by forcefully "unplugging" and "replugging" the device + * + * WARNING: be careful what are you doing - this method is no different than yanking a power cable from a device, so if + * you do that with a disk which is used data loss may occur! + * + * @return 0 on success, -E on error + */ +int scsi_force_replug(scsi_device *sdp); + +/** + * Locates & returns SCSI driver structure if loaded + * + * @return driver struct on success, NULL if driver is not loaded, ERR_PTR(-E) on error + */ +struct device_driver *find_scsi_driver(void); + +/** + * Checks if SCSI driver is loaded or not + * + * This function is useful to make a decision whether to just watch for new devices or watch for new ones + scan + * existing ones. You cannot just scan blindly as this will cause an error. + * + * @return 0 if not loaded, 1 if loaded, -E on error; see scsi_driver_state enum for constants + */ +int is_scsi_driver_loaded(void); + +/** + * Traverses list of all SCSI devices and calls the callback with every leaf/terminal device found + * + * @return 0 on success, -E on failure. 
-ENXIO is reserved to always mean that the driver is not loaded + */ +int for_each_scsi_leaf(on_scsi_device_cb *cb); + +/** + * Traverses list of all SCSI devices and calls the callback with every SCSCI-complaint disk found + * + * @return 0 on success, -E on failure. -ENXIO is reserved to always mean that the driver is not loaded + */ +int for_each_scsi_disk(on_scsi_device_cb *cb); + +#endif //REDPILL_SCSI_TOOLBOX_H diff --git a/internal/scsi/scsiparam.h b/internal/scsi/scsiparam.h new file mode 100644 index 0000000..153b2a4 --- /dev/null +++ b/internal/scsi/scsiparam.h @@ -0,0 +1,24 @@ +/** + * This file contains a list of cherry-picked constants useful while dealing with SCSI subsystem + */ +#ifndef REDPILL_SCSIPARAM_H +#define REDPILL_SCSIPARAM_H + +#include //KERNEL_VERSION_CODE, KERNEL_VERSION() +#include //SERVICE_ACTION_IN or SERVICE_ACTION_IN_16 + +#define SCSI_RC16_LEN 32 //originally defined in drivers/scsi/sd.c as RC16_LEN +#define SCSI_CMD_TIMEOUT (30 * HZ) //originally defined in drivers/scsi/sd.h as SD_TIMEOUT +#define SCSI_CMD_MAX_RETRIES 5 //normal drives shouldn't fail the command even once +#define SCSI_CAP_MAX_RETRIES 3 +#define SCSI_BUF_SIZE 512 //originally defined in drivers/scsi/sd.h as SD_BUF_SIZE + +//Old kernels used ambiguous constant: https://github.com/torvalds/linux/commit/eb846d9f147455e4e5e1863bfb5e31974bb69b7c +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,19,0) +#define SCSI_SERVICE_ACTION_IN_16 SERVICE_ACTION_IN +#else +#define SCSI_SERVICE_ACTION_IN_16 SERVICE_ACTION_IN_16 +#endif + + +#endif //REDPILL_SCSIPARAM_H diff --git a/internal/stealth.c b/internal/stealth.c new file mode 100644 index 0000000..7ca8684 --- /dev/null +++ b/internal/stealth.c @@ -0,0 +1,60 @@ +#include "stealth.h" +#include "stealth/sanitize_cmdline.h" +#include //struct module (for list_del) + +//TODO: +//https://github.com/xcellerator/linux_kernel_hacking/blob/master/3_RootkitTechniques/3.0_hiding_lkm/rootkit.c +//remove file which was used for insmod 
+//remove kernel taint +//remove module loading from klog +//delete module file from ramdisk + +int initialize_stealth(void *config2) +{ + struct runtime_config *config = config2; + + int error = 0; +#if STEALTH_MODE <= STEALTH_MODE_OFF + //STEALTH_MODE_OFF shortcut + return error; +#endif + +#if STEALTH_MODE > STEALTH_MODE_OFF + //These are STEALTH_MODE_BASIC ones + if ((error = register_stealth_sanitize_cmdline(config->cmdline_blacklist)) != 0) + return error; +#endif + +#if STEALTH_MODE > STEALTH_MODE_BASIC + //These will be STEALTH_MODE_NORMAL ones +#endif + +#if STEALTH_MODE > STEALTH_MODE_NORMAL + //These will be STEALTH_MODE_FULL ones + list_del(&THIS_MODULE->list); +#endif + + return error; +} + +int uninitialize_stealth(void) +{ + int error; + +#if STEALTH_MODE > STEALTH_MODE_NORMAL + //These will be STEALTH_MODE_FULL ones +#endif + +#if STEALTH_MODE > STEALTH_MODE_BASIC + //These will be STEALTH_MODE_NORMAL ones +#endif + +#if STEALTH_MODE > STEALTH_MODE_OFF + //These are STEALTH_MODE_BASIC ones + if ((error = unregister_stealth_sanitize_cmdline()) != 0) + return error; +#endif + + //Mode set to STEALTH_MODE_OFF or nothing failed before + return error; +} \ No newline at end of file diff --git a/internal/stealth.h b/internal/stealth.h new file mode 100644 index 0000000..fa58282 --- /dev/null +++ b/internal/stealth.h @@ -0,0 +1,29 @@ +/* + * This header file should be included as the first one before anything else so other header files can use STEALTH_MODE + */ + +#ifndef REDPILLLKM_STEALTH_H +#define REDPILLLKM_STEALTH_H + +#define STEALTH_MODE_OFF 0 //Nothing is hidden, useful for full-on debugging +#define STEALTH_MODE_BASIC 1 //Hides basic things like cmdline (which is more to prevent DSM code from complaining about unknown options etc.) 
+#define STEALTH_MODE_NORMAL 2 //Hides everything except making the module not unloadable +#define STEALTH_MODE_FULL 3 //Same as STEALTH_MODE_NORMAL + removes the module from list of loaded modules & all logs + +//Define just after levels so other headers can use it if needed to e.g. replace some macros +#ifndef STEALTH_MODE +//#warning "Stealth mode not specified - using default" +#define STEALTH_MODE STEALTH_MODE_BASIC +#endif + +//Some compile-time stealthiness +#if STEALTH_MODE > STEALTH_MODE_OFF //STEALTH_MODE_BASIC or above +#define VIRTUAL_UART_THREAD_FMT "irq/%d-serial" //pattern format for vUART kernel thread which spoofs IRQ one +#endif + +struct runtime_config; + +int initialize_stealth(void *config); +int uninitialize_stealth(void); + +#endif //REDPILLLKM_STEALTH_H \ No newline at end of file diff --git a/internal/stealth/sanitize_cmdline.c b/internal/stealth/sanitize_cmdline.c new file mode 100644 index 0000000..ccb96d0 --- /dev/null +++ b/internal/stealth/sanitize_cmdline.c @@ -0,0 +1,162 @@ +/* + * This submodule removes blacklisted entries from /proc/cmdline to hide some options after the LKM is loaded + * + * OVERVIEW + * The main reason to sanitize cmdline is to avoid leaking information like "vid=..." or "pid=..." to userspace. Doing + * this may cause some other external modules parsing kernel cmdline to be confused why such options are present in the + * boot params. + * + * HOW IT WORKS? + * The module overrides cmdline_proc_show() from fs/proc/cmdline.c with a jump to our implementation. The implementation + * here serves a filtrated version of the cmdline. + * + * WHY OVERRIDE A STATIC METHOD? + * This module has actually been rewritten to hard-override cmdline_proc_show() instead of "gently" finding the dentry + * for /proc/cmdline, and then without modifying the dentry replacing the read operation in file_operations struct. 
+ * While this method is much cleaner and less invasive it has two problems: + * - Requires "struct proc_dir_entry" (which is internal and thus not available in toolkit builds) + * - Doesn't work if the module is loaded as ioscheduler (as funny enough this code will execute BEFORE /proc/cmdline + * is created) + * This change has been made in commit "Rewrite cmdline sanitize to replace cmdline_proc_show". + * + * FILTRATION + * The second part of the code deals with the actual filtration. List of blacklisted entries is passed during + * registration (to allow flexibility). Usually it will be gathered form pre-generated config. Then a filtrated copy of + * cmdline is created once (as this is a quite expensive string-ridden operation). + * The only sort-of way to find the original implementation is to access the kmesg buffer where the original cmdline is + * baked into early on boot. Technically we can replace that too but this will get veeery messy and I doubt anyone will + * try dig through kmesg messages with a regex for cmdline (especially that with a small dmesg buffer it will roll over) + */ + +#include "sanitize_cmdline.h" +#include "../../common.h" +#include "../../config/cmdline_delegate.h" //get_kernel_cmdline() & CMDLINE_MAX +#include "../override/override_symbol.h" //override_symbol() & restore_symbol() +#include //seq_file, seq_printf() + +/** + * Pre-generated filtered cmdline (by default it's an empty string in case it's somehow printed before filtration) + * See filtrate_cmdline() for details + */ +static char *filtrated_cmdline = NULL; + +/** + * Check if a given cmdline token is on the blacklist + */ +static bool +is_token_blacklisted(const char *param_pointer, cmdline_token *cmdline_blacklist[MAX_BLACKLISTED_CMDLINE_TOKENS]) { + for (int i = 0; i < MAX_BLACKLISTED_CMDLINE_TOKENS; i++) { + if (!cmdline_blacklist[i]) + return false; + + if (strncmp(param_pointer, (char *)cmdline_blacklist[i], strlen((char *)cmdline_blacklist[i])) == 0) + return true; 
+ } + + return false; +} + +/** + * Filters-out all blacklisted entries from the cmdline string (fetched from /proc/cmdline) + */ +static int filtrate_cmdline(cmdline_token *cmdline_blacklist[MAX_BLACKLISTED_CMDLINE_TOKENS]) +{ + char *raw_cmdline; + kmalloc_or_exit_int(raw_cmdline, strlen_to_size(CMDLINE_MAX)); + + long cmdline_len = get_kernel_cmdline(raw_cmdline, CMDLINE_MAX); + if(unlikely(cmdline_len < 0)) { //if <0 it's an error code + pr_loc_dbg("get_kernel_cmdline failed with %ld", cmdline_len); + kfree(raw_cmdline); + return (int) cmdline_len; + } + + filtrated_cmdline = kmalloc(strlen_to_size(cmdline_len), GFP_KERNEL); + if (unlikely(!filtrated_cmdline)) { + kfree(raw_cmdline); + kalloc_error_int(filtrated_cmdline, strlen_to_size(cmdline_len)); + } + + char *single_param_chunk; //Pointer to the beginning of the cmdline token + char *filtrated_ptr = &filtrated_cmdline[0]; //Pointer to the current position in filtered + + size_t curr_param_len; + while ((single_param_chunk = strsep(&raw_cmdline, CMDLINE_SEP)) != NULL) { + if (single_param_chunk[0] == '\0') //Skip empty + continue; + + if (is_token_blacklisted(single_param_chunk, cmdline_blacklist)) { + pr_loc_dbg("Cmdline param \"%s\" blacklisted - skipping", single_param_chunk); + continue; + } + + curr_param_len = strlen(single_param_chunk); + memcpy(filtrated_ptr, single_param_chunk, curr_param_len); + filtrated_ptr += curr_param_len; + *(filtrated_ptr++) = ' '; + } + + *(filtrated_ptr-1) = '\0'; //Terminate whole param string (removing the trailing space) + kfree(raw_cmdline); + + pr_loc_dbg("Sanitized cmdline to: %s", filtrated_cmdline); + + return 0; +} + +/** + * Handles fs/proc/ semantics for reading. See include/linux/fs.h:file_operations.read for details. 
+ */ +static int cmdline_proc_show_filtered(struct seq_file *m, void *v) +{ + seq_printf(m, "%s\n", filtrated_cmdline); + return 0; +} + +static override_symbol_inst *ov_cmdline_proc_show = NULL; +int register_stealth_sanitize_cmdline(cmdline_token *cmdline_blacklist[MAX_BLACKLISTED_CMDLINE_TOKENS]) +{ + if (unlikely(ov_cmdline_proc_show)) { + pr_loc_bug("Attempted to %s while already registered", __FUNCTION__); + return 0; //Technically it succeeded + } + + int out; + //This has to be done once (we're assuming cmdline doesn't change without reboot). In case this submodule is + // re-registered the filtrated_cmdline is left as-is and reused + if (!filtrated_cmdline && (out = filtrate_cmdline(cmdline_blacklist)) != 0) + return out; + + ov_cmdline_proc_show = override_symbol("cmdline_proc_show", cmdline_proc_show_filtered); + if (unlikely(IS_ERR(ov_cmdline_proc_show))) { + out = PTR_ERR(ov_cmdline_proc_show); + pr_loc_err("Failed to override cmdline_proc_show - error %d", out); + ov_cmdline_proc_show = NULL; + return out; + } + + pr_loc_inf("/proc/cmdline sanitized"); + + return 0; +} + +int unregister_stealth_sanitize_cmdline(void) +{ + if (unlikely(!ov_cmdline_proc_show)) { + pr_loc_bug("Attempted to %s while it's not registered", __FUNCTION__); + return 0; //Technically it succeeded + } + + int out = restore_symbol(ov_cmdline_proc_show); + //We deliberately fall through here without checking as we have to free stuff at this point no matter what + + kfree(filtrated_cmdline); + filtrated_cmdline = NULL; + + if (likely(out == 0)) + pr_loc_inf("Original /proc/cmdline restored"); + else + pr_loc_err("Failed to restore original /proc/cmdline: org_cmdline_proc_show failed - error %d", out); + + return out; +} \ No newline at end of file diff --git a/internal/stealth/sanitize_cmdline.h b/internal/stealth/sanitize_cmdline.h new file mode 100644 index 0000000..00bfbf0 --- /dev/null +++ b/internal/stealth/sanitize_cmdline.h @@ -0,0 +1,23 @@ +#ifndef 
REDPILL_SANITIZE_CMDLINE_H +#define REDPILL_SANITIZE_CMDLINE_H + +#include "../../config/cmdline_delegate.h" //MAX_BLACKLISTED_CMDLINE_TOKENS + +/** + * Register submodule sanitizing /proc/cmdline + * + * After registration /proc/cmdline will be non-destructively cleared from entries listed in cmdline_blacklist param. + * It can be reversed using unregister_stealth_sanitize_cmdline() + * + * @return 0 on success, -E on error + */ +int register_stealth_sanitize_cmdline(cmdline_token *cmdline_blacklist[MAX_BLACKLISTED_CMDLINE_TOKENS]); + +/** + * Reverses what register_stealth_sanitize_cmdline() did + * + * @return 0 on success, -E on error + */ +int unregister_stealth_sanitize_cmdline(void); + +#endif //REDPILL_SANITIZE_CMDLINE_H diff --git a/internal/uart/uart_swapper.c b/internal/uart/uart_swapper.c new file mode 100644 index 0000000..28e4991 --- /dev/null +++ b/internal/uart/uart_swapper.c @@ -0,0 +1,472 @@ +/** + * This tool is an isolated UART port-swapping utility allowing you to swap any two ports on runtime + * + * REASONING + * Some kernels are compiled with CONFIG_SYNO_X86_SERIAL_PORT_SWAP set which effectively swaps first two serial ports. + * This function reverses that. It also makes sure to move kernel console output between them (if configured). + * + * + * OVERVIEW + * Swapping the serials involves two things: swapping console drivers and swapping kernel console printk itself. + * The first one can be done on any kernel by modifying exported "console_drivers". The second one requires an access + * to either struct serial8250_ports (drivers/tty/serial/8250/8250_core.c) or to struct console_cmdline + * (kernel/printk/printk.c). Both of them are static so they're no-go directly. + * Kernels before v4.1 had a convenient method update_console_cmdline(). Unfortunately this method was removed: + * https://github.com/torvalds/linux/commit/c7cef0a84912cab3c9df8949b034e4aa62982ec9 so there's currently no method + * of un-swapping on v4. 
Even worse calling this method on lower kernels is a combination of luck and timing (as this is + * a init-only method). + * + * + * IMPLEMENTATION + * Things we've tried and failed: + * - Set the new console as preferred (making it default for /dev/console) -> /dev/ttyS0 and 1 are still wrong + * - Unregistering both ports and re-registering them -> we tried a bit and it's a nightmare to re-do and crashes the + * kernel + * - Unregistering and re-registering consoles -> will fail with KP or do nothing (and even if it worked if a + * non-console port is involved it will be broken) + * - Recreating the flow for serial8250_isa_init_ports() -> it appears to work (i.e. doesn't crash) but the serial port + * is dead afterwards and doesn't pass any traffic (we've never discovered why) + * => Hours wasted trying to reverse stupid ports: 37 + * + * What actually did work was carefully split-stopping the port (stopping the driver/hardware end of but not the higher + * level ttyS# side), exchanging the iobase & IRQ (+ some other internal things generated during init), restarting the + * port and hoping for the best. It does work BUT it's ridden with edge cases. Properly implementing and testing this + * took ~3 full days and two people... so if you're not sure what you're doing here you can easily break it. + * + * + * INTERNAL DETAILS + * What we are doing here may not be intuitive if you don't exactly know how the 8250 driver works internally. Let's go + * over this first. Each port is composed of outer uart_8250_port which contains various driver-specific information. + * Inside it there's an uart_port struct which contains the information about the actual physical UART channel. + * + * When the ports were flipped their position in the internal 8250 list, their line# in uart_8250_port and other + * internal properties were kept intact. The only two things which were changed are ->port.iobase and ->port.irq + * Flipping these is enough to make port sort-of-working. 
The port will pass data BUT only if the other port triggers + * interrupts (e.g. you type something on ttyS0 and nothing happens, you hold a space bar on ttyS1 afterwards and + * stuff you've typed on ttyS0 starts appearing). This strange effect is caused by how 8250 driver implements IRQ + * handling (serial8250_interrupt in 8250_core.c). It does not iterate over ALL ports looking for ports which match + * the IRQ triggering the function. [FROM NOW ON READ CAREFULLY!] Instead, it uses and struct irq_info passed as + * "user data" to the IRQ handler function. That struct contains a POINTER to a list_head of the first port, making + * the irq_struct the list owner. That doubly-linked list ties together multiple uart_8250_port structures (->list) + * which share the same IRQ. Here's the major problem: changing the NUMERIC IRQ value in uart_8250_port->port.irq + * does nothing to the actual IRQ handling for that port. + * + * When an IRQ happens and serial8250_interrupt() fires, it looks at the irq_struct, gets the pointer to list_head + * containing all active ports sharing a given IRQ and then just iterates over them triggering their internal + * handling of stuff. The irq_struct list_head pointer is just a memory address and our swaps of anything will never + * change it. This means two things: we need to fix the shared IRQ lists in uart_8250_port for ttyS0 and ttyS1 AND + * fix the mapping of IRQ => uart_8250_port element. Keep in mind that uart_8250_port->list contains only the ports + * which are currently active (i.e. enabled AND open). Kernel registers for IRQs only if something actually opens + * the port (as there's no point to receive data where you don't place to deliver them). + * + * Unfortunately, fixing these lists isn't really possible. The irq_struct contains a pointer to list_head which can + * be in any of the uart_8250_port. 
If it points to ttyS0 and/or ttyS1 (spoiler: most likely yes, as they're open) + * deleting an element from that list will break it completely (as irq_struct only knows the address of that one + * uart_8250_port in practice). If we replace prev/next to point to a different port it will still break because + * the interrupt handler calls container_of() which make prev/next irrelevant for the first fetch. + * struct_irq is contained in two places: internally cached in the 8250_core.c and as a pointer in the kernel IRQ + * handling. It will be unwise to try to modify it (ok, we're saying this only because we found a better way :D). + * + * Normally the IRQ manipulation is enabled/disabled by serial_link_irq_chain/serial_unlink_irq_chain in 8250_core.c + * but they're static. However, we an exploit the fact that 8250 driver is modular and operations on the Linux port + * are separated from operations on the hardware (which makes sense). We can command the UART chip to shutdown + * before we touch iobase or irq (which naturally has to remove IRQ if present) and then command the chip to startup + * which will register IRQ if needed (for the new irq value of course ;)) + * + * References: + * - Linux kernel sources (mainly drivers/tty/serial/8250/8250_core.c and drivers/tty/serial/serial_core.c) + * - https://linux-kernel-labs.github.io/refs/heads/master/labs/interrupts.html + * - https://www.ti.com/lit/ug/sprugp1/sprugp1.pdf + */ + +#include "../../common.h" +#include "../call_protected.h" //early_serial_setup() +#include "../override/override_symbol.h" //overriding uart_match_port() +#include "../../config/uart_defs.h" //struct uart_port, COM ports definition, UART_NR +#include //struct uart_8250_port +#include //console_lock(), console_unlock() +#include //synchronize_irq() +#include //LIST_POISON1, LIST_POISON2 +#include //timer_pending() +#include //disable_irq()/enable_irq() +#include // irq_common_data +#include //irq_has_action + +#define pause_irq_save(irq) 
({bool __state = irq_has_action(irq); if (__state) { disable_irq(irq); } __state; }) +#define resume_irq_saved(irq, saved) if (saved) { enable_irq(irq); } + +/*********************************************** Extracting 8250 ports ************************************************/ +static struct uart_8250_port *serial8250_ports[UART_NR] = { NULL }; //recovered ptrs to serial8250_ports structure +static override_symbol_inst *ov_uart_match_port = NULL; + +/** + * Fake uart_match_port() which always returns "no match" but collects all passing ports to serial8250_ports + * + * See recover_serial8250_ports() for usage. This is a very specific thing and shouldn't be used standalone. + * + * @return 0 + */ +static int uart_match_port_collector(struct uart_port *port1, struct uart_port *port2) +{ + //our fake trigger calls with one port being NULL, that's how we can easily detect which one is the one provided by + //the driver ;] + struct uart_port *port = port1 ? port1:port2; + pr_loc_dbg("Found ptr to line=%d iobase=0x%03lx irq=%d", port->line, port->iobase, port->irq); + + serial8250_ports[port->line] = container_of(port, struct uart_8250_port, port); + + return 0; +} + +/** + * Enables collecting of 8250 serial port structures + * + * Warning: before you do that you MUST disable IRQs or you're risking a serious crash or a silent corruption of the + * kernel! 
+ * + * @return 0 on success, -E on failure + */ +static int __must_check enable_collector_matcher(void) +{ + if (unlikely(ov_uart_match_port)) + return 0; //it's not a problem is we already enabled it as it's enabled all the time + + ov_uart_match_port = override_symbol("uart_match_port", uart_match_port_collector); + if (unlikely(IS_ERR(ov_uart_match_port))) { + int out = PTR_ERR(ov_uart_match_port); + ov_uart_match_port = NULL; + return out; + } + + return 0; +} + +/** + * Disabled collecting of 8250 serial port structures (reverses enable_collector_matcher()) + * + * @return 0 on success or noop, -E on failure + */ +static int disable_collector_matcher(void) +{ + if (unlikely(!ov_uart_match_port)) + return 0; //it's not a problem is we already disabled it + + int out = restore_symbol(ov_uart_match_port); + ov_uart_match_port = NULL; + + if (unlikely(out != 0)) + pr_loc_err("Failed to disable collector matcher, error=%d", out); + + return out; +} + +/** + * Fish-out 8250 serial driver ports from its internal structures + * + * The 8250 serial driver is very secretive of its ports and doesn't allow anyone to access them. This is for a good + * reason - it's very easy to cause a deadlock, KP, or a runaway CPU-hogging process. However, we must access them as + * we're intentionally messing up with the structures of them (as SOMEONE had a BRILLIANT idea to break them by swapping + * iobases and IRQs defined since the 1970s). + * + * Ports will be populated in a serial8250_ports. + */ +static int recover_serial8250_ports(void) +{ + int out = 0; + //Stops and buffers printks while pulling console semaphore down (in case console is active on any of the ports) + console_lock(); + preempt_disable(); + + //We cannot acquire any locks as we don't have ports information. The most we can do is ensure nothing tirggers + // while we collect them. It's imperfect as some ports are timer based etc. 
However, the chance is abysmal that + // with preempt disabled and IRQs disabled something magically triggers ports lookup (which is rare by itself). + //While there may be more than 4 ports their IRQs aren't well defined by the platform nor kernel. + //Some of these may be shared but we don't make assumptions here (as it will be a noop if we call it twice) + bool com1_irq_state = pause_irq_save(STD_COM1_IRQ); + bool com2_irq_state = pause_irq_save(STD_COM2_IRQ); + bool com3_irq_state = pause_irq_save(STD_COM3_IRQ); + bool com4_irq_state = pause_irq_save(STD_COM4_IRQ); + + if (unlikely((out = enable_collector_matcher()) != 0)) { //Install a fake matching function + pr_loc_err("Failed to enable collector!"); + goto out; + } + + _serial8250_find_port(NULL); //Force the driver to iterate over all its ports... using our fake matching function + + if (unlikely((out = disable_collector_matcher()) != 0)) //Restore normal matcher + pr_loc_err("Failed to enable collector!"); + + //Other processes will use spinlocks with IRQ-save as we now know the ports + out: + resume_irq_saved(STD_COM1_IRQ, com1_irq_state); + resume_irq_saved(STD_COM2_IRQ, com2_irq_state); + resume_irq_saved(STD_COM3_IRQ, com3_irq_state); + resume_irq_saved(STD_COM4_IRQ, com4_irq_state); + preempt_enable(); + console_unlock(); + + return out; +} + +/** + * Gets an internal 8250 driver port structure for the line/ttyS specified + * + * Things to know: + * - line = ttyS#, so line=0 = ttyS0 (this is universal across Linux UART subsystem) + * - this function returns things as-is in the 8250 driver, so if ports are already reversed you will get them reversed + * - this function only runs scanning once but only ptrs are stored, so if you flip ports the re-scan is not needed as + * 8250 builds its internal array (to which elements we get ptrs) only once during boot + * + * @return ptr to a port OR error ptr with -E + */ +static __must_check struct uart_8250_port *get_8250_port(unsigned int line) +{ + if 
(unlikely(line >= UART_NR)) { + pr_loc_bug("Requested UART line %u but kernel supports up to %u", line, UART_NR); + return ERR_PTR(-EINVAL); + } + + if (!serial8250_ports[0]) //Port not recovered or port 0 doesn't exist (HIGHLY unlikely) + recover_serial8250_ports(); //there's no point in checking the return code here - it will fail below + + return (likely(serial8250_ports[line])) ? serial8250_ports[line] : ERR_PTR(-ENODEV); +} + + +/****************************************** Shutting down & restarting ports ******************************************/ +#define is_irq_port(uart_port_ptr) ((uart_port_ptr)->irq != 0) + +/** + * Check if IRQ-based port is active (i.e. open and running) + * + * To use this function the caller is responsible for obtaining a port spinlock. + * + * Warning: it's up to the CALLER to check type of the port (is_irq_port()). Passing a timer-based port here will + * always return false, as timer ports don't register for IRQs and are not listed in IRQ-sharing list. + */ +static bool __always_inline is_irq_port_active(struct uart_8250_port *up) +{ + struct uart_port *port = &up->port; + + //if the kernel doesn't have an action for the IRQ there's no way 8250 has the port active in interrupt mode + if (!irq_has_action(port->irq)) { + pr_loc_dbg("IRQ=%d not active => port not active", port->irq); + return false; + } + + //IRQ port list was never initialized, or it was deleted (which poisons it) => list element is invalid + // We don't care where prev/next point - they can point both at us (=we're the only ones active on that IRQ), + // can both point at a single other element (=two element list with us included), or can point to two different + // elements (=list with >2 elements). Either way WE are active. 
+ if (!up->list.prev || !up->list.next) { + pr_loc_dbg("IRQ sharing list not initialized => port not active"); + return false; + } + + if (up->list.next == LIST_POISON1 && up->list.prev == LIST_POISON2) { + pr_loc_dbg("IRQ sharing list poisoned/deleted => port not active"); + return false; + } + + pr_loc_dbg("Port is active (IRQ=%d active, list valid p=%p/n=%p)", port->irq, up->list.prev, up->list.next); + return true; +} + +/** + * Checks if a timer-based port is active (i.e. open and running) + * + * To use this function the caller is responsible for obtaining a port spinlock. + * + * For the timer-based port to be active it must: have a function set (=it was configured at least once), and be + * in active or pending state. We only care about the pending one as time timer cannot be active (=currently + * executing handler function) when we have a lock on the port. + * + * Warning: it's up to the CALLER to check type of the port (is_irq_port()). Passing IRQ port here will always return + * false, as IRQ ports don't use timers. + */ +static bool __always_inline is_timer_port_active(struct uart_8250_port *up) +{ + return (likely(up->timer.function) && timer_pending(&up->timer)); +} + +/** + * Checks if a given port is active (i.e. open and running) + * + * The startup & shutdown of the port is needed any time the port is active/open. The port is formally shut down if + * there's nooone using it. The 8250 driver doesn't really know that (ok, it does if you try to probe the chip etc) + * directly, as only the TTY serial layer tracks that (drivers/tty/serial/serial_core.c). + * + * The trick here is that We cannot check if the driver has the + * interrupt for a given port directly (as the irq_lists is static). However, we can derive this by checking if the + * kernel has the IRQ handler registered for the given IRQ# *AND* if the port in question is part of the list for + * the IRQ. We can cheat here as we only need to know our own state. 
So in practice we need to just check if our + * list element (embedded list_head) is valid. See code for details. + * While technically we CAN re-shutdown a port as many times as we want AS LONG AS it's not using the IRQ subsystem we + * shouldn't re-start a port which wasn't started before we tinkered with it! This is why we take care of IRQ and non- + * IRQ ports in the same. If you attempt to shutdown already shutdown port which is an IRQ one it will result in a + * kernel BUG() as the driver detects that something went wrong as it expects the IRQ to be running. If you do the + * same with timer-based port it will simply re-clear registries on the UART chip which will be a noop hardware-wise. + * This is because the 8250 and derivates cannot be really turned off once they start/reset. They can only be set in a + * way that they don't deliver interrupts for new data (and any new data will just override existing one). With timer- + * based port the kernel simply don't ask the chip if there's any data but the chip is still running. This is exactly + * why the 8250 driver will always attempt a read before "starting" the port and clear FIFOs on it. + */ +static bool is_port_active(struct uart_8250_port *up) +{ + bool out; + struct uart_port *port = &up->port; + pr_loc_dbg("Checking if port iobase=0x%03lx irq=%d (mapped to ttyS%d) active", port->iobase, port->irq, port->line); + + //Most of the ports will be IRQs unless something's broken/special about the platform + if (likely(is_irq_port(port))) + out = is_irq_port_active(up); + else + out = is_timer_port_active(up); + + return out; +} + +/** + * Shuts down the port if it's active + * + * You should NOT call this function with a lock active! 
+ * + * @return 0 if the operation resulted in noop, 1 if the port was actually shut down; currently there are no error + * conditions + */ +static inline int try_shutdown_port(struct uart_8250_port *up) +{ + struct uart_port *port = &up->port; + pr_loc_dbg("Shutting down physical port iobase=0x%03lx (mapped to ttyS%d)", port->iobase, port->line); + + if (!is_port_active(up)) { + pr_loc_dbg("Port not active - noop"); + return 0; + } + + port->ops->shutdown(port); //this must be called with the lock released or otherwise a deadlock may occur + if (is_irq_port(port)) + synchronize_irq(port->irq); //Make sure interrupt handler is not running on another CPU/core + + pr_loc_dbg("Port iobase=0x%03lx ttyS%d is now DOWN", port->iobase, port->line); + + return 1; +} + +/** + * Restart previously stopped port + * + * Warnings: + * - you shouldn't attempt to restart ports which weren't configured; this can lead to a KP + * - you should NOT call this function when holding a lock + * + */ +static inline void restart_port(struct uart_8250_port *up) +{ + struct uart_port *port = &up->port; + pr_loc_dbg("Restarting physical port iobase=0x%03lx (mapped to ttyS%d)", port->iobase, port->line); + + //We are not checking if the port is active here due to an edge case of swap between one port which is active where + // another one isn't. In such case when we shut down that active port and try to activate the other (to keep the + // userland state happy) the check will lead to a false-negative state saying the port is already active. This is + // because we did swap IRQ values. However, we MUST restart such port not to reinit the hardware (which doesn't + // care) but to fix the interrupt mapping in the kernel! 
+    //skip extensive tests - it was working before
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(5,0,0)
+    port->flags |= UPF_NO_TXEN_TEST;
+#else
+    port->quirks |= UPQ_NO_TXEN_TEST;
+#endif
+    port->flags |= UPF_SKIP_TEST;
+    port->ops->startup(port); //this must be called with the lock released or otherwise a deadlock may occur
+
+    pr_loc_dbg("Port iobase=0x%03lx ttyS%d is now UP", port->iobase, port->line);
+}
+
+
+/*************************************************** Swapping logic ***************************************************/
+/**
+ * Swaps two UART data lines with proper locking
+ *
+ * Exchanges iobase, irq, uartclk, flags and the timer between the two ports while holding both port locks with
+ * local interrupts disabled.
+ *
+ * This function assumes ports are already stopped.
+ */
+static inline void swap_uart_lanes(struct uart_8250_port *a, struct uart_8250_port *b)
+{
+    unsigned long flags_a, flags_b;
+    spin_lock_irqsave(&a->port.lock, flags_a);
+    spin_lock_irqsave(&b->port.lock, flags_b);
+
+    swap(a->port.iobase, b->port.iobase);
+    swap(a->port.irq, b->port.irq);
+    swap(a->port.uartclk, b->port.uartclk); //Just to be complete we should move flags & clock
+    swap(a->port.flags, b->port.flags);     // (they're probably the same anyway)
+    swap(a->timer, b->timer); //if one port was timer based and another wasn't this ensures they aren't broken
+
+    //Release in reverse order of acquisition, each lock with the flags saved when IT was taken. flags_b holds the
+    // "IRQs already disabled" state so interrupts come back only with the final unlock restoring flags_a.
+    spin_unlock_irqrestore(&b->port.lock, flags_b);
+    spin_unlock_irqrestore(&a->port.lock, flags_a);
+}
+
+/**
+ * Swaps hardware data paths of two ttyS lines without changing their /dev/ttyS# identity
+ *
+ * Both ports are shut down (if they were active), their hardware properties are exchanged, and the ports which were
+ * running before are restarted. The console is locked for the duration of the swap.
+ *
+ * @param from Line number (line = ttyS#)
+ * @param to Line number; must differ from "from"
+ *
+ * @return 0 on success, -EINVAL when from == to, or the -E reported by get_8250_port() when a port cannot be found
+ */
+int uart_swap_hw_output(unsigned int from, unsigned int to)
+{
+    if (unlikely(from == to))
+        return -EINVAL;
+
+    pr_loc_dbg("Swapping ttyS%d<=>ttyS%d started", from, to);
+
+    struct uart_8250_port *port_a = get_8250_port(from);
+    struct uart_8250_port *port_b = get_8250_port(to);
+
+    //get_8250_port() reports failures as error pointers and never as NULL, so a NULL test would let an error
+    // pointer through and it would be dereferenced below
+    if (unlikely(IS_ERR(port_a))) {
+        pr_loc_err("Failed to locate ttyS%d port", from);
+        return PTR_ERR(port_a);
+    }
+    if (unlikely(IS_ERR(port_b))) {
+        pr_loc_err("Failed to locate ttyS%d port", to);
+        return PTR_ERR(port_b);
+    }
+
+
+    pr_loc_dbg("Disabling preempt & locking console");
+    pr_loc_inf("======= OUTPUT ON THIS PORT WILL STOP AND CONTINUE ON ANOTHER ONE (swapping ttyS%d & ttyS%d) =======",
+               from, to); //That will be the last message user sees before swap on the "old" port
+
+    pr_loc_dbg("### LAST MESSAGE BEFORE SWAP ON \"OLD\" PORT ttyS%d<=>ttyS%d", from, to);
+    //console_lock() may sleep, so it MUST be taken before preemption is disabled (recover_serial8250_ports() uses the
+    // same order)
+    console_lock(); //We don't want stray messages landing somewhere randomly when we swap, + the ports will be down
+    preempt_disable(); //we cannot be rescheduled here due to timing constraint and possibly IRQ interactions
+    //this will be the first message after port unlocks after swapping
+    pr_loc_dbg("### FIRST MESSAGE AFTER SWAP ON \"NEW\" PORT ttyS%d<=>ttyS%d", from, to);
+
+    //This is an edge case when swapping two ports where one is active and another one is not. Since the active status
+    // is a property of the software (i.e. port opened/used by something) and shutting down/starting alters the state
+    // of the hardware we may have a problem with restarting the previously inactive port. If WE did shut it down there
+    // is no issue as we know the hardware is initialized. But if it wasn't and we try to just start it up without
+    // reinit we can either crash the driver or leave the port in inactive state.
+    pr_loc_dbg("Disabling ports");
+    int port_a_was_running = try_shutdown_port(port_a);
+    int port_b_was_running = try_shutdown_port(port_b);
+    if (unlikely(port_a_was_running != port_b_was_running))
+        pr_loc_wrn("Swapping hw data paths of ttyS%d (was %sactive) and ttyS%d (was %sactive). We will attempt to "
+                   "reactivate inactive one but this may fail.", port_a->port.line, port_a_was_running ? "" : "in",
+                   port_b->port.line, port_b_was_running ? "" : "in");
+
+    swap_uart_lanes(port_a, port_b);
+    //This code IS CORRECT - make sure to read comment next to port_a_was_running/port_b_was_running vars initialization
+    //We swapped the data paths but we need to restore the state as the userland expects it.
+    pr_loc_dbg("Restarting ports");
+    if (port_a_was_running)
+        restart_port(port_a);
+    if (port_b_was_running)
+        restart_port(port_b);
+
+    //Released in reverse order of acquisition
+    preempt_enable();
+    console_unlock();
+
+    pr_loc_inf("======= OUTPUT ON THIS PORT CONTINUES FROM A DIFFERENT ONE (swapped ttyS%d & ttyS%d) =======", from,
+               to);
+
+    pr_loc_dbg("Swapping ttyS%d (curr_iob=0x%03lx) <=> ttyS%d (curr_iob=0x%03lx) finished successfully", from,
+               port_a->port.iobase, to, port_b->port.iobase);
+
+    return 0;
+}
diff --git a/internal/uart/uart_swapper.h b/internal/uart/uart_swapper.h
new file mode 100644
index 0000000..e04bff8
--- /dev/null
+++ b/internal/uart/uart_swapper.h
@@ -0,0 +1,18 @@
+#ifndef REDPILL_UART_SWAPPER_H
+#define REDPILL_UART_SWAPPER_H
+
+/**
+ * Swaps two given UARTs/serial ports so that their data paths are exchanged without the change of /dev/tty#
+ *
+ * This method is blind to whether UARTs were swapped during kernel build. However, it's the reason it exists to un-swap
+ * these stupid ports. You can swap any ports you want. It's not recommended to swap ports which are in different run
+ * state (i.e. one is active/open/running and the other one is not). In such cases the swap will be attempted BUT the
+ * port which was active may not be usable until re-opened (usually it will be, but there's a chance).
+ *
+ * @param from Line number (line = ttyS#, so line=0 = ttyS0; this is universal across Linux UART subsystem)
+ * @param to Line number
+ * @return 0 on success or -E on error
+ */
+int uart_swap_hw_output(unsigned int from, unsigned int to);
+
+#endif //REDPILL_UART_SWAPPER_H
diff --git a/internal/uart/virtual_uart.c b/internal/uart/virtual_uart.c
new file mode 100644
index 0000000..1e957f3
--- /dev/null
+++ b/internal/uart/virtual_uart.c
@@ -0,0 +1,1032 @@
+/**
+ * A true National Semiconductors 16550A software emulator
+ *
+ * WHAT IS THIS?
+ * -------------
+ * In short this provides a feature-complete emulation of the now-famous 16550A chip present in IBM/PC compatibles
+ * since ~1987. This emulator was prepared to work with the Linux 8250 driver and fool it into believing it talks with a
+ * real chip. Moreover, the code isn't hacking around any private parts of the kernel but rather fully emulates
+ * registries and their behaviors according to the chip's data sheet.
+ * The emulation layer supports the standard 8250-compliant feature set (in essence UART) with addition of two 16 bytes
+ * TX/RX FIFOs with configurable threshold as well as timer or virtual IRQ model. The code should be pretty
+ * straight-forward to read but it contains MANY quirks. All of them however are heavily documented throughout the file.
+ *
+ * DEALING WITH OPEN PORTS
+ * -----------------------
+ * While using this module you should know that there's one important quirk: since we're in the Linux kernel we can
+ * do anything with the port even if it's open. It's a blessing and a curse. Even if the physical ttyS1 port is open and
+ * you add a virtual ttyS1 all of a sudden all applications will talk to your virtual port. This is great as you don't
+ * have to restart them but it's also bad while debugging as you may get input you don't expect. To see what's using the
+ * port execute "ls -l /proc/[0-9]*/fd/ 2>&1 | grep /dev/ttyS1" (replace ttyS1 with the port you are interested in)
+ * as lsof is not available in pre-boot.
+ * A note however: the /dev/ttyS# node WILL be recreated, so if you do replace a port which was opened you can expect to
+ * see "/dev/ttyS1 (deleted)" in the ls output from above. It is not an issue as we're "taking over" the port anyway so
+ * both the /dev/ttyS1 as well as the old fd are pointing to the same place kernel-wise.
+ *
+ * LIMITATIONS
+ * -----------
+ * - For obvious reasons (as we are not working with a real hw) the DMA portion of the chip is not emulated
+ * - On most systems the maximum number of UARTs emulated is 4 (driver's limitation, see CONFIG_SERIAL_8250_NR_UARTS)
+ * - FIFOs, true to the original 16550A, are limited to 16 bytes each. In theory, if needed, they can be enlarged up to
+ *   even 256 bytes each with chip model change (as 8250 driver actually tests how big a FIFO is on setup)
+ * - FIFO mode is always enabled. There are some not-fully-accurate pieces which don't handle non-FIFO operation. There
+ *   is (at least to our knowledge) no reason to use it and kernel always asks for FIFO to save CPU anyway.
+ *
+ * USAGE
+ * -----
+ * See header file docs.
+ *
+ * INTERNALS
+ * ---------
+ * - To DISABLE vIRQ and fall back to a timer (offered by 8250) define VUART_USE_TIMER_FALLBACK - this will cause the
+ *   Linux driver to poll every so often for new data. This is fine if the port is opened-written to-closed but not
+ *   when apps keep it long open (as the APIC timer will constantly fire for nothing)
+ * - To see detailed logs of registries being accessed and modified and what not define VUART_DEBUG_LOG - you will get
+ *   all the info you need for debugging. However keep in mind setting this along with VUART_USE_TIMER_FALLBACK will be
+ *   pretty catastrophic as you will be flooded with messages about IIR being read as long as the port stays open in
+ *   the userland. This consciously does not use kernel's dynamic debug facilities as some (e.g. 918+) kernels are
+ *   compiled without it.
+ * - To change name of the vIRQ thread define VUART_THREAD_FMT which gets a real port IRQ # and ttyS# as its params.
+ * - UART_BUG_SWAPPED (defined in uart_defs.h) is used to detect swapped ports and make sure numbers used here are real
+ *   ttyS* values and not swapped bs (as 8250 matches ports by iobase and not line#)
+ *
+ * References:
+ *  - https://github.com/clearlinux/kvmtool/blob/b5891a4337eb6744c8ac22cc02df3257961ae23e/hw/serial.c (inspiration)
+ *  - https://www.ti.com/lit/ug/sprugp1/sprugp1.pdf (everything you need to know abt UART, referred in code as "Ti doc")
+ *  - http://caro.su/msx/ocm_de1/16550.pdf (useful and short UART know-how with a good registry table in Table 2, p. 9)
+ *  - https://www.linuxjournal.com/article/8144 (handling threading in kernel)
+ */
+
+//Here are some flags which can be used to modify the behavior of VirtualUART. They're checked by other header files.
+//Keep in mind you may need to set the debug in vuart_virtual_irq separately (or in common.h)
+//#define VUART_DEBUG_LOG
+//#define VUART_USE_TIMER_FALLBACK
+
+#include "virtual_uart.h"
+#include "vuart_internal.h"
+#include "../../common.h" //can set VUART_DEBUG_LOG and others
+#include "../../debug/debug_vuart.h" //it will provide normal or nooped versions of macros; CHECKS VUART_DEBUG_LOG
+#include "../../config/uart_defs.h" //COM defs & struct uart_port
+#include "../../internal/intercept_driver_register.h" //is_driver_registered, watch_driver_register, unwatch_driver_register
+#include "vuart_virtual_irq.h" //vIRQ handling & shimming; CHECKS VUART_USE_TIMER_FALLBACK
+#include <linux/serial_8250.h> //serial8250_unregister_port, uart_8250_port
+#include <linux/serial_reg.h> //UART_* consts
+#include <linux/spinlock.h> //locking devices (vdev->lock)
+#include <linux/kfifo.h> //kfifo_*
+#include <linux/slab.h> //kzalloc, kfree
+
+/************************************************* Static definitions *************************************************/
+/*
+ * According to https://en.wikibooks.org/wiki/Serial_Programming/8250_UART_Programming bits 6 and 7 must be set to
+ * consider FIFO as enabled-and-working (bit 7 only designates "FIFO enabled, but not functioning" (?)
+ */ +#define UART_IIR_FIFOEN 0xc0 +#define UART_IIR_FIFEN_B6 0x40 +#define UART_IIR_FIFEN_B7 0x80 +#define UART_DRIVER_NAME "serial8250" //see drivers/tty/serial/8250/8250_core.c in "serial8250_isa_driver" + +/** + * Static definition of all possible UARTs in the system supported by 8250 driver + * These definitions are exactly the same as in arch/x86/include/asm/serial.h + */ +static struct serial8250_16550A_vdev ttySs[] = { +//we're crying too... the issue is normally operate on port lines (=ttyS#) but during port registration ports the driver +// performs matching based on its internal iobase mapping, so we can ask for the port to be line=0 but if the driver +// finds a port with iobase specified under line=1 it will just register is as line=1 instead of line=0. This causes all +// sorts of problems as during reads we expect the vdev line to match what we actually registered. To fix it and make it +// independent of all fucking swapping and reswapping we will have to emit events from uart_swapper and other nonsense +// ...this is ridiculous. So we take a sane assumptions: +// - if the kernel is broken we accommodate for that assuming no un-swapping will be done afterwards +// - if the kernel is broken and swap fix is disabled by debug flag we handle the swapping +// - if something is borked we don't offer a detection comparing lines because before we get a response from the driver +// registering the port it will call our read function and break everything +// TODO: this whole code should switch to relying on iobases instead o lines. This way when we do reads or writes we +// don't care if something is swapped - we call for registration on line 0, we lookup what's the expected iobase +// for that ttyS and we register for it. If the driver decides to use a different line# we shouldn't care. 
+#if defined(UART_BUG_SWAPPED) && defined(DBG_DISABLE_UART_SWAP_FIX) + [0] = { .line = 0, .iobase = STD_COM2_IOBASE, .irq = STD_COM2_IRQ, .baud = STD_COMX_BAUD }, //COM1 aka ttyS1 + [1] = { .line = 1, .iobase = STD_COM1_IOBASE, .irq = STD_COM1_IRQ, .baud = STD_COMX_BAUD }, //COM2 aka ttyS0 +#else + [0] = { .line = 0, .iobase = STD_COM1_IOBASE, .irq = STD_COM1_IRQ, .baud = STD_COMX_BAUD }, //COM1 aka ttyS0 + [1] = { .line = 1, .iobase = STD_COM2_IOBASE, .irq = STD_COM2_IRQ, .baud = STD_COMX_BAUD }, //COM2 aka ttyS1 +#endif + [2] = { .line = 2, .iobase = STD_COM3_IOBASE, .irq = STD_COM3_IRQ, .baud = STD_COMX_BAUD }, //COM3 aka ttyS2 + [3] = { .line = 3, .iobase = STD_COM4_IOBASE, .irq = STD_COM4_IRQ, .baud = STD_COMX_BAUD }, //COM4 aka ttyS3 +}; + +//Internal type for callbacks; see vuart_set_tx_callback() for details +struct flush_callback { + vuart_callback_t *fn; + void *buffer; + int threshold; +}; +//Storage for all TX callbacks, see vuart_set_tx_callback() +static struct flush_callback *flush_cbs[SERIAL8250_LAST_ISA_LINE] = { NULL }; +static volatile bool kernel_driver_ready = false; //Whether the 8250 UART driver is ready + +/**************************************** Internal helper function-like macros ****************************************/ +//Get vDEV from line/ttyS number (created for consistency) +#define get_line_vdev(line) (&ttySs[(line)]) + +//8250 driver doesn't give access to the real uart_port upon adding but does it on first read/write +#define capture_uart_port(vdev, port) if (unlikely(!(vdev)->up)) (vdev)->up = port; + +//Some functions should warn use out of courtesy that we're running in a stupid environment +#if defined(UART_BUG_SWAPPED) && defined(DBG_DISABLE_UART_SWAP_FIX) +#define warn_bug_swapped(line) \ + if ((line) < 2) { \ + pr_loc_inf( \ + "Requested ttyS%d vUART - this kernel has UART SWAP => modifying what physically is ttyS%d (io=0x%x)", \ + line, !line, get_line_vdev(line)->iobase); \ + } +#else +#define warn_bug_swapped(line) 
//noop +#endif + +#define for_each_vdev() for (int line=0; line < ARRAY_SIZE(ttySs); ++line) + +//Before v3.13 the kfifo_put() accepted a pointer, since then it accepts a value +//ffs... https://github.com/torvalds/linux/commit/498d319bb512992ef0784c278fa03679f2f5649d +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,13,0) +#define kfifo_put_val(fifo, val) kfifo_put(fifo, &val) +#else +#define kfifo_put_val(fifo, val) kfifo_put(fifo, val) +#endif + +/****************************************** Internal chip emulation functions ******************************************/ +/** + * Updates state of the IIR register + * + * In the physical world when the UART chip is connected to the CPU there's an interrupt line which goes high when + * the chip detects any of the conditions which are interrupt-worthy. Whether something is interrupt-worthy is + * determined by the driver (i.e. Linux kernel) and set in the IER. When an interrupt is generated the kernel gets only + * the information "something happened on IRQ 4" (which means SOMETHING happened on COM1 *or* COM 3). To determine + * what it is (or maybe there multiple things even!) the kernel reads IIR which gives the REASON why a given interrupt + * happened (and thus also indirectly specifies which channel/chip generated the interrupt). + * + * This code below is written based on the Table 3-6 in Ti doc - it summarizes IIR state and how it should change. It + * should be called AFTER everything else modified registers. In general if you changed some other registries you should + * call this function. You usually want to do it once upon returning control to an outside caller (after making all + * all changes). + * + * Regardless of whether vIRQ is enabled or not this register MUST be updated. 
+ */ +static void update_interrupts_state(struct serial8250_16550A_vdev *vdev) +{ + uart_prdbg("Recomputing IIR state"); + //Order of these if/elseifs is CRUCIAL - interrupts have priorities and they're masked + u8 new_iir_int_state = 0; + if ((vdev->ier & UART_IER_RLSI) && + unlikely((vdev->lsr & UART_LSR_OE) || (vdev->lsr & UART_LSR_PE) || (vdev->lsr & UART_LSR_FE) || + (vdev->lsr & UART_LSR_BI))) { + //Kernel enabled OE/PE/FE/BI interrupts and there's one of them + uart_prdbg("IIR: setting RLS (errors) interrupt"); + new_iir_int_state |= UART_IIR_RLSI; + } else if ((vdev->ier & UART_IER_RDI) && (vdev->lsr & UART_LSR_DR)) { + //We don't distinguish between FIFO and non-FIFO mode and just set interrupt if there's some data to be read + //We also don't support the receiver time-out (kernel should pick up the data in time as it's a virtual port) + uart_prdbg("IIR: setting RD (data-ready) interrupt"); + new_iir_int_state |= UART_IIR_RDI; + } else if ((vdev->ier & UART_IER_THRI) && ((vdev->lsr & UART_LSR_TEMT) || kfifo_is_empty(vdev->tx_fifo))) { + //When THR is empty or FIFO is empty (for us it's the same thing) kernel wants to know about that + uart_prdbg("IIR: setting THR (transmitter empty) interrupt"); + new_iir_int_state |= UART_IIR_THRI; + } + + //If any interrupts are triggered (or not) we need to set IPEND accordingly + if (new_iir_int_state) { + new_iir_int_state &= ~UART_IIR_NO_INT; //since there were some interrupts we clear IPEND (=interrupts pending) + vuart_virq_wake_up(vdev); + } else { + new_iir_int_state |= UART_IIR_NO_INT; //since there were no interrupts we set IPEND (=no interrupts pending) + } + + //IIR (despite its name) also contains FIFO status along interrupts + vdev->iir = new_iir_int_state; + if (likely(vdev->fcr & UART_FCR_ENABLE_FIFO)) + vdev->iir |= UART_IIR_FIFOEN; + + dump_iir(vdev); + uart_prdbg("Finished IIR state"); +} + +/** + * Put registries into the "chip reset" state as described by the datasheet (see Tables 3-* in Ti doc) + * 
You should NOT modify these values under any circumstances as they're meant to represent the real chip RESET state
+ */
+static void reset_device(struct serial8250_16550A_vdev *vdev)
+{
+    uart_prdbg("Resetting virtual chip @ ttyS%d", vdev->line);
+    lock_vuart_oppr(vdev);
+
+    //Upon reset both FIFOs must be erased (they may not exist yet, hence the checks)
+    if (vdev->tx_fifo)
+        kfifo_reset(vdev->tx_fifo);
+    if (vdev->rx_fifo)
+        kfifo_reset(vdev->rx_fifo);
+
+    //Registries for when DLAB=0
+    vdev->rhr = 0x00; //no data in receiving channel
+    vdev->thr = 0x00; //no data in transmission channel
+    vdev->ier = 0x00; //no interrupts enabled
+    vdev->iir = UART_IIR_NO_INT; //no pending interrupts, FIFO not active
+    vdev->fcr = 0x00; //FIFO disabled (which invalidates other FIFO properties in FCR), DMA disabled
+    vdev->lcr = 0x00; //non-DLAB mode, errors cleared, 1 STOP bit, 5 bit words (not that it matters for virtual port)
+    vdev->mcr = UART_MCR_OUT2; //autoflow disabled, loop mode disabled, OUT2 enabled as global interrupt
+    vdev->lsr = UART_LSR_TEMT | UART_LSR_THRE; //transmitter empty & idle, all errors cleared, break not requested
+    vdev->msr = 0x00; //all flow control flags not triggered
+    vdev->scr = 0x00; //empty scratchpad
+
+    //Additional registries when DLAB=1
+    vdev->dll = 0x00; //undefined divisor LSB latch
+    vdev->dlm = 0x00; //undefined divisor MSB latch
+
+    unlock_vuart_oppr(vdev);
+    uart_prdbg("Virtual chip @ ttyS%d reset done", vdev->line);
+}
+
+/**
+ * Allocate/create FIFOs on the device if they don't exist (and if they do you shouldn't call this function)
+ *
+ * On any failure everything allocated so far is released and both FIFO pointers are set back to NULL, so the call can
+ * be safely retried later.
+ *
+ * @return 0 on success, -EINVAL if any FIFO already exists, -ENOMEM if FIFO structures cannot be allocated, -EFAULT if
+ *         FIFO element storage cannot be allocated
+ */
+static int alloc_fifos(struct serial8250_16550A_vdev *vdev)
+{
+    int out;
+
+    if (unlikely(vdev->rx_fifo)) { //this shouldn't happen on non-initialized port
+        pr_loc_bug("RX FIFO @ %d already alloc'd", vdev->line);
+        return -EINVAL;
+    }
+
+    if (unlikely(vdev->tx_fifo)) { //this shouldn't happen on non-initialized port
+        pr_loc_bug("TX FIFO @ %d already alloc'd", vdev->line);
+        return -EINVAL;
+    }
+
+    //Allocated by hand instead of kzalloc_or_exit_int() as that macro returns straight away, which leaked the RX
+    // structure whenever the TX one could not be allocated
+    //NOTE(review): kzalloc_or_exit_int() presumably returned -ENOMEM as well - confirm against common.h
+    vdev->rx_fifo = kzalloc(sizeof(struct kfifo), GFP_KERNEL);
+    vdev->tx_fifo = kzalloc(sizeof(struct kfifo), GFP_KERNEL);
+    if (unlikely(!vdev->rx_fifo || !vdev->tx_fifo)) {
+        pr_loc_crt("kzalloc for FIFO structures @ %d failed", vdev->line);
+        out = -ENOMEM;
+        goto err_free_structs;
+    }
+
+    if (unlikely(kfifo_alloc(vdev->rx_fifo, VUART_FIFO_LEN, GFP_KERNEL) != 0)) {
+        pr_loc_crt("kfifo_alloc for RX FIFO elements @ %d failed", vdev->line);
+        out = -EFAULT; //kept as-is for callers which may compare the code
+        goto err_free_structs;
+    }
+
+    if (unlikely(kfifo_alloc(vdev->tx_fifo, VUART_FIFO_LEN, GFP_KERNEL) != 0)) {
+        pr_loc_crt("kfifo_alloc for TX FIFO elements @ %d failed", vdev->line);
+        out = -EFAULT;
+        goto err_free_rx_elements;
+    }
+
+    return 0;
+
+    err_free_rx_elements:
+    kfifo_free(vdev->rx_fifo);
+    err_free_structs:
+    kfree(vdev->rx_fifo); //kfree(NULL) is a noop, so this is safe no matter which allocation failed
+    kfree(vdev->tx_fifo);
+    vdev->rx_fifo = NULL; //leave the device in "no FIFOs" state so the guards above don't trip on a retry
+    vdev->tx_fifo = NULL;
+
+    return out;
+}
+
+/**
+ * Reverses what alloc_fifos() did
+ *
+ * NOTE(review): only the element storage is released here (kfifo_free()); the kfifo structures themselves are not
+ * kfree()d and the rx_fifo/tx_fifo pointers are not reset to NULL in this function. Unless the caller does that, a
+ * later alloc_fifos() will refuse to run ("already alloc'd") and the structures leak - confirm against callers.
+ *
+ * @return 0 on success, -EINVAL if any of the FIFOs is not allocated
+ */
+static int free_fifos(struct serial8250_16550A_vdev *vdev)
+{
+    //This should be called when the vIRQ thread is killed so nothing call the IRQ handler without FIFOs
+    if (unlikely(!vdev->rx_fifo || !vdev->tx_fifo)) { //this shouldn't happen on initialized port
+        pr_loc_bug("RX and/or TX FIFO @ %d are not alloc'd (nothing to free)", vdev->line);
+        return -EINVAL;
+    }
+
+    kfifo_free(vdev->rx_fifo);
+    kfifo_free(vdev->tx_fifo);
+
+    return 0;
+}
+
+/**
+ * Deposits the TX queue contents into callbacks set using vuart_set_tx_callback() and clears the FIFO itself
+ * If no callbacks were defined it will simply clear.
+ *
+ * This function does NOT recalculate IIRs (see update_interrupts_state()) and assumes you have vdev lock.
+ */
+static void flush_tx_fifo(struct serial8250_16550A_vdev *vdev, vuart_flush_reason reason)
+{
+    uart_prdbg("Flushing TX FIFO now! 
reason=%d", reason); + + if (likely(flush_cbs[vdev->line])) { + unsigned int flushed_bytes = 0; + flushed_bytes = kfifo_out(vdev->tx_fifo, flush_cbs[vdev->line]->buffer, VUART_FIFO_LEN); + flush_cbs[vdev->line]->fn(vdev->line, flush_cbs[vdev->line]->buffer, flushed_bytes, reason); + } else { + uart_prdbg("No callback for TX FIFO @ %d - discarding", vdev->line); + kfifo_reset(vdev->tx_fifo); + } + + vdev->lsr |= UART_LSR_TEMT | UART_LSR_THRE; //nothing should be in the buffer +} + +/** + * Pulls a character/byte from RX FIFO and places it into RHR for the driver to read it + * - It updates all registers according to the specs + * - It assumes you have vdev lock + * - It does NOT recalculate IIRs (see update_interrupts_state()) + * - It will produce an error if you try to do the transfer while FIFO is empty but it will not crash. You should check + * UART_LSR_DR before calling this function. + * + * @return character which was read + */ +static unsigned char transfer_char_fifo_rhr(struct serial8250_16550A_vdev *vdev) +{ + //Before this function is called UART_LSR_DR should be verified - it wasn't or it was wrong if this exploded + if(unlikely(kfifo_get(vdev->rx_fifo, &vdev->rhr) == 0)) + pr_loc_bug("Attempted to %s with empty FIFO - that shouldn't happen if the DR flag was checked", __FUNCTION__); + + if (kfifo_is_empty(vdev->rx_fifo)) + vdev->lsr &= ~UART_LSR_DR; + + //See descriptions of these fields in Table 3-12 from TI doc - these flags are cleared on character read + vdev->lsr &= ~UART_LSR_BI; + vdev->lsr &= ~UART_LSR_FE; + vdev->lsr &= ~UART_LSR_PE; + vdev->lsr &= ~UART_LSR_OE; //by definition, we cannot have overrun if a character was just read + + return vdev->rhr; +} + +/** + * An alternative to transfer_char_fifo_rhr() when FIFOs aren't used for transfers (e.g. in MSR TEST/LOOP mode) + * + * This function does NOT recalculate IIRs (see update_interrupts_state()) and assumes you have vdev lock. 
+ */ +static void handle_receive_char(struct serial8250_16550A_vdev *vdev, unsigned char value) +{ + //@todo this only handles overruns in FIFO mode and does not do that in non-FIFO; it behaves correctly but it + // doesn't report OEs in non-FIFO + vdev->rhr = value; //RHR is always populated with the value no matter the FIFO or non-FIFO mode + + //Put value in FIFO, it will indicate with return of 0 if it was full before attempted put (overrun/overflow) + if (kfifo_put_val(vdev->rx_fifo, value) == 0) { + vdev->lsr |= UART_LSR_OE; //set overrun flag as FIFO detected that + + //During TEST/LOOP mode many overflows are caused on purpose - we don't want to hear about them really + if (unlikely(!(vdev->mcr & UART_MCR_LOOP))) + pr_loc_wrn("RX FIFO overflow detected @ ttyS%d", vdev->line); + } else { + vdev->lsr &= ~UART_LSR_OE; //no overrun condition - clear OE flag just in case + } + + vdev->lsr |= UART_LSR_DR; //receiver has something for the kernel to pickup +} + +/** + * Called when kernel sent something to the device and it has to be put into TX FIFO & THR + * + * This function does NOT recalculate IIRs (see update_interrupts_state()) and assumes you have vdev lock. + * + * CAUTION: order of these "ifs" for flushes here is crucial: we make a guarantee to the reason parameter that if both + * VUART_FLUSH_THRESHOLD and VUART_FLUSH_FULL are true (i.e. 
callback was set with threshold == VUART_FIFO_LEN) we + * will prioritize threshold trigger (as a user-specified event takes precedence over internal event of FIFO full) + * If the threshold specified by the callback setter was met flush the FIFO + */ +static void handle_transmit_char(struct serial8250_16550A_vdev *vdev, unsigned char value) +{ + //@todo this only handle non-FIFO properly: doesn't detect OE, and doesn't reset THRE + vdev->thr = value; //THR is always populated with the value no matter the FIFO or non-FIFO mode + vdev->lsr &= ~UART_LSR_THRE; + + int fifo_len = kfifo_len(vdev->tx_fifo); + uart_prdbg("%s got new char ascii=%c hex=%02x on ttyS%d (FIFO#=%d)", __FUNCTION__, value, value, vdev->line, + fifo_len); + + //FIFO is full - try to flush it; if we got here it means the threshold is for sure >VUART_FIFO_LEN as this is + // checked after we put data into the FIFO (to make sure we trigger THRESHOLD event and not FULL) + //The reason why we check this at the beginning of new char and not after adding to FIFO is that if the transmitting + // party sends exactly VUART_FIFO_LEN bytes and then ends the transmission we don't want to flush with FULL but with + // IDLE to give a better sense of what's going on to the caller. FULL implies "we got too much data, there may be + // more coming" while IDLE implies that the unit of transmission ended. + if (unlikely(fifo_len == VUART_FIFO_LEN)) { + flush_tx_fifo(vdev, VUART_FLUSH_FULL); + //flush_tx_fifo() drains (or resets) the whole TX FIFO, so the cached length must be reset as well; otherwise + // the THRE and threshold checks further down would act on a stale "full" count instead of the real one + fifo_len = 0; + } + + //Put value in FIFO, it will indicate with return of 0 if it was full before attempted put (overrun/overflow) + //This, if we are correct, cannot happen if the flush_tx_fifo() is functioning correctly as we try to flush above + int fifo_add = kfifo_put_val(vdev->tx_fifo, value); + fifo_len += fifo_add; //we can call kfifo_ API for this but why if we have both pieces of info anyway? 
;) + if (unlikely(fifo_add == 0)) { + vdev->lsr |= UART_LSR_OE; //set overrun flag as FIFO detected that + pr_loc_wrn("TX FIFO overflow detected"); + } else { + vdev->lsr &= ~UART_LSR_OE; //no overrun condition - clear OE flag just in case + } + + vdev->lsr &= ~UART_LSR_TEMT; //transmitter buffers are no longer empty + + //@todo THRE should be reset immediately in non-FIFO mode (i.e. at the same time as TEMT) + //This is to prevent kernel from freaking out about "blackhole" UART (see https://unix.stackexchange.com/a/387650) + if (fifo_len >= VUART_FIFO_LEN / 2) + vdev->lsr &= ~UART_LSR_THRE; + + if (likely(flush_cbs[vdev->line]) && fifo_len >= flush_cbs[vdev->line]->threshold) + flush_tx_fifo(vdev, VUART_FLUSH_THRESHOLD); +} + +/** + * The main READ routing passed to the 8250 driver. It should be as fast as possible and MUST be multithread-safe + * + * Device ==responding-to==> kernel; aka "do you have something for me?" + * This function is used to read data and registers. + * + * @param offset This is really the register value. It's named "offset" in accordance with Linux nomenclature which + * makes sense for physical chips (as this is a memory offset from chip's memory base) + */ +static unsigned int serial_remote_read(struct uart_port *port, int offset) +{ + uart_prdbg("Serial READ for line=%d/%d", port->line, ttySs[port->line].line); + + struct serial8250_16550A_vdev *vdev = get_line_vdev(port->line); + lock_vuart(vdev); + capture_uart_port(vdev, port); + unsigned int out; + switch (offset) { + case UART_RX: + //if DLAB is enabled DLL registry is desired; otherwise we should send THR + //See Table 2 in the chip manual. DLAB controls access to address 000, 001, and 101. When DLAB=1 these + //addrs respond with DLL, DLM, and PSD respectively, when DLAB=0 they respond with RHR/THR, IER/DLM, and LSR + if (vdev->lcr & UART_LCR_DLAB) { + out = vdev->dll; + reg_read("DLL"); + } else if (vdev->lsr & UART_LSR_BI) { //chip wants a break? 
+ out = 0; + vdev->lsr &= ~UART_LSR_BI; //clear the break for the next cycle; see BI in Table 3-12 from TI doc + uart_prdbg("LSR indicated break request, cleared"); + dump_lsr(vdev); + } else if(vdev->lsr & UART_LSR_DR) { //Did we receive anything? + out = transfer_char_fifo_rhr(vdev); + dump_lsr(vdev); + uart_prdbg("Providing RHR registry (val=%x DLAB=0 LSR_DR=1)", out); + } else { + out = 0; + //Such read isn't invalid. However, it is done e.g. in the init sequence as a workaround for some + // physical chips bugs in the past or to clear the RHR before other operations (even if LSR DR=0) + uart_prdbg("Nothing in RHR (DLAB=0; LSR_DR=0) - noop"); + dump_lsr(vdev); + } + break; + case UART_IER: + if (vdev->lcr & UART_LCR_DLAB) { + out = vdev->dlm; + reg_read("DLM"); + } else { + out = vdev->ier; + reg_read_dump(vdev, ier, "IER"); + } + break; + case UART_IIR: + out = vdev->iir; + reg_read_dump(vdev, iir, "IIR/ISR"); + break; + //case UART_FCR not present - write only register + case UART_LCR: + out = vdev->lcr; + reg_read_dump(vdev, lcr, "LCR"); + break; + case UART_MCR: + out = vdev->mcr; + reg_read_dump(vdev, mcr, "MCR"); + break; + case UART_LSR: + out = vdev->lsr; + reg_read_dump(vdev, lsr, "LSR"); + vdev->lsr &= ~UART_LSR_OE; //See "OE" Table 3-12 or Table 3-6 - it needs to be cleared on LSR read + break; + case UART_MSR: + out = vdev->msr; + reg_read_dump(vdev, msr, "MSR"); + + //See table 3-13 in Ti doc; MSR is masked with values from MCR when MCR indicates test/loop mode + if (unlikely(vdev->mcr & UART_MCR_LOOP)) { + if (vdev->mcr & UART_MCR_RTS) out |= UART_MSR_CTS; else out &= ~UART_MSR_CTS; + if (vdev->mcr & UART_MCR_DTR) out |= UART_MSR_DSR; else out &= ~UART_MSR_DSR; + if (vdev->mcr & UART_MCR_OUT1) out |= UART_MSR_RI; else out &= ~UART_MSR_RI; + if (vdev->mcr & UART_MCR_OUT2) out |= UART_MSR_DCD; else out &= ~UART_MSR_DCD; + uart_prdbg("[!] 
Masked real MSR values to: CTS=%d | DSR=%d | RI=%d | DCD=%d", + out&UART_MSR_CTS?1:0, out&UART_MSR_DSR?1:0, out&UART_MSR_RI?1:0, out&UART_MSR_DCD?1:0); + } + break; + case UART_SCR: + out = vdev->scr; + reg_read("SCR/SPR"); + break; + default: + pr_loc_bug("Unknown registry %x read attempt on ttyS%d", offset, vdev->line); + out = 0; + break; + } + + update_interrupts_state(vdev); + unlock_vuart(vdev); + + return out; +} + +/** + * The main WRITE routing passed to the 8250 driver. It should be as fast as possible and MUST be multithread-safe + * + * Kernel => device, aka "I have something FOR YOU, send it along" + * This function is also used to write registers. + * + * @param offset This is really the register value. It's named "offset" in accordance with Linux nomenclature which + * makes sense for physical chips (as this is a memory offset from chip's memory base) + */ +static void serial_remote_write(struct uart_port *port, int offset, int value) +{ + //uart_prdbg("Serial WRITE for line=%d/%d", port->line, ttySs[port->line].line); + + struct serial8250_16550A_vdev *vdev = get_line_vdev(port->line); + lock_vuart(vdev); + capture_uart_port(vdev, port); + + switch (offset) { + case UART_TX: + //See "case UART_RX" for explanation + if (vdev->lcr & UART_LCR_DLAB) { //DLAB overrides everything + vdev->dll = value; + reg_write("DLL"); + } else if (vdev->mcr & UART_MCR_LOOP) { //are we in the reflection/loop mode? 
(=> fake TX->RX connection) + uart_prdbg("Loopback enabled, writing %x meant for THR to RHR directly", value); + handle_receive_char(vdev, (unsigned char)value); //loopback emulates receiving char on RX + dump_mcr(vdev); + dump_lsr(vdev); + } else { //just pickup the data from kernel + handle_transmit_char(vdev, (unsigned char)value); + reg_write("THR"); + dump_lsr(vdev); + } + break; + case UART_IER: + if (vdev->lcr & UART_LCR_DLAB) { + vdev->dlm = value; + reg_write("DLM"); + break; + } + + /* + * This is a little shortcut to deliver data to the callback even if the threshold wasn't met. This is + * done since kernel DISABLES THR empty interrupts when it finishes writing (which makes sense - otherwise + * it will be flooded with interrupts all the time as nothing gets written to THR). This means that + * kernel wrote everything what was there to write and [presumably] nothing else is coming anytime soon + * So in short: if THReINT was enabled and it JUST got disabled flush the FIFO if it isn't empty + */ + if ((vdev->ier & UART_IER_THRI) && !(value & UART_IER_THRI) && !kfifo_is_empty(vdev->tx_fifo)) { + uart_prdbg("Kernel driver disabled THRe interrupt and fifo isn't empty - triggering IDLE flush"); + flush_tx_fifo(vdev, VUART_FLUSH_IDLE); + } + vdev->ier = value & 0x0f; //we're not letting kernel set DMA registers since we don't support DMA + reg_write_dump(vdev, ier, "IER"); + break; + //case UART_IIR not present - read only register + case UART_FCR: + //FIFO registers are guarded by the FIFOEN - if it's not set only FIFOEN can be modified, see p27 of Ti doc + if (!(vdev->fcr & UART_FCR_ENABLE_FIFO) && !(value & UART_FCR_ENABLE_FIFO)) + value &= UART_FCR_ENABLE_FIFO; + + vdev->fcr = value; + reg_write_dump(vdev, fcr, "FCR"); + + //If the new FCR value called for flush of TX and/or RX do that right away + if (vdev->fcr & UART_FCR_CLEAR_XMIT) { + kfifo_reset(vdev->tx_fifo); + vdev->lsr |= UART_LSR_TEMT | UART_LSR_THRE; + uart_prdbg("TX FIFO flushed on FCR 
request"); + dump_lsr(vdev); + } + + if (vdev->fcr & UART_FCR_CLEAR_RCVR) { + kfifo_reset(vdev->rx_fifo); + vdev->lsr &= ~UART_LSR_DR; + uart_prdbg("RX FIFO flushed on FCR request"); + dump_lsr(vdev); + } + break; + case UART_LCR: + vdev->lcr = value; + reg_write_dump(vdev, lcr, "LCR"); + break; + case UART_MCR: + vdev->mcr = value; + reg_write_dump(vdev, mcr, "MCR"); + break; + case UART_LSR: + vdev->lsr = value; + pr_loc_bug("Bogus LSR write attempt on ttyS%d - why?", vdev->line); + dump_lsr(vdev); + break; + case UART_MSR: + vdev->msr = value; + pr_loc_bug("Bogus MSR write attempt on ttyS%d - why?", vdev->line); + dump_msr(vdev); + break; + case UART_SCR: + vdev->scr = value; + reg_write("SCR"); + break; + default: + pr_loc_bug("Unknown registry %x write attempt on ttyS%d with %x", offset, vdev->line, value); + break; + } + + update_interrupts_state(vdev); + unlock_vuart(vdev); +} + + +/************************************************** vUART Glue Layer **************************************************/ +static driver_watcher_instance *driver_watcher = NULL; +static int update_serial8250_isa_port(struct serial8250_16550A_vdev *vdev); +static int restore_serial8250_isa_port(struct serial8250_16550A_vdev *vdev); + +/** + * Initializes/allocates what's needed in a fresh vdev structure (or one which was previously freed) + */ +static int initialize_ttyS(struct serial8250_16550A_vdev *vdev) +{ + int out; + + pr_loc_dbg("Initializing ttyS%d vUART", vdev->line); + if (unlikely(vdev->initialized)) { + pr_loc_bug("ttyS%d is already initialized", vdev->line); + return -EBUSY; + } + + reset_device(vdev); //Puts device in a known RESET state as defined by the real chip docs + if ((out = alloc_fifos(vdev) != 0)) + return out; + + kmalloc_or_exit_int(vdev->lock, sizeof(spinlock_t)); + spin_lock_init(vdev->lock); + + //virq_* stuff is allocated/freed by enable_/disable_interrupts() + + vdev->initialized = true; + pr_loc_dbg("Initialized ttyS%d vUART", vdev->line); + + return 
0; +} + +/** + * Deinitializes/frees what was previously built by initialize_ttyS() + */ +static int deinitialize_ttyS(struct serial8250_16550A_vdev *vdev) +{ + int out; + + pr_loc_dbg("Deinitializing ttyS%d vUART", vdev->line); + if (unlikely(!vdev->initialized)) { + pr_loc_bug("ttyS%d is not initialized", vdev->line); + return -ENODEV; + } + + if ((out = free_fifos(vdev) != 0)) + return out; + + kfree(vdev->lock); + vdev->initialized = false; + pr_loc_dbg("Deinitialized ttyS%d vUART", vdev->line); + + return 0; +} + +/** + * Watches for the serial8250 driver to load in order to register ports which were added before the driver loaded + */ +static driver_watch_notify_result serial8250_ready_watcher(struct device_driver *drv, driver_watch_notify_state event) +{ + if (unlikely(event != DWATCH_STATE_LIVE)) + return DWATCH_NOTIFY_CONTINUE; + + pr_loc_dbg("%s driver loaded - adding queued ports", UART_DRIVER_NAME); + kernel_driver_ready = true; + + int out; + for_each_vdev() { + //non-initialized ports are these which were never added as vUARTs + if (!ttySs[line].initialized || ttySs[line].registered) + continue; + + pr_loc_dbg("Processing enqueued port %d", line); + if ((out = update_serial8250_isa_port(&ttySs[line])) != 0) { + //This is critical as ports were promised to be registered to other parts of the application but we cannot + // fulfill that promise now + pr_loc_crt("Failed to process port %d - error=%d", line, out); + } + } + + pr_loc_dbg("Finished processing enqueued ports"); + return DWATCH_NOTIFY_DONE; +} + +/** + * Checks the current serial8250 status + * + * @return 0 if not loaded, 1 if loaded, -E on error + */ +static int probe_driver(void) +{ + if (kernel_driver_ready) + return 1; //we've already checked the state and confirmed as ready before + + int driver_ready_tristate = is_driver_registered(UART_DRIVER_NAME, NULL); + if (driver_ready_tristate < 0) { + pr_loc_err("Failed to check %s driver state - error=%d", UART_DRIVER_NAME, 
driver_ready_tristate); + return -EIO; + } + + if (driver_ready_tristate == 1) + kernel_driver_ready = true; + + return driver_ready_tristate; +} + +/** + * Attempt to watch for the serial8250 driver readiness (if needed) + * + * @return 0 if driver is not loaded and a watcher has been set up, + * 1 if driver is already loaded (and nothing needs to be done), + * -E on error + */ +static int try_wait_for_serial8250_driver(void) +{ + int driver_ready_tristate = probe_driver(); + if (driver_ready_tristate != 0) + return driver_ready_tristate; //if the driver is ready (=1) or an error occurred (-E) we don't do anything here + + pr_loc_inf("%s driver is not ready - the port addition will be delayed until the driver loads", UART_DRIVER_NAME); + driver_watcher = watch_driver_register(UART_DRIVER_NAME, serial8250_ready_watcher, DWATCH_STATE_LIVE); + + if (IS_ERR(driver_watcher)) { + pr_loc_err("Failed to register driver watcher - no ports can be registered till the driver loads"); + return PTR_ERR(driver_watcher); + } + + return 0; +} + +/** + * Disable the driver watcher if it was set up + * + * @return 0 on success, -E on error + */ +static int try_leave_serial8250_driver(void) +{ + if (!driver_watcher) //we're only concerned about watching the driver + return 0; + + for_each_vdev() { + if (ttySs[line].initialized && !ttySs[line].registered) { + pr_loc_dbg("Cannot leave %s driver yet - port %d is still awaiting registration", UART_DRIVER_NAME, line); + return 0; + } + } + + int out = unwatch_driver_register(driver_watcher); + driver_watcher = NULL; + if (out != 0) + pr_loc_err("Failed to unwatch driver (error=%d)", out); + + return out; +} + +/** + * Asks the Linux 8250 driver to UPDATE properties of a given serial device which matches line & iobase + * + * The reason why this function is called update_ rather than add_ is that we're NOT adding anything new to the driver. + * Rather we're registering a port which is already there (as vUART only deals with COM1-4, i.e. 
legacy IBM/PC ports) + * and matches our spec. + */ +static int update_serial8250_isa_port(struct serial8250_16550A_vdev *vdev) +{ + int out; + pr_loc_dbg("Registering ttyS%d (io=0x%x) in the driver", vdev->line, vdev->iobase); + + if (unlikely(vdev->registered)) { + pr_loc_bug("Port ttyS%d (io=0x%x) is already registered in the driver", vdev->line, vdev->iobase); + return -EEXIST; + } + + int driver_ready_tristate = try_wait_for_serial8250_driver(); + if (driver_ready_tristate == 0) { + pr_loc_wrn("The %s driver is not ready - vUART port ttyS%d (io=0x%x) will be activated later", UART_DRIVER_NAME, + vdev->line, vdev->iobase); + return 0; + } + + if (driver_ready_tristate < 0) { + pr_loc_err("%s failed due to underlining driver error", __FUNCTION__); + return driver_ready_tristate; + } + + + struct uart_8250_port *up; + kzalloc_or_exit_int(up, sizeof(struct uart_8250_port)); + struct uart_port *port = &up->port; + + port->line = vdev->line; + port->iobase = vdev->iobase; + port->uartclk = vdev->baud * 16; + port->flags = STD_COMX_FLAGS; + + //This is a silly workaround to let the kernel know "we don't REALLY support IRQ" + //While the code do support IRQs handling we weren't able to find a smart way to "simulate" IRQ 3-4 (which are + // normally HW interrupts). However, the 8250 driver will emulate them for us using APIC + port->irq = (vuart_virq_supported()) ? vdev->irq : SERIAL8250_SOFT_IRQ; + port->irqflags = 0; + port->hub6 = 0; + port->membase = 0; + port->iotype = 0; + port->regshift = 0; + port->serial_in = serial_remote_read; + port->serial_out = serial_remote_write; + port->type = PORT_16550A; + up->cur_iotype = 0xFF; + + //DO NOT EVEN THINK about assigning "port" top vdev->port!!! serial8250_register_8250_port() uses our passed port to + // match internally reserved (during boot) port structure. Our structure misses a lot of stuff like handlers and so + //YOU CANNOT ASSIGN IT HERE! 
+ + //This is the most explosion-prone section so logs are useful + uart_prdbg("Calling serial8250_register_8250_port to register port"); + if ((out = serial8250_register_8250_port(up)) < 0) { //it returns port # on success or -E on error + pr_loc_err("Failed to register ttyS%d - driver failure (error=%d)", vdev->line, out); + goto out_free; + } + pr_loc_dbg("ttyS%d registered with driver (line=%d)", vdev->line, out); + out = 0; //serial8250_register_8250_port return serial port line # or -E code + vdev->registered = true; + + out_free: + kfree(up); + return out; +} + +/** + * Restores original UART in 8250 driver + */ +static int restore_serial8250_isa_port(struct serial8250_16550A_vdev *vdev) +{ + int out; + pr_loc_dbg("Unregistering ttyS%d (io=0x%x) from the driver", vdev->line, vdev->iobase); + + if (unlikely(!vdev->registered)) { + pr_loc_dbg("Port ttyS%d (io=0x%x) is not registered in the driver - nothing to restore", vdev->line, + vdev->iobase); + return 0; + } + + if (unlikely(!kernel_driver_ready)) { + pr_loc_wrn("Port ttyS%d (io=0x%x) cannot be restored - kernel driver not ready", vdev->line, vdev->iobase); + return 0; //not an error as technically the port is NOT in the driver + } + + struct uart_8250_port *up; + kzalloc_or_exit_int(up, sizeof(struct uart_8250_port)); + struct uart_port *port = &up->port; + + port->line = vdev->line; + up->cur_iotype = 0xFF; + port->iobase = vdev->iobase; + port->uartclk = vdev->baud * 16; + port->irq = vdev->irq; //set a REAL IRQ + port->flags = STD_COMX_FLAGS; + up->port = *port; + + //This is the most explosion-prone section so logs are useful + //This may sound counter-intuitive but we don't want to REMOVE the port, we want to just re-register it with + //all default callbacks. 
+ pr_loc_dbg("Calling serial8250_register_8250_port to restore port"); + if ((out = serial8250_register_8250_port(up)) < 0) { //it returns port # on success or -E on error + pr_loc_err("Failed to restore ttyS%d - driver failure (error=%d)", vdev->line, out); + goto out_free; + } + pr_loc_dbg("ttyS%d finished unregistraton from driver (line=%d)", vdev->line, out); + out = 0; //serial8250_register_8250_port return serial port line # or -E code + + vdev->registered = false; + out = try_leave_serial8250_driver(); + + out_free: + kfree(up); + return out; +} + +int vuart_set_tx_callback(int line, vuart_callback_t *cb, char *buffer, int threshold) +{ + validate_isa_line(line); + + struct serial8250_16550A_vdev *vdev = get_line_vdev(line); + if (!cb) { + pr_loc_dbg("Removing TX callback for ttyS%d (line=%d)", line, vdev->line); + if (unlikely(!flush_cbs[line])) { + pr_loc_dbg("Nothing to do - no TX callback set"); + return 0; + } + + //We don't really need to lock for that + kfree(flush_cbs[line]); + flush_cbs[line] = NULL; + + pr_loc_dbg("Removed TX callback for ttyS%d (line=%d)", line, vdev->line); + return 0; + } + + pr_loc_dbg("Setting TX callback for for ttyS%d (line=%d)", line, vdev->line); + line = vdev->line; //this looks to make no sense BUT it does when serials are swapped + if (likely(!flush_cbs[line])) { //if there was already a cb there we don't need to reserve memory + kmalloc_or_exit_int(flush_cbs[line], sizeof(struct flush_callback)); + } + + //This can technically be called during serial port operation so we need to get a lock before we change these or + // we risk sending a buffer to a wrong function. That lock may not exist when device is not added yet. 
+ lock_vuart_oppr(vdev); + flush_cbs[line]->fn = cb; + flush_cbs[line]->buffer = buffer; + flush_cbs[line]->threshold = threshold; + unlock_vuart_oppr(vdev); + + pr_loc_dbg("Added TX callback for ttyS%d (line=%d)", line, vdev->line); + + return 0; +} + +int vuart_inject_rx(int line, const char *buffer, int length) +{ + validate_isa_line(line); + + if (unlikely(length > VUART_FIFO_LEN)) { + pr_loc_bug("Attempted to inject buffer of %d bytes - it's larger than FIFO size (%d bytes)", length, VUART_FIFO_LEN); + return -E2BIG; + } + + struct serial8250_16550A_vdev *vdev = get_line_vdev(line); + if (unlikely(!vdev->initialized)) { + pr_loc_bug("Cannot inject data into non-initialized or non-registered device"); + return -ENXIO; + } + + if (unlikely(!vdev->registered)) { + pr_loc_wrn("Cannot inject data into unregistered device"); //...as it will be removed by the driver on reg + return 0; + } + + //No space to put data - not an error per-sen as this can be re-run again + if ((vdev->lsr & UART_LSR_DR) && unlikely(kfifo_is_full(vdev->rx_fifo) || unlikely(vdev->mcr & UART_MCR_LOOP))) + return 0; + + + int put_bytes = kfifo_in(vdev->rx_fifo, buffer, VUART_FIFO_LEN); + if (likely(put_bytes > 0)) + vdev->lsr |= UART_LSR_DR; + + uart_prdbg("Injected %d bytes into ttyS%d RX", put_bytes, line); + update_interrupts_state(vdev); + + return put_bytes; +} + +int vuart_add_device(int line) +{ + pr_loc_dbg("Adding vUART ttyS%d", line); + + validate_isa_line(line); + warn_bug_swapped(line); + + int out; + struct serial8250_16550A_vdev *vdev = get_line_vdev(line); + + if ((out = initialize_ttyS(vdev)) != 0) + return out; + + if ((out = update_serial8250_isa_port(vdev)) != 0) + goto error_deinit; + + if ((out = vuart_enable_interrupts(vdev)) != 0) + goto error_restore; + + pr_loc_inf("Added vUART at ttyS%d", line); + return 0; + + error_restore: + restore_serial8250_isa_port(vdev); + + error_deinit: + deinitialize_ttyS(vdev); + + return out; +} + +int vuart_remove_device(int line) +{ + 
pr_loc_dbg("Removing vUART ttyS%d", line); + + validate_isa_line(line); + warn_bug_swapped(line); + + int out; + struct serial8250_16550A_vdev *vdev = get_line_vdev(line); + if ((out = vuart_disable_interrupts(vdev)) != 0 || (out = deinitialize_ttyS(vdev)) != 0 || + (out = restore_serial8250_isa_port(vdev)) != 0 || (out = vuart_set_tx_callback(line, NULL, NULL, 0)) != 0) + return out; + + pr_loc_inf("Removed vUART & restored original UART at ttyS%d", line); + + return 0; +} \ No newline at end of file diff --git a/internal/uart/virtual_uart.h b/internal/uart/virtual_uart.h new file mode 100644 index 0000000..b6c2226 --- /dev/null +++ b/internal/uart/virtual_uart.h @@ -0,0 +1,126 @@ +#ifndef REDPILL_VIRTUAL_UART_H +#define REDPILL_VIRTUAL_UART_H + +#include //bool + +/** + * Length of the RX/TX FIFO in bytes + * Do NOT change this value just because you want to inject more data at once - it's a hardware-defined property + */ +#define VUART_FIFO_LEN 16 + +/** + * Defines maximum threshold possible; in practice this means you will never get any THRESHOLD events but only IDLE and + * FULL ones. + */ +#define VUART_THRESHOLD_MAX INT_MAX + +/** + * Specifies the reason why the vUART flushed the buffer + * + * This value carries one guarantee: they will be evaluated in that order of priority. If you set a threshold to exactly + * VUART_FIFO_LEN and the application sends exactly VUART_FIFO_LEN bytes you will get a reason of VUART_FLUSH_THRESHOLD + * even though all three conditions are met. If you set the threshold to 10 and the app sends 12 bytes you will get a call + * with VUART_FLUSH_THRESHOLD after 10 bytes, then another one with two bytes and VUART_FLUSH_IDLE. 
+ */ +typedef enum { + //Threshold specified while setting the callback has been reached + VUART_FLUSH_THRESHOLD, + + //Kernel put the transmitter in an idle mode, which most of the time indicated end of transmission/packet + VUART_FLUSH_IDLE, + + //FIFO was full before threshold has been reached and the transmission isn't finished yet + VUART_FLUSH_FULL, +} vuart_flush_reason ; + +/** + * Represents a callback signature + * + * @param line UART# where the data arrived; you can ignore it if you registered only one UART + * @param buffer Place where you can read the data from; as of now this is the same buffer but don't rely on this! + * @param len Number of bytes you're allowed to read from beginning of the buffer + * @param reason Denotes why the vUART decided to flush the buffer to the callback + */ +typedef void (vuart_callback_t)(int line, const char *buffer, unsigned int len, vuart_flush_reason reason); + +/** + * Adds a virtual UART device + * + * Calling this function will immediately yank the port from the real one and began capturing its output, so that no + * data will leave through the real one. However, by itself the data will not be delivered anywhere until you call + * vuart_set_tx_callback(), which you can do before or after calling vuart_add_device(). + * + * @param line UART number to replace, e.g. 0 for ttyS0. On systems with inverted UARTs you should use the real one, so + * even if ttyS0 points to 2nd physical port this method will ALWAYS use the one corresponding to ttyS* + * + * @return 0 on success or -E on error + */ +int vuart_add_device(int line); + +/** + * Removes a virtual UART device + * + * Calling this function restores previously replaced port. Unlike vuart_add_device() this function WILL alter TX + * callbacks by removing all of them. 
The reasoning behind this is that adding a device and later on adding/changing + * callbacks makes sense while removing the device and potentially leaving broken pointers can lead to nasty and hard to + * trace bugs. + * + * @param line UART number to replace, e.g. 0 for ttyS0. On systems with inverted UARTs you should use the real one, so + * even if ttyS0 points to 2nd physical port this method will ALWAYS use the one corresponding to ttyS* + * + * @return 0 on success or -E on error + */ +int vuart_remove_device(int line); + +/** + * Injects data into RX stream of the port + * + * It may be confusing at first what's TX and RX in the context here. Imagine a physical chip connected to a computer + * with some bus (not UART). The chip's RX is what the chip would get from *something*. So injecting data into RX of the + * chip causes the data to be processed by the chip and arrive in the kernel and then in the application which opened + * the port. So while TX implies "transmission" from the perspective of the chip and the app opening the port it's an + * RX side. This naming is consistent with what the whole 8250 subsystem uses. + * + * @param line UART number to replace, e.g. 0 for ttyS0. On systems with inverted UARTs you should use the real one, so + * even if ttyS0 points to 2nd physical port this method will ALWAYS use the one corresponding to ttyS* + * @param buffer Pointer to a buffer where we will read from. There's no assumption as to what the buffer contains. + * @param length Length to read from the buffer up to VUART_FIFO_LEN + * + * @return number of bytes injected (0 when nothing could be injected at this time, e.g. the port is not registered or + * there is no space; the call can be repeated later) or -E on error + */ +int vuart_inject_rx(int line, const char *buffer, int length); + +/** + * Set a function which will be called upon data transmission by the port opener + * + * In short you will get data which some app (e.g. cat file > /dev/ttyS0) sent. If you're confused by the RX/TX read the + * comment for inject_rx(). 
+ * + * Example of the callback usage: + * //The len is the real number of bytes available to read. The buffer ptr is the same as you gave to set_tx_ + * void dummy_tx_callback(int line, const char *buffer, unsigned int len, vuart_flush_reason reason) { + * pr_loc_inf("TX @ ttyS%d: |%.*s|", line, len, buffer); + * } + * //.... + * char buf[VUART_FIFO_LEN]; //Your buffer should be able to accommodate at least VUART_FIFO_LEN + * vuart_set_tx_callback(TRY_PORT, dummy_tx_callback, buf, VUART_FIFO_LEN); + * + * WARNING: + * Your callback should be multithreading-aware. It may be called from different contexts. You shouldn't do a lot of work + * on the thread where your callback has been called. If you need something more copy the buffer and process it on a + * separate thread. + * + * @param line UART number to replace, e.g. 0 for ttyS0. On systems with inverted UARTs you should use the real one, so + * even if ttyS0 points to 2nd physical port this method will ALWAYS use the one corresponding to ttyS* + * @param cb Function to be called; call it with a NULL ptr to remove callback, see docblock for vuart_callback_t + * @param buffer A pointer to a buffer where data will be placed. The buffer should be able to accommodate + * VUART_FIFO_LEN number of bytes. The buffer you pass will be the same one as passed back during a call + * @param threshold a *HINT* how many bytes at minimum should be deposited in the FIFO before callback is called. Keep + * in mind that this is just a hint and your callback may be called sooner (e.g. when a client program + * wrote only a single byte using e.g. echo -n X > /dev/ttyS0). 
+ * @return 0 on success or -E on error + */ +int vuart_set_tx_callback(int line, vuart_callback_t *cb, char *buffer, int threshold); + +#endif //REDPILL_VIRTUAL_UART_H diff --git a/internal/uart/vuart_internal.h b/internal/uart/vuart_internal.h new file mode 100644 index 0000000..0377ed6 --- /dev/null +++ b/internal/uart/vuart_internal.h @@ -0,0 +1,73 @@ +#ifndef REDPILL_VUART_INTERNAL_H +#define REDPILL_VUART_INTERNAL_H + +#include +#ifndef VUART_USE_TIMER_FALLBACK +#include +#endif + + +//Lock/unlock vdev for registries operations +#define lock_vuart(vdev) spin_lock_irqsave((vdev)->lock, (vdev)->lock_flags); +#define unlock_vuart(vdev) spin_unlock_irqrestore((vdev)->lock, (vdev)->lock_flags); + +//In some circumstances operations may be performed on the chip before or after the chip is initialized. If it is +// initialized we need a lock first; otherwise we do not. This is a shortcut for this opportunistic/conditional locking. +#define lock_vuart_oppr(vdev) if ((vdev)->initialized) { lock_vuart(vdev); } +#define unlock_vuart_oppr(vdev) if ((vdev)->initialized) { unlock_vuart(vdev); } + +#define validate_isa_line(line) \ + if (unlikely((line) > SERIAL8250_LAST_ISA_LINE)) { \ + pr_loc_bug("%s failed - requested line %d but kernel supports only %d", __FUNCTION__, line, \ + SERIAL8250_LAST_ISA_LINE); \ + return -EINVAL; \ + } + +/** + * An emulated 16550A chips internal state + * + * See http://caro.su/msx/ocm_de1/16550.pdf for details; registers are on page 9 (Table 2) + */ +struct serial8250_16550A_vdev { + //Port properties + u8 line; + u16 iobase; + u8 irq; + unsigned int baud; + + //The 8250 driver port structure - it will be populated as soon as 8250 gives us the real pointer + struct uart_port *up; + + //Chip emulated FIFOs + struct kfifo *tx_fifo; //character to be sent (aka what we've got from the OS) + struct kfifo *rx_fifo; //characters received (aka what we want the OS to get from us) + + //Chip registries (they're considered volatile but there's a 
spinlock protecting them) + u8 rhr; //Receiver Holding Register (characters received) + u8 thr; //Transmitter Holding Register (characters REQUESTED to be sent, TSR will contain these to be TRANSMITTED) + u8 ier; //Interrupt Enable Register + u8 iir; //Interrupt ID Register (same as ISR/Interrupt Status Register) + u8 fcr; //FIFO Control Register (not really used but holds values written to it) + u8 lcr; //Line Control Register (not really used but holds values written to it) + u8 mcr; //Modem Control Register (used to control autoflow) + u8 lsr; //Line Status Register + u8 msr; //Modem Status Register + u8 scr; //SCratch pad Register (in the original docs refered to as SPR, but linux uses SCR name) + u8 dll; //Divisor Lat Least significant byte (not really used but holds values written to it) + u8 dlm; //Divisor Lat Most significant byte (not really used but holds values written to it; also called DLH) + u8 psd; //Prescaler Division (not really used but holds values written to it) + + //Some operations (e.g. FIFO access) must be locked + bool initialized:1; + bool registered:1; //whether the vdev is actually registered with 8250 subsystem + spinlock_t *lock; + unsigned long lock_flags; + +#ifndef VUART_USE_TIMER_FALLBACK + //We emulate (i.e. self-trigger) interrupts on threads + struct task_struct *virq_thread; //where fake interrupt code is executed + wait_queue_head_t *virq_queue; //wait queue used to put thread to sleep +#endif +}; + +#endif //REDPILL_VUART_INTERNAL_H diff --git a/internal/uart/vuart_virtual_irq.c b/internal/uart/vuart_virtual_irq.c new file mode 100644 index 0000000..5e5924d --- /dev/null +++ b/internal/uart/vuart_virtual_irq.c @@ -0,0 +1,152 @@ +#ifndef VUART_USE_TIMER_FALLBACK + +#include "vuart_virtual_irq.h" +#include "vuart_internal.h" +#include "../../common.h" +#include "../../debug/debug_vuart.h" +#include //UART_* consts +#include //running vIRQ thread +#include //wait queue handling (init_waitqueue_head etc.) 
+#include //serial8250_handle_irq
+
+//Default name of the thread for vIRQ
+#ifndef VUART_THREAD_FMT
+#define VUART_THREAD_FMT "vuart/%d-ttyS%d"
+#endif
+
+/**
+ * Function running on a separate kernel thread responsible for simulating the IRQ call (normally done via hardware
+ * interrupt triggering CPU to invoke Linux IRQ subsystem)
+ *
+ * There's no sane way to trigger IRQs in the low range used by 8250 UARTs. A pure asm call of "int $4" will result in a
+ * crash (yes, we did try first ;)). So instead of hacking around the kernel we simply used the 8250 public interface to
+ * trigger interrupt routines and implemented a small IRQ handling subsystem on our own.
+ *
+ * @param data The struct serial8250_16550A_vdev this thread generates interrupts for
+ * @return 0 when stopped via kthread_stop() or -EPIPE when the wait was interrupted by a signal
+ */
+static int virq_thread(void *data)
+{
+    allow_signal(SIGKILL);
+
+    int out = 0;
+    struct serial8250_16550A_vdev *vdev = data;
+
+    uart_prdbg("%s started for ttyS%d pid=%d", __FUNCTION__, vdev->line, current->pid);
+    while(likely(!kthread_should_stop())) {
+        //Sleep until the emulated chip flags a pending interrupt in IIR (or until we are asked to stop)
+        wait_event_interruptible(*vdev->virq_queue, !(vdev->iir & UART_IIR_NO_INT) || unlikely(kthread_should_stop()));
+        if (unlikely(signal_pending(current))) {
+            uart_prdbg("%s started for ttyS%d pid=%d received signal", __FUNCTION__, vdev->line, current->pid);
+            out = -EPIPE;
+            break;
+        }
+
+        if (unlikely(kthread_should_stop()))
+            break;
+
+        if (unlikely(!vdev->up)) {
+            pr_loc_bug("Cannot call serial8250 interrupt handler - port not captured (yet?)");
+            continue;
+        }
+
+        uart_prdbg("Calling serial8250 interrupt handler");
+        serial8250_handle_irq(vdev->up, vdev->iir);
+    }
+    uart_prdbg("%s stopped for ttyS%d pid=%d exit=%d", __FUNCTION__, vdev->line, current->pid, out);
+
+    //that can lead to a small memory leak for virq_queue if thread is killed outside disable_interrupts() but this
+    // shouldn't normally happen unless something goes horribly wrong
+    vdev->virq_thread = NULL;
+
+    return out;
+}
+
+/**
+ * Allocates the vIRQ wait queue and starts the vIRQ thread for a given vUART
+ *
+ * Nothing which may sleep (GFP_KERNEL allocation, kthread_run()) is executed while the vdev spinlock is held. Only
+ * memory allocated by this very call is released on failure, so a failed call never touches an already working vIRQ.
+ * Calls to enable/disable for the same vdev are expected to be serialized by the caller.
+ *
+ * @return 0 on success, -ENOMEM when the queue cannot be allocated, -ENODEV if the vdev is not initialized, -EBUSY if
+ *         the vIRQ is already active, or the error reported by kthread_run()
+ */
+int vuart_enable_interrupts(struct serial8250_16550A_vdev *vdev)
+{
+    int out;
+    wait_queue_head_t *queue;
+    struct task_struct *thread;
+
+    pr_loc_dbg("Enabling vIRQ for ttyS%d", vdev->line);
+
+    //GFP_KERNEL may sleep, so the memory is reserved BEFORE taking the (IRQ-disabling) spinlock
+    queue = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
+    if (unlikely(!queue)) {
+        pr_loc_crt("kernel memory alloc failure - tried to reserve memory for vIRQ structures");
+        return -ENOMEM;
+    }
+    init_waitqueue_head(queue);
+
+    lock_vuart(vdev);
+
+    if (unlikely(!vdev->initialized)) {
+        pr_loc_bug("ttyS%d is not initialized as vUART", vdev->line);
+        out = -ENODEV;
+        goto error_unlock_free;
+    }
+
+    if (unlikely(vuart_virq_active(vdev))) {
+        pr_loc_bug("Interrupts are already enabled & scheduled for ttyS%d", vdev->line);
+        out = -EBUSY;
+        goto error_unlock_free;
+    }
+
+    vdev->virq_queue = queue; //the thread dereferences it right after starting, so it must be published first
+    unlock_vuart(vdev); //starting a thread may sleep as well, so the lock has to be released before doing so
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-extra-args"
+    //VUART_THREAD_FMT can resolve to anonymized version without line or even IRQ#
+    thread = kthread_run(virq_thread, vdev, VUART_THREAD_FMT, vdev->irq, vdev->line);
+#pragma GCC diagnostic pop
+    if (IS_ERR(thread)) {
+        out = PTR_ERR(thread);
+        pr_loc_bug("Failed to start vIRQ thread");
+        lock_vuart(vdev);
+        vdev->virq_queue = NULL; //the error pointer is never stored in vdev, so only the queue must be withdrawn
+        goto error_unlock_free;
+    }
+    vdev->virq_thread = thread; //the task_struct is owned by the kthread subsystem - we only keep a reference
+    pr_loc_dbg("vIRQ fully enabled for for ttyS%d", vdev->line);
+
+    return 0;
+
+    error_unlock_free:
+    unlock_vuart(vdev);
+    kfree(queue); //frees only what THIS call allocated; structures of an already running vIRQ are left alone
+    return out;
+}
+
+/**
+ * Stops the vIRQ thread of a given vUART and releases its wait queue
+ *
+ * kthread_stop() blocks until the thread exits, so it is called only after the vdev spinlock is released. The wait
+ * queue is freed once the thread is gone, as nothing else sleeps on it.
+ *
+ * @return 0 on success, -ENODEV if the vdev is not initialized, -EBUSY if the vIRQ is not active, or the negative exit
+ *         code reported by kthread_stop()
+ */
+int vuart_disable_interrupts(struct serial8250_16550A_vdev *vdev)
+{
+    int out;
+    struct task_struct *thread;
+    wait_queue_head_t *queue;
+
+    pr_loc_dbg("Disabling vIRQ for ttyS%d", vdev->line);
+    lock_vuart(vdev);
+
+    if (unlikely(!vdev->initialized)) {
+        pr_loc_bug("ttyS%d is not initialized as vUART", vdev->line);
+        out = -ENODEV;
+        goto out_unlock;
+    }
+
+    if (unlikely(!vuart_virq_active(vdev))) {
+        pr_loc_bug("Interrupts are not enabled/scheduled for ttyS%d", vdev->line);
+        out = -EBUSY;
+        goto out_unlock;
+    }
+
+    thread = vdev->virq_thread;
+    unlock_vuart(vdev); //kthread_stop() sleeps until the thread exits - it must not be called with the lock held
+
+    out = kthread_stop(thread);
+
+    //The thread is gone whatever its exit code was, so the queue can be released. The task_struct itself is NOT ours
+    // to free - it belongs to the kthread subsystem.
+    lock_vuart(vdev);
+    vdev->virq_thread = NULL;
+    queue = vdev->virq_queue;
+    vdev->virq_queue = NULL;
+    unlock_vuart(vdev);
+    kfree(queue);
+
+    if (out < 0) {
+        pr_loc_bug("Failed to stop vIRQ thread");
+        return out;
+    }
+
+    pr_loc_dbg("vIRQ disabled for ttyS%d", vdev->line);
+    return 0;
+
+    out_unlock:
+    unlock_vuart(vdev);
+
+    return out;
+}
+#endif
\ No newline at end of file
diff --git a/internal/uart/vuart_virtual_irq.h b/internal/uart/vuart_virtual_irq.h
new file mode 100644
index 0000000..963a8ad
--- /dev/null
+++ b/internal/uart/vuart_virtual_irq.h
@@ -0,0 +1,20 @@
+#ifndef REDPILL_VUART_VIRTUAL_IRQ_H
+#define REDPILL_VUART_VIRTUAL_IRQ_H
+
+#ifdef VUART_USE_TIMER_FALLBACK
+#define vuart_virq_supported() 0
+#define vuart_virq_wake_up(dummy) //noop
+#define vuart_enable_interrupts(dummy) (0)
+#define vuart_disable_interrupts(dummy) (0)
+
+#else //VUART_USE_TIMER_FALLBACK
+#include "vuart_internal.h"
+
+#define vuart_virq_supported() 1
+#define vuart_virq_active(vdev) (!!(vdev)->virq_thread)
+#define vuart_virq_wake_up(vdev) if (vuart_virq_active(vdev)) { wake_up_interruptible((vdev)->virq_queue); }
+int vuart_enable_interrupts(struct serial8250_16550A_vdev *vdev);
+int vuart_disable_interrupts(struct serial8250_16550A_vdev *vdev);
+#endif //VUART_USE_TIMER_FALLBACK
+
+#endif //REDPILL_VUART_VIRTUAL_IRQ_H
\ No newline at end of file
diff --git a/internal/virtual_pci.c b/internal/virtual_pci.c
new file mode 100644
index 0000000..6b7905c
--- /dev/null
+++ b/internal/virtual_pci.c
@@ -0,0 +1,575 @@
+/*
+ * This file is a SIMPLE (yes, this IS simple) software emulation layer for PCI devices.
+ *
+ * Before you even start reading it you need to get familiar with references listed below. As the kernel people put it
+ * mildly "The world of PCI is vast and full of (mostly unpleasant) surprises.". This module tries to abstract hardware
+ * space emulation into highest level API possible.
+ * + * + * QUICK INTRODUCTION + * ------------------ + * To use it you need to supply a descriptor (e.g. struct pci_dev_descriptor) and give it domain-unique combination of + * {bus#, device#, function#}. The domain for most (all?) physical devices is usually 0x0000. This module uses 0x0001 to + * avoid conflicts. + * Fast PCI facts (read this to add devices): + * - every device is in the system has a location of BDF (256 buses max, 32 devices/bus, 8 functions/device = 65536) + * - every device MUST contain a function 0 (and may contain 1-7) + * - function is kind-of a subdevice (e.g. a quad-port network card will usually have functions 0-3) + * - you should (but you don't HAVE to) set "master bus" (.command |= PCI_COMMAND_MASTER) for every function 0 device + * instance + * - every device MUST have a valid VID/DEV. None of the fields can be 0x0000 or 0xFFFF (they have special meanings) + * - this module does NOT have any support for capabilities (CAPs) as they're variable length and we don't want to + * force every device struct to take 256K of memory (todo if needed?) + * - there are three types of headers: PCI device, PCI-PCI bridge, PCI-CardBus bridge. Only the first one was tested. + * The second one allows for more levels of the tree and should work if configured properly (see struct + * pci_pci_bridge_descriptor) but it wasn't needed yet. The third one is practically a bitrot now. + * - EVERYTHING IN PCI IS LITTLE ENDIAN no matter what your CPU says. Triple check if you're setting values correctly. + * Then you realize you set them incorrectly. + * - "devfn" in Linux terminology does NOT mean "device function" but rather "device# and function#". It is described + * in drivers/pci/search.c as "encodes number of PCI slot in which the desired PCI device resides and the logical + * device number within that slot in case of multi-function devices". + * You can use macros PCI_SLOT() and PCI_FUNC() to get dev# and fn# from that field. 
+ * - Linux provides class & subclass constants (PCI_CLASS_* in include/linux/pci_ids.h). However they're defined as + * either: + * - 8 bit class + * - e.g. PCI_BASE_CLASS_SERIAL [0x0c] + * - can be put into pci_dev_descriptor.class directly + * - 16 bit class+subclass + * - e.g. PCI_CLASS_SERIAL_USB [0x0c03] + * - use U16_CLASS_TO_U8_CLASS(PCI_CLASS_SERIAL_USB) for pci_dev_descriptor.class [0x0c] + * - use U16_CLASS_TO_U8_SUBCLASS(PCI_CLASS_SERIAL_USB) for pci_dev_descriptor.subclass [0x03] + * - 24 bit (sic!) class+subclass+prog_if + * - e.g. PCI_CLASS_SERIAL_USB_EHCI (0x0c0320) + * - use U24_CLASS_TO_U8_CLASS(PCI_CLASS_SERIAL_USB_EHCI) for pci_dev_descriptor.class [0x0c] + * - use U24_CLASS_TO_U8_SUBCLASS(PCI_CLASS_SERIAL_USB_EHCI) for pci_dev_descriptor.subclass [0x03] + * - use U24_CLASS_TO_U8_PROGIF(PCI_CLASS_SERIAL_USB_EHCI) for pci_dev_descriptor.prog_if [0x20] + * - "pci_dev_conf_default_normal_dev" provides a sane-default device where you need to only set: vid, dev, class, + * and subclass. + * + * + * DEBUGGING DEVICES + * ----------------- + * To see the tree you can use "lspci -tvnn". 
Here's a quick cheat-sheet from the output format: + * 0001:0a:00.0 Class 0000: Device 1b4b:9235 (rev ff) + * ^ ^ ^ ^ ^ ^ ^ ^ + * | | | | | | | |_______ pci_dev_descriptor.class_revision (lower 24 bits) + * | | | | | | |_________________ pci_dev_descriptor.dev (device ID) + * | | | | | |______________________ pci_dev_descriptor.vid (vendor ID) + * | | | | |___________________________________ pci_dev_descriptor.class_revision (higher 24 bits) + * | | | |___________________________________________ PCI device function + * | | |______________________________________________ device num on the bus + * | |_________________________________________________ PCI bus no + * |_____________________________________________________ PCIBUS_VIRTUAL_DOMAIN + * + * + * To debug the Linux PCI subsytem side of things these will be useful: + * echo 'file probe.c +p' > /sys/kernel/debug/dynamic_debug/control + * echo 'file search.c +p' > /sys/kernel/debug/dynamic_debug/control + * echo 'file delete.c +p' > /sys/kernel/debug/dynamic_debug/control + * + * + * INTERNAL STRUCTURE + * ------------------ + * The module emulates PCI on the lowest possible level - it literally fakes the otherwise-physical memory of + * configuration registries. + * + * The two header types are memory-mapped as follows: (PCI-CardBus isn't shown as nobody uses that) + * HEADER TYPE 0x00 (Normal Device) HEADER TYPE 0x01 (PCI-PCI Bridge) + * 31 16 15 0 hh 31 16 15 0 hh + * ╠══════════╩══════════╬══════════╩═══════════╬════ ╠═════════╩══════════╬═══════════╩═══════════╬════ + * ║ Device ID ║ Vendor ID ║ 00 ║ Device ID ║ Vendor ID ║ 00 + * ╠═════════════════════╬══════════════════════╬════ ╠════════════════════╬═══════════════════════╬════ + * ║ Status ║ Command ║ 04 ║ Status ║ Command ║ 04 + * ╠══════════╦══════════╬══════════╦═══════════╬════ ╠═════════╦══════════╬═══════════╦═══════════╬════ + * ║ Class ║ Subclass ║ ProgIF ║ Rev. ID ║ 08 ║ Class ║ Subclass ║ ProgIF ║ Rev. 
ID ║ 08 + * ╠══════════╬══════════╬══════════╬═══════════╬════ ╠═════════╬══════════╬═══════════╬═══════════╬════ + * ║ BIST ║ HeaderT ║ Lat.Tmr. ║ Cache LS ║ 0c ║ BIST ║ HeaderT ║ Lat.Tmr. ║ Cache LS ║ 0c + * ╠══════════╩══════════╩══════════╩═══════════╬════ ╠═════════╩══════════╩═══════════╩═══════════╬════ + * ║ BAR0 ║ 10 ║ BAR0 ║ 10 + * ╠════════════════════════════════════════════╬════ ╠════════════════════════════════════════════╬════ + * ║ BAR1 ║ 14 ║ BAR1 ║ 14 + * ╠════════════════════════════════════════════╬════ ╠═════════╦══════════╦═══════════╦═══════════╬════ + * ║ BAR2 ║ 18 ║ SecLatT ║ SubordB# ║ SecBus# ║ PriBus# ║ 18 + * ╠════════════════════════════════════════════╬════ ╠═════════╩══════════╬═══════════╬═══════════╬════ + * ║ BAR3 ║ 1c ║ Secondary Status ║ I/O Limit ║ I/O Base ║ 1c + * ╠════════════════════════════════════════════╬════ ╠════════════════════╬═══════════╩═══════════╬════ + * ║ BAR4 ║ 20 ║ Memory limit ║ Memory base ║ 20 + * ╠════════════════════════════════════════════╬════ ╠════════════════════╬═══════════════════════╬════ + * ║ BAR5 ║ 24 ║ Prefetch. Mem. L. ║ Prefetch. Mem. B. ║ 24 + * ╠════════════════════════════════════════════╬════ ╠════════════════════╩═══════════════════════╬════ + * ║ Cardbus CIS ptr ║ 28 ║ Prefetchable Base Upper 32 bit ║ 28 + * ╠═════════════════════╦══════════════════════╬════ ╠════════════════════════════════════════════╬════ + * ║ Subsys ID ║ Subsys VID ║ 2c ║ Prefetchable Limit Upper 32 bit ║ 2c + * ╠═════════════════════╩══════════════════════╬════ ╠════════════════════╦═══════════════════════╬════ + * ║ Exp. ROM Base Addr. ║ 30 ║ I/O Lim. Up. 16b ║ I/O Base Up. 16b ║ 30 + * ╠════════════════════════════════╦═══════════╬════ ╠════════════════════╩═══════════╦═══════════╬════ + * ║ *RSV* ║ Cap. ptr ║ 34 ║ *RSV* ║ Cap. ptr ║ 34 + * ╠════════════════════════════════╩═══════════╬════ ╠════════════════════════════════╩═══════════╬════ + * ║ *RSV* ║ 38 ║ Exp. ROM Base Addr. 
║ 38 + * ╠══════════╦══════════╦══════════╦═══════════╬════ ╠════════════════════╦═══════════╦═══════════╬════ + * ║ Max Lat. ║ Min Gnt. ║ Int. pin ║ Int. lin. ║ 3c ║ Bridge Control ║ Int. pin ║ Int. lin. ║ 3c + * ╠══════════╩══════════╩══════════╩═══════════╬═══════ ╠════════════════════╩═══════════╩═══════════╬═══════ + * ║ Optional Dev.-Dep. Config (192 bytes) ║ 40-100 ║ Optional Dev.-Dep. Config (192 bytes) ║ 40-100 + * ╚════════════════════════════════════════════╩═══════ ╚════════════════════════════════════════════╩═══════ + * + * + * LINUX PCI SUBSYSTEM SCANNING ROUTINE + * ------------------------------------ + * The kernel has a surprisingly readable code for the PCI scanning. We recommend starting from drivers/pci/probe.c and + * "struct pci_bus *pci_scan_bus()" function. + * In a big simplification it goes something like this: + * probe.c + * pci_scan_bus() + * => pci_scan_child_bus + * => loop pci_scan_slot(bus, devfn) with devfn=<0,0x100> every 8 bytes + * => pci_scan_single_device + * => pci_get_slot to check if device already exists + * => pci_scan_device to probe the device + * => pci_bus_read_dev_vendor_id + * => .... [and others] + * => pci_device_add if device probe succeeded + * + * THE ACPI SAGA + * ------------- + * If you were thinking PCI is hard you haven't heard about ACPI. Kernels starting from v3.13 require ACPI companion + * for PCI devices when the system was configured to run on an ACPI-complain x86 platform. This isn't an unusual + * assumption. Before v3.13 the struct x86_sysdata contained a simple ACPI handle, which could be NULL. Now it should + * contain a structure. However it still PROBABLY can be NULL. + * See https://github.com/torvalds/linux/commit/7b1998116bbb2f3e5dd6cb9a8ee6db479b0b50a9 for details of that change. + * + * When the structure (=ACPI data) is NULL the error "ACPI: \: failed to evaluate _DSM (0x1001)" will be logged upon + * scanning. However it seems to be harmless. 
There are two ways to get rid of this error: 1) Implement a proper ACPI + * _DSM [no, just NO], or 2) use override_symbol() for acpi_evaluate_dsm() with a function doing the following (for the + * time of scanning ONLY): + * union acpi_object *obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL); + * obj->type = ACPI_TYPE_INTEGER; + * obj->integer.value = 1; + * return obj; + * + * x86 BUS SCANNING BUG (>=v4.1) + * ----------------------------- + * Since v4.1 adding a new bus under a different domain will cause devices on the bus to not be fully populated. See the + * comment in "vpci_add_device()" here for details & a simple fix. + * + * KNOWN BUGS + * ---------- + * Under Linux v3.10 once bus is added it cannot be fully removed (or we didn't find the correct way). When you do the + * initial add and scan everything works correctly. You can later even remove that bus BUT the kernel leaves some sysfs + * stuff behind in /sys/devices (while /sys/bus/pci/devices are cleaned up). This means that if you try to re-register + * the same bus it will explode with sysfs duplication errors. + * As of now we have no idea how to work around that. 
+ *
+ *
+ * References:
+ *  - https://stackoverflow.com/a/31465293 (how PCI subsystem works)
+ *  - https://docs.oracle.com/cd/E19120-01/open.solaris/819-3196/hwovr-25/index.html (PCI working theory)
+ *  - https://elixir.bootlin.com/linux/v3.10.108/source/include/uapi/linux/pci_regs.h (Linux PCI registers)
+ *  - https://elixir.bootlin.com/linux/v3.10.108/source/drivers/pci/probe.c (PCI scanning code; very readable)
+ *  - https://blog.csdn.net/moon146/article/details/18988849 (scanning process)
+ *  - https://wiki.osdev.org/PCI (details regarding flags & commands)
+ */
+#include "virtual_pci.h"
+#include "../common.h"
+#include "../config/vpci_types.h" //MAX_VPCI_BUSES
+#include
+#include //PCI device header constants
+#include //Constants for vendors, classes, and other
+#include //list_for_each
+#include //device_del
+
+#define PCIBUS_VIRTUAL_DOMAIN 0x0001 //normal PC buses are (always?) on domain 0, this is just a next one
+#define PCI_DEVICE_NOT_FOUND_VID_DID 0xFFFFFFFF //A special case to detect non-existing devices (per PCI spec)
+
+/* As per PCI spec
+ * If a single function device is detected (i.e., bit 7 in the Header
+ * Type register of function 0 is 0), no more functions for that
+ * Device Number will be checked. If a multi-function device is
+ * detected (i.e., bit 7 in the Header Type register of function 0
+ * is 1), then all remaining Function Numbers will be checked.
+ * This helper converts single-function header type to multifunction header type
+ */
+#define PCI_HEADER_TO_MULTI(x) ((1 << 7) | (x))
+#define IS_PCI_HEADER_MULTI(x) (!!((x) & 0x80))
+
+//Model of a default config for a device
+//Copy it and override at least the fields marked with "set me!"
+const struct pci_dev_descriptor pci_dev_conf_default_normal_dev = {
+    .vid = 0xDEAD, //set me!
+    .dev = 0xBEEF, //set me!
+
+    .command = 0x0000,
+    .status = 0x0000,
+
+    .rev_id = PCI_DSC_REV_NONE,
+    .prog_if = PCI_DSC_PROGIF_NONE,
+    //subclass is the LOWER byte of a 16-bit class constant, so it needs the _SUBCLASS extractor (not the _CLASS one)
+    .subclass = U16_CLASS_TO_U8_SUBCLASS(PCI_CLASS_NOT_DEFINED), //set me!
+    .class = U16_CLASS_TO_U8_CLASS(PCI_CLASS_NOT_DEFINED), //set me!
+
+    .cache_line_size = 0x00,
+    .latency_timer = 0x00,
+    .header_type = PCI_HEADER_TYPE_NORMAL,
+    .bist = PCI_DSC_BIST_NONE, //Built-In Self Test
+
+    .bar0 = PCI_DSC_NULL_BAR,
+    .bar1 = PCI_DSC_NULL_BAR,
+    .bar2 = PCI_DSC_NULL_BAR,
+    .bar3 = PCI_DSC_NULL_BAR,
+    .bar4 = PCI_DSC_NULL_BAR,
+    .bar5 = PCI_DSC_NULL_BAR,
+
+    .cardbus_cis = 0x00000000,
+
+    .subsys_vid = 0x0000, //you probably want to set this
+    .subsys_id = 0x0000, //you probably want to set this
+
+    .exp_rom_base_addr = 0x00000000,
+
+    .cap_ptr = PCI_DSC_NULL_CAP,
+    .reserved_34_8_15 = PCI_DSC_RSV8,
+    .reserved_34_16_31 = PCI_DSC_RSV16,
+
+    .reserved_38h = 0x00000000,
+
+    .interrupt_line = PCI_DSC_NO_INT_LINE,
+    .interrupt_pin = PCI_DSC_NO_INT_PIN,
+    .min_gnt = PCI_DSC_ZERO_BURST,
+    .max_lat = PCI_DSC_INF_LATENCY,
+};
+
+struct virtual_device {
+    unsigned char *bus_no; //same as bus->number, used when bus is not initialized yet (e.g. during scanning)
+    unsigned char dev_no;
+    unsigned char fn_no;
+    struct pci_bus* bus;
+    void *descriptor;
+};
+static unsigned int free_bus_idx = 0; //Used to find next free bus and for indexing other arrays
+static struct pci_bus *buses[MAX_VPCI_BUSES] = { NULL }; //All virtual buses
+
+static unsigned int free_dev_idx = 0; //Used to find next free device and for indexing other arrays
+static struct virtual_device *devices[MAX_VPCI_DEVS] = { NULL }; //All virtual devices
+
+//Macros to easily iterate over lists above
+//Both declare the loop variable "i", so they cannot be nested inside one another
+#define for_each_bus_idx() for (int i = 0, last_bus_idx = free_bus_idx-1; i <= last_bus_idx; i++)
+#define for_each_dev_idx() for (int i = 0, last_dev_idx = free_dev_idx-1; i <= last_dev_idx; i++)
+
+/**
+ * Prints pci_dev_descriptor or pci_pci_bridge_descriptor
+ */
+void print_pci_descriptor(void *test_dev)
+{
+    pr_loc_dbg("Printing PCI descriptor @ %p", test_dev);
+    pr_loc_dbg_raw("\n31***********0***ADDR*******************\n");
+    u8 *ptr = (u8 *)test_dev;
+    DBG_ALLOW_UNUSED(*ptr);
+
+    for (int
row = 3; row < 64; row += 4) {
+        for (int byte = 0; byte > -4; byte--) {
+            pr_loc_dbg_raw("%02x ", *(ptr + row + byte));
+            if (byte == -1) pr_loc_dbg_raw(" ");
+        }
+
+        pr_loc_dbg_raw(" | 0x%02X\n", row - 3);
+    }
+    //The following format will be useful when/if CAPs are implemented
+//    printk("\n--------------Device Private--------------\n");
+//    printk("00000000 00000000 00000000 00000000 | xxx\n");
+//    printk("******************************************\n");
+}
+
+/**
+ * Serves configuration space reads for virtual devices straight from their descriptors
+ *
+ * Descriptors back only the standard 64-byte header. Reads which fall outside of it return zeros (just like
+ * unimplemented registers do) instead of copying memory located past the end of the descriptor.
+ *
+ * @param bus The bus (may be under first scan so only its number may be present in virtual_device)
+ * @param devfn Device AND its function; it's a 0-256 number allowing for 32 devices with 8 functions each
+ * @param where Offset in the device structure to read
+ * @param size How many BYTES (not bits) to read
+ * @param val Pointer to save read bytes
+ * @return PCIBIOS_*
+ */
+static int pci_read_cfg(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val)
+{
+    //devfn is a combination of device number on bus and function number (Bus/Device/Function addressing)
+    //Each device which exists MUST implement function 0. So every 8th value of devfn we have a new device.
+    unsigned char vdev_no = PCI_SLOT(devfn);
+    unsigned char vdev_fn = PCI_FUNC(devfn);
+
+    void *pci_descriptor = NULL;
+
+    //Very noisy!
+    //pr_loc_dbg("Read SYN wh=0x%d sz=%d B / %d for vDEV @ bus=%02x dev=%02x fn=%02x", where, size, size * 8,
+    //           bus->number, vdev_no, vdev_fn);
+    for_each_dev_idx() {
+        //Very noisy!
+        //pr_loc_dbg("Checking vDEV @ bus=%02x dev=%02x fn=%02x", *devices[i]->bus_no, devices[i]->dev_no,
+        //           devices[i]->fn_no);
+
+        //We cannot use devices[i]->bus->number during scan as the bus may just being created and no ->bus is available
+        if(*devices[i]->bus_no == bus->number && devices[i]->dev_no == vdev_no && devices[i]->fn_no == vdev_fn) {
+            //Very noisy!
+            //pr_loc_dbg("Found matching vDEV @ bus=%02x dev=%02x fn=%02x => vidx=%d mf=%d", bus->number, vdev_no,
+            //           vdev_fn, i,
+            //           IS_PCI_HEADER_MULTI(((struct pci_dev_descriptor *) devices[i]->descriptor)->header_type) ? 1:0);
+            pci_descriptor = devices[i]->descriptor;
+            break;
+        }
+    };
+
+    if (!pci_descriptor) { //This is not a hack - this is per PCI spec to return special "not found pid/vid"
+        if (where == PCI_VENDOR_ID || where == PCI_DEVICE_ID)
+            *val = PCI_DEVICE_NOT_FOUND_VID_DID;
+
+        //Very noisy!
+        //pr_loc_dbg("Read NAK wh=0x%d sz=%d B / %d for vDEV @ bus=%02x dev=%02x fn=%02x", where, size, size * 8, bus->number,
+        //           vdev_no, vdev_fn);
+        return PCIBIOS_DEVICE_NOT_FOUND;
+    }
+
+    //A read can deliver at most sizeof(*val) bytes and must start at a non-negative offset
+    if (unlikely(where < 0 || size < 1 || size > (int)sizeof(*val)))
+        return PCIBIOS_BAD_REGISTER_NUMBER;
+
+    //Zero the full u32 first so bytes not covered by a 1- or 2-byte read are defined
+    *val = 0;
+
+    //Both descriptor types are assumed to span exactly the 64-byte standard header (print_pci_descriptor() relies on
+    // the same). The device-dependent region above it is not emulated, so it reads as zeros rather than whatever
+    // memory happens to follow the descriptor.
+    if (where + size > (int)sizeof(struct pci_dev_descriptor))
+        return PCIBIOS_SUCCESSFUL;
+
+    //Very noisy!
+    //pr_loc_dbg("Read ACK wh=0x%d sz=%d B / %d for vDEV @ bus=%02x dev=%02x fn=%02x", where, size, size * 8, bus->number,
+    //           vdev_no, vdev_fn);
+    memcpy(val, (u8 *)pci_descriptor + where, size);
+
+    return PCIBIOS_SUCCESSFUL;
+}
+
+//The emulated configuration space is read-only: every write is rejected
+static int pci_write_cfg(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val)
+{
+    return PCIBIOS_SET_FAILED;
+}
+
+//Definition of callbacks the PCI subsystem uses to query the root bus
+static struct pci_ops pci_shim_ops = {
+    .read = pci_read_cfg,
+    .write = pci_write_cfg
+};
+
+//x86-specific sysdata which is expected to be present while running on x86 (if it's not you will get a KP)
+static struct pci_sysdata x86_sysdata = {
+    .domain = PCIBUS_VIRTUAL_DOMAIN,
+#ifdef CONFIG_ACPI
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,13,0)
+    .companion = NULL, //See https://github.com/torvalds/linux/commit/7b1998116bbb2f3e5dd6cb9a8ee6db479b0b50a9
+#else
+    .acpi = NULL,
+#endif //LINUX_VERSION_CODE
+#endif //CONFIG_ACPI
+    .iommu = NULL
+};
+
+//_NO => number according to the PCI spec
+//_IDX => index in arrays (internal to this emulation layer only)
+#define BUS_NO_VALID(x) ((x) >= 0 && (x) <= 0xFF) //Check if a given bus# is valid according to the PCI spec
+#define DEV_NO_VALID(x) ((x) >= 0 && (x) < 32) //Check if a given dev# is valid (32 devices per bus => 0-31)
+#define FN_NO_VALID(x) ((x) >= 0 && (x) <= 7) //Check if a given function# is valid according to the PCI spec
+#define VBUS_IDX_VALID(x) ((x) >= 0 && (x) < MAX_VPCI_BUSES-1) //Check if virtual bus INDEX is valid for this emulator
+#define VBUS_IDX_USED(x) ((x) >= 0 && (x) < free_bus_idx) //Check if a given bus index is used now in the emulator
+#define VDEV_IDX_VALID(x) ((x) >= 0 && (x) < MAX_VPCI_DEVS-1) //Check if virtual device INDEX is valid for this emulator
+#define VDEV_IDX_USED(x) ((x) >= 0 && (x) < free_dev_idx) //Check if a given device index is used now in the emulator
+
+/**
+ * Checks whether a new virtual device can be registered under a given bus/device/function address
+ *
+ * @return 0 if the address is usable, -EINVAL if any of the numbers is out of range, -ENOMEM if there are no free
+ *         device slots left, or -EEXIST if a virtual device with the same address is already registered
+ */
+static inline int validate_bdf(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no)
+{
+    if (unlikely(!BUS_NO_VALID(bus_no))) {
+        pr_loc_err("%02x is not a valid PCI bus number", bus_no);
+        return -EINVAL;
+    }
+
+    if (unlikely(!DEV_NO_VALID(dev_no))) {
+        pr_loc_err("%02x is not a valid PCI device number", dev_no);
+        return -EINVAL;
+    }
+
+    if (unlikely(!FN_NO_VALID(fn_no))) {
+        pr_loc_err("%02x is not a valid PCI device function number", fn_no);
+        return -EINVAL;
+    }
+
+    //if the free device index is not valid it means we're out of free IDs for devices
+    if (unlikely(!VDEV_IDX_VALID(free_dev_idx))) {
+        pr_loc_bug("No more device indexes are available (max devs: %d)", MAX_VPCI_DEVS);
+        return -ENOMEM;
+    }
+
+    //If the device has the same B/D/F address it is a duplicate
+    for_each_dev_idx() {
+        if (
+            likely(*devices[i]->bus_no == bus_no) &&
+            unlikely(devices[i]->dev_no == dev_no && devices[i]->fn_no == fn_no)
+        ) {
+            pr_loc_err("Device bus=%02x dev=%02x fn=%02x already exists in vidx=%d", bus_no, dev_no, fn_no, i);
+            return -EEXIST;
+        }
+    };
+
+    return 0;
+}
+
+static inline struct pci_bus *get_vbus_by_number(unsigned char bus_no)
+{
+    for_each_bus_idx() { //Determine whether we need to rescan existing bus after adding a device OR scan a new root bus
+        if (buses[i]->number == bus_no) {
+            pr_loc_dbg("Found existing bus_no=%d @ bidx=%d", bus_no,
i);
+            return buses[i];
+            break;
+        }
+    };
+
+    return NULL;
+}
+
+/**
+ * Registers a virtual device described by a raw descriptor and makes the kernel discover it
+ *
+ * When the bus already exists it is simply rescanned. Otherwise a new root bus is scanned into existence in the
+ * virtual domain, which requires a free bus slot.
+ *
+ * @param descriptor struct pci_dev_descriptor or struct pci_pci_bridge_descriptor; only the pointer is stored, so the
+ *                   descriptor must stay valid for as long as the device is registered
+ * @return the registered device or ERR_PTR(-E) on failure
+ */
+const __must_check struct virtual_device *
+vpci_add_device(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, void *descriptor)
+{
+    pr_loc_dbg("Attempting to add vPCI device [printed below] @ bus=%02x dev=%02x fn=%02x", bus_no, dev_no, fn_no);
+    print_pci_descriptor(descriptor);
+
+    int error = validate_bdf(bus_no, dev_no, fn_no);
+    if (error != 0)
+        return ERR_PTR(error);
+
+    struct pci_bus *bus = get_vbus_by_number(bus_no);
+
+    //No existing bus - check if we can add a new one BEFORE allocating anything (so there's nothing to unwind/leak)
+    //if the free bus index is not valid it means we're out of free IDs for buses
+    if (!bus && unlikely(!VBUS_IDX_VALID(free_bus_idx))) {
+        pr_loc_bug("No more bus indexes are available (max buses: %d)", MAX_VPCI_BUSES);
+        return ERR_PTR(-ENOMEM);
+    }
+
+    //At this point we know the device can be added either to a new or existing bus so we have to populate their struct
+    //NOTE(review): kmalloc_or_exit_ptr() is defined elsewhere; presumably it returns an ERR_PTR on its own on failure
+    struct virtual_device *device;
+    kmalloc_or_exit_ptr(device, sizeof(struct virtual_device));
+
+    device->dev_no = dev_no;
+    device->fn_no = fn_no;
+    device->descriptor = descriptor;
+
+    if (bus) { //We have an existing bus to use
+        device->bus_no = &bus->number;
+        device->bus = bus;
+        devices[free_dev_idx++] = device;
+
+        //We cannot use "pci_scan_single_device" here in case there are mf devices
+        pci_rescan_bus(bus); //this cannot fail - it simply return max device num
+
+        pr_loc_inf("Added device with existing bus @ bus=%02x dev=%02x fn=%02x", *device->bus_no, device->dev_no,
+                   device->fn_no);
+        return device;
+    }
+
+    //Since we don't have a bus so we need to add the device with a mock dev_no and trigger scanning (which actually
+    // creates the bus). While it sounds counter-intuitive it is how the PCI subsystem works.
+    unsigned char tmp_bus_no = bus_no; //It will be valid for the time of initial scan
+    device->bus_no = &tmp_bus_no;
+    device->bus = NULL; //there's no bus struct until the scan below succeeds
+    devices[free_dev_idx++] = device;
+
+    bus = pci_scan_bus(*device->bus_no, &pci_shim_ops, &x86_sysdata);
+    if (!bus) {
+        pr_loc_err("pci_scan_bus failed - cannot add new bus");
+        devices[--free_dev_idx] = NULL; //Reverse adding: step back to the slot just taken and clear THAT slot
+        kfree(device); //Free memory for the device itself
+        return ERR_PTR(-EIO);
+    }
+
+    device->bus_no = &bus->number; //Replace temp bus number pointer with the actual bus struct pointer
+    device->bus = bus;
+    buses[free_bus_idx++] = bus;
+
+    /*
+     * There was a commit in v4.1 which made "subtle" change aimed to "cleanup control flow" by moving
+     * pci_bus_add_devices(bus) from drivers/pci/probe.c:pci_scan_bus() to a higher order
+     * arch/x86/pci/common.c:pcibios_scan_root().
+     * However this means that adding a bus with a domain different than 0 as used on x86 with BIOS/ACPI causes some
+     * resources to not be created (e.g. /sys/bus/pci/devices/..../config) which in turn breaks a ton of tools (lspci
+     * included). This is because pci_bus_add_devices() calls pci_create_sysfs_dev_files().
+     * It's important to mention that this is broken only for new buses - pci_rescan_bus() calls pci_bus_add_devices().
+     *
+     * Don't even fucking ask how long we looked for that...
+     *
+     * See https://github.com/torvalds/linux/commit/8e795840e4d89df3d594e736989212ee8a4a1fca#
+     */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0)
+    pr_loc_dbg("Linux >=v4.1 quirk: calling pci_bus_add_devices(bus) manually");
+    pci_bus_add_devices(bus);
+#endif
+
+    pr_loc_inf("Added device with new bus @ bus=%02x dev=%02x fn=%02x", *device->bus_no, device->dev_no, device->fn_no);
+    return device;
+}
+
+//Adds a single-function device (always as function 0); descriptors flagged as multifunction are rejected with -EINVAL
+const struct virtual_device *
+vpci_add_single_device(unsigned char bus_no, unsigned char dev_no, struct pci_dev_descriptor *descriptor)
+{
+    if (unlikely(IS_PCI_HEADER_MULTI(descriptor->header_type))) {
+        pr_loc_bug("Attempted to use %s() to add multifunction device."
+                   "Did you mean to use vpci_add_multifunction_device()?", __FUNCTION__);
+        return ERR_PTR(-EINVAL);
+    }
+
+    return vpci_add_device(bus_no, dev_no, 0x00, descriptor);
+}
+
+//Adds one function of a multifunction device; the multifunction bit is set in the PASSED descriptor (it is modified)
+const struct virtual_device *
+vpci_add_multifunction_device(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no,
+                              struct pci_dev_descriptor *descriptor)
+{
+    descriptor->header_type = PCI_HEADER_TO_MULTI(descriptor->header_type);
+
+    return vpci_add_device(bus_no, dev_no, fn_no, descriptor);
+}
+
+//Adds a single-function PCI-PCI bridge (always as function 0); multifunction descriptors are rejected with -EINVAL
+const struct virtual_device *
+vpci_add_single_bridge(unsigned char bus_no, unsigned char dev_no, struct pci_pci_bridge_descriptor *descriptor)
+{
+    if (unlikely(IS_PCI_HEADER_MULTI(descriptor->header_type))) {
+        pr_loc_bug("Attempted to use %s() to add multifunction device."
+                   "Did you mean to use vpci_add_multifunction_device()?", __FUNCTION__);
+        return ERR_PTR(-EINVAL);
+    }
+
+    return vpci_add_device(bus_no, dev_no, 0x00, descriptor);
+}
+
+//Adds one function of a multifunction bridge; the multifunction bit is set in the PASSED descriptor (it is modified)
+const struct virtual_device *
+vpci_add_multifunction_bridge(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no,
+                              struct pci_pci_bridge_descriptor *descriptor)
+{
+    descriptor->header_type = PCI_HEADER_TO_MULTI(descriptor->header_type);
+
+    return vpci_add_device(bus_no, dev_no, fn_no, descriptor);
+}
+
+/**
+ * Detaches all devices from virtual buses, frees all virtual devices, and removes all virtual buses
+ *
+ * @return always -EIO, even though the removal is performed (see "KNOWN BUGS" in the file header)
+ */
+int vpci_remove_all_devices_and_buses(void)
+{
+    //The order here is crucial - kernel WILL NOT remove references to devices on bus removal (and cause a KP)
+    //Doing this in any other order will cause an instant KP when PCI subsys tries to access its structures (e.g. lspci)
+    //However, this is still leaving dangling things in /sys/devices which cannot be removed (kernel bug?)
+
+    struct pci_dev *pci_dev, *pci_dev_n;
+    for_each_bus_idx() {
+        list_for_each_entry_safe(pci_dev, pci_dev_n, &buses[i]->devices, bus_list) {
+            pr_loc_dbg("Detaching vDEV dev=%02x fn=%02x from bus=%02x [add=%d]", PCI_SLOT(pci_dev->devfn),
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(5,0,0)
+                       PCI_FUNC(pci_dev->devfn), buses[i]->number, pci_dev->is_added);
+#else
+                       PCI_FUNC(pci_dev->devfn), buses[i]->number, 0); // Not found a replacement for pci_dev->is_added
+#endif
+            pci_stop_and_remove_bus_device(pci_dev);
+        }
+    }
+
+    for_each_dev_idx() {
+        pr_loc_dbg("Removing PCI vDEV @ didx %d", i);
+        kfree(devices[i]);
+        devices[i] = NULL;
+    };
+    free_dev_idx = 0;
+
+    for_each_bus_idx() {
+        pr_loc_dbg("Removing child PCI vBUS @ bidx %d", i);
+        pci_rescan_bus(buses[i]);
+        pci_remove_bus(buses[i]);
+        buses[i] = NULL;
+    }
+    free_bus_idx = 0;
+
+    pr_loc_inf("All vPCI devices and buses removed");
+
+    return -EIO; //This is hardcoded to return an error as there's a known bug (see "KNOWN BUGS" in the file header)
+}
diff --git a/internal/virtual_pci.h b/internal/virtual_pci.h
new file mode 100644
index 0000000..137c6a3
---
/dev/null +++ b/internal/virtual_pci.h @@ -0,0 +1,202 @@ +#ifndef REDPILL_VIRTUAL_PCI_H +#define REDPILL_VIRTUAL_PCI_H + +#include + +/* + * The following macros are useful for converting PCI_CLASS_* constants to individual values in structs + * 31 .................. 0 + * [class][sub][prog][rev] (each is 8 bit number, often times class-sub-prog is represented as on 24-bit int) + * + * For more information see header comment in the corresponding .c file. + */ +#define U24_CLASS_TO_U8_CLASS(x) (((x) >> 16) & 0xFF) +#define U24_CLASS_TO_U8_SUBCLASS(x) (((x) >> 8) & 0xFF) +#define U24_CLASS_TO_U8_PROGIF(x) ((x) & 0xFF) +#define U16_CLASS_TO_U8_CLASS(x) (((x) >> 8) & 0xFF) +#define U16_CLASS_TO_U8_SUBCLASS(x) ((x) & 0xFF) + +//Some helpful constants on top of what's in linux/pci_ids.h & linux/pci_regs.h +#define PCI_DSC_NO_INT_LINE 0xFF +#define PCI_DSC_NO_INT_PIN 0x00 +#define PCI_DSC_PROGIF_NONE 0x00 +#define PCI_DSC_REV_NONE 0x00 +#define PCI_DSC_NULL_BAR 0x00000000 +#define PCI_DSC_NULL_CAP 0x00 +#define PCI_DSC_RSV8 0x00 +#define PCI_DSC_RSV16 0x0000 +#define PCI_DSC_INF_LATENCY 0xFF //i.e. accepts any latency in access +#define PCI_DSC_ZERO_BURST 0xFF //i.e. doesn't need any length of burst +#define PCI_DSC_BIST_NONE 0x00 + +//See https://en.wikipedia.org/wiki/PCI_configuration_space#/media/File:Pci-config-space.svg +//This struct MUST be packed to allow for easy reading, see https://kernelnewbies.org/DataAlignment +struct pci_dev_descriptor { + u16 vid; //Vendor ID + u16 dev; //Device ID + + u16 command; //see PCI_COMMAND_*. Simply do "dev.command |= PCI_COMMAND_xxx" to set a flag. + u16 status; //see PCI_STATUS_*. Simply do "dev.status |= PCI_STATUS_xxx" to set a flag. 
+ + u8 rev_id; + u8 prog_if; // ] + u8 subclass; // ]-> prof_if, subclass, and class are normally represented as 24-bit class code + u8 class; // ] + + u8 cache_line_size; + u8 latency_timer; + u8 header_type; //see PCI_HEADER_TYPE_* + u8 bist; //see PCI_BIST_* + + u32 bar0; + u32 bar1; + u32 bar2; + u32 bar3; + u32 bar4; + u32 bar5; + + u32 cardbus_cis; + + u16 subsys_vid; + u16 subsys_id; + + u32 exp_rom_base_addr; //see PCI_ROM_* (esp PCI_ROM_ADDRESS_MASK) + + u8 cap_ptr; + u8 reserved_34_8_15; //should be 0x00 + u16 reserved_34_16_31; //should be 0x00 + + u32 reserved_38h; + + u8 interrupt_line; + u8 interrupt_pin; + u8 min_gnt; + u8 max_lat; +} __packed; +extern const struct pci_dev_descriptor pci_dev_conf_default_normal_dev; //See details in the .c file + +//Support for bridges wasn't tested +struct pci_pci_bridge_descriptor { + u16 vid; //Vendor ID + u16 dev; //Device ID + + u16 command; //see PCI_COMMAND_*. Simply do "dev.command |= PCI_COMMAND_xxx" to set a flag. + u16 status; //see PCI_STATUS_*. Simply do "dev.status |= PCI_STATUS_xxx" to set a flag. 
+ + u8 rev_id; + u8 prog_if; // ] + u8 subclass; // ]-> prof_if, subclass, and class are normally represented as 24-bit class code + u8 class; // ] + + u8 cache_line_size; + u8 latency_timer; + u8 header_type; //see PCI_HEADER_TYPE_* + u8 bist; //see PCI_BIST_* + + u32 bar0; + u32 bar1; + + u8 pri_bus_no; + u8 sec_bus_no; + u8 subord_bus_no; + u8 sec_lat_timer; + + u8 io_base; + u8 io_limit; + u16 sec_status; + + u16 mem_base; + u16 mem_limit; + + u16 prefetch_mem_base; + u16 prefetch_mem_limit; + + u32 prefetch_base_up32b; + u32 prefetch_limit_up32b; + + u16 io_base_up16b; + u16 io_limit_up16b; + + u8 cap_ptr; + u8 reserved_34_8_15; //should be 0x00 + u16 reserved_34_16_31; //should be 0x00 + + u32 exp_rom_base_addr; //see PCI_ROM_* (esp PCI_ROM_ADDRESS_MASK) + + u8 interrupt_line; + u8 interrupt_pin; + u16 bridge_ctrl; +} __packed; + +//This is currently not implemented +struct pci_dev_capability { + u8 cap_id; //see PCI_CAP_ID_*, set to 0x00 to denote null-capability + u8 cap_next; //offset where next capability exists, set to 0x00 to denote null-capability + u8 cap_data[]; +} __packed; + +/** + * Adds a single new device (along with the bus if needed) + * + * If you don't want to create the descriptor from scratch you can use "const struct pci_dev_conf_default_normal_dev" + * while setting some missing params (see .c file header for details). 
+ * Note: you CAN reuse the same descriptor under multiple BDFs (bus_no/dev_no/fn_no) + * + * @param bus_no (0x00 - 0xFF) + * @param dev_no (0x00 - 0x1F) + * @param descriptor Pointer to pci_dev_descriptor or pci_pci_bridge_descriptor + * @return virtual_device ptr or error pointer (ERR_PTR(-E)) + */ +const struct virtual_device * +vpci_add_single_device(unsigned char bus_no, unsigned char dev_no, struct pci_dev_descriptor *descriptor); + +/** + * See vpci_add_single_device() for details + */ +const struct virtual_device * +vpci_add_single_bridge(unsigned char bus_no, unsigned char dev_no, struct pci_pci_bridge_descriptor *descriptor); + + +/** + * Adds a new multifunction device (along with the bus if needed) + * + * Warning about multifunctional devices + * - this function has a slight limitation due to how Linux scans devices. You HAVE TO add fn_no=0 entry as the LAST + * one when calling it multiple times. Kernel scans devices only once for changes and if it finds fn=0 and it's the + * only one (i.e. you added fn=0 first) adding more functions will not populate them (as kernel will never re-scan + * the device). 
+ * - As per PCI spec Linux doesn't allow devices to have fn>0 if they don't have corresponding fn=0 entry + * + * @param bus_no (0x00 - 0xFF) + * @param dev_no (0x00 - 0x1F) + * @param fn_no (0x00 - 0x07) + * @param descriptor Pointer to pci_dev_descriptor or pci_pci_bridge_descriptor + * @return virtual_device ptr or error pointer (ERR_PTR(-E)) + */ +const struct virtual_device * +vpci_add_multifunction_device(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, + struct pci_dev_descriptor *descriptor); + +/** + * See vpci_add_multifunction_device() for details + */ +const struct virtual_device * +vpci_add_multifunction_bridge(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, + struct pci_pci_bridge_descriptor *descriptor); + +/** + * Removes all previously added devices and buses + * + * Known bug: while you can remove things and they will be gone from the system you CANNOT re-add the same under the + * same BDF coordinates. This will cause the kernel to complain about duplicated internal sysfs entries. It's most + * likely an old kernel bug (we tried everything... it doesn't work). 
+ * + * @param bus_no + * @param dev_no + * @param fn_no + * @param descriptor + * @return + */ +int vpci_remove_all_devices_and_buses(void); + +#endif //REDPILL_VIRTUAL_PCI_H diff --git a/lockfiles b/lockfiles new file mode 100644 index 0000000..6695d9d --- /dev/null +++ b/lockfiles @@ -0,0 +1,12 @@ +# no need change, this file hash will check by actions/cache +https://sourceforge.net/projects/dsgpl/files/toolkit/DSM7.0/ds.apollolake-7.0.dev.txz/download +https://sourceforge.net/projects/dsgpl/files/Tool%20Chain/DSM%207.0.0%20Tool%20Chains/Intel%20x86%20Linux%204.4.180%20%28Apollolake%29/apollolake-gcc750_glibc226_x86_64-GPL.txz/download + +https://sourceforge.net/projects/dsgpl/files/toolkit/DSM7.0/ds.bromolow-7.0.dev.txz/download +https://sourceforge.net/projects/dsgpl/files/Tool%20Chain/DSM%207.0.0%20Tool%20Chains/Intel%20x86%20linux%203.10.108%20%28Bromolow%29/bromolow-gcc750_glibc226_x86_64-GPL.txz/download + +https://sourceforge.net/projects/dsgpl/files/toolkit/DSM6.2/ds.apollolake-6.2.dev.txz/download +https://sourceforge.net/projects/dsgpl/files/Tool%20Chain/DSM%206.2.4%20Tool%20Chains/Intel%20x86%20Linux%204.4.59%20%28Apollolake%29/apollolake-gcc493_glibc220_linaro_x86_64-GPL.txz/download + +https://sourceforge.net/projects/dsgpl/files/toolkit/DSM6.2/ds.bromolow-6.2.dev.txz/download +https://sourceforge.net/projects/dsgpl/files/Tool%20Chain/DSM%206.2.4%20Tool%20Chains/Intel%20x86%20linux%203.10.105%20%28Bromolow%29/bromolow-gcc493_glibc220_linaro_x86_64-GPL.txz/download diff --git a/output/rp-apollolake-4.4.180-dev.ko.gz b/output/rp-apollolake-4.4.180-dev.ko.gz new file mode 100644 index 0000000..944b86d Binary files /dev/null and b/output/rp-apollolake-4.4.180-dev.ko.gz differ diff --git a/output/rp-apollolake-4.4.180-prod.ko.gz b/output/rp-apollolake-4.4.180-prod.ko.gz new file mode 100644 index 0000000..d22bd52 Binary files /dev/null and b/output/rp-apollolake-4.4.180-prod.ko.gz differ diff --git a/output/rp-broadwell-4.4.180-dev.ko.gz 
b/output/rp-broadwell-4.4.180-dev.ko.gz new file mode 100644 index 0000000..e74b8e1 Binary files /dev/null and b/output/rp-broadwell-4.4.180-dev.ko.gz differ diff --git a/output/rp-broadwell-4.4.180-prod.ko.gz b/output/rp-broadwell-4.4.180-prod.ko.gz new file mode 100644 index 0000000..e8ce57f Binary files /dev/null and b/output/rp-broadwell-4.4.180-prod.ko.gz differ diff --git a/output/rp-broadwellnk-4.4.180-dev.ko.gz b/output/rp-broadwellnk-4.4.180-dev.ko.gz new file mode 100644 index 0000000..ac176cb Binary files /dev/null and b/output/rp-broadwellnk-4.4.180-dev.ko.gz differ diff --git a/output/rp-broadwellnk-4.4.180-prod.ko.gz b/output/rp-broadwellnk-4.4.180-prod.ko.gz new file mode 100644 index 0000000..3732816 Binary files /dev/null and b/output/rp-broadwellnk-4.4.180-prod.ko.gz differ diff --git a/output/rp-bromolow-3.10.108-dev.ko.gz b/output/rp-bromolow-3.10.108-dev.ko.gz new file mode 100644 index 0000000..4e2f48f Binary files /dev/null and b/output/rp-bromolow-3.10.108-dev.ko.gz differ diff --git a/output/rp-bromolow-3.10.108-prod.ko.gz b/output/rp-bromolow-3.10.108-prod.ko.gz new file mode 100644 index 0000000..3213301 Binary files /dev/null and b/output/rp-bromolow-3.10.108-prod.ko.gz differ diff --git a/output/rp-denverton-4.4.180-dev.ko.gz b/output/rp-denverton-4.4.180-dev.ko.gz new file mode 100644 index 0000000..1dfde49 Binary files /dev/null and b/output/rp-denverton-4.4.180-dev.ko.gz differ diff --git a/output/rp-denverton-4.4.180-prod.ko.gz b/output/rp-denverton-4.4.180-prod.ko.gz new file mode 100644 index 0000000..bfb165e Binary files /dev/null and b/output/rp-denverton-4.4.180-prod.ko.gz differ diff --git a/output/rp-geminilake-4.4.180-dev.ko.gz b/output/rp-geminilake-4.4.180-dev.ko.gz new file mode 100644 index 0000000..9a64e5d Binary files /dev/null and b/output/rp-geminilake-4.4.180-dev.ko.gz differ diff --git a/output/rp-geminilake-4.4.180-prod.ko.gz b/output/rp-geminilake-4.4.180-prod.ko.gz new file mode 100644 index 0000000..b74370e 
Binary files /dev/null and b/output/rp-geminilake-4.4.180-prod.ko.gz differ diff --git a/output/rp-r1000-4.4.180-dev.ko.gz b/output/rp-r1000-4.4.180-dev.ko.gz new file mode 100644 index 0000000..5654946 Binary files /dev/null and b/output/rp-r1000-4.4.180-dev.ko.gz differ diff --git a/output/rp-r1000-4.4.180-prod.ko.gz b/output/rp-r1000-4.4.180-prod.ko.gz new file mode 100644 index 0000000..38ffede Binary files /dev/null and b/output/rp-r1000-4.4.180-prod.ko.gz differ diff --git a/output/rp-v1000-4.4.180-dev.ko.gz b/output/rp-v1000-4.4.180-dev.ko.gz new file mode 100644 index 0000000..8b6bce3 Binary files /dev/null and b/output/rp-v1000-4.4.180-dev.ko.gz differ diff --git a/output/rp-v1000-4.4.180-prod.ko.gz b/output/rp-v1000-4.4.180-prod.ko.gz new file mode 100644 index 0000000..00e2325 Binary files /dev/null and b/output/rp-v1000-4.4.180-prod.ko.gz differ diff --git a/redpill_main.c b/redpill_main.c new file mode 100644 index 0000000..e576ea1 --- /dev/null +++ b/redpill_main.c @@ -0,0 +1,126 @@ +#include "internal/stealth.h" +#include "redpill_main.h" +#include "config/runtime_config.h" +#include "common.h" //commonly used headers in this module +#include "internal/intercept_execve.h" //Handling of execve() replacement +#include "internal/scsi/scsi_notifier.h" //the missing pub/sub handler for SCSI driver +#include "internal/ioscheduler_fixer.h" //reset_elevator() to correct elevator= boot cmdline +#include "config/cmdline_delegate.h" //Parsing of kernel cmdline +#include "shim/boot_device_shim.h" //Registering & deciding between boot device shims +#include "shim/bios_shim.h" //Shimming various mfgBIOS functions to make them happy +#include "shim/block_fw_update_shim.h" //Prevent firmware update from running +#include "shim/disable_exectutables.h" //Disable common problematic executables +#include "shim/pci_shim.h" //Handles PCI devices emulation +#include "shim/storage/smart_shim.h" //Handles emulation of SMART data for devices without it +#include 
"shim/storage/sata_port_shim.h" //Handles VirtIO & SAS storage devices/disks peculiarities +#include "shim/uart_fixer.h" //Various fixes for UART weirdness +#include "shim/pmu_shim.h" //Emulates the platform management unit +#include "internal/helper/symbol_helper.h" //kln_func + +//Handle versioning stuff +#ifndef RP_VERSION_POSTFIX +#define RP_VERSION_POSTFIX "(NULL)" +#endif +#define RP_VERSION_MAJOR 0 +#define RP_VERSION_MINOR 6 +#define STRINGIFY(x) #x +#define VERSIONIFY(major,minor,postfix) "v" STRINGIFY(major) "." STRINGIFY(minor) "-" postfix +#define RP_VERSION_STR VERSIONIFY(RP_VERSION_MAJOR, RP_VERSION_MINOR, RP_VERSION_POSTFIX) + +/** + * Force panic to land on a stack trace + * + * This ensures we always get this on the stack trace so that we know it was an intentional crash due to a detected + * error rather than an accidental bug. + */ +void noinline __noreturn rp_crash(void) { + //Deliberately not reveling any context in case we're running in stealth mode + //This message is a generic one from arch/x86/kernel/dumpstack.c + panic("Fatal exception"); +} + +static int __init init_(void) +{ + int out = 0; + + pr_loc_dbg("================================================================================================"); + pr_loc_inf("RedPill %s loading...", RP_VERSION_STR); + + if ( + get_kln_p() < 0 //Find pointer of kallsyms_lookup_name function, This MUST be the first entry + || (out = extract_config_from_cmdline(¤t_config)) != 0 //This MUST be the second entry + || (out = populate_runtime_config(¤t_config)) != 0 //This MUST be third + || (out = register_uart_fixer(current_config.hw_config)) != 0 //Fix consoles ASAP + || (out = register_scsi_notifier()) != 0 //Load SCSI notifier so that boot shim (& others) can use it + || (out = register_sata_port_shim()) //This should be bfr boot shim as it can fix some things need by boot + || (out = register_boot_shim(¤t_config.boot_media)) //Make sure we're quick with this one + || (out = 
register_execve_interceptor()) != 0 //Register this reasonably high as other modules can use it blindly + || (out = register_bios_shim(current_config.hw_config)) != 0 + || (out = register_disable_executables_shim()) != 0 + || (out = register_fw_update_shim()) != 0 +#ifndef DBG_DISABLE_UNLOADABLE + || (out = register_pci_shim(current_config.hw_config)) != 0 //it's a core hw but it's not checked early +#endif + || (out = register_disk_smart_shim()) != 0 //provide fake SMART to userspace + || (out = register_pmu_shim(current_config.hw_config)) != 0 //this is used as early as mfgBIOS loads (=late) + || (out = initialize_stealth(¤t_config)) != 0 //Should be after any shims to let shims have real stuff + || (out = reset_elevator()) != 0 //Cosmetic, can be the last one + ) + goto error_out; + + pr_loc_inf("RedPill %s loaded successfully (stealth=%d)", RP_VERSION_STR, STEALTH_MODE); + return 0; + + error_out: + pr_loc_crt("RedPill %s cannot be loaded, initializer error=%d", RP_VERSION_STR, out); +#ifdef KP_ON_LOAD_ERROR + rp_crash(); +#else + return out; +#endif +} +module_init(init_); + +#if STEALTH_MODE < STEALTH_MODE_FULL //module cannot be unloaded in full-stealth anyway +static void __exit cleanup_(void) +{ + pr_loc_inf("RedPill %s unloading...", RP_VERSION_STR); + + int (*cleanup_handlers[])(void ) = { + uninitialize_stealth, + unregister_pmu_shim, + unregister_disk_smart_shim, +#ifndef DBG_DISABLE_UNLOADABLE + unregister_pci_shim, +#endif + unregister_fw_update_shim, + unregister_disable_executables_shim, + unregister_bios_shim, + unregister_execve_interceptor, + unregister_boot_shim, + unregister_sata_port_shim, + unregister_scsi_notifier, + unregister_uart_fixer + }; + + int out; + for (int i = 0; i < ARRAY_SIZE(cleanup_handlers); i++) { + pr_loc_dbg("Calling cleanup handler %pF<%p>", cleanup_handlers[i], cleanup_handlers[i]); + out = cleanup_handlers[i](); + if (out != 0) + pr_loc_wrn("Cleanup handler %pF failed with code=%d", cleanup_handlers[i], out); + } + + 
free_runtime_config(&current_config); //A special snowflake ;) + + pr_loc_inf("RedPill %s is dead", RP_VERSION_STR); + pr_loc_dbg("================================================================================================"); +} +module_exit(cleanup_); + +MODULE_AUTHOR("TTG"); +MODULE_VERSION(RP_VERSION_STR); +#endif + + +MODULE_LICENSE("GPL"); diff --git a/redpill_main.h b/redpill_main.h new file mode 100644 index 0000000..f14d536 --- /dev/null +++ b/redpill_main.h @@ -0,0 +1,4 @@ +#ifndef REDPILLLKM_REDPILL_MAIN_H +#define REDPILLLKM_REDPILL_MAIN_H + +#endif //REDPILLLKM_REDPILL_MAIN_H \ No newline at end of file diff --git a/shim/bios/bios_hwcap_shim.c b/shim/bios/bios_hwcap_shim.c new file mode 100644 index 0000000..e50c379 --- /dev/null +++ b/shim/bios/bios_hwcap_shim.c @@ -0,0 +1,117 @@ +/** + * Overrides GetHwCapability to provide additional capabilities for older platforms (e.g. 3615xs) + */ +#include "bios_hwcap_shim.h" +#include "../../common.h" +#include "../shim_base.h" +#include "../../internal/override/override_symbol.h" //overriding GetHWCapability +#include "../../config/platform_types.h" //hw_config, platform_has_hwmon_* +#include //CAPABILITY_*, CAPABILITY + +#define SHIM_NAME "mfgBIOS HW Capability" + +static const struct hw_config *hw_config = NULL; +static override_symbol_inst *GetHwCapability_ovs = NULL; + +static void dbg_compare_cap_value(SYNO_HW_CAPABILITY id, int computed_support) +{ +#ifdef DBG_HWCAP + int org_fout = -1; + CAPABILITY org_cap = { '\0' }; + org_cap.id = id; + int ovs_fout = call_overridden_symbol(org_fout, GetHwCapability_ovs, &org_cap); + + pr_loc_dbg("comparing GetHwCapability(id=%d)->support => computed=%d vs. 
real=%d [org_fout=%d, ovs_fout=%d]", id, + computed_support, org_cap.support, org_fout, ovs_fout); +#endif +} + +static int GetHwCapability_shim(CAPABILITY *cap) +{ + if (unlikely(!cap)) { + pr_loc_err("Got NULL-ptr to %s", __FUNCTION__); + return -EINVAL; + } + + switch (cap->id) { + case CAPABILITY_THERMAL: + cap->support = platform_has_hwmon_thermal(hw_config) ? 1 : 0; + dbg_compare_cap_value(cap->id, cap->support); + return 0; + + case CAPABILITY_CPU_TEMP: + cap->support = hw_config->has_cpu_temp; + dbg_compare_cap_value(cap->id, cap->support); + return 0; + + case CAPABILITY_FAN_RPM_RPT: + cap->support = platform_has_hwmon_fan_rpm(hw_config) ? 1 : 0; + dbg_compare_cap_value(cap->id, cap->support); + return 0; + + case CAPABILITY_DISK_LED_CTRL: + case CAPABILITY_AUTO_POWERON: + case CAPABILITY_S_LED_BREATH: + case CAPABILITY_MICROP_PWM: + case CAPABILITY_CARDREADER: + case CAPABILITY_LCM: { + if (unlikely(!GetHwCapability_ovs)) { + pr_loc_bug("%s() was called with proxy need when no OVS was available", __FUNCTION__); + return -EIO; + } + + int org_fout = -1; + int ovs_fout = call_overridden_symbol(org_fout, GetHwCapability_ovs, cap); + pr_loc_dbg("proxying GetHwCapability(id=%d)->support => real=%d [org_fout=%d, ovs_fout=%d]", cap->id, + cap->support, org_fout, ovs_fout); + + return org_fout; + } + + default: + pr_loc_err("unknown GetHwCapability(id=%d) => out=-EINVAL", cap->id); + return -EINVAL; + } +} + +int register_bios_hwcap_shim(const struct hw_config *hw) +{ + shim_reg_in(); + + if (unlikely(GetHwCapability_ovs)) + shim_reg_already(); + + hw_config = hw; + override_symbol_or_exit_int(GetHwCapability_ovs, "GetHwCapability", GetHwCapability_shim); + + shim_reg_ok(); + return 0; +} + +int unregister_bios_hwcap_shim(void) +{ + shim_ureg_in(); + + if (unlikely(!GetHwCapability_ovs)) + return 0; //this is deliberately a noop + + int out = restore_symbol(GetHwCapability_ovs); + if (unlikely(out != 0)) { + pr_loc_err("Failed to restore GetHwCapability - 
error=%d", out); + return out; + } + GetHwCapability_ovs = NULL; + + shim_ureg_ok(); + return 0; +} + +int reset_bios_hwcap_shim(void) +{ + shim_reset_in(); + put_overridden_symbol(GetHwCapability_ovs); + GetHwCapability_ovs = NULL; + + shim_reset_ok(); + return 0; +} \ No newline at end of file diff --git a/shim/bios/bios_hwcap_shim.h b/shim/bios/bios_hwcap_shim.h new file mode 100644 index 0000000..67308cf --- /dev/null +++ b/shim/bios/bios_hwcap_shim.h @@ -0,0 +1,23 @@ +#ifndef REDPILL_BIOS_HWCAP_SHIM_H +#define REDPILL_BIOS_HWCAP_SHIM_H + +#include //bool + +struct hw_config; +int register_bios_hwcap_shim(const struct hw_config *hw); + +/** + * This function should be called when we're unloading cleanly (=mfgBIOS is alive, we're going away). If the bios went + * away on its own call reset_bios_hwcap_shim() + */ +int unregister_bios_hwcap_shim(void); + +/** + * This function should be called when we're unloading because mfgBIOS went away. If the unload should be clean and + * restore all mfgBIOS elements to its original state (i.e. the mfgBIOS is still loaded and not currently unloading) + * call unregister_bios_hwcap_shim() instead. 
+ */ +int reset_bios_hwcap_shim(void); + + +#endif //REDPILL_BIOS_HWCAP_SHIM_H diff --git a/shim/bios/bios_hwmon_shim.c b/shim/bios/bios_hwmon_shim.c new file mode 100644 index 0000000..1ad7dd5 --- /dev/null +++ b/shim/bios/bios_hwmon_shim.c @@ -0,0 +1,374 @@ +/** + * Responds to all HWMON ("hardware monitor") calls coming to the mfgBIOS + * + * This submodule emulates both legitimate HWMON calls as well as "legacy" hardware monitoring calls get_fan_status() + */ +#include "bios_hwmon_shim.h" +#include "../shim_base.h" //shim_reg_in(), shim_reg_ok(), shim_reset_in(), shim_reset_ok() +#include "bios_shims_collection.h" //_shim_bios_module_entry() +#include "../../common.h" +#include "../../internal/helper/math_helper.h" //prandom_int_range_stable +#include "mfgbios_types.h" //HWMON_* +#include "../../config/platform_types.h" //HWMON_*_ID + +#define SHIM_NAME "mfgBIOS HW Monitor" +#ifdef DBG_HWMON +#define hwmon_pr_loc_dbg(...) pr_loc_dbg(__VA_ARGS__) +#else +#define hwmon_pr_loc_dbg(...) //noop +#endif + +/************************************* Standards for generating fake sensor readings **********************************/ +//Standard deviations for ongoing sensor readings +#define FAN_SPEED_DEV 50 //Fan speed (RPM) deviation +#define VOLT_DEV 5 //Voltage (mV) deviation +#define TEMP_DEV 2 //Temperature (°C) deviation + +#define FAKE_SURFACE_TEMP_MIN 25 +#define FAKE_SURFACE_TEMP_MAX 35 +#define FAKE_CPU_TEMP_MIN 55 +#define FAKE_CPU_TEMP_MAX 65 +#define FAKE_RPM_MIN 980 +#define FAKE_RPM_MAX 1000 + +//These percentages are precalculated as we cannot use FPU [safely and easily] in kernel space +#define FAKE_V33_MIN 3135 // mV (-5% of 3.3V) +#define FAKE_V33_MAX 3465 // mV (+5% of 3.3V) +#define FAKE_V5_MIN 4750 // mV (-5% of 5.0V) +#define FAKE_V5_MAX 5250 // mV (+5% of 5.0V) +#define FAKE_V12_MIN 11400 // mV (-5% of 12.0V) +#define FAKE_V12_MAX 12600 // mV (+5% of 12.0V) +#define fake_volt_min(type) (hwmon_sys_vsens_type_base[(type)][0]) // mV +#define 
fake_volt_max(type) (hwmon_sys_vsens_type_base[(type)][1]) // mV + +/************************************* Maps between hwmon sensor types & their names **********************************/ +static const char *hwmon_sys_thermal_zone_id_map[] = { + [HWMON_SYS_TZONE_NULL_ID] = "", + [HWMON_SYS_TZONE_REMOTE1_ID] = HWMON_SYS_TZONE_REMOTE1_NAME, + [HWMON_SYS_TZONE_REMOTE2_ID] = HWMON_SYS_TZONE_REMOTE2_NAME, + [HWMON_SYS_TZONE_LOCAL_ID] = HWMON_SYS_TZONE_LOCAL_NAME, + [HWMON_SYS_TZONE_SYSTEM_ID] = HWMON_SYS_TZONE_SYSTEM_NAME, + [HWMON_SYS_TZONE_ADT1_LOC_ID] = HWMON_SYS_TZONE_ADT1_LOC_NAME, + [HWMON_SYS_TZONE_ADT2_LOC_ID] = HWMON_SYS_TZONE_ADT2_LOC_NAME, +}; + +static const char *hwmon_sys_vsens_id_map[] = { + [HWMON_SYS_VSENS_NULL_ID] = "", + [HWMON_SYS_VSENS_VCC_ID] = HWMON_SYS_VSENS_VCC_NAME, + [HWMON_SYS_VSENS_VPP_ID] = HWMON_SYS_VSENS_VPP_NAME, + [HWMON_SYS_VSENS_V33_ID] = HWMON_SYS_VSENS_V33_NAME, + [HWMON_SYS_VSENS_V5_ID] = HWMON_SYS_VSENS_V5_NAME, + [HWMON_SYS_VSENS_V12_ID] = HWMON_SYS_VSENS_V12_NAME, + [HWMON_SYS_VSENS_ADT1_V33_ID] = HWMON_SYS_VSENS_ADT1_V33_NAME, + [HWMON_SYS_VSENS_ADT2_V33_ID] = HWMON_SYS_VSENS_ADT2_V33_NAME, +}; + +static const int hwmon_sys_vsens_type_base[][2] = { + [HWMON_SYS_VSENS_NULL_ID] = {0, 0}, + [HWMON_SYS_VSENS_VCC_ID] = { FAKE_V12_MIN, FAKE_V12_MAX }, //todo: this is probably per-model + [HWMON_SYS_VSENS_VPP_ID] = { 100, 500 }, //todo: if this is really peak-to-peak it should be small + [HWMON_SYS_VSENS_V33_ID] = { FAKE_V33_MIN, FAKE_V33_MAX }, + [HWMON_SYS_VSENS_V5_ID] = { FAKE_V5_MIN, FAKE_V5_MAX }, + [HWMON_SYS_VSENS_V12_ID] = { FAKE_V12_MIN, FAKE_V12_MAX }, + [HWMON_SYS_VSENS_ADT1_V33_ID] = { FAKE_V33_MIN, FAKE_V33_MAX }, + [HWMON_SYS_VSENS_ADT2_V33_ID] = { FAKE_V33_MIN, FAKE_V33_MAX }, +}; + +static const char *hwmon_sys_fan_id_map[] = { + [HWMON_SYS_FAN_NULL_ID] = "", + [HWMON_SYS_FAN1_ID] = HWMON_SYS_FAN1_RPM, + [HWMON_SYS_FAN2_ID] = HWMON_SYS_FAN2_RPM, + [HWMON_SYS_FAN3_ID] = HWMON_SYS_FAN3_RPM, + [HWMON_SYS_FAN4_ID] = 
HWMON_SYS_FAN4_RPM, +}; + +static const char *hwmon_hdd_bp_id_map[] = { + [HWMON_SYS_HDD_BP_NULL_ID] = "", + [HWMON_SYS_HDD_BP_DETECT_ID] = HWMON_HDD_BP_DETECT, + [HWMON_SYS_HDD_BP_ENABLE_ID] = HWMON_HDD_BP_ENABLE, +}; + +//todo: it's defined as __used as we know the structure but don't implement it yet +static const __used char *hwmon_psu_id_map[] = { + [HWMON_PSU_NULL_ID] = "", + [HWMON_PSU_PWR_IN_ID] = HWMON_PSU_SENSOR_PIN, + [HWMON_PSU_PWR_OUT_ID] = HWMON_PSU_SENSOR_POUT, +#if RP_MODULE_TARGET_VER == 6 + [HWMON_PSU_TEMP_ID] = HWMON_PSU_SENSOR_TEMP, +#elif RP_MODULE_TARGET_VER == 7 + [HWMON_PSU_TEMP1_ID] = HWMON_PSU_SENSOR_TEMP1, + [HWMON_PSU_TEMP2_ID] = HWMON_PSU_SENSOR_TEMP2, + [HWMON_PSU_TEMP3_ID] = HWMON_PSU_SENSOR_TEMP3, + [HWMON_PSU_FAN_VOLT] = HWMON_PSU_SENSOR_FAN_VOLT, +#endif + [HWMON_PSU_FAN_RPM_ID] = HWMON_PSU_SENSOR_FAN, + [HWMON_PSU_STATUS_ID] = HWMON_PSU_SENSOR_STATUS, +}; + +//todo: it's defined as __used as we know the structure but don't implement it yet +static const __used char *hwmon_current_id_map[] = { + [HWMON_SYS_CURR_NULL_ID] = "", + [HWMON_SYS_CURR_ADC_ID] = HWMON_SYS_CURR_ADC_NAME, +}; + +/************************************************ Various small tools *************************************************/ +static const struct hw_config_hwmon *hwmon_cfg = NULL; +#define guard_hwmon_cfg() \ + if (unlikely(!hwmon_cfg)) { \ + pr_loc_bug("Called %s without hwmon_cfg context being populated", __FUNCTION__); \ + return -EIO; \ + } + +#define guarded_strscpy(dest, src, count) \ + if (unlikely(strscpy(dest, src, count) == -E2BIG)) { \ + pr_loc_err("Failed to copy %lu bytes string", count); \ + return -EFAULT; \ + } + +/******************************************* mfgBIOS LKM replacement functions ****************************************/ +/** + * Provides fan status + * + * Currently the fan is always assumed to be running + */ +static int bios_get_fan_state(int no, enum MfgCompatFanStatus *status) +{ + hwmon_pr_loc_dbg("mfgBIOS: 
GET_FAN_STATE(%d) => MFGC_FAN_RUNNING", no); + *status = MFGC_FAN_RUNNING; + return 0; +} + +static int cur_cpu_temp = 0; +/** + * Returns CPU temperature across all cores + * + * Currently it always returns a fake value. However, it should only do so if running under hypervisor. In bare-metal + * scenario we can simply proxy to syno_cpu_temperature() [or not override that part at all]. + */ +static int bios_get_cpu_temp(SYNOCPUTEMP *temp) +{ + int fake_temp = prandom_int_range_stable(&cur_cpu_temp, TEMP_DEV, FAKE_CPU_TEMP_MIN, FAKE_CPU_TEMP_MAX); + temp->cpu_num = MAX_CPU; + for(int i=0; i < MAX_CPU; ++i) + temp->cpu_temp[i] = fake_temp; + + hwmon_pr_loc_dbg("mfgBIOS: GET_CPU_TEMP(surf=%d, cpuNum=%d) => %d°C", temp->blSurface, temp->cpu_num, fake_temp); + + return 0; +} + +static int *hwmon_thermals = NULL; +/** + * Returns various HWMON temperatures + * + * @param reading Pointer to save results + * @return 0 on success, -E on error + */ +static int bios_hwmon_get_thermal(SYNO_HWMON_SENSOR_TYPE *reading) +{ + guard_hwmon_cfg(); + if (unlikely(!hwmon_thermals)) + kzalloc_or_exit_int(hwmon_thermals, sizeof(int) * HWMON_SYS_THERMAL_ZONE_IDS); + + guarded_strscpy(reading->type_name, HWMON_SYS_THERMAL_NAME, sizeof(reading->type_name)); + hwmon_pr_loc_dbg("mfgBIOS: => %s(type=%s)", __FUNCTION__, reading->type_name); + + for (int i = 0; i < HWMON_SYS_THERMAL_ZONE_IDS; i++) { + if (hwmon_cfg->sys_thermal[i] == HWMON_SYS_TZONE_NULL_ID) + break; + + guarded_strscpy(reading->sensor[i].sensor_name, hwmon_sys_thermal_zone_id_map[hwmon_cfg->sys_thermal[i]], + sizeof(reading->sensor[i].sensor_name)); //Save the name of the sensor + hwmon_thermals[i] = prandom_int_range_stable(&hwmon_thermals[i], TEMP_DEV, FAKE_SURFACE_TEMP_MIN, + FAKE_SURFACE_TEMP_MAX); + snprintf(reading->sensor[i].value, sizeof(reading->sensor[i].value), "%d", hwmon_thermals[i]); + ++reading->sensor_num; + + hwmon_pr_loc_dbg("mfgBIOS: <= %s() %s->%d °C", __FUNCTION__, + 
hwmon_sys_thermal_zone_id_map[hwmon_cfg->sys_thermal[i]], hwmon_thermals[i]); + } + + return 0; +} + +static int *hwmon_voltages = NULL; +/** + * Returns various HWMON voltages + * + * @param reading Pointer to save results + * @return 0 on success, -E on error + */ +static int bios_hwmon_get_voltages(SYNO_HWMON_SENSOR_TYPE *reading) +{ + guard_hwmon_cfg(); + if (unlikely(!hwmon_voltages)) + kzalloc_or_exit_int(hwmon_voltages, sizeof(int) * HWMON_SYS_VOLTAGE_SENSOR_IDS); + + guarded_strscpy(reading->type_name, HWMON_SYS_VOLTAGE_NAME, sizeof(reading->type_name)); + hwmon_pr_loc_dbg("mfgBIOS: => %s(type=%s)", __FUNCTION__, reading->type_name); + + for (int i = 0; i < HWMON_SYS_VOLTAGE_SENSOR_IDS; i++) { + if (hwmon_cfg->sys_voltage[i] == HWMON_SYS_VSENS_NULL_ID) + break; + + guarded_strscpy(reading->sensor[i].sensor_name, hwmon_sys_vsens_id_map[hwmon_cfg->sys_voltage[i]], + sizeof(reading->sensor[i].sensor_name)); //Save the name of the sensor + hwmon_voltages[i] = prandom_int_range_stable(&hwmon_voltages[i], VOLT_DEV, + fake_volt_min(hwmon_cfg->sys_voltage[i]), + fake_volt_max(hwmon_cfg->sys_voltage[i])); + snprintf(reading->sensor[i].value, sizeof(reading->sensor[i].value), "%d", hwmon_voltages[i]); + ++reading->sensor_num; + + hwmon_pr_loc_dbg("mfgBIOS: <= %s() %s->%d mV", __FUNCTION__, hwmon_sys_vsens_id_map[hwmon_cfg->sys_voltage[i]], + hwmon_voltages[i]); + } + + return 0; +} + +static int *hwmon_fans_rpm = NULL; +/** + * Returns HWMON fan speeds + * + * @param reading Pointer to save results + * @return 0 on success, -E on error + */ +static int bios_hwmon_get_fans_rpm(SYNO_HWMON_SENSOR_TYPE *reading) +{ + guard_hwmon_cfg(); + if (unlikely(!hwmon_fans_rpm)) + kzalloc_or_exit_int(hwmon_fans_rpm, sizeof(int) * HWMON_SYS_FAN_RPM_IDS); + + guarded_strscpy(reading->type_name, HWMON_SYS_FAN_RPM_NAME, sizeof(reading->type_name)); + hwmon_pr_loc_dbg("mfgBIOS: => %s(type=%s)", __FUNCTION__, reading->type_name); + + for (int i = 0; i < HWMON_SYS_FAN_RPM_IDS; i++) { + if 
(hwmon_cfg->sys_fan_speed_rpm[i] == HWMON_SYS_FAN_NULL_ID) + break; + + guarded_strscpy(reading->sensor[i].sensor_name, hwmon_sys_fan_id_map[hwmon_cfg->sys_fan_speed_rpm[i]], + sizeof(reading->sensor[i].sensor_name)); //Save the name of the sensor + hwmon_fans_rpm[i] = prandom_int_range_stable(&hwmon_fans_rpm[i], FAN_SPEED_DEV, FAKE_RPM_MIN, FAKE_RPM_MAX); + snprintf(reading->sensor[i].value, sizeof(reading->sensor[i].value), "%d", hwmon_fans_rpm[i]); + ++reading->sensor_num; + + hwmon_pr_loc_dbg("mfgBIOS: <= %s() %s->%d RPM", __FUNCTION__, + hwmon_sys_fan_id_map[hwmon_cfg->sys_fan_speed_rpm[i]], hwmon_fans_rpm[i]); + } + + return 0; +} + +/** + * Returns HWMON disk backplane status + * + * Currently values here are just a guesstimation - we don't have a platform to see the real values but based on their + * names it's assumed these are number of detected and enabled disks. + * This probably should ask the SCSI driver for the number of disks overall (as no PC architecture has any clue about + * number of disks present physically if they don't register with the system). On a real hardware it's probably checked + * by some contact switch/IR sensor to check if a given slot for a disk isn't empty. 
+ * + * @param reading Pointer to save results + * @return 0 on success, -E on error + */ +static int bios_hwmon_get_hdd_backplane(SYNO_HWMON_SENSOR_TYPE *reading) +{ + guard_hwmon_cfg(); + const int hdd_num = 1; //todo: this should be taken from SCSI layer + + guarded_strscpy(reading->type_name, HWMON_HDD_BP_STATUS_NAME, sizeof(reading->type_name)); + hwmon_pr_loc_dbg("mfgBIOS: => %s(type=%s)", __FUNCTION__, reading->type_name); + + for (int i = 0; i < HWMON_SYS_HDD_BP_IDS; i++) { + if (hwmon_cfg->hdd_backplane[i] == HWMON_SYS_HDD_BP_NULL_ID) + break; + + guarded_strscpy(reading->sensor[i].sensor_name, hwmon_hdd_bp_id_map[hwmon_cfg->hdd_backplane[i]], + sizeof(reading->sensor[i].sensor_name)); //Save the name of the sensor + snprintf(reading->sensor[i].value, sizeof(reading->sensor[i].value), "%d", hdd_num); + ++reading->sensor_num; + + hwmon_pr_loc_dbg("mfgBIOS: <= %s() %s->%d", __FUNCTION__, hwmon_hdd_bp_id_map[hwmon_cfg->hdd_backplane[i]], + hdd_num); + } + + return 0; +} + +/** + * (Should) Return HWMON power supplies status + * + * Currently this command is not implemented and always return an error as we haven't yet seen any devices using it. + * + * @param reading Pointer to save results + * @return 0 on success, -E on error + */ +static int bios_hwmon_get_psu_status(struct hw_config_hwmon *hwc, SYNO_HWMON_SENSOR_TYPE *reading) +{ + pr_loc_wrn("mfgBIOS: **UNIMPLEMENTED** %s(type=%s)", __FUNCTION__, HWMON_PSU_STATUS_NAME); + return -EIO; //todo: we haven't [yet] seen a device using this +} + +/** + * (Should) Return HWMON power consumption + * + * Currently this command is not implemented and always return an error as we haven't yet seen any devices using it. 
+ * + * @param hwc Platform HWMON configuration + * @param reading Pointer to save results + * + * @return 0 on success, -E on error + */ +static int bios_hwmon_get_current(struct hw_config_hwmon *hwc, SYNO_HWMON_SENSOR_TYPE *reading) +{ + pr_loc_wrn("mfgBIOS: **UNIMPLEMENTED** %s(type=%s)", __FUNCTION__, HWMON_SYS_CURRENT_NAME); + return -EIO; //todo: we haven't [yet] seen a device using this +} + + +/************************************************ mfgBIOS shim interface **********************************************/ +int shim_bios_module_hwmon_entries(const struct hw_config *hw) +{ + shim_reg_in(); + hwmon_cfg = &hw->hwmon; + + _shim_bios_module_entry(VTK_GET_FAN_STATE, bios_get_fan_state); + + if (hw->has_cpu_temp) + _shim_bios_module_entry(VTK_GET_CPU_TEMP, bios_get_cpu_temp); + + if (platform_has_hwmon_thermal(hw)) + _shim_bios_module_entry(VTK_GET_HWMON_THERMAL, bios_hwmon_get_thermal); + + if (platform_has_hwmon_voltage(hw)) + _shim_bios_module_entry(VTK_GET_HWMON_VOLTAGE, bios_hwmon_get_voltages); + + if (platform_has_hwmon_fan_rpm(hw)) + _shim_bios_module_entry(VTK_GET_HWMON_FAN_RPM, bios_hwmon_get_fans_rpm); + + if (platform_has_hwmon_hdd_bpl(hw)) + _shim_bios_module_entry(VTK_GET_HWMON_HDD_BKPLANE, bios_hwmon_get_hdd_backplane); + + if (platform_has_hwmon_psu_status(hw)) + _shim_bios_module_entry(VTK_GET_HWMON_PSU_STATUS, bios_hwmon_get_psu_status); + + if (platform_has_hwmon_current_sens(hw)) + _shim_bios_module_entry(VTK_GET_HWMON_CURRENT, bios_hwmon_get_current); + + shim_reg_ok(); + return 0; +} + +int reset_bios_module_hwmon_shim(void) +{ + shim_reset_in(); + + hwmon_cfg = NULL; + cur_cpu_temp = 0; + try_kfree(hwmon_thermals); + try_kfree(hwmon_voltages); + try_kfree(hwmon_fans_rpm); + + shim_reset_ok(); + return 0; +} \ No newline at end of file diff --git a/shim/bios/bios_hwmon_shim.h b/shim/bios/bios_hwmon_shim.h new file mode 100644 index 0000000..e9c1b41 --- /dev/null +++ b/shim/bios/bios_hwmon_shim.h @@ -0,0 +1,12 @@ +#ifndef 
/**
 * Replacement for the VTK_SET_GPIO_PIN vtable entry: logs the request and reports success without touching hardware
 *
 * The vtable documents this entry as int f(MfgCompatGPIOPin *) and that structure consists of exactly two ints
 * (num, val). Only these two are logged - reading pin[2]/pin[3] would go past the end of the caller's structure.
 *
 * @param pin Pointer to the GPIO request, accessed as int[2]: [0] = pin number, [1] = value
 * @return always 0
 */
static int shim_set_gpio_pin_usable(int *pin)
{
    pr_loc_dbg("set_gpio pin info 0 %d", pin[0]);
    pr_loc_dbg("set_gpio pin info 1 %d", pin[1]);
    return 0;
}
unknown bios functions **************************************/ +DECLARE_NULL_ZERO_INT(VTK_SET_FAN_STATE); +DECLARE_NULL_ZERO_INT(VTK_SET_DISK_LED); +DECLARE_NULL_ZERO_INT(VTK_SET_PWR_LED); +// DECLARE_NULL_ZERO_INT(VTK_SET_GPIO_PIN); +DECLARE_NULL_ZERO_INT(VTK_SET_GPIO_PIN_BLINK); +DECLARE_NULL_ZERO_INT(VTK_SET_ALR_LED); +DECLARE_NULL_ZERO_INT(VTK_SET_BUZ_CLR); +DECLARE_NULL_ZERO_INT(VTK_SET_CPU_FAN_STATUS); +DECLARE_NULL_ZERO_INT(VTK_SET_PHY_LED); +DECLARE_NULL_ZERO_INT(VTK_SET_HDD_ACT_LED); +DECLARE_NULL_ZERO_INT(VTK_GET_MICROP_ID); +DECLARE_NULL_ZERO_INT(VTK_SET_MICROP_ID); + +/********************************************** mfgBIOS shimming routines *********************************************/ +static unsigned long *vtable_start = NULL; //set when shim_bios_module is called() +void _shim_bios_module_entry(const unsigned int idx, const void *new_sym_ptr) +{ + if (unlikely(!vtable_start)) { + pr_loc_bug("%s called without vtable start populated - are you calling it outside of shim_bios_module scope?!", + __FUNCTION__); + return; + } + + if (unlikely(idx > VTK_SIZE-1)) { + pr_loc_bug("Attempted shim on index %d - out of range", idx); + return; + } + + //The vtable entry is either not shimmed OR already shimmed with what we set before OR already *was* shimmed but + // external (i.e. mfgBIOS) code overrode the shimmed entry. + //We only save the original entry if it was set by the mfgBIOS (so not shimmed yet or ext. 
override situation) + + //it was already shimmed and the shim is still there => noop + if (cust_shimmed_entries[idx] && cust_shimmed_entries[idx] == vtable_start[idx]) + return; + + pr_loc_dbg("mfgBIOS vtable [%d] originally %ps<%p> will now be %ps<%p>", idx, (void *) vtable_start[idx], + (void *) vtable_start[idx], new_sym_ptr, new_sym_ptr); + org_shimmed_entries[idx] = vtable_start[idx]; + cust_shimmed_entries[idx] = (unsigned long)new_sym_ptr; + vtable_start[idx] = cust_shimmed_entries[idx]; +} + +/** + * Prints a table of memory between vtable_start and vtable_end, trying to resolve symbols as it goes + */ +static void print_debug_symbols(const unsigned long *vtable_end) +{ + if (unlikely(!vtable_start)) { + pr_loc_dbg("Cannot print - no vtable address"); + return; + } + + int im = vtable_end - vtable_start; //Should be multiplies of 8 in general (64 bit alignment) + pr_loc_dbg("Will print %d bytes of memory from %p", im, vtable_start); + + unsigned long *call_ptr = vtable_start; + unsigned char *byte_ptr = (char *)vtable_start; + for (int i = 0; i < im; i++, byte_ptr++) { + pr_loc_dbg_raw("%02x ", *byte_ptr); + if ((i+1) % 8 == 0) { + pr_loc_dbg_raw(" [%02d] 0x%03x \t%p\t%pS\n", i / 8, i-7, (void *) (*call_ptr), (void *) (*call_ptr)); + call_ptr++; + } + } + pr_loc_dbg_raw("\n"); + + pr_loc_dbg("Finished printing memory at %p", byte_ptr); +} + +/** + * Applies shims to the vtable used by the bios + * + * These calls may execute multiple times as the mfgBIOS is loading. 
+ * + * @return true when shimming succeeded, false otherwise + */ +bool shim_bios_module(const struct hw_config *hw, struct module *mod, unsigned long *vt_start, unsigned long *vt_end) +{ + if (unlikely(!vt_start || !vt_end)) { + pr_loc_bug("%s called without vtable start or vt_end populated?!", __FUNCTION__); + return false; + } + + vtable_start = vt_start; + + print_debug_symbols(vt_end); + SHIM_TO_NULL_ZERO_INT(VTK_SET_FAN_STATE); + SHIM_TO_NULL_ZERO_INT(VTK_SET_DISK_LED); + SHIM_TO_NULL_ZERO_INT(VTK_SET_PWR_LED); + // SHIM_TO_NULL_ZERO_INT(VTK_SET_GPIO_PIN); + _shim_bios_module_entry(VTK_GET_GPIO_PIN, shim_get_gpio_pin_usable); + _shim_bios_module_entry(VTK_SET_GPIO_PIN, shim_set_gpio_pin_usable); + SHIM_TO_NULL_ZERO_INT(VTK_SET_GPIO_PIN_BLINK); + SHIM_TO_NULL_ZERO_INT(VTK_SET_ALR_LED); + _shim_bios_module_entry(VTK_GET_BUZ_CLR, bios_get_buz_clr); + SHIM_TO_NULL_ZERO_INT(VTK_SET_BUZ_CLR); + SHIM_TO_NULL_ZERO_INT(VTK_SET_CPU_FAN_STATUS); + SHIM_TO_NULL_ZERO_INT(VTK_SET_PHY_LED); + SHIM_TO_NULL_ZERO_INT(VTK_SET_HDD_ACT_LED); + SHIM_TO_NULL_ZERO_INT(VTK_GET_MICROP_ID); + SHIM_TO_NULL_ZERO_INT(VTK_SET_MICROP_ID); + + if (hw->emulate_rtc) { + pr_loc_dbg("Platform requires RTC proxy - enabling"); + register_rtc_proxy_shim(); + _shim_bios_module_entry(VTK_RTC_GET_TIME, rtc_proxy_get_time); + _shim_bios_module_entry(VTK_RTC_SET_TIME, rtc_proxy_set_time); + _shim_bios_module_entry(VTK_RTC_INT_APWR, rtc_proxy_init_auto_power_on); + _shim_bios_module_entry(VTK_RTC_GET_APWR, rtc_proxy_get_auto_power_on); + _shim_bios_module_entry(VTK_RTC_SET_APWR, rtc_proxy_set_auto_power_on); + _shim_bios_module_entry(VTK_RTC_UINT_APWR, rtc_proxy_uinit_auto_power_on); + } else { + pr_loc_dbg("Native RTC supported - not enabling proxy (emulate_rtc=%d)", hw->emulate_rtc ? 1:0); + } + + shim_bios_module_hwmon_entries(hw); //Shim all hardware environment stuff (temps, fans, etc.) 
+ + print_debug_symbols(vt_end); + + return true; +} + +bool unshim_bios_module(unsigned long *vt_start, unsigned long *vt_end) +{ + for (int i = 0; i < VTK_SIZE; i++) { + //make sure to check the cust_ one as org_ may contain NULL ptrs and we should restore them as NULL if they were + // so originally + if (!cust_shimmed_entries[i]) + continue; + + pr_loc_dbg("Restoring vtable [%d] from %ps<%p> to %ps<%p>", i, (void *) vt_start[i], + (void *) vt_start[i], (void *) org_shimmed_entries[i], (void *) org_shimmed_entries[i]); + vtable_start[i] = org_shimmed_entries[i]; + } + + reset_bios_shims(); + + return true; +} + +void reset_bios_shims(void) +{ + memset(org_shimmed_entries, 0, sizeof(org_shimmed_entries)); + memset(cust_shimmed_entries, 0, sizeof(cust_shimmed_entries)); + unregister_rtc_proxy_shim(); + reset_bios_module_hwmon_shim(); +} + +/******************************** Kernel-level shims related to mfgBIOS functionality *********************************/ +extern void *funcSYNOSATADiskLedCtrl; //if this explodes one day we need to do kernel_has_symbol() on it dynamically + +/* + * Syno kernel has ifdefs for "MY_ABC_HERE" for syno_ahci_disk_led_enable() and syno_ahci_disk_led_enable_by_port() so + * we need to check if they really exist and we cannot determine it statically + */ +static override_symbol_inst *ov_funcSYNOSATADiskLedCtrl = NULL; +static override_symbol_inst *ov_syno_ahci_disk_led_enable = NULL; +static override_symbol_inst *ov_syno_ahci_disk_led_enable_by_port = NULL; + +static int funcSYNOSATADiskLedCtrl_shim(int host_num, SYNO_DISK_LED led) +{ + pr_loc_dbg("Received %s with host=%d led=%d", __FUNCTION__, host_num, led); + //exit code is not used anywhere in the public code, so this value is an educated guess based on libata-scsi.c + return 0; +} + +int syno_ahci_disk_led_enable_shim(const unsigned short host_num, const int value) +{ + pr_loc_dbg("Received %s with host=%d val=%d", __FUNCTION__, host_num, value); + return 0; +} + +int 
syno_ahci_disk_led_enable_by_port_shim(const unsigned short port, const int value) +{ + pr_loc_dbg("Received %s with port=%d val=%d", __FUNCTION__, port, value); + return 0; +} + +int shim_disk_leds_ctrl(const struct hw_config *hw) +{ + //we're checking this here to remove knowledge of "struct hw_config" from bios_shim letting others know it's NOT + //the place to do BIOS shimming decisions + if (!hw->fix_disk_led_ctrl) + return 0; + + pr_loc_dbg("Shimming disk led control API"); + + int out; +#if LINUX_VERSION_CODE < KERNEL_VERSION(5,0,0) + //funcSYNOSATADiskLedCtrl exists on (almost?) all platforms, but it's null on some... go figure ;) + if (funcSYNOSATADiskLedCtrl) { + ov_funcSYNOSATADiskLedCtrl = override_symbol("funcSYNOSATADiskLedCtrl", funcSYNOSATADiskLedCtrl_shim); + if (unlikely(IS_ERR(ov_funcSYNOSATADiskLedCtrl))) { + out = PTR_ERR(ov_funcSYNOSATADiskLedCtrl); + ov_funcSYNOSATADiskLedCtrl = NULL; + pr_loc_err("Failed to shim funcSYNOSATADiskLedCtrl, error=%d", out); + return out; + } + } +#endif + + if (kernel_has_symbol("syno_ahci_disk_led_enable")) { + ov_syno_ahci_disk_led_enable = override_symbol("syno_ahci_disk_led_enable", syno_ahci_disk_led_enable_shim); + if (unlikely(IS_ERR(ov_syno_ahci_disk_led_enable))) { + out = PTR_ERR(ov_syno_ahci_disk_led_enable); + ov_syno_ahci_disk_led_enable = NULL; + pr_loc_err("Failed to shim syno_ahci_disk_led_enable, error=%d", out); + return out; + } + } + + if (kernel_has_symbol("syno_ahci_disk_led_enable_by_port")) { + ov_syno_ahci_disk_led_enable_by_port = override_symbol("syno_ahci_disk_led_enable_by_port", syno_ahci_disk_led_enable_by_port_shim); + if (unlikely(IS_ERR(ov_syno_ahci_disk_led_enable_by_port))) { + out = PTR_ERR(ov_syno_ahci_disk_led_enable_by_port); + ov_syno_ahci_disk_led_enable_by_port = NULL; + pr_loc_err("Failed to shim syno_ahci_disk_led_enable_by_port, error=%d", out); + return out; + } + } + + pr_loc_dbg("Finished %s", __FUNCTION__); + return 0; +} + +int unshim_disk_leds_ctrl(void) +{ + 
pr_loc_dbg("Unshimming disk led control API"); + + int out; + bool failed = false; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5,0,0) + if (ov_funcSYNOSATADiskLedCtrl) { + out = restore_symbol(ov_funcSYNOSATADiskLedCtrl); + ov_funcSYNOSATADiskLedCtrl = NULL; + if (unlikely(out != 0)) { //falling through to try to unshim others too + pr_loc_err("Failed to unshim funcSYNOSATADiskLedCtrl, error=%d", out); + failed = true; + } + } +#endif + + if (ov_syno_ahci_disk_led_enable) { + out = restore_symbol(ov_syno_ahci_disk_led_enable); + ov_syno_ahci_disk_led_enable = NULL; + if (unlikely(out != 0)) { //falling through to try to unshim others too + pr_loc_err("Failed to unshim syno_ahci_disk_led_enable, error=%d", out); + failed = true; + } + } + + if (ov_syno_ahci_disk_led_enable_by_port) { + out = restore_symbol(ov_syno_ahci_disk_led_enable_by_port); + ov_syno_ahci_disk_led_enable_by_port = NULL; + if (unlikely(out != 0)) { + pr_loc_err("Failed to unshim syno_ahci_disk_led_enable_by_port, error=%d", out); + failed = true; + } + } + + out = failed ? 
-EINVAL : 0; + pr_loc_dbg("Finished %s (exit=%d)", __FUNCTION__, out); + + return out; +} diff --git a/shim/bios/bios_shims_collection.h b/shim/bios/bios_shims_collection.h new file mode 100644 index 0000000..7ff60e6 --- /dev/null +++ b/shim/bios/bios_shims_collection.h @@ -0,0 +1,56 @@ +#ifndef REDPILL_BIOS_SHIMS_COLLECTION_H +#define REDPILL_BIOS_SHIMS_COLLECTION_H + +#include //bool +#include //struct module + + +typedef struct hw_config hw_config_bios_shim_col; +/** + * Insert all the shims to the mfgBIOS + */ +bool shim_bios_module(const hw_config_bios_shim_col *hw, struct module *mod, unsigned long *vtable_start, unsigned long *vtable_end); + +/** + * Removes all shims from the mfgBIOS & uninitializes all components used to shim bios module + */ +bool unshim_bios_module(unsigned long *vtable_start, unsigned long *vtable_end); + +/** + * Forcefully forgets all original calls used to do unshim_bios() & cleans-up all other components + * + * This function is useful when the BIOS unloads without this module being unloaded - then there's no point in keeping + * stale entries. This will also prevent warning regarding already-shimmed BIOS when it reloads. + */ +void reset_bios_shims(void); + +/** + * Nullifies manual disks LED control + * + * The underlying reason for this isn't known but sometimes the manual LED control for disks (presumably used to blink + * to identify disks from the UI) will cause a kernel panic pointing to the internals of mfgBIOS. The functionality is + * implemented in the kernel (funcSYNOSATADiskLedCtrl) but it delegates the task to mfgBIOS via ioctl() + * To prevent the crash we replace the manual LED altering subsystem in models which support it (because not all do). 
+ * + * The kernel panic is most likely caused by the gap between early and full bios shimming where the bios will be early + * shimmed, continue setting something and in between gets an ioctl to the LED api + * + * @return 0 on success or -E on error + */ +int shim_disk_leds_ctrl(const struct hw_config *hw); + +/** + * Reverses what shim_disk_leds_ctrl did + * + * You CAN call this function any time, if shims weren't registered (yet) it will be a noop-call. + * + * @return 0 on success or -E on error + */ +int unshim_disk_leds_ctrl(void); + +/** + * Used by mfgBIOS sub-shims. Should NOT be called from ANY other context as it depends on the internal state. + */ +void _shim_bios_module_entry(unsigned int idx, const void *new_sym_ptr); + +#endif //REDPILL_BIOS_SHIMS_COLLECTION_H diff --git a/shim/bios/mfgbios_types.h b/shim/bios/mfgbios_types.h new file mode 100644 index 0000000..35e2838 --- /dev/null +++ b/shim/bios/mfgbios_types.h @@ -0,0 +1,273 @@ +/* + * This file contains structures/types used for compatibility with the mfg bios + * + * These are not original types used by the mfg BIOS, but rather recreations from available documentation unless + * available in GPLed source (in which case they're #include'd) + */ +#ifndef REDPILL_SYNOBIOS_COMPAT_H +#define REDPILL_SYNOBIOS_COMPAT_H + +#include //SYNO_* & SYNO* types + +//Missing HWMON types... only in some toolchains +#ifndef HWMON_PSU_SENSOR_TEMP1 +#define HWMON_PSU_SENSOR_TEMP1 "temperature_1" +#endif +#ifndef HWMON_PSU_SENSOR_TEMP2 +#define HWMON_PSU_SENSOR_TEMP2 "temperature_2" +#endif +#ifndef HWMON_PSU_SENSOR_TEMP3 +#define HWMON_PSU_SENSOR_TEMP3 "temperature_3" +#endif +#ifndef HWMON_PSU_SENSOR_FAN_VOLT +#define HWMON_PSU_SENSOR_FAN_VOLT "fan_voltage" +#endif + +//These aren't defined in synobios.h (while sensors from other categories are... 
go figure) +#define HWMON_SYS_TZONE_REMOTE1_NAME "Remote1" +#define HWMON_SYS_TZONE_REMOTE2_NAME "Remote2" +#define HWMON_SYS_TZONE_LOCAL_NAME "Local" +#define HWMON_SYS_TZONE_SYSTEM_NAME "system" +#define HWMON_SYS_TZONE_ADT1_LOC_NAME "ADT1 Local" +#define HWMON_SYS_TZONE_ADT2_LOC_NAME "ADT2 Local" +#define HWMON_SYS_VSENS_VCC_NAME "VCC" +#define HWMON_SYS_VSENS_VPP_NAME "VPP" +#define HWMON_SYS_VSENS_V33_NAME "V33" +#define HWMON_SYS_VSENS_V5_NAME "V5" +#define HWMON_SYS_VSENS_V12_NAME "V12" +#define HWMON_SYS_VSENS_ADT1_V33_NAME "ADT1 V33" +#define HWMON_SYS_VSENS_ADT2_V33_NAME "ADT2 V33" +#define HWMON_SYS_CURR_ADC_NAME "ADC" + +struct MfgCompatTime { + unsigned char second; + unsigned char minute; + unsigned char hours; + unsigned char wkday; + unsigned char day; + unsigned char month; + unsigned char year; +}; + +enum MfgCompatFanStatus { + MFGC_FAN_UNKNOWN = -1, + MFGC_FAN_STOPPED = 0, + MFGC_FAN_RUNNING = 1, +}; + +enum MfgCompatFanSpeed { + MFGC_FAN_SPD_STOP1, + MFGC_FAN_SPD_STOP2, + + //Fan speeds in 1-8 scale + MFGC_FAN_SPD_1, + MFGC_FAN_SPD_2, + MFGC_FAN_SPD_3, + MFGC_FAN_SPD_4, + MFGC_FAN_SPD_5, + MFGC_FAN_SPD_6, + MFGC_FAN_SPD_7, + MFGC_FAN_SPD_8, + + //Fan speeds in 1-18 scale, used for testing only + MFGC_FAN_SPD_TST_1, + MFGC_FAN_SPD_TST_2, + MFGC_FAN_SPD_TST_3, + MFGC_FAN_SPD_TST_4, + MFGC_FAN_SPD_TST_5, + MFGC_FAN_SPD_TST_6, + MFGC_FAN_SPD_TST_7, + MFGC_FAN_SPD_TST_8, + MFGC_FAN_SPD_TST_9, + MFGC_FAN_SPD_TST_10, + MFGC_FAN_SPD_TST_11, + MFGC_FAN_SPD_TST_12, + MFGC_FAN_SPD_TST_13, + MFGC_FAN_SPD_TST_14, + MFGC_FAN_SPD_TST_15, + MFGC_FAN_SPD_TST_16, + MFGC_FAN_SPD_TST_17, + MFGC_FAN_SPD_TST_18, + + MFGC_FAN_SPD_PWM = 1000 +}; + +struct MfgCompatHddLedStatus { + int hdd_no; + SYNO_DISK_LED state; + int pos_name_len; //includes null terminator + char *pos_name; +}; + +enum MfgCompatGenericLedState { + MFGC_LED_OFF, + MFGC_LED_LIT, + MFGC_LED_BLINK, +}; + +struct MfgCompatMemoryByte { + unsigned char offset; + unsigned char value; +}; + +struct 
MfgCompatMemoryUInt { + unsigned char address; + unsigned char value; +}; + +struct MfgCompatCPLDReg { + unsigned char hddLedCtrl; + unsigned char hddPwrState; + unsigned char hwModelNum; + unsigned char fanState; +}; + +struct MfgCompatGPIOPin { + int num; + int val; +}; + +struct MfgCompatRtcEvent { + unsigned char minutes; //BCD format + unsigned char hours; //BCD format + unsigned char weekdays; //7 bit field, Sun => Sat +}; + +struct MfgCompatAutoPwrOn { + int num; + bool enabled; + struct MfgCompatRtcEvent events[100]; +}; + +struct MfgCompatPowerStatus { + bool primary_ok; + bool secondary_ok; +}; + +enum MfgCompatBackplaneStatus { + MFGC_BKPLANE_UNK = -1, + MFGC_BKPLANE_ERR = 0, + MFGC_BKPLANE_OK = 1, +}; + +struct MfgCompatPWMState { + int channel; + int freq_hz; + int duty_cycle; + int rpm; +}; + +struct MfgCompatSuperIOMem { + unsigned char ldn; + unsigned char reg; + unsigned char val; +}; + +struct MfgCompatBusPacket { + long num; + long len; + char msg[128]; +}; + +struct MfgCompatCPUState { + unsigned int cpu; + char clock[16]; +#if defined(CONFIG_SYNO_GRANTLEY) || defined(CONFIG_SYNO_PURLEY) + unsigned int core[CONFIG_SYNO_MULTI_CPU_NUM]; +#endif +}; + +enum MfgCompatCopyBtnState { + MFGC_BTN_DOWN = 0, //aka pressed + MFGC_BTN_UP = 1, //aka not pressed +}; + +typedef int (*mfgc_void_cb)(void); //int f(void) +typedef int (*mfgc_time_cb)(struct MfgCompatTime *); //int f(MfgCompatTime *) +typedef int (*mfgc_get_fan_state_cb)(int, enum MfgCompatFanStatus *); //int f(int, MfgCompatFanStatus *) +typedef int (*mfgc_set_fan_state_cb)(enum MfgCompatFanStatus, enum MfgCompatFanSpeed); //int f(MfgCompatFanStatus, MfgCompatFanSpeed) +typedef int (*mfgc_hwmon_sensor_cb)(SYNO_HWMON_SENSOR_TYPE *); //int f(SYNO_HWMON_SENSOR_TYPE *) +//TODO: this list is not complete - add all callback types + +#ifdef CONFIG_SYNO_PORT_MAPPING_V2 +typedef int (*mfgc_set_hdd_led_cb)(struct MfgCompatHddLedStatus *status); +#else +typedef int (*mfgc_set_hdd_led_cb)(int, SYNO_DISK_LED 
state); //int f(void) +#endif + +//List of known indexes in the mfgBIOS vtable. The table can be recovered by shim/bios_shim.c. Some of its entries are +// replaced by shim/bios/bios_shims_collection.c +//The following indexes were determined based on +// - Jun's module code +// - Looking at the symbols when BIOS is loaded +// - Observing logs from mfgBIOS +#define VTK_STRUCT_OWNER 0 //you shouldn't really modify this +#define VTK_GET_BRAND 1 //Sig: int f(void) +#define VTK_GET_MODEL 2 //Sig: int f(void) +#define VTK_GET_CPLD_VER 3 //Sig: int f(void) +#define VTK_RTC_GET_TIME 4 //Sig: int f(MfgCompatTime *) +#define VTK_RTC_SET_TIME 5 //Sig: int f(MfgCompatTime *) +#define VTK_GET_FAN_STATE 6 //Sig: int f(int, MfgCompatFanStatus *) | present in: DS918+; not: DS3615xs +#define VTK_SET_FAN_STATE 7 //Sig: int f(MfgCompatFanStatus, MfgCompatFanSpeed) +#define VTK_GET_SYS_TEMP 8 //Sig: int f(SYNO_THERMAL_TEMP *) | present in: DS3615xs; not: DS918+ +#define VTK_GET_CPU_TEMP 9 //Sig: int f(SYNOCPUTEMP *) +#define VTK_SET_DISK_LED 10 //Sig: varies, see mfgc_set_hdd_led_cb type +#define VTK_SET_PWR_LED 11 //Sig: int f(MfgCompatGenericLedState) +#define VTK_GET_CPLD_REG 12 //Sig: int f(MfgCompatCPLDReg *) +#define VTK_SET_PMU_MEM_BYTE 13 //Sig: int f(MfgCompatMemoryByte *) +#define VTK_GET_PMU_MEM_BYTE 14 //Sig: int f(MfgCompatMemoryByte *) +#define VTK_SET_GPIO_PIN 15 //Sig: int f(MfgCompatGPIOPin *) +#define VTK_GET_GPIO_PIN 16 //Sig: int f(MfgCompatGPIOPin *) +#define VTK_SET_GPIO_PIN_BLINK 17 //Sig: int f(MfgCompatGPIOPin *) +#define VTK_RTC_SET_APWR 18 //Sig: int f(MfgCompatAutoPwrOn *) | set auto power on +#define VTK_RTC_GET_APWR 19 //Sig: int f(MfgCompatAutoPwrOn *) | get auto power on +#define VTK_RTC_INT_APWR 20 //Sig: int f(void) | initialize auto power on. present in: DS918+; not: DS3615xs +#define VTK_RTC_UINT_APWR 21 //Sig: int f(void) | uninitialize auto power on. 
present in: DS918+; not: DS3615xs +#define VTK_SET_ALR_LED 22 //Sig: int f(MfgCompatGenericLedState) | alarm led +#define VTK_GET_BUZ_CLR 23 //Sig: int f(unsigned char *) +#define VTK_SET_BUZ_CLR 24 //Sig: int f(unsigned char) +#define VTK_GET_PWR_STATUS 25 //Sig: int f(MfgCompatPowerStatus *) +#define VTK_GET_BKPLANE_STATUS 26 //Sig: int f(MfgCompatBackplaneStatus *) | backplane status +#define VTK_INT_MOD_TPE 27 +#define VTK_UNINIT 28 //Sig: int f(void) +#define VTK_SET_CPU_FAN_STATUS 29 //Sig: int f(MfgCompatFanStatus, MfgCompatFanSpeed) +#define VTK_SET_PHY_LED 30 //Sig: int f(MfgCompatGenericLedState) | present in: DS620; not: DS3615xs, DS918+ +#define VTK_SET_HDD_ACT_LED 31 //Sig: int f(MfgCompatGenericLedState) +#define VTK_SET_PWM 32 //Sig: int f(MfgCompatPWMState *) +#define VTK_GET_MICROP_ID 33 +#define VTK_SET_MICROP_ID 34 //Sig: int f(void) +#define VTK_GET_SIO_MEM 35 //Sig: int f(MfgCompatSuperIOMem *) +#define VTK_SET_SIO_MEM 36 //Sig: int f(MfgCompatSuperIOMem *) +#define VTK_SEND_LCD_PKT 37 //Sig: int f(MfgCompatBusPacket *) +#define VTK_GET_MEM_UINT 38 //Sig: int f(MfgCompatMemoryUInt *) +#define VTK_SET_MEM_UINT 39 //Sig: int f(MfgCompatMemoryUInt *) +#define VTK_GET_CPU_INF 40 //Sig: void f(MfgCompatCPUState*, uint) +#define VTK_SET_HA_LED 41 //present in: RC18015xs+ (and other HA units? 
don't have other); not: DS3615xs,DS918+ +#define VTK_GET_CPY_BTN 42 //Sig: MfgCompatCopyBtnState f(void) | present in: DS718+ + +#if RP_MODULE_TARGET_VER == 6 +#define VTK_SET_SAFE_REMOVE_LED 43 //Sig: int f(bool) | present in: DS3615xs; not: DS918+ +#define VTK_GET_SYS_CURRENT 44 //Sig: int f(SYNO_DISK_INTF_INFO *) +#define VTK_GET_HWMON_FAN_RPM 45 //Sig: int f(SYNO_HWMON_SENSOR_TYPE *) +#define VTK_GET_HWMON_PSU_STATUS 46 //Sig: int f(SYNO_HWMON_SENSOR_TYPE *) +#define VTK_GET_HWMON_VOLTAGE 47 //Sig: int f(SYNO_HWMON_SENSOR_TYPE *) +#define VTK_GET_HWMON_HDD_BKPLANE 48 //Sig: int f(SYNO_HWMON_SENSOR_TYPE *) +#define VTK_GET_HWMON_THERMAL 49 //Sig: int f(SYNO_HWMON_SENSOR_TYPE *) +#define VTK_GET_HWMON_CURRENT 50 //Sig: int f(SYNO_HWMON_SENSOR_TYPE *) + +#elif RP_MODULE_TARGET_VER == 7 //moved VTK_SET_SAFE_REMOVE_LED and VTK_GET_SYS_CURRENT below HWMON_ stuff (sic!) +#define VTK_GET_HWMON_FAN_RPM 43 //Sig: int f(SYNO_HWMON_SENSOR_TYPE *) +#define VTK_GET_HWMON_PSU_STATUS 44 //Sig: int f(SYNO_HWMON_SENSOR_TYPE *) +#define VTK_GET_HWMON_VOLTAGE 45 //Sig: int f(SYNO_HWMON_SENSOR_TYPE *) +#define VTK_GET_HWMON_HDD_BKPLANE 46 //Sig: int f(SYNO_HWMON_SENSOR_TYPE *) +#define VTK_GET_HWMON_THERMAL 47 //Sig: int f(SYNO_HWMON_SENSOR_TYPE *) +#define VTK_GET_HWMON_CURRENT 48 //Sig: int f(SYNO_HWMON_SENSOR_TYPE *) +#define VTK_SET_SAFE_REMOVE_LED 49 //Sig: int f(bool) | present in: DS3615xs; not: DS918+ +#define VTK_GET_SYS_CURRENT 50 //Sig: int f(SYNO_DISK_INTF_INFO *) +#endif //RP_MODULE_TARGET_VER + +#define VTK_GET_HDD_IFACE 51 + +#define VTK_SIZE 52 + +#endif //REDPILL_SYNOBIOS_COMPAT_H \ No newline at end of file diff --git a/shim/bios/rtc_proxy.c b/shim/bios/rtc_proxy.c new file mode 100644 index 0000000..e7551d3 --- /dev/null +++ b/shim/bios/rtc_proxy.c @@ -0,0 +1,270 @@ +/* + * Proxy between an ACPI RTC and mfgBIOS calls + * + * Some platforms don't use a standard RTC chip but implement a custom platform-specific one. 
To handle different chips + * mfgBIOS uses a standardized interface. This works perfectly fine when mfgBIOS expects an ACPI-complaint RTC to be + * present. However, it does not work when a given platform is expected to contain some 3rd-party I2C clock chip. + * + * Motorola MC146818 was a de facto standard RTC chip when PC/AT emerged. Later on other clones started emulating the + * interface. This become so prevalent that ACPI standardized the basic interface of RTC on PC-compatibile systems as + * MC146818 interface. Thus, this module assumes that mfgBIOS calls can be proxied to MC146818 interface (which will + * work on any ACPI-complaint system and any sane hypervisor). + * + * As some of the functions are rarely used (and often even completely broken on many systems), like RTC wakeup they're + * not really implemented but instead mocked to look "just good enough". + * + * References: + * - https://www.kernel.org/doc/html/latest/admin-guide/rtc.html + * - https://embedded.fm/blog/2018/6/5/an-introduction-to-bcd + */ +#include "../../common.h" +#include "rtc_proxy.h" +#include "../shim_base.h" //shim_*() +#include +#include + +#define SHIM_NAME "RTC proxy" + +//Confused? 
See https://slate.com/technology/2016/02/the-math-behind-leap-years.html +#define year_is_leap(year) !((year)%((year)%25?4:16)) +#define mfg_year_to_full(val) ((val)+1900) //MfgCompatTime counts years as offset from 1900 +#define mfg_month_to_normal(val) ((val)+1) //MfgCompatTime has 0-based months +#define normal_month_to_mfg(val) ((val)-1) +static const unsigned char months_to_days[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + +static struct MfgCompatAutoPwrOn *auto_power_on_mock = NULL; + +inline static void debug_print_mfg_time(struct MfgCompatTime *mfgTime) +{ + pr_loc_dbg("MfgCompatTime raw data: sec=%u min=%u hr=%u wkd=%u day=%u mth=%u yr=%u", mfgTime->second, + mfgTime->minute, mfgTime->hours, mfgTime->wkday, mfgTime->day, mfgTime->month, mfgTime->year); +} + +/** + * Standardizes & abstracts RTC reading + * + * Reading & writing RTC requires conversion of values based on some registers and chips. This function simply accept + * pointers to YY-MM-DD WeekDay HHmmss values and does all the conversions for you after reading. + */ +static void read_rtc_num(unsigned char *yy, unsigned char *mm, unsigned char *dd, unsigned char *wd, unsigned char *hr, + unsigned char *mi, unsigned char *ss) +{ + //As the clock uses IRQ 8 normally we need to atomically stop it to read all values and restore it later + unsigned long flags; + spin_lock_irqsave(&rtc_lock, flags); + const unsigned char rtc_control = CMOS_READ(RTC_CONTROL); + + //There are two formats how RTCs can report time: normal numbers or an ancient BCD. Currently (at least in Linux v4) + //BCD is always used for MC146818 (but this can change). This we need to handle both cases. 
+ if (likely(RTC_ALWAYS_BCD) || (rtc_control & RTC_DM_BINARY)) { //a common idiom, search for RTC_ALWAYS_BCD in kernel + pr_loc_dbg("Reading BCD-based RTC"); + *yy = bcd2bin(CMOS_READ(RTC_YEAR)); + *mm = bcd2bin(CMOS_READ(RTC_MONTH)); + *dd = bcd2bin(CMOS_READ(RTC_DAY_OF_MONTH)); + *wd = bcd2bin(CMOS_READ(RTC_DAY_OF_WEEK)); + *hr = bcd2bin(CMOS_READ(RTC_HOURS)); + *mi = bcd2bin(CMOS_READ(RTC_MINUTES)); + *ss = bcd2bin(CMOS_READ(RTC_SECONDS)); + } else { + pr_loc_dbg("Reading binary-based RTC"); + *yy = CMOS_READ(RTC_YEAR); + *mm = CMOS_READ(RTC_MONTH); + *dd = CMOS_READ(RTC_DAY_OF_MONTH); + *wd = CMOS_READ(RTC_DAY_OF_WEEK); + *hr = CMOS_READ(RTC_HOURS); + *mi = CMOS_READ(RTC_MINUTES); + *ss = CMOS_READ(RTC_SECONDS); + } + spin_unlock_irqrestore(&rtc_lock, flags); +} + +/** + * Standardizes & abstracts RTC time setting + * + * Reading & writing RTC requires conversion of values based on some registers and chips. This function simply accepts + * values to be set in YY-MM-DD WeekDay HHmmss format and does all the conversions/locking/freq resets for you. 
+ */ +static void write_rtc_num(unsigned char yy, unsigned char mm, unsigned char dd, unsigned char wd, unsigned char hr, + unsigned char mi, unsigned char ss) +{ + unsigned long flags; + spin_lock_irqsave(&rtc_lock, flags); + unsigned char rtc_control = CMOS_READ(RTC_CONTROL); //RTC control register value locked for us + unsigned char rtc_freq_tick = CMOS_READ(RTC_FREQ_SELECT); + CMOS_WRITE((rtc_control|RTC_SET), RTC_CONTROL); //enter clock setting state + CMOS_WRITE(rtc_freq_tick|RTC_DIV_RESET2, RTC_FREQ_SELECT); //this should reset the ticks + + if (likely(RTC_ALWAYS_BCD) || (rtc_control & RTC_DM_BINARY)) { //a common idiom, search for RTC_ALWAYS_BCD in kernel + pr_loc_dbg("Writing BCD-based RTC"); + CMOS_WRITE(bin2bcd(yy), RTC_YEAR); + CMOS_WRITE(bin2bcd(mm), RTC_MONTH); + CMOS_WRITE(bin2bcd(dd), RTC_DAY_OF_MONTH); + CMOS_WRITE(bin2bcd(wd), RTC_DAY_OF_WEEK); + CMOS_WRITE(bin2bcd(hr), RTC_HOURS); + CMOS_WRITE(bin2bcd(mi), RTC_MINUTES); + CMOS_WRITE(bin2bcd(ss), RTC_SECONDS); + } else { + pr_loc_dbg("Writing binary-based RTC"); + CMOS_WRITE(yy, RTC_YEAR); + CMOS_WRITE(mm, RTC_MONTH); + CMOS_WRITE(dd, RTC_DAY_OF_MONTH); + CMOS_WRITE(wd, RTC_DAY_OF_WEEK); + CMOS_WRITE(hr, RTC_HOURS); + CMOS_WRITE(mi, RTC_MINUTES); + CMOS_WRITE(ss, RTC_SECONDS); + } + + CMOS_WRITE(rtc_control, RTC_CONTROL); //restore original control register + CMOS_WRITE(rtc_freq_tick, RTC_FREQ_SELECT); //...and the ticks too + spin_unlock_irqrestore(&rtc_lock, flags); +} + +int rtc_proxy_get_time(struct MfgCompatTime *mfgTime) +{ + if (mfgTime == NULL) { + pr_loc_wrn("Got an invalid call to %s", __FUNCTION__); + return -EPERM; + } + + debug_print_mfg_time(mfgTime); + + unsigned char rtc_year; //mfgTime uses offset from 1900 while RTC uses 2-digit format (see below) + unsigned char rtc_month; //mfgTime uses 0-11 while RTC uses 1-12 + read_rtc_num(&rtc_year, &rtc_month, &mfgTime->day, &mfgTime->wkday, &mfgTime->hours, &mfgTime->minute, + &mfgTime->second); + + //So yeah, it's 2021 and PC RTCs still 
use 2 digit year so we have to do a classic Y2K hack with epoch + //RTC nowadays is assumed to have a range of 1970-2069 which forces two assumptions: + // - Values 0-69 indicate 2000-2069 + // - Values 70-99 indicate 1970-1999 + //As the mfgTime->year uses value of years since 1900 without magic rollovers we need to correct it by 100 for the + //2000s epoch. Search for e.g. "mc146818_decode_year" in Linux, that's (sadly) a common method. + mfgTime->year = (likely(rtc_year < 70)) ? rtc_year + 100 : rtc_year; + mfgTime->month = normal_month_to_mfg(rtc_month); + + pr_loc_inf("Time got from RTC is %4d-%02d-%02d %2d:%02d:%02d (UTC)", mfg_year_to_full(mfgTime->year), + mfg_month_to_normal(mfgTime->month), mfgTime->day, mfgTime->hours, mfgTime->minute, mfgTime->second); + debug_print_mfg_time(mfgTime); + + return 0; +} + +int rtc_proxy_set_time(struct MfgCompatTime *mfgTime) +{ + if (mfgTime == NULL) { + pr_loc_wrn("Got an invalid call to %s", __FUNCTION__); + return -EPERM; + } + + debug_print_mfg_time(mfgTime); + + //Ok, this is PROBABLY not needed but we don't want to crash the RTC if an invalid value is passed to this function + //Also, we are aware of leap seconds but do you think 1984 hardware is? (spoiler alert: no) + //Hours are written to the RTC as a 24h clock value, so the valid range is 0-23 (24 is NOT a valid hour) + if (unlikely(mfgTime->second > 59 || mfgTime->minute > 59 || mfgTime->hours > 23 || mfgTime->wkday > 6 || + mfgTime->day == 0 || mfgTime->month > 11)) { + pr_loc_wrn("Got invalid generic RTC data in %s", __FUNCTION__); + return -EINVAL; + } + + //Year validation needs to take leap years into account.
This code can be shorter but it's expanded for readability + if (unlikely(mfgTime->month == 1 && year_is_leap(mfgTime->year))) { + if (mfgTime->day > (months_to_days[mfgTime->month] + 1)) { + pr_loc_wrn("Invalid RTC leap year day (%u > %u) of month %u in %s", mfgTime->day, + (months_to_days[mfgTime->month] + 1), mfgTime->month, __FUNCTION__); + return -EINVAL; + } + } else if (mfgTime->day > months_to_days[mfgTime->month]) { + pr_loc_wrn("Invalid RTC regular year day (%u > %u) of month %u in %s", mfgTime->day, + months_to_days[mfgTime->month], mfgTime->month, __FUNCTION__); + return -EINVAL; + } + + //mfgTime->year uses a positive offset since 1900. However, ACPI-compliant RTC cannot handle range higher than + //1970-2069 (see comment in rtc_proxy_get_time()). + unsigned char rtc_year = mfgTime->year; //mfgTime uses offset from 1900 while RTC uses 2-digit format (see below) + if (unlikely(rtc_year > 169)) { //This cannot be valid as RTC cannot handle >2069 + pr_loc_wrn("Year overflow in %s", __FUNCTION__); + return -EINVAL; + } else if(likely(rtc_year >= 100)) { //>= and not >: offset 100 is year 2000 which the RTC stores as 0 + rtc_year -= 100; //RTC uses 0-69 for 2000s so we need to shift mfgTime 1900-now offset by 100 + } + + unsigned char rtc_month = mfg_month_to_normal(mfgTime->month); //mfgTime uses 0-11 while RTC uses 1-12 + + write_rtc_num(rtc_year, rtc_month, mfgTime->day, mfgTime->wkday, mfgTime->hours, mfgTime->minute, mfgTime->second); + + pr_loc_inf("RTC time set to %4d-%02d-%02d %2d:%02d:%02d (UTC)", mfg_year_to_full(mfgTime->year), + mfg_month_to_normal(mfgTime->month), mfgTime->day, mfgTime->hours, mfgTime->minute, mfgTime->second); + + return 0; +} + +int rtc_proxy_init_auto_power_on(void) +{ + pr_loc_dbg("RTC power-on \"enabled\" via %s", __FUNCTION__); + + return 0; +} + +int rtc_proxy_get_auto_power_on(struct MfgCompatAutoPwrOn *mfgPwrOn) +{ + if (unlikely(!auto_power_on_mock)) { + pr_loc_bug("Auto power-on mock is not initialized - did you forget to call register?"); + return -EINVAL; + } + +
//The caller-provided buffer is the memcpy() destination below - reject a NULL pointer instead of crashing + if (unlikely(!mfgPwrOn)) { + pr_loc_wrn("Got an invalid call to %s", __FUNCTION__); + return -EINVAL; + } + + pr_loc_dbg("Mocking auto-power GET on RTC"); + memcpy(mfgPwrOn, auto_power_on_mock, sizeof(struct MfgCompatAutoPwrOn)); + + return 0; +} + +int rtc_proxy_set_auto_power_on(struct MfgCompatAutoPwrOn *mfgPwrOn) +{ + if (!mfgPwrOn || mfgPwrOn->num < 0) { //That's just either a bogus call or a stupid call + pr_loc_wrn("Got an invalid call to %s", __FUNCTION__); + return -EINVAL; + } + + //Without a registered shim there is no buffer to copy into (auto_power_on_mock is NULL until register is called) + if (unlikely(!auto_power_on_mock)) { + pr_loc_bug("Auto power-on mock is not initialized - did you forget to call register?"); + return -EINVAL; + } + + pr_loc_dbg("Mocking auto-power SET on RTC"); + memcpy(auto_power_on_mock, mfgPwrOn, sizeof(struct MfgCompatAutoPwrOn)); + + return 0; +} + +int rtc_proxy_uinit_auto_power_on(void) +{ + pr_loc_dbg("RTC power-on \"disabled\" via %s", __FUNCTION__); + + return 0; +} + +int unregister_rtc_proxy_shim(void) +{ + shim_ureg_in(); + + //This is not an error as bios shim collections calls unregister blindly + if (!auto_power_on_mock) { + pr_loc_dbg("The %s shim is not registered - ignoring", SHIM_NAME); + return 0; + } + + kfree(auto_power_on_mock); + auto_power_on_mock = NULL; + shim_ureg_ok(); + return 0; +} + +int register_rtc_proxy_shim(void) +{ + shim_reg_in(); + + if (unlikely(auto_power_on_mock)) { + pr_loc_wrn("The %s shim is already registered - unregistering first", SHIM_NAME); + unregister_rtc_proxy_shim(); + } + + kzalloc_or_exit_int(auto_power_on_mock, sizeof(struct MfgCompatAutoPwrOn)); + shim_reg_ok(); + return 0; +} diff --git a/shim/bios/rtc_proxy.h b/shim/bios/rtc_proxy.h new file mode 100644 index 0000000..fd82136 --- /dev/null +++ b/shim/bios/rtc_proxy.h @@ -0,0 +1,42 @@ +#ifndef REDPILL_RTC_PROXY_H +#define REDPILL_RTC_PROXY_H + +#include "mfgbios_types.h" + +/** + * Gets current RTC time (shims VTK_RTC_GET_TIME) + */ +int rtc_proxy_get_time(struct MfgCompatTime *mfgTime); + +/** + * Sets current RTC time (shims VTK_RTC_SET_TIME) + */ +int rtc_proxy_set_time(struct MfgCompatTime *mfgTime); + +/** + * Enables auto-power on functionality (shims VTK_RTC_INT_APWR). + * + * This is not REALLY implemented and only shimmed.
Many motherboards don't handle it well or only support it from + * certain ACPI PSTATEs. It is even more unsupported by hypervisors. If you REALLY need it create a bug report or a PR. + */ +int rtc_proxy_init_auto_power_on(void); + +/** + * Gets time for auto-power on (shims VTK_RTC_GET_APWR). **See note for rtc_proxy_init_auto_power_on()** + */ +int rtc_proxy_get_auto_power_on(struct MfgCompatAutoPwrOn *mfgPwrOn); + +/** + * Sets time for auto-power on (shims VTK_RTC_SET_APWR). **See note for rtc_proxy_init_auto_power_on()** + */ +int rtc_proxy_set_auto_power_on(struct MfgCompatAutoPwrOn *mfgPwrOn); + +/** + * Disables auto-power on functionality (shims VTK_RTC_UINT_APWR). **See note for rtc_proxy_init_auto_power_on()** + */ +int rtc_proxy_uinit_auto_power_on(void); + +int unregister_rtc_proxy_shim(void); +int register_rtc_proxy_shim(void); + +#endif //REDPILL_RTC_PROXY_H diff --git a/shim/bios_shim.c b/shim/bios_shim.c new file mode 100644 index 0000000..b9cc85c --- /dev/null +++ b/shim/bios_shim.c @@ -0,0 +1,398 @@ +/* + * This shim is responsible for making the hardware<>DSM glue (aka mfg BIOS) happy by providing nullified + * implementations of hardware-specific calls. + * + * The process relies on the fact that the original BIOS module keeps a vtable table in memory. That vtable contains + * pointers to various functions used to communicate with the hardware. The most tricky part here is finding the vtable + * and replacing calls in it with our own. Original ELF contains unscrambled symbols for the table under "synobios_ops" + * name (see: readelf --syms /usr/lib/modules/synobios-dis.ko | grep 'synobios_ops'). However this is NOT a symbol which + * gets exported to the kernel. + * + * When the Linux kernel loads a module it does a couple of things after loading the .ko file. 
From the important ones + * here it reads the ELF, loads the .symtab (all symbols), processes all relocations, and then does a cleanup of stuff + * which is not needed after module is loaded. The earliest hook normally available for other modules is the access + * through modules notification API. It will provide access to the module as soon as its binary is loaded and init + * function is executing. However: + * - we only get the access to the "struct module" + * - the data available contains kallsyms + * - at this point all non-kernel symbols are discarded from memory (see kernel/module.c:simplify_symbols()) + * + * While the symbols exist in the memory the symbol table cannot be accessed (short of loading the ELF again and + * re-parsing the binary... which is way too complex). Most of the ELF parsing routines in the kernel are implemented + * in kernel/module.c in `static` functions. This unfortunately means they aren't really replaceable as they are + * inlined and mangled. However, there's one place where CPU architecture-dependent step happens: relocation of + * symbols. When module.c:apply_relocations() is called on x86_64 it calls the + * arch/x86/kernel/module.c:apply_relocate_add(). Since this function is external it can be "gently" replaced. + * + * During the lifetime of apply_relocate_add(), which is redirected to _apply_relocate_add() here, the full ELF with + * symbol table is available and thus the vtable can be located using process_bios_symbols(). However, it cannot be + * just like that modified at this moment (remember: we're way before module init is called) as 1) functions it points + * to may be relocated still, and 2) it's hardware-dependent (as seen by doing print_debug_symbols() before & after + * init). We need to hook to the module notification API and shim what's needed AFTER module started initializing. + * + * So in summary: + * 1. Redirect apply_relocate_add() => _apply_relocate_add() using internal/override_symbol.h + * 2. 
Setup module notifier + * 3. Look for "*_synobios" module in _apply_relocate_add() and if found iterate through symbols + * 4. Find "synobios_ops" in full symbols table and save it's start & end addresses; disable override from [1] + * 5. Wait until notified by the kernel about module started loaded (see bios_module_notifier_handler()) + * 6. Replace what's needed (see bios/bios_shims_collection.c:shim_bios_module()) + * 7. Wait until notified by the kernel about module fully loaded (and replace what was broken since 5.) + * 8. Drink a beer + * + * Additionally, this module also handles replacement of some kernel structures called by the mfgBIOS: + * - see bios_shims_collection.c:shim_disk_leds_ctrl() + * + * References: + * - https://en.wikipedia.org/wiki/Virtual_method_table + */ +#define SHIM_NAME "mfgBIOS" + +#include "bios_shim.h" +#include "shim_base.h" +#include "../common.h" +#include "../internal/override/override_symbol.h" +#include "../internal/helper/symbol_helper.h" //kernel_has_symbol() +#include "bios/bios_shims_collection.h" //shim_bios_module(), unshim_bios_module(), shim_bios_disk_leds_ctrl() +#include "bios/bios_hwcap_shim.h" //register_bios_hwcap_shim(), unregister_bios_hwcap_shim(), reset_bios_hwcap_shim() +#include //module notification +#include //struct module + +static bool bios_shimmed = false; +static bool module_notify_registered = false; +static unsigned long *vtable_start = NULL; +static unsigned long *vtable_end = NULL; +static const struct hw_config *hw_config = NULL; +static inline int enable_symbols_capture(void); +static inline int disable_symbols_capture(void); + +/********************************************* Shimming of mfgBIOS module *********************************************/ +/** + * Unified way to determine if a given module is a bios module (as this is not a simple == check) + */ +static inline bool is_bios_module(const char *name) +{ + char *separator_pos = strrchr(name, '_'); //bios will be named e.g. 
bromolow_synobios - find's the last _ + + //Check if it's synobios or sth else really + return (separator_pos && strcmp(separator_pos, "_synobios") == 0); +} + +/** + * Handles notifications regarding modules loading. It will only perform actions on modules matching is_bios_module() + * + * This is constantly loaded to provide useful error information in case the bios module goes away (it shouldn't). In + * non-dev builds it can probably just go away. + * + * @return NOTIFY_* const + */ +static int bios_module_notifier_handler(struct notifier_block * self, unsigned long state, void * data) +{ + struct module *mod = data; + + if (!is_bios_module(mod->name)) + return NOTIFY_OK; + + if (state == MODULE_STATE_GOING) { + //So this is actually not a problem with RP but rather with the bios module - it cannot be unloaded at will. + //As soon as you try it will cause a circular error with page faults and the kernel will demand a reboot + //We're not unregistering notifier in case one day this is fixed by the bios module ¯\_(ツ)_/¯ + pr_loc_err("%s BIOS went away - you may get a kernel panic if YOU unloaded it", mod->name); + bios_shimmed = false; + vtable_start = vtable_end = NULL; + enable_symbols_capture(); + reset_bios_shims(); + reset_bios_hwcap_shim(); + + return NOTIFY_OK; + } + + if (bios_shimmed) + return NOTIFY_OK; + + //So, this is really tricky actually. Some parts of the vtable are populated AND USED during init and some are + // populated in init but used later. This means we need to try to shim twice - as fast as possible after init call + // and just after init call finished. + + //We react to every module action by re-shimming its vtable as it might have changed. Other actions are done only + // once below. 
+ if (!shim_bios_module(hw_config, mod, vtable_start, vtable_end)) { + bios_shimmed = false; + return NOTIFY_OK; + } + + if (state == MODULE_STATE_LIVE) { + bios_shimmed = true; + pr_loc_inf("%s BIOS *fully* shimmed", mod->name); + } else { //MODULE_STATE_COMING or MODULE_STATE_UNFORMED [but most likely actually MODULE_STATE_COMING] + if (likely(state == MODULE_STATE_COMING)) + register_bios_hwcap_shim(hw_config); + + pr_loc_inf("%s BIOS *early* shimmed", mod->name); + } + + return NOTIFY_OK; +} + +static struct notifier_block bios_notifier_block = { + .notifier_call = bios_module_notifier_handler +}; +/** + * Registers module notifier to modify vtable as soon as module finishes loading + * + * @return 0 on success, -E on failure + */ +static int register_bios_module_notifier(void) +{ + if (unlikely(module_notify_registered)) { + pr_loc_bug("%s called while notifier already registered", __FUNCTION__); + return -EALREADY; + } + + //Check if the bios module is already present in the system. If it is we have a problem as the vtable must be + // patched as it loads. It's unclear if it can be patched after it's loaded but most certainly we don't have the + // address of the table. That's why this is an error. If by any chance we have an address we can try patching but + // this scenario is unlikely to work (and re-loading of the bios is not possible as it KPs). There's also no EASY + // way of accessing list of modules (and the bios module name depends on platform etc...) + //This symbol is chosen semi-randomly (i.e. it should be stable over time) but it shouldn't be present anywhere else + if (unlikely(kernel_has_symbol("synobios_ioctl"))) { + pr_loc_err("BIOS module is already loaded (did you load this module too late?) - cannot recover!"); + return -EDEADLOCK; + } + + int out = register_module_notifier(&bios_notifier_block); + if(unlikely(out != 0)) { + pr_loc_err("Failed to register module notifier"); //Currently it's impossible to happen... 
currently + return out; + } + + module_notify_registered = true; + pr_loc_dbg("Registered bios module notifier"); + + return 0; +} + +/** + * Reverses what register_bios_module_notifier did + * + * @return 0 on success, -E on failure + */ +static int unregister_bios_module_notifier(void) +{ + if (unlikely(!module_notify_registered)) { + pr_loc_bug("%s called while notifier not yet registered", __FUNCTION__); + return -ENOMEDIUM; + } + + int out = unregister_module_notifier(&bios_notifier_block); + if(unlikely(out != 0)) { + pr_loc_err("Failed to unregister module notifier"); + return out; + } + + module_notify_registered = false; + pr_loc_dbg("Unregistered bios module notifier"); + + return 0; +} + +#define BIOS_CALLTABLE "synobios_ops" +/** + * Scans module ELF headers for BIOS_CALLTABLE and saves its address + */ +static void process_bios_symbols(Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex, struct module *mod) +{ + Elf64_Shdr *symsec = &sechdrs[symindex]; + pr_loc_dbg("Symbol section <%p> @ vaddr<%llu> size[%llu]", symsec, symsec->sh_addr, symsec->sh_size); + + Elf64_Sym *sym; + Elf64_Sym *vtable = NULL; + sym = (void *)symsec->sh_addr; //First symbol in the table + + unsigned int i; + for (i = 0; i < symsec->sh_size / sizeof(Elf64_Sym); i++) { + const char *symname = strtab + sym[i].st_name; + pr_loc_dbg("Symbol #%d in mfgBIOS \"%s\" {%s}<%p>", i, mod->name, symname, (void *)sym[i].st_value); + + //There are more than one, we're looking for THE table (not a pointer) + if (strncmp(symname, BIOS_CALLTABLE, sizeof(BIOS_CALLTABLE)) == 0 && sym[i].st_size > sizeof(void *)) { + pr_loc_dbg("Found vtable - size %llu", sym[i].st_size); + vtable = &sym[i]; + break; + } + } + + //That, to my knowledge, shouldn't happen + if (unlikely(!vtable)) { + pr_loc_wrn("Didn't find \"%s\" in \"%s\" this time - that's weird?", BIOS_CALLTABLE, mod->name); + return; + } + + vtable_start = (unsigned long *)vtable->st_value; + vtable_end = vtable_start + 
vtable->st_size; + pr_loc_dbg("Found \"%s\" in \"%s\" @ <%p =%llu=> %p>", (strtab + vtable->st_name), mod->name, vtable_start, + vtable->st_size, vtable_end); + disable_symbols_capture(); +} + +/**************************************************** Entrypoints *****************************************************/ +int register_bios_shim(const struct hw_config *hw) +{ + int out; + hw_config = hw; + shim_reg_in(); + + if ( + (out = shim_disk_leds_ctrl(hw)) != 0 || + (out = enable_symbols_capture()) != 0 || + (out = register_bios_module_notifier()) != 0 + ) { + return out; + } + + shim_reg_ok(); + return 0; +} + +int unregister_bios_shim(void) +{ + int out; + + shim_ureg_in(); + if (likely(bios_shimmed)) { + if (!unshim_bios_module(vtable_start, vtable_end)) + return -EINVAL; + } + + out = unregister_bios_module_notifier(); + if (unlikely(out != 0)) + return out; + + out = disable_symbols_capture(); + if (unlikely(out != 0)) + return out; + + unshim_disk_leds_ctrl(); //this will be noop if nothing was registered + unregister_bios_hwcap_shim(); //this will be noop if nothing was registered + + hw_config = NULL; + + shim_ureg_ok(); + return 0; +} + +/************************************************** Internal Helpers **************************************************/ +/** + * A modified arch/x86/kernel/module.c:apply_relocate_add() from Linux v3.10.108 to save synobios_ops address + * + * This is taken straight from Linux v3.10 and modified: + * - added call to process_bios_symbols + * - commented-out DEBUGP + * Original author notice: Copyright (C) 2001 Rusty Russell + */ +static int _apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex, unsigned int relsec, struct module *me) +{ + unsigned int i; + Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; + Elf64_Sym *sym; + void *loc; + u64 val; + + //Well, this is here because there isn't a good place to plug-in into modules loading to get the full symbols table + //Later on kernel removes 
"useless" symbols (see module.c:simplify_symbols())... but we need them + //After vtable address is found this override of apply_relocate_add() is removed + if (!vtable_start && is_bios_module(me->name)) + process_bios_symbols(sechdrs, strtab, symindex, me); + +// DEBUGP("Applying relocate section %u to %u\n", +// relsec, sechdrs[relsec].sh_info); + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + /* This is where to make the change */ + loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + + rel[i].r_offset; + + /* This is the symbol it is referring to. Note that all + undefined symbols have been resolved. */ + sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + + ELF64_R_SYM(rel[i].r_info); + +// DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", +// (int)ELF64_R_TYPE(rel[i].r_info), +// sym->st_value, rel[i].r_addend, (u64)loc); + + val = sym->st_value + rel[i].r_addend; + + switch (ELF64_R_TYPE(rel[i].r_info)) { + case R_X86_64_NONE: + break; + case R_X86_64_64: + *(u64 *)loc = val; + break; + case R_X86_64_32: + *(u32 *)loc = val; + if (val != *(u32 *)loc) + goto overflow; + break; + case R_X86_64_32S: + *(s32 *)loc = val; + if ((s64)val != *(s32 *)loc) + goto overflow; + break; + case R_X86_64_PC32: + val -= (u64)loc; + *(u32 *)loc = val; + break; + default: + pr_err("%s: Unknown rela relocation: %llu\n", + me->name, ELF64_R_TYPE(rel[i].r_info)); + return -ENOEXEC; + } + } + return 0; + + overflow: + pr_err("overflow in relocation type %d val %Lx\n", + (int)ELF64_R_TYPE(rel[i].r_info), val); + pr_err("`%s' likely not compiled with -mcmodel=kernel\n", + me->name); + return -ENOEXEC; +} + +static override_symbol_inst *ov_apply_relocate_add = NULL; +/** + * Enables override of apply_relocate_add() to redirect it to _apply_relocate_add() in order to plug into a moment where + * process_bios_symbols() can extract the data. 
+ * + * @return 0 on success, -E on failure + */ +static inline int enable_symbols_capture(void) +{ + if (unlikely(ov_apply_relocate_add)) + return 0; //Technically it's working so it's a non-error scenario (and it may happen with modules notification) + + ov_apply_relocate_add = override_symbol("apply_relocate_add", _apply_relocate_add); + if (unlikely(IS_ERR(ov_apply_relocate_add))) { + int out = PTR_ERR(ov_apply_relocate_add); + ov_apply_relocate_add = NULL; + pr_loc_err("Failed to override apply_relocate_add, error=%d", out); + return out; + } + + return 0; +} + +/** + * Disables override of apply_relocate_add() if enabled. + * + * @return 0 on success/noop, -E on failure + */ +static inline int disable_symbols_capture(void) +{ + if (!ov_apply_relocate_add) //may have been restored before + return 0; + + int out = restore_symbol(ov_apply_relocate_add); + ov_apply_relocate_add = NULL; + + return out; +} \ No newline at end of file diff --git a/shim/bios_shim.h b/shim/bios_shim.h new file mode 100644 index 0000000..82a216f --- /dev/null +++ b/shim/bios_shim.h @@ -0,0 +1,8 @@ +#ifndef REDPILLLKM_BIOS_SHIM_H +#define REDPILLLKM_BIOS_SHIM_H + +struct hw_config; +int register_bios_shim(const struct hw_config *hw); +int unregister_bios_shim(void); + +#endif //REDPILLLKM_BIOS_SHIM_H diff --git a/shim/block_fw_update_shim.c b/shim/block_fw_update_shim.c new file mode 100644 index 0000000..364e0e2 --- /dev/null +++ b/shim/block_fw_update_shim.c @@ -0,0 +1,88 @@ +/** + * This rather simple shim prevents execution of a firmware update program when done in a one specific way + * + * During the OS installation process one of the steps executes a command "./H2OFFT-Lx64". This is a board firmware + * update program. When executed under KVM it will crash the virtual CPU (and I wasn't brave enough to try it on bare + * metal). All in all the execution must succeed from the perspective of the user-space and the file cannot be modified + * due to checksum check. 
+ * + * This shim hooks an execve() syscall and filters it through shim_sys_execve(). This in turn is self-explanatory for the + * most part - it simply fakes successful execution without invoking anything. While such trickery can be detected (as + * the real process is not really replaced) it is good enough for this case. + * + * Additionally, to make the firmware picking happy we need to pass a sanity check (which is presumably done to ensure + * flasher doesn't accidentally brick an incorrect board) using DMI data. This is handled here by overriding one string + * in the DMI data array (as the kernel API lacks any way of changing that). + * + * References: + * - https://linux.die.net/man/3/execve + * - https://0xax.gitbooks.io/linux-insides/content/SysCall/linux-syscall-4.html + * - https://help.ubuntu.com/community/FimwareUpgrade/Insyde + */ +#define SHIM_NAME "firmware update blocker" + +#include "block_fw_update_shim.h" +#include "shim_base.h" +#include "../common.h" +#include "../internal/intercept_execve.h" +#include <linux/dmi.h> //dmi_get_system_info(), DMI_* + +#define DMI_MAX_LEN 512 +#define FW_BOARD_NAME "\x53\x79\x6e\x6f\x64\x65\x6e" +#define FW_UPDATE_PATH "./H2OFFT-Lx64" + +static char dmi_product_name_backup[DMI_MAX_LEN] = { '\0' }; +/** + * Saves the current DMI product name into dmi_product_name_backup and overwrites it in place with FW_BOARD_NAME + * + * The backup holds at most DMI_MAX_LEN-1 characters. When the DMI field is not available nothing is saved nor patched. + */ +static void patch_dmi(void) +{ + char *ptr = (char *)dmi_get_system_info(DMI_PRODUCT_NAME); + if (unlikely(!ptr)) { //the field may be missing (NULL) - strlen()/strcpy() below would dereference it + pr_loc_wrn("DMI product name is not available - skipping DMI patching"); + return; + } + + size_t org_len = strlen(ptr); + if (org_len >= DMI_MAX_LEN) //>= as the backup buffer needs one byte for the terminating NUL + pr_loc_wrn("DMI field longer than %d - restoring on module unload will be limited to that length", DMI_MAX_LEN - 1); + + //strlcpy() returns the length of the source as size_t (never negative); result >= buffer size means truncation + if (strlcpy(dmi_product_name_backup, ptr, DMI_MAX_LEN) >= DMI_MAX_LEN) + pr_loc_wrn("Backup DMI truncated to %d", DMI_MAX_LEN - 1); + + pr_loc_dbg("Saved backup DMI: %s", dmi_product_name_backup); + + //This TECHNICALLY can cause overflow but DMI has buffer for such a short string + if (org_len < strlen_static(FW_BOARD_NAME)) + pr_loc_bug("Shimmed DMI field will be longer than original!"); + + strcpy(ptr, FW_BOARD_NAME); +} + +static void unpatch_dmi(void) +{ + if
(dmi_product_name_backup[0] == '\0') { + pr_loc_dbg("Skipping %s - DMI not patched", __FUNCTION__); + return; + } + + strcpy((char *)dmi_get_system_info(DMI_PRODUCT_NAME), dmi_product_name_backup); + pr_loc_dbg("DMI unpatched"); +} + +int register_fw_update_shim(void) +{ + shim_reg_in(); + + int out = add_blocked_execve_filename(FW_UPDATE_PATH); + if (out != 0) + return out; + + patch_dmi(); + + shim_reg_ok(); + return 0; +} + +int unregister_fw_update_shim(void) +{ + shim_ureg_in(); + + //Do not remove execve registration here - it will be cleared in one sweep during unregister of interceptor + unpatch_dmi(); + + shim_ureg_ok(); + return 0; +} \ No newline at end of file diff --git a/shim/block_fw_update_shim.h b/shim/block_fw_update_shim.h new file mode 100644 index 0000000..0eda2f5 --- /dev/null +++ b/shim/block_fw_update_shim.h @@ -0,0 +1,7 @@ +#ifndef REDPILL_BLOCK_FW_UPDATE_SHIM_H +#define REDPILL_BLOCK_FW_UPDATE_SHIM_H + +int register_fw_update_shim(void); +int unregister_fw_update_shim(void); + +#endif //REDPILL_BLOCK_FW_UPDATE_SHIM_H diff --git a/shim/boot_dev/boot_shim_base.c b/shim/boot_dev/boot_shim_base.c new file mode 100644 index 0000000..2535e25 --- /dev/null +++ b/shim/boot_dev/boot_shim_base.c @@ -0,0 +1,70 @@ +#include "boot_shim_base.h" +#include "../../common.h" +#include "../../config/runtime_config.h" //struct boot_media +#include "../../internal/scsi/scsi_toolbox.h" //is_sata_disk(), opportunistic_read_capacity() +#include <scsi/scsi_device.h> //struct scsi_device +#include <linux/usb.h> //struct usb_device + +//Definition of known VID/PIDs for USB-based shims +#define SBOOT_RET_VID 0xf400 //Retail boot drive VID +#define SBOOT_RET_PID 0xf400 //Retail boot drive PID +#define SBOOT_MFG_VID 0xf401 //Force-reinstall boot drive VID +#define SBOOT_MFG_PID 0xf401 //Force-reinstall boot drive PID + +void *mapped_shim_data = NULL; + +void set_shimmed_boot_dev(void *private_data) +{ + mapped_shim_data = private_data; +} + +void *get_shimmed_boot_dev(void) +{ + return mapped_shim_data;
+} + +bool scsi_is_boot_dev_target(const struct boot_media *boot_dev_config, struct scsi_device *sdp) +{ + if (!is_sata_disk(&sdp->sdev_gendev)) { + pr_loc_dbg("%s: it's not a SATA disk, ignoring", __FUNCTION__); + return false; + } + + pr_loc_dbg("Checking if SATA disk is a shim target - id=%u channel=%u vendor=\"%s\" model=\"%s\"", sdp->id, + sdp->channel, sdp->vendor, sdp->model); + + long long capacity_mib = opportunistic_read_capacity(sdp); + if (unlikely(capacity_mib < 0)) { + pr_loc_dbg("Failed to estimate drive capacity (error=%lld) - it WILL NOT be shimmed", capacity_mib); + return false; + } + + if (capacity_mib > boot_dev_config->dom_size_mib) { + pr_loc_dbg("Device has capacity of ~%llu MiB - it WILL NOT be shimmed (>%lu)", capacity_mib, + boot_dev_config->dom_size_mib); + return false; + } + + if (unlikely(get_shimmed_boot_dev())) { + pr_loc_wrn("Boot device was already shimmed but a new matching device (~%llu MiB <= %lu) appeared again - " + "this may produce unpredictable outcomes! 
Ignoring - check your hardware", capacity_mib, + boot_dev_config->dom_size_mib); + return false; + } + + pr_loc_dbg("Device has capacity of ~%llu MiB - it is a shimmable target (<=%lu)", capacity_mib, + boot_dev_config->dom_size_mib); + + return true; +} + +void usb_shim_as_boot_dev(const struct boot_media *boot_dev_config, struct usb_device *usb_device) +{ + if (boot_dev_config->mfg_mode) { + usb_device->descriptor.idVendor = cpu_to_le16(SBOOT_MFG_VID); + usb_device->descriptor.idProduct = cpu_to_le16(SBOOT_MFG_PID); + } else { + usb_device->descriptor.idVendor = cpu_to_le16(SBOOT_RET_VID); + usb_device->descriptor.idProduct = cpu_to_le16(SBOOT_RET_PID); + } +} \ No newline at end of file diff --git a/shim/boot_dev/boot_shim_base.h b/shim/boot_dev/boot_shim_base.h new file mode 100644 index 0000000..d75e98b --- /dev/null +++ b/shim/boot_dev/boot_shim_base.h @@ -0,0 +1,57 @@ +#ifndef REDPILL_BOOT_SHIM_BASE_H +#define REDPILL_BOOT_SHIM_BASE_H + +#include <linux/types.h> //bool + +struct boot_media; +struct usb_device; +struct scsi_device; + +/** + * Modify given USB device instance to conform to syno kernel boot device specification + * + * This function takes into consideration the boot_media configuration regarding mfg vs retail mode and will change the + * device descriptors accordingly. It is safe to call this function multiple times on the same object. + * + * @param boot_dev_config Configuration to determine boot mode + * @param usb_device Device to change + */ +void usb_shim_as_boot_dev(const struct boot_media *boot_dev_config, struct usb_device *usb_device); + +/** + * Save a free-form pointer into a global storage to mark boot device as shimmed + * + * Other subsystems can determine if the boot device has been shimmed by calling get_shimmed_boot_dev(). However, the + * data passed to this function is opaque by design and only makes sense to the submodule which originally set it. + * + * @param private_data Any non-null pointer or value castable to a pointer type (e.g.
unsigned long number) + */ +void set_shimmed_boot_dev(void *private_data); + +/** + * Shortcut to remove previously marked as shimmed boot device. It is an equivalent of simply calling set with NULL ptr. + * The macro expands WITHOUT a trailing semicolon: the caller supplies it, which keeps unbraced if/else constructs valid. + */ +#define reset_shimmed_boot_dev() set_shimmed_boot_dev(NULL) + +/** + * Gets shimmed boot device private data (if any) + * + * The caller should not try to interpret the value returned beyond NULL vs. non-NULL, unless the caller is the original + * submodule which set the value using set_shimmed_boot_dev(). + * + * @return non-NULL pointer if device has been shimmed or NULL ptr if it wasn't + */ +void *get_shimmed_boot_dev(void); + +/** + * Checks if a given SCSI disk can become a boot device + * + * To fully understand the rules and intricacies of how it is used in context you should read the file comment for the + * native SATA DOM shim in shim/boot_dev/native_sata_boot_shim.c + * + * @param boot_dev_config User-controllable configuration with a threshold for considering an SCSI disk a boot device + * @param sdp SCSI device which ideally should be an SCSI disk (as passing any other ones doesn't make sense) + * + * @return true if the device is a target which should be shimmed as the boot device, false otherwise + */ +bool scsi_is_boot_dev_target(const struct boot_media *boot_dev_config, struct scsi_device *sdp); + +#endif //REDPILL_BOOT_SHIM_BASE_H diff --git a/shim/boot_dev/fake_sata_boot_shim.c b/shim/boot_dev/fake_sata_boot_shim.c new file mode 100644 index 0000000..7644b8d --- /dev/null +++ b/shim/boot_dev/fake_sata_boot_shim.c @@ -0,0 +1,317 @@ +/** + * A crazy attempt to use SATA disks as proper boot devices on systems without SATA DOM support + * + * BACKGROUND + * The syno-modified SCSI driver (sd.c) contains support for so-called boot disks. It is a logical designation for drives + * separated from normal data disks. Normally that designation is based on vendor & model of the drive. The native SATA + * boot shim uses that fact to modify user-supplied drive to match that vendor-model and be considered bootable. 
+ * Likewise, a similar mechanism exists for USB boot media. Both are completely separate and work totally differently. + * While both USB storage and SATA are SCSI-based systems they differ in the ways devices are identified and pretty + * much in almost everything else except the protocol. + * + * + * HOW DOES IT WORK? + * This shim performs a nearly surgical task of grabbing a SATA disk (similarly to native SATA boot shim) and modifying + * its descriptors to look like a USB drive for a short while. The descriptors cannot be left in such state for too + * long, and have to be reverted as soon as the disk type is determined by the "sd.c" driver. This is because other + * processes actually need to read & probe the drive as a SATA one (as you cannot communicate with a SATA device like + * you do with a USB stick). + * In a bird's-eye view the descriptors are modified just before the sd_probe() is called and removed when ida_pre_get() + * is called by the sd_probe(). The ida_pre_get() is nearly guaranteed [even if the sd.c code changes] to be called + * very early in the process as the ID allocation needs to be done for anything else to use structures created within. + * + * + * HERE BE DRAGONS + * This code is highly experimental and may explode at any moment. We previously thought we cannot do anything with + * SATA boot due to lack of kernel support for it (and userland method being broken now). This crazy idea actually + * worked and after many tests on multiple platforms it seems to be stable. However, we advise against using it if USB + * is an option. Code here has many safety checks and safeguards but we will never consider it bullet-proof. 
+ * + * + * References: + * - https://www.kernel.org/doc/html/latest/core-api/idr.html (IDs assignment in the kernel) + * - drivers/scsi/sd.c in syno kernel GPL sources (look at sd_probe() and syno_disk_type_get()) + */ +#include "fake_sata_boot_shim.h" +#include "boot_shim_base.h" //set_shimmed_boot_dev(), get_shimmed_boot_dev(), scsi_is_shim_target(), usb_shim_as_boot_dev() +#include "../shim_base.h" //shim_* +#include "../../common.h" +#include "../../internal/scsi/scsi_toolbox.h" //scsi_force_replug() +#include "../../internal/scsi/scsi_notifier.h" //waiting for the drive to appear +#include "../../internal/call_protected.h" //ida_pre_get() +#include "../../internal/override/override_symbol.h" //overriding ida_pre_get() +#include //struct scsi_device +#include //struct Scsi_Host, SYNO_PORT_TYPE_* +#include //struct usb_device +#include <../drivers/usb/storage/usb.h> //struct us_data + +#define SHIM_NAME "fake SATA boot device" + +static const struct boot_media *boot_dev_config = NULL; //passed to scsi_is_shim_target() & usb_shim_as_boot_dev() +static struct scsi_device *camouflaged_sdp = NULL; //set when ANY device is under camouflage +static struct usb_device *fake_usbd = NULL; //ptr to our fake usb device scaffolding +static int org_port_type = 0; //original port type of the device which registered +static override_symbol_inst *ida_pre_get_ovs = NULL; //trap override +static unsigned long irq_flags = 0; //saved flags when IRQs are disabled (to prevent rescheduling) + +//They call each other, see their own docblocks +static int camouflage_device(struct scsi_device *sdp); +static int uncamouflage_device(struct scsi_device *sdp); + +struct ida; +/** + * Called by the sd_probe() very early after disk type is determined. 
We can restore the disk to its original shape + */ +static int ida_pre_get_trap(struct ida *ida, gfp_t gfp_mask) +{ + //This can happen if the kernel decides to reschedule and/or some device appears JUST between setting up the trap + // and disabling of rescheduling. We cannot reverse the order as setting up the trap requires flushing CPU caches + // which isn't really feasible in non-preempt & IRQ-disabled state... a catch-22 + //It is also possible that it happens during uncamouflage_device - this is why we force-restore here and just call + // it. + if (unlikely(!camouflaged_sdp)) { + pr_loc_bug("Hit ida_pr_get() trap without sdp saved - removing trap and calling original"); + restore_symbol(ida_pre_get_ovs); + return _ida_pre_get(ida, gfp_mask); + } + + pr_loc_dbg("Hit ida_pre_get() trap! Removing camouflage..."); + uncamouflage_device(camouflaged_sdp); + + pr_loc_dbg("Calling original ida_pre_get()"); + return _ida_pre_get(ida, gfp_mask); +} + +/** + * Checks if the device passes is "camouflaged" as a USB device + */ +static bool is_camouflaged(struct scsi_device *sdp) +{ + return likely(camouflaged_sdp) && camouflaged_sdp == sdp; +} + +/** + * Alters a SATA device to look like a USB boot disk + * + * Order of operations in camouflage/uncamouflage is VERY particular. We make sure we CANNOT fail (at least not without + * a KP resulting from pagefault) once we disable preemption & irqs AND that no changes before preemption is disabled + * are overriding anything external (as we can be rescheduled and we cannot leave stuff half-replaced) + * + * @param sdp A valid SATA disk (it's assumed it passed through scsi_is_boot_dev_target() already) to disguise as USB + * + * @return 0 on success, -E on error + */ +static int camouflage_device(struct scsi_device *sdp) +{ + //This is very serious - it means something went TERRIBLY wrong. The camouflage should last only through the + // duration of probing. 
If we got here again before camouflaging it means there's a device floating around which + // is a SATA device but with broken USB descriptors. This should never ever happen as it may lead to data loss and + // crashes at best. + if (unlikely(camouflaged_sdp)) { + pr_loc_crt("Attempting to camouflage when another device is undergoing camouflage"); + return -EEXIST; + } + + //Here's the kicker: most of the subsystems save a pointer to some driver-related data into sdp->host->hostdata. + // Unfortunately usb-storage saves a whole us_data structure there. It can do that as it allows them to use some + // neat container_of() tricks later on. However, it means that we must fake that arrangement. This means we have to + // practically go over the boundaries of the struct memory passed as ->pusb_dev is simply +40 bytes over the struct + // (+ 8 bytes to save the ptr). USUALLY it should be safe as there's spare empty space due to memory fragmentation. + // Since we're doing this only for a short moment it shouldn't be a problem but we are making sure here the memory + // is indeed empty where we want to make a change. There's no guarantees that we don't damage anything but with all + // the safeguards here the chance is minimal. + if (unlikely(host_to_us(sdp->host)->pusb_dev)) { + pr_loc_crt("Cannot camouflage - space on pointer not empty"); + return -EINVAL; + } + + if (unlikely(get_shimmed_boot_dev())) { + pr_loc_wrn("Refusing to camouflage. Boot device was already shimmed but a new matching device appeared again - " + "this may produce unpredictable outcomes! 
Ignoring - check your hardware"); + return -EEXIST; + } + + pr_loc_dbg("Camouflaging SATA disk vendor=\"%s\" model=\"%s\" to look like a USB boot device", sdp->vendor, + sdp->model); + + pr_loc_dbg("Generating fake USB descriptor"); + kzalloc_or_exit_int(fake_usbd, sizeof(struct usb_device)); + usb_shim_as_boot_dev(boot_dev_config, fake_usbd); + + pr_loc_dbg("Setting-up ida_pre_get() trap"); + ida_pre_get_ovs = override_symbol("ida_pre_get", ida_pre_get_trap); + if (unlikely(IS_ERR(ida_pre_get_ovs))) { + pr_loc_err("Failed to override ida_pre_get - error=%ld", PTR_ERR(ida_pre_get_ovs)); + ida_pre_get_ovs = NULL; + kfree(fake_usbd); + return PTR_ERR(ida_pre_get_ovs); + } + + pr_loc_dbg("Disabling rescheduling"); + preempt_disable(); + local_irq_save(irq_flags); + + pr_loc_dbg("Changing port type %d => %d", sdp->host->hostt->syno_port_type, SYNO_PORT_TYPE_USB); + org_port_type = sdp->host->hostt->syno_port_type; + sdp->host->hostt->syno_port_type = SYNO_PORT_TYPE_USB; + + pr_loc_dbg("Faking ptr to usb_device at %p", &host_to_us(sdp->host)->pusb_dev); + host_to_us(sdp->host)->pusb_dev = fake_usbd; + + camouflaged_sdp = sdp; + set_shimmed_boot_dev(sdp); + + return 0; +} + +/** + * Undoes what camouflage_device() does; i.e. 
restores device to its normal SATA-view + * + * The steps mirror camouflage_device() in reverse: the fake usb_device ptr is removed, the original port type is put + * back, IRQs & preemption are re-enabled, then the ida_pre_get() trap is removed and the fake descriptor is freed. + * + * @param sdp Previously camouflaged device + * + * @return 0 on success, -E on error (-EINVAL if the pusb_dev slot doesn't hold our fake device, or a non-zero result + * of restore_symbol()) + */ +static int uncamouflage_device(struct scsi_device *sdp) +{ + int out = 0; + pr_loc_dbg("Uncamouflaging SATA disk vendor=\"%s\" model=\"%s\"", sdp->vendor, sdp->model); + + //NOTE(review): this early return skips everything below - camouflaged_sdp stays set and the local_irq_restore() & + // preempt_enable() calls further down are not reached on this path; confirm that this is intended + if (unlikely(host_to_us(sdp->host)->pusb_dev != fake_usbd)) { + pr_loc_bug("Fake USB device in the scsi_device is not the same as our fake one - something changed it"); + return -EINVAL; + } + + //Cleared before anything else: from now on ida_pre_get_trap() and is_camouflaged() see no camouflaged device + camouflaged_sdp = NULL; + + pr_loc_dbg("Removing fake usb_device ptr at %p", &host_to_us(sdp->host)->pusb_dev); + host_to_us(sdp->host)->pusb_dev = NULL; + + pr_loc_dbg("Restoring port type %d => %d", sdp->host->hostt->syno_port_type, org_port_type); + sdp->host->hostt->syno_port_type = org_port_type; + org_port_type = 0; + + //Reverse order of what camouflage_device() did (preempt_disable() followed by local_irq_save()) + pr_loc_dbg("Re-enabling scheduling"); + local_irq_restore(irq_flags); + preempt_enable(); + + if (likely(ida_pre_get_ovs)) { //scheduling race condition may have removed that already in ida_pre_get_trap() + pr_loc_dbg("Removing ida_pre_get() trap"); + //A failure is only logged & remembered in "out" - the remaining cleanup is still performed + if ((out = restore_symbol(ida_pre_get_ovs)) != 0) + pr_loc_err ("Failed to restore original ida_pre_get() - error=%d", out); + ida_pre_get_ovs = NULL; + } + + pr_loc_dbg("Cleaning fake USB descriptor"); + kfree(fake_usbd); + fake_usbd = NULL; + + return out; +} + +/** + * Called for every existing SCSI disk to determine if any of them is a candidate to be a boot device. + * + * If a given device is a SATA drive which matches shim criteria it will be unplugged & replugged to be shimmed. + * + * @param sdp This "struct device" should already be guaranteed to be an scsi_device with type=TYPE_DISK (i.e. returning + * "true" from is_scsi_disk()). It will be re-checked anyway but there's no point in passing anything which + * is not a SCSI disk. + * + * @return 0 means "continue calling me" while any other value means "I found what I was looking for, stop calling me". 
+ * This convention is based on how bus_for_each_dev() works + */ +static int on_existing_scsi_disk_device(struct scsi_device *sdp) +{ + if (!scsi_is_boot_dev_target(boot_dev_config, sdp)) + return 0; + + pr_loc_dbg("Found a shimmable SCSI device - reconnecting to trigger shimming"); + scsi_force_replug(sdp); + + return 1; +} + +/** + * Called for every new (or recently forcefully re-plugged) device to camouflage it as a USB boot disk + */ +static int scsi_disk_probe_handler(struct notifier_block *self, unsigned long state, void *data) +{ + struct scsi_device *sdp = data; + + switch (state) { + case SCSI_EVT_DEV_PROBING: + if (unlikely(camouflaged_sdp)) { + pr_loc_bug("Got device probe when other one is camouflaged - surprise reschedule happened?"); + uncamouflage_device(camouflaged_sdp); + return NOTIFY_OK; + } + + if (scsi_is_boot_dev_target(boot_dev_config, data)) + camouflage_device(sdp); + + return NOTIFY_OK; + + case SCSI_EVT_DEV_PROBED_OK: + case SCSI_EVT_DEV_PROBED_ERR: + if (is_camouflaged(sdp)) { //camouflage is expected to be removed by the ida_pre_get() trap + pr_loc_bug("Probing finished but device is still camouflages - something went terribly wrong"); + uncamouflage_device(sdp); + } + + return NOTIFY_OK; + + default: + pr_loc_dbg("Not interesting SCSI EVT %lu - ignoring", state); + return NOTIFY_DONE; + } + +} + +static struct notifier_block scsi_disk_nb = { + .notifier_call = scsi_disk_probe_handler, + .priority = INT_MIN, //we want to be FIRST so that we other things can get the correct drive type +}; + +int register_fake_sata_boot_shim(const struct boot_media *config) +{ + shim_reg_in(); + +#ifdef NATIVE_SATA_DOM_SUPPORTED + pr_loc_wrn("This platform supports native SATA DoM - usage of %s is highly discouraged", SHIM_NAME); +#else + pr_loc_inf("This %s is a prototype - if stability is desired use USB boot media instead", SHIM_NAME); +#endif + + int out; + boot_dev_config = config; + + pr_loc_dbg("Registering for new devices notifications"); + out 
= subscribe_scsi_disk_events(&scsi_disk_nb); + if (unlikely(out != 0)) { + pr_loc_err("Failed to register for SCSI disks notifications - error=%d", out); + boot_dev_config = NULL; + return out; + } + + pr_loc_dbg("Iterating over existing devices"); + out = for_each_scsi_disk(on_existing_scsi_disk_device); + if (unlikely(out != 0 && out != -ENXIO)) { + pr_loc_err("Failed to enumerate current SCSI disks - error=%d", out); + boot_dev_config = NULL; + return out; + } + + shim_reg_ok(); + return 0; +} + +int unregister_fake_sata_boot_shim(void) +{ + shim_ureg_in(); + + unsubscribe_scsi_disk_events(&scsi_disk_nb); + boot_dev_config = NULL; + + shim_ureg_ok(); + return 0; //noop +} \ No newline at end of file diff --git a/shim/boot_dev/fake_sata_boot_shim.h b/shim/boot_dev/fake_sata_boot_shim.h new file mode 100644 index 0000000..9799785 --- /dev/null +++ b/shim/boot_dev/fake_sata_boot_shim.h @@ -0,0 +1,8 @@ +#ifndef REDPILL_FAKE_SATA_BOOT_SHIM_H +#define REDPILL_FAKE_SATA_BOOT_SHIM_H + +struct boot_media; +int register_fake_sata_boot_shim(const struct boot_media *config); +int unregister_fake_sata_boot_shim(void); + +#endif //REDPILL_FAKE_SATA_BOOT_SHIM_H diff --git a/shim/boot_dev/native_sata_boot_shim.c b/shim/boot_dev/native_sata_boot_shim.c new file mode 100644 index 0000000..4510610 --- /dev/null +++ b/shim/boot_dev/native_sata_boot_shim.c @@ -0,0 +1,285 @@ +/** + * Implements shimming SATA device to look like a SATA DOM (Disk-on-Module) device supported by the syno kernel + * If you didn't read the docs for shim/boot_device_shim.c go there and read it first! + * + * HOW THE KERNEL ASSIGNS SYNOBOOT TYPE? + * The determination of what is or isn't the correct synoboot device for SATA is made using vendor and model *names*, as + * standard SCSI/SATA don't have any VID/PID designation like USB or PCI. + * Syno kernel uses different vendor/model names depending on the platform. 
They are taken from the kernel config option + * pairs CONFIG_SYNO_SATA_DOM_VENDOR/CONFIG_SYNO_SATA_DOM_MODEL and CONFIG_SYNO_SATA_DOM_VENDOR_SECOND_SRC/ + * CONFIG_SYNO_SATA_DOM_MODEL_SECOND_SRC. This gives the following supported matrix at the time of writing: + * - vendor-name="SATADOM" and model-name="TYPE D 3SE" (purley only) + * - vendor-name="SATADOM-" and model-name="TYPE D 3SE" (all except purley) + * - vendor-name="SATADOM" and model-name="3SE" (purley only) + * - vendor-name="SATADOM" and model-name="D150SH" (all other) + * + * HOW THIS SHIM MATCHES DEVICE TO SHIM? + * The decision is made based on "struct boot_media" (derived from boot config) passed to the register method. The + * only criterion used is the physical size of the disk. The *first* device which is smaller than or equal to + * boot_media->dom_size_mib will be shimmed. If a subsequent device matching this rule appears a warning will be + * triggered. + * This sounds quite unusual. We considered multiple options before going that route: + * - Unlike USB we cannot easily match SATA devices using any stable identifier so any VID/PID was out of the window + * - S/N sounds like a good candidate unless you realize hypervisors use the same one for all disks + * - Vendor/Model names cannot be edited by the user and hypervisors use the same one for all disks + * - Host/Port location can change (and good luck updating it in the boot params every time) + * - The only stable factor seems to be size + * + * HOW IT WORKS FOR HOT PLUGGED DEVICES? + * While the USB boot shim depends on a race condition (albeit a pretty stable one) there's no way to use the same + * method for SATA, despite both of them using SCSI under the hood. This is because true SCSI/SATA devices are directly + * supported by the drivers/scsi/sd.c which generates no events before the type is determined. Because of this we + * decided to exploit the dynamic nature of Linux drivers subsystem. 
All drivers register their buses with the kernel + * and are automatically informed by the kernel if something appears on these buses (either during boot or via hot plug) + * + * This module simply asks the kernel drivers subsystem for the driver registered for "sd" (SCSI) devices. Then it + * replaces its trigger function pointer. Normally it points to drivers/scsi/sd.c:sd_probe() which "probes" and configs + * the device. Our sd_probe_shim() first reads the capacity and if criteria are met (see section above) it replaces + * the vendor & model names and passes the control to the real sd_probe(). If nothing matches it transparently calls + * the real sd_probe(). + * + * If you're debugging you can test it without restarting the whole SD by removing and re-adding device. For example for + * "sd 6:0:0:0: [sdg] 630784 512-byte logical blocks: (322 MB/308 MiB)" you should do: + * echo 1 > /sys/block/sdg/device/delete # change SDG to the correct device + * echo "0 0 0" > /sys/class/scsi_host/host6/scan # host 6 is the same as "sd 6:..." notation in dmesg + * Warning: rescans and delete hard-yanks the device from controller so DO NOT do this on a disk with important data! + * + * HOW IT WORKS FOR EXISTING DEVICES? + * Unfortunately, our sd_probe() replacement is still a bit of a race condition. However, this time we're racing with + * SCSI driver loading which usually isn't a module. Because of this we need to expect some (probably all) devices to be + * already probed. We need to do essentially what's described above (with /sys) but from kernel space. + * To avoid any crashes and possible data loss we are never touching disks which aren't SATA and matching the size + * match criterion. In other words this shim will NOT yank a data drive from the system. + * + * WHAT IF THIS CODE LOADS BEFORE THE DRIVER?! 
+ * Despite the SCSI driver being one of the first things loaded by the kernel and something which almost everywhere is + * baked into the kernel there's a way to load our module earlier (via ioscheduler). In such a case we cannot even use + * driver_find("sd", ...) as it will return NULL (since there's no driver for "sd" *yet*). In such a case we can hook + * "scsi_register_driver()" which is an exported symbol (==it will last) and keep it hooked until we find the + * registration of "sd" driver in particular (as SCSI also handles CDROMs, USBs, iSCSI and others) + * + * THE FINAL PICTURE + * Ok, it is pretty complex indeed. Here's the decision tree this submodule goes through: + * register_native_sata_boot_shim() + * => driver_find("sd", ...) + * ===FOUND=== + * + shim sd_probe() to sd_probe_shim() + * + * + probe_existing_devices() + * + * ===NOT FOUND=== + * + override scsi_register_driver() [using start_scsi_register_driver_watcher()] + * + * ===scsi_register_driver() called & drv->name is "sd"=== [see scsi_register_driver_shim()] + * + modify drv->probe to &sd_probe_shim + * + stop_scsi_register_driver_watcher() + * + * + call [now original] scsi_register_driver() + * + * + * KNOWN LIMITATIONS + * If you hot-unplug that SATA drive which is used for synoboot it will NOT be shimmed the next time you plug it without + * rebooting. This is because we were lazy and didn't implement the removal shimming (as this behavior isn't defined + * anyway with synoboot devices as they're not user-removable). + * + * This shim is only supported on kernels compiled with CONFIG_SYNO_BOOT_SATA_DOM enabled. Kernels built without that + * option will never check for the vendor/model names, so no drive will ever be considered SYNOBOOT. 
+ * + * SOURCES + * - Synology's kernel GPL source -> drivers/scsi/sd.c, search for "gSynoBootSATADOM" + * - https://www.seagate.com/files/staticfiles/support/docs/manual/Interface%20manuals/100293068j.pdf + */ +#include "native_sata_boot_shim.h" +#include "../../common.h" +#include "../../config/runtime_config.h" //consts, NATIVE_SATA_DOM_SUPPORTED + +#ifdef NATIVE_SATA_DOM_SUPPORTED +#include "boot_shim_base.h" //set_shimmed_boot_dev(), get_shimmed_boot_dev(), scsi_is_boot_dev_target() +#include "../shim_base.h" //shim_reg_*(), scsi_ureg_*() +#include "../../internal/call_protected.h" //scsi_scan_host_selected() +#include "../../internal/scsi/scsi_toolbox.h" //scsi_force_replug(), for_each_scsi_disk() +#include "../../internal/scsi/scsi_notifier.h" //watching for new devices to shim them as they appear +#include //struct scsi_device + +#define SHIM_NAME "native SATA DOM boot device" + +static const struct boot_media *boot_dev_config = NULL; //passed to scsi_is_shim_target() + +//Structure for watching for new devices (via SCSI notifier / scsi_notifier.c event system) +static int on_new_scsi_disk(struct notifier_block *self, unsigned long state, void *data); +static struct notifier_block scsi_disk_nb = { + .notifier_call = on_new_scsi_disk, + .priority = INT_MAX //We want to be LAST, after all other possible fixes has been already applied +}; + +/********************************************* Actual shimming routines ***********************************************/ +/** + * Attempts to shim the device passed + * + * @return 0 if device was successfully shimmed, -E on error + */ +static int shim_device(struct scsi_device *sdp) +{ + pr_loc_dbg("Trying to shim SCSI device vendor=\"%s\" model=\"%s\"", sdp->vendor, sdp->model); + + if (get_shimmed_boot_dev()) { + pr_loc_wrn("The device should be shimmed but another device has been already shimmed as boot dev." 
+ "Device has been ignored."); + return -EEXIST; + } + + pr_loc_dbg("Shimming device to vendor=\"%s\" model=\"%s\"", CONFIG_SYNO_SATA_DOM_VENDOR, + CONFIG_SYNO_SATA_DOM_MODEL); + strcpy((char *)sdp->vendor, CONFIG_SYNO_SATA_DOM_VENDOR); + strcpy((char *)sdp->model, CONFIG_SYNO_SATA_DOM_MODEL); + set_shimmed_boot_dev(sdp); + + return 0; +} + +/** + * Handles registration of newly plugged SCSI/SATA devices. It's called by the SCSI notifier automatically. + * + * @return NOTIFY_* + */ +static __used int on_new_scsi_disk(struct notifier_block *self, unsigned long state, void *data) +{ + if (state != SCSI_EVT_DEV_PROBING) + return NOTIFY_DONE; + + struct scsi_device *sdp = data; + + pr_loc_dbg("Found new SCSI disk vendor=\"%s\" model=\"%s\": checking boot shim viability", sdp->vendor, sdp->model); + if (!scsi_is_boot_dev_target(boot_dev_config, sdp)) + return NOTIFY_OK; + + int err = shim_device(data); + if (unlikely(err != 0)) { + //If we let the device register it may be misinterpreted as a normal disk and possibly formatted + pr_loc_err("Shimming process failed with error=%d - " + "preventing the device from appearing in the OS to avoid possible damage", err); + return NOTIFY_BAD; + } + + return NOTIFY_OK; +} + +/** + * Processes existing device and if it's a SATA drive which matches shim criteria it will be unplugged & replugged to be + * shimmed + * + * @param sdp This "struct device" should already be guaranteed to be an scsi_device with type=TYPE_DISK (i.e. returning + * "true" from is_scsi_disk()) + * + * @return 0 means "continue calling me" while any other value means "I found what I was looking for, stop calling me". 
+ * This convention is based on how bus_for_each_dev() works + */ +static int on_existing_scsi_disk(struct scsi_device *sdp) +{ + pr_loc_dbg("Found existing SCSI disk vendor=\"%s\" model=\"%s\": checking boot shim viability", sdp->vendor, + sdp->model); + + if (!scsi_is_boot_dev_target(boot_dev_config, sdp)) + return 0; + + //So, now we know it's a shimmable target but we cannot just call shim_device() as this will change vendor+model on + // already connected device, which will change these information but will not trigger syno type change. When we + // disconnect & reconnect the device it will reappear and go through the on_new_scsi_disk() route. + pr_loc_inf("SCSI disk vendor=\"%s\" model=\"%s\" is already connected but it's a boot dev. " + "It will be forcefully reconnected to shim it as boot dev.", sdp->vendor, sdp->model); + + int out = scsi_force_replug(sdp); + if (out < 0) + pr_loc_err("Failed to replug the SCSI device (error=%d) - it may not shim as expected", out); + else + pr_loc_dbg("SCSI device replug triggered successfully"); + + return 1; +} + +/****************************************** Standard public API of the shim *******************************************/ +static bool shim_registered = false; +int register_native_sata_boot_shim(const struct boot_media *config) +{ + shim_reg_in(); + + //Regardless of the method we must set the expected size (in config) as shim may be called any moment from now on + boot_dev_config = config; + int out = 0; + + if (unlikely(boot_dev_config->type != BOOT_MEDIA_SATA_DOM)) { + pr_loc_bug("%s doesn't support device type %d", __FUNCTION__, boot_dev_config->type); + out = -EINVAL; + goto fail; + } + + if (unlikely(shim_registered)) { + pr_loc_bug("Native SATA boot shim is already registered"); + out = -EEXIST; + goto fail; + } + + /* We always set-up watching for new devices, as the SCSI notifier is smart enough to accept new subscribers + * regardless of the driver state, but if the driver is already loaded we also 
need to take care of existing devs. + * Additionally, subscribing for notifications will, in the future, give us info if a device went away. + */ + out = subscribe_scsi_disk_events(&scsi_disk_nb); + if (unlikely(out != 0)) { + pr_loc_err("Failed to register for SCSI disks notifications - error=%d", out); + goto fail; + } + + //This will already check if driver is loaded and only iterate if it is + out = for_each_scsi_disk(on_existing_scsi_disk); + //0 means "call me again" or "success", 1 means "found what I wanted, stop iterating", -ENXIO is "driver not ready" + if (unlikely(out < 0 && out != -ENXIO)) { + pr_loc_dbg("SCSI driver is already loaded but iteration over existing devices failed - error=%d", out); + goto fail_unwatch; + } + + shim_registered = true; + shim_reg_ok(); + return 0; + + fail_unwatch: + unsubscribe_scsi_disk_events(&scsi_disk_nb); //we keep the original code, so this function return code is ignored + fail: + boot_dev_config = NULL; + return out; +} + +int unregister_native_sata_boot_shim(void) +{ + shim_ureg_in(); + + if (unlikely(!shim_registered)) { + pr_loc_bug("Native SATA boot shim is not registered"); + return -ENOENT; + } + + int out = unsubscribe_scsi_disk_events(&scsi_disk_nb); + if (out != 0) + pr_loc_err("Failed to unsubscribe from SCSI events"); + + + //@todo we are consciously NOT doing reset_shimmed_boot_dev(). 
It may be registered and we're not doing anything to + // unregister it + + shim_registered = false; + shim_ureg_ok(); + return 0; +} +#else //ifdef NATIVE_SATA_DOM_SUPPORTED +int register_native_sata_boot_shim(const struct boot_media *boot_dev_config) +{ + pr_loc_err("Native SATA boot shim cannot be registered in a kernel built without SATA DoM support"); + return -ENODEV; +} + +int unregister_native_sata_boot_shim(void) +{ + pr_loc_err("Native SATA boot shim cannot be unregistered in a kernel built without SATA DoM support"); + return -ENODEV; +} +#endif //ifdef else NATIVE_SATA_DOM_SUPPORTED \ No newline at end of file diff --git a/shim/boot_dev/native_sata_boot_shim.h b/shim/boot_dev/native_sata_boot_shim.h new file mode 100644 index 0000000..3853b7e --- /dev/null +++ b/shim/boot_dev/native_sata_boot_shim.h @@ -0,0 +1,8 @@ +#ifndef REDPILL_NATIVE_SATA_BOOT_SHIM_H +#define REDPILL_NATIVE_SATA_BOOT_SHIM_H + +struct boot_media; +int register_native_sata_boot_shim(const struct boot_media *config); +int unregister_native_sata_boot_shim(void); + +#endif //REDPILL_NATIVE_SATA_BOOT_SHIM_H diff --git a/shim/boot_dev/usb_boot_shim.c b/shim/boot_dev/usb_boot_shim.c new file mode 100644 index 0000000..2dd3b10 --- /dev/null +++ b/shim/boot_dev/usb_boot_shim.c @@ -0,0 +1,271 @@ +/** + * Implements shimming USB device to look like an embedded USB device supported by the syno kernel + * If you didn't read the docs for shim/boot_device_shim.c go there and read it first! + * + * HOW THE KERNEL ASSIGNS SYNOBOOT TYPE? + * The determination of what is or isn't the correct synoboot device for USBs is made using VID & PID of the device. + * During normal operation both of them need to equal 0xf400 to be considered a boot device. In a special "mfg" mode the + * installation is forced with 0xf401 ids instead. + * + * HOW THIS SHIM MATCHES DEVICE TO SHIM? 
+ * The decision is made based on "struct boot_media" (derived from boot config) passed to the register method: + * - if vid/pid combo is set (i.e. not VID_PID_EMPTY) it must match the newly detected device + * - if vid/pid is not set (i.e. VID_PID_EMPTY) the first device is used (NOT recommended unless you don't use USB) + * - if a second device matching any of the criteria above appears a warning is emitted and device is ignored + * + * HOW IT WORKS? + * In order to dynamically change VID & PID of a USB device we need to modify device descriptor just after the device is + * detected by the USB subsystem. However it has to be done before the device is picked up by the SCSI subsystem, which + * is responsible for creating /dev/xxx entries. + * Since the assumption is that the USB stick is present from boot the sequence of events needs to look like that: + * 0. Kernel starts + * 1. This LKM is loaded + * 2. USB subsystem loads + * 3. Drive is detected + * 4. This LKM changes VID+PID + * 5. SCSI subsystem detects the device and creates a /dev/... node for it + * + * This poses several problems. First this module must load before USB subsystem. Then to get the device quicker than + * SCSI subsystem can a notification receiver is set up. However we need to wait to do this after the usbcore actually + * loads. To make sure it's the case a kernel module watcher is used. That's why symbols from usbcore are loaded + * dynamically, as at the moment of this LKM insertion they aren't available. In case usbcore is loaded before this LKM + * VID+PID change may not be effective (this scenario is supported pretty much for debugging only). + * This sequence is rather time sensitive. It shouldn't fail on any modern multicore system. 
+ * + * References + * - Synology's kernel GPL source -> drivers/scsi/sd.c, search for "IS_SYNO_USBBOOT_ID_" + * - https://0xax.gitbooks.io/linux-insides/content/Concepts/linux-cpu-4.html + * - https://lwn.net/Articles/160501/ + */ +#include "usb_boot_shim.h" +#include "boot_shim_base.h" //set_shimmed_boot_dev(), get_shimmed_boot_dev(), usb_shim_as_boot_dev() +#include "../shim_base.h" //shim_* +#include "../../common.h" +#include "../../config/runtime_config.h" //struct boot_device & consts +#include "../../internal/helper/symbol_helper.h" //kernel_has_symbol() +#include "../../internal/call_protected.h" //dynamically calling usb_* functions +#include +#include +#include //struct module + +#define SHIM_NAME "USB boot device" + +static bool module_notify_registered = false; +static bool device_notify_registered = false; +static const struct boot_media *boot_media = NULL; //passed to usb_shim_as_boot_dev() + +/** + * Responds to USB devices being added/removed + */ +static int device_notifier_handler(struct notifier_block *b, unsigned long event, void *data) +{ + struct usb_device *device = (struct usb_device*)data; + struct usb_device *prev_device = get_shimmed_boot_dev(); + + if (event == USB_DEVICE_ADD) { + //TODO: Can we even check if it matched mass storage here... (bInterfaceClass == USB_CLASS_MASS_STORAGE) + if (boot_media->vid == VID_PID_EMPTY || boot_media->pid == VID_PID_EMPTY) { + pr_loc_wrn("Your boot device VID and/or PID is not set - " + "using device found (prev_shimmed=%d)", + device->descriptor.idVendor, device->descriptor.idProduct, prev_device ? 1:0); + } else if (device->descriptor.idVendor != boot_media->vid || device->descriptor.idProduct != boot_media->pid) { + pr_loc_dbg("Found new device - " + "didn't match expected (prev_shimmed=%d)", + device->descriptor.idVendor, device->descriptor.idProduct, boot_media->vid, boot_media->pid, + prev_device ? 
1:0); + + return NOTIFY_OK; + } + + //This will happen especially when VID+PID weren't set and two USB devices were detected + if (prev_device) { + pr_loc_wrn("Boot device was already shimmed but a new matching device appeared again - " + "this may produce unpredictable outcomes! Ignoring - check your hardware"); + return NOTIFY_OK; + } + + usb_shim_as_boot_dev(boot_media, device); + set_shimmed_boot_dev(device); + + pr_loc_inf("Device shimmed to ", boot_media->vid, boot_media->pid, + device->descriptor.idVendor, device->descriptor.idProduct); + + return NOTIFY_OK; + } + + + if (prev_device && event == USB_DEVICE_REMOVE && device == prev_device) { + pr_loc_wrn("Previously shimmed boot device gone away"); + reset_shimmed_boot_dev(); + return NOTIFY_OK; + } + + return NOTIFY_OK; +} + +static struct notifier_block device_notifier_block = { + .notifier_call = device_notifier_handler, + .priority = INT_MIN, //We need to be first +}; +/** + * Watches for USB events + */ +static void register_device_notifier(void) +{ + //This should never happen but there's never enough error checking. 
+ //Even if the module was already loaded register_device_notifier() should not be called twice before module is + // unloaded and reloaded + if (unlikely(device_notify_registered)) { + pr_loc_bug("Device notify re-registration via %s w/o module unload (?!)", __FUNCTION__); + return; + } + + //This has to use dynamic calling to avoid being dependent on usbcore (since we need to load before usbcore) + _usb_register_notify(&device_notifier_block); //has no return value + + device_notify_registered = true; + pr_loc_dbg("Registered USB device notifier"); +} + +static int unregister_device_notifier(void) +{ + if (unlikely(!device_notify_registered)) { + pr_loc_bug("%s called while notifier not registered", __FUNCTION__); + return -ENOENT; + } + + //This has to use dynamic calling to avoid being dependent on usbcore (since we need to load before usbcore) + _usb_unregister_notify(&device_notifier_block); //has no return value + device_notify_registered = false; + pr_loc_dbg("Unregistered USB device notifier"); + + return 0; +} + +/** + * Responds to "usbcore" [and others] load + */ +static int ubscore_notifier_handler(struct notifier_block * self, unsigned long state, void * data) +{ + struct module *mod = data; + if (strcmp(mod->name, "usbcore") != 0) + return NOTIFY_OK; + + if (state == MODULE_STATE_GOING) { + //TODO: call unregister with some force flag? 
+ device_notify_registered = false; + reset_shimmed_boot_dev(); + pr_loc_wrn("usbcore module unloaded - this should not happen normally"); + return NOTIFY_OK; + } + + //This may need to be changed to MODULE_STATE_LIVE if MODULE_STATE_COMING is too early for device notification + if (state != MODULE_STATE_LIVE) + return NOTIFY_OK; + + pr_loc_dbg("usbcore registered, adding device watcher"); + register_device_notifier(); + + return NOTIFY_OK; +} + +static struct notifier_block usbcore_notifier_block = { + .notifier_call = ubscore_notifier_handler +}; +/** + * Watches for "usbcore" module load + */ +static int register_usbcore_notifier(void) +{ + int error = 0; + + if (unlikely(module_notify_registered)) { + pr_loc_bug("%s called while notifier already registered", __FUNCTION__); + return 0; //technically it's not an error + } + + error = register_module_notifier(&usbcore_notifier_block); + if(unlikely(error != 0)) { + pr_loc_err("Failed to register module notifier"); //Currently it's impossible to happen... currently + return error; + } + + module_notify_registered = true; + pr_loc_dbg("Registered usbcore module notifier"); + + //check if usbcore is MAYBE already loaded and give a warning + call register_device_notifier() manually + // this state is FINE for debugging but IS NOT FINE for production use + //We're using kernel_has_symbol() to not acquire module mutex needed for module checks + if (kernel_has_symbol("usb_register_notify")) { + pr_loc_wrn("usbcore module is already loaded (did you load this module too late?) 
" + "-> registering device notifier right away"); + register_device_notifier(); + } + + return error; +} + +static int unregister_usbcore_notifier(void) +{ + if (unlikely(!module_notify_registered)) { //unregister should be called first if so + pr_loc_bug("%s called while notifier not registered", __FUNCTION__); + return -ENOENT; + } + + int out = unregister_module_notifier(&usbcore_notifier_block); + if(unlikely(out != 0)) { + pr_loc_err("Failed to unregister module notifier"); //Currently it's impossible to happen... currently + return out; + } + + module_notify_registered = false; + pr_loc_dbg("Unregistered usbcore module notifier"); + + return 0; +} + +int register_usb_boot_shim(const struct boot_media *boot_dev_config) +{ + shim_reg_in(); + + if (unlikely(boot_dev_config->type != BOOT_MEDIA_USB)) { + pr_loc_bug("%s doesn't support device type %d", __FUNCTION__, boot_dev_config->type); + return -EINVAL; + } + + if (unlikely(boot_media)) { + pr_loc_bug("USB boot shim is already registered"); + return -EEXIST; + } + + boot_media = boot_dev_config; + + int out = register_usbcore_notifier(); //it will register device notifier when module loads + if (out != 0) + return out; + + shim_reg_ok(); + return out; +} + +int unregister_usb_boot_shim(void) +{ + shim_ureg_in(); + + if (unlikely(!boot_media)) { + pr_loc_bug("USB boot shim is not registered"); + return -ENOENT; + } + + int out = 0; + if ( + (out = unregister_usbcore_notifier()) != 0 + || (out = unregister_device_notifier()) != 0 + ) + return out; + + boot_media = NULL; + + shim_ureg_ok(); + return out; +} diff --git a/shim/boot_dev/usb_boot_shim.h b/shim/boot_dev/usb_boot_shim.h new file mode 100644 index 0000000..1220dd0 --- /dev/null +++ b/shim/boot_dev/usb_boot_shim.h @@ -0,0 +1,8 @@ +#ifndef REDPILL_USB_BOOT_SHIM_H +#define REDPILL_USB_BOOT_SHIM_H + +struct boot_media; +int register_usb_boot_shim(const struct boot_media *boot_dev_config); +int unregister_usb_boot_shim(void); + +#endif 
//REDPILL_USB_BOOT_SHIM_H diff --git a/shim/boot_device_shim.c b/shim/boot_device_shim.c new file mode 100644 index 0000000..1c6bcb6 --- /dev/null +++ b/shim/boot_device_shim.c @@ -0,0 +1,109 @@ +/** + * Boot device shim ensures that DSM assigns a proper /dev/ device to our USB stick or SATA DOM + * + * WHY IS THIS NEEDED? + * In short the DSM has multiple types of SCSI devices (boot device, USB drive, eSATA drive etc). The boot device is + * always mounted to /dev/synoboot (with respective partitions at /dev/synobootN). The determination what to place there + * is made based on different factors depending on the type of device used to boot (see drivers/scsi/sd.c): + * 1) Standard USB stick + * - it has to be connected via a real USB port (i.e. not a fake-usb-over-sata like ESXi tries to do) + * - it must have VID/PID combo of 0xf400/0xf400 + * - allows for normal boot when OS is installed, or triggers OS install/repair screen + * 2) Force-install USB stick + * - it has to be connected via a real USB port + * - it must have VID/PID combo of 0xf401/0xf401 + * - always triggers OS install/repair screen + * 3) SATA DOM (Disk-on-Module) + * - kernel is compiled with SATA DOM support (NATIVE_SATA_DOM_SUPPORTED => CONFIG_SYNO_BOOT_SATA_DOM) + * - is a real SATA (i.e. not SCSI/iSCSI/VirtIO) device + * - has platform dependent vendor/model strings of CONFIG_SYNO_SATA_DOM_VENDOR/CONFIG_SYNO_SATA_DOM_MODEL + * - has platform dependent vendor/model strings of CONFIG_SYNO_SATA_DOM_VENDOR_SECOND_SRC/CONFIG_SYNO_SATA_DOM_MODEL_SECOND_SRC + * - SATA DOM *cannot* be used to force-reinstall (as there isn't an equivalent of USB's VID/PID of 0xf401/0xf401) + * - restrictions of native SATA-DOM are lifted by sata_port_shim.c and fake_sata_boot_shim.c + * + * There are other special ones (e.g. iSCSI) which aren't supported here. These only apply to small subset of platforms. + * + * HOW IT WORKS? 
+ * Depending on the runtime configuration this shim will either engage USB-based shim or SATA-based one. See respective + * implementations in shim/boot_dev/. + * + * References: + * - See drivers/scsi/sd.c in Linux sources (especially sd_probe() method) + */ +#define SHIM_NAME "boot device router" + +#include "boot_device_shim.h" +#include "shim_base.h" +#include "../common.h" +#include "../config/runtime_config.h" +#include "boot_dev/usb_boot_shim.h" +#include "boot_dev/fake_sata_boot_shim.h" +#include "boot_dev/native_sata_boot_shim.h" + +#define BOOT_MEDIA_SHIM_NULL (-1) + +static int registered_type = BOOT_MEDIA_SHIM_NULL; +int register_boot_shim(const struct boot_media *boot_dev_config) +{ + shim_reg_in(); + + if (unlikely(registered_type != BOOT_MEDIA_SHIM_NULL)) { + pr_loc_bug("Boot shim is already registered with type=%d", registered_type); + return -EEXIST; + } + + int out; + switch (boot_dev_config->type) { + case BOOT_MEDIA_USB: + out = register_usb_boot_shim(boot_dev_config); + break; + case BOOT_MEDIA_SATA_DOM: + out = register_native_sata_boot_shim(boot_dev_config); + break; + case BOOT_MEDIA_SATA_DISK: + out = register_fake_sata_boot_shim(boot_dev_config); + break; + default: + pr_loc_bug("Failed to %s - unknown type=%d", __FUNCTION__, boot_dev_config->type); + return -EINVAL; + } + + if (out != 0) + return out; //individual shims should print what went wrong + + registered_type = boot_dev_config->type; + + shim_reg_ok(); + return 0; +} + +int unregister_boot_shim(void) +{ + shim_ureg_in(); + + int out; + switch (registered_type) { + case BOOT_MEDIA_USB: + out = unregister_usb_boot_shim(); + break; + case BOOT_MEDIA_SATA_DOM: + out = unregister_native_sata_boot_shim(); + break; + case BOOT_MEDIA_SATA_DISK: + out = unregister_fake_sata_boot_shim(); + case BOOT_MEDIA_SHIM_NULL: + pr_loc_bug("Boot shim is no registered"); + return -ENOENT; + default: //that cannot happen unless register_boot_shim() is broken + pr_loc_bug("Failed to %s - unknown 
type=%d", __FUNCTION__, registered_type); + return -EINVAL; + } + + if (out != 0) + return out; //individual shims should print what went wrong + + registered_type = BOOT_MEDIA_SHIM_NULL; + + shim_ureg_ok(); + return 0; +} \ No newline at end of file diff --git a/shim/boot_device_shim.h b/shim/boot_device_shim.h new file mode 100644 index 0000000..71df881 --- /dev/null +++ b/shim/boot_device_shim.h @@ -0,0 +1,8 @@ +#ifndef REDPILLLKM_BOOT_DEVICE_SHIM_H +#define REDPILLLKM_BOOT_DEVICE_SHIM_H + +struct boot_media; +int register_boot_shim(const struct boot_media *boot_dev_config); +int unregister_boot_shim(void); + +#endif //REDPILLLKM_BOOT_DEVICE_SHIM_H diff --git a/shim/disable_exectutables.c b/shim/disable_exectutables.c new file mode 100644 index 0000000..2df725c --- /dev/null +++ b/shim/disable_exectutables.c @@ -0,0 +1,37 @@ +#define SHIM_NAME "common executables disabler" + +#include "disable_exectutables.h" +#include "shim_base.h" +#include "../common.h" +#include "../internal/intercept_execve.h" + +#define PSTORE_PATH "/usr/syno/bin/syno_pstore_collect" +#define BOOTLOADER_UPDATE1_PATH "uboot_do_upd.sh" +#define BOOTLOADER_UPDATE2_PATH "./uboot_do_upd.sh" +#define SAS_FW_UPDATE_PATH "/tmpData/upd@te/sas_fw_upgrade_tool" + +int register_disable_executables_shim(void) +{ + shim_reg_in(); + + int out; + if ( + (out = add_blocked_execve_filename(BOOTLOADER_UPDATE1_PATH)) != 0 + || (out = add_blocked_execve_filename(BOOTLOADER_UPDATE2_PATH)) != 0 + || (out = add_blocked_execve_filename(PSTORE_PATH)) != 0 + || (out = add_blocked_execve_filename(SAS_FW_UPDATE_PATH)) != 0 + ) { + pr_loc_bug("Failed to disable some executables"); + return out; + } + + shim_reg_ok(); + return 0; +} + +int unregister_disable_executables_shim(void) +{ + //noop - execve entries will be cleared in one sweep during unregister of interceptor (it's much faster this way) + //this function is kept for consistency + return 0; +} \ No newline at end of file diff --git 
a/shim/disable_exectutables.h b/shim/disable_exectutables.h new file mode 100644 index 0000000..7ef62e6 --- /dev/null +++ b/shim/disable_exectutables.h @@ -0,0 +1,7 @@ +#ifndef REDPILL_DISABLE_EXECTUTABLES_H +#define REDPILL_DISABLE_EXECTUTABLES_H + +int register_disable_executables_shim(void); +int unregister_disable_executables_shim(void); + +#endif //REDPILL_DISABLE_EXECTUTABLES_H diff --git a/shim/pci_shim.c b/shim/pci_shim.c new file mode 100644 index 0000000..346cfb6 --- /dev/null +++ b/shim/pci_shim.c @@ -0,0 +1,243 @@ +#define SHIM_NAME "PCI devices" + +#include "pci_shim.h" +#include "shim_base.h" +#include "../common.h" +#include "../config/vpci_types.h" //MAX_VPCI_DEVS, pci_shim_device_type +#include "../config/platform_types.h" //hw_config +#include "../internal/virtual_pci.h" +#include + +unsigned int free_dev_idx = 0; +static void *devices[MAX_VPCI_DEVS] = { NULL }; + +static struct pci_dev_descriptor *allocate_vpci_dev_dsc(void) { + if (free_dev_idx >= MAX_VPCI_DEVS) { + /*index has to be at max MAX_VPCI_DEVS-1*/ + pr_loc_bug("No more device indexes are available (max devs: %d)", MAX_VPCI_DEVS); + return ERR_PTR(-ENOMEM); + } + + struct pci_dev_descriptor *dev_dsc; + kmalloc_or_exit_ptr(dev_dsc, sizeof(struct pci_dev_descriptor)); + memcpy(dev_dsc, &pci_dev_conf_default_normal_dev, sizeof(struct pci_dev_descriptor)); + devices[free_dev_idx++] = dev_dsc; + + return dev_dsc; +} +#define allocate_vpci_dev_dsc_var() \ + struct pci_dev_descriptor *dev_dsc = allocate_vpci_dev_dsc(); \ + if (IS_ERR(dev_dsc)) return PTR_ERR(dev_dsc); + +static int +add_vdev(struct pci_dev_descriptor *dev_dsc, unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, + bool is_mf) +{ + const struct virtual_device *vpci_vdev; + + if (is_mf) { + vpci_vdev = vpci_add_multifunction_device(bus_no, dev_no, fn_no, dev_dsc); + } else if(unlikely(fn_no != 0x00)) { + //Making such config will either cause the device to not show up at all or only fn_no=0 one will show u + 
pr_loc_bug("%s called with non-MF device but non-zero fn_no", __FUNCTION__); + return -EINVAL; + } else { + vpci_vdev = vpci_add_single_device(bus_no, dev_no, dev_dsc); + } + + return IS_ERR(vpci_vdev) ? PTR_ERR(vpci_vdev) : 0; +} + +/** + * Adds a fake Marvell controller + * + * These errors in kernlog are normal (as we don't emulate the behavior of the controller as it's not needed): + * pci 0001:0a:00.0: Can't map mv9235 registers + * ahci: probe of 0001:0a:00.0 failed with error -22 + * + * @return 0 on success or -E + */ +static inline int +vdev_add_generic_marvell_ahci(u16 dev, unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, bool is_mf) +{ + allocate_vpci_dev_dsc_var(); + dev_dsc->vid = PCI_VENDOR_ID_MARVELL_EXT; + dev_dsc->dev = dev; + dev_dsc->rev_id = 0x11; //All Marvells so far use revision 11 + dev_dsc->class = U24_CLASS_TO_U8_CLASS(PCI_CLASS_STORAGE_SATA_AHCI); + dev_dsc->subclass = U24_CLASS_TO_U8_SUBCLASS(PCI_CLASS_STORAGE_SATA_AHCI); + dev_dsc->prog_if = U24_CLASS_TO_U8_PROGIF(PCI_CLASS_STORAGE_SATA_AHCI); + return add_vdev(dev_dsc, bus_no, dev_no, fn_no, is_mf); +} + +static int vdev_add_MARVELL_88SE9235(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, bool is_mf) +{ + return vdev_add_generic_marvell_ahci(0x9235, bus_no, dev_no, fn_no, is_mf); +} + +static int vdev_add_MARVELL_88SE9215(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, bool is_mf) +{ + return vdev_add_generic_marvell_ahci(0x9215, bus_no, dev_no, fn_no, is_mf); +} + +static int vdev_add_INTEL_I211(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, bool is_mf) +{ + allocate_vpci_dev_dsc_var(); + dev_dsc->vid = PCI_VENDOR_ID_INTEL; + dev_dsc->dev = 0x1539; + dev_dsc->rev_id = 0x03; //Not confirmed + dev_dsc->class = U16_CLASS_TO_U8_CLASS(PCI_CLASS_NETWORK_ETHERNET); + dev_dsc->subclass = U16_CLASS_TO_U8_SUBCLASS(PCI_CLASS_NETWORK_ETHERNET); + return add_vdev(dev_dsc, bus_no, dev_no, fn_no, is_mf); +} + +static int 
vdev_add_INTEL_X552(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, bool is_mf) +{ + allocate_vpci_dev_dsc_var(); + dev_dsc->vid = PCI_VENDOR_ID_INTEL; + dev_dsc->dev = 0x15ad; + dev_dsc->rev_id = 0x03; //Not confirmed + dev_dsc->class = U16_CLASS_TO_U8_CLASS(PCI_CLASS_NETWORK_ETHERNET); + dev_dsc->subclass = U16_CLASS_TO_U8_SUBCLASS(PCI_CLASS_NETWORK_ETHERNET); + return add_vdev(dev_dsc, bus_no, dev_no, fn_no, is_mf); +} + + +static int vdev_add_INTEL_CPU_AHCI_CTRL(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, bool is_mf) +{ + allocate_vpci_dev_dsc_var(); + dev_dsc->vid = PCI_VENDOR_ID_INTEL; + dev_dsc->dev = 0x5ae3; + dev_dsc->class = U24_CLASS_TO_U8_CLASS(PCI_CLASS_STORAGE_SATA_AHCI); + dev_dsc->subclass = U24_CLASS_TO_U8_SUBCLASS(PCI_CLASS_STORAGE_SATA_AHCI); + dev_dsc->prog_if = U24_CLASS_TO_U8_PROGIF(PCI_CLASS_STORAGE_SATA_AHCI); + return add_vdev(dev_dsc, bus_no, dev_no, fn_no, is_mf); +} + +//This technically should be a bridge but we don't have the info to recreate full tree +static inline int +vdev_add_generic_intel_pcie(u16 dev, unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, bool is_mf) { + allocate_vpci_dev_dsc_var(); + dev_dsc->vid = PCI_VENDOR_ID_INTEL; + dev_dsc->dev = dev; + dev_dsc->class = U16_CLASS_TO_U8_CLASS(PCI_CLASS_BRIDGE_PCI); + dev_dsc->subclass = U16_CLASS_TO_U8_SUBCLASS(PCI_CLASS_BRIDGE_PCI); + return add_vdev(dev_dsc, bus_no, dev_no, fn_no, is_mf); +} + +static int vdev_add_INTEL_CPU_PCIE_PA(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, bool is_mf) +{ + return vdev_add_generic_intel_pcie(0x5ad8, bus_no, dev_no, fn_no, is_mf); +} + +static int vdev_add_INTEL_CPU_PCIE_PB(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, bool is_mf) +{ + return vdev_add_generic_intel_pcie(0x5ad6, bus_no, dev_no, fn_no, is_mf); +} + +static int vdev_add_INTEL_CPU_USB_XHCI(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, bool is_mf) +{ + 
allocate_vpci_dev_dsc_var(); + dev_dsc->vid = PCI_VENDOR_ID_INTEL; + dev_dsc->dev = 0x5aa8; + dev_dsc->class = U24_CLASS_TO_U8_CLASS(PCI_CLASS_SERIAL_USB_XHCI); + dev_dsc->subclass = U24_CLASS_TO_U8_SUBCLASS(PCI_CLASS_SERIAL_USB_XHCI); + dev_dsc->prog_if = U24_CLASS_TO_U8_PROGIF(PCI_CLASS_SERIAL_USB_XHCI); + return add_vdev(dev_dsc, bus_no, dev_no, fn_no, is_mf); +} + +static inline int +vdev_add_generic_intel_io(u16 dev, unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, bool is_mf) +{ + allocate_vpci_dev_dsc_var(); + dev_dsc->vid = PCI_VENDOR_ID_INTEL; + dev_dsc->dev = dev; + dev_dsc->class = U16_CLASS_TO_U8_CLASS(PCI_CLASS_SP_OTHER); + dev_dsc->subclass = U16_CLASS_TO_U8_SUBCLASS(PCI_CLASS_SP_OTHER); + return add_vdev(dev_dsc, bus_no, dev_no, fn_no, is_mf); +} + +static int vdev_add_INTEL_CPU_I2C(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, bool is_mf) +{ + return vdev_add_generic_intel_io(0x5aac, bus_no, dev_no, fn_no, is_mf); +} + +static int vdev_add_INTEL_CPU_HSUART(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, bool is_mf) +{ + return vdev_add_generic_intel_io(0x5abc, bus_no, dev_no, fn_no, is_mf); +} + +static int vdev_add_INTEL_CPU_SPI(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, bool is_mf) +{ + return vdev_add_generic_intel_io(0x5ac6, bus_no, dev_no, fn_no, is_mf); +} + +static int vdev_add_INTEL_CPU_SMBUS(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, bool is_mf) +{ + allocate_vpci_dev_dsc_var(); + dev_dsc->vid = PCI_VENDOR_ID_INTEL; + dev_dsc->dev = 0x5ad4; + dev_dsc->class = U16_CLASS_TO_U8_CLASS(PCI_CLASS_SERIAL_SMBUS); + dev_dsc->subclass = U16_CLASS_TO_U8_SUBCLASS(PCI_CLASS_SERIAL_SMBUS); + + return add_vdev(dev_dsc, bus_no, dev_no, fn_no, is_mf); +} + +static int (*dev_type_handler_map[])(unsigned char bus_no, unsigned char dev_no, unsigned char fn_no, bool is_mf) = { + [VPD_MARVELL_88SE9235] = vdev_add_MARVELL_88SE9235, + [VPD_MARVELL_88SE9215] = 
vdev_add_MARVELL_88SE9215, + [VPD_INTEL_I211] = vdev_add_INTEL_I211, + [VPD_INTEL_X552] = vdev_add_INTEL_X552, + [VPD_INTEL_CPU_AHCI_CTRL] = vdev_add_INTEL_CPU_AHCI_CTRL, + [VPD_INTEL_CPU_PCIE_PA] = vdev_add_INTEL_CPU_PCIE_PA, + [VPD_INTEL_CPU_PCIE_PB] = vdev_add_INTEL_CPU_PCIE_PB, + [VPD_INTEL_CPU_USB_XHCI] = vdev_add_INTEL_CPU_USB_XHCI, + [VPD_INTEL_CPU_I2C] = vdev_add_INTEL_CPU_I2C, + [VPD_INTEL_CPU_HSUART] = vdev_add_INTEL_CPU_HSUART, + [VPD_INTEL_CPU_SPI] = vdev_add_INTEL_CPU_SPI, + [VPD_INTEL_CPU_SMBUS] = vdev_add_INTEL_CPU_SMBUS, +}; + +int register_pci_shim(const struct hw_config *hw) +{ + shim_reg_in(); + + pr_loc_dbg("Creating vPCI devices for %s", hw->name); + int out; + for (int i = 0; i < MAX_VPCI_DEVS; i++) { + if (hw->pci_stubs[i].type == __VPD_TERMINATOR__) + break; + + pr_loc_dbg("Calling %ps with B:D:F=%02x:%02x:%02x mf=%d", dev_type_handler_map[hw->pci_stubs[i].type], + hw->pci_stubs[i].bus, hw->pci_stubs[i].dev, hw->pci_stubs[i].fn, + hw->pci_stubs[i].multifunction ? 
1 : 0); + + out = dev_type_handler_map[hw->pci_stubs[i].type](hw->pci_stubs[i].bus, hw->pci_stubs[i].dev, + hw->pci_stubs[i].fn, hw->pci_stubs[i].multifunction); + + if (out != 0) { + pr_loc_err("Failed to create vPCI device B:D:F=%02x:%02x:%02x - error=%d", hw->pci_stubs[i].bus, + hw->pci_stubs[i].dev, hw->pci_stubs[i].fn, out); + return out; + } + + pr_loc_dbg("vPCI device %d created successfully", i+1); + } + + shim_reg_ok(); + return 0; +} + +int unregister_pci_shim(void) +{ + shim_ureg_in(); + vpci_remove_all_devices_and_buses(); + + for (int i = 0; i < free_dev_idx; i++) { + pr_loc_dbg("Free PCI dev %d @ %p", i, devices[i]); + kfree(devices[i]); + } + + shim_ureg_ok(); + return -EIO; //vpci_remove_all_devices_and_buses has a bug - this is a canary to not forget +} diff --git a/shim/pci_shim.h b/shim/pci_shim.h new file mode 100644 index 0000000..8aac673 --- /dev/null +++ b/shim/pci_shim.h @@ -0,0 +1,24 @@ +#ifndef REDPILL_PCI_SHIM_H +#define REDPILL_PCI_SHIM_H + +enum pci_shim_device_type { + __VPD_TERMINATOR__, + VPD_MARVELL_88SE9235, //1b4b:9235 + VPD_MARVELL_88SE9215, //1b4b:9215 + VPD_INTEL_I211, //8086:1539 + VPD_INTEL_X552, //8086:15ad + VPD_INTEL_CPU_AHCI_CTRL, //8086:5ae3 + VPD_INTEL_CPU_PCIE_PA, //8086:5ad8 + VPD_INTEL_CPU_PCIE_PB, //8086:5ad6 + VPD_INTEL_CPU_USB_XHCI, //8086:5aa8 + VPD_INTEL_CPU_I2C, //8086:5aac + VPD_INTEL_CPU_HSUART, //8086:5abc + VPD_INTEL_CPU_SPI, //8086:5ac6 + VPD_INTEL_CPU_SMBUS, //8086:5ad4 +}; + +typedef struct hw_config hw_config_; +int register_pci_shim(const struct hw_config *hw); +int unregister_pci_shim(void); + +#endif //REDPILL_PCI_SHIM_H diff --git a/shim/pmu_shim.c b/shim/pmu_shim.c new file mode 100644 index 0000000..3be1e49 --- /dev/null +++ b/shim/pmu_shim.c @@ -0,0 +1,387 @@ +#define SHIM_NAME "PMU emulator" + +#include "pmu_shim.h" +#include "shim_base.h" +#include "../common.h" +#include "../internal/uart/virtual_uart.h" +#include //kfifo_* + +#define PMU_TTYS_LINE 1 //so far this is hardcoded by syno, so we 
doubt it will ever change +#define WORK_BUFFER_LEN VUART_FIFO_LEN +#define to_hex_buf_len(len) ((len)*3+1) //2 chars for each hex + space + NULL terminator +#define HEX_BUFFER_LEN to_hex_buf_len(VUART_FIFO_LEN) + +//PMU packets are at minimum 2 bytes long (PMU_CMD_HEAD + 1-3 bytes command + optional data). If this is set to a high +// value (e.g. VUART_FIFO_LEN) in practice commands will only be delivered when the client indicates end-of-transmission) +// which may not be bad... +#define PMU_MIN_PACKET 2 +#define PMU_CMD_HEAD 0x2d //every PMU packet is delimited by containing 0x2d (ASCII "-"/dash) as its first character + +typedef struct command_definition command_definition; + +/** + * A single PMU command and its routing + */ +struct command_definition { + void (*fn) (const command_definition *t, const char *data, u8 data_len); + const u8 length; //commands are realistically 1-3 chars only + const char *name; +} __packed; + +/** + * Result for matching of command signature against known list + */ +typedef enum { + PMU_CMD_AMBIGUOUS = -1, + PMU_CMD_NOT_FOUND = 0, + PMU_CMD_FOUND = 1, +} pmu_match_status; + +/** + * Default/noop shim for a PMU command. It simply prints the command received. + */ +static void cmd_shim_noop(const command_definition *t, const char *data, u8 data_len) +{ + pr_loc_dbg("vPMU received %s using %d bytes - NOOP", t->name, data_len); +} + +//@todo when we get the physical PMU emulator we can move this to a separate library so that shim contacts an internal +// routing routine for commands which aren't shimmed here. Then we will add all PMU=>kernel commands as well. Currently +// we only define kernel=>PMU ones as these are the ones we need to listen for. +#define PMU_CMD__MIN_CODE 0x30 +#define PMU_CMD__MAX_CODE 0x75 +#define single_byte_idx(id) ((id)-PMU_CMD__MIN_CODE) +#define get_single_byte_cmd(id) single_byte_cmds[single_byte_idx(id)] //call it ONLY after has_single_byte_cmd!!! 
+//True when id lies within PMU_CMD__MIN_CODE..PMU_CMD__MAX_CODE and its slot in single_byte_cmds is populated
+#define has_single_byte_cmd(id) \
+    (likely((id) >= PMU_CMD__MIN_CODE) && likely((id) <= PMU_CMD__MAX_CODE) && get_single_byte_cmd(id).length != 0)
+//Designated initializer placing a 1-byte command named cnm (handled by fp) in the slot derived from PMU_CMD_<cnm>
+#define DEFINE_SINGLE_BYTE_CMD(cnm, fp) [single_byte_idx(PMU_CMD_ ## cnm)] = { .name = #cnm, .length = 1, .fn = fp }
+
+//Command codes; the trailing comment of each is the ASCII character corresponding to the code
+#define PMU_CMD_OUT_HW_POWER_OFF            0x31 //"1"
+#define PMU_CMD_OUT_BUZ_SHORT               0x32 //"2"
+#define PMU_CMD_OUT_BUZ_LONG                0x33 //"3"
+#define PMU_CMD_OUT_PWR_LED_ON              0x34 //"4"
+#define PMU_CMD_OUT_PWR_LED_BLINK           0x35 //"5"
+#define PMU_CMD_OUT_PWR_LED_OFF             0x36 //"6"
+#define PMU_CMD_OUT_STATUS_LED_OFF          0x37 //"7"
+#define PMU_CMD_OUT_STATUS_LED_ON_GREEN     0x38 //"8"
+#define PMU_CMD_OUT_STATUS_LED_PULSE_GREEN  0x39 //"9"
+#define PMU_CMD_OUT_STATUS_LED_ON_ORANGE    0x3A //":"
+#define PMU_CMD_OUT_STATUS_LED_PULSE_ORANGE 0x3B //";"
+//0x3C unknown (possibly not used)
+#define PMU_CMD_OUT_STATUS_LED_PULSE        0x3d //"="
+//0x3E-3F unknown (possibly not used)
+#define PMU_CMD_OUT_USB_LED_ON              0x40 //"@"
+#define PMU_CMD_OUT_USB_LED_PULSE           0x41 //"A"
+#define PMU_CMD_OUT_USB_LED_OFF             0x42 //"B"
+#define PMU_CMD_OUT_HW_RESET                0x43 //"C"
+//0x44-49 unknown
+#define PMU_CMD_OUT_10G_LED_ON              0x4a //"J"
+#define PMU_CMD_OUT_10G_LED_OFF             0x4b //"K"
+//0x4C unknown
+#define PMU_CMD_OUT_LED_TOG_PWR_STAT        0x4d //"M", allows for using one led for status and power and toggle between them
+//0x4E unknown
+#define PMU_CMD_OUT_SWITCH_UP_VER           0x4f //"O"
+#define PMU_CMD_OUT_MIR_LED_OFF             0x50 //"P"
+//0x51-55 unknown (except 52)
+#define PMU_CMD_OUT_GET_UNIQ                0x52 //"R"
+#define PMU_CMD_OUT_PWM_CYCLE               0x56 //"V"
+#define PMU_CMD_OUT_PWM_HZ                  0x57 //"W"
+//0x58-59 unknown
+//0x60-71 inputs (except 6C)
+#define PMU_CMD_OUT_WOL_ON                  0x6c //"l"
+#define PMU_CMD_OUT_SCHED_UP_OFF            0x72 //"r"
+#define PMU_CMD_OUT_SCHED_UP_ON             0x73 //"s"
+#define PMU_CMD_OUT_FAN_HEALTH_OFF          0x74 //"t"
+#define PMU_CMD_OUT_FAN_HEALTH_ON           0x75 //"u"
+
+//Table indexed by (command code - PMU_CMD__MIN_CODE); slots without an initializer stay zeroed (length == 0)
+static const command_definition single_byte_cmds[single_byte_idx(PMU_CMD__MAX_CODE)+1] = {
+    DEFINE_SINGLE_BYTE_CMD(OUT_HW_POWER_OFF,
cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_BUZ_SHORT, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_BUZ_LONG, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_PWR_LED_ON, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_PWR_LED_BLINK, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_PWR_LED_OFF, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_STATUS_LED_OFF, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_STATUS_LED_ON_GREEN, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_STATUS_LED_PULSE_GREEN, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_STATUS_LED_ON_ORANGE, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_STATUS_LED_PULSE_ORANGE, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_STATUS_LED_PULSE, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_USB_LED_ON, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_USB_LED_PULSE, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_USB_LED_OFF, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_HW_RESET, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_10G_LED_ON, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_10G_LED_OFF, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_LED_TOG_PWR_STAT, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_SWITCH_UP_VER, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_MIR_LED_OFF, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_GET_UNIQ, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_PWM_CYCLE, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_PWM_HZ, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_WOL_ON, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_SCHED_UP_OFF, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_SCHED_UP_ON, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_FAN_HEALTH_OFF, cmd_shim_noop), + DEFINE_SINGLE_BYTE_CMD(OUT_FAN_HEALTH_ON, cmd_shim_noop), +}; + + +static char *uart_buffer = NULL; //keeps data streamed directly by the vUART... 
todo: vUART should manage this buffer +static char *work_buffer = NULL; //collecting & operatint on the data received from vUART +static char *work_buffer_curr = NULL; //pointer to the current free space in work_buffer +static char *hex_print_buffer = NULL; //helper buffer to print char arrays in hex + +#define work_buffer_fill() ((unsigned int)(work_buffer_curr - work_buffer)) + +/** + * Free all buffers used by this submodule + * + * It is safe to call this method without checking buffers state (it has a deliberate protection against double-free) + */ +static void free_buffers(void) +{ + if (likely(uart_buffer)) + kfree(uart_buffer); + + if (likely(work_buffer)) + kfree(work_buffer); + + if (likely(hex_print_buffer)) + kfree(hex_print_buffer); + + uart_buffer = NULL; + work_buffer = NULL; + work_buffer_curr = NULL; + hex_print_buffer = NULL; +} + +/** + * Allocates space for all buffers used by this submodule + */ +static int alloc_buffers(void) +{ + kmalloc_or_exit_int(uart_buffer, VUART_FIFO_LEN); + kmalloc_or_exit_int(work_buffer, WORK_BUFFER_LEN); + kmalloc_or_exit_int(hex_print_buffer, HEX_BUFFER_LEN); + + work_buffer_curr = work_buffer; + + return 0; +} + +/** + * Converts passed char buffer into user-readable hex print of it + * + * @todo this should probably be extracted + */ +static __used const char *get_hex_print(const char *buffer, int len) +{ + if (unlikely(len == 0)) { + hex_print_buffer[0] = '\0'; + return hex_print_buffer; + } else if (unlikely(to_hex_buf_len(len) > HEX_BUFFER_LEN)) { + pr_loc_bug("Printing %d bytes as hex requires %d bytes in buffer - buffer is %d bytes", len, + to_hex_buf_len(len), HEX_BUFFER_LEN); + hex_print_buffer[0] = '\0'; + return hex_print_buffer; + } + + int hex_len = 0; + for (int i = 0; i < len; ++i) { + sprintf(&hex_print_buffer[i * 3], "%02x ", buffer[i]); + hex_len += 3; + } + + hex_print_buffer[hex_len-1] = '\0'; + return hex_print_buffer; +} + +/** + * Matches command against a list of known ones based on the 
signature specified + * + * @param cmd pointer to a pointer where address of command structure can be saved if found + */ +static pmu_match_status noinline +match_command(const command_definition **cmd, const char *signature, const unsigned int sig_len) +{ + if (unlikely(sig_len == 0)) { + pr_loc_dbg("Invalid zero-length command (stray head without command signature) - discarding"); + return PMU_CMD_NOT_FOUND; + } + + if (likely(sig_len == 1) //regular 1 byte + || (sig_len == 3 && signature[1] == 0x0d && signature[2] == 0x0a) //1 byte with CRLF (sic!) + ) { + if (!has_single_byte_cmd(signature[0])) + return PMU_CMD_NOT_FOUND; + + *cmd = &get_single_byte_cmd(signature[0]); + return PMU_CMD_FOUND; + } + + return PMU_CMD_NOT_FOUND; //@todo Currently we don't handle multibyte commands; it has to be a full iteration +} + +/** + * Finds command based on its signature and execute its callback if found + */ +static void route_command(const char *buffer, const unsigned int len) +{ + const command_definition *cmd = NULL; + + if (match_command(&cmd, buffer, len) != PMU_CMD_FOUND) { + pr_loc_wrn("Unknown %d byte PMU command with signature hex=\"%s\" ascii=\"%.*s\"", len, + get_hex_print(buffer, len), len, buffer); + return; + } + + pr_loc_dbg("Executing cmd %s handler %pF", cmd->name, cmd->fn); + cmd->fn(cmd, buffer, len); +} + +/** + * Scans work buffer (copied from vUART buffer) to find commands + * + * @param end_of_packet Indicates whether this command was called because the vUART transmitter assumed + * end-of-transmission/IDLE, or flushed due to its buffer being full. If this parameter is true the + * data in the work buffer is assumed to be a complete representation of the state. This becomes + * important if we cannot with 100% confidence say, after reaching the end of the buffer, if it + * will be a multibyte command (but we possibly didn't get all the bytes YET) or this is single or + * multibyte command which we don't know. 
+ * + * This function makes an assumption that when it's called that the buffer represents the whole state of the command + * If this becomes to take too long we can move it to a separate thread, but this will require a lock. + */ +static noinline void process_work_buffer(bool end_of_packet) +{ + if (unlikely(work_buffer == work_buffer_curr)) { + //this can happen if kernel sends no data but we get IDLE... shouldn't logically happen + pr_loc_wrn("%s called on empty buffer?!", __FUNCTION__); + return; + } + + int cmd_len = -1; //number of bytes in the command (excluding header) + for(char *curr = work_buffer; curr < work_buffer_curr; ++curr) { + if (*curr == PMU_CMD_HEAD) { //got the beginning of a new command + //we've found a new command in the buffer - lets check if the previously collected data matches anything + if (cmd_len != -1) { //we only want to call it if this isn't the first byte after last cmd (or 1st in buf) + route_command(curr-cmd_len, cmd_len); + cmd_len = 0; //we've got the head so 0 and not -1 + } else { + ++cmd_len; //We've read the buffer containing head, we then expect to get something which is non-head + } + } else { + if (cmd_len == -1) { //we don't expect data before head + pr_loc_wrn("Found garbage data in PMU buffer before cmd head (\"%c\" / 0x%02x) - ignoring", *curr, + *curr); + continue; + } + + ++cmd_len; //collecting another byte for the currently processed command + } + } + + //We've finished processing the buffer. 
Now we need to decide what to do with that last piece of data + unsigned int processed = work_buffer_fill(); + if (cmd_len != -1) { //if it's -1 it means we didn't find any heading so we're just discarding all data + if (end_of_packet) { + route_command(work_buffer_curr-cmd_len, cmd_len); + } else { //if the packed didn't end we need to keep that piece of buffer for the next run + processed -= cmd_len + 1; //we also keep head + } + } + + unsigned int left = work_buffer_fill() - processed; + if (likely(left != 0)) { + memmove(work_buffer, work_buffer+processed, left); + } + work_buffer_curr = work_buffer + left; + +// pr_loc_dbg("Left buffer %p curr=%p with %d bytes in it (ascii=\"%.*s\" hex={%s})", work_buffer, work_buffer_curr, left, left, work_buffer, +// get_hex_print(work_buffer, left)); +} + +/** + * Callback passed to vUART. It will be called any time some data is available. + */ +static noinline void pmu_rx_callback(int line, const char *buffer, unsigned int len, vuart_flush_reason reason) +{ + pr_loc_dbg("Got %d bytes from PMU: reason=%d hex={%s} ascii=\"%.*s\"", len, reason, get_hex_print(buffer, len), len, buffer); + + int buffer_space = WORK_BUFFER_LEN - work_buffer_fill(); + if (unlikely(work_buffer_curr + len > work_buffer + WORK_BUFFER_LEN)) { //todo just remove as much as needed from the buffer to fit more data + pr_loc_err("Work buffer is full! Only %d of %d bytes will be copied from receiver", len, buffer_space); + len = buffer_space; + } + + memcpy(work_buffer_curr, buffer, len); + work_buffer_curr += len; +// pr_loc_dbg("Copied data to work buffer, now with %d bytes in it (cur=%p)", +// (unsigned int)(work_buffer_curr - work_buffer), work_buffer_curr); + + //We only want to analyze the buffer when we are sure we have the full command to process. This is because commands + // are variable length and have no end delimiter not length specified with prefixes of short commands conflicting + // with longer commands (sic!) 
+ //For example, you have "SW1" command which when sent will look like "-SW1" (0x2d 0x53 0x57 0x31). We can capture + // this when VUART_FLUSH_IDLE happens. We can also easily capture this when multiple commands are sent at once + // (unlikely but possible) since it will be something like "-SW1-3". However, we CANNOT distinguish "-S" from + // incomplete "-SW1". So we need to rely on IDLE - if we got "-S" with IDLE it means it was "-S" and not the + // beginning of "-SW1". + //We also forcefully flush on full buffer even if no IDLE was specified, as it's technically possible for the + // software to send a long sequence of commands at once totaling more than our buffer (extremely unlikely). + //Additionally, we only process IDLE-signalled buffers when they have at least a single byte of data as some + // versions of the mfgBIOS attach head AND THEN in a separate packet send the actual commands (sic!) + if (reason == VUART_FLUSH_IDLE && work_buffer_fill() > 1) + process_work_buffer(true); + //our buffer is full [we must process] or vUART buffer was full [we should process] + else if (buffer_space <= len || reason == VUART_FLUSH_FULL) + process_work_buffer(false); +} + +int register_pmu_shim(const struct hw_config *hw) +{ + shim_reg_in(); + + int out; + if ((out = vuart_add_device(PMU_TTYS_LINE) != 0)) { + pr_loc_err("Failed to initialize vUART for PMU at ttyS%d", PMU_TTYS_LINE); + return out; + } + + if ((out = alloc_buffers()) != 0) + goto error_out; + + //We don't set the threshold as some commands are variable length but the "packets" are properly split + if ((out = vuart_set_tx_callback(PMU_TTYS_LINE, pmu_rx_callback, uart_buffer, VUART_THRESHOLD_MAX))) { + pr_loc_err("Failed to register RX callback"); + goto error_out; + } + + shim_reg_ok(); + return 0; + + error_out: + free_buffers(); + vuart_remove_device(PMU_TTYS_LINE); //this also removes callback (if set) + return out; +} + +int unregister_pmu_shim(void) +{ + shim_ureg_in(); + + int out = 0; + if 
(unlikely(!uart_buffer)) { + pr_loc_bug("Attempted to %s while it's not registered", __FUNCTION__); + return 0; //Technically it succeeded + } + + if ((out = vuart_remove_device(PMU_TTYS_LINE)) != 0) + pr_loc_err("Failed to remove vUART for line=%d", PMU_TTYS_LINE); + + free_buffers(); + + shim_ureg_ok(); + return out; +} \ No newline at end of file diff --git a/shim/pmu_shim.h b/shim/pmu_shim.h new file mode 100644 index 0000000..9eaebe0 --- /dev/null +++ b/shim/pmu_shim.h @@ -0,0 +1,8 @@ +#ifndef REDPILL_PMU_SHIM_H +#define REDPILL_PMU_SHIM_H + +typedef struct hw_config hw_config_; +int register_pmu_shim(const struct hw_config *hw); +int unregister_pmu_shim(void); + +#endif //REDPILL_PMU_SHIM_H diff --git a/shim/shim_base.h b/shim/shim_base.h new file mode 100644 index 0000000..cedffb3 --- /dev/null +++ b/shim/shim_base.h @@ -0,0 +1,20 @@ +#ifndef REDPILL_SHIM_BASE_H +#define REDPILL_SHIM_BASE_H + +#define shim_reg_in() pr_loc_dbg("Registering %s shim", SHIM_NAME); +#define shim_reg_ok() pr_loc_inf("Successfully registered %s shim", SHIM_NAME); +#define shim_reg_already() do { \ + pr_loc_bug("Called %s while %s() shim is already", __FUNCTION__, SHIM_NAME); \ + return -EALREADY; \ +} while(0) +#define shim_ureg_in() pr_loc_dbg("Unregistering %s shim", SHIM_NAME); +#define shim_ureg_ok() pr_loc_inf("Successfully unregistered %s shim", SHIM_NAME); +#define shim_ureg_nreg() do { \ + pr_loc_bug("Called %s() while %s shim is not registered (yet?)", __FUNCTION__, SHIM_NAME); \ + return -ENXIO; \ +} while(0) +#define shim_reset_in() pr_loc_inf("Forcefully resetting %s shim", SHIM_NAME); +#define shim_reset_ok() pr_loc_inf("Successfully reset %s", SHIM_NAME); + + +#endif //REDPILL_SHIM_BASE_H diff --git a/shim/storage/sata_port_shim.c b/shim/storage/sata_port_shim.c new file mode 100644 index 0000000..04ab24c --- /dev/null +++ b/shim/storage/sata_port_shim.c @@ -0,0 +1,149 @@ +/** + * Allows for usage of SCSI-based storage devices like they were bare standard SATA ones + 
* + * WHY THIS SHIM? + * Normally Linux doesn't care if something is an SCSI device or a SATA one, as SATA is a subset of SCSI (technically + * speaking SATA is an interface using SCSI protocol). However, the syno-modified SCSI driver (drivers/scsi/sd.c) adds + * a layer of logical disk types. These types determine what the disk actually is, so that the NAS can know what should + * be done with them. + * For example SYNO_DISK_USB, SYNO_DISK_SYNOBOOT, SYNO_DISK_SATA, and SYNO_DISK_ISCSI are all normally visible in the + * system as /dev/sdX and are all SCSI-based drives. However, you can only use RAID on SATA drives and not on USB ones. + * The "SYNO_DISK_SATA" is kind-of a catch-all type for all disks which are used for storing data, even if they're not + * really SATA disks. One of the exceptions set by the sd.c driver is that if VirtIO driver is used all disks connected + * via that method are treated as SYNO_DISK_SATA. Unfortunately that, very logical and useful, assumption is made ONLY + * when the kernel is compiled with CONFIG_SYNO_KVMX64 (which is a special platform for VDSM). On all other platforms + * disks connected to VirtIO will be slightly broken in old versions and unusable in newer ones (as their type is set to + * SYNO_DISK_UNKNOWN). This shim brings the functionality available on CONFIG_SYNO_KVMX64 to all platforms. + * In addition, it changes SAS ports to be SATA as well as syno reserves SYNO_DISK_SAS for usage with just a few FS + * devices and external enclosures. + * + * HOW DOES IT WORK? + * It simply plugs into the SCSI driver (via SCSI notifier) and waits for a new drive. When a new drive is connected it + * checks if it was connected via the VirtIO driver or through a SAS card driver and changes the port type to + * SYNO_PORT_TYPE_SATA, which will later force the driver to assume the drive is indeed a "SATA" drive (SYNO_DISK_SATA). 
+ * While the ports can be enumerated and changed all at once, it's safer to do it per-drive basis as drivers allow for + * ports to be dynamically reconfigured and thus the type may change. This is also why we make no effort of + * restoring port types after this shim is unregistered. + * + * References + * - drivers/scsi/sd.c in Linux sources + */ +#include "sata_port_shim.h" +#include "../shim_base.h" +#include "../../common.h" +#include "../../internal/scsi/scsi_toolbox.h" //scsi_force_replug() +#include "../../internal/scsi/scsi_notifier.h" +#include //struct scsi_device +#include //struct Scsi_Host, SYNO_PORT_TYPE_* +#include "../../config/runtime_config.h" +#include "../../config/platform_types.h" + +#define SHIM_NAME "SATA port emulator" +#define VIRTIO_HOST_ID "Virtio SCSI HBA" + +/** + * Checks if we should fix a given device or ignore it + */ +static bool is_fixable(struct scsi_device *sdp) +{ + return sdp->host->hostt->syno_port_type == SYNO_PORT_TYPE_SAS || + (current_config.hw_config->is_dt == false && // Device-tree models causes a kernel panic if type is changed + sdp->host->hostt->syno_port_type != SYNO_PORT_TYPE_SATA && + strcmp(sdp->host->hostt->name, VIRTIO_HOST_ID) == 0); +} + +/** + * Processes any new devices connected to the system AND existing devices which were forcefully reconnected + * + * When a device which is deemed fixable it will replace its port to SATA to make it work as a standard SATA drive. 
+ * + * @return 0 on success, -E on error + */ +static int on_new_scsi_disk_device(struct scsi_device *sdp) +{ + if (!is_fixable(sdp)) + return 0; + + pr_loc_dbg("Found new disk vendor=\"%s\" model=\"%s\" connected to \"%s\" HBA over non-SATA port (type=%d) - " + "fixing to SATA port (type=%d)", sdp->vendor, sdp->model, sdp->host->hostt->name, + sdp->host->hostt->syno_port_type, SYNO_PORT_TYPE_SATA); + + sdp->host->hostt->syno_port_type = SYNO_PORT_TYPE_SATA; + + return 0; +} + +/** + * Called for every existing SCSI-based disk to determine if there are any fixable devices which are already connected + * + * Every device which is fixable but still connected it will be forcefully re-connected, as this is the only way to fix + * existing device properly. + * + * @return 0 on success, -E on error + */ +static int on_existing_scsi_disk_device(struct scsi_device *sdp) +{ + if (!is_fixable(sdp)) + return 0; + + pr_loc_dbg( + "Found initialized disk vendor=\"%s\" model=\"%s\" connected to \"%s\" HBA over non-SATA port (type=%d)." 
+ " It must be auto-replugged to fix it.", sdp->vendor, sdp->model, sdp->host->hostt->name, + sdp->host->hostt->syno_port_type); + + //After that it will land in on_new_scsi_disk_device() + scsi_force_replug(sdp); + + return 0; +} + +/** + * Tiny shim to direct SCSI notifications to on_existing_scsi_disk_device() before it's probed + */ +static int scsi_disk_probe_handler(struct notifier_block *self, unsigned long state, void *data) +{ + if (state != SCSI_EVT_DEV_PROBING) + return NOTIFY_DONE; + + on_new_scsi_disk_device(data); + return NOTIFY_OK; +} + +static struct notifier_block scsi_disk_nb = { + .notifier_call = scsi_disk_probe_handler, + .priority = INT_MIN, //we want to be FIRST so that we other things can get the correct drive type +}; + +int register_sata_port_shim(void) +{ + shim_reg_in(); + + int out; + + pr_loc_dbg("Registering for new devices notifications"); + out = subscribe_scsi_disk_events(&scsi_disk_nb); + if (unlikely(out != 0)) { + pr_loc_err("Failed to register for SCSI disks notifications - error=%d", out); + return out; + } + + pr_loc_dbg("Iterating over existing devices"); + out = for_each_scsi_disk(on_existing_scsi_disk_device); + if (unlikely(out != 0 && out != -ENXIO)) { + pr_loc_err("Failed to enumerate current SCSI disks - error=%d", out); + return out; + } + + shim_reg_ok(); + return 0; +} + +int unregister_sata_port_shim(void) +{ + shim_ureg_in(); + + unsubscribe_scsi_disk_events(&scsi_disk_nb); + + shim_ureg_ok(); + return 0; //noop +} \ No newline at end of file diff --git a/shim/storage/sata_port_shim.h b/shim/storage/sata_port_shim.h new file mode 100644 index 0000000..d1a8ea1 --- /dev/null +++ b/shim/storage/sata_port_shim.h @@ -0,0 +1,7 @@ +#ifndef REDPILL_SATA_PORT_SHIM_H +#define REDPILL_SATA_PORT_SHIM_H + +int register_sata_port_shim(void); +int unregister_sata_port_shim(void); + +#endif //REDPILL_SATA_PORT_SHIM_H diff --git a/shim/storage/smart_shim.c b/shim/storage/smart_shim.c new file mode 100644 index 0000000..b9e5509 
--- /dev/null +++ b/shim/storage/smart_shim.c @@ -0,0 +1,1082 @@ +/** + * Emulates SMART capabilities on drives without SMART support + * + * WHY?! + * When running under a hypervisor some emulated drives will lack SMART support, which is required for some tools to + * operate properly. For example QEmu (e.g. in Proxmox) emulates some basic SMART support (see qemu/hw/ide/core.c), which + * is enough. However, surprisingly popular VMWare products lack any SMART support whatsoever. This is the main reason + * for this shim to exist. + * + * + * HOW IT WORKS? + * The SMART subsystem is rather simple yet contains a large amount of legacy. To understand what's going on here you + * should first read the micron.com docs and then fill-in the details from the official ATA/ATAPI-6 spec (see + * references). Overall the SMART has a few main parts: + * - live/snapshot readings (that's the table with values & thresholds you see in smartctl) + * - capabilities (static values which specify what a given drive can do, e.g. whether SMART is supported) + * - stored logs + * - API to execute tests + * + * All SMART actions are executed by the userspace, and thus delivered via ioctl call to a device. When the ioctl is + * executed a shared buffer is passed which contains a header + space for saving response data. There are two + * different ioctl commands used to execute SMART requests with several subcommands: + * - HDIO_DRIVE_CMD (ioctl, see handle_hdio_drive_cmd_ioctl() & Documentation/ioctl/hdio.txt) + * - ATA_CMD_ID_ATA (read capabilities of the drive, see handle_ata_cmd_identify()) + * # we only hook it to indicate that SMART is supported & enabled even if the ATA disk doesn't REALLY support it + * # if the drive supports SMART this hook is a noop + * # for non ATA-compliant disks (e.g. 
VirtIO SCSI) we generate a full fake IDENTIFY data (populate_ata_id()) + * - ATA_CMD_SMART (read data from SMART subsystem, see handle_ata_cmd_smart()) + * # we hook it to emulate SMART data + * # if the drive supports SMART this hook is a noop + * - ATA_SMART_READ_VALUES (read all live values from the drive, see populate_ata_smart_values()) + * - ATA_SMART_READ_THRESHOLDS (read thresholds from live data of the drive, see populate_ata_smart_thresholds()) + * - ATA_SMART_ENABLE (ask drive to enable SMART, it shouldn't really be delivered but we respond with "OK") + * - WIN_FT_SMART_READ_LOG_SECTOR (use WIN_SMART interface to read stored logs, see populate_win_smart_log()) + * - WIN_FT_SMART_IMMEDIATE_OFFLINE (use WIN_SMART iface to run SMART test, see populate_win_smart_exec_test()) + * + * - HDIO_DRIVE_TASK (ioctl, see handle_hdio_drive_task_ioctl() & Documentation/ioctl/hdio.txt) + * - WIN_CMD_SMART (use WIN_SMART to read data from SMART subsystem, see handle_ata_task_smart()) + * # this command is used by smartctl (and probably others) to read a general OK/FAIL status of a drive + * # we respond with "OK" to all commands below + * - WIN_FT_SMART_STATUS + * - WIN_FT_SMART_AUTOSAVE + * - WIN_FT_SMART_AUTO_OFFLINE + * + * Note: Most of the commands are using the standard ATA/ATAPI interface, few are using (legacy?) WIN_SMART interface. + * While WIN_SMART can theoretically be used to read values etc no tool from this century will do that (they will + * use the ATA/ATAPI interface). This shim emulates WIN_SMART only when needed. + * + * + * LIMITATIONS + * - Values are always static and the same for all drives + * - Power-on hours & other counters (e.g. 
start-stop count) are static + * - Ideally values should be calculated as hours from some date to ensure they increase + * - Start-stop counter (and others) can be derived from power-on hours using linear regression + * + * + * SEQUENCE OF ACTIONS FOR IOCTL REPLACEMENT + * This submodule has a rather unintuitive initialization sequence (it's multistage). It works in the following order: + * 1. Checks if "sd" driver is loaded + * - if not loaded it verifies if it exists in the kernel and overrides sd_ioctl() [see 2.] + * - it SHOULD wait for the driver instead but due to current notifier limitations we can't do that + * 2. Temporarily installs trampoline in sd_ioctl() [drivers/scsi/sd.c] to sd_ioctl_canary() + * 3. Awaits any IOCTL from userspace + * - it simply waits until sd_ioctl_canary() is called + * - sd_ioctl_canary's only role is to install a fast and permanent shim of sd_ioctl() by replacing the ioctl + * routing in SCSI driver (see details in the comment for sd_ioctl_canary()) to point to sd_ioctl_smart_shim() + * - sd_ioctl() trampoline is removed + * - after installation it triggers sd_ioctl_smart_shim() to handle that IOCTL which canary captured + * 4. 
sd_ioctl_smart_shim() is triggered for every ioctl to a /dev/sdX device coming from the userspace + * - it filters commands which are SMART-related (or at least what smartmontools uses as nobody uses anything else) + * - all non-SMART commands are forwarded as-is + * - SMART commands are forwarded to the drive if the drive supports SMART, if not a sensible values are faked + * + * References + * - https://www.micron.com/-/media/client/global/documents/products/technical-note/solid-state-storage/tnfd10_p400e_smart_firmware_0142.pdf + * - https://hddguru.com/documentation/2006.01.27-ATA-ATAPI-6/ (the official ATA/ATAPI-6 specs) + * - https://www.kernel.org/doc/Documentation/ioctl/hdio.txt (HDIO_* ioctls summary from Linux) + * - https://github.com/qemu/qemu/blob/266469947161aa10b1d36843580d369d5aa38589/hw/ide/core.c#L1826 (qemu SMART) + */ +#include "smart_shim.h" +#include "../shim_base.h" +#include "../../common.h" +#include "../../internal/intercept_driver_register.h" //waiting for "sd" driver to load +#include "../../internal/helper/memory_helper.h" //set_mem_addr_ro(), set_mem_addr_rw() +#include "../../internal/helper/symbol_helper.h" //kernel_has_symbol() +#include "../../internal/scsi/hdparam.h" //a ton of ATA constants +#include "../../internal/scsi/scsi_toolbox.h" //checking for "sd" driver load state +#include "../../internal/override/override_symbol.h" //installing sd_ioctl_canary() +#include //struct block_device +#include //struct gendisk +#include //struct block_device_operations +#include //spinlock_t, spin_* +#include //ATA_* + +#define SHIM_NAME "SMART emulator" + +#ifdef DBG_SMART_PRINT_ALL_IOCTL +#define pr_loc_dbg_ioctl(cmd_hex, subcmd_name, bdev) \ + pr_loc_dbg("Handling ioctl(0x%x)->%s for /dev/%s", cmd_hex, subcmd_name, (bdev)->bd_disk->disk_name); +#define pr_loc_dbg_ioctl_unk(cmd_hex, subcmd_hex, bdev) \ + pr_loc_dbg("Handling ioctl(cmd=0x%x ; sub=0x%x) for /dev/%s - not hooked (noop)", \ + cmd_hex, subcmd_hex, (bdev)->bd_disk->disk_name); 
+#else +#define pr_loc_dbg_ioctl(cmd_hex, subcmd_name, bdev) //noop +#define pr_loc_dbg_ioctl_unk(cmd_hex, subcmd_hex, bdev) //noop +#endif + +//address of original and unmodified sd_ioctl(); populated by the canary and after the canary trampoline is removed +static int (*sd_ioctl_org) (struct block_device *, fmode_t, unsigned, unsigned long) = NULL; +struct block_device_operations *sd_fops = NULL; //ptr to drivers/scsi/sd.c:sd_fops [to restore sd_ioctl on removal] +static struct override_symbol_inst* sd_ioctl_canary_ovs = NULL; //sd_ioctl() override for canary +static spinlock_t sd_ioctl_canary_lock; + +/********************************************* Fake SMART data definition *********************************************/ +//see "Table 4: SMART Attribute Summary" in micron.com document for a nice summary +//These values below were taken from a random WD drive and slightly modified. While there isn't a definitive list of +// "what must be in SMART" (as ATAPI spec says that everything is vendor-specific) there are some "usually available" +// params the FLAG value is split into low (LSB) and high (MSB). Really only the first 6 bits (so in LSB) are defined: +// Bit 0 - Warranty purposes +// Bit 1 - Offline collected +// Bit 2 - Performance degradation indicator +// Bit 3 - Error rate dependent +// Bit 4 - Event counter (should probably NOT be used with bit 3) +// Bit 5 - Self-preservation +//Values here are a cross-section of HDD & SSD capable ones. While reading these we don't always have easy access to +// the data used to determine whether the drive is an SSD or an HDD so we "play it cool" by only giving values which are +// valid for both. 
//One row per emulated SMART attribute; every row holds ATA_SMART_RECORD_LEN values laid out as named in the header row
static const int fake_smart[][ATA_SMART_RECORD_LEN] = {
    /* #, lFLAG, hFLAG, VAL, WRST, RAW_DATA, RAW_ATTR_SPC, THRESH, NAME */
    { 1, 0x2d, 0x00, 0xc8, 0xc8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06 }, /* Raw_Read_Error_Rate */
    { 2, 0x04, 0x00, 0x80, 0x75, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40 }, /* Throughput_Performance */
    { 3, 0x27, 0x00, 0xb1, 0xB0, 0x4f, 0x12, 0x00, 0x00, 0x00, 0x00, 0x06 }, /* Spin_Up_Time */
    { 4, 0x32, 0x00, 0x64, 0x64, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, /* Start_Stop_Count */
    { 5, 0x33, 0x00, 0xc8, 0xC8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8c }, /* Reallocated_Sector_Ct */
    //Seek_Error_Rate(7) is invalid for SSDs
    //deliberately not providing Seek_Time_Performance (8) to prevent fs from "recalibrating" to this
    { 9, 0x32, 0x00, 0x06, 0x00, 0xad, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00 }, /* Power_On_Hours */
    //Spin_Retry_Count(10) and Calibration_Retry_Count(11) are invalid for SSDs
    //NOTE(review): the row below was labelled Start_Stop_Count, same as #4; attribute 12 is usually reported as
    // Power_Cycle_Count - confirm
    { 12, 0x32, 0x00, 0x64, 0x64, 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, /* Start_Stop_Count */
    { 13, 0x2e, 0x00, 0xc8, 0xc8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, /* Read_Soft_Error_Rate */
    //14-174: very vendor-specific / esoteric
    //175-182: SSD only
    { 183, 0x33, 0x00, 0xc8, 0xc8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8c }, /* Runtime_Bad_Block */
    { 184, 0x33, 0x00, 0xC8, 0xc8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8c }, /* End-to-End_Error */
    //185-186: very vendor-specific / esoteric
    { 187, 0x3a, 0x00, 0x3e, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05 }, /* Reported_Uncorrect */
    { 188, 0x32, 0x00, 0x64, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, /* Command_Timeout */
    //High_Fly_Writes(189) is invalid for SSDs
    { 190, 0x22, 0x00, 0x3e, 0x3e, 0x1B, 0x00, 0x1E, 0x1B, 0x00, 0x00, 0x00 }, /* Airflow_Temperature_Cel */
    //Sense_Error_Rate(191) is invalid for SSDs
    { 192, 0x32, 0x00, 0x64, 0x64, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, /* Power-Off_Retract_Count */
    //Unknown_SSD_Attribute(193) is invalid for SSDs
    { 194, 0x22, 0x00, 0x76, 0x62, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, /* Temperature_Celsius */
    { 195, 0x32, 0x00, 0x80, 0x80, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, /* Hardware_ECC_Recovered */
    { 196, 0x32, 0x00, 0x80, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, /* Reallocated_Event_Count */
    { 197, 0x32, 0x00, 0x80, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, /* Current_Pending_Sector */
    { 198, 0x30, 0x00, 0x64, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, /* Offline_Uncorrectable */
    { 199, 0x32, 0x00, 0xC8, 0xC8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, /* UDMA_CRC_Error_Count */
    //rest of the attributes are esoteric or invalid for SSDs
};

//SMART components versions (some of them CANNOT be changed)
#define SMART_SNAP_VERSION 0x01 //version for the live data snapshot; vendor-specific
#define WIN_SMART_DIG_LOG_VERSION 0x00 //WIN_SMART log directory version; see 8.55.6.8.1 for details
#define WIN_SMART_SUM_LOG_VERSION 0x01 //WIN_SMART summary log version; ALWAYS 1 as per ATAPI/6 sec. 8.55.6.8.2.1
#define WIN_SMART_COMP_LOG_VERSION 0x01 //WIN_SMART comprehensive log version; ALWAYS 1 as per ATAPI/6 sec. 8.55.6.8.3.1
#define WIN_SMART_TEST_LOG_VERSION 0x01 //WIN_SMART self-test log version; ALWAYS 1 as per ATAPI/6 sec. 8.55.6.8.4.1


/********************************************* ATA/IOCTL helper functions *********************************************/
/**
 * Calculates a standard per-sector ATA checksum
 *
 * ATA/ATAPI-6 standard contains the same checksum references in many places. It's always saved in the last byte of a
 * sector (index 511). It is described e.g. in "Table 5: SMART Attribute Entry Format". It's defined as "Two's
 * complement checksum of preceding 511B[ytes]". Wikipedia has a great article about that as well.
+ * + * @param buff A single-sector sized buffer to compute & save checksum to + */ +static void ata_calc_sector_checksum(u8 *buff) +{ + for (int i = 0; i < (ATA_SECT_SIZE-1); i++) { + buff[(ATA_SECT_SIZE-1)] += buff[i]; + } + + buff[(ATA_SECT_SIZE-1)] = 256 - buff[(ATA_SECT_SIZE-1)]; +} + +/** + * Calculates a standard per-worded structure ATA checksum + * + * In principal it's almost the same thing as ata_calc_sector_checksum() but with some constant added to be 16 bits. + * See "8.16.64 Word 255: Integrity word". Checksum is always saved in word 255. + * + * @param word_buff A 255-word (each 16 bits) sized buffer to compute & save checksum to + */ +static void ata_calc_integrity_word(u16 *word_buff) +{ + u8 *byte_buff = (u8 *)word_buff; + + for (int i = 0; i < (ATA_SECT_SIZE-2); i++) { + byte_buff[(ATA_SECT_SIZE-2)] += byte_buff[i]; + } + + byte_buff[(ATA_SECT_SIZE-2)] = 256 - byte_buff[(ATA_SECT_SIZE-2)]; + byte_buff[(ATA_SECT_SIZE-1)] = 0xa5; +} + +/** + * ATA/ATAPI uses "strings" which are LE arranged 8 bit characters into 16 bit words padded with spaces to full length + * + * Example of 10 character ATA field: + * =normal=> "TEST12" + * =ATA====> "ETTS21 " + * + * @param dst buffer to copy the string to + * @param src standard NULL-byte terminated text + * @param length ATA field length; must be even + */ +static void set_ata_string(u8 *dst, const char *src, u8 length) +{ + if (unlikely(length % 2 != 0)) { + pr_loc_bug("Length must be even but got %d", length); + --length; + } + + memset(dst, 0x20, length); //fields in ATA/ATAPI are space-padded and not terminated by \0 + for (u8 i = 0; i < length; i += 2) + { + if (src[i] == '\0') + break; + + dst[i + 1] = src[i]; + dst[i] = src[i + 1]; + } +} + +/** + * Duplicates a user-supplied ioctl() buffer into kernel space to safely read data from it + * + * This pointer returned here is... peculiar. First 4 bytes are a header (as defined by UAPI "struct hd_drive_cmd_hdr" + * in hdreg.h). 
Remaining 512 bytes are 16 bits words. + * + * @param sectors How many sectors to copy + * @param src User buffer to copy from + * + * @return Pointer to a new kernel-space buffer or ERR_PTR; you need to free/put-it-back using put_ioctl_buffer + */ +static unsigned char* get_ioctl_buffer_kcopy(u8 sectors, const __user void *src) +{ + unsigned char *kbuf; + + kmalloc_or_exit_ptr(kbuf, ata_ioctl_buf_size(sectors)); + if(unlikely(copy_from_user(kbuf, src, ata_ioctl_buf_size(sectors)) != 0)) { + pr_loc_err("Failed to copy ATA user buffer from ptr=%p to kspace=%p", src, kbuf); + kfree(kbuf); + return ERR_PTR(-EFAULT); + } + + return kbuf; +} + +/** + * Releases buffer obtained from get_ioctl_buffer_*() + */ +static __always_inline void put_ioctl_buffer(unsigned char *buffer) +{ + kfree(buffer); +} + +/*************************************** ATAPI/WIN command interface handling *****************************************/ +static int populate_ata_id(const u8 *req_header, void __user *buff_ptr, const char* const disk_name) +{ + pr_loc_dbg("Generating completely fake ATA IDENTITY"); + + unsigned char *kbuf; + char disk_serial[DISK_NAME_LEN]; + kzalloc_or_exit_int(kbuf, HDIO_DRIVE_CMD_HDR_OFFSET + sizeof(struct rp_hd_driveid)); + struct rp_hd_driveid *did = (void *)(kbuf + HDIO_DRIVE_CMD_HDR_OFFSET); //did=drive ID + + //First write response header + kbuf[HDIO_DRIVE_CMD_RET_STATUS] = 0x00; + kbuf[HDIO_DRIVE_CMD_RET_ERROR] = 0x00; + kbuf[HDIO_DRIVE_CMD_RET_SEC_CNT] = ATA_CMD_ID_ATA_SECTORS; + + did->config = 0x0000; //15th bit = ATA device, rest is reserved/obsolete + strscpy(disk_serial, disk_name, DISK_NAME_LEN > 20 ? 
20 : DISK_NAME_LEN); + set_ata_string(did->serial_no, disk_serial, 20); + set_ata_string(did->fw_rev, "1.13.2", 8); + set_ata_string(did->model, "Virtual HDD", 40); + did->reserved50 = (1 << 14); //"shall be set to one" + did->major_rev_num = 0xffff; + did->minor_rev_num = 0xffff; + did->command_set_1 = (1 << 3 | 1 << 0); //PM, SMART supported + did->command_set_2 = (1 << 14); //"shall be set to one" + did->cfsse = (1 << 14 | 1 << 1 | 1 << 0); //14: "shall be set to one" ; smart self-test supported ; smart error-log + did->cfs_enable_1 = (1 << 3 | 1 << 0); //PM, SMART + did->cfs_enable_2 = (1 << 14); //"shall be set to one" + did->csf_default = (1 << 14 | 1 << 1 | 1 << 0); //"shall be one" ; SMART self-test, SMART error-test + did->hw_config = (1 << 14 | 1 << 0); //both "shall be one" + did->lba_capacity = 0xffffffff; //maybe we can get away with not reading capacity? + + ata_calc_integrity_word((void *)did); + + if (unlikely(copy_to_user(buff_ptr, kbuf, HDIO_DRIVE_CMD_HDR_OFFSET + sizeof(struct rp_hd_driveid)) != 0)) { + pr_loc_err("Failed to copy fake ATA IDENTIFY packet to user ptr=%p", (void *)buff_ptr); + kfree(kbuf); + return -EFAULT; + } + + kfree(kbuf); + return 0; +} + +/** + * Handles on-the-fly modification of data related to ATA IDENTIFY DEVICE command + * + * See "8.16 IDENTIFY DEVICE" section in the ATA/ATAPI-6 manual. + * + * @param org_ioctl_exec_result exit code of the original ioctl() call which reached the drive (done by + * handle_hdio_drive_cmd_ioctl()). This command shouldn't normally fail for any drive. 
+ * @param req_header ioctl() header sent along the request, will be HDIO_DRIVE_CMD_HDR_OFFSET bytes long + * @param buff_ptr userspace pointer to a buffer passed to the ioctl() call; it will be read and possibly altered + * + * @return definitive exit code for the ioctl(); in practice 0 when succedded [regardless of the modifications made] or + * the same error code as org_ioctl_exec_result passed + */ +static int handle_ata_cmd_identify(int org_ioctl_exec_result, const u8 *req_header, void __user *buff_ptr, const char* const disk_name) +{ + //ATA IDENTIFY should not fail - it may mean a problem with a disk or the "disk" is a adapter (e.g. IDE>SATA) with + // no disk connected, or if executed against a USB flash drive... or it's an VirtIO SCSI disk read as ATA + if (unlikely(org_ioctl_exec_result != 0)) { + pr_loc_dbg("sd_ioctl(HDIO_DRIVE_CMD ; ATA_CMD_ID_ATA) failed with error=%d, attempting to emulate something", + org_ioctl_exec_result); + return populate_ata_id(req_header, buff_ptr, disk_name); + } + + //sanity check if requested ATA IDENTIFY sector count is really what we're planning to copy + if (unlikely(req_header[HDIO_DRIVE_CMD_HDR_SEC_CNT]) != ATA_CMD_ID_ATA_SECTORS) { + pr_loc_err("Expected %d bytes (%d sectors) DATA for ATA IDENTIFY DEVICE, got %d", + ATA_CMD_ID_ATA_SECTORS, ata_ioctl_buf_size(ATA_CMD_ID_ATA_SECTORS), + req_header[HDIO_DRIVE_CMD_HDR_SEC_CNT]); + return -EIO; + } + + //if the identity succeeded we need to check if SMART is supported & enabled. 
For that we need to clone + // normally user buffer from ioctl (reading it directly is risky) + unsigned char *kbuf = get_ioctl_buffer_kcopy(ATA_CMD_ID_ATA_SECTORS, buff_ptr); + if (unlikely(IS_ERR(kbuf))) + return PTR_ERR(kbuf); //it will already log error in kcopy + + u16 *ata_identity = (u16 *)(kbuf + HDIO_DRIVE_CMD_HDR_OFFSET); + if (ata_is_smart_supported(ata_identity) && ata_is_smart_enabled(ata_identity)) { + pr_loc_dbg("ATA_CMD_ID_ATA confirmed SMART support - noop"); + put_ioctl_buffer(kbuf); //we no longer need the buffer as we're not touching it, we've only read it + return 0; //SMART supported, pass identity as-is + } + + //if SMART is not supported we modify the response, retaining the original response header but changing + // SMART flags & recalculating checksum + pr_loc_dbg("ATA_CMD_ID_ATA confirmed *no* SMART support - pretending it's there"); + ata_set_smart_supported(ata_identity); + ata_set_smart_enabled(ata_identity); + ata_calc_integrity_word(ata_identity); + + if (unlikely(copy_to_user(buff_ptr, kbuf, ata_ioctl_buf_size(ATA_CMD_ID_ATA_SECTORS)) != 0)) { + pr_loc_err("Failed to copy ATA IDENTIFY packet to user ptr=%p", (void *)buff_ptr); + put_ioctl_buffer(kbuf); + return -EFAULT; + } + + put_ioctl_buffer(kbuf); + return 0; +} + +/** + * Populates user ioctl() buffer with fake SMART snapshot values + * + * This function is responsible for the generation of data which you see in a usual tabular format as a result of + * "smartctl -A" command. The data is formated from the "fake_smart" constant array present on the top of this file. 
+ * + * @param req_header ioctl() header sent along the request, will be HDIO_DRIVE_CMD_HDR_OFFSET bytes long + * @param buff_ptr userspace pointer to a buffer passed to the ioctl() call; it will be overwritten with data + * + * @return 0 on success, -EIO on unexpected call, -ENOMEM when memory reservation fails, or -EFAULT when data fails to + * copy to user buffer + */ +static int populate_ata_smart_values(const u8 *req_header, void __user *buff_ptr) +{ + pr_loc_dbg("Generating fake SMART values"); + + //sanity check if requested SMART READ VALUES sector count is really what we're planning to copy + if (unlikely(req_header[HDIO_DRIVE_CMD_HDR_SEC_CNT]) != ATA_SMART_READ_VALUES_SECTORS) { + pr_loc_err("Expected %d bytes (%d sectors) DATA for ATA SMART READ VALUES, got %d", + ATA_SMART_READ_VALUES_SECTORS, ata_ioctl_buf_size(ATA_SMART_READ_VALUES_SECTORS), + req_header[HDIO_DRIVE_CMD_HDR_SEC_CNT]); + return -EIO; + } + + int i, j; + unsigned char *kbuf; + kzalloc_or_exit_int(kbuf, ata_ioctl_buf_size(ATA_SMART_READ_VALUES_SECTORS)); + u8 *smart_values = (u8 *)(kbuf + HDIO_DRIVE_CMD_HDR_OFFSET); + + //First write response header + kbuf[HDIO_DRIVE_CMD_RET_STATUS] = 0x00; + kbuf[HDIO_DRIVE_CMD_RET_ERROR] = 0x00; + kbuf[HDIO_DRIVE_CMD_RET_SEC_CNT] = ATA_SMART_READ_VALUES_SECTORS; + + //See "Vendor-Specific Data Bytes 0–361" and "Table 5: SMART Attribute Entry Format" in micron.com + // document for specification of these numbers and calculations + //For full structure see "Table 59 − Device SMART data structure" in ATA/ATAPI-6 PDF + smart_values[0] = SMART_SNAP_VERSION; + + //copy ALL attribute bytes as we were asked for everything (including thresholds) + for (i = 0; i < ARRAY_SIZE(fake_smart); i++) { + for (j = 0; j < 11; j++) { + smart_values[2 + (ATA_SMART_RECORD_LEN * i) + j] = fake_smart[i][j]; + } + } + + //specify that we never ran any SMART tests and we're not running any + smart_values[362] = 0x82; //Sec. 
8.55.5.8.1, Table 60 in ATA/ATAPI-6 PDF (self test ran on boot & succeeded) + smart_values[363] = 0x00; //Sec. 8.55.5.8.2, Table 61 in ATA/ATAPI-6 PDF + smart_values[364] = 0x45; //LSB of "Total time to complete Offline data collection" (seconds) + smart_values[365] = 0x00; //MSB of "Total time to complete Offline data collection" (seconds) + smart_values[367] = (1 << 3 | 1 << 4); //bitfield, see sec. 8.55.5.8.4 in ATA/ATAPI-6 PDF + smart_values[368] = (1 << 0 | 1 << 1); //bitfield, see sec. 8.55.5.8.5 in ATA/ATAPI-6 PDF + smart_values[369] = 0x01; //vendor-specific, rel. to sec. 8.55.5.8.5 in ATA/ATAPI-6 PDF + smart_values[370] = 0x01; //bitfield, current only 1st bit used for error logging (Table 59) + smart_values[372] = 0x05; //short self-test polling time (minutes), see Table 59 + smart_values[373] = 0x4B; //long self-test polling time (minutes), see Table 59 + + ata_calc_sector_checksum(smart_values); + + if (copy_to_user(buff_ptr, kbuf, ata_ioctl_buf_size(ATA_SMART_READ_VALUES_SECTORS)) != 0) { + pr_loc_err("Failed to copy SMART VALUES packet to user ptr=%p", buff_ptr); + kfree(kbuf); + return -EFAULT; + } + + kfree(kbuf); + return 0; +} + +/** + * Populates user ioctl() buffer with a subset of fake SMART snapshot values, containing only thresholds + * + * @param req_header ioctl() header sent along the request, will be HDIO_DRIVE_CMD_HDR_OFFSET bytes long + * @param buff_ptr userspace pointer to a buffer passed to the ioctl() call; it will be overwritten with data + * + * @return 0 on success, -EIO on unexpected call, -ENOMEM when memory reservation fails, or -EFAULT when data fails to + * copy to user buffer + */ +static int populate_ata_smart_thresholds(const u8 *req_header, void __user *buff_ptr) +{ + pr_loc_dbg("Generating fake SMART thresholds"); + + //sanity check if requested SMART READ THRESHOLDS sector count is really what we're planning to copy + if (unlikely(req_header[HDIO_DRIVE_CMD_HDR_SEC_CNT]) != ATA_SMART_READ_THRESHOLDS_SECTORS) { + 
pr_loc_err("Expected %d bytes (%d sectors) DATA for ATA SMART READ THRESHOLDS, got %d", + ATA_SMART_READ_THRESHOLDS_SECTORS, ata_ioctl_buf_size(ATA_SMART_READ_THRESHOLDS_SECTORS), + req_header[HDIO_DRIVE_CMD_HDR_SEC_CNT]); + return -EIO; + } + + int i; + unsigned char *kbuf; + kzalloc_or_exit_int(kbuf, ata_ioctl_buf_size(ATA_SMART_READ_THRESHOLDS_SECTORS)); + u8 *smart_thresholds = (u8 *)(kbuf + HDIO_DRIVE_CMD_HDR_OFFSET); + + //First write response header + kbuf[HDIO_DRIVE_CMD_RET_STATUS] = 0x00; + kbuf[HDIO_DRIVE_CMD_RET_ERROR] = 0x00; + kbuf[HDIO_DRIVE_CMD_RET_SEC_CNT] = ATA_SMART_READ_THRESHOLDS_SECTORS; + + //See "Vendor-Specific Data Bytes 0–361" and "Table 5: SMART Attribute Entry Format" in micron.com + // document for specification of these numbers and calculations + //For full structure see "Table 59 − Device SMART data structure" in ATA/ATAPI-6 PDF + smart_thresholds[0] = SMART_SNAP_VERSION; + + //copy a subset of attribute bytes as we were asked for thresholds only + for (i = 0; i < ARRAY_SIZE(fake_smart); i++) { + smart_thresholds[2 + (ATA_SMART_RECORD_LEN * i) + 0] = fake_smart[i][0]; //entry id + smart_thresholds[2 + (ATA_SMART_RECORD_LEN * i) + 1] = fake_smart[i][11]; //threshold value + } + + ata_calc_sector_checksum(smart_thresholds); + + if (copy_to_user(buff_ptr, kbuf, ata_ioctl_buf_size(ATA_SMART_READ_THRESHOLDS_SECTORS)) != 0) { + pr_loc_err("Failed to copy SMART THRESHOLDS packet to user ptr=%p", buff_ptr); + kfree(kbuf); + return -EFAULT; + } + + kfree(kbuf); + return 0; +} + +/** + * Read stored SMART log using WIN_SMART interface + * + * This is a special command from the "WIN_SMART" subset to read the SMART offline log. To understand it see the + * "8.55.6 SMART READ LOG" in ATA/ATAPI-6 specs. It describes it as"Command code B0h with the content of the Features + * register equal to D5h" (B0h = 0xb0 = ATA_CMD_SMART; D5h = 0x05 = WIN_FT_SMART_READ_LOG_SECTOR). + * There are multiple types of logs. 
This function implements all non-vendor ones. + * + * @param req_header ioctl() header sent along the request, will be HDIO_DRIVE_CMD_HDR_OFFSET bytes long + * @param buff_ptr userspace pointer to a buffer passed to the ioctl() call; it will be overwritten with data + * + * @return 0 on success, -EIO on unexpected call, -ENOMEM when memory reservation fails, or -EFAULT when data fails to + * copy to user buffer + */ +static int populate_win_smart_log(const u8 *req_header, void __user *buff_ptr) +{ + pr_loc_dbg("Generating fake WIN_SMART log=%d entries", req_header[HDIO_DRIVE_CMD_HDR_SEC_NUM]); + + //sanity check if requested SMART READ LOG sector count is really what we're planning to copy + if (unlikely(req_header[HDIO_DRIVE_CMD_HDR_SEC_CNT]) != ATA_WIN_SMART_READ_LOG_SECTORS) { + pr_loc_err("Expected %d bytes (%d sectors) DATA for ATA WIN_SMART READ LOG, got %d", + ATA_WIN_SMART_READ_LOG_SECTORS, ata_ioctl_buf_size(ATA_WIN_SMART_READ_LOG_SECTORS), + req_header[HDIO_DRIVE_CMD_HDR_SEC_CNT]); + return -EIO; + } + + unsigned char *kbuf; + kzalloc_or_exit_int(kbuf, ata_ioctl_buf_size(ATA_WIN_SMART_READ_LOG_SECTORS)); + u8 *smart_log = (u8 *)(kbuf + HDIO_DRIVE_CMD_HDR_OFFSET); + + //First write response header + kbuf[HDIO_DRIVE_CMD_RET_STATUS] = 0x00; + kbuf[HDIO_DRIVE_CMD_RET_ERROR] = 0x00; + kbuf[HDIO_DRIVE_CMD_RET_SEC_CNT] = ATA_WIN_SMART_READ_LOG_SECTORS; + + //See "Table 62 − Log address definition" in ATAPI/6 docs + switch (req_header[HDIO_DRIVE_CMD_HDR_SEC_NUM]) { + case 0x00: //log directory. While the spec says it's optional supporting it means fewer calls to other ones + //we're indicating that we DO support multi-sector logging to avoid further log-read logic complexity. If + // the support is indicated as absent all reads to logs at index 0 must return "command aborted" response + smart_log[0] = WIN_SMART_DIG_LOG_VERSION; + //if every other byte is zero we can ignore the rest of the fields according to Table 63. 
We also SHOULD NOT + // generate a checksum for log directory (despite all others using checksums...) + + case 0x01: //summary SMART error log (see sect. 8.55.6.8.2 Summary error log sector) + smart_log[0] = WIN_SMART_SUM_LOG_VERSION; + smart_log[1] = 0x00; //no error entries = index is 0 + smart_log[452] = 0x00; //no errors = count byte 1 is zero + smart_log[453] = 0x00; //no errors = count byte 2 is zero + ata_calc_sector_checksum(smart_log); + break; + + case 0x02: //comprehensive SMART error log + smart_log[0] = WIN_SMART_COMP_LOG_VERSION; + smart_log[1] = 0x00; //no error entries = index is 0 + smart_log[452] = 0x00; //no errors = count byte 1 is zero + smart_log[453] = 0x00; //no errors = count byte 2 is zero + ata_calc_sector_checksum(smart_log); + break; + + case 0x06: //SMART self-test log + smart_log[0] = WIN_SMART_TEST_LOG_VERSION; + smart_log[1] = 0x00; //revision (2nd byte, also defined by 8.55.6.8.4.1) + smart_log[508] = 0x00; //no errors + ata_calc_sector_checksum(smart_log); + break; + + default: //other ones are reserved/vendor/etc + pr_loc_err("Unexpected WIN_FT_SMART_READ_LOG_SECTOR with log_addr=%d", + req_header[HDIO_DRIVE_CMD_HDR_SEC_NUM]); + kfree(kbuf); + return -EIO; + } + + if (copy_to_user(buff_ptr, kbuf, ata_ioctl_buf_size(ATA_WIN_SMART_READ_LOG_SECTORS)) != 0) { + pr_loc_err("Failed to copy WIN_SMART LOG packet to user ptr=%p", buff_ptr); + kfree(kbuf); + return -EFAULT; + } + + kfree(kbuf); + return 0; +} + +/** + * Dispatches an drive-internal SMART test using WIN_SMART interface + * + * @param req_header ioctl() header sent along the request, will be HDIO_DRIVE_CMD_HDR_OFFSET bytes long + * @param buff_ptr userspace pointer to a buffer passed to the ioctl() call; it will be overwritten with data + * + * @return 0 on success, -EIO on unexpected call, -ENOMEM when memory reservation fails, or -EFAULT when data fails to + * copy to user buffer + */ +static int populate_win_smart_exec_test(const u8 *req_header, void __user *buff_ptr) +{ 
+ pr_loc_dbg("Generating fake WIN_SMART offline test type=%d", req_header[HDIO_DRIVE_CMD_HDR_SEC_NUM]); + + unsigned char *kbuf; + kzalloc_or_exit_int(kbuf, HDIO_DRIVE_CMD_HDR_OFFSET); + + //First write response header + kbuf[HDIO_DRIVE_CMD_RET_STATUS] = 0x00; + kbuf[HDIO_DRIVE_CMD_RET_ERROR] = 0x00; + kbuf[HDIO_DRIVE_CMD_RET_SEC_CNT] = ATA_WIN_SMART_EXEC_TEST; + + //See "Table 58 − SMART EXECUTE OFF-LINE IMMEDIATE LBA Low register values" in ATAPI/6 docs + switch (req_header[HDIO_DRIVE_CMD_HDR_SEC_NUM]) { + case 0x00: //off-line in off-line mode + case 0x01: //short in off-line mode + case 0x02: //long in off-line mode + case 0x7f: //abort previous test + case 0x81: //short in captive mode + case 0x82: //long in captive mode + break; //we only need to populate the response header + + default: //other ones are reserved/vendor/etc + pr_loc_err("Unexpected WIN_FT_SMART_READ_LOG_SECTOR with log_addr=%d", + req_header[HDIO_DRIVE_CMD_HDR_SEC_NUM]); + kfree(kbuf); + return -EIO; + } + + if (copy_to_user(buff_ptr, kbuf, HDIO_DRIVE_CMD_HDR_OFFSET) != 0) { + pr_loc_err("Failed to copy WIN_SMART TEST header to user ptr=%p", buff_ptr); + kfree(kbuf); + return -EFAULT; + } + + kfree(kbuf); + return 0; +} + +/** + * Emulates various SMART data requested via ATA_CMD_SMART method + * + * SMART responses here assume that original ioctl() failed (since otherwise it would be no point to emulate them). If + * you call this function on a drive with functioning SMART it will be ignored and fake smart will be generated for it. 
+ * + * @param req_header ioctl() header sent along the request, will be HDIO_DRIVE_CMD_HDR_OFFSET bytes long + * @param buff_ptr userspace pointer to a buffer passed to the ioctl() call; it will be overwritten with data + * + * @return 0 on success, -EIO on unexpected call, -ENOMEM when memory reservation fails, or -EFAULT when data fails to + * copy to user buffer + */ +static int __always_inline handle_ata_cmd_smart(const u8 *req_header, void __user *buff_ptr) +{ + pr_loc_dbg("Got SMART *command* - looking for feature=0x%x", req_header[HDIO_DRIVE_CMD_HDR_FEATURE]); + + switch (req_header[HDIO_DRIVE_CMD_HDR_FEATURE]) { + case ATA_SMART_READ_VALUES: //read all SMART values snapshot + return populate_ata_smart_values(req_header, buff_ptr); + + case ATA_SMART_READ_THRESHOLDS: //read all SMART thresholds snapshot + return populate_ata_smart_thresholds(req_header, buff_ptr); + + case ATA_SMART_ENABLE: //enable previously disabled SMART support + pr_loc_wrn("Attempted ATA_SMART_ENABLE modification!");\ + return 0; + + case WIN_FT_SMART_READ_LOG_SECTOR: //reads offline-stored drive logs + return populate_win_smart_log(req_header, buff_ptr); + + case WIN_FT_SMART_IMMEDIATE_OFFLINE: //execute a SMART test + return populate_win_smart_exec_test(req_header, buff_ptr); + + default: + pr_loc_dbg("Unknown SMART *command* read w/feature=0x%02x", req_header[HDIO_DRIVE_CMD_HDR_FEATURE]); + return -EIO; + } +} + +/** + * Shims various commands launched via HDIO_DRIVE_CMD interface, routing them to individual shims + * + * This function will shim calls which need to be modified or replaced. All ioctl()s routed here are first executed and + * then routed to individual shims. If a given ioctl() is not a one which we're interested in altering the result of a + * real ioctl() call to the driver will be returned unaltered. 
+ * + * To fully understand this function make sure to read HDIO_DRIVE_CMD description provided by kernel developers at + * https://www.kernel.org/doc/Documentation/ioctl/hdio.txt + */ +static int handle_hdio_drive_cmd_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, void __user *buff_ptr) +{ + //Before we execute ioctl we need to save the original header as ioctl will override it (they share buffer) + u8 req_header[HDIO_DRIVE_CMD_HDR_OFFSET]; + if(unlikely(copy_from_user(req_header, buff_ptr, HDIO_DRIVE_CMD_HDR_OFFSET) != 0)) { + pr_loc_err("Expected to copy HDIO_DRIVE_CMD header of %d bytes from %p - it failed", HDIO_DRIVE_CMD_HDR_OFFSET, + buff_ptr); + return -EIO; + } + + int ioctl_out = sd_ioctl_org(bdev, mode, cmd, (unsigned long)buff_ptr); + switch (req_header[HDIO_DRIVE_CMD_HDR_CMD]) { + //this command probes the disk for its overall capabilities; it may have nothing to do with SMART reading but + // we need to modify it to indicate SMART support + case ATA_CMD_ID_ATA: + pr_loc_dbg_ioctl(cmd, "ATA_CMD_ID_ATA", bdev); + return handle_ata_cmd_identify(ioctl_out, req_header, buff_ptr, bdev->bd_disk->disk_name); + + //this command asks directly for the SMART data of the drive and will fail on drives with no real SMART support + case ATA_CMD_SMART: //if the drive supports SMART it will just return the data as-is, no need to proxy + pr_loc_dbg_ioctl(cmd, "ATA_CMD_SMART", bdev); + return (ioctl_out == 0) ? 
0 : handle_ata_cmd_smart(req_header, buff_ptr); + + //We're only interested in a subset of commands - rest are simply redirected back + default: + pr_loc_dbg_ioctl_unk(cmd, req_header[HDIO_DRIVE_CMD_HDR_CMD], bdev); + return ioctl_out; + } +} + + +/***************************************** ATAPI/WIN task interface handling ******************************************/ +/** + * Emulates various SMART data requested via WIN_CMD_SMART method + * + * SMART responses here assume that original ioctl() failed (since otherwise it would be no point to emulate them). If + * you call this function on a drive with functioning SMART it will be ignored and fake smart will be generated for it. + * + * @param req_header ioctl() header sent along the request, will be HDIO_DRIVE_TASK_HDR_OFFSET bytes long + * @param buff_ptr userspace pointer to a buffer passed to the ioctl() call; it will be overwritten with data. So far + * all known commands don't use any data buffer but only the response header (which, like req_header, is + * HDIO_DRIVE_TASK_HDR_OFFSET bytes long) + * + * @return 0 on success, -EIO on unexpected call, -ENOMEM when memory reservation fails, or -EFAULT when data fails to + * copy to user buffer + */ +static int __always_inline handle_ata_task_smart(const u8 *req_header, void __user *buff_ptr) +{ + pr_loc_dbg("Got SMART *task* - looking for feature=0x%x", req_header[HDIO_DRIVE_TASK_HDR_FEATURE]); + + switch (req_header[HDIO_DRIVE_TASK_HDR_FEATURE]) { + case WIN_FT_SMART_STATUS: + case WIN_FT_SMART_AUTOSAVE: + case WIN_FT_SMART_AUTO_OFFLINE: { //this must be in brackets, see https://stackoverflow.com/a/18496437 + //Status-checking commands in WIN_SMART officially work in such a way that if the buffer was unchanged it + // means that SMART is OK. However, two static values of 0x4f/0xc2 are used for OK. Here we're taking the + // easy (and probably safer) approach by returning the same values. 
+ //Smartmontools actually verifies if it's the same as req header as well: + // https://github.com/mirror/smartmontools/blob/b63206bc12efb2ae543040b9008f42c037eb1f04/os_linux.cpp#L405 + u8 rsp_header[HDIO_DRIVE_TASK_HDR_OFFSET] = { + [HDIO_DRIVE_TASK_RET_STATUS] = 0x00, + [HDIO_DRIVE_TASK_RET_ERROR] = 0x00, + [HDIO_DRIVE_TASK_RET_SEC_CNT] = req_header[HDIO_DRIVE_TASK_HDR_SEC_CNT], + [HDIO_DRIVE_TASK_RET_SEC_NUM] = req_header[HDIO_DRIVE_TASK_HDR_SEC_NUM], + [HDIO_DRIVE_TASK_RET_LCYL] = req_header[HDIO_DRIVE_TASK_HDR_LCYL], + [HDIO_DRIVE_TASK_RET_HCYL] = req_header[HDIO_DRIVE_TASK_HDR_HCYL], + [HDIO_DRIVE_TASK_RET_SEL] = req_header[HDIO_DRIVE_TASK_HDR_SEL], + }; + + if (copy_to_user(buff_ptr, rsp_header, HDIO_DRIVE_TASK_HDR_OFFSET) != 0) { + pr_loc_err("Failed to copy WIN_SMART feature=0x%02x task result packet to user ptr=%p", + req_header[HDIO_DRIVE_TASK_HDR_FEATURE], buff_ptr); + return -EFAULT; + } + return 0; + } + default: + pr_loc_dbg("Unknown SMART *task* read w/feature=0x%02x", req_header[HDIO_DRIVE_TASK_HDR_FEATURE]); + return -EIO; //We INTENTIONALLY error-out here as there shouldn't be any other ATA SMART *tasks* requested + } +} + +/** + * Shims various commands launched via HDIO_DRIVE_TASK interface, routing them to individual shims + * + * This function will shim calls which need to be modified or replaced. All ioctl()s routed here are first executed and + * then routed to individual shims. If a given ioctl() is not a one which we're interested in altering the result of a + * real ioctl() call to the driver will be returned unaltered. 
+ * This function is used to a very limited subset of commands in the normal circumstances, as see in the smartmontools + * code: https://github.com/mirror/smartmontools/blob/b63206bc12efb2ae543040b9008f42c037eb1f04/os_linux.cpp#L379 + * + * To fully understand this function make sure to read HDIO_DRIVE_TASK description provided by kernel developers at + * https://www.kernel.org/doc/Documentation/ioctl/hdio.txt + */ +static int +handle_hdio_drive_task_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, void __user *buff_ptr) +{ + //Before we execute ioctl we need to save the original header as ioctl will override it (they share buffer) + u8 req_header[HDIO_DRIVE_TASK_HDR_OFFSET]; + if (unlikely(copy_from_user(req_header, buff_ptr, HDIO_DRIVE_TASK_HDR_OFFSET) != 0)) { + pr_loc_err("Expected to copy HDIO_DRIVE_TASK_HDR_OFFSET header of %d bytes from %p - it failed", + HDIO_DRIVE_TASK_HDR_OFFSET, + buff_ptr); + return -EIO; + } + + int ioctl_out = sd_ioctl_org(bdev, mode, cmd, (unsigned long)buff_ptr); + switch (req_header[HDIO_DRIVE_TASK_HDR_CMD]) { + //this command asks directly for the SMART data. From our understanding it's only used for a small subset of + // commands. The normal SMART reads/logs/etc are going through HDIO_DRIVE_CMD instead. The only thing [so far] + // is the SMART self-reported status which goes via HDIO_DRIVE_TASK route + case WIN_CMD_SMART: //if the drive supports SMART it will just return the data as-is, no need to proxy + pr_loc_dbg_ioctl(cmd, "WIN_CMD_SMART", bdev); + return (ioctl_out == 0) ? 
0 : handle_ata_task_smart(req_header, buff_ptr); + + //We're only interested in a subset of commands run via task IOCTL - rest are simply redirected back + default: + pr_loc_dbg("sd_ioctl(HDIO_DRIVE_TASK ; cmd=0x%02x) => %d - not a hooked cmd, noop", + req_header[HDIO_DRIVE_TASK_HDR_CMD], ioctl_out); + return ioctl_out; + } +} + +/********************************** ioctl() handling re-routing from driver to shim ***********************************/ +//These are called from each other so we need to predeclare them +int sd_ioctl_canary_install(void); +int sd_ioctl_canary_uninstall(void); + +/** + * Filters/proxies/emulates device IOCTLs as needed for emulating SMART + * + * This shim is installed just before the first IOCTL from the userspace. + */ +static int sd_ioctl_smart_shim(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) +{ +#ifdef DBG_SMART_PRINT_ALL_IOCTL + pr_loc_dbg("Handling ioctl(0x%02x) for /dev/%s", cmd, bdev->bd_disk->disk_name); +#endif + + if (unlikely(!sd_ioctl_org)) { + pr_loc_bug("Called %s but no original sd_ioctl() address is known", __FUNCTION__); + return -EIO; + } + + switch (cmd) { + case HDIO_DRIVE_CMD: //"a special drive command" as per hdreg.h + return handle_hdio_drive_cmd_ioctl(bdev, mode, cmd, (void *)arg); + + case HDIO_DRIVE_TASK: //"execute task and special drive command" as per Documentation/ioctl/hdio.txt + return handle_hdio_drive_task_ioctl(bdev, mode, cmd, (void *)arg); + + default: //any other ioctls are proxied as-is +# ifdef DBG_SMART_PRINT_ALL_IOCTL + pr_loc_dbg("sd_ioctl(0x%02x) - not a hooked ioctl, noop", cmd); +# endif + return sd_ioctl_org(bdev, mode, cmd, arg); + } +} + +/** + * Installs a permanent shim into the sd driver ops + * + * Keep in mind this function works only if the sd_fops address has been populated. It's normally done by the canary + * (sd_ioctl_canary()) which is installed by sd_ioctl_canary_install(). 
+ * + * @return 0 on success; -E on error + */ +int sd_ioctl_smart_shim_install(void) +{ + if (unlikely(!sd_fops)) { + pr_loc_bug("Gendisk has no ops (yet?)"); //this could happen if IOCTL somehow arrived before sd_probe_async() + return -EIO; + } + + //This shouldn't happen - it can only be the case if LKM is unloaded without cleanup (or cleanup is broken) + if (unlikely(sd_fops->ioctl == sd_ioctl_smart_shim)) { + pr_loc_bug("sd_ioctl() SMART shim was already installed"); + return 0; + } + + pr_loc_dbg("Rerouting sd_fops->ioctl<%p>=%pF<%p> to %pF<%p>", &sd_fops->ioctl, sd_fops->ioctl, sd_fops->ioctl, + sd_ioctl_smart_shim, sd_ioctl_smart_shim); + sd_ioctl_org = sd_fops->ioctl; + + WITH_MEM_UNLOCKED( + &sd_fops->ioctl, sizeof(void *), + sd_fops->ioctl = sd_ioctl_smart_shim; + ); + + return 0; +} + +/** + * Removes the permanent shim present in sd driver ops (if installed) + * + * If called with shim not installed it will noop by design. + * + * @return 0 on success or noop; -E on error + */ +int sd_ioctl_smart_shim_uninstall(void) +{ + //sd_fops is not saved - nothing to restore + if (unlikely(!sd_fops)) + return 0; + + if (unlikely(!sd_ioctl_org)) { + pr_loc_bug("sd_fops->ioctl is not saved - cannot restore"); + return -EIO; + } + + pr_loc_dbg("Restoring sd_fops->ioctl<%p>=%pF<%p> to %pF<%p>", &sd_fops->ioctl, sd_fops->ioctl, sd_fops->ioctl, + sd_ioctl_org, sd_ioctl_org); + + WITH_MEM_UNLOCKED( + &sd_fops->ioctl, sizeof(void *), + sd_fops->ioctl = sd_ioctl_org; + ); + + sd_ioctl_org = NULL; + sd_fops = NULL; + + return 0; +} + +/** + * Awaits any SCSI ioctl() to override the ioctl handler in sd_fops.ioctl (drivers/scsi/sd.c) + * + * What is essentially does it awaits the first call from the userspace to trigger chain landing in sd_ioctl(), which + * will be any program attempting to do an ioctl() on /dev/sdX. Then it gets gendisk from block_device passed, which in + * turn contains a pointer to the same-for-all sd_fops. 
Having this pointer lets us replace the ->ioctl pointer to finally
 * shim the sd_ioctl() pathway. This is done to AVOID using override_symbol:call_overridden_symbol() which isn't really
 * multi-thread safe and is actually pretty slow. The last thing we want is slowing down IOCTLs on a storage system.
 * Using the strange routine above installs a fast IOCTL filter to e.g. respond to SMARTs.
 *
 * While this method isn't really clear, other ones are even worse. To find sd_fops we need to get at least one
 * FULLY INITIALIZED gendisk, which is complicated. During sd_probe() the gendisk is only partially built (i.e. it lacks
 * sd_fops...). These other parts are populated in delayed/scheduled sd_probe_async(). So the number of permutations is
 * quite large, as we need to consider the following states of the system:
 *  - no driver loaded
 *  - driver loaded, no drives yet
 *  - driver loaded, 1 drive which is not initialized yet
 *  - driver loaded, >1 drive, one initialized and one not
 *  - driver loaded, 1 drive which is initialized
 *  - ...... you see the point?
 *
 * This whole thing would be much easier if we could get scsi_disk structure, but it's a private one (and heavily
 * modified with MY_ABC_HERE).
+ */ +static int sd_ioctl_canary(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) +{ + int out; + spin_lock(&sd_ioctl_canary_lock); + + pr_loc_dbg("%s triggered for first ioctl()", __FUNCTION__); + //if the override of the symbol is no longer present it means that before we've got the lock another canary was + // processing another IOCTL and did what the canary needed to do - we can safely redirect + if (unlikely(!sd_ioctl_canary_ovs)) { + if (unlikely(!sd_fops)) { + pr_loc_bug("Canary is already processed after obtaining lock BUT fops aren't here - the canary is broken"); + return -EIO; //we don't really know the state + } + + pr_loc_dbg("Canary is already processed after obtaining lock - proxying to sd_fops->ioctl directly"); + goto proxy_ioctl; + } + + if (unlikely(!bdev)) { + pr_loc_bug("NULL block_device passed to %s", __FUNCTION__); + return -EIO; + } + + struct gendisk *disk = bdev->bd_disk; + if (unlikely(!disk)) { + pr_loc_bug("block_device w/o gendisk found"); + return -EIO; + } + + sd_fops = (void *)disk->fops; //forcefully remove "const" protection here + out = sd_ioctl_smart_shim_install(); + if (out != 0) { + pr_loc_err("Failed to install proper SMART shim"); + return -EIO; + } + + out = sd_ioctl_canary_uninstall(); //it will log what's wrong + spin_unlock(&sd_ioctl_canary_lock); + if (out != 0) + return -EIO; //we cannot continue as the sd_ioctl() wasn't restored and calling the shim can cause infinite loop + + proxy_ioctl: + pr_loc_dbg("Canary finished - routing to sd_fops->ioctl = %pF<%p>", sd_fops->ioctl, sd_fops->ioctl); + return sd_fops->ioctl(bdev, mode, cmd, arg); +} + +/** + * Installs a temporary sd_ioctl() replacement to capture sd_fops used to install permanent shim + * + * This function is used to replace sd_ioctl() with sd_ioctl_canary(). The purpose of this is to capture sd_fops on a + * first ioctl() from userspace. Then this temporary canary is replaced with a real one (the sd_ioctl_smart_shim()). 
+ * That two-step process is here because we don't have a sane way of obtaining sd_fops (see the function comment for + * sd_ioctl_canary() to learn about the details) and using override_symbol method makes calls to the original sd_ioctl() + * slow and potentially prone to multithreading problems. + * + * @return 0 on success; -E on error + */ +int sd_ioctl_canary_install(void) +{ + if (unlikely(sd_ioctl_canary_ovs)) { + pr_loc_bug("Canary already installed"); + return 0; //technically not a full-blown failure + } + + spin_lock_init(&sd_ioctl_canary_lock); + sd_ioctl_canary_ovs = override_symbol("sd_ioctl", sd_ioctl_canary); + if (IS_ERR(sd_ioctl_canary_ovs)) { + pr_loc_err("Failed to install sd_ioctl() canary"); + int out = PTR_ERR(sd_ioctl_canary_ovs); + sd_ioctl_canary_ovs = NULL; + return out; + } + + return 0; +} + +/** + * Removes the temporary shim installed via sd_ioctl_canary_install() (if installed) + * + * If called with shim not installed it will noop by design. It will noop if the shim was never installed or if it was + * already replaced with the permanent shim. 
+ * + * @return 0 on success or noop; -E on error + */ +int sd_ioctl_canary_uninstall(void) +{ + //this isn't an error failure - it will most likely happen when unregistering this shim after first ioctl + if (unlikely(!sd_ioctl_canary_ovs)) + return 0; + + int out = restore_symbol(sd_ioctl_canary_ovs); + if (out != 0) { + pr_loc_err("Failed to uninstall sd_ioctl() canary"); + return out; + } + + sd_ioctl_canary_ovs = NULL; + return 0; +} + +/****************************************** Standard public API of the shim *******************************************/ +int register_disk_smart_shim(void) +{ + shim_reg_in(); + + int out; + + out = is_scsi_driver_loaded(); + if (IS_SCSI_DRIVER_ERROR(out)) { + pr_loc_err("Failed to determine SCSI driver status - error=%d", out); + return out; + } else if(out == SCSI_DRV_LOADED || kernel_has_symbol("sd_ioctl")) { + //driver is loaded, OR it's not loaded, but it's compiled-in + pr_loc_dbg("SCSI driver exists - installing canary"); + if ((out = sd_ioctl_canary_install()) != 0) + return out; + } else { //driver not loaded and doesn't exist (=not compiled in) + //normally this should call watch_scsi_driver_register() but the current implementation of driver watcher allows + // for just a single watcher per driver (as it doesn't use standard kernel notifiers, sic!). 
This is however + // unlikely case to ever occur + pr_loc_bug("Cannot register SMART shim - the SCSI driver \"%s\" is not loaded and it doesn't exist", + SCSI_DRV_NAME); + return -ENXIO; + } + + shim_reg_ok(); + return 0; +} + +int unregister_disk_smart_shim(void) +{ + shim_ureg_in(); + + int out; + bool is_error = false; + + out = sd_ioctl_canary_uninstall(); + if (out != 0) { + pr_loc_err("sd_ioctl_canary_uninstall failed - error=%d", out); + is_error = true; + } + + out = sd_ioctl_smart_shim_uninstall(); + if (out != 0) { + pr_loc_err("sd_ioctl_smart_shim_uninstall failed - error=%d", out); + is_error = true; + } + + if (is_error) + return -EIO; + + shim_ureg_ok(); + return 0; +} diff --git a/shim/storage/smart_shim.h b/shim/storage/smart_shim.h new file mode 100644 index 0000000..badbe5f --- /dev/null +++ b/shim/storage/smart_shim.h @@ -0,0 +1,7 @@ +#ifndef REDPILL_SMART_SHIM_H +#define REDPILL_SMART_SHIM_H + +int register_disk_smart_shim(void); +int unregister_disk_smart_shim(void); + +#endif //REDPILL_SMART_SHIM_H diff --git a/shim/uart_fixer.c b/shim/uart_fixer.c new file mode 100644 index 0000000..7f7269a --- /dev/null +++ b/shim/uart_fixer.c @@ -0,0 +1,105 @@ +#define SHIM_NAME "UART fixer" + +#include "uart_fixer.h" +#include "shim_base.h" +#include "../common.h" +#include "../config/runtime_config.h" //STD_COM* +#include "../config/platform_types.h" //hw_config +#include "../internal/call_protected.h" //early_serial_setup() +#include "../internal/override/override_symbol.h" //overriding uart_match_port() +#include //serial8250_unregister_port + +#ifdef DBG_DISABLE_UART_SWAP_FIX +static int noinline uart_swap_hw_output(unsigned int from, unsigned char to) +{ + pr_loc_wrn("UART swapping needed for the platform but forcefully disabled via DBG_DISABLE_UART_SWAP"); + return 0; +} +#elif defined(UART_BUG_SWAPPED) +#include "../internal/uart/uart_swapper.h" +#else +static int noinline uart_swap_hw_output(unsigned int from, unsigned char to) +{ + 
pr_loc_bug("Called %s from uart_fixer context when UART_BUG_SWAPPED is not set", __FUNCTION__); + return -EINVAL; +} +#endif + +static bool ttyS0_force_initted = false; //Was ttyS0 forcefully initialized by us? +static bool serial_swapped = false; //Whether ttyS0 and ttyS1 were swapped + +/** + * On some platforms (e.g. 918+) the first serial port appears to not be functional as it's not initialized properly. + * + * It is speculated that it has to do with "CONFIG_SYNO_X86_TTY_CONSOLE_OUTPUT=y" but it's not confirmed. If this is not + * fixed by this function setting kernel console output to ttyS0 will result in earlycon working as expected (as it + * doesn't use the normal 8250 driver) with nothing being transmitted as soon as earlycon is switched to the proper + * "console=" port. + */ +static int fix_muted_ttyS0(void) +{ + int out = 0; + struct uart_port port = { + .iobase = STD_COM1_IOBASE, + .uartclk = STD_COMX_BAUD * 16, + .irq = STD_COM1_IRQ, + .flags = STD_COMX_FLAGS + }; + + if ((out = _early_serial_setup(&port)) != 0) { + pr_loc_err("Failed to register ttyS0 to hw port @ %lx", port.iobase); + return out; + } + + pr_loc_dbg("Fixed muted ttyS0 to hw port @ %lx", port.iobase); + ttyS0_force_initted = true; + return out; +} + +/** + * Reverses what fix_muted_ttyS0() did + */ +static int mute_ttyS0(void) +{ + pr_loc_dbg("Re-muting ttyS0"); + serial8250_unregister_port(0); + + return 0; +} + +int register_uart_fixer(const hw_config_uart_fixer *hw) +{ + shim_reg_in(); + + int out = 0; + if ( + (hw->swap_serial && (out = uart_swap_hw_output(1, 0)) != 0) || + (hw->reinit_ttyS0 && (out = fix_muted_ttyS0()) != 0) + ) { + pr_loc_err("Failed to register UART fixer"); + + return out; + } + + serial_swapped = hw->swap_serial; + + shim_reg_ok(); + return out; +} + +int unregister_uart_fixer(void) +{ + shim_ureg_in(); + + int out = 0; + if ( + (serial_swapped && (out = uart_swap_hw_output(0, 1)) != 0) || + (ttyS0_force_initted && (out = mute_ttyS0()) != 0) + ) { + 
pr_loc_err("Failed to unregister UART fixer"); + return out; + } + + shim_ureg_ok(); + return out; +} \ No newline at end of file diff --git a/shim/uart_fixer.h b/shim/uart_fixer.h new file mode 100644 index 0000000..f8170a3 --- /dev/null +++ b/shim/uart_fixer.h @@ -0,0 +1,8 @@ +#ifndef REDPILL_UART_FIXER_H +#define REDPILL_UART_FIXER_H + +typedef struct hw_config hw_config_uart_fixer; +int register_uart_fixer(const hw_config_uart_fixer *hw); +int unregister_uart_fixer(void); + +#endif //REDPILL_UART_FIXER_H diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 0000000..c3d228c --- /dev/null +++ b/tools/README.md @@ -0,0 +1,5 @@ +# Dev Tools + +This directory contains some tools we use during development. They're not +normally used with the module in any way. They're messy, buggy, quick and +dirty but often helpful ;) \ No newline at end of file diff --git a/tools/always_serial.sh b/tools/always_serial.sh new file mode 100644 index 0000000..921ed2e --- /dev/null +++ b/tools/always_serial.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Usage: always_serial.sh +# +# Runs Proxmox serial console. If it fails it tries again... and again... and again until it succeeds. This is very +# useful when stopping and starting VMs with multiple serial ports. Without it every time you stop and start a VM you +# have to go and open all serial consoles again manually. +# To exit this script press Control+C twice (or if console is active Control+O and then Control+C twice). 
+ +trap_cancel() { + echo "Press Control+C once more terminate the process (or wait 2s for it to restart)" + sleep 2 || exit 1 +} +trap trap_cancel SIGINT SIGTERM + +if [[ "$#" -ne 2 ]]; then + echo "Usage: $0 " + exit 2 +fi + +while true; do + clear + echo "Started serial$2 monitor for VM=$1 at" $(date) + qm terminal $1 -iface serial$2 +done \ No newline at end of file diff --git a/tools/always_telnet.sh b/tools/always_telnet.sh new file mode 100644 index 0000000..b1a7487 --- /dev/null +++ b/tools/always_telnet.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Usage: always_telnet.sh +# +# Runs telnet remote console. If it fails it tries again... and again... and again until it succeeds. This is very +# useful when stopping and starting VMs with multiple serial ports on ESXi. Without it every time you stop and start a +# VM you have to go and open all serial consoles again manually. +# To exit this script press Control+C twice (or if console is active Control+O and then Control+C twice). + +trap_cancel() { + echo "Press Control+C once more terminate the process (or wait 2s for it to restart)" + sleep 2 || exit 1 +} +trap trap_cancel SIGINT SIGTERM + +if [[ "$#" -ne 2 ]]; then + echo "Usage: $0 " + exit 2 +fi + +while true; do + clear + echo "Started telnet for $1:$2 at" $(date) + telnet $1 $2 + sleep 0.2 +done \ No newline at end of file diff --git a/tools/inject_rp_ko.sh b/tools/inject_rp_ko.sh new file mode 100644 index 0000000..4d79227 --- /dev/null +++ b/tools/inject_rp_ko.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash +# Injects RedPill LKM file into a ramdisk inside of an existing image (so you can test new LKM without constant full image +# rebuild & transfer) +# +# Internally we use it something like this with Proxmox pointing to /dev/loop0 as the USB source: +# rm redpill.ko ; wget https://buildsrv/redpill.ko ; \ +# IRP_LEAVE_ATTACHED=1 ./inject_rp_ko.sh rp-3615-v6.img redpill.ko ; losetup ; \ +# qm stop 101 ; sleep 1 ; qm start 101 ; qm terminal 101 -iface serial1 + 
+self=${0##*/} +img="$(realpath $1 2> /dev/null)" +lkm="$(realpath $2 2> /dev/null)" +if [ $# -ne 2 -o ! -f "$img" -o ! -f "$lkm" ] +then + echo "Usage: $self " + exit 2 +fi + +echo "Detaching $img from all loopdevs" +losetup -j "$img" | grep -E -o '^/dev/loop[0-9]+' | \ +while read -r loopdev; do + umount "${loopdev}p"? 2>/dev/null + losetup -d "$loopdev" + echo "Detached $loopdev" +done + +losetup -j "$img" | grep -E -q '^/dev/loop[0-9]+' +if [ $? -eq 0 ]; then + echo "$img is still attached to some loop devs!" + exit 1 +fi + +set -euo pipefail +LODEV="$(losetup --show -f -P "$img")" + +UNIQ_BASE="$PWD/__inject_rp_$(date '+%s')" +echo "Making directories in $UNIQ_BASE" +TMP_MNT_DIR="$UNIQ_BASE/img-mnt" +TMP_RDU_DIR="$UNIQ_BASE/rd-unpacked" +mkdir -p "$TMP_MNT_DIR" +mkdir -p "$TMP_RDU_DIR" + +echo "Mounting in $TMP_MNT_DIR" +mount "${LODEV}p1" "$TMP_MNT_DIR" + +echo "Unpacking $TMP_MNT_DIR/rd.gz" +cd "$TMP_RDU_DIR" +if file "$TMP_MNT_DIR/rd.gz" | grep -q 'cpio archive'; then # special case: uncompressed rd + IRP_FLAT_RD=1 + cat "$TMP_MNT_DIR/rd.gz" | cpio -idmv +else + IRP_FLAT_RD=0 + xz -dc < "$TMP_MNT_DIR/rd.gz" | cpio -idmv +fi + +echo "Copying $lkm" +cp "$lkm" "$TMP_RDU_DIR/usr/lib/modules/rp.ko" + +echo "Repacking $TMP_MNT_DIR/rd.gz" +if [[ IRP_FLAT_RD -eq 1 ]]; then # special case: uncompressed rd + find . 2>/dev/null | cpio -o -H newc -R root:root > "$TMP_MNT_DIR/rd.gz" +else + find . 
2>/dev/null | cpio -o -H newc -R root:root | xz -9 --format=lzma > "$TMP_MNT_DIR/rd.gz" +fi + +echo "Unmounting & detaching (if requested)" +sync +umount "$TMP_MNT_DIR" +if [[ -z "${IRP_LEAVE_ATTACHED}" ]]; then + losetup -d "$LODEV" +fi + +echo "Cleaning up $UNIQ_BASE" +rm -rf "$UNIQ_BASE" \ No newline at end of file diff --git a/tools/make_all.sh b/tools/make_all.sh new file mode 100644 index 0000000..cd98df9 --- /dev/null +++ b/tools/make_all.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# Makes all permutations of the LKM and copies them to RedPill Load directory so that we can easily rebuild all images +# Yes, it has all the paths hardcoded - change it to fit your environment. +# When you are executing this script do it from the root of the LKM dir like ./tools/make_all.sh + +LINUX_SRC_ROOT="$PWD/.." +RP_LOAD_ROOT="$HOME/build/redpill-load" + +set -euo pipefail +rm redpill-*.bin redpill-*.ko || true + +# Build for v6 for 3615xs +make LINUX_SRC="$LINUX_SRC_ROOT/linux-3.10.x-bromolow-25426" clean +make LINUX_SRC="$LINUX_SRC_ROOT/linux-3.10.x-bromolow-25426" -j dev-v6 +cp redpill.ko "$RP_LOAD_ROOT/ext/rp-lkm/redpill-linux-v3.10.105.ko" +cp redpill.ko redpill-v6-3615.bin + +# Build for v7 for 3615xs +make LINUX_SRC="$LINUX_SRC_ROOT/bromolow-DSM-7.0-toolkit/build" clean +make LINUX_SRC="$LINUX_SRC_ROOT/bromolow-DSM-7.0-toolkit/build" -j dev-v7 +cp redpill.ko "$RP_LOAD_ROOT/ext/rp-lkm/redpill-linux-v3.10.108.ko" +cp redpill.ko redpill-v7-3615.bin + +# Build for v6 for 918+ +make LINUX_SRC="$LINUX_SRC_ROOT/linux-4.4.x-apollolake-25426" clean +make LINUX_SRC="$LINUX_SRC_ROOT/linux-4.4.x-apollolake-25426" -j dev-v6 +cp redpill.ko "$RP_LOAD_ROOT/ext/rp-lkm/redpill-linux-v4.4.59+.ko" +cp redpill.ko redpill-v6-918.bin + +# Build for v7 for 918+ +make LINUX_SRC="$LINUX_SRC_ROOT/apollolake-DS-7.0-toolkit/build" clean +make LINUX_SRC="$LINUX_SRC_ROOT/apollolake-DS-7.0-toolkit/build" -j dev-v7 +cp redpill.ko "$RP_LOAD_ROOT/ext/rp-lkm/redpill-linux-v4.4.180+.ko" +cp redpill.ko 
redpill-v7-918.bin + +ln -s redpill-v6-3615.bin redpill-v6-3615.ko +ln -s redpill-v7-3615.bin redpill-v7-3615.ko +ln -s redpill-v6-918.bin redpill-v6-918.ko +ln -s redpill-v7-918.bin redpill-v7-918.ko + +echo "OK" \ No newline at end of file