/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2020 IBM Corporation. All rights reserved. * * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "memory_patcher.h" #include "opal/util/output.h" #include "opal/util/show_help.h" #include "opal/mca/memory/base/empty.h" #include "opal/mca/memory/base/base.h" #include "opal/memoryhooks/memory.h" #include "opal/mca/patcher/base/base.h" #include #include #include #include #include #include #include #include #include #if defined(HAVE_SYS_SYSCALL_H) #include #endif #if defined(HAVE_LINUX_MMAN_H) #include #endif #if defined(HAVE_SYS_IPC_H) #include #endif #include "memory_patcher.h" #undef opal_memory_changed #define HAS_SHMDT (defined(SYS_shmdt) || \ (defined(IPCOP_shmdt) && defined(SYS_ipc))) #define HAS_SHMAT (defined(SYS_shmat) || \ (defined(IPCOP_shmat) && defined(SYS_ipc))) static int patcher_open(void); static int patcher_close(void); static int patcher_register(void); static int patcher_query (int *); static int mca_memory_patcher_priority; opal_memory_patcher_component_t mca_memory_patcher_component = { .super = { .memoryc_version = { OPAL_MEMORY_BASE_VERSION_2_0_0, /* Component name and version */ .mca_component_name = "patcher", MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, OPAL_RELEASE_VERSION), /* Component open and close functions */ .mca_open_component = patcher_open, .mca_close_component = patcher_close, .mca_register_component_params = patcher_register, }, .memoryc_data = { /* The component is checkpoint ready */ MCA_BASE_METADATA_PARAM_CHECKPOINT }, /* Memory framework functions. */ .memoryc_query = patcher_query, .memoryc_register = opal_memory_base_component_register_empty, .memoryc_deregister = opal_memory_base_component_deregister_empty, .memoryc_set_alignment = opal_memory_base_component_set_alignment_empty, }, /* Component-specific data, filled in later (compiler will 0/NULL it out) */ }; #if HAVE_DECL___SYSCALL && defined(HAVE___SYSCALL) /* calling __syscall is preferred on some systems when some arguments may be 64-bit. it also * has the benefit of having an off_t return type */ #define memory_patcher_syscall __syscall #else #define memory_patcher_syscall syscall #endif /* All the hooks in this file have two levels. The first level has the OPAL_PATCHER_* macros * around the call to the second level. This was done because with xlc the compiler was * generating an access to r2 before the OPAL_PATCHER_* assembly. This was loading invalid * data. If this can be resolved the two levels can be joined. */ /* * Nathan's original fix described above can have the same problem reappear if the * interception functions inline themselves. */ static void *_intercept_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) __opal_attribute_noinline__; static int _intercept_munmap(void *start, size_t length) __opal_attribute_noinline__; #if defined (SYS_mremap) #if defined(__linux__) static void *_intercept_mremap (void *start, size_t oldlen, size_t newlen, int flags, void *new_address) __opal_attribute_noinline__; #else static void *_intercept_mremap (void *start, size_t oldlen, void *new_address, size_t newlen, int flags) __opal_attribute_noinline__; #endif // defined(__linux__) #endif // defined(SYS_mremap) static int _intercept_madvise (void *start, size_t length, int advice) __opal_attribute_noinline__; #if defined SYS_brk static int _intercept_brk (void *addr) __opal_attribute_noinline__; #endif #if defined(__linux__) #if HAS_SHMAT static void *_intercept_shmat(int shmid, const void *shmaddr, int shmflg) __opal_attribute_noinline__; #endif // HAS_SHMAT #if HAS_SHMDT static int _intercept_shmdt (const void *shmaddr) __opal_attribute_noinline__; #endif // HAS_SHMDT #endif // defined(__linux__) #if defined (SYS_mmap) #if defined(HAVE___MMAP) && !HAVE_DECL___MMAP /* prototype for Apple's internal mmap function */ void *__mmap (void *start, size_t length, int prot, int flags, int fd, off_t offset); #endif static void *(*original_mmap)(void *, size_t, int, int, int, off_t); static void *_intercept_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) { void *result = 0; if ((flags & MAP_FIXED) && (start != NULL)) { opal_mem_hooks_release_hook (start, length, true); } if (!original_mmap) { result = (void*)(intptr_t) memory_patcher_syscall(SYS_mmap, start, length, prot, flags, fd, offset); } else { result = original_mmap (start, length, prot, flags, fd, offset); } return result; } static void *intercept_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) { OPAL_PATCHER_BEGIN; void *result = _intercept_mmap (start, length, prot, flags, fd, offset); OPAL_PATCHER_END; return result; } #endif #if defined (SYS_munmap) static int (*original_munmap) (void *, size_t); static int _intercept_munmap(void *start, size_t length) { int result = 0; /* could be in a malloc implementation */ opal_mem_hooks_release_hook (start, length, true); if (!original_munmap) { result = memory_patcher_syscall(SYS_munmap, start, length); } else { result = original_munmap (start, length); } return result; } static int intercept_munmap(void *start, size_t length) { OPAL_PATCHER_BEGIN; int result = _intercept_munmap (start, length); OPAL_PATCHER_END; return result; } #endif #if defined (SYS_mremap) #if defined(__linux__) /* on linux this function has an optional extra argument but ... can not be used here because it * causes issues when intercepting a 4-argument mremap call */ static void *(*original_mremap) (void *, size_t, size_t, int, void *); #else /* mremap has a different signature on BSD systems */ static void *(*original_mremap) (void *, size_t, void *, size_t, int); #endif #if defined(__linux__) static void *_intercept_mremap (void *start, size_t oldlen, size_t newlen, int flags, void *new_address) #else static void *_intercept_mremap (void *start, size_t oldlen, void *new_address, size_t newlen, int flags) #endif { void *result = MAP_FAILED; if (MAP_FAILED != start && oldlen > 0) { opal_mem_hooks_release_hook (start, oldlen, true); } #if defined(MREMAP_FIXED) if (!(flags & MREMAP_FIXED)) { new_address = NULL; } #endif #if defined(__linux__) if (!original_mremap) { result = (void *)(intptr_t) memory_patcher_syscall (SYS_mremap, start, oldlen, newlen, flags, new_address); } else { result = original_mremap (start, oldlen, newlen, flags, new_address); } #else if (!original_mremap) { result = (void *)(intptr_t) memory_patcher_syscall (SYS_mremap, start, oldlen, new_address, newlen, flags); } else { result = original_mremap (start, oldlen, new_address, newlen, flags); } #endif return result; } #if defined(__linux__) static void *intercept_mremap (void *start, size_t oldlen, size_t newlen, int flags, void *new_address) { OPAL_PATCHER_BEGIN; void *result = _intercept_mremap (start, oldlen, newlen, flags, new_address); OPAL_PATCHER_END; return result; } #else static void *intercept_mremap (void *start, size_t oldlen, void *new_address, size_t newlen, int flags) { OPAL_PATCHER_BEGIN; void *result = _intercept_mremap (start, oldlen, new_address, newlen, flags); OPAL_PATCHER_END; return result; } #endif #endif #if defined (SYS_madvise) static int (*original_madvise) (void *, size_t, int); static int _intercept_madvise (void *start, size_t length, int advice) { int result = 0; if (advice == MADV_DONTNEED || #ifdef MADV_FREE advice == MADV_FREE || #endif #ifdef MADV_REMOVE advice == MADV_REMOVE || #endif advice == POSIX_MADV_DONTNEED) { opal_mem_hooks_release_hook (start, length, false); } if (!original_madvise) { result = memory_patcher_syscall(SYS_madvise, start, length, advice); } else { result = original_madvise (start, length, advice); } return result; } static int intercept_madvise (void *start, size_t length, int advice) { OPAL_PATCHER_BEGIN; int result = _intercept_madvise (start, length, advice); OPAL_PATCHER_END; return result; } #endif #if defined SYS_brk #ifdef HAVE___CURBRK extern void *__curbrk; /* in libc */ #endif static int (*original_brk) (void *); static int _intercept_brk (void *addr) { int result = 0; void *old_addr, *new_addr; #ifdef HAVE___CURBRK old_addr = __curbrk; #else old_addr = sbrk (0); #endif if (!original_brk) { /* get the current_addr */ new_addr = (void *) (intptr_t) memory_patcher_syscall(SYS_brk, addr); #ifdef HAVE___CURBRK /* * Note: if we were using glibc brk/sbrk, their __curbrk would get * updated, but since we're going straight to the syscall, we have * to update __curbrk or else glibc won't see it. */ __curbrk = new_addr; #endif } else { result = original_brk (addr); #ifdef HAVE___CURBRK new_addr = __curbrk; #else new_addr = sbrk (0); #endif } if (new_addr < addr) { errno = ENOMEM; result = -1; } else if (new_addr < old_addr) { opal_mem_hooks_release_hook (new_addr, (intptr_t) old_addr - (intptr_t) new_addr, true); } return result; } static int intercept_brk (void *addr) { OPAL_PATCHER_BEGIN; int result = _intercept_brk (addr); OPAL_PATCHER_END; return result; } #endif // These op codes used to be in bits/ipc.h but were removed in glibc in 2015 // with a comment saying they should be defined in internal headers: // https://sourceware.org/bugzilla/show_bug.cgi?id=18560 // and when glibc uses that syscall it seems to do so from its own definitions: // https://github.com/bminor/glibc/search?q=IPCOP_shmat&unscoped_q=IPCOP_shmat #ifndef IPCOP_shmat #define IPCOP_shmat 21 #endif #ifndef IPCOP_shmdt #define IPCOP_shmdt 22 #endif #if defined(__linux__) #if (HAS_SHMDT || HAS_SHMAT) #include #include #include static size_t memory_patcher_get_shm_seg_size (const void *shmaddr) { unsigned long start_addr, end_addr; char *ptr, *newline; char buffer[1024]; size_t seg_size = 0; int fd; seg_size = 0; fd = open ("/proc/self/maps", O_RDONLY); if (fd < 0) { return 0; } for (size_t read_offset = 0 ; ; ) { ssize_t nread = read(fd, buffer + read_offset, sizeof(buffer) - 1 - read_offset); if (nread <= 0) { if (errno == EINTR) { continue; } break; } else { buffer[nread + read_offset] = '\0'; } ptr = buffer; while ( (newline = strchr(ptr, '\n')) != NULL ) { /* 00400000-0040b000 r-xp ... \n */ int ret = sscanf(ptr, "%lx-%lx ", &start_addr, &end_addr); if (ret != 2) { continue; } if (start_addr == (uintptr_t)shmaddr) { seg_size = end_addr - start_addr; goto out_close; } newline = strchr(ptr, '\n'); if (newline == NULL) { break; } ptr = newline + 1; } read_offset = strlen(ptr); memmove(buffer, ptr, read_offset); } out_close: close(fd); return seg_size; } static size_t get_shm_size(int shmid) { struct shmid_ds ds; int ret; ret = shmctl(shmid, IPC_STAT, &ds); if (ret < 0) { return 0; } return ds.shm_segsz; } #endif #endif // defined(__linux__) #if defined(__linux__) #if HAS_SHMAT static void *(*original_shmat)(int shmid, const void *shmaddr, int shmflg); static void *_intercept_shmat(int shmid, const void *shmaddr, int shmflg) { void *result = 0; size_t size = get_shm_size(shmid); if ((shmflg & SHM_REMAP) && (shmaddr != NULL)) { // I don't really know what REMAP combined with SHM_RND does, so I'll just // guess it remaps all the way down to the lower attach_addr, and all the // way up to the original shmaddr+size uintptr_t attach_addr = (uintptr_t)shmaddr; if (shmflg & SHM_RND) { attach_addr -= ((uintptr_t)shmaddr) % SHMLBA; size += ((uintptr_t)shmaddr) % SHMLBA; } opal_mem_hooks_release_hook ((void*)attach_addr, size, false); } if (!original_shmat) { #if defined(SYS_shmat) result = (void*) memory_patcher_syscall(SYS_shmat, shmid, shmaddr, shmflg); #else // IPCOP_shmat unsigned long ret; ret = memory_patcher_syscall(SYS_ipc, IPCOP_shmat, shmid, shmflg, &shmaddr, shmaddr); result = (ret > -(unsigned long)SHMLBA) ? (void *)ret : (void *)shmaddr; #endif } else { result = original_shmat (shmid, shmaddr, shmflg); } return result; } static void* intercept_shmat (int shmid, const void * shmaddr, int shmflg) { OPAL_PATCHER_BEGIN; void *result = _intercept_shmat (shmid, shmaddr, shmflg); OPAL_PATCHER_END; return result; } #endif #endif // defined(__linux__) #if defined(__linux__) #if HAS_SHMDT static int (*original_shmdt) (const void *); static int _intercept_shmdt (const void *shmaddr) { int result; /* opal_mem_hooks_release_hook should probably be updated to take a const void *. * for now just cast away the const */ opal_mem_hooks_release_hook ((void *) shmaddr, memory_patcher_get_shm_seg_size (shmaddr), false); if (original_shmdt) { result = original_shmdt (shmaddr); } else { #if defined(SYS_shmdt) result = memory_patcher_syscall (SYS_shmdt, shmaddr); #else // IPCOP_shmdt result = memory_patcher_syscall(SYS_ipc, IPCOP_shmdt, 0, 0, 0, shmaddr); #endif } return result; } static int intercept_shmdt (const void *shmaddr) { OPAL_PATCHER_BEGIN; int result = _intercept_shmdt (shmaddr); OPAL_PATCHER_END; return result; } #endif #endif // defined(__linux__) static int patcher_register (void) { mca_memory_patcher_priority = 80; mca_base_component_var_register (&mca_memory_patcher_component.super.memoryc_version, "priority", "Priority of the patcher memory hook component", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_CONSTANT, &mca_memory_patcher_priority); return OPAL_SUCCESS; } static int patcher_query (int *priority) { int rc; rc = mca_base_framework_open (&opal_patcher_base_framework, 0); if (OPAL_SUCCESS != rc) { *priority = -1; return OPAL_SUCCESS; } *priority = mca_memory_patcher_priority; return OPAL_SUCCESS; } static int patcher_open (void) { static int was_executed_already = 0; int rc; if (was_executed_already) { return OPAL_SUCCESS; } was_executed_already = 1; rc = opal_patcher_base_select (); if (OPAL_SUCCESS != rc) { mca_base_framework_close (&opal_patcher_base_framework); return OPAL_ERR_NOT_AVAILABLE; } /* set memory hooks support level */ opal_mem_hooks_set_support (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT); #if defined (SYS_mmap) rc = opal_patcher->patch_symbol ("mmap", (uintptr_t) intercept_mmap, (uintptr_t *) &original_mmap); if (OPAL_SUCCESS != rc) { return rc; } #endif #if defined (SYS_munmap) rc = opal_patcher->patch_symbol ("munmap", (uintptr_t)intercept_munmap, (uintptr_t *) &original_munmap); if (OPAL_SUCCESS != rc) { return rc; } #endif #if defined (SYS_mremap) rc = opal_patcher->patch_symbol ("mremap",(uintptr_t)intercept_mremap, (uintptr_t *) &original_mremap); if (OPAL_SUCCESS != rc) { return rc; } #endif #if defined (SYS_madvise) rc = opal_patcher->patch_symbol ("madvise", (uintptr_t)intercept_madvise, (uintptr_t *) &original_madvise); if (OPAL_SUCCESS != rc) { return rc; } #endif #if defined(__linux__) #if HAS_SHMAT rc = opal_patcher->patch_symbol ("shmat", (uintptr_t) intercept_shmat, (uintptr_t *) &original_shmat); if (OPAL_SUCCESS != rc) { return rc; } #endif // HAS_SHMAT #if HAS_SHMDT rc = opal_patcher->patch_symbol ("shmdt", (uintptr_t) intercept_shmdt, (uintptr_t *) &original_shmdt); if (OPAL_SUCCESS != rc) { return rc; } #endif // HAS_SHMDT #endif // defined(__linux__) #if defined (SYS_brk) rc = opal_patcher->patch_symbol ("brk", (uintptr_t)intercept_brk, (uintptr_t *) &original_brk); #endif return rc; } static int patcher_close(void) { mca_base_framework_close (&opal_patcher_base_framework); /* Note that we don't need to unpatch any symbols here; the patcher framework will take care of all of that for us. */ return OPAL_SUCCESS; }