diff options
| author | Marc André Tanner <mat@brain-dump.org> | 2020-09-28 15:42:14 +0200 |
|---|---|---|
| committer | Marc André Tanner <mat@brain-dump.org> | 2020-10-10 10:48:10 +0200 |
| commit | 574567b125d1a81bb191cfecdecc76c70f216231 (patch) | |
| tree | b6ffbff29ade3306506ef917c2875b2d377f20fd /text-io.c | |
| parent | a889c06ffe1bae1659a976d812c76433e6b69e51 (diff) | |
| download | vis-574567b125d1a81bb191cfecdecc76c70f216231.tar.gz vis-574567b125d1a81bb191cfecdecc76c70f216231.tar.xz | |
text: move I/O related code to separate file
This groups all I/O related code together to make it reusable in different
core text data structure implementations.
Diffstat (limited to 'text-io.c')
| -rw-r--r-- | text-io.c | 548 |
1 files changed, 548 insertions, 0 deletions
diff --git a/text-io.c b/text-io.c new file mode 100644 index 0000000..bfaad41 --- /dev/null +++ b/text-io.c @@ -0,0 +1,548 @@ +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <libgen.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <limits.h> +#include <sys/mman.h> +#if CONFIG_ACL +#include <sys/acl.h> +#endif +#if CONFIG_SELINUX +#include <selinux/selinux.h> +#endif + +#include "text.h" +#include "text-internal.h" +#include "text-util.h" +#include "util.h" + +struct TextSave { /* used to hold context between text_save_{begin,commit} calls */ + Text *txt; /* text to operate on */ + char *filename; /* filename to save to as given to text_save_begin */ + char *tmpname; /* temporary name used for atomic rename(2) */ + int fd; /* file descriptor to write data to using text_save_write */ + int dirfd; /* directory file descriptor, relative to which we save */ + enum TextSaveMethod type; /* method used to save file */ +}; + +/* Allocate blocks holding the actual file content in chunks of size: */ +#ifndef BLOCK_SIZE +#define BLOCK_SIZE (1 << 20) +#endif +/* Files smaller than this value are copied on load, larger ones are mmap(2)-ed + * directely. Hence the former can be truncated, while doing so on the latter + * results in havoc. */ +#define BLOCK_MMAP_SIZE (1 << 26) + +/* allocate a new block of MAX(size, BLOCK_SIZE) bytes */ +Block *block_alloc(size_t size) { + Block *blk = calloc(1, sizeof *blk); + if (!blk) + return NULL; + if (BLOCK_SIZE > size) + size = BLOCK_SIZE; + if (!(blk->data = malloc(size))) { + free(blk); + return NULL; + } + blk->type = BLOCK_TYPE_MALLOC; + blk->size = size; + return blk; +} + +Block *block_read(size_t size, int fd) { + Block *blk = block_alloc(size); + if (!blk) + return NULL; + char *data = blk->data; + size_t rem = size; + while (rem > 0) { + ssize_t len = read(fd, data, rem); + if (len == -1) { + block_free(blk); + return NULL; + } else if (len == 0) { + break; + } else { + data += len; + rem -= len; + } + } + blk->len = size - rem; + return blk; +} + +Block *block_mmap(size_t size, int fd, off_t offset) { + Block *blk = calloc(1, sizeof *blk); + if (!blk) + return NULL; + if (size) { + blk->data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, offset); + if (blk->data == MAP_FAILED) { + free(blk); + return NULL; + } + } + blk->type = BLOCK_TYPE_MMAP_ORIG; + blk->size = size; + blk->len = size; + return blk; +} + +Block *block_load(int dirfd, const char *filename, enum TextLoadMethod method, struct stat *info) { + Block *block = NULL; + int fd = openat(dirfd, filename, O_RDONLY); + if (fd == -1) + goto out; + if (fstat(fd, info) == -1) + goto out; + if (!S_ISREG(info->st_mode)) { + errno = S_ISDIR(info->st_mode) ? EISDIR : ENOTSUP; + goto out; + } + + // XXX: use lseek(fd, 0, SEEK_END); instead? + size_t size = info->st_size; + if (size == 0) + goto out; + if (method == TEXT_LOAD_READ || (method == TEXT_LOAD_AUTO && size < BLOCK_MMAP_SIZE)) + block = block_read(size, fd); + else + block = block_mmap(size, fd, 0); +out: + if (fd != -1) + close(fd); + return block; +} + +void block_free(Block *blk) { + if (!blk) + return; + if (blk->type == BLOCK_TYPE_MALLOC) + free(blk->data); + else if ((blk->type == BLOCK_TYPE_MMAP_ORIG || blk->type == BLOCK_TYPE_MMAP) && blk->data) + munmap(blk->data, blk->size); + free(blk); +} + +/* check whether block has enough free space to store len bytes */ +bool block_capacity(Block *blk, size_t len) { + return blk->size - blk->len >= len; +} + +/* append data to block, assumes there is enough space available */ +const char *block_append(Block *blk, const char *data, size_t len) { + char *dest = memcpy(blk->data + blk->len, data, len); + blk->len += len; + return dest; +} + +/* insert data into block at an arbitrary position, this should only be used with + * data of the most recently created piece. */ +bool block_insert(Block *blk, size_t pos, const char *data, size_t len) { + if (pos > blk->len || !block_capacity(blk, len)) + return false; + if (blk->len == pos) + return block_append(blk, data, len); + char *insert = blk->data + pos; + memmove(insert + len, insert, blk->len - pos); + memcpy(insert, data, len); + blk->len += len; + return true; +} + +/* delete data from a block at an arbitrary position, this should only be used with + * data of the most recently created piece. */ +bool block_delete(Block *blk, size_t pos, size_t len) { + size_t end; + if (!addu(pos, len, &end) || end > blk->len) + return false; + if (blk->len == pos) { + blk->len -= len; + return true; + } + char *delete = blk->data + pos; + memmove(delete, delete + len, blk->len - pos - len); + blk->len -= len; + return true; +} + +Text *text_load(const char *filename) { + return text_load_method(filename, TEXT_LOAD_AUTO); +} + +Text *text_loadat(int dirfd, const char *filename) { + return text_loadat_method(dirfd, filename, TEXT_LOAD_AUTO); +} + +Text *text_load_method(const char *filename, enum TextLoadMethod method) { + return text_loadat_method(AT_FDCWD, filename, method); +} + +static ssize_t write_all(int fd, const char *buf, size_t count) { + size_t rem = count; + while (rem > 0) { + ssize_t written = write(fd, buf, rem > INT_MAX ? INT_MAX : rem); + if (written < 0) { + if (errno == EAGAIN || errno == EINTR) + continue; + return -1; + } else if (written == 0) { + break; + } + rem -= written; + buf += written; + } + return count - rem; +} + +static bool preserve_acl(int src, int dest) { +#if CONFIG_ACL + acl_t acl = acl_get_fd(src); + if (!acl) + return errno == ENOTSUP ? true : false; + if (acl_set_fd(dest, acl) == -1) { + acl_free(acl); + return false; + } + acl_free(acl); +#endif /* CONFIG_ACL */ + return true; +} + +static bool preserve_selinux_context(int src, int dest) { +#if CONFIG_SELINUX + char *context = NULL; + if (!is_selinux_enabled()) + return true; + if (fgetfilecon(src, &context) == -1) + return errno == ENOTSUP ? true : false; + if (fsetfilecon(dest, context) == -1) { + freecon(context); + return false; + } + freecon(context); +#endif /* CONFIG_SELINUX */ + return true; +} + +static int mkstempat(int dirfd, char *template) { + if (dirfd == AT_FDCWD) + return mkstemp(template); + // FIXME: not thread safe + int fd = -1; + int cwd = open(".", O_RDONLY|O_DIRECTORY); + if (cwd == -1) + goto err; + if (fchdir(dirfd) == -1) + goto err; + fd = mkstemp(template); +err: + if (cwd != -1) { + fchdir(cwd); + close(cwd); + } + return fd; +} + +/* Create a new file named `.filename.vis.XXXXXX` (where `XXXXXX` is a + * randomly generated, unique suffix) and try to preserve all important + * meta data. After the file content has been written to this temporary + * file, text_save_commit_atomic will atomically move it to its final + * (possibly already existing) destination using rename(2). + * + * This approach does not work if: + * + * - the file is a symbolic link + * - the file is a hard link + * - file ownership can not be preserved + * - file group can not be preserved + * - directory permissions do not allow creation of a new file + * - POSXI ACL can not be preserved (if enabled) + * - SELinux security context can not be preserved (if enabled) + */ +static bool text_save_begin_atomic(TextSave *ctx) { + int oldfd, saved_errno; + if ((oldfd = openat(ctx->dirfd, ctx->filename, O_RDONLY)) == -1 && errno != ENOENT) + goto err; + struct stat oldmeta = { 0 }; + if (oldfd != -1 && fstatat(ctx->dirfd, ctx->filename, &oldmeta, AT_SYMLINK_NOFOLLOW) == -1) + goto err; + if (oldfd != -1) { + if (S_ISLNK(oldmeta.st_mode)) /* symbolic link */ + goto err; + if (oldmeta.st_nlink > 1) /* hard link */ + goto err; + } + + char suffix[] = ".vis.XXXXXX"; + size_t len = strlen(ctx->filename) + sizeof("./.") + sizeof(suffix); + char *dir = strdup(ctx->filename); + char *base = strdup(ctx->filename); + + if (!(ctx->tmpname = malloc(len)) || !dir || !base) { + free(dir); + free(base); + goto err; + } + + snprintf(ctx->tmpname, len, "%s/.%s%s", dirname(dir), basename(base), suffix); + free(dir); + free(base); + + if ((ctx->fd = mkstempat(ctx->dirfd, ctx->tmpname)) == -1) + goto err; + + if (oldfd == -1) { + mode_t mask = umask(0); + umask(mask); + if (fchmod(ctx->fd, 0666 & ~mask) == -1) + goto err; + } else { + if (fchmod(ctx->fd, oldmeta.st_mode) == -1) + goto err; + if (!preserve_acl(oldfd, ctx->fd) || !preserve_selinux_context(oldfd, ctx->fd)) + goto err; + /* change owner if necessary */ + if (oldmeta.st_uid != getuid() && fchown(ctx->fd, oldmeta.st_uid, (uid_t)-1) == -1) + goto err; + /* change group if necessary, in case of failure some editors reset + * the group permissions to the same as for others */ + if (oldmeta.st_gid != getgid() && fchown(ctx->fd, (uid_t)-1, oldmeta.st_gid) == -1) + goto err; + close(oldfd); + } + + ctx->type = TEXT_SAVE_ATOMIC; + return true; +err: + saved_errno = errno; + if (oldfd != -1) + close(oldfd); + if (ctx->fd != -1) + close(ctx->fd); + ctx->fd = -1; + free(ctx->tmpname); + ctx->tmpname = NULL; + errno = saved_errno; + return false; +} + +static bool text_save_commit_atomic(TextSave *ctx) { + if (fsync(ctx->fd) == -1) + return false; + + struct stat meta = { 0 }; + if (fstat(ctx->fd, &meta) == -1) + return false; + + bool close_failed = (close(ctx->fd) == -1); + ctx->fd = -1; + if (close_failed) + return false; + + if (renameat(ctx->dirfd, ctx->tmpname, ctx->dirfd, ctx->filename) == -1) + return false; + + free(ctx->tmpname); + ctx->tmpname = NULL; + + int dir = openat(ctx->dirfd, dirname(ctx->filename), O_DIRECTORY|O_RDONLY); + if (dir == -1) + return false; + + if (fsync(dir) == -1 && errno != EINVAL) { + close(dir); + return false; + } + + if (close(dir) == -1) + return false; + + text_saved(ctx->txt, &meta); + return true; +} + +static bool text_save_begin_inplace(TextSave *ctx) { + Text *txt = ctx->txt; + struct stat now = { 0 }; + int newfd = -1, saved_errno; + if ((ctx->fd = openat(ctx->dirfd, ctx->filename, O_CREAT|O_WRONLY, 0666)) == -1) + goto err; + if (fstat(ctx->fd, &now) == -1) + goto err; + struct stat loaded = text_stat(txt); + Block *block = text_block_mmaped(txt); + if (block && now.st_dev == loaded.st_dev && now.st_ino == loaded.st_ino) { + /* The file we are going to overwrite is currently mmap-ed from + * text_load, therefore we copy the mmap-ed block to a temporary + * file and remap it at the same position such that all pointers + * from the various pieces are still valid. + */ + size_t size = block->size; + char tmpname[32] = "/tmp/vis-XXXXXX"; + newfd = mkstemp(tmpname); + if (newfd == -1) + goto err; + if (unlink(tmpname) == -1) + goto err; + ssize_t written = write_all(newfd, block->data, size); + if (written == -1 || (size_t)written != size) + goto err; + void *data = mmap(block->data, size, PROT_READ, MAP_SHARED|MAP_FIXED, newfd, 0); + if (data == MAP_FAILED) + goto err; + bool close_failed = (close(newfd) == -1); + newfd = -1; + if (close_failed) + goto err; + block->type = BLOCK_TYPE_MMAP; + } + /* overwrite the existing file content, if something goes wrong + * here we are screwed, TODO: make a backup before? */ + if (ftruncate(ctx->fd, 0) == -1) + goto err; + ctx->type = TEXT_SAVE_INPLACE; + return true; +err: + saved_errno = errno; + if (newfd != -1) + close(newfd); + if (ctx->fd != -1) + close(ctx->fd); + ctx->fd = -1; + errno = saved_errno; + return false; +} + +static bool text_save_commit_inplace(TextSave *ctx) { + if (fsync(ctx->fd) == -1) + return false; + struct stat meta = { 0 }; + if (fstat(ctx->fd, &meta) == -1) + return false; + if (close(ctx->fd) == -1) + return false; + text_saved(ctx->txt, &meta); + return true; +} + +TextSave *text_save_begin(Text *txt, int dirfd, const char *filename, enum TextSaveMethod type) { + if (!filename) + return NULL; + TextSave *ctx = calloc(1, sizeof *ctx); + if (!ctx) + return NULL; + ctx->txt = txt; + ctx->fd = -1; + ctx->dirfd = dirfd; + if (!(ctx->filename = strdup(filename))) + goto err; + errno = 0; + if ((type == TEXT_SAVE_AUTO || type == TEXT_SAVE_ATOMIC) && text_save_begin_atomic(ctx)) + return ctx; + if (errno == ENOSPC) + goto err; + if ((type == TEXT_SAVE_AUTO || type == TEXT_SAVE_INPLACE) && text_save_begin_inplace(ctx)) + return ctx; +err: + text_save_cancel(ctx); + return NULL; +} + +bool text_save_commit(TextSave *ctx) { + if (!ctx) + return true; + bool ret; + switch (ctx->type) { + case TEXT_SAVE_ATOMIC: + ret = text_save_commit_atomic(ctx); + break; + case TEXT_SAVE_INPLACE: + ret = text_save_commit_inplace(ctx); + break; + default: + ret = false; + break; + } + + text_save_cancel(ctx); + return ret; +} + +void text_save_cancel(TextSave *ctx) { + if (!ctx) + return; + int saved_errno = errno; + if (ctx->fd != -1) + close(ctx->fd); + if (ctx->tmpname && ctx->tmpname[0]) + unlinkat(ctx->dirfd, ctx->tmpname, 0); + free(ctx->tmpname); + free(ctx->filename); + free(ctx); + errno = saved_errno; +} + +/* First try to save the file atomically using rename(2) if this does not + * work overwrite the file in place. However if something goes wrong during + * this overwrite the original file is permanently damaged. + */ +bool text_save(Text *txt, const char *filename) { + return text_saveat(txt, AT_FDCWD, filename); +} + +bool text_saveat(Text *txt, int dirfd, const char *filename) { + return text_saveat_method(txt, dirfd, filename, TEXT_SAVE_AUTO); +} + +bool text_save_method(Text *txt, const char *filename, enum TextSaveMethod method) { + return text_saveat_method(txt, AT_FDCWD, filename, method); +} + +bool text_saveat_method(Text *txt, int dirfd, const char *filename, enum TextSaveMethod method) { + if (!filename) { + text_saved(txt, NULL); + return true; + } + TextSave *ctx = text_save_begin(txt, dirfd, filename, method); + if (!ctx) + return false; + Filerange range = (Filerange){ .start = 0, .end = text_size(txt) }; + ssize_t written = text_save_write_range(ctx, &range); + if (written == -1 || (size_t)written != text_range_size(&range)) { + text_save_cancel(ctx); + return false; + } + return text_save_commit(ctx); +} + +ssize_t text_save_write_range(TextSave *ctx, const Filerange *range) { + return text_write_range(ctx->txt, range, ctx->fd); +} + +ssize_t text_write(const Text *txt, int fd) { + Filerange r = (Filerange){ .start = 0, .end = text_size(txt) }; + return text_write_range(txt, &r, fd); +} + +ssize_t text_write_range(const Text *txt, const Filerange *range, int fd) { + size_t size = text_range_size(range), rem = size; + for (Iterator it = text_iterator_get(txt, range->start); + rem > 0 && text_iterator_valid(&it); + text_iterator_next(&it)) { + size_t prem = it.end - it.text; + if (prem > rem) + prem = rem; + ssize_t written = write_all(fd, it.text, prem); + if (written == -1) + return -1; + rem -= written; + if ((size_t)written != prem) + break; + } + return size - rem; +} |
