aboutsummaryrefslogtreecommitdiff
path: root/text.c
diff options
context:
space:
mode:
author Marc André Tanner <mat@brain-dump.org> 2015-07-06 11:13:57 +0200
committer Marc André Tanner <mat@brain-dump.org> 2015-07-06 11:28:40 +0200
commit e7dabd3d87d89efe2fadbe1e21d03fbeb3cc68a6 (patch)
tree 9ed71627b9ad5002dc1688a04aef1be0e06cb8cf /text.c
parent 3708502761771e1af941dfb7f6dd44d97274accf (diff)
download vis-e7dabd3d87d89efe2fadbe1e21d03fbeb3cc68a6.tar.gz
vis-e7dabd3d87d89efe2fadbe1e21d03fbeb3cc68a6.tar.xz
text: overhaul file load implementation
Files smaller than 8M are now copied into an internal buffer upon load. Thus they can be safely truncated. Larger files are memory mapped and use the file/virtual memory system as caching layer. Hence truncating them will corrupt the file content. Eventually the resulting SIGBUS should be handled gracefully.
Diffstat (limited to 'text.c')
-rw-r--r-- text.c 37
1 files changed, 34 insertions, 3 deletions
diff --git a/text.c b/text.c
index 6457cff..e013979 100644
--- a/text.c
+++ b/text.c
@@ -35,6 +35,10 @@
#include "util.h"
#define BUFFER_SIZE (1 << 20)
+/* Files smaller than this value are copied on load, larger ones are mmap(2)-ed
+ * directly. Hence the former can be truncated, while doing so on the latter
+ * results in havoc. */
+#define BUFFER_MMAP_SIZE (1 << 23)
struct Regex {
const char *string;
@@ -136,6 +140,7 @@ struct Text {
/* buffer management */
static Buffer *buffer_alloc(Text *txt, size_t size);
+static Buffer *buffer_read(Text *txt, size_t size, int fd);
static Buffer *buffer_mmap(Text *txt, size_t size, int fd, off_t offset);
static void buffer_free(Buffer *buf);
static bool buffer_capacity(Buffer *buf, size_t len);
@@ -203,6 +208,27 @@ static Buffer *buffer_alloc(Text *txt, size_t size) {
return buf;
}
+static Buffer *buffer_read(Text *txt, size_t size, int fd) { /* read up to size bytes from fd into a buffer obtained from buffer_alloc; returns NULL if allocation or read(2) fails */
+ Buffer *buf = buffer_alloc(txt, size);
+ if (!buf)
+ return NULL;
+ while (size > 0) { /* size counts the bytes still wanted; read(2) may deliver fewer per call */
+ char data[4096]; /* stack staging area, each chunk is handed on to buffer_append */
+ ssize_t len = read(fd, data, MIN(sizeof(data), size));
+ if (len == -1) { /* NOTE(review): every -1 aborts the load, EINTR is not retried */
+ txt->buffers = buf->next; /* unlink buf again; presumably buffer_alloc put it at the head of txt->buffers -- TODO confirm */
+ buffer_free(buf);
+ return NULL;
+ } else if (len == 0) { /* end of file before size bytes arrived: keep what was read so far */
+ break;
+ } else {
+ buffer_append(buf, data, len); /* NOTE(review): result is not checked; if buffer_append can fail, data is dropped silently */
+ size -= len;
+ }
+ }
+ return buf; /* may hold fewer than the requested size bytes if EOF was hit early */
+}
+
static Buffer *buffer_mmap(Text *txt, size_t size, int fd, off_t offset) {
Buffer *buf = calloc(1, sizeof(Buffer));
if (!buf)
@@ -993,15 +1019,20 @@ Text *text_load(const char *filename) {
goto out;
}
// XXX: use lseek(fd, 0, SEEK_END); instead?
- if (!(txt->buf = buffer_mmap(txt, txt->info.st_size, txt->fd, 0)))
+ size_t size = txt->info.st_size;
+ if (size < BUFFER_MMAP_SIZE)
+ txt->buf = buffer_read(txt, size, txt->fd);
+ else
+ txt->buf = buffer_mmap(txt, size, txt->fd, 0);
+ if (!txt->buf)
goto out;
Piece *p = piece_alloc(txt);
if (!p)
goto out;
piece_init(&txt->begin, NULL, p, NULL, 0);
- piece_init(p, &txt->begin, &txt->end, txt->buf->data, txt->buf->size);
+ piece_init(p, &txt->begin, &txt->end, txt->buf->data, txt->buf->len);
piece_init(&txt->end, p, NULL, NULL, 0);
- txt->size = txt->buf->size;
+ txt->size = txt->buf->len;
}
/* write an empty action */
change_alloc(txt, EPOS);