[leafnode-list] [PATCH] Experimental patch to mmap active file for speed
Robert Clark
clark at exiter.com
Mon Nov 1 04:43:20 CET 2004
In article <8089590.7tekqT2dun at warez.exiter.com>, Robert Clark <clark at exiter.com> wrote:
> If the attachment does not come through the list, I'll
> repost the diff inline.
And repost it inline I shall ..
- Rob
-------------- cut here and watch the word wrap. -------------
diff -u leafnode-2.0.0.alpha20041007a-orig/activutil.c leafnode-2.0.0.alpha20041007a/activutil.c
--- leafnode-2.0.0.alpha20041007a-orig/activutil.c 2004-10-07 03:30:16.000000000 -0400
+++ leafnode-2.0.0.alpha20041007a/activutil.c 2004-10-31 22:27:49.867994263 -0500
@@ -16,12 +16,14 @@
#include <limits.h>
#include <stdio.h>
#include <sys/stat.h>
+#include <sys/mman.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <time.h>
#include <fcntl.h>
+#include <string.h>
#ifdef WITH_DMALLOC
#include <dmalloc.h>
@@ -438,22 +440,26 @@
}
/*
- * read active file into memory
+ * Read the active file into memory. Because this can be a fairly I/O intensive
+ * operation, the active file is loaded into memory before it is processed.
*/
static void
readactive(void)
{
- char *p, *r = 0;
+ char *p, *mmap_ptr, *r = 0;
int n;
- FILE *f;
+ int fd;
struct newsgroup *g;
+ struct stat stat_buf;
+ size_t filesize;
+ size_t file_index;
mastr *s = mastr_new(LN_PATH_MAX);
-
+
freeactive(active);
active = 0;
mastr_vcat(s, spooldir, GROUPINFO, NULL);
- if ((f = fopen(mastr_str(s), "r")) == NULL) {
+ if ((fd = open(mastr_str(s), O_RDONLY)) == -1) {
ln_log_sys(LNLOG_SERR, LNLOG_CTOP, "unable to open %s: %m",
mastr_str(s));
mastr_delete(s);
@@ -462,17 +468,42 @@
return;
}
+ /*
+ * Scanning this group list is fairly expensive and is done twice
+ * so we mmap the file for speed.
+ */
+ if ( stat(mastr_str(s), &stat_buf) == -1 ) {
+ ln_log_sys(LNLOG_SERR, LNLOG_CTOP, "could not pre-determine size of %s: %m", mastr_str(s));
+ mastr_delete(s);
+ active = NULL;
+ activesize = 0;
+ return;
+ }
+ filesize = stat_buf.st_size;
+
+ mmap_ptr = mmap(NULL, filesize, PROT_READ, MAP_PRIVATE, fd, 0 );
+ close(fd); /* close the file, not needed after it has be mapped to memory. */
+ if (mmap_ptr == MAP_FAILED) {
+ ln_log_sys(LNLOG_SERR, LNLOG_CTOP, "Could not memory map file %s: %m", mastr_str(s));
+ mastr_delete(s);
+ active = NULL;
+ activesize = 0;
+ return;
+ }
+
/* count lines = newsgroups */
activesize = 0;
- while ((p = getaline(f)))
+ file_index = 0;
+ while ( (p = getabufferedline(mmap_ptr, &file_index, filesize)) != NULL ) {
activesize++;
- rewind(f);
+ }
active = (struct newsgroup *)critmalloc((1 + activesize) *
sizeof(struct newsgroup),
"allocating active");
g = active;
- while ((p = getaline(f))) {
- unsigned long temp;
+ file_index = 0;
+ while ( (p = getabufferedline(mmap_ptr, &file_index, filesize)) != NULL ) {
+ unsigned long temp;
r = strchr(p, '\t');
if (!r && check_old_format(p)) {
@@ -534,8 +565,9 @@
sort(active, activesize, sizeof(struct newsgroup), &compactive);
validateactive();
- /* don't check for errors, we opened the file for reading */
- (void)fclose(f);
+ /* don't check for errors, we opened the file for reading only */
+ (void)munmap(mmap_ptr, filesize);
+
mastr_delete(s);
}
diff -u leafnode-2.0.0.alpha20041007a-orig/getaline.c leafnode-2.0.0.alpha20041007a/getaline.c
--- leafnode-2.0.0.alpha20041007a-orig/getaline.c 2004-09-14 06:37:22.000000000 -0400
+++ leafnode-2.0.0.alpha20041007a/getaline.c 2004-10-31 22:25:46.875600316 -0500
@@ -64,3 +64,67 @@
ln_log(LNLOG_SDEBUG, LNLOG_CTOP, "<%s", buf); /* FIXME: CTOP? */
return buf;
}
+
+/**
+ * Reads the next non-empty line from the in-memory buffer \p s, starting at
+ * offset \p *start, and copies it (without its terminator) into the
+ * file-scope line buffer \c buf / \c size used elsewhere in this file.
+ *
+ * '\n', '\r' and '\0' are all treated as line terminators. Runs of
+ * terminators before and after the line are skipped, so CRLF and blank
+ * lines never yield an empty string. On success \p *start is advanced to
+ * the first byte of the following line (or to \p length at the end).
+ *
+ * \return pointer to the shared line buffer (overwritten by the next call),
+ * or NULL when the arguments are invalid, no further line exists, or the
+ * line buffer could not be allocated. \p *start is left untouched on
+ * invalid arguments and on allocation failure.
+ */
+char *
+getabufferedline(const char *s /** buffer to read from */,
+                 size_t *start, /** in/out: offset of the next unread byte */
+                 size_t length /** total length of the buffer */ )
+{
+    size_t len;   /* # of chars stored into buf before '\0' */
+    size_t begin; /* offset of the first byte of the line */
+    size_t index; /* scan position inside s */
+
+#define IS_EOL(c) ((c) == '\n' || (c) == '\r' || (c) == '\0')
+
+    if (s == NULL || start == NULL || *start >= length) {
+        return NULL;
+    }
+
+    /* Skip terminators in front of the line; otherwise a buffer that
+       begins with a blank line would be mistaken for an empty buffer. */
+    begin = *start;
+    while (begin < length && IS_EOL(s[begin])) {
+        begin++;
+    }
+    if (begin >= length) {
+        /* nothing but terminators left: consume them and report the end */
+        *start = length;
+        return NULL;
+    }
+
+    /* Find the end of the line: the next terminator or the end of the
+       buffer, whichever comes first. */
+    index = begin;
+    while (index < length && !IS_EOL(s[index])) {
+        index++;
+    }
+    len = index - begin;
+
+    /* The line buffer needs len + 1 bytes (text plus '\0'), so it must be
+       replaced whenever size <= len, not only when size < len. */
+    if (size <= len) {
+        free(buf);
+        buf = malloc(len + 1);
+        if (buf == NULL) {
+            size = 0; /* keep size consistent with the missing buffer */
+            return NULL;
+        }
+        size = len + 1;
+    }
+
+    memcpy(buf, s + begin, len);
+    buf[len] = '\0';
+
+    /* Skip the terminator(s) that ended this line. */
+    while (index < length && IS_EOL(s[index])) {
+        index++;
+    }
+    *start = index;
+
+#undef IS_EOL
+
+    if (debugmode & DEBUG_IO)
+        ln_log(LNLOG_SDEBUG, LNLOG_CTOP, "<%s", buf); /* FIXME: CTOP? */
+
+    return buf;
+}
+
diff -u leafnode-2.0.0.alpha20041007a-orig/leafnode.h leafnode-2.0.0.alpha20041007a/leafnode.h
--- leafnode-2.0.0.alpha20041007a-orig/leafnode.h 2004-09-16 07:05:29.000000000 -0400
+++ leafnode-2.0.0.alpha20041007a/leafnode.h 2004-10-31 20:44:30.965870845 -0500
@@ -172,6 +172,8 @@
/* handling of misc. lines */
/*@null@*/ /*@dependent@*/ char *getaline(FILE * f);
/* reads one line, regardless of length, returns pointer to static buffer */
+ /*@null@*/ /*@dependent@*/ char *getabufferedline(const char *s, size_t *start, size_t length);
+ /* reads one line, regardless of length, returns pointer to static buffer */
/*@null@*/ /*@only@*/ char *
mygetfoldedline(const char *, unsigned long, FILE * f);
More information about the leafnode-list
mailing list