[leafnode-list] [PATCH] Experimental patch to mmap active file for speed

Robert Clark clark at exiter.com
Mon Nov 1 04:43:20 CET 2004


In article <8089590.7tekqT2dun at warez.exiter.com>, Robert Clark <clark at exiter.com> wrote:

> If the attachment does not come through the list, I'll
> repost the diff inline.

And repost it inline I shall ..

- Rob

-------------- cut here and watch the word wrap. -------------

diff -u leafnode-2.0.0.alpha20041007a-orig/activutil.c leafnode-2.0.0.alpha20041007a/activutil.c
--- leafnode-2.0.0.alpha20041007a-orig/activutil.c      2004-10-07 03:30:16.000000000 -0400
+++ leafnode-2.0.0.alpha20041007a/activutil.c   2004-10-31 22:27:49.867994263 -0500
@@ -16,12 +16,14 @@
 #include <limits.h>
 #include <stdio.h>
 #include <sys/stat.h>
+#include <sys/mman.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #include <assert.h>
 #include <time.h>
 #include <fcntl.h>
+#include <string.h>
 
 #ifdef WITH_DMALLOC
 #include <dmalloc.h>
@@ -438,22 +440,26 @@
 }
 
 /*
- * read active file into memory
+ * Read the active file into memory. Because this can be a fairly
+ * I/O-intensive operation, the file is memory-mapped before it is processed.
  */
 static void
 readactive(void)
 {
-    char *p, *r = 0;
+    char *p, *mmap_ptr, *r = 0;
     int n;
-    FILE *f;
+    int fd;
     struct newsgroup *g;
+    struct stat stat_buf;
+    size_t filesize;
+    size_t file_index;
     mastr *s = mastr_new(LN_PATH_MAX);
-
+    
     freeactive(active);
     active = 0;
     mastr_vcat(s, spooldir, GROUPINFO, NULL);
 
-    if ((f = fopen(mastr_str(s), "r")) == NULL) {
+    if ((fd = open(mastr_str(s), O_RDONLY)) == -1) {
        ln_log_sys(LNLOG_SERR, LNLOG_CTOP, "unable to open %s: %m",
                mastr_str(s));
        mastr_delete(s);
@@ -462,17 +468,42 @@
        return;
     }
 
+    /*
+     * Scanning this group list is fairly expensive and is done twice,
+     * so we mmap the file for speed.
+     */
+    if ( stat(mastr_str(s), &stat_buf) == -1 ) {
+        ln_log_sys(LNLOG_SERR, LNLOG_CTOP, "could not pre-determine size of %s: %m", mastr_str(s));
+        mastr_delete(s);
+       active = NULL;
+       activesize = 0;
+       return;
+    }
+    filesize = stat_buf.st_size;
+
+    mmap_ptr = mmap(NULL, filesize, PROT_READ, MAP_PRIVATE, fd, 0 );
+    close(fd); /* close the file, not needed after it has be mapped to memory. */
+    if (mmap_ptr == MAP_FAILED) {
+        ln_log_sys(LNLOG_SERR, LNLOG_CTOP, "Could not memory map file %s: %m", mastr_str(s));
+        mastr_delete(s);
+       active = NULL;
+       activesize = 0;
+       return;
+    }
+
     /* count lines = newsgroups */
     activesize = 0;
-    while ((p = getaline(f)))
+    file_index = 0;
+    while ( (p = getabufferedline(mmap_ptr, &file_index, filesize)) != NULL ) {
        activesize++;
-    rewind(f);
+    }
     active = (struct newsgroup *)critmalloc((1 + activesize) *
                                            sizeof(struct newsgroup),
                                            "allocating active");
     g = active;
-    while ((p = getaline(f))) {
-       unsigned long temp;
+    file_index = 0;
+    while ( (p = getabufferedline(mmap_ptr, &file_index, filesize)) != NULL ) {
+        unsigned long temp;
 
        r = strchr(p, '\t');
        if (!r && check_old_format(p)) {
@@ -534,8 +565,9 @@
     sort(active, activesize, sizeof(struct newsgroup), &compactive);
     validateactive();
 
-    /* don't check for errors, we opened the file for reading */
-    (void)fclose(f);
+    /* don't check for errors, we opened the file for reading only */
+    (void)munmap(mmap_ptr, filesize);
+
     mastr_delete(s);
 }
 
diff -u leafnode-2.0.0.alpha20041007a-orig/getaline.c leafnode-2.0.0.alpha20041007a/getaline.c
--- leafnode-2.0.0.alpha20041007a-orig/getaline.c       2004-09-14 06:37:22.000000000 -0400
+++ leafnode-2.0.0.alpha20041007a/getaline.c    2004-10-31 22:25:46.875600316 -0500
@@ -64,3 +64,67 @@
        ln_log(LNLOG_SDEBUG, LNLOG_CTOP, "<%s", buf);   /* FIXME: CTOP? */
     return buf;
 }
+
+/**
+ * Copies the next non-empty line of the buffer \p s into the static line
+ * buffer and advances \p *start past it; CR, LF and NUL all end a line.
+ * \return pointer to static buffer, or 0 at end of buffer or on OOM.
+ */
+char *
+getabufferedline(const char *s /** buffer to read from */,
+                 size_t *start, /** where in the buffer to start from */
+                 size_t length /** total length of the buffer */ )
+{
+    size_t len;                /* # of chars stored into buf before '\0' */
+    size_t index;
+
+#define IS_EOL(c) ((c) == '\n' || (c) == '\r' || (c) == '\0')
+
+    if (s == NULL || start == NULL || *start >= length) {
+        return 0;
+    }
+
+    /* Skip leading terminators so that a blank first line is passed over
+       like blank lines elsewhere instead of being reported as the end. */
+    while (*start < length && IS_EOL(s[*start])) {
+        (*start)++;
+    }
+
+    /* Find the end of the line: the next terminator or the buffer end. */
+    index = *start;
+    while (index < length && !IS_EOL(s[index])) {
+        index++;
+    }
+    len = index - *start;
+
+    if (len == 0) {
+        return 0;               /* only terminators were left */
+    }
+    if (size <= len) {
+        /* Need len + 1 bytes (line plus '\0'); on failure record size 0
+           so that a later call cannot copy into a NULL buffer. */
+        free(buf);
+        buf = malloc(len + 1);
+        size = (buf != NULL) ? len + 1 : 0;
+        if (buf == NULL) {
+            return 0;
+        }
+    }
+
+    memcpy(buf, s + *start, len);
+    buf[len] = '\0';
+
+    /* Consume the terminator(s) that follow the line. */
+    while (index < length && IS_EOL(s[index])) {
+        index++;
+    }
+    *start = index;
+
+    if (debugmode & DEBUG_IO)
+       ln_log(LNLOG_SDEBUG, LNLOG_CTOP, "<%s", buf);   /* FIXME: CTOP? */
+
+    return buf;
+
+#undef IS_EOL
+}
+
diff -u leafnode-2.0.0.alpha20041007a-orig/leafnode.h leafnode-2.0.0.alpha20041007a/leafnode.h
--- leafnode-2.0.0.alpha20041007a-orig/leafnode.h       2004-09-16 07:05:29.000000000 -0400
+++ leafnode-2.0.0.alpha20041007a/leafnode.h    2004-10-31 20:44:30.965870845 -0500
@@ -172,6 +172,8 @@
     /* handling of misc. lines */
     /*@null@*/ /*@dependent@*/ char *getaline(FILE * f);
     /* reads one line, regardless of length, returns pointer to static buffer */
+    /*@null@*/ /*@dependent@*/ char *getabufferedline(const char *s, size_t *start, size_t length);
+    /* reads one line, regardless of length, returns pointer to static buffer */
 
     /*@null@*/ /*@only@*/ char *
     mygetfoldedline(const char *, unsigned long, FILE * f);



More information about the leafnode-list mailing list