[leafnode-list] [PATCH] WIP: port leafnode-2 to pcre2

Matěj Cepl mcepl at cepl.eu
Wed Mar 26 22:01:53 CET 2025


I tried to port the patches from
https://sourceforge.net/p/leafnode/git/ci/c4738bf3b28bdcdbe7247ceede115594d03e764c/
and
https://sourceforge.net/p/leafnode/git/ci/a30371c2dc1c28bbceb4989dd7cf7647461b9ce4/
to leafnode-2, but this is still a work in progress: currently I am not
even able to run ./configure.
---
 Makefile.am        |  2 +-
 NEWS               |  7 ++++++
 applyfilter.c      |  4 ++--
 configure.ac       | 54 ++++++++++++++++++++++++----------------------
 configutil.c       |  4 ++--
 fetchnews.c        | 10 ++++-----
 filterutil.c       | 16 +++++++-------
 groupselect.c      | 26 ++++++++++++++++------
 groupselect.h      |  7 +++---
 leafnode-version.c |  8 +++++--
 leafnode.8.in      |  2 +-
 leafnode.h         |  9 ++++----
 masock_sa2name.c   |  3 ++-
 pcrewrap.c         | 16 +++++++-------
 pcrewrap.h         |  5 +++--
 15 files changed, 101 insertions(+), 72 deletions(-)

diff --git a/Makefile.am b/Makefile.am
index defa8b2..499d836 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -271,7 +271,7 @@ b_sortnl_SOURCES	= b_sortnl.c critmem_malloc.c critmem_realloc.c
 b_sortnl_LDADD		= @LIBOBJS@
 
 # programs
-LDADD			= @LIBOBJS@ liblnutil.a
+LDADD			= @LIBOBJS@ liblnutil.a @PCRE2_8LIB@
 applyfilter_SOURCES	= applyfilter.c
 fetchnews_SOURCES	= fetchnews.c fetchnews_check_date.c fetchnews.h
 leafnode_SOURCES	= nntpd.c
diff --git a/NEWS b/NEWS
index 08ae938..90d598a 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,13 @@
 KNOWN BUGS:
 * The code sometimes uses timeout_client where it should use a server timeout.
 |
+### CHANGES
+- leafnode now requires the PCRE2 library instead of PCRE.
+  PCRE2 has been around for a few years and is maintained,
+  while PCRE is end of life, no longer supported,
+  and is being phased out by distributions.
+  See its home page, https://github.com/PhilipHazel/pcre2
+
 2.0.0.alpha202301: Changes since 202101:
 - Bugfix: store messages with folded header lines such as Message-ID,
   reported by Matěj Cepl via leafnode-list at .
diff --git a/applyfilter.c b/applyfilter.c
index bfacaca..27c074e 100644
--- a/applyfilter.c
+++ b/applyfilter.c
@@ -108,14 +108,14 @@ static int applyfilter(const char *name, struct newsgroup *g,
 
 	switch (ret) {
 	    case 0:
-		score = killfilter(myfilter, l);
+		score = killfilter(myfilter, (unsigned char *)l);
 		break;
 	    case -1:
 		score = TRUE;
 		break;
 	    case -2: /* article has no body */
 		if (delaybody_group(g->name))
-		    score = killfilter(myfilter, l);
+		    score = killfilter(myfilter, (unsigned char *)l);
 		else
 		    score = TRUE;
 		break;
diff --git a/configure.ac b/configure.ac
index c999e49..0220e11 100644
--- a/configure.ac
+++ b/configure.ac
@@ -178,59 +178,61 @@ AC_CHECK_LIB(crypt, crypt)
 
 AC_CACHE_SAVE
 
-dnl Check for PCRE library.
-AC_PATH_PROG(PCRECONFIG,pcre-config,AC_MSG_ERROR(pcre-config not found, make sure you have the pcre and pcre-devel packages installed))
+dnl Check for PCRE2 library.
+AC_ARG_VAR(PCRE2CONFIG,where the pcre2-config script is installed)
+AC_PATH_PROG(PCRE2CONFIG,pcre2-config,no)
 
-CF=`$PCRECONFIG --cflags`
+CF=`$PCRE2CONFIG --cflags`
 case $CF in ?*)
 AC_MSG_NOTICE([adding $CF to CFLAGS]); CFLAGS="$CFLAGS $CF"; export CFLAGS
 ;; esac
 
 
-LF=`$PCRECONFIG --libs`
+LF=`$PCRE2CONFIG --libs`
 case $LF in ?*)
 AC_MSG_NOTICE([adding $LF to LIBS]); LIBS="$LIBS $LF"; export LIBS
 ;; esac
 
-AC_CACHE_CHECK(for pcre.h,ac_cv_header_pcre_h,[
-  AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <pcre.h>]], [[return 0;]])],[ac_cv_header_pcre_h=yes],[ac_cv_header_pcre_h=no])
+AC_CACHE_CHECK(for pcre2.h,ac_cv_header_pcre2_h,[
+  AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#define PCRE2_CODE_UNIT_WIDTH 0]], [[#include <pcre2.h>]], [[return 0;]])],[ac_cv_header_pcre2_h=yes],[ac_cv_header_pcre2_h=no])
 ])
 
-if test "x$ac_cv_header_pcre_h" = xyes
+if test "x$ac_cv_header_pcre2_h" = xyes
 then
-  AC_MSG_CHECKING([for pcre_compile() in PCRE library])
-  AC_CACHE_VAL([ln_cv_have_libpcre], [
+  AC_MSG_CHECKING([for pcre2_compile_8() in PCRE2 library])
+  AC_CACHE_VAL([ln_cv_have_libpcre2], [
   AC_LINK_IFELSE(
-  [AC_LANG_PROGRAM([[#include <pcre.h>]],
-    [[pcre_compile(0, 0, 0, 0, 0);]])],
-  [ln_cv_have_libpcre=yes], [ln_cv_have_libpcre=no])
+  [AC_LANG_PROGRAM([[#define PCRE2_CODE_UNIT_WIDTH 0]],
+    [[#include <pcre2.h>]],
+    [[pcre2_compile_8(0, 0, 0, 0, 0);]])],
+  [ln_cv_have_libpcre2=yes], [ln_cv_have_libpcre2=no])
   ])
-  AC_MSG_RESULT([$ln_cv_have_libpcre])
+  AC_MSG_RESULT([$ln_cv_have_libpcre2])
 fi
 
-if test "x$ln_cv_have_libpcre" = xyes
+if test "x$ln_cv_have_libpcre2" = xyes
 then
-  AC_DEFINE(HAVE_LIBPCRE, 1, [Set if PCRE library is present.])
+  AC_DEFINE(HAVE_LIBPCRE2, 1, [Set if PCRE2 library is present.])
 else
-    echo "*** I cannot find PCRE. leafnode depends on it."
+    echo "*** I cannot find PCRE2. leafnode depends on it."
     echo "***"
-    echo "*** If you have PCRE installed, pcre-config was not found."
+    echo "*** If you have PCRE2 installed, pcre2-config was not found."
     echo "*** You can work around this by adding the header location to"
-    echo "*** the environment variables CPPFLAGS, i. e. CPPFLAGS=-I/opt/pcre/include"
+    echo "*** the environment variables CPPFLAGS, i. e. CPPFLAGS=-I/opt/pcre2/include"
     echo "*** and the library location to the environment variables LDFLAGS,"
-    echo "*** e. g. LDFLAGS=-L/opt/pcre/lib, assuming your PCRE resides in /opt/pcre."
+    echo "*** e. g. LDFLAGS=-L/opt/pcre2/lib, assuming your PCRE resides in /opt/pcre2."
     echo "***"
-    echo "*** If not you don't have PCRE installed, please download and install PCRE 3.7"
-    echo "*** or a newer version from http://www.pcre.org/, it's easy:"
-    echo "*** Just download, unpack, then cd to pcre-3.7, then"
+    echo "*** If not you don't have PCRE2 installed, please download and install PCRE2"
+    echo "*** or a newer version from http://www.pcre2.org/, it's easy:"
+    echo "*** Just download, unpack, then cd to pcre2*, then"
     echo "*** ./configure && make, then as root: make install"
     echo "*** Then, reconfigure leafnode."
-    AC_MSG_ERROR(PCRE library not found)
+    AC_MSG_ERROR(PCRE2 library not found)
     exit 1
 fi
 
-AC_SUBST(PCRELIB)
-AC_SUBST(LINKPCRELIB)
+AC_SUBST(PCRE2_8LIB)
+AC_SUBST(PCRE2_8DEP)
 
 dnl Checks for library functions.
 AC_CHECK_FUNCS([setgroups])
@@ -246,7 +248,7 @@ AC_CHECK_LIB(nsl, gethostent)
 AC_CHECK_LIB(socket, setsockopt)
 AC_REPLACE_FUNCS(arc4random mergesort strcasestr strlcpy)
 
-dnl The 'pcre' library uses non-standard defines
+dnl The 'pcre2' library uses non-standard defines
 test ".$ac_cv_func_strerror" != ".yes" && \
 test ".$cf_cv_have_sys_errlist" = ".yes" && \
 	PCRE_DEFINES="$PCRE_DEFINES -DSTRERROR_FROM_ERRLIST"
diff --git a/configutil.c b/configutil.c
index b3cefde..36f2e3a 100644
--- a/configutil.c
+++ b/configutil.c
@@ -523,7 +523,7 @@ readconfig(/*@null@*/ const char *configfile)
 		    break;
 		case CP_ONLYGROUPSPCRE:
 		    {
-			pcre *r = gs_compile(value, configfile, line);
+			pcre2_code_8 *r = gs_compile((unsigned char *)value, configfile, line);
 
 			if (!r)
 			    exit(2);
@@ -623,7 +623,7 @@ freeservers(struct serverlist *s)
 	if (p->password)
 	    free(p->password);
 	if (p->group_pcre)
-	    free(p->group_pcre);
+	    pcre2_code_free_8(p->group_pcre);
 	free(p);
 	p = t;
     }
diff --git a/fetchnews.c b/fetchnews.c
index 23ce537..c2f7c90 100644
--- a/fetchnews.c
+++ b/fetchnews.c
@@ -388,7 +388,7 @@ isgrouponserver(const struct serverlist *cursrv, char *newsgroups)
 	    q = strchr(p, ',');
 	    if (q)
 		*q++ = '\0';
-	    if (gs_match(cursrv -> group_pcre, p)) {
+	    if (gs_match(cursrv -> group_pcre, (unsigned char *)p)) {
 		putaline(nntpout, "GROUP %s", p);
 		if (nntpreply(cursrv) == 211)
 		    retval = TRUE;
@@ -769,7 +769,7 @@ getfirstlast(struct serverlist *cursrv, struct newsgroup *g, unsigned
     long n;
     char *l, *t;
 
-    if (!gs_match(cursrv -> group_pcre, g->name))
+    if (!gs_match(cursrv -> group_pcre, (unsigned char *)g->name))
 	return 0;
 
     putaline(nntpout, "GROUP %s", g->name);
@@ -1222,7 +1222,7 @@ getgroup(struct serverlist *cursrv, struct newsgroup *g, unsigned long first)
 	return 0;
     if (!chdirgroup(g->name, TRUE))	/* also creates the directory */
 	return 0;
-    if (!gs_match(cursrv -> group_pcre, g->name))
+    if (!gs_match(cursrv -> group_pcre, (unsigned char *)g->name))
 	return 0;
 
     /* we don't care about x-posts for delaybody */
@@ -1442,7 +1442,7 @@ nntpactive(struct serverlist *cursrv, int fa)
 	    if (!splitLISTline(l, &p, &r))
 		continue;
 	    *p = '\0';
-	    if (gs_match(cursrv->group_pcre, l)) {
+	    if (gs_match(cursrv->group_pcre, (unsigned char *)l)) {
 		insertgroup(l, *r, 1, 0, time(NULL), NULL);
 		appendtolist(groups, l);
 		count++;
@@ -1513,7 +1513,7 @@ nntpactive(struct serverlist *cursrv, int fa)
 	    /* FIXME: save high water mark in .last.posting? */
 	    first = 1;
 	    last = 0;
-	    if (gs_match(cursrv->group_pcre, l)) {
+	    if (gs_match(cursrv->group_pcre, (unsigned char *)l)) {
 		if (is_interesting(l)
 			&& (forceact || !(active && findgroup(l, active, -1)))
 			&& chdirgroup(l, FALSE)) {
diff --git a/filterutil.c b/filterutil.c
index 8ef7966..a51dace 100644
--- a/filterutil.c
+++ b/filterutil.c
@@ -196,7 +196,7 @@ newfilter(void)
 static struct filterlist *oldf = NULL;
 
 static void
-insertfilter(/*@owned@*/ struct filterlist *f, /*@only@*/ pcre *ng, /*@only@*/ char *ngpcretext, int invertngs)
+insertfilter(/*@owned@*/ struct filterlist *f, /*@only@*/ pcre2_code_8 *ng, /*@only@*/ char *ngpcretext, int invertngs)
 {
     (f->entry)->invertngs = invertngs;
     (f->entry)->newsgroups = ng;
@@ -291,7 +291,7 @@ readfilter(/*@null@*/ const char *filterfilename)
 		state = RF_WANTPAT;
 	    } else if ((state == RF_WANTPAT || state == RF_WANTNGORPAT) &&
 		    !strcasecmp("pattern", param)) {
-		pcre *re, *ngp;
+		pcre2_code_8 *re, *ngp;
 		if (!ngt || !(ngp = ln_pcre_compile(ngt + invertngs, 0, NULL, filterfilename, line))) {
 		    ln_log(LNLOG_SNOTICE, LNLOG_CTOP,
 			    "No newsgroup for pattern = %s (line %lu) found",
@@ -322,7 +322,7 @@ readfilter(/*@null@*/ const char *filterfilename)
 			(!strcasecmp("maxlines", param)) ||
 			(!strcasecmp("maxbytes", param)) ||
 			(!strcasecmp("maxcrosspost", param)))) {
-		pcre *ngp;
+		pcre2_code_8 *ngp;
 		f = newfilter();
 		if (!(ngp = ln_pcre_compile(ngt + invertngs, 0, NULL, filterfilename, line))) {
 		    rv = FALSE;
@@ -391,7 +391,7 @@ selectfilter(const char *groupname)
     fold = NULL;
     master = filter;
     while (master) {
-	if ((master->entry)->invertngs ^ (pcre_exec((master->entry)->newsgroups, NULL, groupname,
+	if ((master->entry)->invertngs ^ (pcre2_match_8((master->entry)->newsgroups, NULL, groupname,
 		    strlen(groupname), 0, /* options */ 0, NULL, 0) >= 0)) {
 	    f = (struct filterlist *)critmalloc(sizeof(struct filterlist),
 						"Allocating groupfilter space");
@@ -420,7 +420,7 @@ regexp_addinfo(const struct filterentry *g, const char *hdr) {
     const char *x = hdr;
     while (*x) {
 	int len = strcspn(x, "\n");
-	int match = (pcre_exec(g->expr, NULL, x, (int)strcspn(x, "\n"),
+	int match = (pcre2_match_8(g->expr, NULL, x, (int)strcspn(x, "\n"),
 		0, 0, NULL, 0) >= 0);
 	if (match) {
 	    ln_log(LNLOG_SDEBUG, LNLOG_CALL, "regexp filter: detail: \"%-.*s\""
@@ -462,7 +462,7 @@ killfilter(const struct filterlist *f, const char *hdr)
 	           g->ngpcretext);
 	}
 	if ((g->limit == -1) && (g->expr)) {
-	    match = (pcre_exec(g->expr, NULL, hdr, (int)strlen(hdr),
+	    match = (pcre2_match_8(g->expr, NULL, hdr, (int)strlen(hdr),
 			      0, 0, NULL, 0));
 	    if (debugmode & DEBUG_FILTER) {
 	        ln_log(LNLOG_SDEBUG, LNLOG_CALL,
@@ -591,13 +591,13 @@ free_entry(/*@null@*/ /*@only@*/ struct filterentry *e)
 {
     if (e) {
 	if (e->expr)
-	    pcre_free(e->expr);
+	    pcre2_code_free_8(e->expr);
 	if (e->action)
 	    free(e->action);
 	if (e->cleartext)
 	    free(e->cleartext);
 	if (e->newsgroups)
-	    pcre_free(e->newsgroups);
+	    pcre2_code_free_8(e->newsgroups);
 	if (e->ngpcretext)
 	    free(e->ngpcretext);
 	free(e);
diff --git a/groupselect.c b/groupselect.c
index 881272d..427f919 100644
--- a/groupselect.c
+++ b/groupselect.c
@@ -1,24 +1,36 @@
 #include <string.h>
-#include <pcre.h>
 
 #include "config.h"
 #include "groupselect.h"
 #include "ln_log.h"
 #include "pcrewrap.h"
 
-pcre *gs_compile(const char *regex, const char *file, unsigned long line) {
+pcre2_code_8 *gs_compile(const char *regex, const char *file, unsigned long line) {
+    int regex_errcode;
+    size_t regex_errpos;
+    pcre2_code_8 *re;
     return ln_pcre_compile(regex, PCRE_MULTILINE, NULL, file, line);
 }
 
 /* match s against PCRE p
  * WARNING: If p is NULL, every string s is considered a match
  * returns 1 for match, 0 for mismatch, negative for error */
-int gs_match(const pcre *p, const char *s) {
+int gs_match(const pcre2_code_8 *p, const unsigned char *s) {
     int match;
-    if (p == NULL) return 1;
-    match = pcre_exec(p, NULL, s, strlen(s), 0, PCRE_ANCHORED, NULL, 0);
+    pcre2_match_data_8 *match_data;
 
-    if (match == PCRE_ERROR_NOMATCH) return 0;
+    if (re == NULL) return 1;
+    match_data = pcre2_match_data_create_8(1, NULL);
+    if (NULL == match_data) {
+        ln_log(LNLOG_SERR, LNLOG_CTOP, "gs_match: out of memory allocating match_data");
+        return -1;
+    }
+
+    match = pcre2_match_8(re, s, PCRE2_ZERO_TERMINATED, 0,
+                          PCRE2_ANCHORED, match_data, NULL);
+
+    pcre2_match_data_free_8(match_data);
+    if (match == PCRE2_ERROR_NOMATCH) return 0;
     if (match >= 0) return 1;
-    return match;
+    return match; /* match < 0, but not PCRE2_ERROR_NOMATCH */
 }
diff --git a/groupselect.h b/groupselect.h
index d1c03d6..0751df6 100644
--- a/groupselect.h
+++ b/groupselect.h
@@ -2,9 +2,10 @@
 #define GROUPSELECT_H
 
 /* for pcre type */
-#include <pcre.h>
+#define PCRE2_CODE_UNIT_WIDTH 0
+#include <pcre2.h>
 
-pcre *gs_compile(const char *regex, const char *file, unsigned long line);
-int gs_match(const pcre *p, const char *s);
+pcre2_code_8 *gs_compile(const unsigned char *regex, const char *file, unsigned long line);
+int gs_match(const pcre2_code_8 *p, const unsigned char *s);
 
 #endif
diff --git a/leafnode-version.c b/leafnode-version.c
index ac0a8da..0761b15 100644
--- a/leafnode-version.c
+++ b/leafnode-version.c
@@ -24,6 +24,7 @@ main(void) /* if you support arguments some day, please make -v
 	      compatible with leafnode-1. */
 {
     static char env_path[] = "PATH=/bin:/usr/bin";
+    uint8_t pcre2info[64]; /* PCRE 10 requires at least 24 code units */
     /* ------------------------------------------------ */
     /* *** IMPORTANT ***
      * 
@@ -47,7 +48,10 @@ main(void) /* if you support arguments some day, please make -v
     puts(def_spooldir);
     fputs("default MTA: ", stdout);
     puts(DEFAULTMTA);
-    fputs("pcre version: ", stdout);
-    puts(pcre_version());
+	pcre2_config_8(PCRE2_CONFIG_VERSION, pcre2info);
+	printf("PCRE2 version: %s\n", pcre2info);
+
+	pcre2_config_8(PCRE2_CONFIG_UNICODE_VERSION, pcre2info);
+	printf("PCRE2 Unicode version: %s\n", pcre2info);
     exit(0);
 }
diff --git a/leafnode.8.in b/leafnode.8.in
index 70052de..897c60a 100644
--- a/leafnode.8.in
+++ b/leafnode.8.in
@@ -601,7 +601,7 @@ has been paid for by Uninett AS (http://www.uninett.no/).
 .BR texpire (8),
 .BR checkgroups (8),
 .BR glob (7),
-.BR pcre (3),
+.BR pcre2 (3),
 .BR @sysconfdir@/filters.example
 - note that filters.example should not be used as is -,
 .B "RFC 977"
diff --git a/leafnode.h b/leafnode.h
index b5c2cf9..ef8f4fe 100644
--- a/leafnode.h
+++ b/leafnode.h
@@ -58,7 +58,8 @@ using std::max;
 #ifdef __LCLINT__
 #include "lclint_fixes.h"
 #else /* not __LCLINT__ */
-#include <pcre.h>
+#define PCRE2_CODE_UNIT_WIDTH 0	/* let's fail unless explicitly using the _8() functions */
+#include <pcre2.h>
 #endif /* not __LCLINT__ */
 
 #include <stdbool.h>
@@ -298,11 +299,11 @@ matchlist(struct stringlistnode *patterns, const char *str);
  */
 struct filterentry {
     char *ngpcretext;
-    pcre *newsgroups;
+    pcre2_code_8 *newsgroups;
     int invertngs;
     long limit;
     char *cleartext;
-    pcre *expr;
+    pcre2_code_8 *expr;
     char *action;
 };
 struct filterlist {
@@ -437,7 +438,7 @@ struct serverlist {
     char *name;		/* Servername */
     char *username;
     char *password;
-    pcre *group_pcre;
+    pcre2_code_8 *group_pcre;
     unsigned short port;	/* port, if 0, look up nntp/tcp */
     int usexhdr;		/* use XHDR instead of XOVER if sensible */
     int descriptions;	/* download descriptions as well */
diff --git a/masock_sa2name.c b/masock_sa2name.c
index 4c9810b..527bd12 100644
--- a/masock_sa2name.c
+++ b/masock_sa2name.c
@@ -6,9 +6,10 @@
 
 #include "config.h"
 
-#undef _GNU_SOURCE
+#ifndef _GNU_SOURCE
 #define _GNU_SOURCE	// to expose nonstandard members in in.h structures to
 			// fix compilation in strict conformance mode on Linux
+#endif
 
 #include "masock.h"
 #include "critmem.h"
diff --git a/pcrewrap.c b/pcrewrap.c
index 823bd53..cddbbe2 100644
--- a/pcrewrap.c
+++ b/pcrewrap.c
@@ -1,19 +1,19 @@
 #include "pcrewrap.h"
-#include <pcre.h>
 #include "ln_log.h"
 
-pcre *ln_pcre_compile(const char *pattern, int options,
+pcre2_code_8 *ln_pcre_compile(const char *pattern, int options,
 	const unsigned char *tableptr, const char *filename,
 	unsigned long line)
 {
-    const char *regex_errmsg;
-    int regex_errpos;
-    pcre *re;
+    int regex_errcode;
+    size_t regex_errpos;
+    pcre2_code_8 *re;
 
-    re = pcre_compile(pattern, options,
-	    &regex_errmsg, &regex_errpos, tableptr);
-    if (!re) {
+    if (NULL == (re = pcre2_compile_8(regex, PCRE2_ZERO_TERMINATED, PCRE2_MULTILINE, &regex_errcode,
+				      &regex_errpos, NULL))) {
 	/* could not compile */
+	unsigned char regex_errmsg[1024];
+	int len = pcre2_get_error_message_8(regex_errcode, regex_errmsg, sizeof(regex_errmsg));
 	ln_log(LNLOG_SWARNING, LNLOG_CTOP,
 		"%s: invalid pattern at line %lu: %s",
 		filename, line, regex_errmsg);
diff --git a/pcrewrap.h b/pcrewrap.h
index a637053..47ee039 100644
--- a/pcrewrap.h
+++ b/pcrewrap.h
@@ -1,9 +1,10 @@
 #ifndef PCREWRAP_H
 #define PCREWRAP_H
 
-#include <pcre.h>
+#define PCRE2_CODE_UNIT_WIDTH 0	/* let's fail unless explicitly using the _8() functions */
+#include <pcre2.h>
 
-pcre *ln_pcre_compile(const char *value, int options,
+pcre2_code_8 *ln_pcre_compile(const char *value, int options,
 	const unsigned char *tableptr, const char *filename,
 	unsigned long line);
 #endif
-- 
2.49.0



More information about the leafnode-list mailing list