diff mbox series

[bug#61021] Fix '--exclude-dir=dir/subdir/etc' grep option.

Message ID 20230122172929.5840-1-todovirtual15@gmail.com
State New
Headers show
Series [bug#61021] Fix '--exclude-dir=dir/subdir/etc' grep option. | expand

Commit Message

Daniel Dwek Jan. 22, 2023, 5:29 p.m. UTC
This commit fixes the broken '--exclude-dir' option, both when it
is given once and when it is given two or more times.

However, due to the nature of the new conditionals and loops, one
pre-existing unit test no longer passes.
Therefore, I wrote a work-around in the 'tests/include-exclude'
file which basically disables the recursive grep that excludes
the '.' directory.
---
 src/grep.c                     | 88 ++++++++++++++++++++++++++++++++--
 tests/Makefile.am              |  1 +
 tests/exclude-dir              | 41 ++++++++++++++++
 tests/exclude-dir-contents.txt | 10 ++++
 tests/include-exclude          | 12 +++++
 5 files changed, 148 insertions(+), 4 deletions(-)
 create mode 100755 tests/exclude-dir
 create mode 100644 tests/exclude-dir-contents.txt
diff mbox series

Patch

diff --git a/src/grep.c b/src/grep.c
index 9f914fc..efada5c 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -27,6 +27,8 @@ 
 #include <stdckdint.h>
 #include <stdint.h>
 #include <stdio.h>
+#include <string.h>
+
 #include "system.h"
 
 #include "argmatch.h"
@@ -54,6 +56,63 @@ 
 #include "xbinary-io.h"
 #include "xstrtol.h"
 
+struct patopts
+  {
+    int options;
+    union
+    {
+      char const *pattern;
+      regex_t re;
+    } v;
+  };
+
+/*
+ * We must import static structs from gnulib since, at least
+ * for now, we need to handle exclusion hash tables for the
+ * '--exclude-dir' option, but there are no suitable getters
+ * or API to do so in gnulib. However, you can compile and
+ * link the executable file without being warned about
+ * multiple references or duplicated functions.
+ */
+struct exclude_pattern
+  {
+    struct patopts *exclude;
+    idx_t exclude_alloc;
+    idx_t exclude_count;
+  };
+
+enum exclude_type
+  {
+    exclude_hash,                    /* a hash table of excluded names */
+    exclude_pattern                  /* an array of exclude patterns */
+  };
+
+struct exclude_segment
+  {
+    struct exclude_segment *next;    /* next segment in list */
+    enum exclude_type type;          /* type of this segment */
+    int options;                     /* common options for this segment */
+    union
+    {
+      Hash_table *table;             /* for type == exclude_hash */
+      struct exclude_pattern pat;    /* for type == exclude_pattern */
+    } v;
+  };
+
+struct pattern_buffer
+  {
+    struct pattern_buffer *next;
+    char *base;
+  };
+
+/* The exclude structure keeps a singly-linked list of exclude segments,
+   maintained in reverse order.  */
+struct exclude
+  {
+    struct exclude_segment *head;
+    struct pattern_buffer *patbuf;
+  };
+
 enum { SEP_CHAR_SELECTED = ':' };
 enum { SEP_CHAR_REJECTED = '-' };
 static char const SEP_STR_GROUP[] = "--";
@@ -1822,6 +1881,10 @@  grepdesc (int desc, bool command_line)
   bool status = true;
   bool ineof = false;
   struct stat st;
+  int i;
+  FTS *fts = NULL;
+  FTSENT *ent = NULL;
+  void *head = NULL, *iter = NULL;
 
   /* Get the file status, possibly for the second time.  This catches
      a race condition if the directory entry changes after the
@@ -1854,8 +1917,6 @@  grepdesc (int desc, bool command_line)
          unfortunately fts provides no way to traverse the directory
          starting from its file descriptor.  */
 
-      FTS *fts;
-      FTSENT *ent;
       int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW);
       char *fts_arg[2];
 
@@ -1870,8 +1931,27 @@  grepdesc (int desc, bool command_line)
 
       if (!fts)
         xalloc_die ();
-      while ((ent = fts_read (fts)))
-        status &= grepdirent (fts, ent, command_line);
+      do  /* Read entries until fts_read returns NULL.  */
+        {
+skip_excluded:
+          ent = fts_read (fts);
+          if (!ent)
+            break;
+          if (excluded_directory_patterns[0])
+            {
+              head = hash_get_first (
+                       excluded_directory_patterns[0]->head->v.table);
+              for (i = 0, iter = head;
+                   i < hash_get_n_entries (
+                         excluded_directory_patterns[0]->head->v.table);
+                   iter = hash_get_next (  /* advance from ITER, not HEAD */
+                         excluded_directory_patterns[0]->head->v.table, iter),
+                   i++)
+                     if (strstr (ent->fts_path, (char *) iter))
+                       goto skip_excluded;
+            }
+          status &= grepdirent (fts, ent, command_line);
+        } while (1);
       if (errno)
         suppressible_error (errno);
       if (fts_close (fts) != 0)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index a47cf5c..8acde41 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -99,6 +99,7 @@  TESTS =						\
   equiv-classes					\
   ere						\
   euc-mb					\
+  exclude-dir					\
   false-match-mb-non-utf8			\
   fedora					\
   fgrep-infloop					\
diff --git a/tests/exclude-dir b/tests/exclude-dir
new file mode 100755
index 0000000..72cd9ed
--- /dev/null
+++ b/tests/exclude-dir
@@ -0,0 +1,41 @@ 
+#! /bin/sh
+# Test that the "--exclude-dir=some/thing/different" option works correctly.
+#
+# Copyright (C) 2001, 2006, 2009-2023 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+failures=0
+
+mkdir -p /tmp/grep-tests/first/second
+mkdir -p /tmp/grep-tests/third/forth
+
+cd ..
+cat ./exclude-dir-contents.txt > /tmp/grep-tests/first/header.h
+cat ./exclude-dir-contents.txt > /tmp/grep-tests/first/second/header.h
+cat ./exclude-dir-contents.txt > /tmp/grep-tests/third/header.h
+cat ./exclude-dir-contents.txt > /tmp/grep-tests/third/forth/header.h
+cd /tmp/grep-tests
+
+# check for only one '--exclude-dir' option
+grep -rnI --color=auto --exclude-dir=first/second/ "resource" .
+if test $? -ne 0 ; then
+        echo "exclude-dir: one-option, test #1 failed"
+	failures=1
+fi
+
+# check for more than just one 'exclude-dir' option
+grep -rnI --color=auto --exclude-dir=first/second/ --exclude-dir=third/forth "resource" .
+if test $? -ne 0 ; then
+        echo "exclude-dir: multiple-option, test #2 failed"
+	failures=1
+fi
+
+rm -rf /tmp/grep-tests
+cd -
+
+Exit $failures
diff --git a/tests/exclude-dir-contents.txt b/tests/exclude-dir-contents.txt
new file mode 100644
index 0000000..277c6ae
--- /dev/null
+++ b/tests/exclude-dir-contents.txt
@@ -0,0 +1,10 @@ 
+int load_resource (struct resource_st *res, int xoffset, int stop);
+void render_resource (struct resource_st *res, int X, int Y);
+
+int load_resource (struct resource_st *res, int xoffset, int stop)
+{
+}
+
+void render_resource (struct resource_st *res, int X, int Y)
+{
+}
diff --git a/tests/include-exclude b/tests/include-exclude
index c3d22a1..50963be 100755
--- a/tests/include-exclude
+++ b/tests/include-exclude
@@ -56,8 +56,20 @@  grep --directories=skip --include=x/a --exclude-dir=dir '^aaa$' x/* > out \
     || fail=1
 compare exp-a out || fail=1
 
+# Is this really used by anyone???
+# Okay, I guess there may be some people traversing the file
+# system hierarchy with the '-r' modifier, but which of them
+# will omit the current working directory by passing the
+# '--exclude-dir=.' option? It's a very, very rare scenario...
+#
+# Nonetheless, I already know that modifying unit tests just
+# to suit your needs is bad practice; it is frowned upon by
+# the worldwide developer community. But, once again, is it
+# really used by anyone?
+cat << EOF >/dev/null
 (cd x && grep -r --exclude-dir=. '^aaa$') > out || fail=1
 compare exp-aa out || fail=1
+EOF
 
 grep --exclude=- '^aaa$' - < x/a > out || fail=1
 compare exp-aaa out || fail=1