git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Plato Kiorpelidis <kioplato@gmail.com>
To: git@vger.kernel.org
Cc: phillip.wood123@gmail.com, avarab@gmail.com,
	Plato Kiorpelidis <kioplato@gmail.com>
Subject: [PATCH v2 10/15] dir-iterator: refactor dir_iterator_advance()
Date: Mon,  9 May 2022 20:51:54 +0300	[thread overview]
Message-ID: <20220509175159.2948802-11-kioplato@gmail.com> (raw)
In-Reply-To: <20220509175159.2948802-1-kioplato@gmail.com>

Simplify dir_iterator_advance() by converting from iterative to
recursive implementation. This conversion makes dir-iterator more
maintainable for the following reasons:
  * dir-iterator iterates the file-system, which is a tree structure.
    Traditionally, tree traversals, in textbooks, lectures and online
    sources are implemented recursively and not iteratively. Therefore
    it helps reduce mental load for readers, since it's easier to follow
    as it reminds of the same tree traversals we use on tree structures.
  * recursion requires one less indentation depth because we get rid of
    the while loop and instead recurse, using the program's stack.
  * in each recursive step a set of instructions are executed and
    recursion lays out the code structurally in a better way, such that
    these instructions stand out symmetrically for each recursive step.

This makes dir-iterator easier to work with, understand and introduce
new functionality, like post-order on some directory entries, because it
reminds us of the same operations we use to traverse tree structures.

Signed-off-by: Plato Kiorpelidis <kioplato@gmail.com>
---
 dir-iterator.c | 223 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 146 insertions(+), 77 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index b17e9f970a..3adcfbc966 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -7,8 +7,7 @@ struct dir_iterator_level {
 	DIR *dir;
 
 	/*
-	 * The length of the directory part of path at this level
-	 * (including a trailing '/'):
+	 * The length of the directory part of path at this level.
 	 */
 	size_t prefix_len;
 };
@@ -34,8 +33,9 @@ struct dir_iterator_int {
 	size_t levels_alloc;
 
 	/*
-	 * A stack of levels. levels[0] is the uppermost directory
-	 * that will be included in this iteration.
+	 * A stack of levels. levels[0] is the root directory.
+	 * It won't be included in the iteration, but iteration will happen
+	 * inside it's subdirectories.
 	 */
 	struct dir_iterator_level *levels;
 
@@ -43,36 +43,63 @@ struct dir_iterator_int {
 	unsigned int flags;
 };
 
+enum {
+	OK,
+	FAIL_ENOENT,
+	FAIL_NOT_ENOENT,
+};
+
 /*
  * Push a level in the iter stack and initialize it with information from
- * the directory pointed by iter->base->path. It is assumed that this
- * strbuf points to a valid directory path. Return 0 on success and -1
- * otherwise, setting errno accordingly and leaving the stack unchanged.
+ * the directory pointed by iter->base->path. Don't open the directory.
+ *
+ * Return 1 on success.
+ * Return 0 when `iter->base->path` isn't a directory.
  */
 static int push_level(struct dir_iterator_int *iter)
 {
 	struct dir_iterator_level *level;
 
+	if (!S_ISDIR(iter->base.st.st_mode))
+		return 0;
+
 	ALLOC_GROW(iter->levels, iter->levels_nr + 1, iter->levels_alloc);
 	level = &iter->levels[iter->levels_nr++];
 
-	if (!is_dir_sep(iter->base.path.buf[iter->base.path.len - 1]))
-		strbuf_addch(&iter->base.path, '/');
+	level->dir = NULL;
+
 	level->prefix_len = iter->base.path.len;
 
-	level->dir = opendir(iter->base.path.buf);
-	if (!level->dir) {
-		int saved_errno = errno;
-		if (errno != ENOENT) {
-			warning_errno("error opening directory '%s'",
-				      iter->base.path.buf);
-		}
-		iter->levels_nr--;
+	return 1;
+}
+
+/*
+ * Activate most recent pushed level. Stack is unchanged.
+ *
+ * Return values:
+ * OK on success.
+ * FAIL_ENOENT on failed exposure because entry does not exist.
+ * FAIL_NOT_ENOENT on failed exposure because of errno other than ENOENT.
+ */
+static int activate_level(struct dir_iterator_int *iter)
+{
+	struct dir_iterator_level *level = &iter->levels[iter->levels_nr - 1];
+	int saved_errno;
+
+	if (level->dir)
+		return OK;
+
+	if ((level->dir = opendir(iter->base.path.buf)) != NULL)
+		return OK;
+
+	saved_errno = errno;
+	if (errno != ENOENT) {
+		warning_errno("error opening directory '%s'", iter->base.path.buf);
 		errno = saved_errno;
-		return -1;
+		return FAIL_NOT_ENOENT;
 	}
-
-	return 0;
+	errno = saved_errno;
+	return FAIL_ENOENT;
 }
 
 /*
@@ -81,12 +108,10 @@ static int push_level(struct dir_iterator_int *iter)
  */
 static int pop_level(struct dir_iterator_int *iter)
 {
-	struct dir_iterator_level *level =
-		&iter->levels[iter->levels_nr - 1];
+	struct dir_iterator_level *level = &iter->levels[iter->levels_nr - 1];
 
 	if (level->dir && closedir(level->dir))
-		warning_errno("error closing directory '%s'",
-			      iter->base.path.buf);
+		warning_errno("error closing directory '%s'", iter->base.path.buf);
 	level->dir = NULL;
 
 	return --iter->levels_nr;
@@ -94,82 +119,119 @@ static int pop_level(struct dir_iterator_int *iter)
 
 /*
  * Populate iter->base with the necessary information on the next iteration
- * entry, represented by the given dirent de. Return 0 on success and -1
- * otherwise, setting errno accordingly.
+ * entry, represented by the given relative path to the lowermost directory,
+ * d_name.
+ *
+ * Return values:
+ * OK on successful exposure of the provided entry.
+ * FAIL_ENOENT on failed exposure because entry does not exist.
+ * FAIL_NOT_ENOENT on failed exposure because of errno other than ENOENT.
  */
-static int prepare_next_entry_data(struct dir_iterator_int *iter,
-				   struct dirent *de)
+static int expose_entry(struct dir_iterator_int *iter, char *d_name)
 {
-	int err, saved_errno;
+	int stat_err;
 
-	strbuf_addstr(&iter->base.path, de->d_name);
-	/*
-	 * We have to reset these because the path strbuf might have
-	 * been realloc()ed at the previous strbuf_addstr().
-	 */
-	iter->base.relative_path = iter->base.path.buf +
-				   iter->levels[0].prefix_len;
-	iter->base.basename = iter->base.path.buf +
-			      iter->levels[iter->levels_nr - 1].prefix_len;
+	strbuf_addch(&iter->base.path, '/');
+	strbuf_addstr(&iter->base.path, d_name);
 
 	if (iter->flags & DIR_ITERATOR_FOLLOW_SYMLINKS)
-		err = stat(iter->base.path.buf, &iter->base.st);
+		stat_err = stat(iter->base.path.buf, &iter->base.st);
 	else
-		err = lstat(iter->base.path.buf, &iter->base.st);
+		stat_err = lstat(iter->base.path.buf, &iter->base.st);
 
-	saved_errno = errno;
-	if (err && errno != ENOENT)
+	if (stat_err && errno != ENOENT) {
 		warning_errno("failed to stat '%s'", iter->base.path.buf);
+		return FAIL_NOT_ENOENT;
+	} else if (stat_err && errno == ENOENT) {
+		return FAIL_ENOENT;
+	}
 
-	errno = saved_errno;
-	return err;
+	/*
+	 * We have to reset relative path and basename because the path strbuf
+	 * might have been realloc()'ed at the previous strbuf_addstr().
+	 */
+
+	iter->base.relative_path =
+		iter->base.path.buf + iter->levels[0].prefix_len + 1;
+	iter->base.basename =
+		iter->base.path.buf + iter->levels[iter->levels_nr - 1].prefix_len + 1;
+
+	return OK;
 }
 
 int dir_iterator_advance(struct dir_iterator *dir_iterator)
 {
-	struct dir_iterator_int *iter =
-		(struct dir_iterator_int *)dir_iterator;
+	struct dir_iterator_int *iter = (struct dir_iterator_int *)dir_iterator;
+	struct dir_iterator_level *level = &iter->levels[iter->levels_nr - 1];
+	struct dirent *dir_entry = NULL;
+	int expose_err, activate_err;
+	/* For shorter code width-wise, more readable */
+	unsigned int PEDANTIC = iter->flags & DIR_ITERATOR_PEDANTIC;
 
-	if (S_ISDIR(iter->base.st.st_mode) && push_level(iter)) {
-		if (errno != ENOENT && iter->flags & DIR_ITERATOR_PEDANTIC)
-			goto error_out;
-		if (iter->levels_nr == 0)
+	/*
+	 * Attempt to open the directory of the last level if not opened yet.
+	 *
+	 * Remember that we ignore ENOENT errors so that the user of this API
+	 * can remove entries between calls to `dir_iterator_advance()`.
+	 * We care for errors other than ENOENT only when PEDANTIC is enabled.
+	 */
+
+	activate_err = activate_level(iter);
+
+	if (activate_err == FAIL_NOT_ENOENT && PEDANTIC) {
+		goto error_out;
+	} else if (activate_err != OK) {
+		--iter->levels_nr;
+
+		if (iter->levels_nr == 0)  /* Failed to open root directory */
 			goto error_out;
+
+		return dir_iterator_advance(dir_iterator);
 	}
 
-	/* Loop until we find an entry that we can give back to the caller. */
-	while (1) {
-		struct dirent *de;
-		struct dir_iterator_level *level =
-			&iter->levels[iter->levels_nr - 1];
+	strbuf_setlen(&iter->base.path, level->prefix_len);
+
+	errno = 0;
+	dir_entry = readdir(level->dir);
+
+	if (!dir_entry) {
+		if (errno) {
+			warning_errno("errno reading dir '%s'", iter->base.path.buf);
+			if (PEDANTIC)
+				goto error_out;
 
-		strbuf_setlen(&iter->base.path, level->prefix_len);
-		errno = 0;
-		de = readdir(level->dir);
-
-		if (!de) {
-			if (errno) {
-				warning_errno("error reading directory '%s'",
-					      iter->base.path.buf);
-				if (iter->flags & DIR_ITERATOR_PEDANTIC)
-					goto error_out;
-			} else if (pop_level(iter) == 0) {
+			return dir_iterator_advance(dir_iterator);
+		} else {
+			/*
+			 * Current directory has been iterated through.
+			 */
+
+			if (pop_level(iter) == 0)
 				return dir_iterator_abort(dir_iterator);
-			}
-			continue;
+
+			return dir_iterator_advance(dir_iterator);
 		}
+	}
 
-		if (is_dot_or_dotdot(de->d_name))
-			continue;
+	if (is_dot_or_dotdot(dir_entry->d_name))
+		return dir_iterator_advance(dir_iterator);
 
-		if (prepare_next_entry_data(iter, de)) {
-			if (errno != ENOENT && iter->flags & DIR_ITERATOR_PEDANTIC)
-				goto error_out;
-			continue;
-		}
+	/*
+	 * Successfully read entry from current directory level.
+	 */
 
-		return ITER_OK;
-	}
+	expose_err = expose_entry(iter, dir_entry->d_name);
+
+	if (expose_err == FAIL_NOT_ENOENT && PEDANTIC)
+		goto error_out;
+
+	if (expose_err == OK)
+		push_level(iter);
+
+	if (expose_err != OK)
+		return dir_iterator_advance(dir_iterator);
+
+	return ITER_OK;
 
 error_out:
 	dir_iterator_abort(dir_iterator);
@@ -207,6 +269,8 @@ struct dir_iterator *dir_iterator_begin(const char *path, unsigned int flags)
 
 	strbuf_init(&iter->base.path, PATH_MAX);
 	strbuf_addstr(&iter->base.path, path);
+	/* expose_entry() appends dir seperator before exposing an entry */
+	strbuf_trim_trailing_dir_sep(&iter->base.path);
 
 	ALLOC_GROW(iter->levels, 10, iter->levels_alloc);
 	iter->levels_nr = 0;
@@ -226,6 +290,11 @@ struct dir_iterator *dir_iterator_begin(const char *path, unsigned int flags)
 		goto error_out;
 	}
 
+	if (!push_level(iter)) {
+		saved_errno = ENOTDIR;
+		goto error_out;
+	}
+
 	return dir_iterator;
 
 error_out:
-- 
2.36.1


  parent reply	other threads:[~2022-05-09 17:57 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-09 17:51 [PATCH v2 00/15][GSoC] iterate dirs before or after their contents Plato Kiorpelidis
2022-05-09 17:51 ` [PATCH v2 01/15] t0066: refactor dir-iterator tests Plato Kiorpelidis
2022-05-09 17:51 ` [PATCH v2 02/15] t0066: remove dependency between unrelated tests Plato Kiorpelidis
2022-05-09 17:51 ` [PATCH v2 03/15] t0066: shorter expected and actual output file names Plato Kiorpelidis
2022-05-09 17:51 ` [PATCH v2 04/15] test-dir-iterator: consistently return EXIT_FAILURE or EXIT_SUCCESS Plato Kiorpelidis
2022-05-09 21:03   ` Junio C Hamano
2022-05-18 14:13     ` Plato Kiorpelidis
2022-05-18 17:57       ` Junio C Hamano
2022-05-09 17:51 ` [PATCH v2 05/15] test-dir-iterator: print EACCES and ELOOP errno set by dir_iterator Plato Kiorpelidis
2022-05-09 17:51 ` [PATCH v2 06/15] test-dir-iterator: print errno name set by dir_iterator_advance Plato Kiorpelidis
2022-05-09 17:51 ` [PATCH v2 07/15] t0066: better test coverage for dir-iterator Plato Kiorpelidis
2022-05-09 17:51 ` [PATCH v2 08/15] t0066: reorder tests from simple to more complex Plato Kiorpelidis
2022-05-09 17:51 ` [PATCH v2 09/15] t0066: rename test directories Plato Kiorpelidis
2022-05-09 17:51 ` Plato Kiorpelidis [this message]
2022-05-09 21:16   ` [PATCH v2 10/15] dir-iterator: refactor dir_iterator_advance() Junio C Hamano
2022-05-18 15:39     ` Plato Kiorpelidis
2022-05-10 13:04   ` Phillip Wood
2022-05-09 17:51 ` [PATCH v2 11/15] dir-iterator: open root dir in dir_iterator_begin() Plato Kiorpelidis
2022-05-09 17:51 ` [PATCH v2 12/15] t0066: rename subtest descriptions Plato Kiorpelidis
2022-05-09 17:51 ` [PATCH v2 13/15] dir-iterator: option to iterate dirs in pre-order Plato Kiorpelidis
2022-05-10 13:07   ` Phillip Wood
2022-05-18 17:40     ` Plato Kiorpelidis
2022-05-18 17:47       ` rsbecker
2022-05-18 18:09         ` Junio C Hamano
2022-05-18 18:36           ` rsbecker
2022-05-09 17:51 ` [PATCH v2 14/15] dir-iterator: option to iterate dirs in post-order Plato Kiorpelidis
2022-05-09 17:51 ` [PATCH v2 15/15] entry.c: use dir-iterator to avoid explicit dir traversal Plato Kiorpelidis
2022-05-10 13:10   ` Phillip Wood
2022-05-10 13:13 ` [PATCH v2 00/15][GSoC] iterate dirs before or after their contents Phillip Wood
2022-05-10 16:31 ` Junio C Hamano
2022-05-20 17:43   ` Plato Kiorpelidis

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220509175159.2948802-11-kioplato@gmail.com \
    --to=kioplato@gmail.com \
    --cc=avarab@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=phillip.wood123@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).