git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [GSoC][PATCH] submodule: port submodule subcommand 'add' from shell to C
@ 2020-08-24  9:03 Shourya Shukla
  2020-08-24 18:35 ` Junio C Hamano
  0 siblings, 1 reply; 12+ messages in thread
From: Shourya Shukla @ 2020-08-24  9:03 UTC (permalink / raw)
  To: git
  Cc: christian.couder, kaartic.sivaraam, gitster, johannes.schindelin,
	liu.denton, Prathamesh Chavan, Christian Couder, Stefan Beller,
	Shourya Shukla

From: Prathamesh Chavan <pc44800@gmail.com>

Convert submodule subcommand 'add' to a builtin and call it via
'git-submodule.sh'.

Also, since the command die()s out in case of absence of commits in the
submodule and exits with exit status 1 when we try adding a submodule
which is mentioned in .gitignore, the keyword 'fatal' is prefixed in the
error messages. Therefore, prepend the keyword in the expected outputs
of tests t7400.6 and t7400.16.

Mentored-by: Christian Couder <chriscool@tuxfamily.org>
Mentored-by: Stefan Beller <stefanbeller@gmail.com>
Signed-off-by: Prathamesh Chavan <pc44800@gmail.com>
Mentored-by: Kaartic Sivaraam <kaartic.sivaraam@gmail.com>
Signed-off-by: Shourya Shukla <shouryashukla.oo@gmail.com>
---
This is my port of 'git submodule add' from shell to C. I had some
confusion regarding this segment in the shell script:

	if test -z "$force"
	then
		git ls-files --error-unmatch "$sm_path" > /dev/null 2>&1 &&
		die "$(eval_gettext "'\$sm_path' already exists in the index")"
	else
		git ls-files -s "$sm_path" | sane_grep -v "^160000" > /dev/null 2>&1 &&
		die "$(eval_gettext "'\$sm_path' already exists in the index and is not a submodule")"
	fi

This is what I have done in C:

	if (!force) {
		if (is_directory(path) && submodule_from_path(the_repository, &null_oid, path))
			die(_("'%s' already exists in the index"), path);
	} else {
		int err;
		if (index_name_pos(&the_index, path, strlen(path)) >= 0 &&
		    !is_submodule_populated_gently(path, &err))
			die(_("'%s' already exists in the index and is not a "
			      "submodule"), path);
	}

Is this part correct? I am not very sure about this. This particular
part is not covered in any test or test script, so, I do not have a
solid method of knowing the correctness of this segment.
Feedback and reviews are appreciated.
---

 builtin/submodule--helper.c | 371 ++++++++++++++++++++++++++++++++++++
 git-submodule.sh            | 161 +---------------
 t/t7400-submodule-basic.sh  |   5 +-
 3 files changed, 375 insertions(+), 162 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index df135abbf1..b8189822a3 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -2320,6 +2320,376 @@ static int module_set_branch(int argc, const char **argv, const char *prefix)
 	return !!ret;
 }
 
+struct add_data {
+	const char *prefix;
+	const char *branch;
+	const char *reference_path;
+	const char *sm_path;
+	const char *sm_name;
+	const char *repo;
+	const char *realrepo;
+	int depth;
+	unsigned int force: 1;
+	unsigned int quiet: 1;
+	unsigned int progress: 1;
+	unsigned int dissociate: 1;
+};
+#define ADD_DATA_INIT { NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, 0 }
+
+/*
+ * Guess dir name from repository: strip leading '.*[/:]',
+ * strip trailing '[:/]*.git'.
+ */
+static char *guess_dir_name(const char *repo)
+{
+	const char *p, *start, *end, *limit;
+	int after_slash_or_colon;
+
+	after_slash_or_colon = 0;
+	limit = repo + strlen(repo);
+	start = repo;
+	end = limit;
+	for (p = repo; p < limit; p++) {
+		if (starts_with(p, ".git")) {
+			/* strip trailing '[:/]*.git' */
+			if (!after_slash_or_colon)
+				end = p;
+			p += 3;
+		} else if (*p == '/' || *p == ':') {
+			/* strip leading '.*[/:]' */
+			if (end == limit)
+				end = p;
+			after_slash_or_colon = 1;
+		} else if (after_slash_or_colon) {
+			start = p;
+			end = limit;
+			after_slash_or_colon = 0;
+		}
+	}
+	return xstrndup(start, end - start);
+}
+
+static void fprintf_submodule_remote(const char *str)
+{
+	const char *p = str;
+	const char *start;
+	const char *end;
+	char *name, *url;
+
+	start = p;
+	while (*p != ' ')
+		p++;
+	end = p;
+	name = xstrndup(start, end - start);
+
+	while(*p == ' ')
+		p++;
+	start = p;
+	while (*p != ' ')
+		p++;
+	end = p;
+	url = xstrndup(start, end - start);
+
+	fprintf(stderr, "  %s\t%s\n", name, url);
+	free(name);
+	free(url);
+}
+
+static void modify_remote_v(struct strbuf *sb)
+{
+	int i;
+	for (i = 0; i < sb->len; i++) {
+		const char *start = sb->buf + i;
+		const char *end = start;
+		while (sb->buf[i++] != '\n')
+			end++;
+		if (!strcmp("fetch", xstrndup(end - 6, 5)))
+			fprintf_submodule_remote(xstrndup(start, end - start - 7));
+	}
+}
+
+static int add_submodule(struct add_data *info)
+{
+	/* perhaps the path exists and is already a git repo, else clone it */
+	if (is_directory(info->sm_path)) {
+		char *sub_git_path = xstrfmt("%s/.git", info->sm_path);
+		if (is_directory(sub_git_path) || file_exists(sub_git_path))
+			printf(_("Adding existing repo at '%s' to the index\n"),
+				 info->sm_path);
+		else
+			die(_("'%s' already exists and is not a valid git repo"),
+			      info->sm_path);
+		free(sub_git_path);
+	} else {
+		struct strvec clone_args = STRVEC_INIT;
+		struct child_process cp = CHILD_PROCESS_INIT;
+		char *submodule_git_dir = xstrfmt(".git/modules/%s", info->sm_name);
+
+		if (is_directory(submodule_git_dir)) {
+			if (!info->force) {
+				struct child_process cp_rem = CHILD_PROCESS_INIT;
+				struct strbuf sb_rem = STRBUF_INIT;
+				cp_rem.git_cmd = 1;
+				fprintf(stderr, _("A git directory for '%s' is "
+					"found locally with remote(s):\n"),
+					info->sm_name);
+				strvec_pushf(&cp_rem.env_array,
+					     "GIT_DIR=%s", submodule_git_dir);
+				strvec_push(&cp_rem.env_array, "GIT_WORK_TREE=.");
+				strvec_pushl(&cp_rem.args, "remote", "-v", NULL);
+				if (!capture_command(&cp_rem, &sb_rem, 0)) {
+					modify_remote_v(&sb_rem);
+				}
+				error(_("If you want to reuse this local git "
+				      "directory instead of cloning again from\n "
+				      "  %s\n"
+				      "use the '--force' option. If the local "
+				      "git directory is not the correct repo\n"
+				      "or you are unsure what this means choose "
+				      "another name with the '--name' option."),
+				      info->realrepo);
+				return 1;
+			} else {
+				printf(_("Reactivating local git directory for "
+					 "submodule '%s'."), info->sm_path);
+			}
+		}
+		free(submodule_git_dir);
+
+		strvec_push(&clone_args, "clone");
+
+		if (info->quiet)
+			strvec_push(&clone_args, "--quiet");
+
+		if (info->progress)
+			strvec_push(&clone_args, "--progress");
+
+		if (info->prefix)
+			strvec_pushl(&clone_args, "--prefix", info->prefix, NULL);
+		strvec_pushl(&clone_args, "--path", info->sm_path, "--name",
+			     info->sm_name, "--url", info->realrepo, NULL);
+		if (info->reference_path)
+			strvec_pushl(&clone_args, "--reference",
+				     info->reference_path, NULL);
+		if (info->dissociate)
+			strvec_push(&clone_args, "--dissociate");
+
+		if (info->depth >= 0)
+			strvec_pushf(&clone_args, "--depth=%d", info->depth);
+
+		if (module_clone(clone_args.nr, clone_args.v, info->prefix)) {
+			strvec_clear(&clone_args);
+			return -1;
+		}
+
+		prepare_submodule_repo_env(&cp.env_array);
+		cp.git_cmd = 1;
+		cp.dir = info->sm_path;
+		strvec_pushl(&cp.args, "checkout", "-f", "-q", NULL);
+
+		if (info->branch) {
+			strvec_pushl(&cp.args, "-B", info->branch, NULL);
+			strvec_pushf(&cp.args, "origin/%s", info->branch);
+		}
+
+		if (run_command(&cp))
+			die(_("unable to checkout submodule '%s'"), info->sm_path);
+	}
+	return 0;
+}
+
+static void config_added_submodule(struct add_data *info)
+{
+	char *key, *var = NULL;
+	struct child_process cp = CHILD_PROCESS_INIT;
+
+	key = xstrfmt("submodule.%s.url", info->sm_name);
+	git_config_set_gently(key, info->realrepo);
+	free(key);
+
+	cp.git_cmd = 1;
+	strvec_pushl(&cp.args, "add", "--no-warn-embedded-repo", NULL);
+	if (info->force)
+		strvec_push(&cp.args, "--force");
+	strvec_pushl(&cp.args, "--", info->sm_path, ".gitmodules", NULL);
+
+	key = xstrfmt("submodule.%s.path", info->sm_name);
+	git_config_set_in_file_gently(".gitmodules", key, info->sm_path);
+	free(key);
+	key = xstrfmt("submodule.%s.url", info->sm_name);
+	git_config_set_in_file_gently(".gitmodules", key, info->repo);
+	free(key);
+	key = xstrfmt("submodule.%s.branch", info->sm_name);
+	if (info->branch)
+		git_config_set_in_file_gently(".gitmodules", key, info->branch);
+	free(key);
+
+	if (run_command(&cp))
+		die(_("failed to add submodule '%s'"), info->sm_path);
+
+	/*
+	 * NEEDSWORK: In a multi-working-tree world, this needs to be
+	 * set in the per-worktree config.
+	 */
+	if (!git_config_get_string("submodule.active", &var) && var) {
+
+		/*
+		 * If the submodule being adding isn't already covered by the
+		 * current configured pathspec, set the submodule's active flag
+		 */
+		if (!is_submodule_active(the_repository, info->sm_path)) {
+			key = xstrfmt("submodule.%s.active", info->sm_name);
+			git_config_set_gently(key, "true");
+			free(key);
+		}
+	} else {
+		key = xstrfmt("submodule.%s.active", info->sm_name);
+		git_config_set_gently(key, "true");
+		free(key);
+	}
+}
+
+static int module_add(int argc, const char **argv, const char *prefix)
+{
+	const char *branch = NULL, *custom_name = NULL, *realrepo = NULL;
+	const char *reference_path = NULL, *repo = NULL, *name = NULL;
+	char *path;
+	int force = 0, quiet = 0, depth = -1, progress = 0, dissociate = 0;
+	struct add_data info = ADD_DATA_INIT;
+	struct strbuf sb = STRBUF_INIT;
+
+	struct option options[] = {
+		OPT_STRING('b', "branch", &branch, N_("branch"),
+			   N_("branch of repository to add as submodule")),
+		OPT_BOOL('f', "force", &force, N_("allow adding an otherwise "
+						  "ignored submodule path")),
+		OPT__QUIET(&quiet, N_("print only error messages")),
+		OPT_BOOL(0, "progress", &progress, N_("force cloning progress")),
+		OPT_STRING(0, "reference", &reference_path, N_("repository"),
+			   N_("reference repository")),
+		OPT_BOOL(0, "dissociate", &dissociate, N_("borrow the objects from reference repositories")),
+		OPT_STRING(0, "name", &custom_name, N_("name"),
+			   N_("sets the submodule’s name to the given string "
+			      "instead of defaulting to its path")),
+		OPT_INTEGER(0, "depth", &depth, N_("depth for shallow clones")),
+		OPT_END()
+	};
+
+	const char *const usage[] = {
+		N_("git submodule--helper add [<options>] [--] [<path>]"),
+		NULL
+	};
+
+	argc = parse_options(argc, argv, prefix, options, usage, 0);
+
+	if (!is_writing_gitmodules_ok())
+		die(_("please make sure that the .gitmodules file is in the working tree"));
+
+	if (reference_path && !is_absolute_path(reference_path) && prefix)
+		reference_path = xstrfmt("%s%s", prefix, reference_path);
+
+	if (argc == 0 || argc > 2) {
+		usage_with_options(usage, options);
+	} else if (argc == 1) {
+		repo = argv[0];
+		path = guess_dir_name(repo);
+	} else {
+		repo = argv[0];
+		path = xstrdup(argv[1]);
+	}
+
+	if (!is_absolute_path(path) && prefix)
+		path = xstrfmt("%s%s", prefix, path);
+
+	/* assure repo is absolute or relative to parent */
+	if (starts_with_dot_dot_slash(repo) || starts_with_dot_slash(repo)) {
+		char *remote = get_default_remote();
+		char *remoteurl;
+		struct strbuf sb = STRBUF_INIT;
+
+		if (prefix)
+			die(_("relative path can only be used from the toplevel "
+			      "of the working tree"));
+		/* dereference source url relative to parent's url */
+		strbuf_addf(&sb, "remote.%s.url", remote);
+		if (git_config_get_string(sb.buf, &remoteurl))
+			remoteurl = xgetcwd();
+		realrepo = relative_url(remoteurl, repo, NULL);
+
+		free(remoteurl);
+		free(remote);
+	} else if (is_dir_sep(repo[0]) || strchr(repo, ':')) {
+		realrepo = repo;
+	} else {
+		die(_("repo URL: '%s' must be absolute or begin with ./|../"),
+		      repo);
+	}
+
+	/*
+	 * normalize path:
+	 * multiple //; leading ./; /./; /../;
+	 */
+	normalize_path_copy(path, path);
+	/* strip trailing '/' */
+	if (is_dir_sep(path[strlen(path) -1]))
+		path[strlen(path) - 1] = '\0';
+
+	if (!force) {
+		if (is_directory(path) && submodule_from_path(the_repository, &null_oid, path))
+			die(_("'%s' already exists in the index"), path);
+	} else {
+		int err;
+		if (index_name_pos(&the_index, path, strlen(path)) >= 0 &&
+		    !is_submodule_populated_gently(path, &err))
+			die(_("'%s' already exists in the index and is not a "
+			      "submodule"), path);
+	}
+
+	strbuf_addstr(&sb, path);
+	if (is_directory(path)) {
+		struct object_id oid;
+		if (resolve_gitlink_ref(path, "HEAD", &oid) < 0)
+			die(_("'%s' does not have a commit checked out"), path);
+	}
+
+	if (!force) {
+		struct strbuf sb = STRBUF_INIT;
+		struct child_process cp = CHILD_PROCESS_INIT;
+		cp.git_cmd = 1;
+		cp.no_stdout = 1;
+		strvec_pushl(&cp.args, "add", "--dry-run", "--ignore-missing",
+			     "--no-warn-embedded-repo", path, NULL);
+		if (pipe_command(&cp, NULL, 0, NULL, 0, &sb, 0))
+			die(_("%s"), sb.buf);
+		strbuf_release(&sb);
+	}
+
+	name = custom_name ? custom_name : path;
+	if (check_submodule_name(name))
+		die(_("'%s' is not a valid submodule name"), name);
+
+	info.prefix = prefix;
+	info.sm_name = name;
+	info.sm_path = path;
+	info.repo = repo;
+	info.realrepo = realrepo;
+	info.reference_path = reference_path;
+	info.branch = branch;
+	info.depth = depth;
+	info.progress = !!progress;
+	info.dissociate = !!dissociate;
+	info.force = !!force;
+	info.quiet = !!quiet;
+
+	if (add_submodule(&info))
+		return 1;
+	config_added_submodule(&info);
+
+	free(path);
+
+	return 0;
+}
+
 #define SUPPORT_SUPER_PREFIX (1<<0)
 
 struct cmd_struct {
@@ -2352,6 +2722,7 @@ static struct cmd_struct commands[] = {
 	{"config", module_config, 0},
 	{"set-url", module_set_url, 0},
 	{"set-branch", module_set_branch, 0},
+	{"add", module_add, SUPPORT_SUPER_PREFIX},
 };
 
 int cmd_submodule__helper(int argc, const char **argv, const char *prefix)
diff --git a/git-submodule.sh b/git-submodule.sh
index 43eb6051d2..434db338a4 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -171,166 +171,7 @@ cmd_add()
 		shift
 	done
 
-	if ! git submodule--helper config --check-writeable >/dev/null 2>&1
-	then
-		 die "$(eval_gettext "please make sure that the .gitmodules file is in the working tree")"
-	fi
-
-	if test -n "$reference_path"
-	then
-		is_absolute_path "$reference_path" ||
-		reference_path="$wt_prefix$reference_path"
-
-		reference="--reference=$reference_path"
-	fi
-
-	repo=$1
-	sm_path=$2
-
-	if test -z "$sm_path"; then
-		sm_path=$(printf '%s\n' "$repo" |
-			sed -e 's|/$||' -e 's|:*/*\.git$||' -e 's|.*[/:]||g')
-	fi
-
-	if test -z "$repo" || test -z "$sm_path"; then
-		usage
-	fi
-
-	is_absolute_path "$sm_path" || sm_path="$wt_prefix$sm_path"
-
-	# assure repo is absolute or relative to parent
-	case "$repo" in
-	./*|../*)
-		test -z "$wt_prefix" ||
-		die "$(gettext "Relative path can only be used from the toplevel of the working tree")"
-
-		# dereference source url relative to parent's url
-		realrepo=$(git submodule--helper resolve-relative-url "$repo") || exit
-		;;
-	*:*|/*)
-		# absolute url
-		realrepo=$repo
-		;;
-	*)
-		die "$(eval_gettext "repo URL: '\$repo' must be absolute or begin with ./|../")"
-	;;
-	esac
-
-	# normalize path:
-	# multiple //; leading ./; /./; /../; trailing /
-	sm_path=$(printf '%s/\n' "$sm_path" |
-		sed -e '
-			s|//*|/|g
-			s|^\(\./\)*||
-			s|/\(\./\)*|/|g
-			:start
-			s|\([^/]*\)/\.\./||
-			tstart
-			s|/*$||
-		')
-	if test -z "$force"
-	then
-		git ls-files --error-unmatch "$sm_path" > /dev/null 2>&1 &&
-		die "$(eval_gettext "'\$sm_path' already exists in the index")"
-	else
-		git ls-files -s "$sm_path" | sane_grep -v "^160000" > /dev/null 2>&1 &&
-		die "$(eval_gettext "'\$sm_path' already exists in the index and is not a submodule")"
-	fi
-
-	if test -d "$sm_path" &&
-		test -z $(git -C "$sm_path" rev-parse --show-cdup 2>/dev/null)
-	then
-	    git -C "$sm_path" rev-parse --verify -q HEAD >/dev/null ||
-	    die "$(eval_gettext "'\$sm_path' does not have a commit checked out")"
-	fi
-
-	if test -z "$force"
-	then
-	    dryerr=$(git add --dry-run --ignore-missing --no-warn-embedded-repo "$sm_path" 2>&1 >/dev/null)
-	    res=$?
-	    if test $res -ne 0
-	    then
-		 echo >&2 "$dryerr"
-		 exit $res
-	    fi
-	fi
-
-	if test -n "$custom_name"
-	then
-		sm_name="$custom_name"
-	else
-		sm_name="$sm_path"
-	fi
-
-	if ! git submodule--helper check-name "$sm_name"
-	then
-		die "$(eval_gettext "'$sm_name' is not a valid submodule name")"
-	fi
-
-	# perhaps the path exists and is already a git repo, else clone it
-	if test -e "$sm_path"
-	then
-		if test -d "$sm_path"/.git || test -f "$sm_path"/.git
-		then
-			eval_gettextln "Adding existing repo at '\$sm_path' to the index"
-		else
-			die "$(eval_gettext "'\$sm_path' already exists and is not a valid git repo")"
-		fi
-
-	else
-		if test -d ".git/modules/$sm_name"
-		then
-			if test -z "$force"
-			then
-				eval_gettextln >&2 "A git directory for '\$sm_name' is found locally with remote(s):"
-				GIT_DIR=".git/modules/$sm_name" GIT_WORK_TREE=. git remote -v | grep '(fetch)' | sed -e s,^,"  ", -e s,' (fetch)',, >&2
-				die "$(eval_gettextln "\
-If you want to reuse this local git directory instead of cloning again from
-  \$realrepo
-use the '--force' option. If the local git directory is not the correct repo
-or you are unsure what this means choose another name with the '--name' option.")"
-			else
-				eval_gettextln "Reactivating local git directory for submodule '\$sm_name'."
-			fi
-		fi
-		git submodule--helper clone ${GIT_QUIET:+--quiet} ${progress:+"--progress"} --prefix "$wt_prefix" --path "$sm_path" --name "$sm_name" --url "$realrepo" ${reference:+"$reference"} ${dissociate:+"--dissociate"} ${depth:+"$depth"} || exit
-		(
-			sanitize_submodule_env
-			cd "$sm_path" &&
-			# ash fails to wordsplit ${branch:+-b "$branch"...}
-			case "$branch" in
-			'') git checkout -f -q ;;
-			?*) git checkout -f -q -B "$branch" "origin/$branch" ;;
-			esac
-		) || die "$(eval_gettext "Unable to checkout submodule '\$sm_path'")"
-	fi
-	git config submodule."$sm_name".url "$realrepo"
-
-	git add --no-warn-embedded-repo $force "$sm_path" ||
-	die "$(eval_gettext "Failed to add submodule '\$sm_path'")"
-
-	git submodule--helper config submodule."$sm_name".path "$sm_path" &&
-	git submodule--helper config submodule."$sm_name".url "$repo" &&
-	if test -n "$branch"
-	then
-		git submodule--helper config submodule."$sm_name".branch "$branch"
-	fi &&
-	git add --force .gitmodules ||
-	die "$(eval_gettext "Failed to register submodule '\$sm_path'")"
-
-	# NEEDSWORK: In a multi-working-tree world, this needs to be
-	# set in the per-worktree config.
-	if git config --get submodule.active >/dev/null
-	then
-		# If the submodule being adding isn't already covered by the
-		# current configured pathspec, set the submodule's active flag
-		if ! git submodule--helper is-active "$sm_path"
-		then
-			git config submodule."$sm_name".active "true"
-		fi
-	else
-		git config submodule."$sm_name".active "true"
-	fi
+	git ${wt_prefix:+-C "$wt_prefix"} ${prefix:+--super-prefix "$prefix"} submodule--helper add ${force:+--force} ${GIT_QUIET:+--quiet} ${progress:+--progress} ${branch:+--branch "$branch"} ${reference_path:+--reference "$reference_path"} ${dissociate:+--dissociate} ${custom_name:+--name "$custom_name"} ${depth:+"$depth"} -- "$@"
 }
 
 #
diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh
index fec7e0299d..f26edddbb8 100755
--- a/t/t7400-submodule-basic.sh
+++ b/t/t7400-submodule-basic.sh
@@ -48,7 +48,7 @@ test_expect_success 'submodule update aborts on missing gitmodules url' '
 
 test_expect_success 'add aborts on repository with no commits' '
 	cat >expect <<-\EOF &&
-	'"'repo-no-commits'"' does not have a commit checked out
+	fatal: '"'repo-no-commits'"' does not have a commit checked out
 	EOF
 	git init repo-no-commits &&
 	test_must_fail git submodule add ../a ./repo-no-commits 2>actual &&
@@ -171,11 +171,12 @@ test_expect_success 'submodule add to .gitignored path fails' '
 	(
 		cd addtest-ignore &&
 		cat <<-\EOF >expect &&
-		The following paths are ignored by one of your .gitignore files:
+		fatal: The following paths are ignored by one of your .gitignore files:
 		submod
 		hint: Use -f if you really want to add them.
 		hint: Turn this message off by running
 		hint: "git config advice.addIgnoredFile false"
+
 		EOF
 		# Does not use test_commit due to the ignore
 		echo "*" > .gitignore &&
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [GSoC][PATCH] submodule: port submodule subcommand 'add' from shell to C
  2020-08-24  9:03 [GSoC][PATCH] submodule: port submodule subcommand 'add' from shell to C Shourya Shukla
@ 2020-08-24 18:35 ` Junio C Hamano
  2020-08-24 20:30   ` Kaartic Sivaraam
  2020-08-26  9:15   ` Shourya Shukla
  0 siblings, 2 replies; 12+ messages in thread
From: Junio C Hamano @ 2020-08-24 18:35 UTC (permalink / raw)
  To: Shourya Shukla
  Cc: git, christian.couder, kaartic.sivaraam, johannes.schindelin,
	liu.denton, Prathamesh Chavan, Christian Couder, Stefan Beller

Shourya Shukla <shouryashukla.oo@gmail.com> writes:

> 	if test -z "$force"
> 	then
> 		git ls-files --error-unmatch "$sm_path" > /dev/null 2>&1 &&
> 		die "$(eval_gettext "'\$sm_path' already exists in the index")"
> 	else
> 		git ls-files -s "$sm_path" | sane_grep -v "^160000" > /dev/null 2>&1 &&
> 		die "$(eval_gettext "'\$sm_path' already exists in the index and is not a submodule")"
> 	fi

Hmph.  So,

 - if we are not being 'force'd, we see if there is anything in the
   index for the path and error out, whether it is a gitlink or not.

 - if there is 'force' option, we see what the given path is in the
   index, and if it is already a gitlink, then die.  That sort of
   makes sense, as long as the remainder of the code deals with the
   path that is not a submodule in a sensible way.

> This is what I have done in C:
>
> 	if (!force) {
> 		if (is_directory(path) && submodule_from_path(the_repository, &null_oid, path))
> 			die(_("'%s' already exists in the index"), path);

The shell version would error out with anything in the index, so I'd
expect that a faithful conversion would not call is_directory() nor
submodule_from_path() at all---it would just look path up in the_index
and complain if anything is found.  For example, the quoted part in
the original above is what gives the error message when I do

	$ git submodule add ./Makefile
	'Makefile' already exists in the index.

I think.  And the above code won't trigger the "already exists" at
all because 'path' is not a directory.

> 	} else {
> 		int err;
> 		if (index_name_pos(&the_index, path, strlen(path)) >= 0 &&
> 		    !is_submodule_populated_gently(path, &err))
> 			die(_("'%s' already exists in the index and is not a "
> 			      "submodule"), path);

Likewise.  The above does much more than the original.

The original was checking if the found cache entry has 160000 mode
bit, so the second test would not be is_submodule_populated_gently()
but more like !S_ISGITLINK(ce->ce_mode)

Now it is a different question if the original is correct to begin
with ;-).  

> 	}
>
> Is this part correct? I am not very sure about this. This particular
> part is not covered in any test or test script, so, I do not have a
> solid method of knowing the correctness of this segment.
> Feedback and reviews are appreciated.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GSoC][PATCH] submodule: port submodule subcommand 'add' from shell to C
  2020-08-24 18:35 ` Junio C Hamano
@ 2020-08-24 20:30   ` Kaartic Sivaraam
  2020-08-24 20:46     ` Junio C Hamano
  2020-08-26  9:27     ` Shourya Shukla
  2020-08-26  9:15   ` Shourya Shukla
  1 sibling, 2 replies; 12+ messages in thread
From: Kaartic Sivaraam @ 2020-08-24 20:30 UTC (permalink / raw)
  To: Junio C Hamano, Shourya Shukla
  Cc: git, christian.couder, johannes.schindelin, liu.denton,
	Prathamesh Chavan, Christian Couder, Stefan Beller

On Mon, 2020-08-24 at 11:35 -0700, Junio C Hamano wrote:
> Shourya Shukla <shouryashukla.oo@gmail.com> writes:
> 
> > 	if test -z "$force"
> > 	then
> > 		git ls-files --error-unmatch "$sm_path" > /dev/null 2>&1 &&
> > 		die "$(eval_gettext "'\$sm_path' already exists in the index")"
> > 	else
> > 		git ls-files -s "$sm_path" | sane_grep -v "^160000" > /dev/null 2>&1 &&
> > 		die "$(eval_gettext "'\$sm_path' already exists in the index and is not a submodule")"
> > 	fi
> 
> Hmph.  So,
> 
>  - if we are not being 'force'd, we see if there is anything in the
>    index for the path and error out, whether it is a gitlink or not.
> 

Right.

>  - if there is 'force' option, we see what the given path is in the
>    index, and if it is already a gitlink, then die.  That sort of
>    makes sense, as long as the remainder of the code deals with the
>    path that is not a submodule in a sensible way.
> 

With `force`, I think it's the opposite of what you describe. That is:

    - if there is 'force' option, we see what the given path is in the
      index, and if it is **not** already a gitlink, then die. 

Note the `-v` passed to sane_grep.

> > This is what I have done in C:
> > 
> > 	if (!force) {
> > 		if (is_directory(path) && submodule_from_path(the_repository, &null_oid, path))
> > 			die(_("'%s' already exists in the index"), path);
> 
> The shell version would error out with anything in the index, so I'd
> expect that a faithful conversion would not call is_directory() nor
> submodule_from_path() at all---it would just look path up in the_index
> and complains if anything is found.  For example, the quoted part in
> the original above is what gives the error message when I do
> 
> 	$ git submodule add ./Makefile
> 	'Makefile' already exists in the index.
> 
> I think.  And the above code won't trigger the "already exists" at
> all because 'path' is not a directory.
> 
> > 	} else {
> > 		int err;
> > 		if (index_name_pos(&the_index, path, strlen(path)) >= 0 &&
> > 		    !is_submodule_populated_gently(path, &err))
> > 			die(_("'%s' already exists in the index and is not a "
> > 			      "submodule"), path);
> 
> Likewise.  The above does much more than the original.
> 
> The original was checking if the found cache entry has 160000 mode
> bit, so the second test would not be is_submodule_populated_gently()
> but more like !S_ISGITLINK(ce->ce_mode)
> 

Yeah, the C version does need a more proper check in both cases.


> Now it is a different question if the original is correct to begin
> with ;-).  
> 

By looking at commit message of 619acfc78c (submodule add: extend force
flag to add existing repos, 2016-10-06), I'm assuming it's correct.
There are chances I might be missing something, though.

Speaking of correctness, I'm surprised how the port passed the
following test t7400.63 despite the incorrect check.

-- 8< --
$ ./t7400-submodule-basic.sh
... snip ...
ok 62 - add submodules without specifying an explicit path
ok 63 - add should fail when path is used by a file
ok 64 - add should fail when path is used by an existing directory
... snip ...
-- >8 --

Most likely it passed because it slipped through the incorrect check
and failed later in the code[1]. That's not good, of course.

> > 	}
> > 
> > Is this part correct? I am not very sure about this. This particular
> > part is not covered in any test or test script, so, I do not have a
> > solid method of knowing the correctness of this segment.
> > Feedback and reviews are appreciated.


> +static int add_submodule(struct add_data *info)
> +{
> +	/* perhaps the path exists and is already a git repo, else clone it */
> +	if (is_directory(info->sm_path)) {
> +		char *sub_git_path = xstrfmt("%s/.git", info->sm_path);
> +		if (is_directory(sub_git_path) || file_exists(sub_git_path))
> +			printf(_("Adding existing repo at '%s' to the index\n"),
> +				 info->sm_path);
> +		else
> +			die(_("'%s' already exists and is not a valid git repo"),
> +			      info->sm_path);
> +		free(sub_git_path);
> +	} else {
> +		struct strvec clone_args = STRVEC_INIT;
> +		struct child_process cp = CHILD_PROCESS_INIT;
> +		char *submodule_git_dir = xstrfmt(".git/modules/%s", info->sm_name);
> +
> +		if (is_directory(submodule_git_dir)) {
> +			if (!info->force) {
> +				struct child_process cp_rem = CHILD_PROCESS_INIT;
> +				struct strbuf sb_rem = STRBUF_INIT;
> +				cp_rem.git_cmd = 1;
> +				fprintf(stderr, _("A git directory for '%s' is "
> +					"found locally with remote(s):\n"),
> +					info->sm_name);
> +				strvec_pushf(&cp_rem.env_array,
> +					     "GIT_DIR=%s", submodule_git_dir);
> +				strvec_push(&cp_rem.env_array, "GIT_WORK_TREE=.");
> +				strvec_pushl(&cp_rem.args, "remote", "-v", NULL);
> +				if (!capture_command(&cp_rem, &sb_rem, 0)) {
> +					modify_remote_v(&sb_rem);
> +				}
> +				error(_("If you want to reuse this local git "
> +				      "directory instead of cloning again from\n "
> +				      "  %s\n"
> +				      "use the '--force' option. If the local "
> +				      "git directory is not the correct repo\n"
> +				      "or you are unsure what this means choose "
> +				      "another name with the '--name' option."),
> +				      info->realrepo);
> +				return 1;
> +			} else {
> +				printf(_("Reactivating local git directory for "
> +					 "submodule '%s'."), info->sm_path);
> +			}
> +		}
> +		free(submodule_git_dir);

This part results in a difference in error message in shell and C 
versions.

-- 8< --
$ # Shell version
$ git submodule add ../subm1 sub
A git directory for 'sub' is found locally with remote(s):
  origin        /me/subm1
If you want to reuse this local git directory instead of cloning again from
  /me/subm1
use the '--force' option. If the local git directory is not the correct repo
or you are unsure what this means choose another name with the '--name' option.
$
$ # C version
$ git submodule add ../subm1 sub
A git directory for 'sub' is found locally with remote(s):
  origin        /me/subm1
error: If you want to reuse this local git directory instead of cloning again from
   /me/subm1
use the '--force' option. If the local git directory is not the correct repo
or you are unsure what this means choose another name with the '--name' option.
-- >8 --

Note how the third line is oddly prefixed by an `error` unlike the rest
of the lines. It would be nice if we could weed out that inconsistency.
We could probably use `advise()` for printing the last four lines and
`error()` for the lines above them.


Footnote
---
[1]: Looks like not checking for the error message when a command fails
     has its own downsides x-(

--
Sivaraam



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GSoC][PATCH] submodule: port submodule subcommand 'add' from shell to C
  2020-08-24 20:30   ` Kaartic Sivaraam
@ 2020-08-24 20:46     ` Junio C Hamano
  2020-08-26  9:27     ` Shourya Shukla
  1 sibling, 0 replies; 12+ messages in thread
From: Junio C Hamano @ 2020-08-24 20:46 UTC (permalink / raw)
  To: Kaartic Sivaraam
  Cc: Shourya Shukla, git, christian.couder, johannes.schindelin,
	liu.denton, Prathamesh Chavan, Christian Couder, Stefan Beller

Kaartic Sivaraam <kaartic.sivaraam@gmail.com> writes:

>> > 	else
>> > 		git ls-files -s "$sm_path" | sane_grep -v "^160000" > /dev/null 2>&1 &&
>> > 		die "$(eval_gettext "'\$sm_path' already exists in the index and is not a submodule")"
>> > 	fi
>> 
>> Hmph.  So,
>> 
>>  - if we are not being 'force'd, we see if there is anything in the
>>    index for the path and error out, whether it is a gitlink or not.
>> 
>
> Right.
>
>>  - if there is 'force' option, we see what the given path is in the
>>    index, and if it is already a gitlink, then die.  That sort of
>>    makes sense, as long as the remainder of the code deals with the
>>    path that is not a submodule in a sensible way.
>> 
>
> With `force, I think it's the opposite of what you describe. That is:
>
>     - if there is 'force' option, we see what the given path is in the
>       index, and if it is **not** already a gitlink, then die. 
>
> Note the `-v` passed to sane_grep.

Thanks.

Yeah, "-v ^160000" passes (i.e. detects an error) if the path exists
and it is anything but gitlink, so missing path is OK (no input to
grep, and grep won't see a gitlink), a blob is not OK (grep sees
something that is not a gitlink), and a gitlink is OK (grep -v drops
the only line, so nothing is left to match).

If $sm_path is a directory with tracked contents, ls-files would
give multiple entries, and some of which may or may not be a
gitlink, but most of them would not be, so it is likely that grep
would find one entry that is not gitlink and error out.  Which is a
good thing to do.

>> > 	} else {
>> > 		int err;
>> > 		if (index_name_pos(&the_index, path, strlen(path)) >= 0 &&
>> > 		    !is_submodule_populated_gently(path, &err))
>> > 			die(_("'%s' already exists in the index and is not a "
>> > 			      "submodule"), path);
>> 
>> Likewise.  The above does much more than the original.
>> 
>> The original was checking if the found cache entry has 160000 mode
>> bit, so the second test would not be is_submodule_populated_gently()
>> but more like !S_ISGITLINK(ce->ce_mode)
>
> Yeah, the C version does need a more proper check in both cases.

Especially, the case where $sm_path is a directory with tracked
contents in it would need a careful examination.

Thanks.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GSoC][PATCH] submodule: port submodule subcommand 'add' from shell to C
  2020-08-24 18:35 ` Junio C Hamano
  2020-08-24 20:30   ` Kaartic Sivaraam
@ 2020-08-26  9:15   ` Shourya Shukla
  2020-08-30 19:58     ` Kaartic Sivaraam
  1 sibling, 1 reply; 12+ messages in thread
From: Shourya Shukla @ 2020-08-26  9:15 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: git, christian.couder, johannes.schindelin, liu.denton,
	kaartic.sivaraam

On 24/08 11:35, Junio C Hamano wrote:
> Shourya Shukla <shouryashukla.oo@gmail.com> writes:
> 
> > 	if test -z "$force"
> > 	then
> > 		git ls-files --error-unmatch "$sm_path" > /dev/null 2>&1 &&
> > 		die "$(eval_gettext "'\$sm_path' already exists in the index")"
> > 	else
> > 		git ls-files -s "$sm_path" | sane_grep -v "^160000" > /dev/null 2>&1 &&
> > 		die "$(eval_gettext "'\$sm_path' already exists in the index and is not a submodule")"
> > 	fi
> 
> Hmph.  So,
> 
>  - if we are not being 'force'd, we see if there is anything in the
>    index for the path and error out, whether it is a gitlink or not.
> 
>  - if there is 'force' option, we see what the given path is in the
>    index, and if it is already a gitlink, then die.  That sort of
>    makes sense, as long as the remainder of the code deals with the
>    path that is not a submodule in a sensible way.
> 
> > This is what I have done in C:
> >
> > 	if (!force) {
> > 		if (is_directory(path) && submodule_from_path(the_repository, &null_oid, path))
> > 			die(_("'%s' already exists in the index"), path);
> 
> The shell version would error out with anything in the index, so I'd
> expect that a faithful conversion would not call is_directory() nor
> submodule_from_path() at all---it would just look path up in the_index
> and complains if anything is found.  For example, the quoted part in
> the original above is what gives the error message when I do
> 
> 	$ git submodule add ./Makefile
> 	'Makefile' already exists in the index.
> 
> I think.  And the above code won't trigger the "already exists" at
> all because 'path' is not a directory.

Alright. That is correct. I tried to use a multitude of functions but
did not find luck with any of them. The functions I tried:

    - index_path() to check if the path is in the index. For some
      reason, it switched to the 'default' case and returned the
      'unsupported file type' error.

    - A combination of doing an OR with index_file_exists() and
      index_dir_exists(). Still no luck. t7406.43 fails.

    - Using index_name_pos() along with the above two functions. Again a
      failure in the same test.

I feel that index_name_pos() should suffice for this task but it fails in
t7406.43. The SM is in index since 'git ls-files --error-unmatch s1'
does return 's1' (s1 is the submodule). What am I missing here?

> > 	} else {
> > 		int err;
> > 		if (index_name_pos(&the_index, path, strlen(path)) >= 0 &&
> > 		    !is_submodule_populated_gently(path, &err))
> > 			die(_("'%s' already exists in the index and is not a "
> > 			      "submodule"), path);
>
> Likewise.  The above does much more than the original.
>
> The original was checking if the found cache entry has 160000 mode
> bit, so the second test would not be is_submodule_populated_gently()
> but more like !S_ISGITLINK(ce->ce_mode)

Using this results in failure of t7506.[33-40]. I implemented this in
two ways:

    1. Use stat() to initialise the stat st corresponding to the 'path'.
       Then do a '!S_ISGITLINK(st.st_mode)'.

    2. Run a for loop:
		for (i = 0; i < active_nr; i++) {
		const struct cache_entry *ce = active_cache[i];

		if (index_name_pos(&the_index, path, strlen(path)) >= 0 &&
		    !S_ISGITLINK(ce->ce_mode))
			die(_("'%s' already exists in the index and is not a "
			      "submodule"), path);
        }

        Still the tests failed. What is meant by 'active_nr' BTW? I am
        not aware of this term.

Where am I going wrong for both the if-cases?


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GSoC][PATCH] submodule: port submodule subcommand 'add' from shell to C
  2020-08-24 20:30   ` Kaartic Sivaraam
  2020-08-24 20:46     ` Junio C Hamano
@ 2020-08-26  9:27     ` Shourya Shukla
  2020-08-26 10:54       ` Kaartic Sivaraam
  1 sibling, 1 reply; 12+ messages in thread
From: Shourya Shukla @ 2020-08-26  9:27 UTC (permalink / raw)
  To: Kaartic Sivaraam
  Cc: Junio C Hamano, git, christian.couder, johannes.schindelin,
	liu.denton, Prathamesh Chavan, Christian Couder, Stefan Beller

On 8/25/20, Kaartic Sivaraam <kaartic.sivaraam@gmail.com> wrote:
> On Mon, 2020-08-24 at 11:35 -0700, Junio C Hamano wrote:
>> Now it is a different question if the original is correct to begin
>> with ;-).
>>
>
> By looking at commit message of 619acfc78c (submodule add: extend force
> flag to add existing repos, 2016-10-06), I'm assuming it's correct.
> There are chances I might be missing something, though.
>
> Speaking of correctness, I'm surprised how the port passed the
> following test t7400.63 despite the incorrect check.
>
> -- 8< --
> $ ./t7400-submodule-basic.sh
> ... snip ...
> ok 62 - add submodules without specifying an explicit path
> ok 63 - add should fail when path is used by a file
> ok 64 - add should fail when path is used by an existing directory
> ... snip ...
> -- >8 --
>
> Most likely it passed because it slipped through the incorrect check
> and failed later in the code[1]. That's not good, of course.
>
>> > 	}
>> >
>> > Is this part correct? I am not very sure about this. This particular
>> > part is not covered in any test or test script, so, I do not have a
>> > solid method of knowing the correctness of this segment.
>> > Feedback and reviews are appreciated.
>
>
>> +static int add_submodule(struct add_data *info)
>> +{
>> +	/* perhaps the path exists and is already a git repo, else clone it */
>> +	if (is_directory(info->sm_path)) {
>> +		char *sub_git_path = xstrfmt("%s/.git", info->sm_path);
>> +		if (is_directory(sub_git_path) || file_exists(sub_git_path))
>> +			printf(_("Adding existing repo at '%s' to the index\n"),
>> +				 info->sm_path);
>> +		else
>> +			die(_("'%s' already exists and is not a valid git repo"),
>> +			      info->sm_path);
>> +		free(sub_git_path);
>> +	} else {
>> +		struct strvec clone_args = STRVEC_INIT;
>> +		struct child_process cp = CHILD_PROCESS_INIT;
>> +		char *submodule_git_dir = xstrfmt(".git/modules/%s", info->sm_name);
>> +
>> +		if (is_directory(submodule_git_dir)) {
>> +			if (!info->force) {
>> +				struct child_process cp_rem = CHILD_PROCESS_INIT;
>> +				struct strbuf sb_rem = STRBUF_INIT;
>> +				cp_rem.git_cmd = 1;
>> +				fprintf(stderr, _("A git directory for '%s' is "
>> +					"found locally with remote(s):\n"),
>> +					info->sm_name);
>> +				strvec_pushf(&cp_rem.env_array,
>> +					     "GIT_DIR=%s", submodule_git_dir);
>> +				strvec_push(&cp_rem.env_array, "GIT_WORK_TREE=.");
>> +				strvec_pushl(&cp_rem.args, "remote", "-v", NULL);
>> +				if (!capture_command(&cp_rem, &sb_rem, 0)) {
>> +					modify_remote_v(&sb_rem);
>> +				}
>> +				error(_("If you want to reuse this local git "
>> +				      "directory instead of cloning again from\n "
>> +				      "  %s\n"
>> +				      "use the '--force' option. If the local "
>> +				      "git directory is not the correct repo\n"
>> +				      "or you are unsure what this means choose "
>> +				      "another name with the '--name' option."),
>> +				      info->realrepo);
>> +				return 1;
>> +			} else {
>> +				printf(_("Reactivating local git directory for "
>> +					 "submodule '%s'."), info->sm_path);
>> +			}
>> +		}
>> +		free(submodule_git_dir);
>
> This part results in a difference in error message in shell and C
> versions.
>
> -- 8< --
> $ # Shell version
> $ git submodule add ../subm1 sub
> A git directory for 'sub' is found locally with remote(s):
>   origin        /me/subm1
> If you want to reuse this local git directory instead of cloning again from
>   /me/subm1
> use the '--force' option. If the local git directory is not the correct
> repo
> or you are unsure what this means choose another name with the '--name'
> option.
> $
> $ # C version
> $ git submodule add ../subm1 sub
> A git directory for 'sub' is found locally with remote(s):
>   origin        /me/subm1
> error: If you want to reuse this local git directory instead of cloning
> again from
>    /me/subm1
> use the '--force' option. If the local git directory is not the correct
> repo
> or you are unsure what this means choose another name with the '--name'
> option.
> -- >8 --
>
> Note how the third line is oddly prefixed by a `error` unlike the rest
> of the lines. It would be nice if we could weed out that inconsistency.
> We could probably use `advise()` for printing the last four lines and
> `error()` for the lines above them.

Understood. I will correct this part. BTW, you surely are talking
about error() on
the first 2 lines? I think fprintf(stderr, _()) is OK for them otherwise they
will be prefixed by 'error:' which will not be in line with the shell version.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GSoC][PATCH] submodule: port submodule subcommand 'add' from shell to C
  2020-08-26  9:27     ` Shourya Shukla
@ 2020-08-26 10:54       ` Kaartic Sivaraam
  0 siblings, 0 replies; 12+ messages in thread
From: Kaartic Sivaraam @ 2020-08-26 10:54 UTC (permalink / raw)
  To: Shourya Shukla
  Cc: Junio C Hamano, git, christian.couder, johannes.schindelin,
	liu.denton, Prathamesh Chavan, Christian Couder, Stefan Beller

On 26-08-2020 14:57, Shourya Shukla wrote:
> On 8/25/20, Kaartic Sivaraam <kaartic.sivaraam@gmail.com> wrote:
>>
>> This part results in a difference in error message in shell and C
>> versions.
>>
>> -- 8< --
>> $ # Shell version
>> $ git submodule add ../subm1 sub
>> A git directory for 'sub' is found locally with remote(s):
>>   origin        /me/subm1
>> If you want to reuse this local git directory instead of cloning again from
>>   /me/subm1
>> use the '--force' option. If the local git directory is not the correct
>> repo
>> or you are unsure what this means choose another name with the '--name'
>> option.
>> $
>> $ # C version
>> $ git submodule add ../subm1 sub
>> A git directory for 'sub' is found locally with remote(s):
>>   origin        /me/subm1
>> error: If you want to reuse this local git directory instead of cloning
>> again from
>>    /me/subm1
>> use the '--force' option. If the local git directory is not the correct
>> repo
>> or you are unsure what this means choose another name with the '--name'
>> option.
>> -- >8 --
>>
>> Note how the third line is oddly prefixed by a `error` unlike the rest
>> of the lines. It would be nice if we could weed out that inconsistency.
>> We could probably use `advise()` for printing the last four lines and
>> `error()` for the lines above them.
> 
> Understood. I will correct this part. BTW, you surely are talking
> about error() on
> the first 2 lines? I think fprintf(stderr, _()) is OK for them otherwise they
> will be prefixed by 'error:' which will not be in line with the shell version.
> 

Yes. It's better to prefix them with `error` because well... it is an
error. I realize the shell version didn't explicitly do this but that
doesn't necessarily mean the error message was helpful without the
prefix. AFAIK, many Git commands prefix their error messages with
`fatal` or `error` which makes it easy to distinguish error messages
from actual program output in the terminal. So, it's good to do the same
here.

-- 
Sivaraam

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GSoC][PATCH] submodule: port submodule subcommand 'add' from shell to C
  2020-08-26  9:15   ` Shourya Shukla
@ 2020-08-30 19:58     ` Kaartic Sivaraam
  2020-08-31 13:04       ` Shourya Shukla
  0 siblings, 1 reply; 12+ messages in thread
From: Kaartic Sivaraam @ 2020-08-30 19:58 UTC (permalink / raw)
  To: Shourya Shukla
  Cc: Junio C Hamano, git, christian.couder, johannes.schindelin,
	liu.denton

On Wed, 2020-08-26 at 14:45 +0530, Shourya Shukla wrote:
> On 24/08 11:35, Junio C Hamano wrote:
> > Shourya Shukla <shouryashukla.oo@gmail.com> writes:
> > 
> > The shell version would error out with anything in the index, so I'd
> > expect that a faithful conversion would not call is_directory() nor
> > submodule_from_path() at all---it would just look path up in the_index
> > and complains if anything is found.  For example, the quoted part in
> > the original above is what gives the error message when I do
> > 
> > 	$ git submodule add ./Makefile
> > 	'Makefile' already exists in the index.
> > 
> > I think.  And the above code won't trigger the "already exists" at
> > all because 'path' is not a directory.
> 
> Alright. That is correct. I tried to use a multitude of functions but
> did not find luck with any of them. The functions I tried:
> 

It would've been nice to see the actual code you tried, so that others
can more easily identify whether you're using the wrong function or
using the correct function in the wrong way.

>     - index_path() to check if the path is in the index. For some
>       reason, it switched to the 'default' case and return the
>       'unsupported file type' error.
> 
>     - A combination of doing an OR with index_file_exists() and
>       index_dir_exists(). Still no luck. t7406.43 fails.
> 
>     - Using index_name_pos() along with the above two functions. Again a
>       failure in the same test.
> 
> I feel that index_name_pos() should suffice this task but it fails in
> t7406.43. The SM is in index since 'git ls-files --error-unmatch s1'
> does return 's1' (s1 is the submodule). What am I missing here?
> 

You're likely missing the fact that you should call `read_cache` before
using `index_name_pos` or the likes of it.

For instance, the following works without issues for most cases (more
on that below):

        if (read_cache() < 0)
                die(_("index file corrupt"));

        cache_pos = cache_name_pos(path, strlen(path));
        if (cache_pos >= 0) {
                if (!force) {
                        die(_("'%s' already exists in the index"),
                              path);
                }
                else {
                        struct cache_entry *ce = the_index.cache[cache_pos];

                        if (!S_ISGITLINK(ce->ce_mode))
                                die(_("'%s' already exists in the index and is not a "
                                      "submodule"), path);
                }
        }

This is closer to what the shell version did but misses one case,
which might or might not be covered by the test suite[1]: the case when
path is a directory that has tracked contents. In the shell version we
would get:

   $ git submodule add ../git-crypt/ builtin
   'builtin' already exists in the index
   $ git submodule add --force ../git-crypt/ builtin
   'builtin' already exists in the index and is not a submodule

   In the C version with the above snippet we get:

   $ git submodule add --force ../git-crypt/ builtin
   fatal: 'builtin' does not have a commit checked out
   $ git submodule add ../git-crypt/ builtin
   fatal: 'builtin' does not have a commit checked out

   That's not appropriate and should be fixed. I believe we could do
   something with `cache_dir_exists` to fix this.


   Footnote
   ===

   [1]: If it's not covered already, it might be a good idea to add a test
   for the above case.

   --
   Sivaraam



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GSoC][PATCH] submodule: port submodule subcommand 'add' from shell to C
  2020-08-30 19:58     ` Kaartic Sivaraam
@ 2020-08-31 13:04       ` Shourya Shukla
  2020-09-01 20:35         ` Kaartic Sivaraam
  0 siblings, 1 reply; 12+ messages in thread
From: Shourya Shukla @ 2020-08-31 13:04 UTC (permalink / raw)
  To: Kaartic Sivaraam
  Cc: gitster, git, christian.couder, johannes.schindelin, liu.denton

On 31/08 01:28, Kaartic Sivaraam wrote:
> On Wed, 2020-08-26 at 14:45 +0530, Shourya Shukla wrote:
> > On 24/08 11:35, Junio C Hamano wrote:
> > > Shourya Shukla <shouryashukla.oo@gmail.com> writes:
> > > 
> > > The shell version would error out with anything in the index, so I'd
> > > expect that a faithful conversion would not call is_directory() nor
> > > submodule_from_path() at all---it would just look path up in the_index
> > > and complains if anything is found.  For example, the quoted part in
> > > the original above is what gives the error message when I do
> > > 
> > > 	$ git submodule add ./Makefile
> > > 	'Makefile' already exists in the index.
> > > 
> > > I think.  And the above code won't trigger the "already exists" at
> > > all because 'path' is not a directory.
> > 
> > Alright. That is correct. I tried to use a multitude of functions but
> > did not find luck with any of them. The functions I tried:
> > 
> 
> It would've been nice to see the actual code you tried so that it's
> easier for others to more easily identify if you're using the wrong
> function or using the correct function in the wrong way.

Yeah, that is my fault. I will tag along below.

> >     - index_path() to check if the path is in the index. For some
> >       reason, it switched to the 'default' case and return the
> >       'unsupported file type' error.
> > 
> >     - A combination of doing an OR with index_file_exists() and
> >       index_dir_exists(). Still no luck. t7406.43 fails.
> > 
> >     - Using index_name_pos() along with the above two functions. Again a
> >       failure in the same test.
> > 
> > I feel that index_name_pos() should suffice this task but it fails in
> > t7406.43. The SM is in index since 'git ls-files --error-unmatch s1'
> > does return 's1' (s1 is the submodule). What am I missing here?
> > 
> 
> You're likely missing the fact that you should call `read_cache` before
> using `index_name_pos` or the likes of it.

Alright, called it.

> For instance, the following works without issues for most cases (more
> on that below):
> 
>         if (read_cache() < 0)
>                 die(_("index file corrupt"));
> 
>         cache_pos = cache_name_pos(path, strlen(path));
>         if (cache_pos >= 0) {
>                 if (!force) {
>                         die(_("'%s' already exists in the index"),
> path);
>                 }
>                 else {
>                         struct cache_entry *ce = the_index.cache[cache_pos];
> 
>                         if (!S_ISGITLINK(ce->ce_mode))
>                                 die(_("'%s' already exists in the index and is not a "
>                                       "submodule"), path);
>                 }
>         }

I actually did this only using 'index_*()' functions. But made a very
very very silly mistake:
I did a sizeof() instead of strlen() and I did not notice this until
I saw what you did. IDK how I made this mistake.

This is what I have done finally:
---
	if (read_cache() < 0)
		die(_("index file corrupt"));

	if (!force) {
		if (cache_file_exists(path, strlen(path), ignore_case) ||
		    cache_dir_exists(path, strlen(path)))
			die(_("'%s' already exists in the index"), path);
	} else {
		int cache_pos = cache_name_pos(path, strlen(path));
		struct cache_entry *ce = the_index.cache[cache_pos];
		if (cache_pos >= 0 && !S_ISGITLINK(ce->ce_mode))
			die(_("'%s' already exists in the index and is not a "
			      "submodule"), path);
	}
---

I did not put the 'cache_pos >= 0' at the start since I thought that it
will unnecessarily increase an indentation level. Since we are using
'cache_{file,dir}_exists' in the first check and 'cache_name_pos()' in
the second, the placement of check at another indentation level would be
unnecessary. What do you think about this?

> This is more close to what the shell version did but misses one case
> which might or might not be covered by the test suite[1]. The case when
> path is a directory that has tracked contents. In the shell version we
> would get:
> 
>    $ git submodule add ../git-crypt/ builtin
>    'builtin' already exists in the index
>    $ git submodule add --force ../git-crypt/ builtin
>    'builtin' already exists in the index and is not a submodule
> 
>    In the C version with the above snippet we get:
> 
>    $ git submodule add --force ../git-crypt/ builtin
>    fatal: 'builtin' does not have a commit checked out
>    $ git submodule add ../git-crypt/ builtin
>    fatal: 'builtin' does not have a commit checked out
> 
>    That's not appropriate and should be fixed. I believe we could do
>    something with `cache_dir_exists` to fix this.
> 
> 
>    Footnote
>    ===
> 
>    [1]: If it's not covered already, it might be a good idea to add a test
>    for the above case.

Like Junio said, we do not care if it is a file or a directory of any
sort; we will give the error if it already exists. Therefore, whether
it is untracked or tracked should not matter to us. Hence, I feel that
testing for it may not be necessary. Why should we test it?


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GSoC][PATCH] submodule: port submodule subcommand 'add' from shell to C
  2020-08-31 13:04       ` Shourya Shukla
@ 2020-09-01 20:35         ` Kaartic Sivaraam
  2020-09-02 12:04           ` Shourya Shukla
  0 siblings, 1 reply; 12+ messages in thread
From: Kaartic Sivaraam @ 2020-09-01 20:35 UTC (permalink / raw)
  To: Shourya Shukla
  Cc: gitster, git, christian.couder, johannes.schindelin, liu.denton

On Mon, 2020-08-31 at 18:34 +0530, Shourya Shukla wrote:
> On 31/08 01:28, Kaartic Sivaraam wrote:
> 
> This is what I have done finally:
> ---
> 	if (read_cache() < 0)
> 		die(_("index file corrupt"));
> 
> 	if (!force) {
> 		if (cache_file_exists(path, strlen(path), ignore_case) ||
> 		    cache_dir_exists(path, strlen(path)))
> 			die(_("'%s' already exists in the index"), path);
> 	} else {
> 		int cache_pos = cache_name_pos(path, strlen(path));
> 		struct cache_entry *ce = the_index.cache[cache_pos];
> 		if (cache_pos >= 0 && !S_ISGITLINK(ce->ce_mode))
> 			die(_("'%s' already exists in the index and is not a "
> 			      "submodule"), path);
> 	}
> ---
> 
> I did not put the 'cache_pos >= 0' at the start since I thought that it
> will unnecessarily increase an indentation level. Since we are using
> 'cache_{file,dir}_exists' in the first check and 'cache_name_pos()' in
> the second, the placement of check at another indentation level would be
> unnecessary. What do you think about this?
> 

Interestingly, 'cache_dir_exists' seems to work as expected only when
the global ignore_case, whose value seems to depend on core.ignorecase,
is set. So, we can't just rely on 'cache_dir_exists' to identify a
directory that has tracked contents. Apparently, the
'directory_exists_in_index' function in 'dir.c' seems to have the code
that we want here (it is also the only user of 'index_dir_exists', the
function for which 'cache_dir_exists' is a convenience wrapper).

The best idea I could think of is to expose that method and re-use it
here. Given that my knowledge about index and caching is primitive, I'm
not sure if there's a better approach. If others have a better idea for
handling this directory case, do enlighten us.

> > This is more close to what the shell version did but misses one case
> > which might or might not be covered by the test suite[1]. The case when
> > path is a directory that has tracked contents. In the shell version we
> > would get:
> > 
> >    $ git submodule add ../git-crypt/ builtin
> >    'builtin' already exists in the index
> >    $ git submodule add --force ../git-crypt/ builtin
> >    'builtin' already exists in the index and is not a submodule
> > 
> >    In the C version with the above snippet we get:
> > 
> >    $ git submodule add --force ../git-crypt/ builtin
> >    fatal: 'builtin' does not have a commit checked out
> >    $ git submodule add ../git-crypt/ builtin
> >    fatal: 'builtin' does not have a commit checked out
> > 
> >    That's not appropriate and should be fixed. I believe we could do
> >    something with `cache_dir_exists` to fix this.
> > 
> > 
> >    Footnote
> >    ===
> > 
> >    [1]: If it's not covered already, it might be a good idea to add a test
> >    for the above case.
> 
> Like Junio said, we do not care if it is a file or a directory of any
> sorts, we will give the error if it already exists. Therefore, even if
> it is an untracked or a tracked one, it should not matter to us. Hence
> testing for it may not be necessary is what I feel. Why should we test
> it?

I'm guessing you misunderstood. A few things:

- We only care about tracked contents for the case in hand.

- Identifying whether a given path corresponds to a directory
  which has tracked contents is tricky. Neither 'cache_name_pos'
  nor 'cache_file_exists' handle this. 'cache_dir_exists' is also
  not very useful as mentioned above.

So, we do have to take care when handling that case as Junio pointed
out.

-- 
Sivaraam



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GSoC][PATCH] submodule: port submodule subcommand 'add' from shell to C
  2020-09-01 20:35         ` Kaartic Sivaraam
@ 2020-09-02 12:04           ` Shourya Shukla
  2020-09-03  8:46             ` Kaartic Sivaraam
  0 siblings, 1 reply; 12+ messages in thread
From: Shourya Shukla @ 2020-09-02 12:04 UTC (permalink / raw)
  To: Kaartic Sivaraam
  Cc: gitster, git, christian.couder, johannes.schindelin, liu.denton

On 02/09 02:05, Kaartic Sivaraam wrote:
> On Mon, 2020-08-31 at 18:34 +0530, Shourya Shukla wrote:
> > On 31/08 01:28, Kaartic Sivaraam wrote:
> > 
> > This is what I have done finally:
> > ---
> > 	if (read_cache() < 0)
> > 		die(_("index file corrupt"));
> > 
> > 	if (!force) {
> > 		if (cache_file_exists(path, strlen(path), ignore_case) ||
> > 		    cache_dir_exists(path, strlen(path)))
> > 			die(_("'%s' already exists in the index"), path);
> > 	} else {
> > 		int cache_pos = cache_name_pos(path, strlen(path));
> > 		struct cache_entry *ce = the_index.cache[cache_pos];
> > 		if (cache_pos >= 0 && !S_ISGITLINK(ce->ce_mode))
> > 			die(_("'%s' already exists in the index and is not a "
> > 			      "submodule"), path);
> > 	}
> > ---
> > 
> > I did not put the 'cache_pos >= 0' at the start since I thought that it
> > will unnecessarily increase an indentation level. Since we are using
> > 'cache_{file,dir}_exists' in the first check and 'cache_name_pos()' in
> > the second, the placement of check at another indentation level would be
> > unnecessary. What do you think about this?
> > 
> 
> Interestingly. 'cache_dir_exists' seems to work as expected only when
> the global ignore_case whose value seems to depend on core.ignorecase.
> So, we can't just rely on 'cache_dir_exists to identify a directory
> that has tracked contents. Apparently, the 'directory_exists_in_index'
> in 'dir.c' seems to have the code that we want here (which is also the
> only user of 'index_dir_exists'; the function for which
> 'cache_dir_exists' is a convenience wrapper.

I think both 'cache_{dir,file}_exists()' depend on 'core.ignorecase'
though I am not able to confirm this for 'cache_dir_exists()'. Where
exactly does this happen for the function? The function you mention
seems perfect to me, though, we will also have to make the enum
'exist_status' visible. Will that be fine? The final output will be:
---
	if (!force) {
		if (directory_exists_in_index(&the_index, path, strlen(path)))
			die(_("'%s' already exists in the index"), path);
	} else {
		int cache_pos = cache_name_pos(path, strlen(path));
		struct cache_entry *ce = the_index.cache[cache_pos];
		if (cache_pos >= 0 && !S_ISGITLINK(ce->ce_mode))
			die(_("'%s' already exists in the index and is not a "
			      "submodule"), path);
	}
---


And obviously an extra commit changing the visibility of the function
and the enum.
 
> > > This is more close to what the shell version did but misses one case
> > > which might or might not be covered by the test suite[1]. The case when
> > > path is a directory that has tracked contents. In the shell version we
> > > would get:
> > > 
> > >    $ git submodule add ../git-crypt/ builtin
> > >    'builtin' already exists in the index
> > >    $ git submodule add --force ../git-crypt/ builtin
> > >    'builtin' already exists in the index and is not a submodule
> > > 
> > >    In the C version with the above snippet we get:
> > > 
> > >    $ git submodule add --force ../git-crypt/ builtin
> > >    fatal: 'builtin' does not have a commit checked out
> > >    $ git submodule add ../git-crypt/ builtin
> > >    fatal: 'builtin' does not have a commit checked out
> > > 
> > >    That's not appropriate and should be fixed. I believe we could do
> > >    something with `cache_dir_exists` to fix this.
> > > 
> > > 
> > >    Footnote
> > >    ===
> > > 
> > >    [1]: If it's not covered already, it might be a good idea to add a test
> > >    for the above case.
> > 
> > Like Junio said, we do not care if it is a file or a directory of any
> > sorts, we will give the error if it already exists. Therefore, even if
> > it is an untracked or a tracked one, it should not matter to us. Hence
> > testing for it may not be necessary is what I feel. Why should we test
> > it?
> 
> I'm guessing you misunderstood. A few things:
> 
> - We only care about tracked contents for the case in hand.
> 
> - Identifying whether a given path corresponds to a directory
>   which has tracked contents is tricky. Neither 'cache_name_pos'
>   nor 'cache_file_exists' handle this. 'cache_dir_exists' is also
>   not very useful as mentioned above.
> 
> So, we do have to take care when handling that case as Junio pointed
> out.

I still do not understand this case. Let's say this was our
superproject:

.gitmodules .git/ a.txt dir1/

And we did:
    $ git submodule add <url> dir1/

Now, at this point, how does it matter whether 'dir1/' has tracked
content or not? A directory exists with that name, and so we do not add
the SM to that path.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GSoC][PATCH] submodule: port submodule subcommand 'add' from shell to C
  2020-09-02 12:04           ` Shourya Shukla
@ 2020-09-03  8:46             ` Kaartic Sivaraam
  0 siblings, 0 replies; 12+ messages in thread
From: Kaartic Sivaraam @ 2020-09-03  8:46 UTC (permalink / raw)
  To: Shourya Shukla
  Cc: gitster, git, christian.couder, johannes.schindelin, liu.denton,
	Elijah Newren, Martin Ågren

+Cc: Elijah Newren, Martin Ågren

On Wed, 2020-09-02 at 17:34 +0530, Shourya Shukla wrote:
> On 02/09 02:05, Kaartic Sivaraam wrote:
> > On Mon, 2020-08-31 at 18:34 +0530, Shourya Shukla wrote:
> > > On 31/08 01:28, Kaartic Sivaraam wrote:
> > > 
> > > This is what I have done finally:
> > > ---
> > > 	if (read_cache() < 0)
> > > 		die(_("index file corrupt"));
> > > 
> > > 	if (!force) {
> > > 		if (cache_file_exists(path, strlen(path), ignore_case) ||
> > > 		    cache_dir_exists(path, strlen(path)))
> > > 			die(_("'%s' already exists in the index"), path);
> > > 	} else {
> > > 		int cache_pos = cache_name_pos(path, strlen(path));
> > > 		struct cache_entry *ce = the_index.cache[cache_pos];
> > > 		if (cache_pos >= 0 && !S_ISGITLINK(ce->ce_mode))
> > > 			die(_("'%s' already exists in the index and is not a "
> > > 			      "submodule"), path);
> > > 	}
> > > ---
> > > 
> > > I did not put the 'cache_pos >= 0' at the start since I thought that it
> > > will unnecessarily increase an indentation level. Since we are using
> > > 'cache_{file,dir}_exists' in the first check and 'cache_name_pos()' in
> > > the second, the placement of check at another indentation level would be
> > > unnecessary. What do you think about this?
> > > 
> > 
> > Interestingly, 'cache_dir_exists' seems to work as expected only when
> > the global 'ignore_case' (whose value seems to depend on
> > core.ignorecase) is set. So, we can't just rely on 'cache_dir_exists'
> > to identify a directory that has tracked contents. Apparently,
> > 'directory_exists_in_index' in 'dir.c' seems to have the code that we
> > want here (it is also the only user of 'index_dir_exists', the function
> > for which 'cache_dir_exists' is a convenience wrapper).
> 
> I think both 'cache_{dir,file}_exists()' depend on 'core.ignorecase'
> though I am not able to confirm this for 'cache_dir_exists()'. Where
> exactly does this happen for the function?

As you can see in 'name-hash.c', 'index_file_exists' and thereby
'cache_file_exists' work using the 'name_hash' stored in the index. If
you look at the flow of 'lazy_init_name_hash', you'll see how
'name_hash' gets initialized and populated regardless of the value of
'ignore_case'. OTOH, 'dir_hash' is populated only when 'ignore_case' is
true. So, it seems that only 'cache_dir_exists' depends on the
value of 'ignore_case'.

>  The function you mention
> seems perfect to me, though, we will also have to make the enum
> 'exist_status' visible. Will that be fine?

To me that appears to be the only way forward other than spawning a
call to ls-files as was done in one of the earlier versions that was not
sent to the list. Anyway, I'm not the best person to answer this
question. So, I've CC-ed a couple of people who might be able to shed
some light for us.

>  The final output will be:
> ---
> 	if (!force) {
> 		if (directory_exists_in_index(&the_index, path, strlen(path)))
> 			die(_("'%s' already exists in the index"), path);
> 	} else {
> 		int cache_pos = cache_name_pos(path, strlen(path));
> 		struct cache_entry *ce = the_index.cache[cache_pos];
> 		if (cache_pos >= 0 && !S_ISGITLINK(ce->ce_mode))
> 			die(_("'%s' already exists in the index and is not a "
> 			      "submodule"), path);
> 	}
> ---
> 
> 

The above doesn't handle all cases. In particular, we want to handle
the case of tracked files when `force` is not given
(directory_exists_in_index certainly doesn't handle that). We also need
to handle directories with tracked contents when force is given (we
already know cache_name_pos is not sufficient to handle them). So, I
think we would want something along the lines of the following:

        if (read_cache() < 0)
                die(_("index file corrupt"));

        cache_pos = cache_name_pos(path, strlen(path));
        if (cache_pos < 0 &&
            directory_exists_in_index(&the_index, path, strlen(path)) == index_directory) {
                directory_in_cache = 1;
        }

        if (!force) {
               if (cache_pos >= 0 || directory_in_cache)
                        die(_("'%s' already exists in the index"), path);
        }
        else {
                struct cache_entry *ce = NULL;
                if (cache_pos >= 0)
                {
                        ce = the_index.cache[cache_pos];
                }

                if (directory_in_cache || (ce && !S_ISGITLINK(ce->ce_mode))) {
                        die(_("'%s' already exists in the index and is not a "
                              "submodule"), path);
                }
        }

After seeing this, I'm starting to think it's better to have this in a
separate helper function instead of making the `module_add` function
even longer than it already is.

> And obviously an extra commit changing the visibility of the function
> and the enum.
>  
> > > > This is more close to what the shell version did but misses one case
> > > > which might or might not be covered by the test suite[1]. The case when
> > > > path is a directory that has tracked contents. In the shell version we
> > > > would get:
> > > > 
> > > >    $ git submodule add ../git-crypt/ builtin
> > > >    'builtin' already exists in the index
> > > >    $ git submodule add --force ../git-crypt/ builtin
> > > >    'builtin' already exists in the index and is not a submodule
> > > > 
> > > >    In the C version with the above snippet we get:
> > > > 
> > > >    $ git submodule add --force ../git-crypt/ builtin
> > > >    fatal: 'builtin' does not have a commit checked out
> > > >    $ git submodule add ../git-crypt/ builtin
> > > >    fatal: 'builtin' does not have a commit checked out
> > > > 
> > > >    That's not appropriate and should be fixed. I believe we could do
> > > >    something with `cache_dir_exists` to fix this.
> > > > 
> > > > 
> > > >    Footnote
> > > >    ===
> > > > 
> > > >    [1]: If it's not covered already, it might be a good idea to add a test
> > > >    for the above case.
> > > 
> > > Like Junio said, we do not care if it is a file or a directory of any
> > > sorts, we will give the error if it already exists. Therefore, even if
> > > it is an untracked or a tracked one, it should not matter to us. Hence
> > > testing for it may not be necessary is what I feel. Why should we test
> > > it?
> > 
> > I'm guessing you misunderstood. A few things:
> > 
> > - We only care about tracked contents for the case in hand.
> > 
> > - Identifying whether a given path corresponds to a directory
> >   which has tracked contents is tricky. Neither 'cache_name_pos'
> >   nor 'cache_file_exists' handle this. 'cache_dir_exists' is also
> >   not very useful as mentioned above.
> > 
> > So, we do have to take care when handling that case as Junio pointed
> > out.
> 
> I still do not understand this case. Let's say this was our
> superproject:
> 
> .gitmodules .git/ a.txt dir1/
> 
> And we did:
>     $ git submodule add <url> dir1/
> 
> Now, at this point, how does it matter if 'dir1/' has tracked content or
> not right? A directory exists with that name and now we do not add the
> SM to that path.
> 

I'm guessing you're looking at it in a more general sense of the
command workflow. I was speaking only about the following snippet of
the shell script which we're trying to emulate now:

        if test -z "$force"
        then
                git ls-files --error-unmatch "$sm_path" > /dev/null 2>&1 &&
                die "$(eval_gettext "'\$sm_path' already exists in the index")"
        else
                git ls-files -s "$sm_path" | sane_grep -v "^160000" > /dev/null 2>&1 &&
                die "$(eval_gettext "'\$sm_path' already exists in the index and is not a submodule")"
        fi

When sm_path is an empty directory or a directory that has no tracked
contents the 'ls-files' command would fail and we apparently will *not*
get an error stating the path already exists in the index. The command
might fail in a later part of the code but that's not what I'm talking
about.

A few other things I noticed:

> +       strbuf_addstr(&sb, path);
> +       if (is_directory(path)) {

I remember mentioning to you that the 'is_directory' check is
sufficient here and the 'is_nonbare_repository_dir' is not necessary
here as 'resolve_gitlink_ref' already takes care of it. Unfortunately,
it looks like without the 'is_nonbare_repository_dir' check we get the
following unhelpful error message when the path is a directory that
_exists_ and is ignored in .gitignore:

   $ git submodule add ../git-crypt/ Debug
   fatal: 'Debug' does not have a commit checked out

   The shell version did not have this problem and gave the following
   appropriate error message:

   $ git submodule add ../git-crypt/ Debug
   The following paths are ignored by one of your .gitignore files:
   Debug
   hint: Use -f if you really want to add them.
   hint: Turn this message off by running
   hint: "git config advice.addIgnoredFile false"

      So, we should check whether the given directory is a non-bare
      repository before calling 'resolve_gitlink_ref' to be consistent with
      what the shell version does.

      For the record, this isn't caught by the 'submodule add to
      .gitignored path fails' test in t7400 as the corresponding directory
      doesn't exist there. So, our 'is_directory' check fails and we don't
      call 'resolve_gitlink_ref'.

> +               struct object_id oid;
> +               if (resolve_gitlink_ref(path, "HEAD", &oid) < 0)
> +                       die(_("'%s' does not have a commit checked out"), path);
> +       }
> +
> +       if (!force) {
> +               struct strbuf sb = STRBUF_INIT;
> +               struct child_process cp = CHILD_PROCESS_INIT;
> +               cp.git_cmd = 1;
> +               cp.no_stdout = 1;
> +               strvec_pushl(&cp.args, "add", "--dry-run", "--ignore-missing",
> +                            "--no-warn-embedded-repo", path, NULL);
> +               if (pipe_command(&cp, NULL, 0, NULL, 0, &sb, 0))
> +                       die(_("%s"), sb.buf);

Using 'die' to print an already formatted error message of a command
results in an additional newline which looks ugly. For reference, here
is the output from the shell and C versions of the command:

-- 8< --
$ # Shell version
$ git submodule add ../parent/ submod
The following paths are ignored by one of your .gitignore files:
submod
hint: Use -f if you really want to add them.
hint: Turn this message off by running
hint: "git config advice.addIgnoredFile false"
$ # C version
$ git submodule add ../parent/ submod
fatal: The following paths are ignored by one of your .gitignore files:
submod
hint: Use -f if you really want to add them.
hint: Turn this message off by running
hint: "git config advice.addIgnoredFile false"

$
-- >8 --

So, it would be nice if we used 'fprintf(stderr, ...)' or something like
that so that we don't get the additional newline.

> +               strbuf_release(&sb);
> +       }
> 

-- 
Sivaraam



^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2020-09-03  8:46 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-24  9:03 [GSoC][PATCH] submodule: port submodule subcommand 'add' from shell to C Shourya Shukla
2020-08-24 18:35 ` Junio C Hamano
2020-08-24 20:30   ` Kaartic Sivaraam
2020-08-24 20:46     ` Junio C Hamano
2020-08-26  9:27     ` Shourya Shukla
2020-08-26 10:54       ` Kaartic Sivaraam
2020-08-26  9:15   ` Shourya Shukla
2020-08-30 19:58     ` Kaartic Sivaraam
2020-08-31 13:04       ` Shourya Shukla
2020-09-01 20:35         ` Kaartic Sivaraam
2020-09-02 12:04           ` Shourya Shukla
2020-09-03  8:46             ` Kaartic Sivaraam

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).