git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCHv2] submodule--helper: initial clone learns retry logic
@ 2016-06-10  0:35 Stefan Beller
  2016-06-10 23:31 ` Stefan Beller
  0 siblings, 1 reply; 2+ messages in thread
From: Stefan Beller @ 2016-06-10  0:35 UTC (permalink / raw
  To: gitster; +Cc: git, Jens.Lehmann, Stefan Beller

Each submodule that is attempted to be cloned, will be retried once in
case of failure after all other submodules were cloned. This helps to
mitigate ephemeral server failures and increases chances of a reliable
clone of a repo with hundreds of submodules immensely.

Signed-off-by: Stefan Beller <sbeller@google.com>
---

 This patch doesn't do pointer forbidden magic, also we don't abuse the
 prio queue.
 
 The API for running parallel commands isn't quite designed for this
 though as we need to pass around an int instead of a pointer, so we wrap
 the int into a int*, which will create a xmalloc/free for each submodule.
 
 This replaces the patch [1/2] in the series 
 "[PATCH 0/2] Dealing with a lot of submodules" 
 
 Thanks,
 Stefan

 builtin/submodule--helper.c | 66 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 59 insertions(+), 7 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index c7deb55..6424b40 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -590,10 +590,14 @@ struct submodule_update_clone {
 
 	/* If we want to stop as fast as possible and return an error */
 	unsigned quickstop : 1;
+
+	/* failed clones to be retried again */
+	const struct cache_entry **failed_clones;
+	int failed_clones_nr, failed_clones_alloc;
 };
 #define SUBMODULE_UPDATE_CLONE_INIT {0, MODULE_LIST_INIT, 0, \
 	SUBMODULE_UPDATE_STRATEGY_INIT, 0, -1, NULL, NULL, NULL, NULL, \
-	STRING_LIST_INIT_DUP, 0}
+	STRING_LIST_INIT_DUP, 0, NULL, 0, 0}
 
 
 static void next_submodule_warn_missing(struct submodule_update_clone *suc,
@@ -718,23 +722,47 @@ cleanup:
 static int update_clone_get_next_task(struct child_process *child,
 				      struct strbuf *err,
 				      void *suc_cb,
-				      void **void_task_cb)
+				      void **idx_task_cb)
 {
 	struct submodule_update_clone *suc = suc_cb;
+	const struct cache_entry *ce;
+	int index;
 
 	for (; suc->current < suc->list.nr; suc->current++) {
-		const struct cache_entry *ce = suc->list.entries[suc->current];
+		ce = suc->list.entries[suc->current];
 		if (prepare_to_clone_next_submodule(ce, child, suc, err)) {
+			int *p = xmalloc(sizeof(*p));
+			*p = suc->current;
+			*idx_task_cb = p;
 			suc->current++;
 			return 1;
 		}
 	}
+
+	/*
+	 * The loop above tried cloning each submodule once, now try the
+	 * stragglers again, which we can imagine as an extension of the
+	 * entry list.
+	 */
+	index = suc->current - suc->list.nr;
+	if (index < suc->failed_clones_nr) {
+		int *p;
+		ce = suc->failed_clones[index];
+		if (!prepare_to_clone_next_submodule(ce, child, suc, err))
+			die("BUG: ce was a submodule before?");
+		p = xmalloc(sizeof(*p));
+		*p = suc->current;
+		*idx_task_cb = p;
+		suc->current ++;
+		return 1;
+	}
+
 	return 0;
 }
 
 static int update_clone_start_failure(struct strbuf *err,
 				      void *suc_cb,
-				      void *void_task_cb)
+				      void *idx_task_cb)
 {
 	struct submodule_update_clone *suc = suc_cb;
 	suc->quickstop = 1;
@@ -744,15 +772,39 @@ static int update_clone_start_failure(struct strbuf *err,
 static int update_clone_task_finished(int result,
 				      struct strbuf *err,
 				      void *suc_cb,
-				      void *void_task_cb)
+				      void *idx_task_cb)
 {
+	const struct cache_entry *ce;
 	struct submodule_update_clone *suc = suc_cb;
 
+	int *idxP = *(int**)idx_task_cb;
+	int idx = *idxP;
+	free(idxP);
+
 	if (!result)
 		return 0;
 
-	suc->quickstop = 1;
-	return 1;
+	if (idx < suc->list.nr) {
+		ce  = suc->list.entries[idx];
+		strbuf_addf(err, _("Failed to clone '%s'. Retry scheduled"),
+			    ce->name);
+		strbuf_addch(err, '\n');
+		ALLOC_GROW(suc->failed_clones,
+			   suc->failed_clones_nr + 1,
+			   suc->failed_clones_alloc);
+		suc->failed_clones[suc->failed_clones_nr++] = ce;
+		return 0;
+	} else {
+		idx = suc->current - suc->list.nr;
+		ce  = suc->failed_clones[idx];
+		strbuf_addf(err, _("Failed to clone '%s' a second time, aborting"),
+			    ce->name);
+		strbuf_addch(err, '\n');
+		suc->quickstop = 1;
+		return 1;
+	}
+
+	return 0;
 }
 
 static int update_clone(int argc, const char **argv, const char *prefix)
-- 
2.9.0.rc2.365.gb48bdfb

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCHv2] submodule--helper: initial clone learns retry logic
  2016-06-10  0:35 [PATCHv2] submodule--helper: initial clone learns retry logic Stefan Beller
@ 2016-06-10 23:31 ` Stefan Beller
  0 siblings, 0 replies; 2+ messages in thread
From: Stefan Beller @ 2016-06-10 23:31 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git@vger.kernel.org, Jens Lehmann, Stefan Beller

On Thu, Jun 9, 2016 at 5:35 PM, Stefan Beller <sbeller@google.com> wrote:
> Each submodule that is attempted to be cloned, will be retried once in
> case of failure after all other submodules were cloned. This helps to
> mitigate ephemeral server failures and increases chances of a reliable
> clone of a repo with hundreds of submodules immensely.
>
> Signed-off-by: Stefan Beller <sbeller@google.com>
> ---
>
>  This patch doesn't do pointer forbidden magic, also we don't abuse the
>  prio queue.
>
>  The API for running parallel commands isn't quite designed for this
>  though as we need to pass around an int instead of a pointer, so we wrap
>  the int into a int*, which will create a xmalloc/free for each submodule.
>
>  This replaces the patch [1/2] in the series
>  "[PATCH 0/2] Dealing with a lot of submodules"
>
>  Thanks,
>  Stefan
>
>  builtin/submodule--helper.c | 66 ++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 59 insertions(+), 7 deletions(-)
>
> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
> index c7deb55..6424b40 100644
> --- a/builtin/submodule--helper.c
> +++ b/builtin/submodule--helper.c
> @@ -590,10 +590,14 @@ struct submodule_update_clone {
>
>         /* If we want to stop as fast as possible and return an error */
>         unsigned quickstop : 1;
> +
> +       /* failed clones to be retried again */
> +       const struct cache_entry **failed_clones;
> +       int failed_clones_nr, failed_clones_alloc;
>  };
>  #define SUBMODULE_UPDATE_CLONE_INIT {0, MODULE_LIST_INIT, 0, \
>         SUBMODULE_UPDATE_STRATEGY_INIT, 0, -1, NULL, NULL, NULL, NULL, \
> -       STRING_LIST_INIT_DUP, 0}
> +       STRING_LIST_INIT_DUP, 0, NULL, 0, 0}
>
>
>  static void next_submodule_warn_missing(struct submodule_update_clone *suc,
> @@ -718,23 +722,47 @@ cleanup:
>  static int update_clone_get_next_task(struct child_process *child,
>                                       struct strbuf *err,
>                                       void *suc_cb,
> -                                     void **void_task_cb)
> +                                     void **idx_task_cb)
>  {
>         struct submodule_update_clone *suc = suc_cb;
> +       const struct cache_entry *ce;
> +       int index;
>
>         for (; suc->current < suc->list.nr; suc->current++) {
> -               const struct cache_entry *ce = suc->list.entries[suc->current];
> +               ce = suc->list.entries[suc->current];
>                 if (prepare_to_clone_next_submodule(ce, child, suc, err)) {
> +                       int *p = xmalloc(sizeof(*p));
> +                       *p = suc->current;
> +                       *idx_task_cb = p;
>                         suc->current++;
>                         return 1;
>                 }
>         }
> +
> +       /*
> +        * The loop above tried cloning each submodule once, now try the
> +        * stragglers again, which we can imagine as an extension of the
> +        * entry list.
> +        */
> +       index = suc->current - suc->list.nr;
> +       if (index < suc->failed_clones_nr) {
> +               int *p;
> +               ce = suc->failed_clones[index];
> +               if (!prepare_to_clone_next_submodule(ce, child, suc, err))
> +                       die("BUG: ce was a submodule before?");
> +               p = xmalloc(sizeof(*p));
> +               *p = suc->current;
> +               *idx_task_cb = p;
> +               suc->current ++;
> +               return 1;
> +       }
> +
>         return 0;
>  }
>
>  static int update_clone_start_failure(struct strbuf *err,
>                                       void *suc_cb,
> -                                     void *void_task_cb)
> +                                     void *idx_task_cb)
>  {

Here we would leak the pointer, but as we quickstop soon, this may not
be a problem.

>         struct submodule_update_clone *suc = suc_cb;
>         suc->quickstop = 1;
> @@ -744,15 +772,39 @@ static int update_clone_start_failure(struct strbuf *err,
>  static int update_clone_task_finished(int result,
>                                       struct strbuf *err,
>                                       void *suc_cb,
> -                                     void *void_task_cb)
> +                                     void *idx_task_cb)
>  {
> +       const struct cache_entry *ce;
>         struct submodule_update_clone *suc = suc_cb;
>
> +       int *idxP = *(int**)idx_task_cb;
> +       int idx = *idxP;
> +       free(idxP);
> +
>         if (!result)
>                 return 0;
>
> -       suc->quickstop = 1;
> -       return 1;
> +       if (idx < suc->list.nr) {
> +               ce  = suc->list.entries[idx];
> +               strbuf_addf(err, _("Failed to clone '%s'. Retry scheduled"),
> +                           ce->name);
> +               strbuf_addch(err, '\n');
> +               ALLOC_GROW(suc->failed_clones,
> +                          suc->failed_clones_nr + 1,
> +                          suc->failed_clones_alloc);
> +               suc->failed_clones[suc->failed_clones_nr++] = ce;
> +               return 0;
> +       } else {
> +               idx = suc->current - suc->list.nr;
> +               ce  = suc->failed_clones[idx];
> +               strbuf_addf(err, _("Failed to clone '%s' a second time, aborting"),
> +                           ce->name);
> +               strbuf_addch(err, '\n');
> +               suc->quickstop = 1;
> +               return 1;
> +       }
> +
> +       return 0;
>  }

I wonder how we can test this properly as the git binary we have here
is too reliable,
but I observed the correctness of this patch when cloning a repo with
lots of submodules
and one of them failing.

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2016-06-10 23:31 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-06-10  0:35 [PATCHv2] submodule--helper: initial clone learns retry logic Stefan Beller
2016-06-10 23:31 ` Stefan Beller

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).