git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Edward Thomson <ethomson@edwardthomson.com>
To: git@vger.kernel.org
Subject: [RFC PATCH 1/1] recover: restoration of deleted worktree files
Date: Sat, 4 Aug 2018 14:24:16 +0000	[thread overview]
Message-ID: <20180804142416.GA6@5f28dc333bbd> (raw)
In-Reply-To: <20180804142247.GA7@e3c0ce5ceb57>

Introduce git-recover, a simple script to aid in restoration of deleted
worktree files.  This will look for unreachable blobs in the object
database and prompt users to restore them to disk, either interactively
or on the command-line.
---
 git-recover.sh | 311 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 311 insertions(+)
 create mode 100755 git-recover.sh

diff --git a/git-recover.sh b/git-recover.sh
new file mode 100755
index 000000000..651d4116f
--- /dev/null
+++ b/git-recover.sh
@@ -0,0 +1,311 @@
+#!/usr/bin/env bash
+#
+# This program helps recover files in your repository that were deleted
+# from the working tree.
+#
+# Copyright (c) 2017-2018 Edward Thomson.
+
+set -e
+
+IFS=$'\n'
+
+PROGNAME=$(echo "$0" | sed -e 's/.*\///')
+GIT_DIR=$(git rev-parse --git-dir)
+
+DO_RECOVER=0
+DO_FULL=0
+DO_INTERACTIVE=0
+BLOBS=()
+FILENAMES=()
+
+function die_usage {
+	echo "usage: $PROGNAME [-a] [-i] [--full] [<id> [-f <filename>] ...]" >&2
+	exit 1
+}
+
+# Walk the command line.  Switches set the mode flags; anything else is an
+# object id, optionally followed by "-f <filename>" naming its output file.
+while [[ $# -gt 0 ]]; do
+	arg=$1
+	shift
+
+	case "$arg" in
+	-a|--all)
+		DO_RECOVER=1
+		;;
+	-i|--interactive)
+		DO_INTERACTIVE=1
+		;;
+	--full)
+		DO_FULL=1
+		;;
+	-*)
+		echo "$PROGNAME: unknown argument: $arg" >&2
+		die_usage
+		;;
+	*)
+		BLOBS+=("$arg")
+
+		# Keep FILENAMES aligned with BLOBS: every id gets an entry,
+		# empty when no file name follows it.
+		case "$1" in
+		-f|--filename)
+			shift
+			[[ $# -gt 0 ]] || die_usage
+			FILENAMES+=("$1")
+			shift
+			;;
+		*)
+			FILENAMES+=("")
+			;;
+		esac
+		;;
+	esac
+done
+
+# Explicit ids imply recovery, and cannot be combined with --all.
+if [ ${#BLOBS[@]} -gt 0 ]; then
+	if [ "$DO_RECOVER" == 1 ]; then
+		die_usage
+	fi
+	DO_RECOVER=1
+fi
+
+# Remember whether the BSD flavours of stat and date must be used.
+IS_BSD=0
+case "$OSTYPE" in
+darwin*|freebsd*)
+	IS_BSD=1
+	;;
+esac
+
+# Resolve every user-supplied id in BLOBS to a full object id, in place.
+#
+# Globals: BLOBS (read and rewritten), PROGNAME (read).
+# Exits with status 1, after a message on stderr, when an id does not
+# resolve to an object or resolves to something other than a blob.
+function expand_given_blobs() {
+	local i ID TYPE
+
+	for i in "${!BLOBS[@]}"; do
+		# "|| true" keeps "set -e" from aborting before the failure can
+		# be reported below.
+		ID=$(git rev-parse --verify "${BLOBS[$i]}" 2>/dev/null || true)
+
+		if [ -z "$ID" ]; then
+			echo "$PROGNAME: ${BLOBS[$i]} is not a valid object." 1>&2
+			exit 1
+		fi
+
+		TYPE=$(git cat-file -t "${ID}" 2>/dev/null || true)
+
+		if [ "$TYPE" != "blob" ]; then
+			echo "$PROGNAME: ${BLOBS[$i]} is not a blob." 1>&2
+			# A bare "exit" would reuse the status of the echo above (0)
+			# and report success for what is an error.
+			exit 1
+		fi
+
+		BLOBS[$i]=$ID
+	done
+}
+
+# find all the unreachable blobs
+function find_unreachable() {
+	FULLNESS="--no-full"
+
+	if [ $DO_FULL == 1 ]; then FULLNESS="--full"; fi
+
+	BLOBS=($(git fsck --unreachable --no-reflogs \
+		"${FULLNESS}" --no-progress | sed -ne 's/^unreachable blob //p'))
+}
+
+# Write the contents of a blob to stdout.
+#
+# $1 - object id of the blob
+# $2 - optional path; when non-empty it is passed to "git cat-file" as
+#      --filters --path=<path>, otherwise the blob is printed as-is
+read_one_file() {
+	BLOB=$1
+	FILTER_NAME=$2
+
+	ARGS=("blob")
+	if [ -n "$FILTER_NAME" ]; then
+		ARGS=("--filters" "--path=$FILTER_NAME")
+	fi
+
+	git cat-file "${ARGS[@]}" "$BLOB"
+}
+
+function write_one_file {
+	BLOB=$1
+	FILTER_NAME=$2
+	OUTPUT_NAME=$3
+
+	ABBREV=$(git rev-parse --short "${BLOB}")
+
+	echo -n "Writing $ABBREV: "
+	read_one_file "$BLOB" "$FILTER_NAME" > "$OUTPUT_NAME"
+	echo "$OUTPUT_NAME."
+}
+
+function unique_filename {
+	if [ ! -f "${BLOB}" ]; then
+		echo "$BLOB"
+	else
+		cnt=1
+		while true
+		do
+			fn="${BLOB}~${cnt}"
+			if [ ! -f "${fn}" ]; then
+				echo "${fn}"
+				break
+			fi
+			cnt=$((cnt+1))
+		done
+	fi
+}
+
+# Write every blob in BLOBS to disk.  A blob with a matching FILENAMES
+# entry goes to that file; the others get a name from unique_filename.
+write_recoverable() {
+	for i in "${!BLOBS[@]}"; do
+		BLOB=${BLOBS[$i]}
+		FILTER_NAME=${FILENAMES[$i]}
+
+		OUTPUT_NAME=$FILTER_NAME
+		if [ -z "$OUTPUT_NAME" ]; then
+			OUTPUT_NAME=$(unique_filename)
+		fi
+
+		write_one_file "$BLOB" "$FILTER_NAME" "$OUTPUT_NAME"
+	done
+}
+
+function file_time {
+	if [ $IS_BSD == 1 ]; then
+		stat -f %c "$1"
+	else
+		stat -c %Y "$1"
+	fi
+}
+
+function timestamp_to_s {
+	if [ $IS_BSD == 1 ]; then
+		date -r "$1"
+	else
+		date -d @"$1"
+	fi
+}
+
+function sort_by_timestamp {
+	# sort blobs in loose objects by their timestamp (packed blobs last)
+	BLOB_AND_TIMESTAMPS=($(for BLOB in "${BLOBS[@]}"; do
+		LOOSE="${BLOB::2}/${BLOB:2}"
+		TIME=$(file_time "$GIT_DIR/objects/$LOOSE" 2>/dev/null || true)
+		echo "$BLOB $TIME"
+	done | sort -k2 -r))
+}
+
+# List every blob in BLOBS together with the date of its loose object.
+#
+# Output goes to stdout: a heading, a blank line, then one
+# "<id>  <date>" line per blob in the order set by sort_by_timestamp.
+# Blobs without a usable timestamp are shown with "(Unknown)".
+function print_recoverable {
+	echo "Recoverable orphaned git blobs:"
+	echo ""
+
+	sort_by_timestamp
+	for BLOB_AND_TIMESTAMP in "${BLOB_AND_TIMESTAMPS[@]}"; do
+		# Entries are "<id> <timestamp>".  Split on the separating space
+		# rather than at a fixed 40 characters so that longer object ids
+		# are neither truncated nor leaked into the timestamp.
+		BLOB=${BLOB_AND_TIMESTAMP%% *}
+		TIME=${BLOB_AND_TIMESTAMP#* }
+		DATE=$([ -n "$TIME" ] && timestamp_to_s "$TIME" || echo "(Unknown)")
+
+		echo "$BLOB  $DATE"
+	done
+}
+
+function prompt_for_filename {
+	while true
+	do
+		echo -n "Filename (return to skip): "
+		read -r FILENAME
+
+		if [ -f "$FILENAME" ]; then
+			echo -n "File exists, overwrite? [y,N]: "
+			read -r overwrite
+
+			case "$overwrite" in
+			[yY]*)
+				return 0
+				;;
+			esac
+
+			echo
+		else
+			return 0
+		fi
+	done
+}
+
+function view_file {
+	read_one_file "${BLOB}" | ${PAGER:-less}
+}
+
+function show_summary {
+	FILETYPE=$(read_one_file "${BLOB}" | file -b -)
+	IS_TEXT=$(echo "${FILETYPE}" | grep -c ' text$' 2>/dev/null || true)
+
+	if [ "$IS_TEXT" == "1" ]; then
+		read_one_file "${BLOB}"
+	else
+		read_one_file "${BLOB}" | hexdump -C
+	fi
+}
+
+function interactive {
+	echo "Recoverable orphaned git blobs:"
+
+	sort_by_timestamp
+	for BLOB_AND_TIMESTAMP in "${BLOB_AND_TIMESTAMPS[@]}"; do
+		echo
+
+		BLOB=${BLOB_AND_TIMESTAMP::40}
+		TIME=${BLOB_AND_TIMESTAMP:41}
+		DATE=$([ ! -z "$TIME" ] && timestamp_to_s "$TIME" || echo "(Unknown)") 
+
+		echo "$BLOB  ($DATE)"
+		show_summary "${BLOB}" | head -4 | sed -e 's/^/> /'
+		echo
+
+		while true
+		do
+			echo -n "Recover this file? [y,n,v,f,q,?]: "
+			read -r ans || return 1
+
+			case "$ans" in
+			[yY]*)
+				write_one_file "${BLOB}" "" "$(unique_filename)"
+				break
+				;;
+			[nN]*)
+				break
+				;;
+			[vV]*)
+				view_file "${BLOB}"
+				echo
+				;;
+			[fF]*)
+				prompt_for_filename
+
+				if [ "$FILENAME" == "" ]; then
+					break
+				fi
+
+				write_one_file "${BLOB}" "${FILENAME}" "${FILENAME}"
+				break
+				;;
+			\?*)
+				echo
+				echo "Do you want to recover this file?"
+				echo " y: yes, write the file to ${BLOB}"
+				echo " n: no, skip this file and see the next orphaned file"
+				echo " v: view the file"
+				echo " f: prompt for a filename to use for recovery"
+				echo " q: quit"
+				echo
+				;;
+			[qQ]*)
+				return 0
+				;;
+			esac
+		done
+	done
+}
+
+
+# Work on the ids given on the command line, or else on whatever fsck
+# reports as unreachable.
+if [ ${#BLOBS[@]} == 0 ]; then
+	find_unreachable
+else
+	expand_given_blobs
+fi
+
+if [ ${#BLOBS[@]} == 0 ]; then
+	echo "$PROGNAME: no recoverable orphaned blobs."
+	exit 0
+fi
+
+# Interactive mode takes precedence over recovery; with neither, just list.
+case "$DO_INTERACTIVE$DO_RECOVER" in
+1?)
+	interactive
+	;;
+01)
+	write_recoverable
+	;;
+*)
+	print_recoverable
+	;;
+esac
+
-- 
2.0.0 (libgit2)


  reply	other threads:[~2018-08-04 14:24 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-08-04 14:22 [RFC PATCH 0/1] Introduce git-recover Edward Thomson
2018-08-04 14:24 ` Edward Thomson [this message]
2018-08-04 15:54   ` [RFC PATCH 1/1] recover: restoration of deleted worktree files Junio C Hamano
2018-08-04 16:17     ` Robert P. J. Day
2018-08-04 17:33       ` Todd Zullinger
2018-08-04 16:19     ` Edward Thomson
2018-08-04 16:48       ` Junio C Hamano
2018-08-05  1:34 ` [RFC PATCH 0/1] Introduce git-recover Jonathan Nieder

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180804142416.GA6@5f28dc333bbd \
    --to=ethomson@edwardthomson.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).