git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 05/11] copy.c: convert copy_file() to copy_dir_recursively()
Date: Sat, 12 Nov 2016 09:23:31 +0700	[thread overview]
Message-ID: <20161112022337.13317-6-pclouds@gmail.com> (raw)
In-Reply-To: <20161112022337.13317-1-pclouds@gmail.com>

This finally enables busybox's copy_file() code under a new name
(because "copy_file" is already taken in Git code base). Because this
comes from busybox, POSIXy (or even Linuxy) behavior is expected. More
changes may be needed for Windows support.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 cache.h |   1 +
 copy.c  | 216 ++++++++++++++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 179 insertions(+), 38 deletions(-)

diff --git a/cache.h b/cache.h
index a50a61a..a9a72f8 100644
--- a/cache.h
+++ b/cache.h
@@ -1857,6 +1857,7 @@ extern void fprintf_or_die(FILE *, const char *fmt, ...);
 extern int copy_fd(int ifd, int ofd);
 extern int copy_file(const char *dst, const char *src, int mode);
 extern int copy_file_with_time(const char *dst, const char *src, int mode);
+extern int copy_dir_recursively(const char *source, const char *dest);
 
 extern void write_or_die(int fd, const void *buf, size_t count);
 extern void fsync_or_die(int fd, const char *);
diff --git a/copy.c b/copy.c
index 60c7d8a..8471f7f 100644
--- a/copy.c
+++ b/copy.c
@@ -1,4 +1,6 @@
 #include "cache.h"
+#include "dir.h"
+#include "hashmap.h"
 
 int copy_fd(int ifd, int ofd)
 {
@@ -66,21 +68,126 @@ int copy_file_with_time(const char *dst, const char *src, int mode)
 	return status;
 }
 
-#if 0
-/* Return:
- * -1 error, copy not made
- *  0 copy is made or user answered "no" in interactive mode
- *    (failures to preserve mode/owner/times are not reported in exit code)
+struct inode_key {
+	struct hashmap_entry entry;
+	ino_t ino;
+	dev_t dev;
+	/*
+	 * Reportedly, on cramfs a file and a dir can have same ino.
+	 * Need to also remember "file/dir" bit:
+	 */
+	char isdir; /* bool */
+};
+
+struct inode_value {
+	struct inode_key key;
+	char name[FLEX_ARRAY];
+};
+
+#define HASH_SIZE      311u   /* Should be prime */
+static inline unsigned hash_inode(ino_t i)
+{
+	return i % HASH_SIZE;
+}
+
+static int inode_cmp(const void *entry, const void *entry_or_key,
+		     const void *keydata)
+{
+	const struct inode_value *inode = entry;
+	const struct inode_key   *key   = entry_or_key;
+
+	return !(inode->key.ino   == key->ino &&
+		 inode->key.dev   == key->dev &&
+		 inode->key.isdir == key->isdir);
+}
+
+static const char *is_in_ino_dev_hashtable(const struct hashmap *map,
+					   const struct stat *st)
+{
+	struct inode_key key;
+	struct inode_value *value;
+
+	key.entry.hash = hash_inode(st->st_ino);
+	key.ino	       = st->st_ino;
+	key.dev	       = st->st_dev;
+	key.isdir      = !!S_ISDIR(st->st_mode);
+	value	       = hashmap_get(map, &key, NULL);
+	return value ? value->name : NULL;
+}
+
+static void add_to_ino_dev_hashtable(struct hashmap *map,
+				     const struct stat *st,
+				     const char *path)
+{
+	struct inode_value *v;
+	int len = strlen(path);
+
+	v = xmalloc(offsetof(struct inode_value, name) + len + 1);
+	v->key.entry.hash = hash_inode(st->st_ino);
+	v->key.ino	  = st->st_ino;
+	v->key.dev	  = st->st_dev;
+	v->key.isdir      = !!S_ISDIR(st->st_mode);
+	memcpy(v->name, path, len + 1);
+	hashmap_add(map, v);
+}
+
+/*
+ * Find out if the last character of a string matches the one given.
+ * Don't underrun the buffer if the string length is 0.
  */
-int FAST_FUNC copy_file(const char *source, const char *dest, int flags)
+static inline char *last_char_is(const char *s, int c)
+{
+	if (s && *s) {
+		size_t sz = strlen(s) - 1;
+		s += sz;
+		if ( (unsigned char)*s == c)
+			return (char*)s;
+	}
+	return NULL;
+}
+
+static inline char *concat_path_file(const char *path, const char *filename)
+{
+	struct strbuf sb = STRBUF_INIT;
+	char *lc;
+
+	if (!path)
+		path = "";
+	lc = last_char_is(path, '/');
+	while (*filename == '/')
+		filename++;
+	strbuf_addf(&sb, "%s%s%s", path, (lc==NULL ? "/" : ""), filename);
+	return strbuf_detach(&sb, NULL);
+}
+
+static char *concat_subpath_file(const char *path, const char *f)
+{
+	if (f && is_dot_or_dotdot(f))
+		return NULL;
+	return concat_path_file(path, f);
+}
+
+static int do_unlink(const char *dest)
+{
+	int e = errno;
+
+	if (unlink(dest) < 0) {
+		errno = e; /* do not use errno from unlink */
+		return error_errno(_("can't create '%s'"), dest);
+	}
+	return 0;
+}
+
+static int copy_dir_1(struct hashmap *inode_map,
+		      const char *source,
+		      const char *dest)
 {
 	/* This is a recursive function, try to minimize stack usage */
-	/* NB: each struct stat is ~100 bytes */
 	struct stat source_stat;
 	struct stat dest_stat;
-	smallint retval = 0;
-	smallint dest_exists = 0;
-	smallint ovr;
+	int retval = 0;
+	int dest_exists = 0;
+	int ovr;
 
 	if (lstat(source, &source_stat) < 0)
 		return error_errno(_("can't stat '%s'"), source);
@@ -102,7 +209,7 @@ int FAST_FUNC copy_file(const char *source, const char *dest, int flags)
 		mode_t saved_umask = 0;
 
 		/* Did we ever create source ourself before? */
-		tp = is_in_ino_dev_hashtable(&source_stat);
+		tp = is_in_ino_dev_hashtable(inode_map, &source_stat);
 		if (tp)
 			/* We did! it's a recursion! man the lifeboats... */
 			return error(_("recursion detected, omitting directory '%s'"),
@@ -132,11 +239,12 @@ int FAST_FUNC copy_file(const char *source, const char *dest, int flags)
 			if (lstat(dest, &dest_stat) < 0)
 				return error_errno(_("can't stat '%s'"), dest);
 		}
+
 		/*
 		 * remember (dev,inode) of each created dir. name is
 		 * not remembered
 		 */
-		add_to_ino_dev_hashtable(&dest_stat, NULL);
+		add_to_ino_dev_hashtable(inode_map, &dest_stat, "");
 
 		/* Recursively copy files in SOURCE */
 		dp = opendir(source);
@@ -152,7 +260,7 @@ int FAST_FUNC copy_file(const char *source, const char *dest, int flags)
 			if (!new_source)
 				continue;
 			new_dest = concat_path_file(dest, d->d_name);
-			if (copy_file(new_source, new_dest, flags & ~FILEUTILS_DEREFERENCE_L0) < 0)
+			if (copy_dir_1(inode_map, new_source, new_dest) < 0)
 				retval = -1;
 			free(new_source);
 			free(new_dest);
@@ -177,53 +285,57 @@ int FAST_FUNC copy_file(const char *source, const char *dest, int flags)
 			goto dont_cat;
 		}
 
-		if (ENABLE_FEATURE_PRESERVE_HARDLINKS) {
+		if (1 /*ENABLE_FEATURE_PRESERVE_HARDLINKS*/) {
 			const char *link_target;
-			link_target = is_in_ino_dev_hashtable(&source_stat);
+			link_target = is_in_ino_dev_hashtable(inode_map, &source_stat);
 			if (link_target) {
 				if (link(link_target, dest) < 0) {
-					ovr = ask_and_unlink(dest, flags);
-					if (ovr <= 0)
+					ovr = do_unlink(dest);
+					if (ovr < 0)
 						return ovr;
 					if (link(link_target, dest) < 0)
 						return error_errno(_("can't create link '%s'"), dest);
 				}
 				return 0;
 			}
-			add_to_ino_dev_hashtable(&source_stat, dest);
+			add_to_ino_dev_hashtable(inode_map, &source_stat, dest);
 		}
 
-		src_fd = open_or_warn(source, O_RDONLY);
+		src_fd = open(source, O_RDONLY);
 		if (src_fd < 0)
-			return -1;
+			return error_errno(_("can't open '%s'"), source);
 
 		/* Do not try to open with weird mode fields */
 		new_mode = source_stat.st_mode;
 		if (!S_ISREG(source_stat.st_mode))
 			new_mode = 0666;
 
-		/* POSIX way is a security problem versus (sym)link attacks */
-		if (!ENABLE_FEATURE_NON_POSIX_CP) {
-			dst_fd = open(dest, O_WRONLY|O_CREAT|O_TRUNC, new_mode);
-		} else { /* safe way: */
-			dst_fd = open(dest, O_WRONLY|O_CREAT|O_EXCL, new_mode);
-		}
+		dst_fd = open(dest, O_WRONLY|O_CREAT|O_EXCL, new_mode);
 		if (dst_fd == -1) {
-			ovr = ask_and_unlink(dest, flags);
-			if (ovr <= 0) {
+			ovr = do_unlink(dest);
+			if (ovr < 0) {
 				close(src_fd);
 				return ovr;
 			}
 			/* It shouldn't exist. If it exists, do not open (symlink attack?) */
-			dst_fd = open3_or_warn(dest, O_WRONLY|O_CREAT|O_EXCL, new_mode);
+			dst_fd = open(dest, O_WRONLY|O_CREAT|O_EXCL, new_mode);
 			if (dst_fd < 0) {
 				close(src_fd);
-				return -1;
+				return error_errno(_("can't open '%s'"), dest);
 			}
 		}
 
-		if (bb_copyfd_eof(src_fd, dst_fd) == -1)
+		switch (copy_fd(src_fd, dst_fd)) {
+		case COPY_READ_ERROR:
+			error(_("copy-fd: read returned %s"), strerror(errno));
 			retval = -1;
+			break;
+		case COPY_WRITE_ERROR:
+			error(_("copy-fd: write returned %s"), strerror(errno));
+			retval = -1;
+			break;
+		}
+
 		/* Careful with writing... */
 		if (close(dst_fd) < 0)
 			retval = error_errno(_("error writing to '%s'"), dest);
@@ -243,19 +355,28 @@ int FAST_FUNC copy_file(const char *source, const char *dest, int flags)
 	/* We are lazy here, a bit lax with races... */
 	if (dest_exists) {
 		errno = EEXIST;
-		ovr = ask_and_unlink(dest, flags);
-		if (ovr <= 0)
+		ovr = do_unlink(dest);
+		if (ovr < 0)
 			return ovr;
 	}
 	if (S_ISLNK(source_stat.st_mode)) {
-		char *lpath = xmalloc_readlink_or_warn(source);
-		if (lpath) {
-			int r = symlink(lpath, dest);
-			free(lpath);
+		struct strbuf lpath = STRBUF_INIT;
+		if (!strbuf_readlink(&lpath, source, 0)) {
+			int r = symlink(lpath.buf, dest);
+			strbuf_release(&lpath);
 			if (r < 0)
 				return error_errno(_("can't create symlink '%s'"), dest);
 			if (lchown(dest, source_stat.st_uid, source_stat.st_gid) < 0)
 				error_errno(_("can't preserve %s of '%s'"), "ownership", dest);
+		} else {
+			/* EINVAL => "file: Invalid argument" => puzzled user */
+			const char *errmsg = _("not a symlink");
+			int err = errno;
+
+			if (err != EINVAL)
+				errmsg = strerror(err);
+			error(_("%s: cannot read link: %s"), source, errmsg);
+			strbuf_release(&lpath);
 		}
 		/*
 		 * _Not_ jumping to preserve_mode_ugid_time: symlinks
@@ -293,4 +414,23 @@ int FAST_FUNC copy_file(const char *source, const char *dest, int flags)
 
 	return retval;
 }
-#endif
+
+/*
+ * Return:
+ * -1 error, copy not made
+ *  0 copy is made
+ *
+ * Failures to preserve mode/owner/times are not reported in exit
+ * code. No support for preserving SELinux security context. Symlinks
+ * and hardlinks are preserved.
+ */
+int copy_dir_recursively(const char *source, const char *dest)
+{
+	int ret;
+	struct hashmap inode_map;
+
+	hashmap_init(&inode_map, inode_cmp, 1024);
+	ret = copy_dir_1(&inode_map, source, dest);
+	hashmap_free(&inode_map, 1);
+	return ret;
+}
-- 
2.8.2.524.g6ff3d78


  parent reply	other threads:[~2016-11-12  2:24 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-12  2:23 [PATCH 00/11] git worktree (re)move Nguyễn Thái Ngọc Duy
2016-11-12  2:23 ` [PATCH 01/11] copy.c: import copy_file() from busybox Nguyễn Thái Ngọc Duy
2016-11-12  2:23 ` [PATCH 02/11] copy.c: delete unused code in copy_file() Nguyễn Thái Ngọc Duy
2016-11-12  2:23 ` [PATCH 03/11] copy.c: convert bb_(p)error_msg to error(_errno) Nguyễn Thái Ngọc Duy
2016-11-12  2:23 ` [PATCH 04/11] copy.c: style fix Nguyễn Thái Ngọc Duy
2016-11-12  2:23 ` Nguyễn Thái Ngọc Duy [this message]
2016-11-12  2:23 ` [PATCH 06/11] worktree.c: add validate_worktree() Nguyễn Thái Ngọc Duy
2016-11-12  2:23 ` [PATCH 07/11] worktree.c: add update_worktree_location() Nguyễn Thái Ngọc Duy
2016-11-12  2:23 ` [PATCH 08/11] worktree move: new command Nguyễn Thái Ngọc Duy
2016-11-12  2:23 ` [PATCH 09/11] worktree move: accept destination as directory Nguyễn Thái Ngọc Duy
2016-11-12  2:23 ` [PATCH 10/11] worktree move: refuse to move worktrees with submodules Nguyễn Thái Ngọc Duy
2016-11-12  2:23 ` [PATCH 11/11] worktree remove: new command Nguyễn Thái Ngọc Duy
     [not found] ` <xmqqeg2gyv1v.fsf@gitster.mtv.corp.google.com>
     [not found]   ` <xmqqa8d4yts7.fsf@gitster.mtv.corp.google.com>
2016-11-16 13:05     ` [PATCH 00/11] git worktree (re)move Duy Nguyen
2016-11-16 13:11       ` Duy Nguyen
2016-11-16 18:11       ` Junio C Hamano
2016-11-16 18:39         ` Junio C Hamano
2016-11-28  9:43 ` [PATCH v2 " Nguyễn Thái Ngọc Duy
2016-11-28  9:43   ` [PATCH v2 01/11] copy.c: import copy_file() from busybox Nguyễn Thái Ngọc Duy
2016-11-28  9:43   ` [PATCH v2 02/11] copy.c: delete unused code in copy_file() Nguyễn Thái Ngọc Duy
2016-11-28  9:43   ` [PATCH v2 03/11] copy.c: convert bb_(p)error_msg to error(_errno) Nguyễn Thái Ngọc Duy
2016-11-28  9:43   ` [PATCH v2 04/11] copy.c: style fix Nguyễn Thái Ngọc Duy
2016-11-28  9:43   ` [PATCH v2 05/11] copy.c: convert copy_file() to copy_dir_recursively() Nguyễn Thái Ngọc Duy
2016-11-28  9:43   ` [PATCH v2 06/11] worktree.c: add validate_worktree() Nguyễn Thái Ngọc Duy
2016-11-28  9:43   ` [PATCH v2 07/11] worktree.c: add update_worktree_location() Nguyễn Thái Ngọc Duy
2016-11-28  9:43   ` [PATCH v2 08/11] worktree move: new command Nguyễn Thái Ngọc Duy
2016-11-28  9:43   ` [PATCH v2 09/11] worktree move: accept destination as directory Nguyễn Thái Ngọc Duy
2016-11-28  9:43   ` [PATCH v2 10/11] worktree move: refuse to move worktrees with submodules Nguyễn Thái Ngọc Duy
2016-12-19 20:57     ` Stefan Beller
2016-11-28  9:43   ` [PATCH v2 11/11] worktree remove: new command Nguyễn Thái Ngọc Duy
2016-11-28 19:48   ` [PATCH v2 00/11] git worktree (re)move Junio C Hamano
2016-11-28 20:20     ` Junio C Hamano
2016-11-28 21:25       ` Johannes Sixt
2016-11-29 12:17         ` Duy Nguyen
2016-11-29 12:08       ` Duy Nguyen
2016-11-29 13:56         ` Duy Nguyen
2016-11-29 19:28           ` Junio C Hamano
2016-11-30  0:18             ` Stefan Beller
2016-11-29 21:14           ` Johannes Sixt
2016-11-30  0:09             ` Duy Nguyen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161112022337.13317-6-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).