git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [JGIT PATCH] Encode/decode index and tree entries using UTF-8
@ 2008-10-19 13:29 Robin Rosenberg
  2008-10-19 17:14 ` Shawn O. Pearce
  0 siblings, 1 reply; 3+ messages in thread
From: Robin Rosenberg @ 2008-10-19 13:29 UTC (permalink / raw
  To: spearce; +Cc: git

Decoding uses the same strategy as for commit messages and other string
entities. Encoding is always done in UTF-8. This is incompatible with
Git for non-unicode unices, but it leads to the expected behavior on
Windows and cross-locale sharing of repositories.

Signed-off-by: Robin Rosenberg <robin.rosnberg@dewire.com>
---

Inspired by the recent thread on the gitml, I decided to clean up jgit a little. I
know the GitIndex is soon to be obsoleted, but it is still the class that does
the dirty work when committing in Egit and the changes are fairly simple
anyway.

- Unicode paths will work on all platforms that support unicode, i.e. Windows
and any unix using a UTF-8 locale, with one small exception. Accented characters
on OS-X probably do not work well.
- Combined use of unicode on one platform is compatible with non-unicode locales
on other platforms as long as the characters in use are available in the local character
set.

A side note: invalid byte sequences in unix, e.g. ISO-latin-1 encoded file names, cannot
work in Java. Such files are inaccessible. Jgit will allow you to rename them in the index,
but that is all.

-- robin

 .../src/org/spearce/jgit/lib/GitIndex.java         |   27 +++++++++++---------
 .../src/org/spearce/jgit/lib/Tree.java             |   11 +++----
 .../src/org/spearce/jgit/lib/TreeEntry.java        |   13 +++------
 3 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java b/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
index 22935ab..3d37033 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
@@ -63,6 +63,7 @@
 import org.spearce.jgit.errors.CorruptObjectException;
 import org.spearce.jgit.errors.NotSupportedException;
 import org.spearce.jgit.util.FS;
+import org.spearce.jgit.util.RawParseUtils;
 
 /**
  * A representation of the Git index.
@@ -178,8 +179,9 @@ public Entry add(File wd, File f) throws IOException {
 	 * @param f
 	 *            the file whose path shall be removed.
 	 * @return true if such a path was found (and thus removed)
+	 * @throws IOException 
 	 */
-	public boolean remove(File wd, File f) {
+	public boolean remove(File wd, File f) throws IOException {
 		byte[] key = makeKey(wd, f);
 		return entries.remove(key) != null;
 	}
@@ -300,11 +302,11 @@ static boolean File_hasExecute() {
 		return FS.INSTANCE.supportsExecute();
 	}
 
-	static byte[] makeKey(File wd, File f) {
+	static byte[] makeKey(File wd, File f) throws IOException {
 		if (!f.getPath().startsWith(wd.getPath()))
 			throw new Error("Path is not in working dir");
 		String relName = Repository.stripWorkDir(wd, f);
-		return relName.getBytes();
+		return relName.getBytes(Constants.CHARACTER_ENCODING);
 	}
 
 	Boolean filemode;
@@ -376,7 +378,7 @@ Entry(TreeEntry f, int stage)
 				size = -1;
 			}
 			sha1 = f.getId();
-			name = f.getFullName().getBytes("UTF-8");
+			name = f.getFullName().getBytes(Constants.CHARACTER_ENCODING);
 			flags = (short) ((stage << 12) | name.length); // TODO: fix flags
 		}
 
@@ -580,7 +582,7 @@ private File getFile(File wd) {
 		}
 
 		public String toString() {
-			return new String(name) + "/SHA-1(" + sha1.name() + ")/M:"
+			return getName() + "/SHA-1(" + sha1.name() + ")/M:"
 					+ new Date(ctime / 1000000L) + "/C:"
 					+ new Date(mtime / 1000000L) + "/d" + dev + "/i" + ino
 					+ "/m" + Integer.toString(mode, 8) + "/u" + uid + "/g"
@@ -591,7 +593,7 @@ public String toString() {
 		 * @return path name for this entry
 		 */
 		public String getName() {
-			return new String(name);
+			return RawParseUtils.decode(Constants.CHARSET, name, 0, name.length);
 		}
 
 		/**
@@ -731,7 +733,7 @@ void readTree(String prefix, Tree t) throws IOException {
 				readTree(name, (Tree) te);
 			} else {
 				Entry e = new Entry(te, 0);
-				entries.put(name.getBytes("UTF-8"), e);
+				entries.put(name.getBytes(Constants.CHARACTER_ENCODING), e);
 			}
 		}
 	}
@@ -743,7 +745,7 @@ void readTree(String prefix, Tree t) throws IOException {
 	 * @throws IOException
 	 */
 	public Entry addEntry(TreeEntry te) throws IOException {
-		byte[] key = te.getFullName().getBytes("UTF-8");
+		byte[] key = te.getFullName().getBytes(Constants.CHARACTER_ENCODING);
 		Entry e = new Entry(te, 0);
 		entries.put(key, e);
 		return e;
@@ -825,7 +827,7 @@ public ObjectId writeTree() throws IOException {
 			while (trees.size() < newName.length) {
 				if (!current.existsTree(newName[trees.size() - 1])) {
 					current = new Tree(current, newName[trees.size() - 1]
-							.getBytes());
+							.getBytes(Constants.CHARACTER_ENCODING));
 					current.getParent().addEntry(current);
 					trees.push(current);
 				} else {
@@ -835,7 +837,7 @@ public ObjectId writeTree() throws IOException {
 				}
 			}
 			FileTreeEntry ne = new FileTreeEntry(current, e.sha1,
-					newName[newName.length - 1].getBytes(),
+					newName[newName.length - 1].getBytes(Constants.CHARACTER_ENCODING),
 					(e.mode & FileMode.EXECUTABLE_FILE.getBits()) == FileMode.EXECUTABLE_FILE.getBits());
 			current.addEntry(ne);
 		}
@@ -880,7 +882,7 @@ int longestCommonPath(String[] a, String[] b) {
 	 * Small beware: Unaccounted for are unmerged entries. You may want
 	 * to abort if members with stage != 0 are found if you are doing
 	 * any updating operations. All stages will be found after one another
-	 * here later. Currently only one stage per name is returned.
+	 * here later. Currently only one stage per name is returned.	
 	 *
 	 * @return The index entries sorted
 	 */
@@ -896,7 +898,8 @@ int longestCommonPath(String[] a, String[] b) {
 	 * @throws UnsupportedEncodingException
 	 */
 	public Entry getEntry(String path) throws UnsupportedEncodingException {
-		return (Entry) entries.get(Repository.gitInternalSlash(path.getBytes("ISO-8859-1")));
+		return (Entry) entries.get(Repository.gitInternalSlash(path
+				.getBytes(Constants.CHARACTER_ENCODING)));
 	}
 
 	/**
diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java b/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
index 25a9a71..3fd3d30 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
@@ -44,6 +44,7 @@
 import org.spearce.jgit.errors.CorruptObjectException;
 import org.spearce.jgit.errors.EntryExistsException;
 import org.spearce.jgit.errors.MissingObjectException;
+import org.spearce.jgit.util.RawParseUtils;
 
 /**
  * A representation of a Git tree entry. A Tree is a directory in Git.
@@ -281,8 +282,7 @@ public FileTreeEntry addFile(final byte[] s, final int offset)
 
 		final byte[] newName = substring(s, offset, slash);
 		if (p >= 0)
-			throw new EntryExistsException(new String(newName,
-					Constants.CHARACTER_ENCODING));
+			throw new EntryExistsException(RawParseUtils.decode(Constants.CHARSET, newName, 0, newName.length));
 		else if (slash < s.length) {
 			final Tree t = new Tree(this, newName);
 			insertEntry(p, t);
@@ -332,8 +332,8 @@ public Tree addTree(final byte[] s, final int offset) throws IOException {
 
 		final byte[] newName = substring(s, offset, slash);
 		if (p >= 0)
-			throw new EntryExistsException(new String(newName,
-					Constants.CHARACTER_ENCODING));
+			throw new EntryExistsException(RawParseUtils.decode(
+					Constants.CHARSET, newName, 0, newName.length));
 
 		final Tree t = new Tree(this, newName);
 		insertEntry(p, t);
@@ -355,8 +355,7 @@ public void addEntry(final TreeEntry e) throws IOException {
 			e.attachParent(this);
 			insertEntry(p, e);
 		} else {
-			throw new EntryExistsException(new String(e.getNameUTF8(),
-					Constants.CHARACTER_ENCODING));
+			throw new EntryExistsException(e.getName());
 		}
 	}
 
diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java b/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
index 85dda1d..7f58056 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
@@ -39,9 +39,9 @@
 package org.spearce.jgit.lib;
 
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
 
 import org.spearce.jgit.lib.GitIndex.Entry;
+import org.spearce.jgit.util.RawParseUtils;
 
 /**
  * This class represents an entry in a tree, like a blob or another tree.
@@ -126,13 +126,10 @@ public Repository getRepository() {
 	 * @return the name of this entry.
 	 */
 	public String getName() {
-		try {
-			return nameUTF8 != null ? new String(nameUTF8,
-					Constants.CHARACTER_ENCODING) : null;
-		} catch (UnsupportedEncodingException uee) {
-			throw new RuntimeException("JVM doesn't support "
-					+ Constants.CHARACTER_ENCODING, uee);
-		}
+		if (nameUTF8 != null)
+			return RawParseUtils.decode(Constants.CHARSET, nameUTF8, 0,
+					nameUTF8.length);
+		return null;
 	}
 
 	/**
-- 
1.6.0.2.308.gef4a

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [JGIT PATCH] Encode/decode index and tree entries using UTF-8
  2008-10-19 13:29 [JGIT PATCH] Encode/decode index and tree entries using UTF-8 Robin Rosenberg
@ 2008-10-19 17:14 ` Shawn O. Pearce
  2008-10-19 18:24   ` [JGIT PATCH v2] " Robin Rosenberg
  0 siblings, 1 reply; 3+ messages in thread
From: Shawn O. Pearce @ 2008-10-19 17:14 UTC (permalink / raw
  To: Robin Rosenberg; +Cc: git

Robin Rosenberg <robin.rosenberg.lists@dewire.com> wrote:
> Decoding uses the same strategy as for commit messages and other string
> entities. Encoding is always done in UTF-8. This is incompatible with
> Git for non-unicode unices, but it leads to the expected behavior on
> Windows and cross-locale sharing of repositories.

FWIW I think this is a good idea.
 
> Inspired by the recent thread on the gitml, I decided to clean up jgit a little. I
> know the GitIndex is soon to be obsoleted, but it is still the class that does
> the dirty work when committing in Egit and the changes are fairly simple
> anyway.

Yup, I agree.

I mostly agree with the patch, but we have a utility function that
you are not using:
 
> @@ -300,11 +302,11 @@ static boolean File_hasExecute() {
>  		return FS.INSTANCE.supportsExecute();
>  	}
>  
> -	static byte[] makeKey(File wd, File f) {
> +	static byte[] makeKey(File wd, File f) throws IOException {
>  		if (!f.getPath().startsWith(wd.getPath()))
>  			throw new Error("Path is not in working dir");
>  		String relName = Repository.stripWorkDir(wd, f);
> -		return relName.getBytes();
> +		return relName.getBytes(Constants.CHARACTER_ENCODING);
>  	}

Instead of "relName.getBytes(Constants.CHARACTER_ENCODING)" use
"Constants.encode(relName)".  It's shorter and faster.
  
> @@ -591,7 +593,7 @@ public String toString() {
>  		 * @return path name for this entry
>  		 */
>  		public String getName() {
> -			return new String(name);
> +			return RawParseUtils.decode(Constants.CHARSET, name, 0, name.length);

Heh.  That's actually a common idiom.  We probably should add:

	String decode(final byte[] arr) {
		return decode(Constants.CHARSET, arr, 0, arr.length);
	}

to RawParseUtils to make these whole-array decode calls easier
to write.

I think you should squash this into your patch, and fix up the
getBytes and decode calls as I noted above before we apply this.

diff --git a/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheEntry.java b/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheEntry.java
index cc683d7..913f3ae 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheEntry.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheEntry.java
@@ -42,7 +42,6 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
-import java.nio.ByteBuffer;
 import java.util.Arrays;
 
 import org.spearce.jgit.lib.AnyObjectId;
@@ -50,6 +49,7 @@
 import org.spearce.jgit.lib.FileMode;
 import org.spearce.jgit.lib.ObjectId;
 import org.spearce.jgit.util.NB;
+import org.spearce.jgit.util.RawParseUtils;
 
 /**
  * A single file (or stage of a file) in a {@link DirCache}.
@@ -405,7 +405,7 @@ public void setObjectIdFromRaw(final byte[] bs, final int p) {
 	 *         returned string.
 	 */
 	public String getPathString() {
-		return Constants.CHARSET.decode(ByteBuffer.wrap(path)).toString();
+		return RawParseUtils.decode(path);
 	}
 
 	/**
diff --git a/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheTree.java b/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheTree.java
index 26b6348..589894a 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheTree.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheTree.java
@@ -39,7 +39,6 @@
 
 import java.io.IOException;
 import java.io.OutputStream;
-import java.nio.ByteBuffer;
 import java.util.Arrays;
 import java.util.Comparator;
 
@@ -251,8 +250,7 @@ ObjectId getObjectId() {
 	 * @return name of the tree. This does not contain any '/' characters.
 	 */
 	public String getNameString() {
-		final ByteBuffer bb = ByteBuffer.wrap(encodedName);
-		return Constants.CHARSET.decode(bb).toString();
+		return RawParseUtils.decode(encodedName);
 	}
 
 	/**
diff --git a/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java b/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
index 6c0e339..2519f19 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
@@ -379,6 +379,21 @@ public static PersonIdent parsePersonIdent(final byte[] raw, final int nameB) {
 	}
 
 	/**
+	 * Decode a region of the buffer from the default character set (UTF-8).
+	 * 
+	 * If the byte stream cannot be decoded that way, the platform default is
+	 * tried and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
+	 * 
+	 * @param buffer
+	 *            buffer to pull raw bytes from.
+	 * @return a string representation of the entire buffer, after decoding the
+	 *         region through the specified character set.
+	 */
+	public static String decode(final byte[] buffer) {
+		return decode(Constants.CHARSET, buffer, 0, buffer.length);
+	}
+
+	/**
 	 * Decode a region of the buffer under the specified character set if possible.
 	 *
 	 * If the byte stream cannot be decoded that way, the platform default is tried

-- 
Shawn.

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [JGIT PATCH v2] Encode/decode index and tree entries using UTF-8
  2008-10-19 17:14 ` Shawn O. Pearce
@ 2008-10-19 18:24   ` Robin Rosenberg
  0 siblings, 0 replies; 3+ messages in thread
From: Robin Rosenberg @ 2008-10-19 18:24 UTC (permalink / raw
  To: Shawn O. Pearce; +Cc: git

Decoding uses the same strategy as for commit messages and other string
entities. Encoding is always done in UTF-8. This is incompatible with
Git for non-unicode unices, but it leads to the expected behavior on
Windows and cross-locale sharing of repositories.

Signed-off-by: Robin Rosenberg <robin.rosnberg@dewire.com>
---

söndagen den 19 oktober 2008 19.14.56 skrev Shawn O. Pearce:
> Robin Rosenberg <robin.rosenberg.lists@dewire.com> wrote:
> > Decoding uses the same strategy as for commit messages and other string
> > entities. Encoding is always done in UTF-8. This is incompatible with
> > Git for non-unicode unices, but it leads to the expected behavior on
> > Windows and cross-locale sharing of repositories.
> 
> FWIW I think this is a good idea.

Ok, so here's the update.  We might want to move the encode out of Constants
too as it is no longer a utility for constants.

-- robin

 .../src/org/spearce/jgit/lib/GitIndex.java         |   27 ++++++++--------
 .../src/org/spearce/jgit/lib/Tree.java             |   16 ++++-----
 .../src/org/spearce/jgit/lib/TreeEntry.java        |   14 +++-----
 .../src/org/spearce/jgit/util/RawParseUtils.java   |   32 ++++++++++++++++++++
 4 files changed, 58 insertions(+), 31 deletions(-)

diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java b/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
index 22935ab..bafddef 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
@@ -63,6 +63,7 @@
 import org.spearce.jgit.errors.CorruptObjectException;
 import org.spearce.jgit.errors.NotSupportedException;
 import org.spearce.jgit.util.FS;
+import org.spearce.jgit.util.RawParseUtils;
 
 /**
  * A representation of the Git index.
@@ -178,8 +179,9 @@ public Entry add(File wd, File f) throws IOException {
 	 * @param f
 	 *            the file whose path shall be removed.
 	 * @return true if such a path was found (and thus removed)
+	 * @throws IOException 
 	 */
-	public boolean remove(File wd, File f) {
+	public boolean remove(File wd, File f) throws IOException {
 		byte[] key = makeKey(wd, f);
 		return entries.remove(key) != null;
 	}
@@ -300,11 +302,11 @@ static boolean File_hasExecute() {
 		return FS.INSTANCE.supportsExecute();
 	}
 
-	static byte[] makeKey(File wd, File f) {
+	static byte[] makeKey(File wd, File f) throws IOException {
 		if (!f.getPath().startsWith(wd.getPath()))
 			throw new Error("Path is not in working dir");
 		String relName = Repository.stripWorkDir(wd, f);
-		return relName.getBytes();
+		return Constants.encode(relName);
 	}
 
 	Boolean filemode;
@@ -376,7 +378,7 @@ Entry(TreeEntry f, int stage)
 				size = -1;
 			}
 			sha1 = f.getId();
-			name = f.getFullName().getBytes("UTF-8");
+			name = Constants.encode(f.getFullName());
 			flags = (short) ((stage << 12) | name.length); // TODO: fix flags
 		}
 
@@ -580,7 +582,7 @@ private File getFile(File wd) {
 		}
 
 		public String toString() {
-			return new String(name) + "/SHA-1(" + sha1.name() + ")/M:"
+			return getName() + "/SHA-1(" + sha1.name() + ")/M:"
 					+ new Date(ctime / 1000000L) + "/C:"
 					+ new Date(mtime / 1000000L) + "/d" + dev + "/i" + ino
 					+ "/m" + Integer.toString(mode, 8) + "/u" + uid + "/g"
@@ -591,7 +593,7 @@ public String toString() {
 		 * @return path name for this entry
 		 */
 		public String getName() {
-			return new String(name);
+			return RawParseUtils.decode(name);
 		}
 
 		/**
@@ -731,7 +733,7 @@ void readTree(String prefix, Tree t) throws IOException {
 				readTree(name, (Tree) te);
 			} else {
 				Entry e = new Entry(te, 0);
-				entries.put(name.getBytes("UTF-8"), e);
+				entries.put(Constants.encode(name), e);
 			}
 		}
 	}
@@ -743,7 +745,7 @@ void readTree(String prefix, Tree t) throws IOException {
 	 * @throws IOException
 	 */
 	public Entry addEntry(TreeEntry te) throws IOException {
-		byte[] key = te.getFullName().getBytes("UTF-8");
+		byte[] key = Constants.encode(te.getFullName());
 		Entry e = new Entry(te, 0);
 		entries.put(key, e);
 		return e;
@@ -824,8 +826,7 @@ public ObjectId writeTree() throws IOException {
 			}
 			while (trees.size() < newName.length) {
 				if (!current.existsTree(newName[trees.size() - 1])) {
-					current = new Tree(current, newName[trees.size() - 1]
-							.getBytes());
+					current = new Tree(current, Constants.encode(newName[trees.size() - 1]));
 					current.getParent().addEntry(current);
 					trees.push(current);
 				} else {
@@ -835,7 +836,7 @@ public ObjectId writeTree() throws IOException {
 				}
 			}
 			FileTreeEntry ne = new FileTreeEntry(current, e.sha1,
-					newName[newName.length - 1].getBytes(),
+					Constants.encode(newName[newName.length - 1]),
 					(e.mode & FileMode.EXECUTABLE_FILE.getBits()) == FileMode.EXECUTABLE_FILE.getBits());
 			current.addEntry(ne);
 		}
@@ -880,7 +881,7 @@ int longestCommonPath(String[] a, String[] b) {
 	 * Small beware: Unaccounted for are unmerged entries. You may want
 	 * to abort if members with stage != 0 are found if you are doing
 	 * any updating operations. All stages will be found after one another
-	 * here later. Currently only one stage per name is returned.
+	 * here later. Currently only one stage per name is returned.	
 	 *
 	 * @return The index entries sorted
 	 */
@@ -896,7 +897,7 @@ int longestCommonPath(String[] a, String[] b) {
 	 * @throws UnsupportedEncodingException
 	 */
 	public Entry getEntry(String path) throws UnsupportedEncodingException {
-		return (Entry) entries.get(Repository.gitInternalSlash(path.getBytes("ISO-8859-1")));
+		return (Entry) entries.get(Repository.gitInternalSlash(Constants.encode(path)));
 	}
 
 	/**
diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java b/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
index 25a9a71..0ecd04d 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
@@ -44,6 +44,7 @@
 import org.spearce.jgit.errors.CorruptObjectException;
 import org.spearce.jgit.errors.EntryExistsException;
 import org.spearce.jgit.errors.MissingObjectException;
+import org.spearce.jgit.util.RawParseUtils;
 
 /**
  * A representation of a Git tree entry. A Tree is a directory in Git.
@@ -251,7 +252,7 @@ public void unload() {
 	 * @throws IOException
 	 */
 	public FileTreeEntry addFile(final String name) throws IOException {
-		return addFile(Repository.gitInternalSlash(name.getBytes(Constants.CHARACTER_ENCODING)), 0);
+		return addFile(Repository.gitInternalSlash(Constants.encode(name)), 0);
 	}
 
 	/**
@@ -281,8 +282,7 @@ public FileTreeEntry addFile(final byte[] s, final int offset)
 
 		final byte[] newName = substring(s, offset, slash);
 		if (p >= 0)
-			throw new EntryExistsException(new String(newName,
-					Constants.CHARACTER_ENCODING));
+			throw new EntryExistsException(RawParseUtils.decode(newName));
 		else if (slash < s.length) {
 			final Tree t = new Tree(this, newName);
 			insertEntry(p, t);
@@ -304,7 +304,7 @@ else if (slash < s.length) {
 	 * @throws IOException
 	 */
 	public Tree addTree(final String name) throws IOException {
-		return addTree(Repository.gitInternalSlash(name.getBytes(Constants.CHARACTER_ENCODING)), 0);
+		return addTree(Repository.gitInternalSlash(Constants.encode(name)), 0);
 	}
 
 	/**
@@ -332,8 +332,7 @@ public Tree addTree(final byte[] s, final int offset) throws IOException {
 
 		final byte[] newName = substring(s, offset, slash);
 		if (p >= 0)
-			throw new EntryExistsException(new String(newName,
-					Constants.CHARACTER_ENCODING));
+			throw new EntryExistsException(RawParseUtils.decode(newName));
 
 		final Tree t = new Tree(this, newName);
 		insertEntry(p, t);
@@ -355,8 +354,7 @@ public void addEntry(final TreeEntry e) throws IOException {
 			e.attachParent(this);
 			insertEntry(p, e);
 		} else {
-			throw new EntryExistsException(new String(e.getNameUTF8(),
-					Constants.CHARACTER_ENCODING));
+			throw new EntryExistsException(e.getName());
 		}
 	}
 
@@ -450,7 +448,7 @@ public boolean existsBlob(String path) throws IOException {
 	}
 
 	private TreeEntry findMember(final String s, byte slast) throws IOException {
-		return findMember(Repository.gitInternalSlash(s.getBytes(Constants.CHARACTER_ENCODING)), slast, 0);
+		return findMember(Repository.gitInternalSlash(Constants.encode(s)), slast, 0);
 	}
 
 	private TreeEntry findMember(final byte[] s, final byte slast, final int offset)
diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java b/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
index 85dda1d..c95863c 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
@@ -39,9 +39,9 @@
 package org.spearce.jgit.lib;
 
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
 
 import org.spearce.jgit.lib.GitIndex.Entry;
+import org.spearce.jgit.util.RawParseUtils;
 
 /**
  * This class represents an entry in a tree, like a blob or another tree.
@@ -126,13 +126,9 @@ public Repository getRepository() {
 	 * @return the name of this entry.
 	 */
 	public String getName() {
-		try {
-			return nameUTF8 != null ? new String(nameUTF8,
-					Constants.CHARACTER_ENCODING) : null;
-		} catch (UnsupportedEncodingException uee) {
-			throw new RuntimeException("JVM doesn't support "
-					+ Constants.CHARACTER_ENCODING, uee);
-		}
+		if (nameUTF8 != null)
+			return RawParseUtils.decode(nameUTF8);
+		return null;
 	}
 
 	/**
@@ -142,7 +138,7 @@ public String getName() {
 	 * @throws IOException
 	 */
 	public void rename(final String n) throws IOException {
-		rename(n.getBytes(Constants.CHARACTER_ENCODING));
+		rename(Constants.encode(n));
 	}
 
 	/**
diff --git a/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java b/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
index 6c0e339..4b96439 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
@@ -379,6 +379,38 @@ public static PersonIdent parsePersonIdent(final byte[] raw, final int nameB) {
 	}
 
 	/**
+	 * Decode a buffer under UTF-8, if possible.
+	 *
+	 * If the byte stream cannot be decoded that way, the platform default is tried
+	 * and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
+	 * 
+	 * @param buffer
+	 *            buffer to pull raw bytes from.
+	 * @return a string representation of the range <code>[start,end)</code>,
+	 *         after decoding the region through the specified character set.
+	 */
+	public static String decode(final byte[] buffer) {
+		return decode(Constants.CHARSET, buffer, 0, buffer.length);
+	}
+
+	/**
+	 * Decode a buffer under the specified character set if possible.
+	 *
+	 * If the byte stream cannot be decoded that way, the platform default is tried
+	 * and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
+	 * 
+	 * @param cs
+	 *            character set to use when decoding the buffer.
+	 * @param buffer
+	 *            buffer to pull raw bytes from.
+	 * @return a string representation of the range <code>[start,end)</code>,
+	 *         after decoding the region through the specified character set.
+	 */
+	public static String decode(final Charset cs, final byte[] buffer) {
+		return decode(cs, buffer, 0, buffer.length);
+	}
+
+	/**
 	 * Decode a region of the buffer under the specified character set if possible.
 	 *
 	 * If the byte stream cannot be decoded that way, the platform default is tried
-- 
1.6.0.2.308.gef4a

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2008-10-19 18:26 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-10-19 13:29 [JGIT PATCH] Encode/decode index and tree entries using UTF-8 Robin Rosenberg
2008-10-19 17:14 ` Shawn O. Pearce
2008-10-19 18:24   ` [JGIT PATCH v2] " Robin Rosenberg

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).