* [JGIT PATCH] Encode/decode index and tree entries using UTF-8
@ 2008-10-19 13:29 Robin Rosenberg
2008-10-19 17:14 ` Shawn O. Pearce
0 siblings, 1 reply; 3+ messages in thread
From: Robin Rosenberg @ 2008-10-19 13:29 UTC (permalink / raw
To: spearce; +Cc: git
Decoding uses the same strategy as for commit messages and other string
entities. Encoding is always done in UTF-8. This is incompatible with
Git for non-unicode unices, but it leads to the expected behavior on
Windows and cross-locale sharing of repositories.
Signed-off-by: Robin Rosenberg <robin.rosnberg@dewire.com>
---
Inspired by the recent thread on the gitml, I decided to clean up jgit a little. I
know the GitIndex is soon to be obsoleted, but it is still the class that does
the dirty work when committing in Egit and the changes are fairly simple
anyway.
- Unicode paths will work on all platforms that support unicode, i.e. Windows
and any unix using a UTF-8 locale, with one small exception. Accented characters
on OS-X probably do not work well.
- Combined use of unicode on one platform is compatible with non-unicode locales
on other platforms as long as the characters in use are available in the local character
set.
A side note: invalid byte sequences in unix, e.g. ISO-latin-1 encoded file names, cannot
work in Java. Such files are inaccessible. Jgit will allow you to rename them in the index,
but that is all.
-- robin
.../src/org/spearce/jgit/lib/GitIndex.java | 27 +++++++++++---------
.../src/org/spearce/jgit/lib/Tree.java | 11 +++----
.../src/org/spearce/jgit/lib/TreeEntry.java | 13 +++------
3 files changed, 25 insertions(+), 26 deletions(-)
diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java b/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
index 22935ab..3d37033 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
@@ -63,6 +63,7 @@
import org.spearce.jgit.errors.CorruptObjectException;
import org.spearce.jgit.errors.NotSupportedException;
import org.spearce.jgit.util.FS;
+import org.spearce.jgit.util.RawParseUtils;
/**
* A representation of the Git index.
@@ -178,8 +179,9 @@ public Entry add(File wd, File f) throws IOException {
* @param f
* the file whose path shall be removed.
* @return true if such a path was found (and thus removed)
+ * @throws IOException
*/
- public boolean remove(File wd, File f) {
+ public boolean remove(File wd, File f) throws IOException {
byte[] key = makeKey(wd, f);
return entries.remove(key) != null;
}
@@ -300,11 +302,11 @@ static boolean File_hasExecute() {
return FS.INSTANCE.supportsExecute();
}
- static byte[] makeKey(File wd, File f) {
+ static byte[] makeKey(File wd, File f) throws IOException {
if (!f.getPath().startsWith(wd.getPath()))
throw new Error("Path is not in working dir");
String relName = Repository.stripWorkDir(wd, f);
- return relName.getBytes();
+ return relName.getBytes(Constants.CHARACTER_ENCODING);
}
Boolean filemode;
@@ -376,7 +378,7 @@ Entry(TreeEntry f, int stage)
size = -1;
}
sha1 = f.getId();
- name = f.getFullName().getBytes("UTF-8");
+ name = f.getFullName().getBytes(Constants.CHARACTER_ENCODING);
flags = (short) ((stage << 12) | name.length); // TODO: fix flags
}
@@ -580,7 +582,7 @@ private File getFile(File wd) {
}
public String toString() {
- return new String(name) + "/SHA-1(" + sha1.name() + ")/M:"
+ return getName() + "/SHA-1(" + sha1.name() + ")/M:"
+ new Date(ctime / 1000000L) + "/C:"
+ new Date(mtime / 1000000L) + "/d" + dev + "/i" + ino
+ "/m" + Integer.toString(mode, 8) + "/u" + uid + "/g"
@@ -591,7 +593,7 @@ public String toString() {
* @return path name for this entry
*/
public String getName() {
- return new String(name);
+ return RawParseUtils.decode(Constants.CHARSET, name, 0, name.length);
}
/**
@@ -731,7 +733,7 @@ void readTree(String prefix, Tree t) throws IOException {
readTree(name, (Tree) te);
} else {
Entry e = new Entry(te, 0);
- entries.put(name.getBytes("UTF-8"), e);
+ entries.put(name.getBytes(Constants.CHARACTER_ENCODING), e);
}
}
}
@@ -743,7 +745,7 @@ void readTree(String prefix, Tree t) throws IOException {
* @throws IOException
*/
public Entry addEntry(TreeEntry te) throws IOException {
- byte[] key = te.getFullName().getBytes("UTF-8");
+ byte[] key = te.getFullName().getBytes(Constants.CHARACTER_ENCODING);
Entry e = new Entry(te, 0);
entries.put(key, e);
return e;
@@ -825,7 +827,7 @@ public ObjectId writeTree() throws IOException {
while (trees.size() < newName.length) {
if (!current.existsTree(newName[trees.size() - 1])) {
current = new Tree(current, newName[trees.size() - 1]
- .getBytes());
+ .getBytes(Constants.CHARACTER_ENCODING));
current.getParent().addEntry(current);
trees.push(current);
} else {
@@ -835,7 +837,7 @@ public ObjectId writeTree() throws IOException {
}
}
FileTreeEntry ne = new FileTreeEntry(current, e.sha1,
- newName[newName.length - 1].getBytes(),
+ newName[newName.length - 1].getBytes(Constants.CHARACTER_ENCODING),
(e.mode & FileMode.EXECUTABLE_FILE.getBits()) == FileMode.EXECUTABLE_FILE.getBits());
current.addEntry(ne);
}
@@ -880,7 +882,7 @@ int longestCommonPath(String[] a, String[] b) {
* Small beware: Unaccounted for are unmerged entries. You may want
* to abort if members with stage != 0 are found if you are doing
* any updating operations. All stages will be found after one another
- * here later. Currently only one stage per name is returned.
+ * here later. Currently only one stage per name is returned.
*
* @return The index entries sorted
*/
@@ -896,7 +898,8 @@ int longestCommonPath(String[] a, String[] b) {
* @throws UnsupportedEncodingException
*/
public Entry getEntry(String path) throws UnsupportedEncodingException {
- return (Entry) entries.get(Repository.gitInternalSlash(path.getBytes("ISO-8859-1")));
+ return (Entry) entries.get(Repository.gitInternalSlash(path
+ .getBytes(Constants.CHARACTER_ENCODING)));
}
/**
diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java b/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
index 25a9a71..3fd3d30 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
@@ -44,6 +44,7 @@
import org.spearce.jgit.errors.CorruptObjectException;
import org.spearce.jgit.errors.EntryExistsException;
import org.spearce.jgit.errors.MissingObjectException;
+import org.spearce.jgit.util.RawParseUtils;
/**
* A representation of a Git tree entry. A Tree is a directory in Git.
@@ -281,8 +282,7 @@ public FileTreeEntry addFile(final byte[] s, final int offset)
final byte[] newName = substring(s, offset, slash);
if (p >= 0)
- throw new EntryExistsException(new String(newName,
- Constants.CHARACTER_ENCODING));
+ throw new EntryExistsException(RawParseUtils.decode(Constants.CHARSET, newName, 0, newName.length));
else if (slash < s.length) {
final Tree t = new Tree(this, newName);
insertEntry(p, t);
@@ -332,8 +332,8 @@ public Tree addTree(final byte[] s, final int offset) throws IOException {
final byte[] newName = substring(s, offset, slash);
if (p >= 0)
- throw new EntryExistsException(new String(newName,
- Constants.CHARACTER_ENCODING));
+ throw new EntryExistsException(RawParseUtils.decode(
+ Constants.CHARSET, newName, 0, newName.length));
final Tree t = new Tree(this, newName);
insertEntry(p, t);
@@ -355,8 +355,7 @@ public void addEntry(final TreeEntry e) throws IOException {
e.attachParent(this);
insertEntry(p, e);
} else {
- throw new EntryExistsException(new String(e.getNameUTF8(),
- Constants.CHARACTER_ENCODING));
+ throw new EntryExistsException(e.getName());
}
}
diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java b/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
index 85dda1d..7f58056 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
@@ -39,9 +39,9 @@
package org.spearce.jgit.lib;
import java.io.IOException;
-import java.io.UnsupportedEncodingException;
import org.spearce.jgit.lib.GitIndex.Entry;
+import org.spearce.jgit.util.RawParseUtils;
/**
* This class represents an entry in a tree, like a blob or another tree.
@@ -126,13 +126,10 @@ public Repository getRepository() {
* @return the name of this entry.
*/
public String getName() {
- try {
- return nameUTF8 != null ? new String(nameUTF8,
- Constants.CHARACTER_ENCODING) : null;
- } catch (UnsupportedEncodingException uee) {
- throw new RuntimeException("JVM doesn't support "
- + Constants.CHARACTER_ENCODING, uee);
- }
+ if (nameUTF8 != null)
+ return RawParseUtils.decode(Constants.CHARSET, nameUTF8, 0,
+ nameUTF8.length);
+ return null;
}
/**
--
1.6.0.2.308.gef4a
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [JGIT PATCH] Encode/decode index and tree entries using UTF-8
2008-10-19 13:29 [JGIT PATCH] Encode/decode index and tree entries using UTF-8 Robin Rosenberg
@ 2008-10-19 17:14 ` Shawn O. Pearce
2008-10-19 18:24 ` [JGIT PATCH v2] " Robin Rosenberg
0 siblings, 1 reply; 3+ messages in thread
From: Shawn O. Pearce @ 2008-10-19 17:14 UTC (permalink / raw
To: Robin Rosenberg; +Cc: git
Robin Rosenberg <robin.rosenberg.lists@dewire.com> wrote:
> Decoding uses the same strategy as for commit messages and other string
> entities. Encoding is always done in UTF-8. This is incompatible with
> Git for non-unicode unices, but it leads to the expected behavior on
> Windows and cross-locale sharing of repositories.
FWIW I think this is a good idea.
> Inspired by the recent thread on the gitml, I decided to clean up jgit a little. I
> know the GitIndex is soon to be obsoleted, but it is still the class that does
> the dirty work when committing in Egit and the changes are fairly simple
> anyway.
Yup, I agree.
I mostly agree with the patch, but there is a utility function you
are not using:
> @@ -300,11 +302,11 @@ static boolean File_hasExecute() {
> return FS.INSTANCE.supportsExecute();
> }
>
> - static byte[] makeKey(File wd, File f) {
> + static byte[] makeKey(File wd, File f) throws IOException {
> if (!f.getPath().startsWith(wd.getPath()))
> throw new Error("Path is not in working dir");
> String relName = Repository.stripWorkDir(wd, f);
> - return relName.getBytes();
> + return relName.getBytes(Constants.CHARACTER_ENCODING);
> }
Instead of "relName.getBytes(Constants.CHARACTER_ENCODING)" use
"Constants.encode(relName)". It's shorter and faster.
> @@ -591,7 +593,7 @@ public String toString() {
> * @return path name for this entry
> */
> public String getName() {
> - return new String(name);
> + return RawParseUtils.decode(Constants.CHARSET, name, 0, name.length);
Heh. That's actually a common idiom. We probably should add:
String decode(final byte[] arr) {
return decode(Constants.CHARSET, arr, 0, arr.length);
}
to RawParseUtils to make these whole-array decode calls easier
to write.
I think you should squash this into your patch, and fix up the
getBytes and decode calls as I noted above before we apply this.
diff --git a/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheEntry.java b/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheEntry.java
index cc683d7..913f3ae 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheEntry.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheEntry.java
@@ -42,7 +42,6 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
-import java.nio.ByteBuffer;
import java.util.Arrays;
import org.spearce.jgit.lib.AnyObjectId;
@@ -50,6 +49,7 @@
import org.spearce.jgit.lib.FileMode;
import org.spearce.jgit.lib.ObjectId;
import org.spearce.jgit.util.NB;
+import org.spearce.jgit.util.RawParseUtils;
/**
* A single file (or stage of a file) in a {@link DirCache}.
@@ -405,7 +405,7 @@ public void setObjectIdFromRaw(final byte[] bs, final int p) {
* returned string.
*/
public String getPathString() {
- return Constants.CHARSET.decode(ByteBuffer.wrap(path)).toString();
+ return RawParseUtils.decode(path);
}
/**
diff --git a/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheTree.java b/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheTree.java
index 26b6348..589894a 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheTree.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/dircache/DirCacheTree.java
@@ -39,7 +39,6 @@
import java.io.IOException;
import java.io.OutputStream;
-import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Comparator;
@@ -251,8 +250,7 @@ ObjectId getObjectId() {
* @return name of the tree. This does not contain any '/' characters.
*/
public String getNameString() {
- final ByteBuffer bb = ByteBuffer.wrap(encodedName);
- return Constants.CHARSET.decode(bb).toString();
+ return RawParseUtils.decode(encodedName);
}
/**
diff --git a/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java b/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
index 6c0e339..2519f19 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
@@ -379,6 +379,21 @@ public static PersonIdent parsePersonIdent(final byte[] raw, final int nameB) {
}
/**
+ * Decode a region of the buffer from the default character set (UTF-8).
+ *
+ * If the byte stream cannot be decoded that way, the platform default is
+ * tried and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
+ *
+ * @param buffer
+ * buffer to pull raw bytes from.
+ * @return a string representation of the entire buffer, after decoding the
+ * region through the specified character set.
+ */
+ public static String decode(final byte[] buffer) {
+ return decode(Constants.CHARSET, buffer, 0, buffer.length);
+ }
+
+ /**
* Decode a region of the buffer under the specified character set if possible.
*
* If the byte stream cannot be decoded that way, the platform default is tried
--
Shawn.
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [JGIT PATCH v2] Encode/decode index and tree entries using UTF-8
2008-10-19 17:14 ` Shawn O. Pearce
@ 2008-10-19 18:24 ` Robin Rosenberg
0 siblings, 0 replies; 3+ messages in thread
From: Robin Rosenberg @ 2008-10-19 18:24 UTC (permalink / raw
To: Shawn O. Pearce; +Cc: git
Decoding uses the same strategy as for commit messages and other string
entities. Encoding is always done in UTF-8. This is incompatible with
Git for non-unicode unices, but it leads to the expected behavior on
Windows and cross-locale sharing of repositories.
Signed-off-by: Robin Rosenberg <robin.rosnberg@dewire.com>
---
söndagen den 19 oktober 2008 19.14.56 skrev Shawn O. Pearce:
> Robin Rosenberg <robin.rosenberg.lists@dewire.com> wrote:
> > Decoding uses the same strategy as for commit messages and other string
> > entities. Encoding is always done in UTF-8. This is incompatible with
> > Git for non-unicode unices, but it leads to the expected behavior on
> > Windows and cross-locale sharing of repositories.
>
> FWIW I think this is a good idea.
Ok, so here's the update. We might want to move the encode out of Constants
too as it is no longer a utility for constants.
-- robin
.../src/org/spearce/jgit/lib/GitIndex.java | 27 ++++++++--------
.../src/org/spearce/jgit/lib/Tree.java | 16 ++++-----
.../src/org/spearce/jgit/lib/TreeEntry.java | 14 +++-----
.../src/org/spearce/jgit/util/RawParseUtils.java | 32 ++++++++++++++++++++
4 files changed, 58 insertions(+), 31 deletions(-)
diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java b/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
index 22935ab..bafddef 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
@@ -63,6 +63,7 @@
import org.spearce.jgit.errors.CorruptObjectException;
import org.spearce.jgit.errors.NotSupportedException;
import org.spearce.jgit.util.FS;
+import org.spearce.jgit.util.RawParseUtils;
/**
* A representation of the Git index.
@@ -178,8 +179,9 @@ public Entry add(File wd, File f) throws IOException {
* @param f
* the file whose path shall be removed.
* @return true if such a path was found (and thus removed)
+ * @throws IOException
*/
- public boolean remove(File wd, File f) {
+ public boolean remove(File wd, File f) throws IOException {
byte[] key = makeKey(wd, f);
return entries.remove(key) != null;
}
@@ -300,11 +302,11 @@ static boolean File_hasExecute() {
return FS.INSTANCE.supportsExecute();
}
- static byte[] makeKey(File wd, File f) {
+ static byte[] makeKey(File wd, File f) throws IOException {
if (!f.getPath().startsWith(wd.getPath()))
throw new Error("Path is not in working dir");
String relName = Repository.stripWorkDir(wd, f);
- return relName.getBytes();
+ return Constants.encode(relName);
}
Boolean filemode;
@@ -376,7 +378,7 @@ Entry(TreeEntry f, int stage)
size = -1;
}
sha1 = f.getId();
- name = f.getFullName().getBytes("UTF-8");
+ name = Constants.encode(f.getFullName());
flags = (short) ((stage << 12) | name.length); // TODO: fix flags
}
@@ -580,7 +582,7 @@ private File getFile(File wd) {
}
public String toString() {
- return new String(name) + "/SHA-1(" + sha1.name() + ")/M:"
+ return getName() + "/SHA-1(" + sha1.name() + ")/M:"
+ new Date(ctime / 1000000L) + "/C:"
+ new Date(mtime / 1000000L) + "/d" + dev + "/i" + ino
+ "/m" + Integer.toString(mode, 8) + "/u" + uid + "/g"
@@ -591,7 +593,7 @@ public String toString() {
* @return path name for this entry
*/
public String getName() {
- return new String(name);
+ return RawParseUtils.decode(name);
}
/**
@@ -731,7 +733,7 @@ void readTree(String prefix, Tree t) throws IOException {
readTree(name, (Tree) te);
} else {
Entry e = new Entry(te, 0);
- entries.put(name.getBytes("UTF-8"), e);
+ entries.put(Constants.encode(name), e);
}
}
}
@@ -743,7 +745,7 @@ void readTree(String prefix, Tree t) throws IOException {
* @throws IOException
*/
public Entry addEntry(TreeEntry te) throws IOException {
- byte[] key = te.getFullName().getBytes("UTF-8");
+ byte[] key = Constants.encode(te.getFullName());
Entry e = new Entry(te, 0);
entries.put(key, e);
return e;
@@ -824,8 +826,7 @@ public ObjectId writeTree() throws IOException {
}
while (trees.size() < newName.length) {
if (!current.existsTree(newName[trees.size() - 1])) {
- current = new Tree(current, newName[trees.size() - 1]
- .getBytes());
+ current = new Tree(current, Constants.encode(newName[trees.size() - 1]));
current.getParent().addEntry(current);
trees.push(current);
} else {
@@ -835,7 +836,7 @@ public ObjectId writeTree() throws IOException {
}
}
FileTreeEntry ne = new FileTreeEntry(current, e.sha1,
- newName[newName.length - 1].getBytes(),
+ Constants.encode(newName[newName.length - 1]),
(e.mode & FileMode.EXECUTABLE_FILE.getBits()) == FileMode.EXECUTABLE_FILE.getBits());
current.addEntry(ne);
}
@@ -880,7 +881,7 @@ int longestCommonPath(String[] a, String[] b) {
* Small beware: Unaccounted for are unmerged entries. You may want
* to abort if members with stage != 0 are found if you are doing
* any updating operations. All stages will be found after one another
- * here later. Currently only one stage per name is returned.
+ * here later. Currently only one stage per name is returned.
*
* @return The index entries sorted
*/
@@ -896,7 +897,7 @@ int longestCommonPath(String[] a, String[] b) {
* @throws UnsupportedEncodingException
*/
public Entry getEntry(String path) throws UnsupportedEncodingException {
- return (Entry) entries.get(Repository.gitInternalSlash(path.getBytes("ISO-8859-1")));
+ return (Entry) entries.get(Repository.gitInternalSlash(Constants.encode(path)));
}
/**
diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java b/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
index 25a9a71..0ecd04d 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
@@ -44,6 +44,7 @@
import org.spearce.jgit.errors.CorruptObjectException;
import org.spearce.jgit.errors.EntryExistsException;
import org.spearce.jgit.errors.MissingObjectException;
+import org.spearce.jgit.util.RawParseUtils;
/**
* A representation of a Git tree entry. A Tree is a directory in Git.
@@ -251,7 +252,7 @@ public void unload() {
* @throws IOException
*/
public FileTreeEntry addFile(final String name) throws IOException {
- return addFile(Repository.gitInternalSlash(name.getBytes(Constants.CHARACTER_ENCODING)), 0);
+ return addFile(Repository.gitInternalSlash(Constants.encode(name)), 0);
}
/**
@@ -281,8 +282,7 @@ public FileTreeEntry addFile(final byte[] s, final int offset)
final byte[] newName = substring(s, offset, slash);
if (p >= 0)
- throw new EntryExistsException(new String(newName,
- Constants.CHARACTER_ENCODING));
+ throw new EntryExistsException(RawParseUtils.decode(newName));
else if (slash < s.length) {
final Tree t = new Tree(this, newName);
insertEntry(p, t);
@@ -304,7 +304,7 @@ else if (slash < s.length) {
* @throws IOException
*/
public Tree addTree(final String name) throws IOException {
- return addTree(Repository.gitInternalSlash(name.getBytes(Constants.CHARACTER_ENCODING)), 0);
+ return addTree(Repository.gitInternalSlash(Constants.encode(name)), 0);
}
/**
@@ -332,8 +332,7 @@ public Tree addTree(final byte[] s, final int offset) throws IOException {
final byte[] newName = substring(s, offset, slash);
if (p >= 0)
- throw new EntryExistsException(new String(newName,
- Constants.CHARACTER_ENCODING));
+ throw new EntryExistsException(RawParseUtils.decode(newName));
final Tree t = new Tree(this, newName);
insertEntry(p, t);
@@ -355,8 +354,7 @@ public void addEntry(final TreeEntry e) throws IOException {
e.attachParent(this);
insertEntry(p, e);
} else {
- throw new EntryExistsException(new String(e.getNameUTF8(),
- Constants.CHARACTER_ENCODING));
+ throw new EntryExistsException(e.getName());
}
}
@@ -450,7 +448,7 @@ public boolean existsBlob(String path) throws IOException {
}
private TreeEntry findMember(final String s, byte slast) throws IOException {
- return findMember(Repository.gitInternalSlash(s.getBytes(Constants.CHARACTER_ENCODING)), slast, 0);
+ return findMember(Repository.gitInternalSlash(Constants.encode(s)), slast, 0);
}
private TreeEntry findMember(final byte[] s, final byte slast, final int offset)
diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java b/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
index 85dda1d..c95863c 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
@@ -39,9 +39,9 @@
package org.spearce.jgit.lib;
import java.io.IOException;
-import java.io.UnsupportedEncodingException;
import org.spearce.jgit.lib.GitIndex.Entry;
+import org.spearce.jgit.util.RawParseUtils;
/**
* This class represents an entry in a tree, like a blob or another tree.
@@ -126,13 +126,9 @@ public Repository getRepository() {
* @return the name of this entry.
*/
public String getName() {
- try {
- return nameUTF8 != null ? new String(nameUTF8,
- Constants.CHARACTER_ENCODING) : null;
- } catch (UnsupportedEncodingException uee) {
- throw new RuntimeException("JVM doesn't support "
- + Constants.CHARACTER_ENCODING, uee);
- }
+ if (nameUTF8 != null)
+ return RawParseUtils.decode(nameUTF8);
+ return null;
}
/**
@@ -142,7 +138,7 @@ public String getName() {
* @throws IOException
*/
public void rename(final String n) throws IOException {
- rename(n.getBytes(Constants.CHARACTER_ENCODING));
+ rename(Constants.encode(n));
}
/**
diff --git a/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java b/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
index 6c0e339..4b96439 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
@@ -379,6 +379,38 @@ public static PersonIdent parsePersonIdent(final byte[] raw, final int nameB) {
}
/**
+ * Decode a buffer under UTF-8, if possible.
+ *
+ * If the byte stream cannot be decoded that way, the platform default is tried
+ * and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
+ *
+ * @param buffer
+ * buffer to pull raw bytes from.
+ * @return a string representation of the range <code>[start,end)</code>,
+ * after decoding the region through the specified character set.
+ */
+ public static String decode(final byte[] buffer) {
+ return decode(Constants.CHARSET, buffer, 0, buffer.length);
+ }
+
+ /**
+ * Decode a buffer under the specified character set if possible.
+ *
+ * If the byte stream cannot be decoded that way, the platform default is tried
+ * and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
+ *
+ * @param cs
+ * character set to use when decoding the buffer.
+ * @param buffer
+ * buffer to pull raw bytes from.
+ * @return a string representation of the range <code>[start,end)</code>,
+ * after decoding the region through the specified character set.
+ */
+ public static String decode(final Charset cs, final byte[] buffer) {
+ return decode(cs, buffer, 0, buffer.length);
+ }
+
+ /**
* Decode a region of the buffer under the specified character set if possible.
*
* If the byte stream cannot be decoded that way, the platform default is tried
--
1.6.0.2.308.gef4a
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2008-10-19 18:26 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2008-10-19 13:29 [JGIT PATCH] Encode/decode index and tree entries using UTF-8 Robin Rosenberg
2008-10-19 17:14 ` Shawn O. Pearce
2008-10-19 18:24 ` [JGIT PATCH v2] " Robin Rosenberg
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).