git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
blob bf905c3f9b66ac9bc48f9575250ec474f8dc7954 6233 bytes (raw)
name: pack-objects.h 	 # note: path name is non-authoritative(*)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
 
#ifndef PACK_OBJECTS_H
#define PACK_OBJECTS_H

/*
 * Bit widths of the bitfields in struct object_entry.
 *
 * OE_DFS_STATE_BITS sizes "dfs_state", OE_DEPTH_BITS sizes "depth" and
 * OE_IN_PACK_BITS sizes "in_pack_idx". OE_IN_PACK_BITS also fixes the
 * number of slots in packing_data.in_pack[] (1 << OE_IN_PACK_BITS), which
 * is the limit oe_add_pack() enforces.
 */
#define OE_DFS_STATE_BITS	2
#define OE_DEPTH_BITS		12
#define OE_IN_PACK_BITS		14

/*
 * State flags for the depth-first search that analyzes delta cycles.
 * The value is kept in the OE_DFS_STATE_BITS-wide "dfs_state" bitfield
 * of struct object_entry.
 *
 * Depth is counted in delta-links to the base: if A is a delta against
 * B, then A has a depth of 1 and B a depth of 0.
 */
enum dfs_state {
	DFS_NONE = 0,
	DFS_ACTIVE = 1,
	DFS_DONE = 2,
	DFS_NUM_STATES = 3	/* count of the states above, not a state */
};

/*
 * The size of this struct very nearly determines pack-objects's memory
 * consumption. This struct is packed tight for that reason. When you
 * add or reorder something in this struct, think a bit about this.
 *
 * basic object info
 * -----------------
 * idx.oid is filled in before delta searching starts. idx.crc32 is
 * only valid after the object is written out and will be used for
 * generating the index. idx.offset will be both gradually set and
 * used in the writing phase (base objects get their offset first, then
 * deltas refer to them).
 *
 * "size" is the uncompressed object size. The compressed size of the raw
 * data for an object in a pack is not stored anywhere but is computed
 * and made available when the reverse .idx is made.
 *
 * "hash" contains a path name hash which is used for sorting the
 * delta list and also during delta searching. Once prepare_pack()
 * returns it's no longer needed.
 *
 * source pack info
 * ----------------
 * The (in_pack, in_pack_offset) tuple contains the location of the
 * object in the source pack. The pack itself is not stored here:
 * in_pack_idx is an index into packing_data.in_pack[] and is resolved
 * with oe_in_pack(). in_pack_header_size allows quickly skipping the
 * header and going straight to the zlib stream.
 *
 * "type" and "in_pack_type" both describe the object type. in_pack_type
 * may contain a delta type, while type is always the canonical type.
 * The canonical type lives in type_/type_valid; use oe_type() and
 * oe_set_type() rather than touching those fields directly.
 *
 * deltas
 * ------
 * Delta links (delta, delta_child and delta_sibling) are created to
 * reflect the delta graph from the source pack, then updated or added
 * during the delta searching phase when we find better deltas.
 *
 * delta_child and delta_sibling are last needed in
 * compute_write_order(). "delta" and "delta_size" must remain valid
 * at the object writing phase in case the delta is not cached.
 *
 * If a delta is cached in memory and is compressed, delta_data points
 * to the data and z_delta_size contains the compressed size. If it's
 * uncompressed [1], z_delta_size must be zero. delta_size is always
 * the uncompressed size and must be valid even if the delta is not
 * cached.
 *
 * [1] during the try_delta phase we don't bother with compressing because
 * the delta could be quickly replaced with a better one.
 */
struct object_entry {
	struct pack_idx_entry idx;
	unsigned long size;	/* uncompressed size */
	unsigned in_pack_idx:OE_IN_PACK_BITS;	/* index into packing_data.in_pack[] */
	off_t in_pack_offset;
	struct object_entry *delta;	/* delta base object */
	struct object_entry *delta_child; /* deltified objects that use me as base */
	struct object_entry *delta_sibling; /* other deltified objects that
					     * use the same base as me
					     */
	void *delta_data;	/* cached delta; compressed iff z_delta_size != 0 */
	unsigned long delta_size;	/* delta data size (uncompressed) */
	unsigned long z_delta_size;	/* delta data size (compressed) */
	unsigned type_:TYPE_BITS;	/* canonical type; read via oe_type() */
	unsigned in_pack_type:TYPE_BITS; /* could be delta */
	unsigned type_valid:1;	/* when zero, oe_type() reports OBJ_BAD */
	uint32_t hash;			/* name hint hash */
	unsigned char in_pack_header_size;
	unsigned preferred_base:1; /*
				    * we do not pack this, but it is available
				    * to be used as the base object to delta
				    * objects against.
				    */
	unsigned no_try_delta:1;
	unsigned tagged:1; /* near the very tip of refs */
	unsigned filled:1; /* assigned write-order */
	unsigned dfs_state:OE_DFS_STATE_BITS;	/* holds an enum dfs_state value */
	unsigned depth:OE_DEPTH_BITS;
};

/*
 * Container for the object entries being packed plus the side tables
 * that the oe_*() accessors in this header read and write.
 */
struct packing_data {
	/*
	 * Array of entries; the oe_*_in_pack_pos() helpers use an entry's
	 * position in this array (e - objects) as its key.
	 * NOTE(review): nr_objects/nr_alloc look like the used/allocated
	 * counts for this array -- confirm against pack-objects.c.
	 */
	struct object_entry *objects;
	uint32_t nr_objects, nr_alloc;

	/*
	 * NOTE(review): not used in this header; presumably the lookup
	 * table behind packlist_find() -- confirm against pack-objects.c.
	 */
	int32_t *index;
	uint32_t index_size;

	/* per-object value, indexed by the entry's position in "objects" */
	unsigned int *in_pack_pos;
	/* number of slots of in_pack[] handed out by oe_add_pack() so far */
	int in_pack_count;
	/* source packs, indexed by object_entry.in_pack_idx */
	struct packed_git *in_pack[1 << OE_IN_PACK_BITS];
};

/*
 * Declared here, defined elsewhere.
 * NOTE(review): presumably adds a new entry for "sha1" to pdata and
 * returns it, with "index_pos" being the slot previously reported by
 * packlist_find() -- confirm against the definition.
 */
struct object_entry *packlist_alloc(struct packing_data *pdata,
				    const unsigned char *sha1,
				    uint32_t index_pos);

/*
 * Declared here, defined elsewhere.
 * NOTE(review): presumably looks up the entry for "sha1" in pdata and
 * reports its table slot through "index_pos" -- confirm against the
 * definition, including what is returned when nothing is found.
 */
struct object_entry *packlist_find(struct packing_data *pdata,
				   const unsigned char *sha1,
				   uint32_t *index_pos);

/*
 * Compute the name-hint hash for a path.
 *
 * Returns 0 for a NULL name. Otherwise every non-whitespace character
 * shifts the running value right by two bits and is added into the top
 * byte, so only the last sixteen non-whitespace characters contribute
 * and the final ones weigh the most (names ending in ".c" sort
 * together).
 */
static inline uint32_t pack_name_hash(const char *name)
{
	uint32_t hash = 0;
	const char *p;

	if (!name)
		return 0;

	for (p = name; *p; p++) {
		uint32_t c = *p;

		if (isspace(c))
			continue;
		/* age the older characters, put the newest on top */
		hash = (hash >> 2) + (c << 24);
	}
	return hash;
}

/*
 * Return the canonical type stored in the entry, or OBJ_BAD when the
 * entry's type_valid flag is clear.
 */
static inline enum object_type oe_type(const struct object_entry *e)
{
	if (!e->type_valid)
		return OBJ_BAD;
	return e->type_;
}

/*
 * Store the canonical type in the entry.
 *
 * Dies if "type" is OBJ_ANY or greater. Types below OBJ_NONE are
 * recorded as invalid (type_valid cleared), so oe_type() will report
 * OBJ_BAD for them.
 */
static inline void oe_set_type(struct object_entry *e,
			       enum object_type type)
{
	if (type >= OBJ_ANY)
		die("BUG: OBJ_ANY cannot be set in pack-objects code");

	e->type_valid = (type >= OBJ_NONE) ? 1 : 0;
	e->type_ = (unsigned)type;
}

/*
 * Read the in_pack_pos value recorded for entry "e". The side table is
 * keyed by the entry's position inside pack->objects, so "e" must
 * point into that array.
 */
static inline unsigned int oe_in_pack_pos(const struct packing_data *pack,
					  const struct object_entry *e)
{
	const unsigned int *slot = pack->in_pack_pos + (e - pack->objects);

	return *slot;
}

/*
 * Record "pos" as the in_pack_pos value for entry "e". The side table
 * is keyed by the entry's position inside pack->objects. Only the
 * struct is const-qualified here; the table it points at is written.
 */
static inline void oe_set_in_pack_pos(const struct packing_data *pack,
				      const struct object_entry *e,
				      unsigned int pos)
{
	unsigned int *slot = pack->in_pack_pos + (e - pack->objects);

	*slot = pos;
}

/*
 * Append "p" (which may be NULL) to pack->in_pack[] and return the slot
 * it was stored in.
 *
 * Dies when all (1 << OE_IN_PACK_BITS) slots are taken, or when a
 * non-NULL "p" already carries a positive index. For a non-NULL "p"
 * the assigned slot is also written to p->index.
 */
static inline unsigned int oe_add_pack(struct packing_data *pack,
				       struct packed_git *p)
{
	const int max_packs = 1 << OE_IN_PACK_BITS;
	unsigned int slot;

	if (pack->in_pack_count >= max_packs)
		die(_("too many packs to handle in one go. "
		      "Please add .keep files to exclude\n"
		      "some pack files and keep the number "
		      "of non-kept files below %d."),
		    max_packs);

	if (p) {
		if (p->index > 0)
			die("BUG: this packed is already indexed");
		p->index = pack->in_pack_count;
	}

	slot = pack->in_pack_count++;
	pack->in_pack[slot] = p;
	return slot;
}

/*
 * Resolve the entry's in_pack_idx to the packed_git stored in that
 * slot of pack->in_pack[].
 */
static inline struct packed_git *oe_in_pack(const struct packing_data *pack,
					    const struct object_entry *e)
{
	const unsigned int slot = e->in_pack_idx;

	return pack->in_pack[slot];
}

/*
 * Point entry "e" at source pack "p" by copying p->index into
 * e->in_pack_idx. "p" is dereferenced unconditionally, so it must not
 * be NULL. Dies if p->index is not positive.
 */
static inline void oe_set_in_pack(struct object_entry *e,
				  struct packed_git *p)
{
	if (!(p->index > 0))
		die("BUG: found_pack should be NULL "
		    "instead of having non-positive index");

	e->in_pack_idx = p->index;
}

#endif

debug log:

solving bf905c3f9b ...
found bf905c3f9b in https://public-inbox.org/git/20180318142526.9378-7-pclouds@gmail.com/
found 4a11653657 in https://public-inbox.org/git/20180324063353.24722-6-pclouds@gmail.com/ ||
	https://public-inbox.org/git/20180318142526.9378-6-pclouds@gmail.com/
found 59407aae3c in https://public-inbox.org/git/20180324063353.24722-5-pclouds@gmail.com/ ||
	https://public-inbox.org/git/20180318142526.9378-5-pclouds@gmail.com/
found 8507e1b869 in https://public-inbox.org/git/20180324063353.24722-4-pclouds@gmail.com/ ||
	https://public-inbox.org/git/20180318142526.9378-4-pclouds@gmail.com/
found b883d7aa10 in https://public-inbox.org/git/20180324063353.24722-3-pclouds@gmail.com/ ||
	https://public-inbox.org/git/20180318142526.9378-3-pclouds@gmail.com/
found c0a1f61aac in https://80x24.org/mirrors/git.git
preparing index
index prepared:
100644 c0a1f61aac85cb2597d4ca197e899cfdebc0d28a	pack-objects.h

applying [1/5] https://public-inbox.org/git/20180324063353.24722-3-pclouds@gmail.com/
diff --git a/pack-objects.h b/pack-objects.h
index c0a1f61aac..b883d7aa10 100644

Checking patch pack-objects.h...
Applied patch pack-objects.h cleanly.

skipping https://public-inbox.org/git/20180318142526.9378-3-pclouds@gmail.com/ for b883d7aa10
index at:
100644 b883d7aa10923068f10739f775c7e93219a716ee	pack-objects.h

applying [2/5] https://public-inbox.org/git/20180324063353.24722-4-pclouds@gmail.com/
diff --git a/pack-objects.h b/pack-objects.h
index b883d7aa10..8507e1b869 100644

Checking patch pack-objects.h...
Applied patch pack-objects.h cleanly.

skipping https://public-inbox.org/git/20180318142526.9378-4-pclouds@gmail.com/ for 8507e1b869
index at:
100644 8507e1b869485bb5dcb6f65599b761901e12c748	pack-objects.h

applying [3/5] https://public-inbox.org/git/20180324063353.24722-5-pclouds@gmail.com/
diff --git a/pack-objects.h b/pack-objects.h
index 8507e1b869..59407aae3c 100644

Checking patch pack-objects.h...
Applied patch pack-objects.h cleanly.

skipping https://public-inbox.org/git/20180318142526.9378-5-pclouds@gmail.com/ for 59407aae3c
index at:
100644 59407aae3c991ceba0e1214775d5a7c881e16041	pack-objects.h

applying [4/5] https://public-inbox.org/git/20180324063353.24722-6-pclouds@gmail.com/
diff --git a/pack-objects.h b/pack-objects.h
index 59407aae3c..4a11653657 100644

Checking patch pack-objects.h...
Applied patch pack-objects.h cleanly.

skipping https://public-inbox.org/git/20180318142526.9378-6-pclouds@gmail.com/ for 4a11653657
index at:
100644 4a1165365789b93085f0393e9685e96371fbf2e3	pack-objects.h

applying [5/5] https://public-inbox.org/git/20180318142526.9378-7-pclouds@gmail.com/
diff --git a/pack-objects.h b/pack-objects.h
index 4a11653657..bf905c3f9b 100644

Checking patch pack-objects.h...
Applied patch pack-objects.h cleanly.

index at:
100644 bf905c3f9b66ac9bc48f9575250ec474f8dc7954	pack-objects.h

(*) Git path names are given by the tree(s) the blob belongs to.
    Blobs themselves have no identifier aside from the hash of their contents.

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).