From: Joel Holdsworth <jholdsworth@nvidia.com>
To: git@vger.kernel.org
Cc: Tzadik Vanderhoof <tzadik.vanderhoof@gmail.com>,
Dorgon Chang <dorgonman@hotmail.com>,
Joachim Kuebart <joachim.kuebart@gmail.com>,
Daniel Levin <dendy.ua@gmail.com>,
Johannes Schindelin <johannes.schindelin@gmx.de>,
Luke Diamand <luke@diamand.org>, Ben Keene <seraphire@gmail.com>,
Andrew Oakley <andrew@adoakley.name>,
Joel Holdsworth <jholdsworth@nvidia.com>
Subject: [PATCH v2 2/5] git-p4: pre-compile RCS keyword regexes
Date: Thu, 16 Dec 2021 13:46:16 +0000 [thread overview]
Message-ID: <20211216134619.2048348-3-jholdsworth@nvidia.com> (raw)
In-Reply-To: <20211216134619.2048348-1-jholdsworth@nvidia.com>
Previously git-p4.py would compile one of two regular expressions for
ever RCS keyword-enabled file. This patch improves simplifies the code
by pre-compiling the two regular expressions when the script first
loads.
Signed-off-by: Joel Holdsworth <jholdsworth@nvidia.com>
---
git-p4.py | 48 ++++++++++++++++++------------------------------
1 file changed, 18 insertions(+), 30 deletions(-)
diff --git a/git-p4.py b/git-p4.py
index 226cdef424..0af83b9c72 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -56,6 +56,9 @@
p4_access_checked = False
+re_ko_keywords = re.compile(r'\$(Id|Header)(:[^$\n]+)?\$')
+re_k_keywords = re.compile(r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision)(:[^$\n]+)?\$')
+
def p4_build_cmd(cmd):
"""Build a suitable p4 command line.
@@ -577,20 +580,12 @@ def p4_type(f):
#
def p4_keywords_regexp_for_type(base, type_mods):
if base in ("text", "unicode", "binary"):
- kwords = None
if "ko" in type_mods:
- kwords = 'Id|Header'
+ return re_ko_keywords
elif "k" in type_mods:
- kwords = 'Id|Header|Author|Date|DateTime|Change|File|Revision'
+ return re_k_keywords
else:
return None
- pattern = r"""
- \$ # Starts with a dollar, followed by...
- (%s) # one of the keywords, followed by...
- (:[^$\n]+)? # possibly an old expansion, followed by...
- \$ # another dollar
- """ % kwords
- return pattern
else:
return None
@@ -1753,15 +1748,13 @@ def prepareLogMessage(self, template, message, jobs):
return result
- def patchRCSKeywords(self, file, pattern):
- # Attempt to zap the RCS keywords in a p4 controlled file matching the given pattern
+ def patchRCSKeywords(self, file, regexp):
+ # Attempt to zap the RCS keywords in a p4 controlled file matching the given regex
(handle, outFileName) = tempfile.mkstemp(dir='.')
try:
with os.fdopen(handle, "w+") as outFile, open(file, "r") as inFile:
- regexp = re.compile(pattern, re.VERBOSE)
for line in inFile.readlines():
- line = regexp.sub(r'$\1$', line)
- outFile.write(line)
+ outFile.write(regexp.sub(r'$\1$', line))
# Forcibly overwrite the original file
os.unlink(file)
shutil.move(outFileName, file)
@@ -2088,25 +2081,22 @@ def applyCommit(self, id):
# the patch to see if that's possible.
if gitConfigBool("git-p4.attemptRCSCleanup"):
file = None
- pattern = None
kwfiles = {}
for file in editedFiles | filesToDelete:
# did this file's delta contain RCS keywords?
- pattern = p4_keywords_regexp_for_file(file)
-
- if pattern:
+ regexp = p4_keywords_regexp_for_file(file)
+ if regexp:
# this file is a possibility...look for RCS keywords.
- regexp = re.compile(pattern, re.VERBOSE)
for line in read_pipe_lines(["git", "diff", "%s^..%s" % (id, id), file]):
if regexp.search(line):
if verbose:
- print("got keyword match on %s in %s in %s" % (pattern, line, file))
- kwfiles[file] = pattern
+ print("got keyword match on %s in %s in %s" % (regex.pattern, line, file))
+ kwfiles[file] = regexp
break
- for file in kwfiles:
+ for file, regexp in kwfiles.items():
if verbose:
- print("zapping %s with %s" % (line,pattern))
+ print("zapping %s with %s" % (line, regexp.pattern))
# File is being deleted, so not open in p4. Must
# disable the read-only bit on windows.
if self.isWindows and file not in editedFiles:
@@ -3026,12 +3016,10 @@ def streamOneP4File(self, file, contents):
# Note that we do not try to de-mangle keywords on utf16 files,
# even though in theory somebody may want that.
- pattern = p4_keywords_regexp_for_type(type_base, type_mods)
- if pattern:
- regexp = re.compile(pattern, re.VERBOSE)
- text = ''.join(decode_text_stream(c) for c in contents)
- text = regexp.sub(r'$\1$', text)
- contents = [ encode_text_stream(text) ]
+ regexp = p4_keywords_regexp_for_type(type_base, type_mods)
+ if regexp:
+ contents = [encode_text_stream(regexp.sub(
+ r'$\1$', ''.join(decode_text_stream(c) for c in contents)))]
if self.largeFileSystem:
(git_mode, contents) = self.largeFileSystem.processContent(git_mode, relPath, contents)
--
2.34.1
next prev parent reply other threads:[~2021-12-16 13:47 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-12-16 13:46 [PATCH v2 0/5] git-p4: fix RCS keyword processing encoding errors Joel Holdsworth
2021-12-16 13:46 ` [PATCH v2 1/5] git-p4: use with statements to close files after use in patchRCSKeywords Joel Holdsworth
2021-12-16 13:46 ` Joel Holdsworth [this message]
2021-12-16 13:46 ` [PATCH v2 3/5] git-p4: add raw option to read_pipelines Joel Holdsworth
2021-12-16 13:46 ` [PATCH v2 4/5] git-p4: open temporary patch file for write only Joel Holdsworth
2021-12-16 13:46 ` [PATCH v2 5/5] git-p4: resolve RCS keywords in bytes not utf-8 Joel Holdsworth
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211216134619.2048348-3-jholdsworth@nvidia.com \
--to=jholdsworth@nvidia.com \
--cc=andrew@adoakley.name \
--cc=dendy.ua@gmail.com \
--cc=dorgonman@hotmail.com \
--cc=git@vger.kernel.org \
--cc=joachim.kuebart@gmail.com \
--cc=johannes.schindelin@gmx.de \
--cc=luke@diamand.org \
--cc=seraphire@gmail.com \
--cc=tzadik.vanderhoof@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).