Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
#!/usr/bin/env python Patch utility to apply unified diffs
Brute-force line-by-line non-recursive parsing
Copyright (c) 2008-2015 anatoly techtonik Available under the terms of MIT license
https://github.com/techtonik/python-patch/
"""
# cStringIO doesn't support unicode in 2.5
#------------------------------------------------ # Logging is controlled by logger named after the # module name (e.g. 'patch' for patch.py module)
""" Copied from Python 2.7 to avoid getting `No handlers could be found for logger "patch"` http://bugs.python.org/issue16539 """ pass
# initialize logger itself
global debugmode, streamhandler
debugmode = True loglevel = logging.DEBUG logformat = "%(levelname)8s %(message)s" logger.setLevel(loglevel)
if streamhandler not in logger.handlers: # when used as a library, streamhandler is not added # by default logger.addHandler(streamhandler)
streamhandler.setFormatter(logging.Formatter(logformat))
#------------------------------------------------ # Constants for Patch/PatchSet types
# mixed type is only applicable when PatchSet contains # Patches of different types
#------------------------------------------------ # Helpers (these could come with Python stdlib)
# x...() functions are used to work with paths in a # cross-platform manner - all paths use forward # slashes even on Windows.
""" Cross-platform version of `os.path.isabs()` Returns True if `filename` is absolute on Linux, OS X or Windows. """
""" Cross-platform version of os.path.normpath """ # replace escapes and Windows slashes # fold the result
""" Make relative path out of absolute by stripping prefixes used on Linux, OS X and Windows.
This function is critical for security. """ # strip windows drive with all slashes # strip all slashes
#----------------------------------------------- # Main API functions
""" Parse patch file. If successful, returns PatchSet() object. Otherwise returns False. """
""" Parse text string and return PatchSet() object (or False if parsing fails) """ return False
""" Parse patch from a URL, return False if an error occurred. Note that this can also raise urlopen() exceptions. """ ps = PatchSet( urllib2.urlopen(url) ) if ps.errors == 0: return ps return False
# --- Utility functions --- # [ ] reuse more universal pathsplit() """ Strip n leading components from the given path """ # --- /Utility function ---
""" Parsed hunk data container (hunk starts with @@ -R +R @@) """
# def apply(self, estream): # """ write hunk data into enumerable stream # return strings one by one until hunk is # over # # enumerable stream are tuples (lineno, line) # where lineno starts with 0 # """ # pass
""" Patch for a single file. If used as an iterable, returns hunks. """
for h in self.hunks: yield h
""" PatchSet is a patch parser and container. When used as an iterable, returns patches. """
# --- API accessible fields ---
# name of the PatchSet (filename or ...) # patch set type - one of constants
# list of Patch objects
# --- /API ---
""" parse unified diff return True on success """
# hunkactual variable is used to calculate hunk lines for comparison
"""Enumerate wrapper that uses boolean end of stream status instead of StopIteration exception, and properties to access line information. """
# we don't call parent, it is magically created by __new__ method
"""Try to read the next line and return True if it is available, False if end of stream is reached."""
def is_empty(self):
def line(self):
def lineno(self):
# define states (possible file regions) that direct parse flow
# regexp to match start of hunk, used groups - 1,3,4,6
# temp buffers for header and filenames info
# start of main cycle # each parsing block already has line available in fe.line
# -- deciders: these only switch state to decide who should process # -- line fetched at the start of this cycle filenames = True else: # -- ------------------------------------
# read out header else: # TODO check for \No new line at the end.. # TODO test for unparsed bytes # otherwise error += 1 # this is actually a loop exit
# switch to filenames state
# hunkskip and hunkbody code skipped until definition of hunkhead is parsed # [x] treat empty lines inside hunks as containing single space # (this happens when diff is saved by copy/pasting to editor # that strips trailing whitespace)
# process line first # gather stats about line endings elif line.endswith("\r"): p.hunkends["cr"] += 1
# todo: handle \ No newline cases else: # add hunk status node # switch to hunkskip state
# check exit conditions warning("extra lines for hunk no.%d at %d for target %s" % (nexthunkno, lineno+1, p.target)) # add hunk status node hunk.invalid = True p.hunks.append(hunk) self.errors += 1 # switch to hunkskip state hunkbody = False hunkskip = True # hunk parsed successfully # switch to hunkparsed state
# detect mixed window/unix line ends debuglines = dict(ends) debuglines.update(file=p.target, hunk=nexthunkno) debug("crlf: %(crlf)d lf: %(lf)d cr: %(cr)d\t - file: %(file)s hunk: %(hunk)d" % debuglines) # fetch next line continue
# switch to hunkhead state hunkskip = False hunkhead = True # switch to filenames state debug("- %2d hunks for %s" % (len(p.hunks), p.source))
# XXX testcase warning("skipping false patch for %s" % srcname) srcname = None # XXX header += srcname # double source filename line is encountered # attempt to restart from this second line # todo: support spaces in filenames else: warning("skipping invalid filename at line %d" % (lineno+1)) self.errors += 1 # XXX p.header += line # switch back to headscan state filenames = False headscan = True # XXX header += srcname # XXX header += line else: # this should be unreachable warning("skipping invalid target patch") else: # XXX seems to be a dead branch warning("skipping invalid patch - double target at line %d" % (lineno+1)) self.errors += 1 srcname = None tgtname = None # XXX header += srcname # XXX header += tgtname # XXX header += line # double target filename line is encountered # switch back to headscan state filenames = False headscan = True else: warning("skipping invalid patch - no target filename at line %d" % (lineno+1)) self.errors += 1 srcname = None # switch back to headscan state filenames = False headscan = True else: # switch to hunkhead state
if not p.hunks: warning("skipping invalid patch with no hunks for file %s" % p.source) self.errors += 1 # XXX review switch # switch to headscan state hunkhead = False headscan = True continue else: # TODO review condition case # switch to headscan state hunkhead = False headscan = True else:
# switch to hunkbody state
# /while fe.next()
warning("warning: finished with errors, some hunks may be invalid") else: # extra data at the end of file pass else:
debug("- %2d hunks for %s" % (len(p.hunks), p.source))
# XXX fix total hunks calculation sum(len(p.hunks) for p in self.items)))
# ---- detect patch and patchset types ----
else: # --------
""" detect and return type for the specified Patch object analyzes header and filenames info
NOTE: must be run before filenames are normalized """
# check for SVN # - header starts with Index: # - next line is ===... delimiter # - filename is followed by revision number # TODO add SVN revision and p.header[-1].startswith("="*67)):
# common checks for both HG and GIT and (p.target.startswith('b/') or p.target == '/dev/null'))
# GIT type check # - header[-2] is like "diff --git a/oldname b/newname" # - header[-1] is like "index <hash>..<hash> <mode>" # TODO add git rename diffs and add/remove diffs # add git diff with spaced filename # TODO http://www.kernel.org/pub/software/scm/git/docs/git-diff.html
# Git patch header len is 2 min # detect the start of diff header - there might be some comments before and re.match(r'index \w{7}..\w{7} \d{6}', p.header[idx+1])):
# HG check # # - for plain HG format header is like "diff -r b2d9961ff1f5 filename" # - for Git-style HG patches it is "diff --git a/oldname b/newname" # - filename starts with a/, b/ or is equal to /dev/null # - exported changesets also contain the header # # HG changeset patch # # User name@example.com # ... # TODO add MQ # TODO add revision info
""" sanitize filenames, normalizing paths, i.e.: 1. strip a/ and b/ prefixes from GIT and HG style patches 2. remove all references to parent directories (with warning) 3. translate any absolute paths to relative (with warning)
[x] always use forward slashes to be cross-platform (diff/patch were born as a unix utility after all)
return None """ debug("normalize filenames") debug(" patch type = " + p.type) debug(" source = " + p.source) debug(" target = " + p.target) # TODO: figure out how to deal with /dev/null entries warning("invalid source filename") else: warning("invalid target filename") else:
# references to parent are not allowed # absolute paths are not allowed
""" calculate diffstat and return as a string Notes: - original diffstat outputs target filename - single + or - shouldn't escape histogram """ # (for histogram width calculation) # %-19s | %-4d %s
# -- calculating histogram -- else: iratio = (float(insert[i]) / maxdiff) * histwidth dratio = (float(delete[i]) / maxdiff) * histwidth
# make sure every entry gets at least one + or - iwidth = 1 if 0 < iratio < 1 else int(iratio) dwidth = 1 if 0 < dratio < 1 else int(dratio) #print iratio, dratio, iwidth, dwidth, histwidth hist = "+"*int(iwidth) + "-"*int(dwidth) # -- /calculating +- histogram --
% (len(names), sum(insert), sum(delete), delta))
""" return name of file to be patched or None """ elif exists(new): return new else: # [w] Google Code generates broken patches with its online editor debug("broken patch from Google Code, stripping prefixes..") if old.startswith('a/') and new.startswith('b/'): old, new = old[2:], new[2:] debug(" %s" % old) debug(" %s" % new) if exists(old): return old elif exists(new): return new return None
""" Apply parsed patch, optionally stripping leading components from file paths. `root` parameter specifies working dir. return True on success """
# [ ] test strip level exceeds nesting level # [ ] test the same only for selected files # [ ] test if files end up being on the same level except ValueError: errors += 1 warning("error: strip parameter '%s' must be an integer" % strip) strip = 0
#for fileno, filename in enumerate(self.source): else:
warning("source/target file does not exist:\n --- %s\n +++ %s" % (old, new)) errors += 1 continue warning("not a file - %s" % filename) errors += 1 continue
# [ ] check absolute paths security here
# validate before patching #pprint(hunkreplace)
# todo \ No newline at end of file
# check hunks in source file else: info("file %d/%d:\t %s" % (i+1, total, filename)) info(" hunk no.%d doesn't match source file at line %d" % (hunkno+1, lineno+1)) info(" expected: %s" % hunkfind[hunklineno]) info(" actual : %s" % line.rstrip("\r\n")) # not counting this as error, because file may already be patched. # check if file is already patched is done after the number of # invalid hunks if found # TODO: check hunks against source/target file in one pass # API - check(stream, srchunks, tgthunks) # return tuple (srcerrs, tgterrs)
# continue to check other hunks for completeness hunkno += 1 if hunkno < len(p.hunks): hunk = p.hunks[hunkno] continue else: break
# check if processed line is the last line else: # patch file else:
warning("already patched %s" % filename) else: warning("can't backup original file to %s - aborting" % backupname) else: else: errors += 1 warning("error patching file %s" % filename) shutil.copy(filename, filename+".invalid") warning("invalid version is saved to %s" % filename+".invalid") # todo: proper rejects shutil.move(backupname, filename)
# todo: check for premature eof
""" reverse patch direction (this doesn't touch filenames) """
""" apply patch in reverse order """
""" Check if specified filename can be patched. Returns None if file cannot be found among source filenames. False if patch cannot be applied cleanly. True otherwise.
:returns: True, False or None """
# skip to first line of the hunk debug("check failed - premature eof on hunk: %d" % (hno+1)) # todo: \ No newline at the end of file raise NoMatch
# todo: display failed hunk, i.e. expected/found
""" Generator that yields stream patched with hunks iterable
Converts lineends in hunk lines to the best suitable format autodetected from input """
# todo: At the moment substituted lineends may not be the same # at the start and at the end of patching. Also issue a # warning/throw about mixed lineends (is it really needed?)
""" local utility function - return line from source stream collecting line end statistics on the way """ # 'U' mode works only with text files elif line.endswith("\r"): lineends["\r"] += 1
# skip to line just before hunk starts
# todo: check \ No newline at the end of file else: # detect if line ends are consistent in source file else: # newlines are mixed yield line2write
# [ ] TODO: add test for permission copy
for p in self.items: for headline in p.header: print headline.rstrip('\n') print '--- ' + p.source print '+++ ' + p.target for h in p.hunks: print '@@ -%s,%s +%s,%s @@' % (h.startsrc, h.linessrc, h.starttgt, h.linestgt) for line in h.text: print line.rstrip('\n')
from optparse import OptionParser from os.path import exists import sys
opt = OptionParser(usage="1. %prog [options] unified.diff\n" " 2. %prog [options] http://host/patch\n" " 3. %prog [options] -- < unified.diff", version="python-patch %s" % __version__) opt.add_option("-q", "--quiet", action="store_const", dest="verbosity", const=0, help="print only warnings and errors", default=1) opt.add_option("-v", "--verbose", action="store_const", dest="verbosity", const=2, help="be verbose") opt.add_option("--debug", action="store_true", dest="debugmode", help="debug mode") opt.add_option("--diffstat", action="store_true", dest="diffstat", help="print diffstat and exit") opt.add_option("-d", "--directory", metavar='DIR', help="specify root directory for applying patch") opt.add_option("-p", "--strip", type="int", metavar='N', default=0, help="strip N path components from filenames") opt.add_option("--revert", action="store_true", help="apply patch in reverse order (unpatch)") (options, args) = opt.parse_args()
if not args and sys.argv[-1:] != ['--']: opt.print_version() opt.print_help() sys.exit() readstdin = (sys.argv[-1:] == ['--'] and not args)
verbosity_levels = {0:logging.WARNING, 1:logging.INFO, 2:logging.DEBUG} loglevel = verbosity_levels[options.verbosity] logformat = "%(message)s" logger.setLevel(loglevel) streamhandler.setFormatter(logging.Formatter(logformat))
if options.debugmode: setdebug() # this sets global debugmode variable
if readstdin: patch = PatchSet(sys.stdin) else: patchfile = args[0] urltest = patchfile.split(':')[0] if (':' in patchfile and urltest.isalpha() and len(urltest) > 1): # one char before : is a windows drive letter patch = fromurl(patchfile) else: if not exists(patchfile) or not isfile(patchfile): sys.exit("patch file does not exist - %s" % patchfile) patch = fromfile(patchfile)
if options.diffstat: print patch.diffstat() sys.exit(0)
#pprint(patch) if options.revert: patch.revert(options.strip, root=options.directory) or sys.exit(-1) else: patch.apply(options.strip, root=options.directory) or sys.exit(-1)
# todo: document and test line ends handling logic - patch.py detects proper line-endings # for inserted hunks and issues a warning if patched file has inconsistent line ends
main()
# Legend: # [ ] - something to be done # [w] - official wart, external or internal that is unlikely to be fixed
# [ ] API break (2.x) wishlist # PatchSet.items --> PatchSet.patches
# [ ] run --revert test for all dataset items # [ ] run .parse() / .dump() test for dataset |