Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

830

831

832

833

834

835

836

837

838

839

840

841

842

843

844

845

846

847

848

849

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

867

868

869

870

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908

909

910

911

912

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

934

935

936

937

938

939

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959

960

961

962

963

964

965

966

967

968

969

970

971

972

973

974

975

976

977

978

979

980

981

982

983

984

985

986

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

1001

1002

1003

1004

1005

1006

1007

1008

1009

1010

1011

1012

1013

1014

1015

1016

1017

1018

1019

1020

1021

1022

1023

1024

1025

1026

1027

1028

1029

1030

1031

1032

1033

1034

1035

1036

1037

1038

1039

1040

1041

1042

1043

1044

1045

1046

1047

1048

1049

1050

1051

1052

1053

1054

1055

1056

1057

1058

1059

1060

1061

1062

1063

1064

1065

1066

1067

1068

1069

1070

1071

1072

1073

1074

1075

1076

1077

1078

1079

1080

1081

1082

1083

1084

1085

1086

1087

1088

1089

1090

1091

1092

1093

1094

1095

1096

1097

1098

1099

1100

1101

1102

1103

1104

1105

1106

1107

1108

1109

1110

1111

1112

1113

1114

1115

1116

1117

1118

1119

1120

1121

1122

1123

1124

1125

1126

1127

1128

1129

1130

1131

1132

1133

1134

1135

1136

1137

1138

1139

1140

1141

1142

1143

1144

1145

1146

1147

1148

1149

1150

1151

1152

1153

1154

1155

1156

1157

1158

1159

1160

1161

1162

1163

1164

1165

1166

1167

1168

1169

1170

1171

1172

#!/usr/bin/env python 

""" 

    Patch utility to apply unified diffs 

 

    Brute-force line-by-line non-recursive parsing  

 

    Copyright (c) 2008-2015 anatoly techtonik 

    Available under the terms of MIT license 

 

    https://github.com/techtonik/python-patch/ 

 

""" 

 

__author__ = "anatoly techtonik <techtonik@gmail.com>" 

__version__ = "1.15" 

 

import copy 

import logging 

import re 

# cStringIO doesn't support unicode in 2.5 

from StringIO import StringIO 

import urllib2 

 

from os.path import exists, isfile, abspath 

import os 

import posixpath 

import shutil 

 

 

#------------------------------------------------ 

# Logging is controlled by logger named after the 

# module name (e.g. 'patch' for patch.py module) 

 

# Module-level logger named after the module (e.g. 'patch' for patch.py),
# so library users can control output via the standard logging config.
logger = logging.getLogger(__name__)

# short aliases for the logging calls used throughout this module
debug = logger.debug
info = logger.info
warning = logger.warning

 

class NullHandler(logging.Handler):
  """ No-op logging handler.

      Backported from Python 2.7 so that users who have not configured
      logging do not see
      `No handlers could be found for logger "patch"`
      (http://bugs.python.org/issue16539).
  """
  def createLock(self):
    # nothing is ever emitted, so no lock is required
    self.lock = None

  def handle(self, record):
    pass

  def emit(self, record):
    pass

 

# stderr handler; only attached to the logger by setdebug()
streamhandler = logging.StreamHandler()

# initialize logger itself
# NullHandler keeps the logger silent until the library user
# (or setdebug()) attaches a real handler
logger.addHandler(NullHandler())

# module-wide flag flipped by setdebug(); checked before producing
# expensive debug output elsewhere in the module
debugmode = False

 

def setdebug():
  """ Switch the module into debug mode: set DEBUG log level and
      attach the stderr stream handler (if not attached already).
  """
  global debugmode, streamhandler

  debugmode = True
  logger.setLevel(logging.DEBUG)

  # when used as a library, streamhandler is not added by default
  if streamhandler not in logger.handlers:
    logger.addHandler(streamhandler)

  streamhandler.setFormatter(logging.Formatter("%(levelname)8s %(message)s"))

 

 

#------------------------------------------------ 

# Constants for Patch/PatchSet types 

 

# Patch/PatchSet type constants (assigned to Patch.type / PatchSet.type)
DIFF = PLAIN = "plain"
GIT = "git"
HG = MERCURIAL = "mercurial"
SVN = SUBVERSION = "svn"
# mixed type is only actual when PatchSet contains
# Patches of different type
# (fixed: was a redundant chained self-assignment `MIXED = MIXED = "mixed"`)
MIXED = "mixed"

 

 

#------------------------------------------------ 

# Helpers (these could come with Python stdlib) 

 

# x...() function are used to work with paths in 

# cross-platform manner - all paths use forward 

# slashes even on Windows. 

 

def xisabs(filename):
  """ Cross-platform version of `os.path.isabs()`
      Returns True if `filename` is absolute on
      Linux, OS X or Windows.
  """
  # leading forward slash (Linux/Unix) or backslash (Windows)
  if filename.startswith(('/', '\\')):
    return True
  # Windows drive-letter prefix, e.g. "c:\" or "c:/"
  return bool(re.match(r'\w:[\\/]', filename))

 

def xnormpath(path):
  """ Cross-platform version of os.path.normpath """
  # normalize once, turn Windows backslashes into forward slashes,
  # then fold the converted separators with a second normalize pass
  forward = posixpath.normpath(path).replace('\\', '/')
  return posixpath.normpath(forward)

 

def xstrip(filename):
  """ Make relative path out of absolute by stripping
      prefixes used on Linux, OS X and Windows.

      This function is critical for security.
  """
  # keep stripping until xisabs() no longer considers the path absolute
  while xisabs(filename):
    if re.match(r'\w:[\\/]', filename):
      # windows drive prefix with all following slashes
      filename = re.sub(r'^\w+:[\\/]+', '', filename)
    elif filename[:1] in ('\\', '/'):
      # leading run of slashes/backslashes
      filename = re.sub(r'^[\\/]+', '', filename)
  return filename

 

#----------------------------------------------- 

# Main API functions 

 

def fromfile(filename):
  """ Parse patch file. If successful, returns
      PatchSet() object. Otherwise returns False.
  """
  patchset = PatchSet()
  debug("reading %s" % filename)
  fp = open(filename, "rb")
  try:
    res = patchset.parse(fp)
  finally:
    # close the file even when parse() raises (handle leaked before)
    fp.close()
  if res == True:
    return patchset
  return False

 

 

def fromstring(s):
  """ Parse text string and return PatchSet()
      object (or False if parsing fails)
  """
  patchset = PatchSet( StringIO(s) )
  return patchset if patchset.errors == 0 else False

 

 

def fromurl(url):
  """ Parse patch from an URL, return False
      if an error occured. Note that this also
      can throw urlopen() exceptions.
  """
  patchset = PatchSet( urllib2.urlopen(url) )
  return patchset if patchset.errors == 0 else False

 

 

# --- Utility functions --- 

# [ ] reuse more universal pathsplit() 

def pathstrip(path, n):
  """ Strip n leading components from the given path """
  # split the head element repeatedly until it has no directory part,
  # accumulating components in order
  # NOTE(review): an absolute path would never reach an empty dirname
  # here - callers are expected to pass relative paths
  segments = [path]
  while os.path.dirname(segments[0]) != '':
    head, tail = os.path.split(segments[0])
    segments[0:1] = [head, tail]
  return '/'.join(segments[n:])

# --- /Utility function --- 

 

 

class Hunk(object):
  """ Parsed hunk data container (hunk starts with @@ -R +R @@) """

  def __init__(self):
    # header fields parsed from "@@ -start,lines +start,lines @@ desc";
    # line counts start with 1
    self.startsrc = None
    self.linessrc = None
    self.starttgt = None
    self.linestgt = None
    # set to True by the parser when the hunk body does not match the header
    self.invalid = False
    # trailing text after the closing @@
    self.desc = ''
    # raw hunk body lines (context, additions, removals)
    self.text = []

 

 

class Patch(object):
  """ Patch for a single file.
      If used as an iterable, returns hunks.
  """

  def __init__(self):
    self.source = None    # source filename from the --- line
    self.target = None    # target filename from the +++ line
    self.hunks = []       # list of Hunk objects
    self.hunkends = []    # line-ending counters (dict once parsed)
    self.header = []      # raw header lines preceding the filenames
    self.type = None      # one of the module type constants

  def __iter__(self):
    return iter(self.hunks)

 

 

class PatchSet(object): 

  """ PatchSet is a patch parser and container. 

      When used as an iterable, returns patches. 

  """ 

 

  def __init__(self, stream=None): 

    # --- API accessible fields --- 

 

    # name of the PatchSet (filename or ...) 

    self.name = None 

    # patch set type - one of constants 

    self.type = None 

 

    # list of Patch objects 

    self.items = [] 

 

    self.errors = 0    # fatal parsing errors 

    self.warnings = 0  # non-critical warnings 

    # --- /API --- 

 

    if stream: 

      self.parse(stream) 

 

  def __len__(self):
    """ Number of patched files in the set. """
    return len(self.items)

 

  def __iter__(self): 

    for i in self.items: 

      yield i 

 

  def parse(self, stream):
    """ parse unified diff
        return True on success

        `stream` is any iterable of lines. Implemented as a single-pass
        state machine over the line stream; states are the boolean flags
        headscan / filenames / hunkhead / hunkbody / hunkskip / hunkparsed
        declared below. Populates self.items, self.errors, self.warnings.
    """
    lineends = dict(lf=0, crlf=0, cr=0)
    nexthunkno = 0    #: even if index starts with 0 user messages number hunks from 1

    p = None
    hunk = None
    # hunkactual variable is used to calculate hunk lines for comparison
    hunkactual = dict(linessrc=None, linestgt=None)


    class wrapumerate(enumerate):
      """Enumerate wrapper that uses boolean end of stream status instead of
      StopIteration exception, and properties to access line information.
      """

      def __init__(self, *args, **kwargs):
        # we don't call parent, it is magically created by __new__ method

        self._exhausted = False
        self._lineno = False     # after end of stream equal to the num of lines
        self._line = False       # will be reset to False after end of stream

      def next(self):
        """Try to read the next line and return True if it is available,
           False if end of stream is reached."""
        if self._exhausted:
          return False

        try:
          self._lineno, self._line = super(wrapumerate, self).next()
        except StopIteration:
          self._exhausted = True
          self._line = False
          return False
        return True

      @property
      def is_empty(self):
        return self._exhausted

      @property
      def line(self):
        return self._line

      @property
      def lineno(self):
        return self._lineno

    # define states (possible file regions) that direct parse flow
    headscan  = True  # start with scanning header
    filenames = False # lines starting with --- and +++

    hunkhead = False  # @@ -R +R @@ sequence
    hunkbody = False  #
    hunkskip = False  # skipping invalid hunk mode

    hunkparsed = False # state after successfully parsed hunk

    # regexp to match start of hunk, used groups - 1,3,4,6
    re_hunk_start = re.compile("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@")

    self.errors = 0
    # temp buffers for header and filenames info
    header = []
    srcname = None
    tgtname = None

    # start of main cycle
    # each parsing block already has line available in fe.line
    fe = wrapumerate(stream)
    while fe.next():

      # -- deciders: these only switch state to decide who should process
      # --           line fetched at the start of this cycle
      if hunkparsed:
        hunkparsed = False
        if re_hunk_start.match(fe.line):
            hunkhead = True
        elif fe.line.startswith("--- "):
            filenames = True
        else:
            headscan = True
      # -- ------------------------------------

      # read out header
      if headscan:
        while not fe.is_empty and not fe.line.startswith("--- "):
            header.append(fe.line)
            fe.next()
        if fe.is_empty:
            if p == None:
              debug("no patch data found")  # error is shown later
              self.errors += 1
            else:
              info("%d unparsed bytes left at the end of stream" % len(''.join(header)))
              self.warnings += 1
              # TODO check for \No new line at the end..
              # TODO test for unparsed bytes
              # otherwise error += 1
            # this is actually a loop exit
            continue

        headscan = False
        # switch to filenames state
        filenames = True

      line = fe.line
      lineno = fe.lineno


      # hunkskip and hunkbody code skipped until definition of hunkhead is parsed
      if hunkbody:
        # [x] treat empty lines inside hunks as containing single space
        #     (this happens when diff is saved by copy/pasting to editor
        #      that strips trailing whitespace)
        if line.strip("\r\n") == "":
            debug("expanding empty line in a middle of hunk body")
            self.warnings += 1
            line = ' ' + line

        # process line first
        if re.match(r"^[- \+\\]", line):
            # gather stats about line endings
            if line.endswith("\r\n"):
              p.hunkends["crlf"] += 1
            elif line.endswith("\n"):
              p.hunkends["lf"] += 1
            elif line.endswith("\r"):
              p.hunkends["cr"] += 1

            # count the line against the source/target totals declared
            # in the hunk header ("\"-prefixed "\ No newline" markers
            # count for neither)
            if line.startswith("-"):
              hunkactual["linessrc"] += 1
            elif line.startswith("+"):
              hunkactual["linestgt"] += 1
            elif not line.startswith("\\"):
              hunkactual["linessrc"] += 1
              hunkactual["linestgt"] += 1
            hunk.text.append(line)
            # todo: handle \ No newline cases
        else:
            warning("invalid hunk no.%d at %d for target file %s" % (nexthunkno, lineno+1, p.target))
            # add hunk status node
            hunk.invalid = True
            p.hunks.append(hunk)
            self.errors += 1
            # switch to hunkskip state
            hunkbody = False
            hunkskip = True

        # check exit conditions
        if hunkactual["linessrc"] > hunk.linessrc or hunkactual["linestgt"] > hunk.linestgt:
            warning("extra lines for hunk no.%d at %d for target %s" % (nexthunkno, lineno+1, p.target))
            # add hunk status node
            hunk.invalid = True
            p.hunks.append(hunk)
            self.errors += 1
            # switch to hunkskip state
            hunkbody = False
            hunkskip = True
        elif hunk.linessrc == hunkactual["linessrc"] and hunk.linestgt == hunkactual["linestgt"]:
            # hunk parsed successfully
            p.hunks.append(hunk)
            # switch to hunkparsed state
            hunkbody = False
            hunkparsed = True

            # detect mixed window/unix line ends
            ends = p.hunkends
            if ((ends["cr"]!=0) + (ends["crlf"]!=0) + (ends["lf"]!=0)) > 1:
              warning("inconsistent line ends in patch hunks for %s" % p.source)
              self.warnings += 1
            if debugmode:
              debuglines = dict(ends)
              debuglines.update(file=p.target, hunk=nexthunkno)
              debug("crlf: %(crlf)d  lf: %(lf)d  cr: %(cr)d\t - file: %(file)s hunk: %(hunk)d" % debuglines)
            # fetch next line
            continue

      # after an invalid hunk: discard lines until the next hunk header
      # or the next "--- " filename line
      if hunkskip:
        if re_hunk_start.match(line):
          # switch to hunkhead state
          hunkskip = False
          hunkhead = True
        elif line.startswith("--- "):
          # switch to filenames state
          hunkskip = False
          filenames = True
          if debugmode and len(self.items) > 0:
            debug("- %2d hunks for %s" % (len(p.hunks), p.source))

      # parse the "--- source" / "+++ target" filename pair
      if filenames:
        if line.startswith("--- "):
          if srcname != None:
            # XXX testcase
            warning("skipping false patch for %s" % srcname)
            srcname = None
            # XXX header += srcname
            # double source filename line is encountered
            # attempt to restart from this second line
          re_filename = "^--- ([^\t]+)"
          match = re.match(re_filename, line)
          # todo: support spaces in filenames
          if match:
            srcname = match.group(1).strip()
          else:
            warning("skipping invalid filename at line %d" % (lineno+1))
            self.errors += 1
            # XXX p.header += line
            # switch back to headscan state
            filenames = False
            headscan = True
        elif not line.startswith("+++ "):
          if srcname != None:
            warning("skipping invalid patch with no target for %s" % srcname)
            self.errors += 1
            srcname = None
            # XXX header += srcname
            # XXX header += line
          else:
            # this should be unreachable
            warning("skipping invalid target patch")
          filenames = False
          headscan = True
        else:
          if tgtname != None:
            # XXX seems to be a dead branch
            warning("skipping invalid patch - double target at line %d" % (lineno+1))
            self.errors += 1
            srcname = None
            tgtname = None
            # XXX header += srcname
            # XXX header += tgtname
            # XXX header += line
            # double target filename line is encountered
            # switch back to headscan state
            filenames = False
            headscan = True
          else:
            re_filename = "^\+\+\+ ([^\t]+)"
            match = re.match(re_filename, line)
            if not match:
              warning("skipping invalid patch - no target filename at line %d" % (lineno+1))
              self.errors += 1
              srcname = None
              # switch back to headscan state
              filenames = False
              headscan = True
            else:
              # both filenames parsed - start a new Patch entry
              if p: # for the first run p is None
                self.items.append(p)
              p = Patch()
              p.source = srcname
              srcname = None
              p.target = match.group(1).strip()
              p.header = header
              header = []
              # switch to hunkhead state
              filenames = False
              hunkhead = True
              nexthunkno = 0
              p.hunkends = lineends.copy()
              continue

      # parse the "@@ -R +R @@" hunk header into a new Hunk object
      if hunkhead:
        match = re.match("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@(.*)", line)
        if not match:
          if not p.hunks:
            warning("skipping invalid patch with no hunks for file %s" % p.source)
            self.errors += 1
            # XXX review switch
            # switch to headscan state
            hunkhead = False
            headscan = True
            continue
          else:
            # TODO review condition case
            # switch to headscan state
            hunkhead = False
            headscan = True
        else:
          hunk = Hunk()
          hunk.startsrc = int(match.group(1))
          # when the ",count" part is omitted the count defaults to 1
          hunk.linessrc = 1
          if match.group(3): hunk.linessrc = int(match.group(3))
          hunk.starttgt = int(match.group(4))
          hunk.linestgt = 1
          if match.group(6): hunk.linestgt = int(match.group(6))
          hunk.invalid = False
          hunk.desc = match.group(7)[1:].rstrip()
          hunk.text = []

          hunkactual["linessrc"] = hunkactual["linestgt"] = 0

          # switch to hunkbody state
          hunkhead = False
          hunkbody = True
          nexthunkno += 1
          continue

    # /while fe.next()

    if p:
      self.items.append(p)

    # post-loop diagnostics based on the state the machine ended in
    if not hunkparsed:
      if hunkskip:
        warning("warning: finished with errors, some hunks may be invalid")
      elif headscan:
        if len(self.items) == 0:
          warning("error: no patch data found!")
          return False
        else: # extra data at the end of file
          pass
      else:
        warning("error: patch stream is incomplete!")
        self.errors += 1
        if len(self.items) == 0:
          return False

    if debugmode and len(self.items) > 0:
        debug("- %2d hunks for %s" % (len(p.hunks), p.source))

    # XXX fix total hunks calculation
    debug("total files: %d  total hunks: %d" % (len(self.items),
        sum(len(p.hunks) for p in self.items)))

    # ---- detect patch and patchset types ----
    for idx, p in enumerate(self.items):
      self.items[idx].type = self._detect_type(p)

    # NOTE(review): if self.items could be empty here, types.pop() below
    # would raise KeyError on the empty set - confirm all zero-item
    # paths return earlier
    types = set([p.type for p in self.items])
    if len(types) > 1:
      self.type = MIXED
    else:
      self.type = types.pop()
    # --------

    self._normalize_filenames()

    return (self.errors == 0)

 

  def _detect_type(self, p):
    """ detect and return type for the specified Patch object
        analyzes header and filenames info

        Returns one of SVN / GIT / HG / PLAIN.

        NOTE: must be run before filenames are normalized
    """

    # check for SVN
    #  - header starts with Index:
    #  - next line is ===... delimiter
    #  - filename is followed by revision number
    # TODO add SVN revision
    if (len(p.header) > 1 and p.header[-2].startswith("Index: ")
          and p.header[-1].startswith("="*67)):
        return SVN

    # common checks for both HG and GIT
    # filenames must carry the a/ and b/ prefixes (or be /dev/null)
    DVCS = ((p.source.startswith('a/') or p.source == '/dev/null')
        and (p.target.startswith('b/') or p.target == '/dev/null'))

    # GIT type check
    #  - header[-2] is like "diff --git a/oldname b/newname"
    #  - header[-1] is like "index <hash>..<hash> <mode>"
    # TODO add git rename diffs and add/remove diffs
    #      add git diff with spaced filename
    # TODO http://www.kernel.org/pub/software/scm/git/docs/git-diff.html

    # Git patch header len is 2 min
    if len(p.header) > 1:
      # detect the start of diff header - there might be some comments before
      # (if no header line starts with "diff --git", idx ends up 0 and the
      # startswith check below simply fails)
      for idx in reversed(range(len(p.header))):
        if p.header[idx].startswith("diff --git"):
          break
      if p.header[idx].startswith('diff --git a/'):
        # NOTE(review): the '..' in the index regex are unescaped dots and
        # match any two characters - presumably meant literally; confirm
        if (idx+1 < len(p.header)
            and re.match(r'index \w{7}..\w{7} \d{6}', p.header[idx+1])):
          if DVCS:
            return GIT

    # HG check
    #
    #  - for plain HG format header is like "diff -r b2d9961ff1f5 filename"
    #  - for Git-style HG patches it is "diff --git a/oldname b/newname"
    #  - filename starts with a/, b/ or is equal to /dev/null
    #  - exported changesets also contain the header
    #    # HG changeset patch
    #    # User name@example.com
    #    ...
    # TODO add MQ
    # TODO add revision info
    if len(p.header) > 0:
      if DVCS and re.match(r'diff -r \w{12} .*', p.header[-1]):
        return HG
      if DVCS and p.header[-1].startswith('diff --git a/'):
        if len(p.header) == 1:  # native Git patch header len is 2
          return HG
        elif p.header[0].startswith('# HG changeset patch'):
          return HG

    # no specific VCS markers found
    return PLAIN

 

 

  def _normalize_filenames(self):
    """ sanitize filenames, normalizing paths, i.e.:
        1. strip a/ and b/ prefixes from GIT and HG style patches
        2. remove all references to parent directories (with warning)
        3. translate any absolute paths to relative (with warning)

        [x] always use forward slashes to be crossplatform
            (diff/patch were born as a unix utility after all)

        return None
    """
    if debugmode:
      debug("normalize filenames")
    for i,p in enumerate(self.items):
      if debugmode:
        debug("    patch type = " + p.type)
        debug("    source = " + p.source)
        debug("    target = " + p.target)
      if p.type in (HG, GIT):
        # TODO: figure out how to deal with /dev/null entries
        debug("stripping a/ and b/ prefixes")
        if p.source != '/dev/null':
          if not p.source.startswith("a/"):
            warning("invalid source filename")
          else:
            p.source = p.source[2:]
        if p.target != '/dev/null':
          if not p.target.startswith("b/"):
            warning("invalid target filename")
          else:
            p.target = p.target[2:]

      # fold separators / relative segments, convert to forward slashes
      p.source = xnormpath(p.source)
      p.target = xnormpath(p.target)

      sep = '/'  # sep value can be hardcoded, but it looks nice this way

      # references to parent are not allowed
      if p.source.startswith(".." + sep):
        warning("error: stripping parent path for source file patch no.%d" % (i+1))
        self.warnings += 1
        while p.source.startswith(".." + sep):
          p.source = p.source.partition(sep)[2]
      if p.target.startswith(".." + sep):
        warning("error: stripping parent path for target file patch no.%d" % (i+1))
        self.warnings += 1
        while p.target.startswith(".." + sep):
          p.target = p.target.partition(sep)[2]
      # absolute paths are not allowed
      if xisabs(p.source) or xisabs(p.target):
        warning("error: absolute paths are not allowed - file no.%d" % (i+1))
        self.warnings += 1
        if xisabs(p.source):
          warning("stripping absolute path from source name '%s'" % p.source)
          p.source = xstrip(p.source)
        if xisabs(p.target):
          warning("stripping absolute path from target name '%s'" % p.target)
          p.target = xstrip(p.target)

      # NOTE: p IS self.items[i], so these assignments are redundant
      # (kept for clarity / historical reasons)
      self.items[i].source = p.source
      self.items[i].target = p.target

 

 

  def diffstat(self): 

    """ calculate diffstat and return as a string 

        Notes: 

          - original diffstat ouputs target filename 

          - single + or - shouldn't escape histogram 

    """ 

    names = [] 

    insert = [] 

    delete = [] 

    delta = 0    # size change in bytes 

    namelen = 0 

    maxdiff = 0  # max number of changes for single file 

                 # (for histogram width calculation) 

    for patch in self.items: 

      i,d = 0,0 

      for hunk in patch.hunks: 

        for line in hunk.text: 

          if line.startswith('+'): 

            i += 1 

            delta += len(line)-1 

          elif line.startswith('-'): 

            d += 1 

            delta -= len(line)-1 

      names.append(patch.target) 

      insert.append(i) 

      delete.append(d) 

      namelen = max(namelen, len(patch.target)) 

      maxdiff = max(maxdiff, i+d) 

    output = '' 

    statlen = len(str(maxdiff))  # stats column width 

    for i,n in enumerate(names): 

      # %-19s | %-4d %s 

      format = " %-" + str(namelen) + "s | %" + str(statlen) + "s %s\n" 

 

      hist = '' 

      # -- calculating histogram -- 

      width = len(format % ('', '', '')) 

      histwidth = max(2, 80 - width) 

      if maxdiff < histwidth: 

        hist = "+"*insert[i] + "-"*delete[i] 

      else: 

        iratio = (float(insert[i]) / maxdiff) * histwidth 

        dratio = (float(delete[i]) / maxdiff) * histwidth 

 

        # make sure every entry gets at least one + or - 

        iwidth = 1 if 0 < iratio < 1 else int(iratio) 

        dwidth = 1 if 0 < dratio < 1 else int(dratio) 

        #print iratio, dratio, iwidth, dwidth, histwidth 

        hist = "+"*int(iwidth) + "-"*int(dwidth) 

      # -- /calculating +- histogram -- 

      output += (format % (names[i], insert[i] + delete[i], hist)) 

 

    output += (" %d files changed, %d insertions(+), %d deletions(-), %+d bytes" 

               % (len(names), sum(insert), sum(delete), delta)) 

    return output 

 

 

  def findfile(self, old, new): 

    """ return name of file to be patched or None """ 

    if exists(old): 

      return old 

    elif exists(new): 

      return new 

    else: 

      # [w] Google Code generates broken patches with its online editor 

      debug("broken patch from Google Code, stripping prefixes..") 

      if old.startswith('a/') and new.startswith('b/'): 

        old, new = old[2:], new[2:] 

        debug("   %s" % old) 

        debug("   %s" % new) 

        if exists(old): 

          return old 

        elif exists(new): 

          return new 

      return None 

 

 

  def apply(self, strip=0, root=None):
    """ Apply parsed patch, optionally stripping leading components
        from file paths. `root` parameter specifies working dir.
        return True on success
    """
    if root:
      # remember caller's cwd so it can be restored before returning
      prevdir = os.getcwd()
      os.chdir(root)

    total = len(self.items)
    errors = 0
    if strip:
      # [ ] test strip level exceeds nesting level
      #   [ ] test the same only for selected files
      #     [ ] test if files end up being on the same level
      try:
        strip = int(strip)
      except ValueError:
        # invalid strip value is reported as an error but patching continues
        # with strip disabled
        errors += 1
        warning("error: strip parameter '%s' must be an integer" % strip)
        strip = 0

    #for fileno, filename in enumerate(self.source):
    for i,p in enumerate(self.items):
      if strip:
        debug("stripping %s leading component(s) from:" % strip)
        debug("   %s" % p.source)
        debug("   %s" % p.target)
        old = pathstrip(p.source, strip)
        new = pathstrip(p.target, strip)
      else:
        old, new = p.source, p.target

      filename = self.findfile(old, new)

      if not filename:
          warning("source/target file does not exist:\n  --- %s\n  +++ %s" % (old, new))
          errors += 1
          continue
      if not isfile(filename):
        warning("not a file - %s" % filename)
        errors += 1
        continue

      # [ ] check absolute paths security here
      debug("processing %d/%d:\t %s" % (i+1, total, filename))

      # validate before patching
      # -- scan the source file once, verifying every hunk's context
      #    ("  " and "-") lines against the actual file content --
      f2fp = open(filename)
      hunkno = 0
      hunk = p.hunks[hunkno]
      hunkfind = []
      hunkreplace = []
      validhunks = 0
      canpatch = False
      for lineno, line in enumerate(f2fp):
        if lineno+1 < hunk.startsrc:
          continue
        elif lineno+1 == hunk.startsrc:
          # reached the hunk start: precompute lines expected in the
          # source (context + removed) and in the target (context + added)
          hunkfind = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " -"]
          hunkreplace = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " +"]
          #pprint(hunkreplace)
          hunklineno = 0

          # todo \ No newline at end of file

        # check hunks in source file
        if lineno+1 < hunk.startsrc+len(hunkfind)-1:
          if line.rstrip("\r\n") == hunkfind[hunklineno]:
            hunklineno+=1
          else:
            info("file %d/%d:\t %s" % (i+1, total, filename))
            info(" hunk no.%d doesn't match source file at line %d" % (hunkno+1, lineno+1))
            info("  expected: %s" % hunkfind[hunklineno])
            info("  actual  : %s" % line.rstrip("\r\n"))
            # not counting this as error, because file may already be patched.
            # check if file is already patched is done after the number of
            # invalid hunks if found
            # TODO: check hunks against source/target file in one pass
            #   API - check(stream, srchunks, tgthunks)
            #           return tuple (srcerrs, tgterrs)

            # continue to check other hunks for completeness
            hunkno += 1
            if hunkno < len(p.hunks):
              hunk = p.hunks[hunkno]
              continue
            else:
              break

        # check if processed line is the last line
        if lineno+1 == hunk.startsrc+len(hunkfind)-1:
          debug(" hunk no.%d for file %s  -- is ready to be patched" % (hunkno+1, filename))
          hunkno+=1
          validhunks+=1
          if hunkno < len(p.hunks):
            hunk = p.hunks[hunkno]
          else:
            # last hunk verified; patch only if every hunk matched
            if validhunks == len(p.hunks):
              # patch file
              canpatch = True
              break
      else:
        # for/else: the source file ended before all hunks were checked
        if hunkno < len(p.hunks):
          warning("premature end of source file %s at hunk %d" % (filename, hunkno+1))
          errors += 1

      f2fp.close()

      if validhunks < len(p.hunks):
        # some hunks failed to match - distinguish "already patched"
        # (target side matches the file) from a genuinely different source
        if self._match_file_hunks(filename, p.hunks):
          warning("already patched  %s" % filename)
        else:
          warning("source file is different - %s" % filename)
          errors += 1
      if canpatch:
        # patch by moving the original aside as .orig, then rewriting the
        # original name from the backup; backup is removed on success and
        # restored on failure
        backupname = filename+".orig"
        if exists(backupname):
          warning("can't backup original file to %s - aborting" % backupname)
        else:
          import shutil
          shutil.move(filename, backupname)
          if self.write_hunks(backupname, filename, p.hunks):
            info("successfully patched %d/%d:\t %s" % (i+1, total, filename))
            os.unlink(backupname)
          else:
            errors += 1
            warning("error patching file %s" % filename)
            shutil.copy(filename, filename+".invalid")
            warning("invalid version is saved to %s" % filename+".invalid")
            # todo: proper rejects
            shutil.move(backupname, filename)

    if root:
      os.chdir(prevdir)

    # todo: check for premature eof
    return (errors == 0)

 

 

  def _reverse(self): 

    """ reverse patch direction (this doesn't touch filenames) """ 

    for p in self.items: 

      for h in p.hunks: 

        h.startsrc, h.starttgt = h.starttgt, h.startsrc 

        h.linessrc, h.linestgt = h.linestgt, h.linessrc 

        for i,line in enumerate(h.text): 

          if line[0] == '+': 

            h.text[i] = '-' + line[1:] 

          elif line[0] == '-': 

            h.text[i] = '+' +line[1:] 

 

  def revert(self, strip=0, root=None): 

    """ apply patch in reverse order """ 

    reverted = copy.deepcopy(self) 

    reverted._reverse() 

    return reverted.apply(strip, root) 

 

 

  def can_patch(self, filename): 

    """ Check if specified filename can be patched. Returns None if file can 

    not be found among source filenames. False if patch can not be applied 

    clearly. True otherwise. 

 

    :returns: True, False or None 

    """ 

    filename = abspath(filename) 

    for p in self.items: 

      if filename == abspath(p.source): 

        return self._match_file_hunks(filename, p.hunks) 

    return None 

 

 

  def _match_file_hunks(self, filepath, hunks):
    """ Return True if the file at `filepath` already contains the
        *target* side of every hunk (i.e. looks already patched),
        False otherwise. """
    matched = True
    fp = open(abspath(filepath))

    class NoMatch(Exception):
      # local exception used only to break out of the nested loops below
      pass

    lineno = 1
    line = fp.readline()
    hno = None
    try:
      for hno, h in enumerate(hunks):
        # skip to first line of the hunk
        while lineno < h.starttgt:
          if not len(line): # eof
            debug("check failed - premature eof before hunk: %d" % (hno+1))
            raise NoMatch
          line = fp.readline()
          lineno += 1
        for hline in h.text:
          # removed ("-") lines are absent from an already-patched file
          if hline.startswith("-"):
            continue
          if not len(line):
            debug("check failed - premature eof on hunk: %d" % (hno+1))
            # todo: \ No newline at the end of file
            raise NoMatch
          # compare ignoring line-ending differences
          if line.rstrip("\r\n") != hline[1:].rstrip("\r\n"):
            debug("file is not patched - failed hunk: %d" % (hno+1))
            raise NoMatch
          line = fp.readline()
          lineno += 1

    except NoMatch:
      matched = False
      # todo: display failed hunk, i.e. expected/found

    fp.close()
    return matched

 

 

  def patch_stream(self, instream, hunks): 

    """ Generator that yields stream patched with hunks iterable 

     

        Converts lineends in hunk lines to the best suitable format 

        autodetected from input 

    """ 

 

    # todo: At the moment substituted lineends may not be the same 

    #       at the start and at the end of patching. Also issue a 

    #       warning/throw about mixed lineends (is it really needed?) 

 

    hunks = iter(hunks) 

 

    srclineno = 1 

 

    lineends = {'\n':0, '\r\n':0, '\r':0} 

    def get_line(): 

      """ 

      local utility function - return line from source stream 

      collecting line end statistics on the way 

      """ 

      line = instream.readline() 

        # 'U' mode works only with text files 

      if line.endswith("\r\n"): 

        lineends["\r\n"] += 1 

      elif line.endswith("\n"): 

        lineends["\n"] += 1 

      elif line.endswith("\r"): 

        lineends["\r"] += 1 

      return line 

 

    for hno, h in enumerate(hunks): 

      debug("hunk %d" % (hno+1)) 

      # skip to line just before hunk starts 

      while srclineno < h.startsrc: 

        yield get_line() 

        srclineno += 1 

 

      for hline in h.text: 

        # todo: check \ No newline at the end of file 

        if hline.startswith("-") or hline.startswith("\\"): 

          get_line() 

          srclineno += 1 

          continue 

        else: 

          if not hline.startswith("+"): 

            get_line() 

            srclineno += 1 

          line2write = hline[1:] 

          # detect if line ends are consistent in source file 

          if sum([bool(lineends[x]) for x in lineends]) == 1: 

            newline = [x for x in lineends if lineends[x] != 0][0] 

            yield line2write.rstrip("\r\n")+newline 

          else: # newlines are mixed 

            yield line2write 

 

    for line in instream: 

      yield line 

 

 

  def write_hunks(self, srcname, tgtname, hunks): 

    src = open(srcname, "rb") 

    tgt = open(tgtname, "wb") 

 

    debug("processing target file %s" % tgtname) 

 

    tgt.writelines(self.patch_stream(src, hunks)) 

 

    tgt.close() 

    src.close() 

    # [ ] TODO: add test for permission copy 

    shutil.copymode(srcname, tgtname) 

    return True 

 

 

  def dump(self): 

    for p in self.items: 

      for headline in p.header: 

        print headline.rstrip('\n') 

      print '--- ' + p.source 

      print '+++ ' + p.target 

      for h in p.hunks: 

        print '@@ -%s,%s +%s,%s @@' % (h.startsrc, h.linessrc, h.starttgt, h.linestgt) 

        for line in h.text: 

          print line.rstrip('\n') 

 

 

def main():
  """ Command-line entry point: parse options, load a patch from a file,
      URL or stdin, then apply it, revert it, or print its diffstat. """
  from optparse import OptionParser
  from os.path import exists
  import sys

  opt = OptionParser(usage="1. %prog [options] unified.diff\n"
                    "       2. %prog [options] http://host/patch\n"
                    "       3. %prog [options] -- < unified.diff",
                     version="python-patch %s" % __version__)
  opt.add_option("-q", "--quiet", action="store_const", dest="verbosity",
                                  const=0, help="print only warnings and errors", default=1)
  opt.add_option("-v", "--verbose", action="store_const", dest="verbosity",
                                  const=2, help="be verbose")
  opt.add_option("--debug", action="store_true", dest="debugmode", help="debug mode")
  opt.add_option("--diffstat", action="store_true", dest="diffstat",
                                           help="print diffstat and exit")
  opt.add_option("-d", "--directory", metavar='DIR',
                                           help="specify root directory for applying patch")
  opt.add_option("-p", "--strip", type="int", metavar='N', default=0,
                                           help="strip N path components from filenames")
  opt.add_option("--revert", action="store_true",
                                           help="apply patch in reverse order (unpatch)")
  (options, args) = opt.parse_args()

  # no file argument and no explicit "--" (read stdin) - show usage and exit
  if not args and sys.argv[-1:] != ['--']:
    opt.print_version()
    opt.print_help()
    sys.exit()
  readstdin = (sys.argv[-1:] == ['--'] and not args)

  # map -q / default / -v verbosity to logging levels
  verbosity_levels = {0:logging.WARNING, 1:logging.INFO, 2:logging.DEBUG}
  loglevel = verbosity_levels[options.verbosity]
  logformat = "%(message)s"
  logger.setLevel(loglevel)
  streamhandler.setFormatter(logging.Formatter(logformat))

  if options.debugmode:
    setdebug()  # this sets global debugmode variable

  if readstdin:
    patch = PatchSet(sys.stdin)
  else:
    patchfile = args[0]
    # crude URL detection: "scheme:" prefix of length > 1 rules out
    # Windows drive letters like "C:\..."
    urltest = patchfile.split(':')[0]
    if (':' in patchfile and urltest.isalpha()
        and len(urltest) > 1): # one char before : is a windows drive letter
      patch = fromurl(patchfile)
    else:
      if not exists(patchfile) or not isfile(patchfile):
        sys.exit("patch file does not exist - %s" % patchfile)
      patch = fromfile(patchfile)

  if options.diffstat:
    print patch.diffstat()
    sys.exit(0)

  #pprint(patch)
  # exit with -1 if applying/reverting reported any errors
  if options.revert:
    patch.revert(options.strip, root=options.directory) or sys.exit(-1)
  else:
    patch.apply(options.strip, root=options.directory) or sys.exit(-1)

  # todo: document and test line ends handling logic - patch.py detects proper line-endings
  #       for inserted hunks and issues a warning if patched file has inconsistent line ends

 

# standard CLI guard - run main() only when executed as a script
if __name__ == "__main__":
  main()

 

# Legend: 

# [ ]  - some thing to be done 

# [w]  - official wart, external or internal that is unlikely to be fixed 

 

# [ ] API break (2.x) wishlist 

# PatchSet.items  -->  PatchSet.patches 

 

# [ ] run --revert test for all dataset items 

# [ ] run .parse() / .dump() test for dataset