und den Diff-Algo aus meinem History-Viewer extrahiert.
Es sind da doch einige Veränderungen nötig gewesen, um das für
die Allgemeinheit tauglich zu machen.
Ich lege also nicht die Hand dafür ins Feuer, dass da nicht noch Fehler drin sind.
Der Diff-Algo basiert auf einem C#-Code von Matthias Hertel.
Das ganze ist (mal wieder) als Interface aufgebaut, damit es möglichst wenig mit einem Hauptprogramm kollidiert.
Es wird mindestens PB4.5 benötigt.
GetTextDiff.pbi
Code: Alles auswählen
;/--------------------------
;|
;| GetTextDiff.pbi V1.00
;| [12.04.2011]
;|
;| ©HeX0R 2011
;|
;| Include to compare two
;| Textfiles and show
;| the differences
;|
;| TextDiff-Algorithm is ported from
;| this C#-Code from Matthias Hertel:
;| http://www.mathertel.de/Diff/Default.aspx
;|
;/--------------------------
CompilerIf #PB_Compiler_Version < 450
CompilerError "This include needs at least PureBasic 4.5!"
CompilerEndIf
#DIFF_INSERT_BLOCK = $01
#DIFF_DELETE_BLOCK = $02
Interface InterfaceGetTextDiff
GetTextDiff(OriginalFile.s, FileToCheck.s)
CountDiffBlocks()
DiffBlockType(DiffBlockNum)
DiffBlockSize(DiffBlockNum, BlockType)
DiffBlockLine.s(DiffBlockNum, BlockType, LineNum, *RealLineNum.INTEGER = 0)
GetOriginalFileLine.s(Num)
GetCheckFileLine.s(Num)
EndInterface
Structure _DIFF_
S.s
I.l
modified.b
EndStructure
Structure _aITEM_
StartA.l
StartB.l
DeletedA.l
InsertedB.l
EndStructure
Structure _GETTEXTDIFF_STRUC_
VTable.i
LinesOrig.i
LinesCheck.i
DiffBlocks.i
Array LineOrig._DIFF_(5000) ;will increase dynamically if needed
Array LineCheck._DIFF_(5000) ;will increase dynamically if needed
Array MyRes._aITEM_(500) ;will increase dynamically if needed
EndStructure
Procedure GTD_CountDiffBlocks(*THIS._GETTEXTDIFF_STRUC_)
;/----------------
;| How many Diffblocks are there?
;/----------------
ProcedureReturn *THIS\DiffBlocks
EndProcedure
Procedure GTD_DiffBlockType(*THIS._GETTEXTDIFF_STRUC_, DiffBlockNum)
;/-------------
;| Which Type is this Block
;|
;| You get a combination of #DIFF_DELETE_BLOCK and/or #DIFF_INSERT_BLOCK
;/-------------
Protected Result
If DiffBlockNum >= 0 And DiffBlockNum < *THIS\DiffBlocks
If *THIS\MyRes(DiffBlockNum)\DeletedA
Result | #DIFF_DELETE_BLOCK
EndIf
If *THIS\MyRes(DiffBlockNum)\InsertedB
Result | #DIFF_INSERT_BLOCK
EndIf
EndIf
ProcedureReturn Result
EndProcedure
Procedure GTD_DiffBlockSize(*THIS._GETTEXTDIFF_STRUC_, DiffBlockNum, BlockType)
;/-------------
;| Get the size of this DiffBlock
;|
;| One DiffBlock can contain both, a deleteblock and an insertblock
;| So you have to specify with BlockType, which block you need information from
;/-------------
Protected Result
If DiffBlockNum >= 0 And DiffBlockNum < *THIS\DiffBlocks
If BlockType = #DIFF_DELETE_BLOCK
Result = *THIS\MyRes(DiffBlockNum)\DeletedA
ElseIf BlockType = #DIFF_INSERT_BLOCK
Result = *THIS\MyRes(DiffBlockNum)\InsertedB
EndIf
EndIf
ProcedureReturn Result
EndProcedure
Procedure.s GTD_DiffBlockLine(*THIS._GETTEXTDIFF_STRUC_, DiffBlockNum, BlockType, LineNum, *RealLineNum.INTEGER = 0)
;/---------------
;| Get a line of a DifferenceBlock
;|
;| You have to specify, which kind of block you are interested in with BlockType
;| LineNum is the Number within this Block, starting with 0
;| You can optional receive the real LineNumber in *RealLineNum
;/---------------
Protected Result.s
If DiffBlockNum >= 0 And DiffBlockNum < *THIS\DiffBlocks
If BlockType = #DIFF_DELETE_BLOCK
If LineNum >= 0 And LineNum < *THIS\MyRes(DiffBlockNum)\DeletedA
Result = *THIS\LineOrig(*THIS\MyRes(DiffBlockNum)\StartA + LineNum)\S
If *RealLineNum
*RealLineNum\i = *THIS\MyRes(DiffBlockNum)\StartA + LineNum + 1
EndIf
EndIf
ElseIf BlockType = #DIFF_INSERT_BLOCK
If LineNum >= 0 And LineNum < *THIS\MyRes(DiffBlockNum)\InsertedB
Result = *THIS\LineCheck(*THIS\MyRes(DiffBlockNum)\StartB + LineNum)\S
If *RealLineNum
*RealLineNum\i = *THIS\MyRes(DiffBlockNum)\StartB + LineNum + 1
EndIf
EndIf
EndIf
EndIf
ProcedureReturn Result
EndProcedure
Procedure.s GTD_GetOriginalFileLine(*THIS._GETTEXTDIFF_STRUC_, Num)
;/-----------
;| Get the Line (Num) of the OriginalFile (starts with 0)
;/-----------
Protected Result.s
If Num >= 0 And Num < *THIS\LinesOrig
Result = *THIS\LineOrig(Num)\S
EndIf
ProcedureReturn Result
EndProcedure
Procedure.s GTD_GetCheckFileLine(*THIS._GETTEXTDIFF_STRUC_, Num)
;/-----------
;| Get the Line (Num) of the File to check (starts with 0)
;/-----------
Protected Result.s
If Num >= 0 And Num < *THIS\LinesCheck
Result = *THIS\LineCheck(Num)\S
EndIf
ProcedureReturn Result
EndProcedure
Procedure TextSearch_SMS(*THIS._GETTEXTDIFF_STRUC_, LowerA, UpperA, LowerB, UpperB, SizeA, SizeB, *ret.POINT, Array DownVector.i(1), Array UpVector.i(1))
;Internal Procedure for Diff-Algorithm
Protected MAX, UpOffset, DownOffset, DownK, UpK, Delta, oddDelta
Protected MaxD, D, k, x, y
MAX = SizeA + SizeB + 1
DownK = LowerA - LowerB ;the k-line to start the forward search
UpK = UpperA - UpperB ;the k-line to start the reverse search
Delta = (UpperA - LowerA) - (UpperB - LowerB)
If Delta & 1
oddDelta = #True
EndIf
;// The vectors in the publication accepts negative indexes. the vectors implemented here are 0-based
;// And are access using a specific offset: UpOffset UpVector And DownOffset For DownVektor
DownOffset = MAX - DownK
UpOffset = MAX - UpK
MaxD = ((UpperA - LowerA + UpperB - LowerB) / 2) + 1
;Debug.Write(2, "SMS", String.Format("Search the box: A[{0}-{1}] to B[{2}-{3}]", LowerA, UpperA, LowerB, UpperB));
;init vectors
DownVector(DownOffset + DownK + 1) = LowerA
UpVector(UpOffset + UpK - 1) = UpperA
For D = 0 To MaxD
;Extend the forward path.
For k = DownK - D To DownK + D Step 2
;find the only Or better starting point
If k = DownK - D
x = DownVector(DownOffset + k + 1)
Else
x = DownVector(DownOffset + k - 1) + 1
If k < DownK + D And DownVector(DownOffset + k + 1) >= x
x = DownVector(DownOffset + k + 1)
EndIf
EndIf
y = x - k
;find the End of the furthest reaching forward D-path in diagonal k.
While x < UpperA And y < UpperB And *THIS\LineOrig(x)\I = *THIS\LineCheck(y)\I
x + 1
y + 1
Wend
DownVector(DownOffset + k) = x
;overlap ?
If oddDelta And UpK - D < k And k < UpK + D
If UpVector(UpOffset + k) <= DownVector(DownOffset + k)
*ret\x = DownVector(DownOffset + k)
*ret\y = DownVector(DownOffset + k) - k
Break 2
EndIf
EndIf
Next k
;Extend the reverse path.
For k = UpK - D To UpK + D Step 2
;find the only Or better starting point
If k = UpK + D
x = UpVector(UpOffset + k - 1) ;up
Else
x = UpVector(UpOffset + k + 1) - 1 ;left
If k > UpK - D And UpVector(UpOffset + k - 1) < x
x = UpVector(UpOffset + k - 1) ;up
EndIf
EndIf
y = x - k;
While x > LowerA And y > LowerB And *THIS\LineOrig(x - 1)\I = *THIS\LineCheck(y - 1)\I
x - 1
y - 1 ;diagonal
Wend
UpVector(UpOffset + k) = x
;overlap ?
If oddDelta = 0 And DownK - D <= k And k <= DownK + D
If UpVector(UpOffset + k) <= DownVector(DownOffset + k)
*ret\x = DownVector(DownOffset + k)
*ret\y = DownVector(DownOffset + k) - k
Break 2
EndIf
EndIf
Next k
Next D
EndProcedure
Procedure TextSearch_LCS(*THIS._GETTEXTDIFF_STRUC_, LowerA, UpperA, LowerB, UpperB, SizeA, SizeB, Array A1.i(1), Array B1.i(1))
;Internal Procedure for Diff-Algorithm
Protected Ret.POINT
;Fast walkthrough equal lines at the start
While LowerA < UpperA And LowerB < UpperB And *THIS\LineOrig(LowerA)\I = *THIS\LineCheck(LowerB)\I
LowerA + 1
LowerB + 1
Wend
;Fast walkthrough equal lines at the End
While LowerA < UpperA And LowerB < UpperB And *THIS\LineOrig(UpperA - 1)\I = *THIS\LineCheck(UpperB - 1)\I
UpperA - 1
UpperB - 1
Wend
If LowerA = UpperA
;mark As inserted lines.
While LowerB < UpperB
*THIS\LineCheck(LowerB)\modified = #True
LowerB + 1
Wend
ElseIf LowerB = UpperB
;mark As deleted lines.
While LowerA < UpperA
*THIS\LineOrig(LowerA)\modified = #True
LowerA + 1
Wend
Else
;Find the middle snakea And length of an optimal path For A And B
TextSearch_SMS(*THIS, LowerA, UpperA, LowerB, UpperB, SizeA, SizeB, @Ret, A1(), B1())
;The path is from LowerX To (x,y) And (x,y) ot UpperX
TextSearch_LCS(*THIS, LowerA, Ret\x, LowerB, Ret\y, SizeA, SizeB, A1(), B1())
TextSearch_LCS(*THIS, Ret\x, UpperA, Ret\y, UpperB, SizeA, SizeB, A1(), B1())
EndIf
EndProcedure
Procedure TextSearch_Optimize(*THIS._GETTEXTDIFF_STRUC_)
;Internal Procedure for Diff-Algorithm
Protected StartPos, EndPos, Size
Size = *THIS\LinesOrig + 1
While StartPos < Size
While StartPos < Size And *THIS\LineOrig(StartPos)\modified = 0
StartPos + 1
Wend
EndPos = StartPos
While EndPos < Size And *THIS\LineOrig(EndPos)\modified
EndPos + 1
Wend
If EndPos < Size And *THIS\LineOrig(StartPos)\I = *THIS\LineOrig(EndPos)\I
*THIS\LineOrig(StartPos)\modified = #False
*THIS\LineOrig(EndPos)\modified = #True
Else
StartPos = EndPos
EndIf
Wend
StartPos = 0
EndPos = 0
Size = *THIS\LinesCheck + 1
While StartPos < Size
While StartPos < Size And *THIS\LineCheck(StartPos)\modified = 0
StartPos + 1
Wend
EndPos = StartPos
While EndPos < Size And *THIS\LineCheck(EndPos)\modified
EndPos + 1
Wend
If EndPos < Size And *THIS\LineCheck(StartPos)\I = *THIS\LineCheck(EndPos)\I
*THIS\LineCheck(StartPos)\modified = #False
*THIS\LineCheck(EndPos)\modified = #True
Else
StartPos = EndPos
EndIf
Wend
EndProcedure
Procedure TextSearch_CreateDiffs(*THIS._GETTEXTDIFF_STRUC_)
;Internal Procedure for Diff-Algorithm
Protected SizeA, SizeB, StartA, StartB, LineA, LineB
SizeA = *THIS\LinesOrig + 1
SizeB = *THIS\LinesCheck + 1
While LineA < SizeA Or LineB < SizeB
If LineA < SizeA And *THIS\LineOrig(LineA)\modified = 0 And LineB < SizeB And *THIS\LineCheck(LineB)\modified = 0
;equal lines
LineA + 1
LineB + 1
Else
;maybe deleted And/Or inserted lines
StartA = LineA
StartB = LineB
While LineA < SizeA And (LineB >= SizeB Or *THIS\LineOrig(LineA)\modified)
LineA + 1
Wend
While LineB < SizeB And (LineA >= SizeA Or *THIS\LineCheck(LineB)\modified)
LineB + 1
Wend
If StartA < LineA Or StartB < LineB
;store a new difference-item
*THIS\DiffBlocks + 1
If ArraySize(*THIS\MyRes()) < *THIS\DiffBlocks
ReDim *THIS\MyRes._aITEM_(ArraySize(*THIS\MyRes()) + 500)
EndIf
*THIS\MyRes(*THIS\DiffBlocks - 1)\StartA = StartA
*THIS\MyRes(*THIS\DiffBlocks - 1)\StartB = StartB
*THIS\MyRes(*THIS\DiffBlocks - 1)\DeletedA = LineA - StartA
*THIS\MyRes(*THIS\DiffBlocks - 1)\InsertedB = LineB - StartB
EndIf
EndIf
Wend
EndProcedure
Procedure GTD_GetTextDiff(*THIS._GETTEXTDIFF_STRUC_, OriginalFile.s, FileToCheck.s)
;/--------------
;| Main Procedure
;| Loads two textfiles and will immediately
;| compare them and store differences
;|
;| Use the other procedures two see results easily.
;|
;/--------------
Protected Result, FID, uID, j, k, Max, BOM
Protected ArraySize_Orig, ArraySize_Check
*THIS\LinesOrig = -1
*THIS\LinesCheck = -1
ArraySize_Orig = ArraySize(*THIS\LineOrig())
ArraySize_Check = ArraySize(*THIS\LineCheck())
FID = ReadFile(#PB_Any, OriginalFile)
If FID
BOM = ReadStringFormat(FID)
While Eof(FID) = 0
*THIS\LinesOrig + 1
If *THIS\LinesOrig > ArraySize_Orig
ReDim *THIS\LineOrig._DIFF_(ArraySize_Orig + 5000)
ArraySize_Orig + 5000
EndIf
*THIS\LineOrig(*THIS\LinesOrig)\S = ReadString(FID, BOM)
k = #True
For j = 0 To *THIS\LinesOrig - 1
If *THIS\LineOrig(j)\S = *THIS\LineOrig(*THIS\LinesOrig)\S
*THIS\LineOrig(*THIS\LinesOrig)\I = *THIS\LineOrig(j)\I
k = #False
Break
EndIf
Next j
If k
*THIS\LineOrig(*THIS\LinesOrig)\I = uID
uID + 1
EndIf
Wend
CloseFile(FID)
EndIf
FID = ReadFile(#PB_Any, FileToCheck)
If FID = 0
ProcedureReturn 0
EndIf
BOM = ReadStringFormat(FID)
While Eof(FID) = 0
*THIS\LinesCheck + 1
If *THIS\LinesCheck > ArraySize_Check
ReDim *THIS\LineCheck._DIFF_(ArraySize_Check + 5000)
ArraySize_Check + 5000
EndIf
*THIS\LineCheck(*THIS\LinesCheck)\S = ReadString(FID, BOM)
k = #True
For j = 0 To *THIS\LinesCheck - 1
If *THIS\LineCheck(j)\S = *THIS\LineCheck(*THIS\LinesCheck)\S
*THIS\LineCheck(*THIS\LinesCheck)\I = *THIS\LineCheck(j)\I
k = #False
Break
EndIf
Next j
If k
For j = 0 To *THIS\LinesOrig
If *THIS\LineOrig(j)\S = *THIS\LineCheck(*THIS\LinesCheck)\S
*THIS\LineCheck(*THIS\LinesCheck)\I = *THIS\LineOrig(j)\I
k = #False
Break
EndIf
Next j
If k
*THIS\LineCheck(*THIS\LinesCheck)\I = uID
uID + 1
EndIf
EndIf
Wend
CloseFile(FID)
MAX = *THIS\LinesOrig + *THIS\LinesCheck + 1
Dim DV.i(2 * MAX + 2)
Dim UV.i(2 * MAX + 2)
TextSearch_LCS(*THIS, 0, *THIS\LinesOrig + 1, 0, *THIS\LinesCheck + 1, *THIS\LinesOrig + 1, *THIS\LinesCheck + 1, DV(), UV())
TextSearch_Optimize(*THIS)
*THIS\DiffBlocks = 0
TextSearch_CreateDiffs(*THIS)
Result = *THIS\DiffBlocks
ProcedureReturn Result
EndProcedure
Procedure GetTextDiff_CreateInterface()
;Create the damn Interface
Protected *G._GETTEXTDIFF_STRUC_
*G = AllocateMemory(SizeOf(_GETTEXTDIFF_STRUC_))
If *G
InitializeStructure(*G, _GETTEXTDIFF_STRUC_)
*G\VTable = ?_TEXTDIFF_PROCEDURES_
EndIf
ProcedureReturn *G
EndProcedure
DataSection
_TEXTDIFF_PROCEDURES_:
Data.i @GTD_GetTextDiff()
Data.i @GTD_CountDiffBlocks()
Data.i @GTD_DiffBlockType()
Data.i @GTD_DiffBlockSize()
Data.i @GTD_DiffBlockLine()
Data.i @GTD_GetOriginalFileLine()
Data.i @GTD_GetCheckFileLine()
EndDataSection
Code: Alles auswählen
EnableExplicit
XIncludeFile "GetTextDiff.pbi"
;- Edit here
#File_Original = "C:\test.txt"
#File_ToCheck = "C:\test2.txt"
Procedure main()
Protected *TD.InterfaceGetTextDiff
Protected i, j, k, LineNo, Type, Size, a$
*TD.InterfaceGetTextDiff = GetTextDiff_CreateInterface()
If *TD\GetTextDiff(#File_Original, #File_ToCheck)
j = *TD\CountDiffBlocks()
If j
For i = 0 To j - 1
Type = *TD\DiffBlockType(i)
If Type & #DIFF_DELETE_BLOCK
Size = *TD\DiffBlockSize(i, #DIFF_DELETE_BLOCK)
Debug "Deleted following " + Str(Size) + " Line(s):"
For k = 0 To Size - 1
a$ = *TD\DiffBlockLine(i, #DIFF_DELETE_BLOCK, k, @LineNo)
Debug "Line " + Str(LineNo) + ": " + a$
Next k
EndIf
If Type & #DIFF_INSERT_BLOCK
Size = *TD\DiffBlockSize(i, #DIFF_INSERT_BLOCK)
Debug "Added following " + Str(Size) + " Line(s):"
For k = 0 To Size - 1
a$ = *TD\DiffBlockLine(i, #DIFF_INSERT_BLOCK, k, @LineNo)
Debug "Line " + Str(LineNo) + ": " + a$
Next k
EndIf
Next i
EndIf
EndIf
EndProcedure
main()