Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System;
- using System.Collections.Generic;
- using System.IO;
- using System.Linq;
- using System.Threading.Tasks;
- using Aspose.App.Models;
- using Aspose.Html;
- using Tools.Foundation.Models;
- namespace Aspose.App.Controllers
- {
/// <summary>
/// Controller that produces a character-level diff of the text content of two HTML files
/// and saves the result as an HTML document.
/// </summary>
internal class AsposeHtmlComparisonController : ApiControllerBase
{
    /// <summary>
    /// Compares the text of two HTML files located in the same working sub-folder and writes
    /// an HTML document in which unchanged text is wrapped in <c>span</c>, insertions in
    /// <c>ins</c>, replacements in <c>mark</c> and deletions in <c>del</c>.
    /// </summary>
    /// <param name="fileName1">Name of the original HTML file.</param>
    /// <param name="fileName2">Name of the modified HTML file.</param>
    /// <param name="folderName">Folder (relative to the working directory) holding both files.</param>
    /// <returns>The response produced by the base-class <c>Process</c> pipeline.</returns>
    public async Task<Response> Compare(string fileName1, string fileName2, string folderName)
    {
        Aspose.App.Models.License.SetAsposeHtmlLicense();
        var comparedDocument = string.Format("{0}_compare_to_{1}.html",
            Path.GetFileNameWithoutExtension(fileName1), Path.GetFileNameWithoutExtension(fileName2));
        return await Process(this.GetType().Name, comparedDocument, folderName, ".html", false, false,
            AsposeHTML + ComparisonApp, ProductFamilyNameKeysEnum.html, "Compare",
            (inFilePath, outPath, zipOutFolder) =>
            {
                // NOTE(review): the names are concatenated into a path unchecked; confirm that
                // callers validate them against path traversal.
                var tc1 = GetTextContent(AppSettings.WorkingDirectory + folderName + "/" + fileName1);
                var tc2 = GetTextContent(AppSettings.WorkingDirectory + folderName + "/" + fileName2);

                // Only fragments present in both documents are compared; extras are ignored.
                var count = Math.Min(tc1.Count, tc2.Count);

                // Accumulate all markup first so the document body is parsed only once.
                var markup = new System.Text.StringBuilder();
                for (int i = 0; i < count; i++)
                {
                    var originalStr = (tc1[i] ?? string.Empty).Trim();
                    var modifiedStr = (tc2[i] ?? string.Empty).Trim();
                    var changes = Levenshtein3(originalStr, modifiedStr);
                    var modifications = BuildModifications(originalStr, modifiedStr, changes.route);
                    AppendDiffMarkup(markup, modifications);
                }

                using (var result = new Aspose.Html.HTMLDocument())
                {
                    result.Body.InnerHTML = markup.ToString();
                    result.Save(outPath);
                }
            });
    }

    /// <summary>
    /// Groups an edit route into runs of consecutive operations of the same kind,
    /// attaching to each run the characters it covers.
    /// </summary>
    /// <param name="originalStr">Original text the route was computed from.</param>
    /// <param name="modifiedStr">Modified text the route was computed from.</param>
    /// <param name="route">Sequence of '=', 'I', 'D', 'R' operations.</param>
    /// <returns>Runs in route order; empty when the route is empty.</returns>
    private static List<Modification> BuildModifications(string originalStr, string modifiedStr, string route)
    {
        var modifications = new List<Modification>();
        if (string.IsNullOrEmpty(route))
        {
            // Both fragments are empty: nothing to report.
            return modifications;
        }

        int pos1 = 0, pos2 = 0;
        char runStatus = route[0];
        var runText = new System.Text.StringBuilder();

        foreach (char status in route)
        {
            char symbol = ' ';
            switch (status)
            {
                case '=':
                    symbol = originalStr[pos1++];
                    pos2++;
                    break;
                case 'I':
                    symbol = modifiedStr[pos2++];
                    break;
                case 'D':
                    // A deletion consumes a character of the original text only.
                    symbol = originalStr[pos1++];
                    break;
                case 'R':
                    symbol = modifiedStr[pos2++];
                    pos1++;
                    break;
                default:
                    break;
            }

            if (status != runStatus)
            {
                modifications.Add(new Modification { Status = runStatus, Value = runText.ToString() });
                runStatus = status;
                runText.Clear();
            }

            runText.Append(symbol);
        }

        modifications.Add(new Modification { Status = runStatus, Value = runText.ToString() });
        return modifications;
    }

    /// <summary>
    /// Appends the HTML representation of the given runs to <paramref name="sb"/>.
    /// Run text is HTML-encoded so characters such as '&lt;' and '&amp;' stay literal.
    /// </summary>
    private static void AppendDiffMarkup(System.Text.StringBuilder sb, IEnumerable<Modification> modifications)
    {
        foreach (var item in modifications)
        {
            string tag;
            switch (item.Status)
            {
                case '=':
                    tag = "span";
                    break;
                case 'I':
                    tag = "ins";
                    break;
                case 'R':
                    tag = "mark";
                    break;
                case 'D':
                    tag = "del";
                    break;
                default:
                    // Unknown statuses produce no output.
                    continue;
            }

            sb.Append('<').Append(tag).Append('>')
              .Append(System.Net.WebUtility.HtmlEncode(item.Value))
              .Append("</").Append(tag).Append('>');
        }
    }

    /// <summary>
    /// Loads an HTML file, blanks the text of its <c>script</c> and <c>style</c> elements and
    /// returns the text content of each child element of the document.
    /// </summary>
    /// <param name="fileName">Path of the HTML file to read.</param>
    /// <returns>One text fragment per child element of the document.</returns>
    private static List<string> GetTextContent(string fileName)
    {
        using (var document = new HTMLDocument(System.IO.File.ReadAllText(fileName), string.Empty))
        {
            // Script and style bodies are not visible text, so exclude them from the diff.
            ClearTextOf(document, "script");
            ClearTextOf(document, "style");

            var textFragments = new List<string>();
            foreach (var element in document.Children)
            {
                textFragments.Add(element.TextContent);
            }

            return textFragments;
        }
    }

    /// <summary>Sets the text content of every element with the given tag name to an empty string.</summary>
    private static void ClearTextOf(HTMLDocument document, string tagName)
    {
        foreach (var element in document.GetElementsByTagName(tagName))
        {
            element.TextContent = string.Empty;
        }
    }

    /// <summary>
    /// Computes the Levenshtein distance between two strings together with the edit route
    /// ('=' keep, 'I' insert, 'D' delete, 'R' replace) that turns <paramref name="S1"/> into
    /// <paramref name="S2"/>.
    /// </summary>
    /// <remarks>
    /// Only a band of <c>h + 1</c> rows (h = floor(sqrt(m + 1))) of the distance and operation
    /// tables is kept. Whenever backtracking leaves the retained band, the forward pass is
    /// re-run from the first row up to the current position.
    /// </remarks>
    /// <param name="S1">Original string (must not be null).</param>
    /// <param name="S2">Modified string (must not be null).</param>
    /// <returns>The distance and the complete route, in left-to-right order.</returns>
    private static Prescription Levenshtein3(string S1, string S2)
    {
        int m = S1.Length, n = S2.Length;
        int h = (int)Math.Sqrt(m + 1);
        int[,] D = new int[h + 1, n + 1];
        char[,] P = new char[h + 1, n + 1];
        int d = 0;
        // Operations are collected from the end of the strings towards the start.
        var route = new System.Text.StringBuilder();
        int iPos = m, jPos = n;
        do
        {
            // Row for the empty prefix of S1: everything is an insertion.
            for (int i = 0; i <= jPos; i++)
            {
                D[0, i] = i;
                P[0, i] = 'I';
            }

            int index = 1;
            for (int i = 1; i <= iPos; i++)
            {
                for (int j = 0; j <= jPos; j++)
                {
                    if (j == 0)
                    {
                        D[index, j] = i;
                    }
                    else
                    {
                        int cost = (S1[i - 1] != S2[j - 1]) ? 1 : 0;
                        if (D[index, j - 1] < D[index - 1, j] && D[index, j - 1] < D[index - 1, j - 1] + cost)
                        {
                            // Insert
                            D[index, j] = D[index, j - 1] + 1;
                            P[index, j] = 'I';
                        }
                        else if (D[index - 1, j] < D[index - 1, j - 1] + cost)
                        {
                            // Remove
                            D[index, j] = D[index - 1, j] + 1;
                            P[index, j] = 'D';
                        }
                        else
                        {
                            // Replace or no-op
                            D[index, j] = D[index - 1, j - 1] + cost;
                            P[index, j] = (cost == 1) ? 'R' : '=';
                        }
                    }
                }

                if (i % h == 0)
                {
                    // Band is full: start a new band whose first row is the last row of this one.
                    int[] vRow = new int[n + 1];
                    char[] cRow = new char[n + 1];
                    for (int j = 0; j <= n; j++)
                    {
                        vRow[j] = D[index, j];
                        cRow[j] = P[index, j];
                    }

                    D = new int[h + 1, n + 1];
                    P = new char[h + 1, n + 1];
                    for (int j = 0; j <= n; j++)
                    {
                        D[0, j] = vRow[j];
                        P[0, j] = cRow[j];
                    }

                    index = 0;
                }

                index++;
            }

            // The distance is available only on the first (full) pass.
            if (iPos == m && jPos == n)
            {
                d = D[index - 1, n];
            }

            // Backtrack through the rows still held in the current band.
            while (index > 0 && iPos != 0 && jPos != 0)
            {
                char c = P[index - 1, jPos];
                route.Append(c);
                if (c == 'R' || c == '=')
                {
                    iPos--;
                    jPos--;
                    index--;
                }
                else if (c == 'D')
                {
                    iPos--;
                    index--;
                }
                else
                {
                    jPos--;
                }
            }
        } while ((iPos != 0) && (jPos != 0));

        // Backtracking stops as soon as one string is exhausted; whatever is left of the
        // other one is a leading run of deletions (from S1) or insertions (from S2).
        route.Append('D', iPos);
        route.Append('I', jPos);

        char[] operations = route.ToString().ToCharArray();
        Array.Reverse(operations);
        return new Prescription(d, new string(operations));
    }
}
/// <summary>
/// Result of an edit-distance computation: the distance itself and the
/// sequence of edit operations that was found.
/// </summary>
internal class Prescription
{
    /// <summary>Edit distance between the two compared strings.</summary>
    public int distance;

    /// <summary>Edit operations, one character per operation.</summary>
    public string route;

    /// <summary>Creates a prescription from a distance and its route.</summary>
    /// <param name="distance">Edit distance.</param>
    /// <param name="route">Edit operations, one character per operation.</param>
    public Prescription(int distance, string route)
    {
        this.route = route;
        this.distance = distance;
    }
}
/// <summary>
/// A run of consecutive characters that share the same edit status.
/// </summary>
internal class Modification
{
    /// <summary>Text covered by this run.</summary>
    public string Value;

    /// <summary>Edit status of the run, stored as a single character.</summary>
    public char Status;
}
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement