Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/// <summary>
/// Removes pairs of directly adjacent tags (e.g. <c>&lt;b&gt;&lt;/b&gt;</c>) from <paramref name="text"/>.
/// </summary>
/// <remarks>
/// The method looks for every <c>&gt;&lt;</c> sequence, scans left for the nearest bracket and right for
/// the nearest bracket, and removes the span from the left <c>&lt;</c> to the right <c>&gt;</c> when both
/// are found. It does not verify that the second tag closes the first one.
/// When <c>Configuration.Settings.Tools.OcrFixUseHardcodedRules</c> is enabled, a <c>&gt;&lt;</c> sequence
/// missing its opening or closing bracket is also cleaned up; otherwise such sequences are left untouched.
/// </remarks>
/// <param name="text">The text to clean. May be <c>null</c> or empty.</param>
/// <returns>
/// The text with adjacent tag pairs removed; <paramref name="text"/> itself when it is <c>null</c> or empty.
/// </returns>
public static string RemoveEmptyTags(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    // Lowest index the backward scan may inspect; everything before it was already examined and skipped.
    int lowerBound = 0;

    // <tag></tag> => "".
    const string EmptyTags = "><";

    // Ordinal search: a linguistic (culture-sensitive) match may ignore certain characters and return an
    // index where text[idx + 1] is not '<', which would break the index arithmetic below.
    int idx = text.IndexOf(EmptyTags, System.StringComparison.Ordinal);
    while (idx >= 0)
    {
        int j = idx - 1;
        int k = idx + 2;

        // Find tag start index (nearest bracket to the left, not going below lowerBound).
        for (; j >= lowerBound; j--)
        {
            if (text[j] == '>' || text[j] == '<')
            {
                break;
            }
        }

        // Find tag end index (nearest bracket to the right).
        for (; k < text.Length; k++)
        {
            if (text[k] == '<' || text[k] == '>')
            {
                break;
            }
        }

        // Invalid tag: no opening '<' on the left and no bracket at all on the right.
        if ((j < 0 || text[j] != '<') && k >= text.Length)
        {
            break;
        }

        // For tags like: ><Foobar<b></b>.>< => ><Foobar.>< or ><.<b></b>Foobar>< => ><.Foobar><
        if ((j >= 0 && text[j] == '>') || (k < text.Length && text[k] == '<'))
        {
            lowerBound = idx + 2;
            idx = text.IndexOf(EmptyTags, lowerBound, System.StringComparison.Ordinal);
            continue;
        }

        if (Configuration.Settings.Tools.OcrFixUseHardcodedRules)
        {
            // Foo><tag>bar. => Foobar
            if (j < 0 && k < text.Length && text[k] == '>')
            {
                j = idx;
            }
            // Foo<tag><bar. => Foobar.
            else if (j >= 0 && text[j] == '<' && k >= text.Length)
            {
                k = idx + 1; // Note: +1 will be added when removing.
            }
        }
        else
        {
            // Do nothing for text like: ><Foobar.><
            if (j < lowerBound || k >= text.Length)
            {
                lowerBound = idx + 2;
                idx = text.IndexOf(EmptyTags, lowerBound, System.StringComparison.Ordinal);
                continue;
            }
        }

        // Remove; no space is inserted in place of the removed tags.
        text = text.Remove(j, k - j + 1);

        // The removal may start just below lowerBound (at the '<' of a previously skipped "><").
        // Lower the bound so the next backward scan does not stop on whatever character moved into
        // that position and mistake it for a tag start (e.g. "a>b><cd><e>xy><f>" would lose "xy").
        if (j < lowerBound)
        {
            lowerBound = j;
        }

        idx = text.IndexOf(EmptyTags, j, System.StringComparison.Ordinal);
    }

    return text;
}
- // Note: Algorithms written for Subtitle Edit.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement