ivandrofly

Optimized FixExtraSpaces(string s) (From Subtitle Edit)

Jul 22nd, 2023 (edited)
462
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 5.56 KB | None | 0 0
  1. using Xunit.Sdk;
  2.  
  3. namespace SubtitleEditTest;
  4.  
  5. public class UnitTest1
  6. {
  7.     [Theory]
  8.     [InlineData("foobar \r\nfoobar")]
  9.     [InlineData("foobar\r\n foobar")]
  10.     [InlineData("foobar \r\n foobar")]
  11.     [InlineData("foobar\r\nfoobar")]
  12.     public void RemoveWhiteSpaceAfterLineBreakTest(string input)
  13.     {
  14.         const string expected = "foobar\r\nfoobar";
  15.         var outputChars = RemoveWhiteSpaceAfterLineBreak(input);
  16.         Assert.Equal(expected, outputChars);
  17.     }
  18.  
  19.     [Theory]
  20.     [InlineData(" ")]
  21.     public void RemoveWhiteSpaceAfterLineBreakTestExpectingWhiteSpace(string input)
  22.     {
  23.         const string expected = " ";
  24.         var outputChars = RemoveWhiteSpaceAfterLineBreak(input);
  25.         Assert.Equal(expected, outputChars);
  26.     }
  27.  
  28.     [Theory]
  29.     [InlineData("f o")]
  30.     [InlineData("f  o")]
  31.     public void FixExtraSpacesTest(string s)
  32.     {
  33.         var outputChars = FixExtraSpaces(s);
  34.         Assert.Equal("f o", outputChars);
  35.     }
  36.  
  37.     [Theory]
  38.     [InlineData("o ")]
  39.     [InlineData("o                         ")]
  40.     public void FixExtraSpacesTrailingTest(string s)
  41.     {
  42.         var outputChars = FixExtraSpaces(s);
  43.         Assert.Equal("o ", outputChars);
  44.     }
  45.    
  46.     [Theory]
  47.     [InlineData("o           .              ")]
  48.     public void FixExtraSpacesDotTest(string s)
  49.     {
  50.         var outputChars = FixExtraSpaces(s);
  51.         Assert.Equal("o . ", outputChars);
  52.     }
  53.  
  54.     [Theory]
  55.     [InlineData(" o")]
  56.     public void FixExtraSpacesLeadingTest(string s)
  57.     {
  58.         var outputChars = FixExtraSpaces(s);
  59.         Assert.Equal(" o", outputChars);
  60.     }
  61.  
  62.     [Theory]
  63.     [InlineData("foobar   !")]
  64.     [InlineData("foobar !")]
  65.     public void FixExtraSpacesCharAfter(string s)
  66.     {
  67.         var outputChars = FixExtraSpaces(s);
  68.         Assert.Equal("foobar !", outputChars);
  69.     }
  70.  
  71.     [Theory]
  72.     [InlineData("foobar \r\n    ")]
  73.     [InlineData("foobar   \r\n")]
  74.     public void FixExtraSpacesCharNewLine(string s)
  75.     {
  76.         var outputChars = FixExtraSpaces(s);
  77.         Assert.Equal("foobar\r\n", outputChars);
  78.     }
  79.  
  80.     private static string FixExtraSpaces(string s)
  81.     {
  82.         if (string.IsNullOrEmpty(s))
  83.         {
  84.             return s;
  85.         }
  86.  
  87.         var lineBreakPositionTrack = -1;
  88.         var whiteSpaceTrack = -1; // track last white space position
  89.         var writeIndex = s.Length - 1; // track current slow available for writing
  90.         var charBuffer = new char[s.Length]; // array storing the outputChars text
  91.  
  92.         // if current is letter or symbols take and rest track position
  93.         // if current whitespace and no track take and track position
  94.         // if pre was whitespace and current is \r or \n => replace white-space
  95.         for (int i = s.Length - 1; i >= 0; i--)
  96.         {
  97.             var ch = s[i];
  98.             if (ch == ' ')
  99.             {
  100.                 // if condition is false then ignore the white space
  101.                 if (whiteSpaceTrack >= 0 || lineBreakPositionTrack >= 0)
  102.                 {
  103.                     continue; // ignore white space
  104.                 }
  105.                
  106.                 // if last known char beside white space was \r or \n then we want to ignore the white space
  107.                 whiteSpaceTrack = writeIndex;
  108.                        
  109.                 // write the white space index and decrement to next position
  110.                 charBuffer[writeIndex--] = ch;
  111.             }
  112.             else if (ch == '\n' || ch == '\r')
  113.             {
  114.                 // if white space tracking (whiteSpaceTrack) variable has value then
  115.                 // then that means out last write was a white space and now we want to override the value with \r or \n
  116.                 writeIndex = whiteSpaceTrack > 0 ? whiteSpaceTrack : writeIndex;
  117.                
  118.                 // track the last line breaking position
  119.                 lineBreakPositionTrack = writeIndex;
  120.                
  121.                 // write line breaking chars and decrement to next position
  122.                 charBuffer[writeIndex--] = ch;
  123.                
  124.                 // reset the white space tracking variable
  125.                 whiteSpaceTrack = -1;
  126.             }
  127.             else // handle none whitespace and new line chars
  128.             {
  129.                 // white currently non-white space char and decrement the writing tracking variable
  130.                 charBuffer[writeIndex--] = ch;
  131.                
  132.                 // reset the white space tracking index
  133.                 whiteSpaceTrack = -1;
  134.             }
  135.         }
  136.  
  137.         return new string(charBuffer, writeIndex + 1, charBuffer.Length - (writeIndex + 1));
  138.     }
  139.  
  140.     private static string RemoveWhiteSpaceAfterLineBreak(string input)
  141.     {
  142.         if (!input.Contains(Environment.NewLine))
  143.         {
  144.             return input;
  145.         }
  146.  
  147.         var inputLen = input.Length;
  148.         var inputCharSlotTrack = inputLen - 1;
  149.         var inputChars = new char[inputLen];
  150.         for (int i = inputLen - 1; i >= 0; i--)
  151.         {
  152.             var ch = input[i];
  153.             if (ch != ' ' || (!IsLineBreakFollowOrHeading(i - 1) && !IsLineBreakFollowOrHeading(i + 1)))
  154.             {
  155.                 inputChars[inputCharSlotTrack--] = ch;
  156.             }
  157.         }
  158.  
  159.         return new string(inputChars, inputCharSlotTrack + 1, inputLen - (inputCharSlotTrack + 1));
  160.  
  161.         bool IsKnownLineBreakChar(char ch) => ch == '\n' || ch == '\r';
  162.  
  163.         bool IsLineBreakFollowOrHeading(int charIndex) =>
  164.             charIndex >= 0 && charIndex < input.Length && IsKnownLineBreakChar(input[charIndex]);
  165.     }
  166. }
Add Comment
Please, Sign In to add comment