Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System.Collections.Immutable;
- using System.Text;
- using Text = System.ReadOnlyMemory<char>;
- namespace TextSegmenter;
- /// <summary>
- /// xUnit tests, together with the parser combinators they exercise, for splitting subtitle
- /// text into ASS override blocks (<c>{...}</c>), HTML-like formatting tags and plain text runs.
- /// </summary>
- public class UnitTest1
- {
-     /// <summary>
-     /// One parsing step. Given the remaining input it returns the segments it recognised plus
-     /// the unconsumed rest, or <c>null</c> when it does not apply at the current position.
-     /// </summary>
-     public delegate (ImmutableArray<Segment>, ReadOnlyMemory<char>)? SegmentParser(Text text);
-     // Prefixes are declared before the parser fields so they are initialised first.
-     private static readonly string[] OpenTagPrefixes = { "<i>", "<b>", "<font ", "<u>" };
-     private static readonly string[] CloseTagPrefixes = { "</i>", "</b>", "</font>", "</u>" };
-     private static readonly SegmentParser ParseAssTag = AssParser();
-     private static readonly SegmentParser ParseOpenTag = OpenTagParser();
-     private static readonly SegmentParser ParseClose = CloseTagParser();
-     private static readonly SegmentParser ParseText = TextParser();
-     // Built once instead of on every Parse call. The text parser must stay last: it accepts
-     // whatever the tag parsers rejected.
-     private static readonly SegmentParser ParseAll = Multiple(ParseAssTag, ParseOpenTag, ParseClose, ParseText);
-     [Theory]
-     [InlineData("{tag}foobar")]
-     public void SegmentTest(string input)
-     {
-         var result = Parse(input);
-         Assert.Equal(2, result.Count());
-     }
-     [Theory]
-     [InlineData("<i>foo bar</i>")]
-     public void SegmentTestSeveral(string input)
-     {
-         var result = Parse(input);
-         Assert.Equal(3, result.Count());
-     }
-     [Theory]
-     [InlineData("- <font color=\"#0000ff\">Yi's Mom</font>: Hey, kids...\n- <font color=\"#800000\">(BOTH GASP)</font>")]
-     public void SegmentTestComplex(string input)
-     {
-         var result = Parse(input);
-         Assert.Contains(result, segment => segment is TagOpen);
-     }
-     /// <summary>A tag that is the very last thing in the input must still be recognised.</summary>
-     [Theory]
-     [InlineData("foo<i>")]
-     [InlineData("foo{a}")]
-     public void SegmentTestTrailingTag(string input)
-     {
-         var result = Parse(input).ToList();
-         Assert.Equal(2, result.Count);
-         Assert.IsType<TagOpen>(result[1]);
-     }
-     /// <summary>
-     /// Markup that never terminates, or a stray marker character, is kept as text and no
-     /// input is lost.
-     /// </summary>
-     [Theory]
-     [InlineData("{unterminated")]
-     [InlineData("<font color=\"#0000ff\"")]
-     [InlineData("1 < 2")]
-     public void SegmentTestMalformedMarkupIsKeptAsText(string input)
-     {
-         var result = Parse(input).ToList();
-         Assert.All(result, segment => Assert.IsType<TextSegment>(segment));
-         Assert.Equal(input, string.Concat(result.Select(segment => segment.Content.ToString())));
-     }
-     /// <summary>Splits <paramref name="input"/> into tag and text segments.</summary>
-     /// <param name="input">Raw subtitle text.</param>
-     /// <returns>The segments in input order; an empty sequence when nothing was recognised.</returns>
-     public IEnumerable<Segment> Parse(string input)
-     {
-         if (ParseAll(input.AsMemory()) is ({ IsDefaultOrEmpty: false } segments, _))
-         {
-             return segments;
-         }
-         return Array.Empty<Segment>();
-     }
-     /// <summary>
-     /// Recognises an ASS override block: a leading <c>{</c> up to and including the first
-     /// <c>}</c>, emitted as a <see cref="TagOpen"/>. Yields <c>null</c> when the block is never
-     /// closed, so the caller can fall back to another parser instead of looping on an empty match.
-     /// </summary>
-     private static SegmentParser AssParser()
-     {
-         return input => input.Span.StartsWith("{", StringComparison.Ordinal)
-             ? SliceThrough(input, '}', content => new TagOpen(content))
-             : null;
-     }
-     /// <summary>
-     /// Recognises an opening <c>&lt;i&gt;</c>, <c>&lt;b&gt;</c>, <c>&lt;u&gt;</c> or
-     /// <c>&lt;font ...&gt;</c> tag. The prefix test alone decides applicability, so a tag at
-     /// the very end of the input is accepted too.
-     /// </summary>
-     private static SegmentParser OpenTagParser()
-     {
-         return text => StartsWithAny(text, OpenTagPrefixes)
-             ? SliceThrough(text, '>', content => new TagOpen(content))
-             : null;
-     }
-     /// <summary>Recognises a closing <c>&lt;/i&gt;</c>, <c>&lt;/b&gt;</c>, <c>&lt;/u&gt;</c> or <c>&lt;/font&gt;</c> tag.</summary>
-     private static SegmentParser CloseTagParser()
-     {
-         return text => StartsWithAny(text, CloseTagPrefixes)
-             ? SliceThrough(text, '>', content => new TagClose(content))
-             : null;
-     }
-     /// <summary>
-     /// Recognises a run of plain text that ends just before the next <c>{</c> or <c>&lt;</c>.
-     /// A marker in the first position is taken literally: this parser is meant to run after the
-     /// tag parsers, so a leading marker is one they all rejected. The segment is a slice of the
-     /// input; no characters are copied.
-     /// </summary>
-     private static SegmentParser TextParser()
-     {
-         return text =>
-         {
-             if (text.IsEmpty)
-             {
-                 return null;
-             }
-             // Search from index 1 so at least one character is always consumed.
-             var next = text.Span[1..].IndexOfAny('{', '<');
-             var length = next < 0 ? text.Length : next + 1;
-             return (ImmutableArray.Create<Segment>(new TextSegment(text[..length])), text[length..]);
-         };
-     }
-     /// <summary>
-     /// Combines parsers: at each position the first parser that yields a result wins, its
-     /// segments are collected, and parsing restarts on the rest.
-     /// </summary>
-     /// <param name="segmentParsers">Parsers to try, in priority order.</param>
-     /// <returns>
-     /// A parser that never yields <c>null</c>; it returns whatever was collected together with
-     /// the input it could not consume.
-     /// </returns>
-     private static SegmentParser Multiple(params SegmentParser[] segmentParsers)
-     {
-         return text =>
-         {
-             var collected = ImmutableArray.CreateBuilder<Segment>();
-             var rest = text;
-             while (!rest.IsEmpty)
-             {
-                 if (FirstMatch(rest) is not { } match)
-                 {
-                     break;
-                 }
-                 var (parsed, remaining) = match;
-                 // Stop on an empty result or on a parser that consumed nothing; continuing
-                 // would repeat the same step forever.
-                 if (parsed.IsDefaultOrEmpty || remaining.Length >= rest.Length)
-                 {
-                     break;
-                 }
-                 collected.AddRange(parsed);
-                 rest = remaining;
-             }
-             return (collected.ToImmutable(), rest);
-         };
-         // Result of the first parser that applies at the start of the input, or null if none does.
-         (ImmutableArray<Segment>, ReadOnlyMemory<char>)? FirstMatch(Text input)
-         {
-             foreach (var parser in segmentParsers)
-             {
-                 if (parser(input) is { } match)
-                 {
-                     return match;
-                 }
-             }
-             return null;
-         }
-     }
-     // True when the text begins with any of the given prefixes (ordinal comparison).
-     private static bool StartsWithAny(Text text, string[] prefixes)
-     {
-         foreach (var prefix in prefixes)
-         {
-             if (text.Span.StartsWith(prefix, StringComparison.Ordinal))
-             {
-                 return true;
-             }
-         }
-         return false;
-     }
-     // Cuts the text just after the first terminator and wraps the head in a segment;
-     // null when the terminator never occurs.
-     private static (ImmutableArray<Segment>, ReadOnlyMemory<char>)? SliceThrough(Text text, char terminator, Func<Text, Segment> create)
-     {
-         var end = text.Span.IndexOf(terminator);
-         if (end < 0)
-         {
-             return null;
-         }
-         var length = end + 1;
-         return (ImmutableArray.Create(create(text[..length])), text[length..]);
-     }
-     /// <summary>Base type of every parsed piece; <c>Content</c> holds the characters it covers.</summary>
-     public abstract record Segment(ReadOnlyMemory<char> Content);
-     /// <summary>A run of plain text.</summary>
-     public record TextSegment(ReadOnlyMemory<char> Content) : Segment(Content);
-     /// <summary>An opening tag; ASS override blocks are reported with this type as well.</summary>
-     public record TagOpen(ReadOnlyMemory<char> Content) : Segment(Content);
-     /// <summary>A closing tag.</summary>
-     public record TagClose(ReadOnlyMemory<char> Content) : Segment(Content);
-     // Symbol and HearingImpaired are not produced by any parser in this class.
-     public record Symbol(ReadOnlyMemory<char> Content) : Segment(Content);
-     public record HearingImpaired(ReadOnlyMemory<char> Content, ImmutableArray<Segment> Segments) : Segment(Content);
- }
Add Comment
Please, Sign In to add comment