infinity_2

program t2;
// Build (Valgrind-friendly debug info):  fpc -gv new.pas
// Run under Valgrind:  valgrind --leak-check=full ./new file_amino_acids.txt f_DNK_RNK.txt 1
type
    { Codon table: for every letter 'A'..'Y' it holds up to six strings
      (the main program fills them with RNA codons for that residue). }
    matrix_t = array['A'..'Y', 1..6] of string;

    { One stored symbol of a sequence together with the position recorded for it. }
    nucleatides_pair_t = record
        identificator_line: qword;    { line number recorded when the symbol was read }
        identificator_column: qword;  { column number recorded when the symbol was read }
        result: char;                 { the symbol itself }
    end;

    { A named sequence read from the nucleotide file. }
    our_sequence = record
        name: array of char;                   { header text that follows '>' }
        seq_type: (DNA, RNA, AMINO, UNKNOWN);  { detected kind of the sequence }
        items : array of nucleatides_pair_t;   { stored symbols with their positions }
    end;

var
    { Input files, read one character at a time. }
    file_amino_acids, f_DNK_RNK: file of char;

    { File names taken from the command line. }
    amino_filename, DNK_RNK_FileName: string;

    { Search mode number (the main program accepts 1 or 3). }
    search_type: integer = 1;

    { Query: residues read from the amino-acid file. }
    amino_acid_arr: array of char;
    { Letters accepted as nucleotides (assigned at program start). }
    set_of_nucleotides: set of char;
    { Letters B, J, O, U, X, Z (assigned at program start). }
    set_impossibe_nucleotides: set of char;
    { Non-sequence characters tolerated in the input (assigned at program start). }
    availble_set: set of char;

    { The sequence currently being read and searched. }
    nucleatides: our_sequence;
    { Codon table used by the search procedures. }
    amino_acid_table: matrix_t;

    { Number of residues kept in amino_acid_arr. }
    length_amino_acid_arr: qword;
    { Most recently read character. }
    cur_symbol: char;
    { General-purpose counters; j is the write index into name / items. }
    i, j: qword;
    { False until a sequence has been processed; while false the main loop
      reads a new character at the top of every iteration. }
    flag: boolean = false;
    { Current column (row) and line in the nucleotide file. }
    row, line: qword;
    { Growth factor: name / items are resized to 100 * ratio elements. }
    ratio: qword;
    { Set when the current sequence contains an invalid symbol; the search is then skipped. }
    is_error: boolean = false;


{
    Prints a match found by the reverse-strand search.

    Output: the sequence name, the two offsets measured back from
    Length(nucleatides.items), the (line, column) coordinates of both ends,
    and the symbols items[finish + 2 .. start_position + 2] in groups of
    ten separated by a space, six groups per output line.

    nucleatides    - sequence that contains the match
    start_position - index stored by the caller for the first matched triplet
    finish         - index stored by the caller for the last matched triplet
    start_line, start_column, end_line, end_column - coordinates to report
}
procedure Sequence_Printer_From_Left(var nucleatides: our_sequence; start_position: qword;
finish: qword; start_line: qword; end_line: qword; start_column: qword; end_column: qword);
var
    name_idx: SizeInt;
    item_idx: qword;
    column_index: longint = 1;
    line_index: longint = 0;
begin
    { The name is stored from index 1 of a zero-based dynamic array, so the
      last valid index is High(), not Length(). A signed local counter keeps
      the loop empty for an empty name and leaves the global counters alone. }
    for name_idx := 1 to High(nucleatides.name) do
        write(nucleatides.name[name_idx]);

    writeln();
    writeln ('[-', Length(nucleatides.items) - start_position, ', -',
    Length(nucleatides.items)  - finish,']');
    writeln('(',start_line, ', ', start_column,')',
     ' - (', end_line, ', ', end_column,')');

    item_idx := finish + 2;
    while (item_idx <= start_position + 2) do
    begin
        { one group of at most ten symbols }
        while (column_index <= 10) and (item_idx <= start_position + 2) do
        begin
            write(nucleatides.items[item_idx].result);
            column_index := column_index + 1;
            item_idx := item_idx + 1;
        end;
        column_index := 1;
        line_index := line_index + 1;
        write(' ');
        { line break after every sixth group }
        if (line_index mod 6 = 0) then
            writeln();
    end;
    writeln();
end;

{
    Prints a match found by the forward search.

    Output: the sequence name, the range [start_position, finish], the
    (line, column) coordinates of both ends, and the symbols
    items[start_position .. finish] in groups of ten separated by a space,
    six groups per output line.

    nucleatides    - sequence that contains the match
    start_position - index of the first symbol to print
    finish         - index of the last symbol to print
    start_line, start_column, end_line, end_column - coordinates to report
}
procedure Sequence_Printer(var nucleatides: our_sequence; start_position: qword;
finish:qword; start_line: qword; end_line: qword; start_column: qword; end_column:qword);
var
    name_idx: SizeInt;
    iterator: qword;
    column_index: longint = 1;
    line_index: longint = 0;

begin
    { The name is stored from index 1 of a zero-based dynamic array, so the
      last valid index is High(), not Length(). The signed counter keeps the
      loop empty when the name is empty. }
    for name_idx := 1 to High(nucleatides.name) do
        write(nucleatides.name[name_idx]);

    writeln();
    writeln ('[', start_position,', ', finish,']');
    writeln('(',start_line, ', ', start_column,')', ' - (',
    end_line, ', ', end_column,')');

    iterator := start_position;

    while (iterator <= finish) do
    begin
        { one group of at most ten symbols }
        while (column_index <= 10) and (iterator <= finish) do
        begin
            write(nucleatides.items[iterator].result);
            column_index := column_index + 1;
            iterator := iterator + 1;
        end;
        column_index := 1;
        line_index := line_index + 1;
        write(' ');
        { line break after every sixth group }
        if (line_index mod 6 = 0) then
            writeln();
    end;
    writeln();
end;

{
    Appends three characters to triplet, taken from items[i + 2], items[i + 1]
    and items[i] in that order. T, A, G and C (either case) are replaced by
    A, U, C and G respectively; any other symbol is appended upper-cased.

    nucleatides - sequence to read from
    triplet     - string that receives the appended characters
    i           - index of the lowest of the three positions
}
procedure Triplet_DNA(var nucleatides: our_sequence; var triplet: string; i: qword);
var
    idx: qword;
    symbol: char;
begin
    for idx := i + 2 downto i do
    begin
        symbol := nucleatides.items[idx].result;
        case symbol of
            't', 'T': triplet := triplet + 'A';
            'a', 'A': triplet := triplet + 'U';
            'g', 'G': triplet := triplet + 'C';
            'c', 'C': triplet := triplet + 'G';
        else
            { UpCase changes only 'a'..'z'; everything else is appended unchanged }
            triplet := triplet + UpCase(symbol);
        end;
    end;
end;


{
    Appends to triplet the symbols items[i], items[i + 1], items[i + 2],
    upper-cased. When the sequence type is DNA, T is written as U.

    nucleatides - sequence to read from
    triplet     - string that receives the appended characters
    i           - index of the first of the three positions
}
procedure Make_Triplet(var nucleatides: our_sequence; var triplet: string; i: qword);
var
    idx: qword;
    symbol: char;
begin
    for idx := i to i + 2 do
    begin
        { UpCase changes only 'a'..'z' }
        symbol := UpCase(nucleatides.items[idx].result);
        if (nucleatides.seq_type = DNA) and (symbol = 'T') then
            symbol := 'U';
        triplet := triplet + symbol;
    end;
end;

{
    Forward search: scans nucleatides.items for consecutive triplets that
    encode the residues in aminoAcids and prints the first hit through
    Sequence_Printer.

    nucleatides - sequence to scan; triplets are built by Make_Triplet
    aminoAcids  - residues to look for; '-' matches any triplet when the
                  global search_type is 3
    Tabble      - codon table indexed by residue letter

    Besides its parameters the procedure reads the globals search_type and
    amino_acid_arr (for the query length).
}
procedure Mode_Find_Normal(var nucleatides: our_sequence;
var aminoAcids: array of char;  Tabble: matrix_t);
var
    i: qword;
    triplet: string = ' ';
    cur_amino_acid: char = ' ';
    flag_is_triplet: boolean = false;
    { NOTE(review): start is set on the first match and never reset inside this
      procedure (Mode_Find_Left resets it for every frame) - confirm this is intended. }
    start: boolean = false;
    k, l: qword;
    p: qword;
    is_find: boolean = false;
    start_position: qword = 1;
    finish: qword;
    start_column, start_line, end_column, end_line: qword;
    iter_for_print :qword;
    iter_is_first: boolean;

begin
    while(not is_find) do
    begin

        { three attempts, each starting one position further than the previous }
        for p := 0 to 2 do
        begin

            {reset the working variables}
            l := 0;
            i := start_position + p;
            is_find := false;
            flag_is_triplet := false;
            finish := 1;
            cur_amino_acid := ' ';
            iter_is_first := true;
            i := i + 1;

            while (i <= Length(nucleatides.items)) do
            begin
                triplet := '';

                {fill the triplet from the array: positions i - 2 .. i}
                i := i - 2;
                Make_Triplet(nucleatides, triplet, i);
                i := i + 2;

                {stop codon}
                if ((triplet = ('UAA')) or ((triplet = 'UGA'))
                or ((triplet = 'UAG'))) and (start) then
                begin
                    if(flag_is_triplet) then
                        is_find:= true;
                    break;
                end;

                {
                    if the previous triplet matched, or this is the first
                    iteration, take the next residue from the query array
                }

                if (flag_is_triplet = true) or (iter_is_first = true) then
                begin
                    cur_amino_acid := aminoAcids[l];
                end;

                {look the triplet up among the codons of the current residue}
                for k := 1 to Length(Tabble[cur_amino_acid]) do
                begin
                    { mode 3: '-' in the query matches any triplet }
                    if (search_type = 3) and (cur_amino_acid = '-') then
                    begin
                        flag_is_triplet := true;
                        start := true;
                        if (l = 0) then
                        begin
                            start_position := i;
                            start_line := nucleatides.items[i].identificator_line;
                            start_column := nucleatides.items[i].identificator_column;
                        end;
                        l := l + 1;
                        break;
                    end;

                    if (Tabble[cur_amino_acid][k] = triplet) and
                    (cur_amino_acid <> '-') then
                    begin
                        flag_is_triplet := true;
                        start := true;

                        {first residue matched: remember the start coordinates}
                        if (l = 0) then
                        begin
                            start_position := i;
                            start_line := nucleatides.items[i].identificator_line;
                            start_column := nucleatides.items[i].identificator_column;
                        end;
                        l := l + 1;
                        break;
                    end;

                    if (Tabble[cur_amino_acid][k] <> triplet) and
                    (cur_amino_acid <> '-') then
                    begin
                        flag_is_triplet := false;
                    end;

                end;

                {
                    a match has already started and the current triplet
                    does not fit: give up on this attempt
                }
                if (i > 1) and (flag_is_triplet = false)
                and start then
                    break;

                {
                    every residue has been matched: record the end
                    coordinates of the sequence
                }
                if (l >= Length(amino_acid_arr) ) and (flag_is_triplet = true) then
                begin
                    is_find := true;
                    { the end is taken from items[i + 2] }
                    end_line := nucleatides.items[i + 2].identificator_line;
                    end_column := nucleatides.items[i + 2].identificator_column;
                    finish := i + 2;
                    break;
                end;

                { step by a whole codon once a match has started, otherwise by one symbol }
                if (start) then
                begin
                    i := i + 3;
                end
                else
                    i := i + 1;
                iter_is_first := false;
            end;

            if (is_find) then
            begin
                { a stop codon ended the run before the whole query was matched }
                if l < Length(amino_acid_arr) then
                begin
                    is_find := false;
                    continue;
                end;
                Sequence_Printer(nucleatides, start_position, finish,
                start_line, end_line,start_column, end_column);
                break;
            end;
        end;

        { NOTE(review): p is read after the for loop; Pascal does not define the
          loop variable after normal completion - verify this test is reliable. }
        { NOTE(review): i - 6 wraps around for i < 6 because i is a qword, and the
          triplet built here is overwritten before it is read again - TODO confirm
          that this call is needed and that i >= 6 always holds at this point. }
        if (p = 2) and (not is_find) and (search_type = 3) then
        begin
            start_position:= start_position + 3;
            triplet := '';
            i := i - 6;
            Make_Triplet(nucleatides, triplet, i);
            i := i + 6;
        end;

        if (p = 2) and (not is_find) and (search_type = 1) then
         begin
            start_position:= start_position + 3;
            triplet := '';
            i := i - 6;
            Make_Triplet(nucleatides, triplet, i);
            i := i + 6;
        end;

        { the scan ran past the end of the array: nothing found }
        if i > Length(nucleatides.items) then
            exit;
    end;
end;

{
    Reverse-strand search: walks nucleatides.items from the end towards the
    beginning, builds triplets with Triplet_DNA and looks for a run that
    encodes the residues in aminoAcids. The first hit is printed through
    Sequence_Printer_From_Left.

    nucleatides - sequence to scan
    aminoAcids  - residues to look for; '-' matches any triplet when the
                  global search_type is 3
    Tabble      - codon table indexed by residue letter

    Besides its parameters the procedure reads the globals search_type and
    amino_acid_arr (for the query length).

    Changes compared with the previous version:
      * the scan never starts at an index whose triplet would extend past
        the last element of items (it used to read items[Length] and beyond);
      * the duplicated, unreachable mismatch check was removed;
      * unused locals and the post-loop Triplet_DNA calls, whose result was
        never read, were removed;
      * the post-loop step no longer inspects the for-loop variable, and
        start_position is no longer allowed to wrap below zero.
}
procedure Mode_Find_Left(var nucleatides: our_sequence; var aminoAcids : array of char;  Tabble : matrix_t);
var
    i: longint;
    last_triplet_start: longint;
    step: qword;
    triplet: string = ' ';
    cur_amino_acid: char = ' ';
    flag_is_triplet: boolean = false;
    start: boolean = false;
    k, l: qword;
    p: qword;
    is_find: boolean = false;
    start_position: qword;
    finish: longint;
    start_column, start_line, end_column, end_line: qword;
    iter_is_first: boolean;
begin
    start_position := Length(nucleatides.items);
    i := start_position - 2;
    { highest index i for which items[i .. i + 2] lies inside the array }
    last_triplet_start := High(nucleatides.items) - 2;

    while (is_find <> true) do
    begin

        { the previous pass reached the beginning of the sequence }
        if i < 1 then
            break;

        for p := 0 to 2 do
        begin
            { reset the working variables for this attempt }
            l := 0;
            i := start_position + p - 2;
            is_find := false;
            flag_is_triplet := false;
            finish := Length(nucleatides.items);
            cur_amino_acid := ' ';
            iter_is_first := true;
            start := false;

            if (p = 2) then
                i := i - 1;

            { never build a triplet that reaches past the end of items }
            if i > last_triplet_start then
                i := last_triplet_start;

            while (i >= 1) do
            begin
                triplet := '';

                Triplet_DNA(nucleatides, triplet, i);

                { stop codon }
                if ((triplet = ('UAA')) or ((triplet = 'UGA')) or
                ((triplet = 'UAG'))) and (start) then
                begin
                    if flag_is_triplet then
                        is_find := true;
                    break;
                end;

                { previous triplet matched, or first iteration: take the next residue }
                if (flag_is_triplet = true) or (iter_is_first = true) then
                begin
                    cur_amino_acid := aminoAcids[l];
                end;

                { look the triplet up among the codons of the current residue }
                for k := 1 to Length(Tabble[cur_amino_acid]) do
                begin

                    { mode 3: '-' in the query matches any triplet }
                    if (search_type = 3) and (cur_amino_acid = '-') then
                    begin
                        flag_is_triplet := true;
                        start := true;
                        if (l = 0) then
                        begin
                            start_position := i;
                            start_line := nucleatides.items[i].identificator_line;
                            start_column := nucleatides.items[i].identificator_column;
                        end;
                        l := l + 1;
                        break;
                    end;

                    if (Tabble[cur_amino_acid][k] = triplet)
                    and (Length(triplet) <> 0) then
                    begin
                        flag_is_triplet := true;
                        start := true;
                        { first residue matched: remember the start coordinates }
                        if (l = 0) then
                        begin
                            start_position := i;
                            start_line := nucleatides.items[i].identificator_line;
                            start_column := nucleatides.items[i].identificator_column;
                        end;
                        l := l + 1;
                        break;
                    end
                    else
                    if (Tabble[cur_amino_acid][k] <> triplet) and
                    (cur_amino_acid <> '-') then
                    begin
                        flag_is_triplet := false;
                    end;
                end;

                { a match has started and the current triplet does not fit: give up }
                if (i > 1) and (not flag_is_triplet) and start then
                    break;

                { every residue matched: record the end coordinates }
                if (l >= Length(amino_acid_arr) ) and (flag_is_triplet = true) then
                begin
                    is_find := true;
                    end_line := nucleatides.items[i].identificator_line;
                    end_column := nucleatides.items[i].identificator_column;
                    finish := i - 2;
                    break;
                end;

                { step by a whole codon once a match has started, otherwise by one symbol }
                if start then
                    i := i - 3
                else
                    i := i - 1;
                iter_is_first := false;
            end;

            if (is_find) and (i >= 1)  then
            begin
                { a stop codon ended the run before the whole query was matched }
                if l < Length(amino_acid_arr) then
                begin
                    is_find := false;
                    continue;
                end;
                Sequence_Printer_From_Left(nucleatides, start_position, finish,
                start_line, end_line,start_column, end_column);
                break;
            end;
        end;

        { The for loop is left early only after a hit was printed, so
          "not is_find" alone tells that all three attempts failed. }
        if (not is_find) then
        begin
            if (search_type = 3) then
                step := 1
            else
                step := 3;

            { start_position is unsigned: stop instead of wrapping around }
            if start_position < step then
                exit;
            start_position := start_position - step;
        end;

    end;
end;

{
    Resets a sequence record: empties the name and items arrays and marks
    the sequence type as UNKNOWN.
}
procedure CLearSequense(var nucleatides: our_sequence);
begin
    with nucleatides do
    begin
        { assigning nil to a dynamic array sets its length to zero }
        name := nil;
        items := nil;
        seq_type := UNKNOWN;
    end;
end;

{
    Main program.

    Command line: <amino-acid file> <DNA/RNA file> <mode>, where mode is 1 or 3
    (mode 3 additionally accepts '-' in the query as a wildcard residue).

    Steps: fill the codon table, read the query residues from the first file,
    then read the second file record by record ('>' starts a header line) and
    run the search procedures on every record that contains no invalid symbol.
}
begin
    set_of_nucleotides := ['A', 'a', 'C', 'c', 'G', 'g', 'U', 'u', 'T', 't'];
    { #13 is accepted so that files with CR LF line endings are not rejected }
    availble_set := [' ', '-', #10, #13, #9, #11];
    set_impossibe_nucleotides := ['B', 'J', 'O', 'U', 'X', 'Z'];

    {build the codon table}
    amino_acid_table['A'][1] := 'GCU';
    amino_acid_table['A'][2] := 'GCC';
    amino_acid_table['A'][3] := 'GCA';
    amino_acid_table['A'][4] := 'GCG';
    amino_acid_table['R'][1] := 'CGU';
    amino_acid_table['R'][2] := 'CGC';
    amino_acid_table['R'][3] := 'CGA';
    amino_acid_table['R'][4] := 'CGG';
    amino_acid_table['R'][5] := 'AGA';
    amino_acid_table['R'][6] := 'AGG';
    amino_acid_table['N'][1] := 'AAU';
    amino_acid_table['N'][2] := 'AAC';
    amino_acid_table['D'][1] := 'GAU';
    amino_acid_table['D'][2] := 'GAC';
    amino_acid_table['C'][1] := 'UGU';
    amino_acid_table['C'][2] := 'UGC';
    amino_acid_table['Q'][1] := 'CAA';
    amino_acid_table['Q'][2] := 'CAG';
    amino_acid_table['E'][1] := 'GAA';
    amino_acid_table['E'][2] := 'GAG';
    amino_acid_table['G'][1] := 'GGU';
    amino_acid_table['G'][2] := 'GGC';
    amino_acid_table['G'][3] := 'GGA';
    amino_acid_table['G'][4] := 'GGG';
    amino_acid_table['H'][1] := 'CAU';
    amino_acid_table['H'][2] := 'CAC';
    amino_acid_table['I'][1] := 'AUU';
    amino_acid_table['I'][2] := 'AUC';
    amino_acid_table['I'][3] := 'AUA';
    amino_acid_table['L'][1] := 'UUA';
    amino_acid_table['L'][2] := 'UUG';
    amino_acid_table['L'][3] := 'CUU';
    amino_acid_table['L'][4] := 'CUC';
    amino_acid_table['L'][5] := 'CUA';
    amino_acid_table['L'][6] := 'CUG';
    amino_acid_table['K'][1] := 'AAA';
    amino_acid_table['K'][2] := 'AAG';
    amino_acid_table['M'][1] := 'AUG';
    amino_acid_table['F'][1] := 'UUU';
    amino_acid_table['F'][2] := 'UUC';
    amino_acid_table['P'][1] := 'CCU';
    amino_acid_table['P'][2] := 'CCC';
    amino_acid_table['P'][3] := 'CCA';
    amino_acid_table['P'][4] := 'CCG';
    amino_acid_table['S'][1] := 'UCU';
    amino_acid_table['S'][2] := 'UCC';
    amino_acid_table['S'][3] := 'UCA';
    amino_acid_table['S'][4] := 'UCG';
    amino_acid_table['S'][5] := 'AGU';
    amino_acid_table['S'][6] := 'AGC';
    amino_acid_table['T'][1] := 'ACU';
    amino_acid_table['T'][2] := 'ACC';
    amino_acid_table['T'][3] := 'ACA';
    amino_acid_table['T'][4] := 'ACG';
    amino_acid_table['W'][1] := 'UGG';
    amino_acid_table['Y'][1] := 'UAU';
    amino_acid_table['Y'][2] := 'UAC';
    amino_acid_table['V'][1] := 'GUU';
    amino_acid_table['V'][2] := 'GUC';
    amino_acid_table['V'][3] := 'GUA';
    amino_acid_table['V'][4] := 'GUG';

    amino_filename := ParamStr(1);
    DNK_RNK_FileName := ParamStr(2);
    Val(ParamStr(3), search_type);

    if (search_type <> 1) and (search_type <> 3) then
    begin
        writeln('Неправильный режим работы!');
        exit;
    end;

    Assign(file_amino_acids, amino_filename);
    Assign(f_DNK_RNK, DNK_RNK_FileName);

    Reset(file_amino_acids);
    Reset(f_DNK_RNK);

    { the file size is an upper bound for the number of residues }
    length_amino_acid_arr:=FileSize(file_amino_acids);
    SetLength(amino_acid_arr, length_amino_acid_arr);

    {read the amino-acid query into the array}
    i := 0;
    while not EOF(file_amino_acids) do
    begin
        { until the first residue is stored the character is read here,
          afterwards it is read below, after the header check }
        if (i = 0) then
            Read(file_amino_acids, cur_symbol);

        if (cur_symbol = '>') then
        begin
            { skip the header line; stop at end of file instead of looping forever }
            while (cur_symbol <> #10) and (not EOF(file_amino_acids)) do
                Read(file_amino_acids, cur_symbol);
            { the header ran to the end of the file: nothing more to read }
            if (cur_symbol <> #10) then
                break;
        end;

        if (not EOF(file_amino_acids)) and (i <> 0) then
            Read(file_amino_acids, cur_symbol);

        { B, J, O, U, X, Z have no codons in the table (and 'Z' lies outside
          its index range), so they are rejected together with other junk }
        if (not (cur_symbol in (['A'..'Z'] + ['0'..'9'] + availble_set)))
        or (cur_symbol in set_impossibe_nucleotides) then
        begin
            writeln('[Ошибка! Неверная последовательность аминокислот!]');
            close(file_amino_acids);
            close(f_DNK_RNK);
            exit;
        end;

        { keep residue letters; in mode 3 also keep the '-' wildcard }
        if (cur_symbol in ['A'..'Z'])
        or ((search_type = 3) and (cur_symbol = '-')) then
        begin
            amino_acid_arr[i] := cur_symbol;
            Inc(i);
        end;
    end;

    {shrink the query array to the number of residues actually stored}
    length_amino_acid_arr := i;
    if (length_amino_acid_arr = 0) then
    begin
        { an empty query cannot be searched for }
        writeln('[Ошибка! Неверная последовательность аминокислот!]');
        close(file_amino_acids);
        close(f_DNK_RNK);
        exit;
    end;
    SetLength(amino_acid_arr, length_amino_acid_arr);

    i := 1;
    j := 1;
    row := 1;
    line := 1;

    {read the nucleotide sequences}
    ratio := 1;
    while(not EOF(f_DNK_RNK)) do
    begin
        is_error := false;
        {walk through the file until the start of a sequence name is found;
         once flag is set, cur_symbol already holds the '>' of the next record}
        if (flag <> true) then
            read(f_DNK_RNK, cur_symbol);

        {start of a header line?}
        if(cur_symbol = '>') then
        begin

            {allocate the name buffer and restart its growth bookkeeping;
             ratio may still hold the value left by the previous record}
            SetLength(nucleatides.name, 100);
            ratio := 1;
            j := 1;

            {do not read past the end of the file}
            if not EOF(f_DNK_RNK) then
                read(f_DNK_RNK, cur_symbol);

            {store the name, starting at index 1}
            while (cur_symbol <> #10) do
            begin
                if (j > ratio * 100 - 1) then
                begin
                    ratio := ratio + 1;
                    SetLength(nucleatides.name, 100 * ratio);
                end;
                nucleatides.name[j] := cur_symbol;
                j := j + 1;

                {do not read past the end of the file}
                if not EOF(f_DNK_RNK) then
                begin
                    read(f_DNK_RNK, cur_symbol);
                end
                else
                    break;
            end;

            {drop a trailing CR and the unused tail of the buffer so that
             only the characters of the name are printed later}
            if (j > 1) and (nucleatides.name[j - 1] = #13) then
                j := j - 1;
            SetLength(nucleatides.name, j);

            line := line + 1;
            {do not read past the end of the file}
            if not EOF(f_DNK_RNK) then
            begin
                read(f_DNK_RNK, cur_symbol);
            end
            else
            begin
                writeln('[пустая последовательность 2]');
                break;
            end;

            {allocate room for the symbols of the sequence}
            { NOTE(review): items keeps its 100 * ratio capacity after reading and the
              search procedures use Length(items) as the sequence length - confirm
              whether it should be trimmed to the number of stored symbols. }
            SetLength(nucleatides.items, 100);
            j := 1;
            nucleatides.seq_type := UNKNOWN;
            ratio := 1;

            {sequence body}
            if(cur_symbol = '>') then
            begin
                writeln('[пустая последовательность]');
                CLearSequense(nucleatides);
                {cur_symbol already holds the '>' of the next record:
                 make the next iteration process it instead of overwriting it}
                flag := true;
                continue;
            end;

            row := 1;
            while(cur_symbol <> '>') do
            begin
                if (cur_symbol = #10) then
                begin
                    line := line + 1;
                    {the read below advances row, so the first symbol of
                     the new line gets column 1}
                    row := 0;
                end;

                {grow the buffer before index j would fall outside it}
                if (j > ratio * 100 - 1) then
                begin
                    ratio := ratio + 1;
                    SetLength(nucleatides.items, 100 * ratio);
                end;

                {
                    report symbols that are not allowed in a
                    nucleotide sequence
                }
                if (not (cur_symbol in availble_set)) and
                (not (cur_symbol in ['0'..'9'])) and
                (not (cur_symbol in set_of_nucleotides))then
                begin
                    writeln('[Ошибка!]', cur_symbol);
                    writeln();
                    is_error := true;
                end;

                {store a nucleotide together with its coordinates}
                if (cur_symbol in set_of_nucleotides) then
                begin
                    {the first U or T decides the type of the sequence}
                    if ((cur_symbol = 'u') or (cur_symbol = 'U'))
                    and (nucleatides.seq_type = UNKNOWN) then
                        nucleatides.seq_type := RNA;
                    if ((cur_symbol = 't') or (cur_symbol = 'T'))
                    and (nucleatides.seq_type = UNKNOWN) then
                        nucleatides.seq_type := DNA;
                    {U inside DNA or T inside RNA, in either case}
                    if ((nucleatides.seq_type = DNA) and (cur_symbol in ['u', 'U']))
                    or ((nucleatides.seq_type = RNA) and (cur_symbol in ['t', 'T'])) then
                        writeln('[Ошибка!]');
                    nucleatides.items[j].result := cur_symbol;
                    nucleatides.items[j].identificator_line := line;
                    nucleatides.items[j].identificator_column := row;
                    j := j + 1;
                end;

                if not EOF(f_DNK_RNK) then
                begin
                    read(f_DNK_RNK, cur_symbol);
                    row := row + 1;
                end
                else
                    break;
            end;

            {if the type could not be determined, treat the sequence as DNA}
            if (nucleatides.seq_type = UNKNOWN) then
                nucleatides.seq_type := DNA;

            {search the sequence: RNA forward only, DNA in both directions}
            if (nucleatides.seq_type = RNA) and (not is_error) then
            begin
                Mode_Find_Normal(nucleatides, amino_acid_arr, amino_acid_table);
                writeln();
            end
            else if (nucleatides.seq_type = DNA) and (not is_error) then
            begin
                Mode_Find_Normal(nucleatides, amino_acid_arr, amino_acid_table);
                Mode_Find_Left(nucleatides, amino_acid_arr, amino_acid_table);
                writeln();
            end;

            flag := true;
            j := 1;
            CLearSequense(nucleatides);
        end
        else if (cur_symbol = #10) then
            {count the lines that precede the first header}
            line := line + 1;
    end;

    close(file_amino_acids);
    close(f_DNK_RNK);

end.