{
    prac2_ann - looks for the amino-acid sequence stored in the first input
    file inside the nucleotide sequences stored in the second input file.
}
program prac2_ann;

uses SysUtils;

{
    nucleotid_coord - one nucleotide together with its coordinates in the
                      input file (used when printing a match)
    series - one sequence: its name, its type and its nucleotides
}

type
    nucleotid_coord = record
        str_coord: longint;     { line counter value at which the nucleotide was read }
        column_coord: longint;  { column counter value at which the nucleotide was read }
        val: char;              { the nucleotide letter exactly as it was read }
    end;

    series = record
        name: array of char;             { header text; filled starting at index 1 }
        type_trip: (DNA, RNA, NoName);   { NoName until a T/t or U/u has been seen }
        items: array of nucleotid_coord; { nucleotides; filled starting at index 1 }
    end;

{
    table_tripl - lookup table used to decode amino acids:
                  row = amino-acid letter, columns 1..6 = triplets for it
                  (unused columns are left as they were)
}
    table_tripl = array['A'..'Y', 1..6] of string;

{
    file1, file2 - names of the input files (command-line arguments 1 and 2)
    file_amin - file with the amino-acid sequence
    file_nucl - file with the DNA / RNA sequences

    mode - working mode (1 or 4)
    space - maximum number of triplets that may be skipped (mode 4)

    nucleatide - the sequence currently being processed: name, type
                 (DNA / RNA / not yet known) and nucleotides with coordinates
    aminoAcidArr - the amino-acid residues to look for
    table_of_amin - the triplet table

    len_amin - length of the amino-acid sequence
    str, coloumn - current line and column while reading the nucleotide file
    multiplier - growth factor: buffers are sized 100 * multiplier

    flag_start_name - set when the current character already is the '>' that
                      starts the next sequence, so no new character is read
    flag_error - set when an invalid character was met in a sequence
    dop_flag - declared for mode 4; not referenced in the visible code
}
var
    file_amin, file_nucl: file of char;
    file1, file2: string;

    mode: integer;
    space: longint;

    nucleatide: series;
    aminoAcidArr: array of char;
    table_of_amin: table_tripl;

    len_amin, coloumn, str, multiplier: longint;

    flag_start_name, flag_error, dop_flag: boolean;

{
    CreateTable - writes the triplets of every amino acid into its row of
    table_of_amin. Only the listed cells are assigned; all other cells keep
    whatever value they had before the call.
}
procedure CreateTable(var table_of_amin: table_tripl);

    { Stores the space-separated three-letter codons in columns 1..n of the row `acid`. }
    procedure FillRow(acid: char; const codons: string);
    var
        slot: longint;
    begin
        { every codon occupies 3 characters plus one separator, except the last }
        for slot := 1 to (Length(codons) + 1) div 4 do
            table_of_amin[acid][slot] := Copy(codons, 4 * slot - 3, 3);
    end;

begin
    FillRow('A', 'GCU GCC GCA GCG');
    FillRow('R', 'CGU CGC CGA CGG AGA AGG');
    FillRow('N', 'AAU AAC');
    FillRow('D', 'GAU GAC');
    FillRow('C', 'UGU UGC');
    FillRow('Q', 'CAA CAG');
    FillRow('E', 'GAA GAG');
    FillRow('G', 'GGU GGC GGA GGG');
    FillRow('H', 'CAU CAC');
    FillRow('I', 'AUU AUC AUA');
    FillRow('L', 'UUA UUG CUU CUC CUA CUG');
    FillRow('K', 'AAA AAG');
    FillRow('M', 'AUG');
    FillRow('F', 'UUU UUC');
    FillRow('P', 'CCU CCC CCA CCG');
    FillRow('S', 'UCU UCC UCA UCG AGU AGC');
    FillRow('T', 'ACU ACC ACA ACG');
    FillRow('W', 'UGG');
    FillRow('Y', 'UAU UAC');
    FillRow('V', 'GUU GUC GUA GUG');
end;

{
    Print_End_Start - prints a match found by the backward search.

    nucleatide - sequence whose name and nucleotides are printed
    start_pos, end_pos - positions of the match inside nucleatide.items;
                         printed relative to end_coord
    start_str, end_str - line coordinates of the start and of the end
    start_clmn, end_clmn - column coordinates of the start and of the end
    end_coord - value subtracted from both positions before printing

    clmn_i - counter of nucleotides inside the current group of ten
    str_i - counter of printed groups (a line break follows every sixth)

    The nucleotides items[end_pos + 2 .. start_pos + 2] are written in
    groups of ten separated by one blank, six groups per line.
}

procedure Print_End_Start(var nucleatide: series;
            start_pos: longint; end_pos: longint;
            start_str: longint; end_str: longint;
            start_clmn: longint; end_clmn: longint;
            end_coord: longint);
var
    i, clmn_i, str_i: longint;
begin
    str_i := 0;
    { the name is stored from index 1 on }
    for i := 1 to Length(nucleatide.name) - 1 do
        write(nucleatide.name[i]);
    writeln;
    writeln('[', start_pos - end_coord, ', ',  end_pos - end_coord, ']');
    writeln('(', start_str, ', ', start_clmn, ')', ' - (', end_str, ', ', end_clmn, ')');
    i := end_pos + 2;
    while (i <= start_pos + 2) do
    begin
        clmn_i := 1;
        while (clmn_i <= 10) and (i <= start_pos + 2) do
        begin
            write(nucleatide.items[i].val);
            clmn_i += 1;
            i += 1;
        end;
        str_i += 1;
        write(' ');
        { the position is NOT advanced here: every nucleotide of the range
          is printed, exactly as Print_Start_End does }
        if (str_i mod 6 = 0) then
            writeln;
    end;
    writeln;
end;

{
    Print_Start_End - prints a match found by the forward search: the
    sequence name, the position range, the line / column coordinates of both
    ends and the nucleotides items[start_pos .. end_pos] in groups of ten
    separated by one blank, six groups per line.
}
procedure Print_Start_End(var nucleatide: series;
            start_pos: longint; end_pos: longint;
            start_str: longint; end_str: longint;
            start_clmn: longint; end_clmn: longint);
var
    name_idx, item_idx, in_group, groups_done: longint;
begin
    { the name is stored from index 1 on }
    for name_idx := 1 to High(nucleatide.name) do
        write(nucleatide.name[name_idx]);
    writeln;
    writeln('[', start_pos, ', ', end_pos, ']');
    writeln('(', start_str, ', ', start_clmn, ')', ' - (', end_str, ', ', end_clmn, ')');

    in_group := 0;
    groups_done := 0;
    for item_idx := start_pos to end_pos do
    begin
        write(nucleatide.items[item_idx].val);
        Inc(in_group);
        { a group is closed after ten letters or at the end of the range }
        if (in_group = 10) or (item_idx = end_pos) then
        begin
            in_group := 0;
            Inc(groups_done);
            write(' ');
            if (groups_done mod 6 = 0) then
                writeln;
        end;
    end;
    writeln;
end;

{
    CreateDNA - appends to `triplet` the three letters taken from
    nucleatide.items[i + 2] down to nucleatide.items[i], replacing
    T by A, A by U, G by C and C by G (either case); any other letter is
    appended in upper case and any other character unchanged.
}
procedure CreateDNA(var nucleatide: series;
                    var triplet: string; i: longint);
var
    idx: longint;
    base: char;
begin
    for idx := i + 2 downto i do
    begin
        base := UpCase(nucleatide.items[idx].val);
        case base of
            'T': base := 'A';
            'A': base := 'U';
            'G': base := 'C';
            'C': base := 'G';
        end;
        triplet := triplet + base;
    end;
end;



{
    CreateTrip - appends to `triplet` the three letters
    nucleatide.items[i .. i + 2] in upper case; when the sequence type is
    DNA every T / t is appended as U.
}
procedure CreateTrip(var nucleatide: series;
                     var triplet: string; i: longint);
var
    idx: longint;
    base: char;
begin
    for idx := i to i + 2 do
    begin
        base := UpCase(nucleatide.items[idx].val);
        if (nucleatide.type_trip = DNA) and (base = 'T') then
            base := 'U';
        triplet := triplet + base;
    end;
end;

{
    Find - forward search: looks in nucleatide.items for consecutive
    triplets that encode the amino acids of aminoAcids and prints the first
    match with Print_Start_End.

    nucleatide - sequence to search
    aminoAcids - amino-acid letters to look for (indexed from 0)
    Tabble - triplet table (passed by value)
    space - limit compared against count_error in mode 4

    count_error - number of triplets skipped so far

    Besides its parameters the procedure reads the globals `mode` and
    `aminoAcidArr` (only the length of the latter).
}
procedure Find(var nucleatide: series;
               var aminoAcids: array of char;
               Tabble: table_tripl; space: longint);
var
    i, k, count_occure, num_in_trip: longint;
    triplet: string;
    cur_amino_acid: char;
    flag_right_triplet, flag_mini_start: boolean;
    flag_find, flag_main_start: boolean;
    start_pos, end_pos: longint;
    start_clmn, start_str, end_clmn, end_str: longint;
    count_error: longint;
begin
    count_error := 0;
    triplet := ' ';
    cur_amino_acid := ' ';
    flag_right_triplet := false;
    flag_mini_start := false;
    flag_find := false;
    start_pos := 1;
    while not flag_find do
    begin
        { three attempts, each starting one position further than the previous }
        for num_in_trip := 0 to 2 do
        begin
            count_occure := 0;
            i := start_pos + num_in_trip;
            flag_find := false;
            flag_right_triplet := false;
            end_pos := 1;
            cur_amino_acid := ' ';
            flag_main_start := true;

            i += 1;
            while (i <= Length(nucleatide.items) - 2) do
            begin
                { NOTE(review): count_error is reset on every pass, so it can
                  never grow beyond 1 below - confirm this is intended }
                count_error := 0;
                triplet := '';
                { the triplet examined on this pass is items[i - 2 .. i] }
                i -= 2;
                CreateTrip(nucleatide, triplet, i);
                i += 2;
                { move on to the next amino acid only after a match (or on the
                  passes where flag_main_start is still set) }
                if flag_right_triplet or flag_main_start then
                    cur_amino_acid := aminoAcids[count_occure];
                { compare with every column of the row of the current amino acid }
                for k := 1 to Length(Tabble[cur_amino_acid]) do
                begin
                    if (Tabble[cur_amino_acid][k] = triplet) then
                    begin
                        flag_right_triplet := true;
                        flag_mini_start := true;

                        { first matched triplet: remember where the match starts }
                        if (count_occure = 0) then
                        begin
                            start_pos := i;
                            start_str := nucleatide.items[i].str_coord;
                            start_clmn := nucleatide.items[i].column_coord;
                        end;
                        count_occure += 1;
                        break;
                    end;
                    if (Tabble[cur_amino_acid][k] <> triplet) then
                        flag_right_triplet := false;
                end;
                { a mismatch after at least one earlier match }
                if (i > 1) and not (flag_right_triplet) and flag_mini_start then
                begin
                    if (flag_main_start) and (mode = 4) then
                    begin
                        count_error += 1;
                        if (count_error > space) then
                            break;
                    end
                    else if (mode = 1) then
                        break;
                end;
                { every amino acid has been matched: record the end and stop }
                if (count_occure >= Length(aminoAcidArr)) and flag_right_triplet then
                begin
                    flag_find := true;
                    end_str := nucleatide.items[i + 2].str_coord;
                    end_clmn := nucleatide.items[i + 2].column_coord;
                    end_pos := i + 2;
                    break;
                end;
                { after the first match advance by whole triplets, before it by one }
                if (flag_mini_start) then
                    i += 3
                else
                    i += 1;
                if (count_error > space) and (mode = 4) then
                begin
                    flag_main_start := false
                end
                else
                if (mode = 1) then
                    flag_main_start := false
            end;

            if (flag_find) then
            begin
                if count_occure < Length(aminoAcidArr) then
                begin
                    flag_find := false;
                    continue;
                end;
                Print_Start_End(nucleatide, start_pos, end_pos,
                     start_str, end_str, start_clmn, end_clmn);
                break;
            end
        end;

        { NOTE(review): num_in_trip is read after the for loop; when the loop
          ran to completion this relies on the variable keeping its last
          value - confirm for the compiler in use }
        if (num_in_trip = 2) and (not flag_find)
            and ((mode = 4) or (mode = 1)) then
        begin
            start_pos += 3;
            { NOTE(review): the triplet built here is overwritten before it is
              read again - the call looks redundant }
            triplet := '';
            i -= 6;
            CreateTrip(nucleatide, triplet, i);
            i += 6;
        end;
        { the whole sequence has been scanned without a match }
        if i > Length(nucleatide.items) then
            exit;
    end;
end;

{
    FindDNA - backward search: walks nucleatide.items from the end towards
    the beginning, builds each triplet with CreateDNA and looks for
    consecutive triplets that encode the amino acids of aminoAcids; the first
    match is printed with Print_End_Start.

    nucleatide - sequence to search
    aminoAcids - amino-acid letters to look for (indexed from 0)
    Tabble - triplet table (passed by value)

    flag_mini_start - set once a triplet has matched; from then on the scan
                      moves in steps of three
    flag_main_start - set only during the first pass of a scan

    Besides its parameters the procedure reads the globals `mode` and
    `aminoAcidArr` (only the length of the latter).
}
procedure FindDNA(var nucleatide: series; var aminoAcids: array of char;
                  Tabble: table_tripl);
var
    i, k, count_occure, num_in_trip: longint;
    triplet: string;
    cur_amino_acid: char;
    flag_right_triplet, flag_mini_start: boolean;
    flag_find, flag_main_start: boolean;
    start_pos, end_pos: longint;
    start_clmn, start_str, end_clmn, end_str: longint;
begin
    triplet := ' ';
    cur_amino_acid := ' ';
    flag_right_triplet := false;
    flag_mini_start := false;
    flag_find := false;
    start_pos := Length(nucleatide.items);
    i := start_pos - 2;
    while not flag_find do
    begin
        { the beginning of the sequence has been reached }
        if i < 1 then
            break;
        for num_in_trip := 0 to 2 do
        begin
            count_occure := 0;
            i := start_pos + num_in_trip - 2;
            flag_find := false;
            flag_right_triplet := false;
            end_pos := Length(nucleatide.items);
            cur_amino_acid := ' ';
            flag_main_start := true;
            flag_mini_start := false;

            if (num_in_trip = 2) then
                i -= 1;
            i -= 1;
            { NOTE(review): on the first outer pass with num_in_trip >= 1 the
              value i + 2 equals Length(nucleatide.items), which CreateDNA then
              uses as an index - looks like a read past the last element; verify }
            while (i >= 1) do
            begin
                triplet := '';
                { the triplet examined on this pass is items[i + 2] .. items[i] }
                CreateDNA(nucleatide, triplet, i);
                { one of the three listed triplets after at least one match ends the scan }
                if ((triplet = ('UAA')) or ((triplet = 'UGA')) or
                        ((triplet = 'UAG'))) and (flag_mini_start) then
                begin
                    if flag_right_triplet then
                        flag_find := true;
                    break;
                end;
                if flag_right_triplet or flag_main_start then
                    cur_amino_acid := aminoAcids[count_occure];

                { compare with every column of the row of the current amino acid }
                for k := 1 to Length(Tabble[cur_amino_acid]) do
                    if (Tabble[cur_amino_acid][k] = triplet) and (Length(triplet) <> 0) then
                    begin
                        flag_right_triplet := true;
                        flag_mini_start := true;
                        { first matched triplet: remember where the match starts }
                        if (count_occure = 0) then
                        begin
                            start_pos := i;
                            start_str := nucleatide.items[i].str_coord;
                            start_clmn := nucleatide.items[i].column_coord;
                        end;
                        count_occure += 1;
                        break;
                    end
                    else
                        flag_right_triplet := false;
                { a mismatch after at least one earlier match ends this scan }
                if (i > 1) and not (flag_right_triplet) and flag_mini_start then
                begin
                    if (i < 3) then
                        flag_find := false;
                    break;
                end;
                { every amino acid has been matched: record the end and stop }
                if (count_occure = Length(aminoAcidArr)) and flag_right_triplet then
                begin
                    flag_find := true;
                    end_str := nucleatide.items[i].str_coord;
                    end_clmn := nucleatide.items[i].column_coord;
                    end_pos := i - 2;
                    break;
                end;
                { after the first match step back by whole triplets, before it by one }
                if flag_mini_start then
                    i -= 3
                else
                    i -= 1;
                flag_main_start := false;
            end;
            if (flag_find) and (i >= 1)  then
            begin
                { ended early with fewer matches than required: not a result }
                if count_occure < Length(aminoAcidArr) then
                begin
                    flag_find := false;
                    continue;
                end;
                Print_End_Start(nucleatide, start_pos, end_pos,
                    start_str, end_str, start_clmn, end_clmn,
                    length(nucleatide.items));
                break;

            end;
        end;
        { NOTE(review): num_in_trip is read after the for loop; when the loop
          ran to completion this relies on the variable keeping its last
          value - confirm for the compiler in use }
        if (num_in_trip = 2) and not (flag_find)
            and ((mode = 4) or (mode = 1)) then
        begin
            start_pos -= 3;
            { NOTE(review): the triplet built here is overwritten before it is
              read again - the call looks redundant }
            triplet := '';
            CreateDNA(nucleatide, triplet, i);
        end;
        if i > Length(nucleatide.items) then
            exit;

    end;
end;

{
    Main program body.

    Command line: <amino-acid file> <nucleotide file> <mode> [<space>]
    (mode is 1 or 4; <space> is read only in mode 4 and must consist of
    decimal digits).

    The amino-acid file is read into aminoAcidArr, then every sequence of
    the nucleotide file (a header line starting with '>' followed by the
    nucleotides) is read into `nucleatide` and searched with Find and, for
    DNA, additionally with FindDNA.

    current_sym - character most recently read from the current input file
    i, j - indices into aminoAcidArr and into the name / items buffers
    iter_param - index used while validating ParamStr(4)
}
var
    current_sym: char;
    i, j: longint;
    iter_param : longint;

begin
    file1 := ParamStr(1);
    file2 := ParamStr(2);
    Val(ParamStr(3), mode);
    space := 0;
    if (mode = 4) then
    begin
        { the fourth argument may contain decimal digits only }
        for iter_param := 1 to length(ParamStr(4)) do
        begin
            if not((ParamStr(4)[iter_param] >= '0') and
                 (ParamStr(4)[iter_param] <= '9')) then
            begin
                writeln('error parametr');
                halt;
            end
        end;
        Val(ParamStr(4), space);

        if (space < 0) then
        begin
            writeln('error parametr');
            halt;
        end
        else
            writeln(space);
    end
    else
    if (mode <> 1) then
    begin
        writeln('error mode');
        exit;
    end;

    { with no skips allowed mode 4 degenerates into mode 1 }
    if (space = 0) then mode := 1;

    Assign(file_amin, file1);
    Assign(file_nucl, file2);

    Reset(file_amin);
    Reset(file_nucl);

    { the file size is an upper bound for the number of amino acids }
    len_amin := FileSize(file_amin);
    SetLength(aminoAcidArr, len_amin);

    flag_start_name := false;
    CreateTable(table_of_amin);

    {
        read the amino-acid sequence
    }
    i := 0;
    while not Eof(file_amin) do
    begin
        if (i = 0) then
            Read(file_amin, current_sym);

        { skip a header line; stop at end of file instead of spinning forever
          when the header has no terminating line feed }
        if (current_sym = '>') then
        begin
            while (current_sym <> #10) and (not Eof(file_amin)) do
                Read(file_amin, current_sym);
            if (current_sym <> #10) then
                break;
        end;

        if (not Eof(file_amin)) and (i <> 0) then
            Read(file_amin, current_sym);

        { letters are limited to 'A'..'Y', the row range of table_tripl:
          'Z' would index outside the table }
        if (not ((current_sym in ['A'..'Y']) or
                 (current_sym in ['0'..'9']) or
                 (current_sym in [' ', '-', #9, #10, #11]))) then
        begin
            writeln('error in series of amin');
            close(file_amin);
            close(file_nucl);
            exit;
        end;

        if (current_sym in ['A'..'Y']) then
        begin
            aminoAcidArr[i] := current_sym;
            i += 1;
        end;
    end;

    { i is the number of amino acids actually stored; an empty sequence
      cannot be searched for }
    len_amin := i;
    if (len_amin = 0) then
    begin
        writeln('error in series of amin');
        close(file_amin);
        close(file_nucl);
        exit;
    end;

    SetLength(aminoAcidArr, len_amin);
    i := 1;
    j := 1;
    coloumn := 1;
    str := 1;

    {
        read the nucleotide sequences
    }
    multiplier := 1;
    while not eof(file_nucl) do
    begin
        flag_error := false;
        { when the previous sequence stopped on '>' that character is
          already in current_sym and must not be overwritten }
        if not flag_start_name then
            read(file_nucl, current_sym);

        {
            start of a new sequence
        }
        if(current_sym = '>') then
        begin
            j := 1;
            { the name buffer starts at 100 elements again, so the growth
              factor has to start at 1 as well }
            multiplier := 1;
            SetLength(nucleatide.name, 100);

            if not Eof(file_nucl) then
                read(file_nucl, current_sym);

            while (current_sym <> #10) do
            begin
                if (j > multiplier * 100 - 1) then
                begin
                    multiplier += 1;
                    SetLength(nucleatide.name, 100 * multiplier);
                end;
                nucleatide.name[j] := current_sym;
                j += 1;
                if not Eof(file_nucl) then
                    read(file_nucl, current_sym)
                else
                    break;
            end;
            { keep only the used part (indices 1 .. j - 1) so that the print
              routines do not emit the unused tail of the buffer }
            SetLength(nucleatide.name, j);
            str += 1;
            if not Eof(file_nucl) then
                read(file_nucl, current_sym)
            else
            begin
                writeln('free series');
                break;
            end;

            SetLength(nucleatide.items, 100);
            j := 1;
            nucleatide.type_trip := NoName;
            multiplier := 1;

            if(current_sym = '>') then
            begin
                writeln('free series');
                SetLength(nucleatide.name, 0);
                SetLength(nucleatide.items, 0);
                nucleatide.type_trip := NoName;
                { current_sym already holds the '>' of the next sequence }
                flag_start_name := true;
                continue;
            end;

            coloumn := 1;
            while(current_sym <> '>') do
            begin
                if (current_sym = #10) then
                begin
                    str += 1;
                    { the read below increments the counter, which makes the
                      first character of the new line column 1 }
                    coloumn := 0;
                end;
                if (j > multiplier * 100 - 1) then
                begin
                    multiplier += 1;
                    SetLength(nucleatide.items, 100 * multiplier);
                end;
                if not ((current_sym in [' ', '-', #10, #9, #11]) or
                        (current_sym in ['0'..'9']) or
                        (current_sym in ['A', 'a', 'C', 'c', 'G', 'g', 'U', 'u', 'T', 't'])) then
                begin
                    writeln('error input series');
                    writeln;
                    flag_error := true;
                end;
                if (current_sym in ['A', 'a', 'C', 'c', 'G', 'g', 'U', 'u', 'T', 't']) then
                begin
                    { the first T or U decides the type of the sequence }
                    if ((current_sym = 'u') or (current_sym = 'U'))
                        and (nucleatide.type_trip = NoName) then
                        nucleatide.type_trip := RNA;

                    if ((current_sym = 't') or (current_sym = 'T'))
                        and (nucleatide.type_trip = NoName) then
                        nucleatide.type_trip := DNA;

                    { U inside DNA or T inside RNA, in either letter case }
                    if ((nucleatide.type_trip = DNA) and (UpCase(current_sym) = 'U'))
                       or ((nucleatide.type_trip = RNA) and (UpCase(current_sym) = 'T')) then
                        writeln('error input of series');

                    nucleatide.items[j].val := current_sym;
                    nucleatide.items[j].str_coord := str;
                    nucleatide.items[j].column_coord := coloumn;
                    j += 1;
                end;
                if not Eof(file_nucl) then
                begin
                    read(file_nucl, current_sym);
                    coloumn += 1;
                end
                else
                    break;
            end;

            { neither T nor U was met: treat the sequence as DNA }
            if (nucleatide.type_trip = NoName) then
                nucleatide.type_trip := DNA;

            if (nucleatide.type_trip = RNA) and (not flag_error) then
            begin
                Find(nucleatide, aminoAcidArr, table_of_amin, space);
                writeln;
            end
            else if (nucleatide.type_trip = DNA) and (not flag_error) then
            begin
                Find(nucleatide, aminoAcidArr, table_of_amin, space);
                FindDNA(nucleatide, aminoAcidArr, table_of_amin);
                writeln;
            end;
            flag_start_name := true;
            SetLength(nucleatide.name, 0);
            SetLength(nucleatide.items, 0);
            nucleatide.type_trip := NoName;
        end;
    end;
    close(file_amin);
    close(file_nucl);
end.