Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- uses SysUtils;
- {
- nucleotid_coord - one nucleotide symbol together with the coordinates used when printing
- series - a sequence: its name, its type and its nucleotides with coordinates
- }
- type
- nucleotid_coord = record
- str_coord: longint; { value of the line counter at the moment the symbol was read }
- column_coord: longint; { value of the column counter at the moment the symbol was read }
- val: char; { the symbol exactly as it was read, case is preserved }
- end;
- series = record
- name: array of char; { header text; the main program stores it starting at index 1 }
- type_trip: (DNA, RNA, NoName); { NoName until a T/t or U/u symbol has been seen }
- items: array of nucleotid_coord; { nucleotides; the main program stores them starting at index 1 }
- end;
- {
- table_tripl - array used to decode amino acids:
- first index is the amino-acid letter, second index is a slot for one triplet string.
- The slots are filled by CreateTable; slots it does not assign are left as they were.
- }
- table_tripl = array['A'..'Y', 1..6] of string;
- {
- file1, file2 - names of the two input files, taken from the command line
- file_amin - file with the amino acids
- file_nucl - file with the DNA / RNA sequences
- mode - working mode (1 or 4)
- space - maximum number of triplets that may be skipped
- iter - counter of skipped triplets
- nucleatide - the current sequence: name, type (DNA / RNA etc.) and nucleotides with coordinates
- aminoAcidArr - array of amino-acid residues
- table_of_amin - table of triplets
- len_amin - length of the amino-acid sequence
- i, j - iterators
- str, coloumn - current line and column counters used as coordinates
- multiplier - multiplier (coefficient) used to allocate memory with a reserve
- counter - counter for releasing excess memory
- flag_start_name - flag: the start of a new sequence name has already been read
- flag_error - error flag
- dop_flag - flag for mode 4 (the wanted sequence was found within <= space triplets)
- NOTE(review): iter and counter are described above but are not declared in this
- section; i and j are declared next to the main program body; dop_flag is declared
- here but is not referenced anywhere else in the visible source.
- }
- var
- file_amin, file_nucl: file of char;
- file1, file2: string;
- mode: integer;
- space: longint;
- nucleatide: series;
- aminoAcidArr: array of char;
- table_of_amin: table_tripl;
- len_amin, coloumn, str, multiplier: longint;
- flag_start_name, flag_error, dop_flag: boolean;
{
  Table of triplets.
  Fills table_of_amin with the RNA codons of every amino acid: the codons of
  one amino acid go into slots 1, 2, ... of its row. Slots that are not
  listed here are not touched.
}
procedure CreateTable(var table_of_amin: table_tripl);

  { Stores the space-separated three-letter codons of one amino acid. }
  procedure Fill(acid: char; const codons: string);
  var
    slot, offset: longint;
  begin
    slot := 1;
    offset := 1;
    { every codon takes three characters plus one separating space }
    while offset + 2 <= Length(codons) do
    begin
      table_of_amin[acid][slot] := Copy(codons, offset, 3);
      Inc(slot);
      Inc(offset, 4);
    end;
  end;

begin
  Fill('A', 'GCU GCC GCA GCG');
  Fill('R', 'CGU CGC CGA CGG AGA AGG');
  Fill('N', 'AAU AAC');
  Fill('D', 'GAU GAC');
  Fill('C', 'UGU UGC');
  Fill('Q', 'CAA CAG');
  Fill('E', 'GAA GAG');
  Fill('G', 'GGU GGC GGA GGG');
  Fill('H', 'CAU CAC');
  Fill('I', 'AUU AUC AUA');
  Fill('L', 'UUA UUG CUU CUC CUA CUG');
  Fill('K', 'AAA AAG');
  Fill('M', 'AUG');
  Fill('F', 'UUU UUC');
  Fill('P', 'CCU CCC CCA CCG');
  Fill('S', 'UCU UCC UCA UCG AGU AGC');
  Fill('T', 'ACU ACC ACA ACG');
  Fill('W', 'UGG');
  Fill('Y', 'UAU UAC');
  Fill('V', 'GUU GUC GUA GUG');
end;
{
  Prints a hit found by the backward pass.
  nucleatide           - the sequence; its name is printed first
  start_pos, end_pos   - start and final positions (indexes into nucleatide.items)
  start_str, end_str   - line coordinates of the start and of the end
  start_clmn, end_clmn - column coordinates of the start and of the end
  end_coord            - value subtracted from both positions before they are printed

  Output: the name, "[start, end]" with end_coord subtracted, the
  "(line, column) - (line, column)" pair, then the symbols
  items[end_pos + 2] .. items[start_pos + 2] in groups of ten separated by a
  space, six groups per line.

  Fixed: the loop used to advance the index by six extra positions after every
  group, so six nucleotides of the hit were dropped after each ten printed.
  The range is now printed contiguously, the same way Print_Start_End does it.
  The upper bound is also clamped to the array so a hit that touches the end
  of the buffer cannot be read past it.
}
procedure Print_End_Start(var nucleatide: series;
                          start_pos: longint; end_pos: longint;
                          start_str: longint; end_str: longint;
                          start_clmn: longint; end_clmn: longint;
                          end_coord: longint);
var
  i, clmn_i, str_i, last: longint;
begin
  { index 0 of the name is not used by the reader, printing starts at 1 }
  for i := 1 to Length(nucleatide.name) - 1 do
    write(nucleatide.name[i]);
  writeln;
  writeln('[', start_pos - end_coord, ', ', end_pos - end_coord, ']');
  writeln('(', start_str, ', ', start_clmn, ')', ' - (', end_str, ', ', end_clmn, ')');

  last := start_pos + 2;
  if last > High(nucleatide.items) then
    last := High(nucleatide.items);

  str_i := 0;
  i := end_pos + 2;
  while i <= last do
  begin
    { one group of at most ten symbols }
    clmn_i := 1;
    while (clmn_i <= 10) and (i <= last) do
    begin
      write(nucleatide.items[i].val);
      Inc(clmn_i);
      Inc(i);
    end;
    Inc(str_i);
    write(' ');
    { line break after every sixth group }
    if (str_i mod 6 = 0) then
      writeln;
  end;
  writeln;
end;
{
  Prints a hit found by the forward pass.
  nucleatide           - the sequence; its name is printed first
  start_pos, end_pos   - first and last index in nucleatide.items to print
  start_str, end_str   - line coordinates of the start and of the end
  start_clmn, end_clmn - column coordinates of the start and of the end

  Output: the name, "[start_pos, end_pos]", the
  "(line, column) - (line, column)" pair, then the symbols in groups of ten
  separated by a space, six groups per line.
}
procedure Print_Start_End(var nucleatide: series;
                          start_pos: longint; end_pos: longint;
                          start_str: longint; end_str: longint;
                          start_clmn: longint; end_clmn: longint);
var
  pos, in_group, groups: longint;
begin
  { index 0 of the name is skipped, exactly as the reader leaves it unused }
  for pos := 1 to High(nucleatide.name) do
    write(nucleatide.name[pos]);
  writeln;
  writeln('[', start_pos, ', ', end_pos, ']');
  writeln('(', start_str, ', ', start_clmn, ')', ' - (', end_str, ', ', end_clmn, ')');

  in_group := 0;
  groups := 0;
  for pos := start_pos to end_pos do
  begin
    write(nucleatide.items[pos].val);
    in_group := in_group + 1;
    { a group is closed after ten symbols or together with the last symbol }
    if (in_group = 10) or (pos = end_pos) then
    begin
      in_group := 0;
      groups := groups + 1;
      write(' ');
      if groups mod 6 = 0 then
        writeln;
    end;
  end;
  writeln;
end;
{
  Appends to triplet the codon read backwards from the complementary strand.
  nucleatide - the sequence whose items are read
  triplet    - string the symbols are appended to (it is not cleared here)
  i          - index of the lowest of the three items; items i + 2, i + 1, i
               are visited in that order

  Every symbol is upper-cased and complemented: T -> A, A -> U, G -> C,
  C -> G; any other symbol is appended upper-cased and otherwise unchanged.

  Fixed: positions outside nucleatide.items are skipped instead of being read.
  FindDNA calls this with windows that can extend past either end of the
  array; such a window now yields fewer than three symbols and therefore
  cannot compare equal to any codon.
}
procedure CreateDNA(var nucleatide: series;
                    var triplet: string; i: longint);
var
  j: longint;
  sym: char;
begin
  for j := i + 2 downto i do
  begin
    if (j < Low(nucleatide.items)) or (j > High(nucleatide.items)) then
      continue;
    sym := UpCase(nucleatide.items[j].val);
    case sym of
      'T': triplet := triplet + 'A';
      'A': triplet := triplet + 'U';
      'G': triplet := triplet + 'C';
      'C': triplet := triplet + 'G';
    else
      triplet := triplet + sym;
    end;
  end;
end;
{
  Appends to triplet the three symbols items[i], items[i + 1], items[i + 2].
  nucleatide - the sequence whose items are read
  triplet    - string the symbols are appended to (it is not cleared here)
  i          - index of the first of the three items

  Every symbol is upper-cased; when the sequence type is DNA a T is written
  as U so the result can be compared with the RNA codons of the table.

  Fixed: positions outside nucleatide.items are skipped instead of being read.
  Find calls this with i - 6, which can be negative; such a window now yields
  fewer than three symbols and therefore cannot compare equal to any codon.
}
procedure CreateTrip(var nucleatide: series;
                     var triplet: string; i: longint);
var
  j: longint;
  sym: char;
begin
  for j := i to i + 2 do
  begin
    if (j < Low(nucleatide.items)) or (j > High(nucleatide.items)) then
      continue;
    sym := UpCase(nucleatide.items[j].val);
    if (nucleatide.type_trip = DNA) and (sym = 'T') then
      sym := 'U';
    triplet := triplet + sym;
  end;
end;
- {
- Find - forward search. Walks nucleatide.items, builds triplets with CreateTrip and
- compares them with the strings stored in Tabble for the amino acids of aminoAcids;
- a complete hit is printed through Print_Start_End.
- Besides its parameters the procedure reads the globals mode and aminoAcidArr.
- count_error - counter of how many triplets have already been skipped
- NOTE(review): the loop bound is Length(nucleatide.items); the main program sizes that
- array in steps of 100 and does not shrink it, so the bound is the allocated size.
- NOTE(review): Tabble is indexed with the current amino-acid letter; table_tripl covers
- 'A'..'Y' while the main program stores any letter 'A'..'Z' - confirm 'Z' cannot occur.
- NOTE(review): Tabble is a value parameter, so the whole table is copied on every call.
- }
- procedure Find(var nucleatide: series;
- var aminoAcids: array of char;
- Tabble: table_tripl; space: longint);
- var
- i, k, count_occure, num_in_trip: longint;
- triplet: string;
- cur_amino_acid: char;
- flag_right_triplet, flag_mini_start: boolean;
- flag_find, flag_main_start: boolean;
- start_pos, end_pos: longint;
- start_clmn, start_str, end_clmn, end_str: longint;
- count_error: longint;
- begin
- count_error := 0;
- triplet := ' ';
- cur_amino_acid := ' ';
- flag_right_triplet := false;
- flag_mini_start := false;
- flag_find := false;
- start_pos := 1;
- while not flag_find do
- begin
- for num_in_trip := 0 to 2 do { three passes, each starting one position further }
- begin
- count_occure := 0; { number of amino acids matched in this pass }
- i := start_pos + num_in_trip;
- flag_find := false;
- flag_right_triplet := false;
- end_pos := 1;
- cur_amino_acid := ' ';
- flag_main_start := true;
- i += 1;
- while (i <= Length(nucleatide.items) - 2) do
- begin
- count_error := 0; { NOTE(review): reset on every iteration, so it never exceeds 1 below }
- triplet := '';
- i -= 2; { the triplet is built from items i - 2 .. i }
- CreateTrip(nucleatide, triplet, i);
- i += 2;
- if flag_right_triplet or flag_main_start then
- cur_amino_acid := aminoAcids[count_occure]; { next amino acid to look for }
- for k := 1 to Length(Tabble[cur_amino_acid]) do { every slot of the table row }
- begin
- if (Tabble[cur_amino_acid][k] = triplet) then
- begin
- flag_right_triplet := true;
- flag_mini_start := true;
- if (count_occure = 0) then { first matched triplet: remember where the hit starts }
- begin
- start_pos := i;
- start_str := nucleatide.items[i].str_coord;
- start_clmn := nucleatide.items[i].column_coord;
- end;
- count_occure += 1;
- break;
- end;
- if (Tabble[cur_amino_acid][k] <> triplet) then
- flag_right_triplet := false;
- end;
- if (i > 1) and not (flag_right_triplet) and flag_mini_start then { mismatch after at least one match }
- begin
- if (flag_main_start) and (mode = 4) then
- begin
- count_error += 1;
- if (count_error > space) then
- break;
- end
- else if (mode = 1) then
- break;
- end;
- if (count_occure >= Length(aminoAcidArr)) and flag_right_triplet then { every amino acid matched }
- begin
- flag_find := true;
- end_str := nucleatide.items[i + 2].str_coord;
- end_clmn := nucleatide.items[i + 2].column_coord;
- end_pos := i + 2;
- break;
- end;
- if (flag_mini_start) then { after the first match step by whole triplets }
- i += 3
- else
- i += 1;
- if (count_error > space) and (mode = 4) then
- begin
- flag_main_start := false
- end
- else
- if (mode = 1) then
- flag_main_start := false
- end;
- if (flag_find) then
- begin
- if count_occure < Length(aminoAcidArr) then
- begin
- flag_find := false;
- continue; { goes on with the next num_in_trip pass }
- end;
- Print_Start_End(nucleatide, start_pos, end_pos,
- start_str, end_str, start_clmn, end_clmn);
- break;
- end
- end;
- if (num_in_trip = 2) and (not flag_find)
- and ((mode = 4) or (mode = 1)) then
- begin
- start_pos += 3; { nothing found in the three passes: restart three positions further }
- triplet := '';
- i -= 6; { NOTE(review): i - 6 can be negative here, and the triplet built below appears unused }
- CreateTrip(nucleatide, triplet, i);
- i += 6;
- end;
- if i > Length(nucleatide.items) then { ran past the array: give up }
- exit;
- end;
- end;
- {
- FindDNA - backward search. Walks nucleatide.items from the end towards index 1,
- builds triplets with CreateDNA and compares them with the strings stored in Tabble
- for the amino acids of aminoAcids; a complete hit is printed through Print_End_Start.
- Besides its parameters the procedure reads the globals mode and aminoAcidArr.
- flag_mini_start - start of each triplet; set once a triplet has matched in the current pass
- flag_main_start - start of the whole sequence; true only for the first step of a pass
- NOTE(review): the start position is Length(nucleatide.items); the main program sizes
- that array in steps of 100 and does not shrink it, so this is the allocated size.
- NOTE(review): Tabble is indexed with the current amino-acid letter; table_tripl covers
- 'A'..'Y' while the main program stores any letter 'A'..'Z' - confirm 'Z' cannot occur.
- }
- procedure FindDNA(var nucleatide: series; var aminoAcids: array of char;
- Tabble: table_tripl);
- var
- i, k, count_occure, num_in_trip: longint;
- triplet: string;
- cur_amino_acid: char;
- flag_right_triplet, flag_mini_start: boolean;
- flag_find, flag_main_start: boolean;
- start_pos, end_pos: longint;
- start_clmn, start_str, end_clmn, end_str: longint;
- begin
- triplet := ' ';
- cur_amino_acid := ' ';
- flag_right_triplet := false;
- flag_mini_start := false;
- flag_find := false;
- start_pos := Length(nucleatide.items);
- i := start_pos - 2;
- while not flag_find do
- begin
- if i < 1 then { the beginning of the array has been reached }
- break;
- for num_in_trip := 0 to 2 do
- begin
- count_occure := 0; { number of amino acids matched in this pass }
- i := start_pos + num_in_trip - 2;
- flag_find := false;
- flag_right_triplet := false;
- end_pos := Length(nucleatide.items);
- cur_amino_acid := ' ';
- flag_main_start := true;
- flag_mini_start := false;
- if (num_in_trip = 2) then { NOTE(review): with this, passes 1 and 2 start from the same index }
- i -= 1;
- i -= 1;
- while (i >= 1) do
- begin
- triplet := '';
- CreateDNA(nucleatide, triplet, i); { reads items i + 2 down to i; NOTE(review): i + 2 can equal Length(items) on the first passes }
- if ((triplet = ('UAA')) or ((triplet = 'UGA')) or
- ((triplet = 'UAG'))) and (flag_mini_start) then { one of these three triplets after a match ends the pass }
- begin
- if flag_right_triplet then
- flag_find := true;
- break;
- end;
- if flag_right_triplet or flag_main_start then
- cur_amino_acid := aminoAcids[count_occure]; { next amino acid to look for }
- for k := 1 to Length(Tabble[cur_amino_acid]) do { the loop body is the single if / else statement below }
- if (Tabble[cur_amino_acid][k] = triplet) and (Length(triplet) <> 0) then
- begin
- flag_right_triplet := true;
- flag_mini_start := true;
- if (count_occure = 0) then { first matched triplet: remember where the hit starts }
- begin
- start_pos := i;
- start_str := nucleatide.items[i].str_coord;
- start_clmn := nucleatide.items[i].column_coord;
- end;
- count_occure += 1;
- break;
- end
- else
- flag_right_triplet := false;
- if (i > 1) and not (flag_right_triplet) and flag_mini_start then { mismatch after at least one match }
- begin
- if (i < 3) then
- flag_find := false;
- break;
- end;
- if (count_occure = Length(aminoAcidArr)) and flag_right_triplet then { every amino acid matched }
- begin
- flag_find := true;
- end_str := nucleatide.items[i].str_coord;
- end_clmn := nucleatide.items[i].column_coord;
- end_pos := i - 2;
- break;
- end;
- if flag_mini_start then { after the first match step by whole triplets }
- i -= 3
- else
- i -= 1;
- flag_main_start := false;
- end;
- if (flag_find) and (i >= 1) then
- begin
- if count_occure < Length(aminoAcidArr) then
- begin
- flag_find := false;
- continue; { goes on with the next num_in_trip pass }
- end;
- Print_End_Start(nucleatide, start_pos, end_pos,
- start_str, end_str, start_clmn, end_clmn,
- length(nucleatide.items));
- break;
- end;
- end;
- if (num_in_trip = 2) and not (flag_find)
- and ((mode = 4) or (mode = 1)) then
- begin
- start_pos -= 3; { nothing found in the three passes: restart three positions earlier }
- triplet := '';
- CreateDNA(nucleatide, triplet, i); { NOTE(review): i can be below 1 here, and the triplet built appears unused }
- end;
- if i > Length(nucleatide.items) then { NOTE(review): i only decreases in this procedure - check whether this can ever be true }
- exit;
- end;
- end;
{
  Main program body.
  current_sym - current symbol
  i, j        - iterators
  iter_param  - index used while checking the fourth command-line parameter

  Command line: <amino-acid file> <nucleotide file> <mode> [<space>].
  Mode must be 1 or 4; in mode 4 the fourth parameter must consist of digits.
  The amino-acid letters are collected into aminoAcidArr, then every record
  of the nucleotide file (a '>' header line followed by symbols) is read into
  nucleatide and searched with Find, and with FindDNA as well when it is DNA.

  Fixes compared with the previous version:
  - multiplier is reset before a name is read; it used to keep the value left
    by the previous record, so a long name could be written past the buffer;
  - the name buffer is trimmed to the characters actually read, so the
    printing procedures no longer emit trailing #0 characters;
  - skipping a '>' header line in the amino-acid file stops at end of file
    instead of looping forever when the line has no line feed;
  - after an empty record the '>' that was already read is kept for the next
    iteration even when it is the very first record of the file;
  - the DNA/RNA mix check recognises upper-case U and T as well.
}
var
  current_sym: char;
  i, j: longint;
  iter_param : longint;
begin
  file1 := ParamStr(1);
  file2 := ParamStr(2);
  Val(ParamStr(3), mode);
  space := 0;
  if (mode = 4) then
  begin
    { the fourth parameter must consist of decimal digits only }
    for iter_param := 1 to length(ParamStr(4)) do
    begin
      if not((ParamStr(4)[iter_param] >= '0') and
             (ParamStr(4)[iter_param] <= '9')) then
      begin
        writeln('error parametr');
        halt;
      end
    end;
    Val(ParamStr(4), space);
    if (space < 0) then
    begin
      writeln('error parametr');
      halt;
    end
    else
      writeln(space);
  end
  else
    if (mode <> 1) then
    begin
      writeln('error mode');
      exit;
    end;
  { nothing may be skipped, so mode 4 degenerates into mode 1 }
  if (space = 0) then mode := 1;
  Assign(file_amin, file1);
  Assign(file_nucl, file2);
  Reset(file_amin);
  Reset(file_nucl);
  { the file size is an upper bound for the number of amino acids }
  len_amin := FileSize(file_amin);
  SetLength(aminoAcidArr, len_amin);
  flag_start_name := false;
  CreateTable(table_of_amin);
  {
    reading the amino-acid sequence
  }
  i := 0;
  while not Eof(file_amin) do
  begin
    if (i = 0) then
      Read(file_amin, current_sym);
    if (current_sym = '>') then
    begin
      { skip the header line }
      while (current_sym <> #10) and (not Eof(file_amin)) do
        Read(file_amin, current_sym);
      { the header ran to the end of the file: nothing is left to read }
      if (current_sym <> #10) then
        break;
    end;
    if (not Eof(file_amin)) and (i <> 0) then
      Read(file_amin, current_sym);
    if (not ((current_sym in ['A'..'Z']) or
             (current_sym in ['0'..'9']) or
             (current_sym in [' ', '-', #9, #10, #11]))) then
    begin
      writeln('error in series of amin');
      exit;
    end;
    if (mode = 1) or (mode = 4) then
    begin
      if (current_sym in ['A'..'Z']) then
      begin
        len_amin := i + 1;
        aminoAcidArr[i] := current_sym;
        Inc(i);
      end;
    end;
  end;
  { shrink to the number of letters actually stored }
  SetLength(aminoAcidArr, len_amin);
  i := 1;
  j := 1;
  coloumn := 1;
  str := 1;
  {
    reading the nucleotide sequences
  }
  multiplier := 1;
  while not eof(file_nucl) do
  begin
    flag_error := false;
    { after a record has been processed its terminating '>' is already in current_sym }
    if not flag_start_name then
      read(file_nucl, current_sym);
    {
      start of a new sequence
    }
    if (current_sym = '>') then
    begin
      j := 1;
      { the name buffer starts at 100 elements again, so the growth counter must too }
      multiplier := 1;
      SetLength(nucleatide.name, 100);
      if not Eof(file_nucl) then
        read(file_nucl, current_sym);
      while (current_sym <> #10) do
      begin
        if (j > multiplier * 100 - 1) then
        begin
          Inc(multiplier);
          SetLength(nucleatide.name, 100 * multiplier);
        end;
        nucleatide.name[j] := current_sym;
        Inc(j);
        if not Eof(file_nucl) then
          read(file_nucl, current_sym)
        else
          break;
      end;
      { keep index 0 plus the j - 1 characters that were read }
      SetLength(nucleatide.name, j);
      Inc(str);
      if not Eof(file_nucl) then
        read(file_nucl, current_sym)
      else
      begin
        writeln('free series');
        break;
      end;
      SetLength(nucleatide.items, 100);
      j := 1;
      nucleatide.type_trip := NoName;
      multiplier := 1;
      if (current_sym = '>') then
      begin
        { a header immediately followed by the next header }
        writeln('free series');
        SetLength(nucleatide.name, 0);
        SetLength(nucleatide.items, 0);
        nucleatide.type_trip := NoName;
        { the '>' of the next record is already read and must not be overwritten }
        flag_start_name := true;
        continue;
      end;
      coloumn := 1;
      while (current_sym <> '>') do
      begin
        if (current_sym = #10) then
        begin
          Inc(str);
          coloumn := 1;
        end;
        if (j > multiplier * 100 - 1) then
        begin
          Inc(multiplier);
          SetLength(nucleatide.items, 100 * multiplier);
        end;
        if not ((current_sym in [' ', '-', #10, #9, #11]) or
                (current_sym in ['0'..'9']) or
                (current_sym in ['A', 'a', 'C', 'c', 'G', 'g', 'U', 'u', 'T', 't'])) then
        begin
          writeln('error input series');
          writeln;
          flag_error := true;
        end;
        if (current_sym in ['A', 'a', 'C', 'c', 'G', 'g', 'U', 'u', 'T', 't']) then
        begin
          { the first U or T decides the type of the sequence }
          if ((current_sym = 'u') or (current_sym = 'U'))
             and (nucleatide.type_trip = NoName) then
            nucleatide.type_trip := RNA;
          if ((current_sym = 't') or (current_sym = 'T'))
             and (nucleatide.type_trip = NoName) then
            nucleatide.type_trip := DNA;
          { U inside DNA or T inside RNA, in either case of the letter }
          if ((nucleatide.type_trip = DNA) and (UpCase(current_sym) = 'U'))
             or ((nucleatide.type_trip = RNA) and (UpCase(current_sym) = 'T')) then
            writeln('error input of series');
          nucleatide.items[j].val := current_sym;
          nucleatide.items[j].str_coord := str;
          nucleatide.items[j].column_coord := coloumn;
          Inc(j);
        end;
        if not Eof(file_nucl) then
        begin
          read(file_nucl, current_sym);
          Inc(coloumn);
        end
        else
          break;
      end;
      { neither U nor T was seen: treat the sequence as DNA }
      if (nucleatide.type_trip = NoName) then
        nucleatide.type_trip := DNA;
      if (nucleatide.type_trip = RNA) and (not flag_error) then
      begin
        Find(nucleatide, aminoAcidArr, table_of_amin, space);
        writeln;
      end
      else if (nucleatide.type_trip = DNA) and (not flag_error) then
      begin
        Find(nucleatide, aminoAcidArr, table_of_amin, space);
        FindDNA(nucleatide, aminoAcidArr, table_of_amin);
        writeln;
      end;
      flag_start_name := true;
      SetLength(nucleatide.name, 0);
      SetLength(nucleatide.items, 0);
      nucleatide.type_trip := NoName;
    end;
  end;
  close(file_amin);
  close(file_nucl);
end.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement