Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
program t2;
{
  Command lines left by the original author:
    valgrind --leak-check=full ./new file_amino_acids.txt f_DNK_RNK.txt 1
    fpc -gv new.pas
}

type
  { Codon table: the first index is a one-letter amino-acid code, the second
    index is a slot holding one codon string (slots 1..6).
    The first dimension covers 'A'..'Z' because the amino-acid input check in
    the main program accepts every capital letter; with the former 'A'..'Y'
    range an accepted 'Z' indexed past the end of the table. }
  matrix_t = array['A'..'Z', 1..6] of string;

  { One stored nucleotide together with the file position it was read from. }
  nucleatides_pair_t = record
    identificator_line: qword;    { line number in the input file }
    identificator_column: qword;  { column number within that line }
    result: char;                 { the nucleotide character itself }
  end;

  { One named sequence read from the nucleotide file.
    The main program fills `name` and `items` starting at index 1. }
  our_sequence = record
    name: array of char;
    seq_type: (DNA, RNA, AMINO, UNKNOWN);
    items : array of nucleatides_pair_t;
  end;

var
  { Input files and their names (taken from the command line). }
  file_amino_acids, f_DNK_RNK: file of char;
  amino_filename, DNK_RNK_FileName: string;

  { Search mode number; the main program accepts only 1 and 3. }
  search_type: integer = 1;

  { Amino-acid pattern to look for and its length. }
  amino_acid_arr: array of char;
  length_amino_acid_arr: qword;

  { Character classes used while validating the input. }
  set_of_nucleotides: set of char;
  set_impossibe_nucleotides: set of char;
  availble_set: set of char;

  { Sequence currently being read / searched and the codon table. }
  nucleatides: our_sequence;
  amino_acid_table: matrix_t;

  { Reader state. }
  cur_symbol: char;          { last character read from a file }
  i, j: qword;               { general-purpose indices of the main program }
  flag: boolean = false;     { when true the main loop reuses cur_symbol instead of reading }
  row, line: qword;          { current column / line in the nucleotide file }
  ratio: qword;              { buffer size multiplier: buffers hold 100 * ratio elements }
  is_error: boolean = false; { set when the current sequence contains an invalid character }
{
  Prints a match reported by Mode_Find_Left.

  Output, in order:
    * the sequence name (elements 1..High(name));
    * '[-a, -b]' where a = Length(items) - start_position and
      b = Length(items) - finish;
    * '(start_line, start_column) - (end_line, end_column)';
    * the characters items[finish + 2 .. start_position + 2], written in
      groups of 10 separated by a space, 6 groups per output line.

  Fixes relative to the previous version:
    * a local counter is used instead of the program-level variable `i`,
      so the procedure no longer modifies global state;
    * the name loop stops at High(name) instead of Length(name), which was
      one element past the end of the 0-based dynamic array;
    * item reads are limited to valid indices of `items`.
}
procedure Sequence_Printer_From_Left(var nucleatides: our_sequence; start_position: qword;
  finish: qword; start_line: qword; end_line: qword; start_column: qword; end_column: qword);
var
  name_index: SizeInt;
  item_index: qword;
  item_count: qword;
  last_index: qword;
  column_index: longint = 1;
  line_index: longint = 0;
begin
  { High() is -1 for an empty array, so the loop is skipped in that case. }
  for name_index := 1 to High(nucleatides.name) do
    write(nucleatides.name[name_index]);
  writeln();

  writeln('[-', Length(nucleatides.items) - start_position, ', -',
    Length(nucleatides.items) - finish, ']');
  writeln('(', start_line, ', ', start_column, ')',
    ' - (', end_line, ', ', end_column, ')');

  item_count := Length(nucleatides.items);
  last_index := start_position + 2;
  item_index := finish + 2;
  while (item_index <= last_index) and (item_index < item_count) do
  begin
    { One group of at most 10 characters. }
    while (column_index <= 10) and (item_index <= last_index)
      and (item_index < item_count) do
    begin
      write(nucleatides.items[item_index].result);
      column_index := column_index + 1;
      item_index := item_index + 1;
    end;
    column_index := 1;
    line_index := line_index + 1;
    write(' ');
    { Line break after every sixth group. }
    if (line_index mod 6 = 0) then
      writeln();
  end;
  writeln();
end;
{
  Prints a match reported by Mode_Find_Normal.

  Output, in order:
    * the sequence name (elements 1..High(name));
    * '[start_position, finish]';
    * '(start_line, start_column) - (end_line, end_column)';
    * the characters items[start_position .. finish], written in groups of
      10 separated by a space, 6 groups per output line.

  Fixes relative to the previous version:
    * the name loop stops at High(name) instead of Length(name), which was
      one element past the end of the 0-based dynamic array;
    * item reads are limited to valid indices of `items`
      (callers may pass a `finish` beyond the last element).
}
procedure Sequence_Printer(var nucleatides: our_sequence; start_position: qword;
  finish:qword; start_line: qword; end_line: qword; start_column: qword; end_column:qword);
var
  name_index: SizeInt;
  item_index: qword;
  item_count: qword;
  column_index: longint = 1;
  line_index: longint = 0;
begin
  { High() is -1 for an empty array, so the loop is skipped in that case. }
  for name_index := 1 to High(nucleatides.name) do
    write(nucleatides.name[name_index]);
  writeln();

  writeln('[', start_position, ', ', finish, ']');
  writeln('(', start_line, ', ', start_column, ')', ' - (',
    end_line, ', ', end_column, ')');

  item_count := Length(nucleatides.items);
  item_index := start_position;
  while (item_index <= finish) and (item_index < item_count) do
  begin
    { One group of at most 10 characters. }
    while (column_index <= 10) and (item_index <= finish)
      and (item_index < item_count) do
    begin
      write(nucleatides.items[item_index].result);
      column_index := column_index + 1;
      item_index := item_index + 1;
    end;
    column_index := 1;
    line_index := line_index + 1;
    write(' ');
    { Line break after every sixth group. }
    if (line_index mod 6 = 0) then
      writeln();
  end;
  writeln();
end;
{
  Appends three characters to `triplet`, built from items[i + 2], items[i + 1]
  and items[i] in that (descending) order. Each character is upper-cased and
  then substituted:  T -> A,  A -> U,  G -> C,  C -> G;  any other character
  is appended unchanged (upper-cased).

  Parameters:
    nucleatides - sequence whose `items` are read;
    triplet     - string that receives the three appended characters
                  (it is NOT cleared here; the caller does that);
    i           - lowest index of the three positions.

  Fix: positions outside the `items` array used to be read without any check
  (Mode_Find_Left starts with i = Length(items) - 2, so index Length(items)
  was accessed). Such positions now contribute #0, which cannot be part of
  any codon, so the resulting triplet simply never matches the codon table.
}
procedure Triplet_DNA(var nucleatides: our_sequence; var triplet: string; i: qword);
var
  j: qword;
  item_count: qword;
  symbol: char;
begin
  item_count := Length(nucleatides.items);
  for j := i + 2 downto i do
  begin
    if j >= item_count then
    begin
      { Out of range: use a placeholder instead of reading past the array. }
      triplet := triplet + #0;
      continue;
    end;

    { UpCase only changes 'a'..'z', matching the previous manual conversion. }
    symbol := UpCase(nucleatides.items[j].result);
    case symbol of
      'T': triplet := triplet + 'A';
      'A': triplet := triplet + 'U';
      'G': triplet := triplet + 'C';
      'C': triplet := triplet + 'G';
    else
      triplet := triplet + symbol;
    end;
  end;
end;
{
  Appends three characters to `triplet`, built from items[i], items[i + 1]
  and items[i + 2] in that order. Each character is upper-cased; when the
  sequence type is DNA, 'T' is replaced by 'U'.

  Parameters:
    nucleatides - sequence whose `items` and `seq_type` are read;
    triplet     - string that receives the three appended characters
                  (it is NOT cleared here; the caller does that);
    i           - index of the first of the three positions.

  Fix: positions outside the `items` array used to be read without any check
  (Mode_Find_Normal calls this with the last position equal to
  Length(items)). Such positions now contribute #0, which cannot be part of
  any codon, so the resulting triplet simply never matches the codon table.
}
procedure Make_Triplet(var nucleatides: our_sequence; var triplet: string; i: qword);
var
  j: qword;
  item_count: qword;
  symbol: char;
begin
  item_count := Length(nucleatides.items);
  for j := i to i + 2 do
  begin
    if j >= item_count then
    begin
      { Out of range: use a placeholder instead of reading past the array. }
      triplet := triplet + #0;
      continue;
    end;

    { UpCase only changes 'a'..'z', matching the previous manual conversion. }
    symbol := UpCase(nucleatides.items[j].result);
    if (nucleatides.seq_type = DNA) and (symbol = 'T') then
      triplet := triplet + 'U'
    else
      triplet := triplet + symbol;
  end;
end;
{
  Scans `nucleatides.items` in ascending index order for a run of codons that
  encodes `aminoAcids`, and prints the first match with Sequence_Printer.

  Parameters:
    nucleatides - sequence to scan;
    aminoAcids  - one-letter amino-acid codes to find; in search mode 3
                  (global `search_type`) the code '-' matches any triplet;
    Tabble      - codon table (amino-acid code -> codon strings).

  The codon examined at step `i` is items[i - 2 .. i] (built by Make_Triplet).

  NOTE(review): the recorded start is `i` and the recorded finish is `i + 2`,
  i.e. both are two positions past the ends of the codons that were actually
  compared. Kept as is; confirm whether the reported range is intended.
  NOTE(review): `start` is never reset between attempts here, whereas
  Mode_Find_Left resets it for every frame. Kept as is; confirm.

  Changes relative to the previous version:
    * the pattern length is taken from the `aminoAcids` parameter instead of
      the global `amino_acid_arr`;
    * an empty pattern or empty sequence returns immediately (previously
      aminoAcids[0] was read from an empty array);
    * an amino-acid code outside the index range of `Tabble` is treated as
      "no codon matches" instead of indexing outside the table;
    * coordinates are read from a clamped index so that items[i] and
      items[i + 2] are never read past the end of the array;
    * the two identical post-loop branches were merged, and their
      Make_Triplet call (whose result was always discarded and whose index
      `i - 6` could wrap around) was removed;
    * the value of the `for` variable `p` is no longer inspected after the
      loop (it is undefined there); `not is_find` is the equivalent test,
      because the loop is only left early after a successful match;
    * unused local `iter_for_print` removed.
}
procedure Mode_Find_Normal(var nucleatides: our_sequence;
  var aminoAcids: array of char; Tabble: matrix_t);
var
  i: qword;
  triplet: string = ' ';
  cur_amino_acid: char = ' ';
  flag_is_triplet: boolean = false;
  start: boolean = false;
  k, l: qword;
  p: qword;
  is_find: boolean = false;
  start_position: qword = 1;
  finish: qword;
  start_column, start_line, end_column, end_line: qword;
  iter_is_first: boolean;
  item_count, acid_count, coord_index: qword;
  in_table, matched: boolean;
begin
  acid_count := Length(aminoAcids);
  item_count := Length(nucleatides.items);
  if (acid_count = 0) or (item_count = 0) then
    exit;

  while (not is_find) do
  begin
    for p := 0 to 2 do
    begin
      { reset the per-attempt state }
      l := 0;
      i := start_position + p + 1;
      is_find := false;
      flag_is_triplet := false;
      finish := 1;
      cur_amino_acid := ' ';
      iter_is_first := true;

      while (i <= item_count) do
      begin
        { build the triplet from items[i - 2 .. i] }
        triplet := '';
        Make_Triplet(nucleatides, triplet, i - 2);

        { stop codon }
        if ((triplet = 'UAA') or (triplet = 'UGA') or (triplet = 'UAG'))
          and start then
        begin
          if flag_is_triplet then
            is_find := true;
          break;
        end;

        { if the previous triplet matched, or this is the first iteration,
          fetch the next amino-acid code from the pattern }
        if flag_is_triplet or iter_is_first then
          cur_amino_acid := aminoAcids[l];

        { look the triplet up among the codons of the current amino acid }
        in_table := (cur_amino_acid >= Low(Tabble)) and
                    (cur_amino_acid <= High(Tabble));
        matched := false;
        if (search_type = 3) and (cur_amino_acid = '-') then
          matched := true            { wildcard: any triplet is accepted }
        else if in_table then
          for k := 1 to Length(Tabble[Low(Tabble)]) do
            if Tabble[cur_amino_acid][k] = triplet then
            begin
              matched := true;
              break;
            end;

        if matched then
        begin
          flag_is_triplet := true;
          start := true;
          { first matched codon: remember where the match begins }
          if (l = 0) then
          begin
            start_position := i;
            coord_index := i;
            if coord_index >= item_count then
              coord_index := item_count - 1;
            start_line := nucleatides.items[coord_index].identificator_line;
            start_column := nucleatides.items[coord_index].identificator_column;
          end;
          l := l + 1;
        end
        else if cur_amino_acid <> '-' then
          flag_is_triplet := false;

        { a mismatch after the match has started ends this attempt }
        if (i > 1) and (not flag_is_triplet) and start then
          break;

        { every amino acid matched: record where the match ends }
        if (l >= acid_count) and flag_is_triplet then
        begin
          is_find := true;
          coord_index := i + 2;
          if coord_index >= item_count then
            coord_index := item_count - 1;
          end_line := nucleatides.items[coord_index].identificator_line;
          end_column := nucleatides.items[coord_index].identificator_column;
          finish := i + 2;
          break;
        end;

        { step a whole codon once matching has started, otherwise one position }
        if start then
          i := i + 3
        else
          i := i + 1;
        iter_is_first := false;
      end;

      if is_find then
      begin
        { a stop codon was hit before the whole pattern matched }
        if l < acid_count then
        begin
          is_find := false;
          continue;
        end;
        Sequence_Printer(nucleatides, start_position, finish,
          start_line, end_line, start_column, end_column);
        break;
      end;
    end;

    { nothing found in any of the three attempts: move the start forward }
    if not is_find then
      start_position := start_position + 3;

    if i > item_count then
      exit;
  end;
end;
{
  Scans `nucleatides.items` in descending index order for a run of codons that
  encodes `aminoAcids`, and prints the first match with
  Sequence_Printer_From_Left. Triplets are built by Triplet_DNA from
  items[i .. i + 2].

  Parameters:
    nucleatides - sequence to scan;
    aminoAcids  - one-letter amino-acid codes to find; in search mode 3
                  (global `search_type`) the code '-' matches any triplet;
    Tabble      - codon table (amino-acid code -> codon strings).

  Changes relative to the previous version:
    * the pattern length is taken from the `aminoAcids` parameter instead of
      the global `amino_acid_arr`;
    * an empty pattern or empty sequence returns immediately (previously
      aminoAcids[0] was read from an empty array);
    * an amino-acid code outside the index range of `Tabble` is treated as
      "no codon matches" instead of indexing outside the table;
    * the second "mismatch" block, unreachable because an identical condition
      had already executed `break`, was removed;
    * the Triplet_DNA calls after the `for` loop were removed: their result
      was always discarded and `i` may be non-positive at that point;
    * the value of the `for` variable `p` is no longer inspected after the
      loop (it is undefined there); `not is_find` is the equivalent test;
    * unused locals `j`, `beg`, `iter_for_print` removed.
}
procedure Mode_Find_Left(var nucleatides: our_sequence; var aminoAcids : array of char; Tabble : matrix_t);
var
  i: longint;
  triplet: string = ' ';
  cur_amino_acid: char = ' ';
  flag_is_triplet: boolean = false;
  start: boolean = false;
  k, l: qword;
  p: qword;
  is_find: boolean = false;
  start_position: qword;
  finish: longint;
  start_column, start_line, end_column, end_line: qword;
  iter_is_first: boolean;
  acid_count: qword;
  in_table, matched: boolean;
begin
  acid_count := Length(aminoAcids);
  if (acid_count = 0) or (Length(nucleatides.items) = 0) then
    exit;

  start_position := Length(nucleatides.items);
  i := start_position - 2;
  while (is_find <> true) do
  begin
    if i < 1 then
      break;

    for p := 0 to 2 do
    begin
      { reset the per-attempt state }
      l := 0;
      i := start_position + p - 2;
      is_find := false;
      flag_is_triplet := false;
      finish := Length(nucleatides.items);
      cur_amino_acid := ' ';
      iter_is_first := true;
      start := false;
      if (p = 2) then
        i := i - 1;

      while (i >= 1) do
      begin
        triplet := '';
        Triplet_DNA(nucleatides, triplet, i);

        { stop codon }
        if ((triplet = 'UAA') or (triplet = 'UGA') or (triplet = 'UAG'))
          and start then
        begin
          if flag_is_triplet then
            is_find := true;
          break;
        end;

        { if the previous triplet matched, or this is the first iteration,
          fetch the next amino-acid code from the pattern }
        if flag_is_triplet or iter_is_first then
          cur_amino_acid := aminoAcids[l];

        { look the triplet up among the codons of the current amino acid }
        in_table := (cur_amino_acid >= Low(Tabble)) and
                    (cur_amino_acid <= High(Tabble));
        matched := false;
        if (search_type = 3) and (cur_amino_acid = '-') then
          matched := true            { wildcard: any triplet is accepted }
        else if in_table and (Length(triplet) <> 0) then
          for k := 1 to Length(Tabble[Low(Tabble)]) do
            if Tabble[cur_amino_acid][k] = triplet then
            begin
              matched := true;
              break;
            end;

        if matched then
        begin
          flag_is_triplet := true;
          start := true;
          { first matched codon: remember where the match begins }
          if (l = 0) then
          begin
            start_position := i;
            start_line := nucleatides.items[i].identificator_line;
            start_column := nucleatides.items[i].identificator_column;
          end;
          l := l + 1;
        end
        else if cur_amino_acid <> '-' then
          flag_is_triplet := false;

        { a mismatch after the match has started ends this attempt }
        if (i > 1) and (not flag_is_triplet) and start then
          break;

        { every amino acid matched: record where the match ends }
        if (l >= acid_count) and flag_is_triplet then
        begin
          is_find := true;
          end_line := nucleatides.items[i].identificator_line;
          end_column := nucleatides.items[i].identificator_column;
          finish := i - 2;
          break;
        end;

        { step a whole codon once matching has started, otherwise one position }
        if start then
          i := i - 3
        else
          i := i - 1;
        iter_is_first := false;
      end;

      if is_find and (i >= 1) then
      begin
        { a stop codon was hit before the whole pattern matched }
        if l < acid_count then
        begin
          is_find := false;
          continue;
        end;
        Sequence_Printer_From_Left(nucleatides, start_position, finish,
          start_line, end_line, start_column, end_column);
        break;
      end;
    end;

    { nothing found: move the start backwards; the step depends on the mode }
    if not is_find then
    begin
      if search_type = 3 then
        start_position := start_position - 1;
      if search_type = 1 then
        start_position := start_position - 3;
    end;

    if i > Length(nucleatides.items) then
      exit;
  end;
end;
{
  Returns a sequence record to its empty state: both dynamic arrays are
  released (length 0) and the sequence type becomes UNKNOWN.
}
procedure CLearSequense(var nucleatides: our_sequence);
begin
  with nucleatides do
  begin
    seq_type := UNKNOWN;
    { assigning nil to a dynamic array is the same as SetLength(..., 0) }
    items := nil;
    name := nil;
  end;
end;
{
  Main program.

  Command line:  <amino-acid file> <nucleotide file> <search mode (1 or 3)>

  Steps:
    1. fill the character classes and the codon table;
    2. read the amino-acid pattern (an optional leading '>' header line is
       skipped; in mode 3 the character '-' is also stored in the pattern);
    3. read the nucleotide file record by record ('>' name line followed by
       the sequence body) and run the searches on every valid record.

  Fixes relative to the previous version:
    * `ratio` is reset before a record name is read; it used to keep the value
      left by the previous sequence body, so a name of 100+ characters could
      be written past the 100-element name buffer;
    * skipping the '>' header of the amino-acid file no longer loops forever
      when the file ends before a line feed;
    * when a record is empty, the already-read '>' of the next record is now
      reused (`flag := true`); previously, if this happened before any record
      had been processed, that '>' was discarded and the next record skipped;
    * the DNA/RNA mixing check also recognises upper-case 'U' / 'T', in line
      with the type detection just above it;
    * both files are closed before leaving on an invalid amino-acid file.

  NOTE(review): `nucleatides.items` is never shrunk to the number of
  characters actually stored, so Length(items) seen by the search and print
  routines is the buffer capacity (a multiple of 100). Left unchanged; confirm.
}
begin
  set_of_nucleotides := ['A', 'a', 'C', 'c', 'G', 'g', 'U', 'u', 'T', 't'];
  availble_set := [' ', '-', #10, #9, #11];
  set_impossibe_nucleotides := ['B', 'J', 'O', 'U', 'X', 'Z'];

  { build the codon table }
  amino_acid_table['A'][1] := 'GCU';
  amino_acid_table['A'][2] := 'GCC';
  amino_acid_table['A'][3] := 'GCA';
  amino_acid_table['A'][4] := 'GCG';

  amino_acid_table['R'][1] := 'CGU';
  amino_acid_table['R'][2] := 'CGC';
  amino_acid_table['R'][3] := 'CGA';
  amino_acid_table['R'][4] := 'CGG';
  amino_acid_table['R'][5] := 'AGA';
  amino_acid_table['R'][6] := 'AGG';

  amino_acid_table['N'][1] := 'AAU';
  amino_acid_table['N'][2] := 'AAC';

  amino_acid_table['D'][1] := 'GAU';
  amino_acid_table['D'][2] := 'GAC';

  amino_acid_table['C'][1] := 'UGU';
  amino_acid_table['C'][2] := 'UGC';

  amino_acid_table['Q'][1] := 'CAA';
  amino_acid_table['Q'][2] := 'CAG';

  amino_acid_table['E'][1] := 'GAA';
  amino_acid_table['E'][2] := 'GAG';

  amino_acid_table['G'][1] := 'GGU';
  amino_acid_table['G'][2] := 'GGC';
  amino_acid_table['G'][3] := 'GGA';
  amino_acid_table['G'][4] := 'GGG';

  amino_acid_table['H'][1] := 'CAU';
  amino_acid_table['H'][2] := 'CAC';

  amino_acid_table['I'][1] := 'AUU';
  amino_acid_table['I'][2] := 'AUC';
  amino_acid_table['I'][3] := 'AUA';

  amino_acid_table['L'][1] := 'UUA';
  amino_acid_table['L'][2] := 'UUG';
  amino_acid_table['L'][3] := 'CUU';
  amino_acid_table['L'][4] := 'CUC';
  amino_acid_table['L'][5] := 'CUA';
  amino_acid_table['L'][6] := 'CUG';

  amino_acid_table['K'][1] := 'AAA';
  amino_acid_table['K'][2] := 'AAG';

  amino_acid_table['M'][1] := 'AUG';

  amino_acid_table['F'][1] := 'UUU';
  amino_acid_table['F'][2] := 'UUC';

  amino_acid_table['P'][1] := 'CCU';
  amino_acid_table['P'][2] := 'CCC';
  amino_acid_table['P'][3] := 'CCA';
  amino_acid_table['P'][4] := 'CCG';

  amino_acid_table['S'][1] := 'UCU';
  amino_acid_table['S'][2] := 'UCC';
  amino_acid_table['S'][3] := 'UCA';
  amino_acid_table['S'][4] := 'UCG';
  amino_acid_table['S'][5] := 'AGU';
  amino_acid_table['S'][6] := 'AGC';

  amino_acid_table['T'][1] := 'ACU';
  amino_acid_table['T'][2] := 'ACC';
  amino_acid_table['T'][3] := 'ACA';
  amino_acid_table['T'][4] := 'ACG';

  amino_acid_table['W'][1] := 'UGG';

  amino_acid_table['Y'][1] := 'UAU';
  amino_acid_table['Y'][2] := 'UAC';

  amino_acid_table['V'][1] := 'GUU';
  amino_acid_table['V'][2] := 'GUC';
  amino_acid_table['V'][3] := 'GUA';
  amino_acid_table['V'][4] := 'GUG';

  { command-line arguments }
  amino_filename := ParamStr(1);
  DNK_RNK_FileName := ParamStr(2);
  Val(ParamStr(3), search_type);
  if (search_type <> 1) and (search_type <> 3) then
  begin
    writeln('Неправильный режим работы!');
    exit;
  end;

  { open both input files }
  Assign(file_amino_acids, amino_filename);
  Assign(f_DNK_RNK, DNK_RNK_FileName);
  Reset(file_amino_acids);
  Reset(f_DNK_RNK);

  { the pattern can never be longer than the file itself }
  length_amino_acid_arr := FileSize(file_amino_acids);
  SetLength(amino_acid_arr, length_amino_acid_arr);

  { read the amino-acid pattern into amino_acid_arr }
  i := 0;
  while not EOF(file_amino_acids) do
  begin
    { until the first pattern character is stored, the read happens here }
    if (i = 0) then
      Read(file_amino_acids, cur_symbol);

    { skip a '>' header line }
    if (cur_symbol = '>') then
    begin
      while (cur_symbol <> #10) and (not EOF(file_amino_acids)) do
        Read(file_amino_acids, cur_symbol);
      { the file ended inside the header line: nothing left to read }
      if cur_symbol <> #10 then
        break;
    end;

    if (not EOF(file_amino_acids)) and (i <> 0) then
      Read(file_amino_acids, cur_symbol);

    if (not (cur_symbol in (['A'..'Z'] + ['0'..'9'] + availble_set))) then
    begin
      writeln('[Ошибка! Неверная последовательность аминокислот!]');
      close(file_amino_acids);
      close(f_DNK_RNK);
      exit;
    end;

    { capital letters are always stored; '-' only in mode 3 }
    if (cur_symbol in ['A'..'Z']) or
       ((search_type = 3) and (cur_symbol = '-')) then
    begin
      length_amino_acid_arr := i + 1;
      amino_acid_arr[i] := cur_symbol;
      Inc(i);
    end;
  end;

  { shrink the pattern array to the number of stored characters }
  SetLength(amino_acid_arr, length_amino_acid_arr);

  i := 1;
  j := 1;
  row := 1;
  line := 1;
  ratio := 1;

  { read the nucleotide file record by record }
  while (not EOF(f_DNK_RNK)) do
  begin
    is_error := false;

    { advance through the file until a record name starts; once `flag` is
      set, cur_symbol already holds the '>' of the next record }
    if (flag <> true) then
      read(f_DNK_RNK, cur_symbol);

    { start of a record name }
    if (cur_symbol = '>') then
    begin
      { allocate the name buffer; ratio must match its size of 100 * ratio }
      SetLength(nucleatides.name, 100);
      ratio := 1;

      { make sure we do not read past the end of the file }
      if not EOF(f_DNK_RNK) then
        read(f_DNK_RNK, cur_symbol);

      { store the name (indices start at 1) }
      while (cur_symbol <> #10) do
      begin
        if (j > ratio * 100 - 1) then
        begin
          ratio := ratio + 1;
          SetLength(nucleatides.name, 100 * ratio);
        end;
        nucleatides.name[j] := cur_symbol;
        j := j + 1;
        if not EOF(f_DNK_RNK) then
          read(f_DNK_RNK, cur_symbol)
        else
          break;
      end;
      line := line + 1;

      { first character after the name line }
      if not EOF(f_DNK_RNK) then
        read(f_DNK_RNK, cur_symbol)
      else
      begin
        writeln('[пустая последовательность 2]');
        break;
      end;

      { allocate the buffer for the sequence body }
      SetLength(nucleatides.items, 100);
      j := 1;
      nucleatides.seq_type := UNKNOWN;
      ratio := 1;

      { the next record starts immediately: this one has no body }
      if (cur_symbol = '>') then
      begin
        writeln('[пустая последовательность]');
        CLearSequense(nucleatides);
        { cur_symbol already holds the next record's '>' - do not re-read }
        flag := true;
        continue;
      end;

      { sequence body: everything up to the next '>' or the end of the file }
      row := 1;
      while (cur_symbol <> '>') do
      begin
        if (cur_symbol = #10) then
        begin
          line := line + 1;
          row := 1;
        end;

        { grow the buffer in steps of 100 elements }
        if (j > ratio * 100 - 1) then
        begin
          ratio := ratio + 1;
          SetLength(nucleatides.items, 100 * ratio);
        end;

        { characters that are neither separators, digits nor nucleotides }
        if (not (cur_symbol in availble_set)) and
           (not (cur_symbol in ['0'..'9'])) and
           (not (cur_symbol in set_of_nucleotides)) then
        begin
          writeln('[Ошибка!]', cur_symbol);
          writeln();
          is_error := true;
        end;

        if (cur_symbol in set_of_nucleotides) then
        begin
          { the first U or T decides whether this is RNA or DNA }
          if ((cur_symbol = 'u') or (cur_symbol = 'U'))
            and (nucleatides.seq_type = UNKNOWN) then
            nucleatides.seq_type := RNA;
          if ((cur_symbol = 't') or (cur_symbol = 'T'))
            and (nucleatides.seq_type = UNKNOWN) then
            nucleatides.seq_type := DNA;

          { U inside DNA or T inside RNA }
          if ((nucleatides.seq_type = DNA) and (cur_symbol in ['u', 'U']))
            or ((nucleatides.seq_type = RNA) and (cur_symbol in ['t', 'T'])) then
            writeln('[Ошибка!]');

          nucleatides.items[j].result := cur_symbol;
          nucleatides.items[j].identificator_line := line;
          nucleatides.items[j].identificator_column := row;
          j := j + 1;
        end;

        if not EOF(f_DNK_RNK) then
        begin
          read(f_DNK_RNK, cur_symbol);
          row := row + 1;
        end
        else
          break;
      end;

      { if the type could not be determined, treat the sequence as DNA }
      if (nucleatides.seq_type = UNKNOWN) then
        nucleatides.seq_type := DNA;

      { run the searches on a valid sequence }
      if (nucleatides.seq_type = RNA) and (not is_error) then
      begin
        Mode_Find_Normal(nucleatides, amino_acid_arr, amino_acid_table);
        writeln();
      end
      else if (nucleatides.seq_type = DNA) and (not is_error) then
      begin
        Mode_Find_Normal(nucleatides, amino_acid_arr, amino_acid_table);
        Mode_Find_Left(nucleatides, amino_acid_arr, amino_acid_table);
        writeln();
      end;

      { cur_symbol now holds the next record's '>' (or the file has ended) }
      flag := true;
      j := 1;
      CLearSequense(nucleatides);
    end;
  end;

  close(file_amino_acids);
  close(f_DNK_RNK);
end.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement