Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- delimiter @@
- DROP PROCEDURE IF EXISTS Build_Bio_Similarity() @@
- CREATE PROCEDURE Build_Bio_Similarity()
- BEGIN
- -- ----------------------------------------------------------------
- -- -- DisGenCoef --
- -- ----------------------------------------------------------------
- -- Suma de genes por enfermedad
- DROP TABLE IF EXISTS tmp_disgentot;
- CREATE TABLE tmp_disgentot AS
- SELECT disease_cui, COUNT(*) AS gentot
- FROM disease_gene
- GROUP BY disease_cui;
- CREATE INDEX ix_tmp_disgentot ON tmp_disgentot ( disease_cui );
- -- Combinaciones de enfermedades con genes coincidentes sin agrupar
- DROP TABLE IF EXISTS tmp_disgencomb;
- CREATE TABLE tmp_disgencomb AS
- SELECT dg1.disease_cui AS id_dis1, dg2.disease_cui AS id_dis2
- FROM disease_gene dg1 INNER JOIN disease_gene dg2
- ON ( dg1.gene_id = dg2.gene_id AND dg2.disease_cui > dg1.disease_cui );
- CREATE INDEX ix_tmp_disgencomb ON tmp_disgencomb ( id_dis1, id_dis2 );
- -- Coeficientes de los vectores de enfermedades relacionadas por genes
- DROP TABLE IF EXISTS tmp_disgencoef;
- CREATE TABLE tmp_disgencoef
- ( id_dis1 CHAR(8)
- ,id_dis2 CHAR(8)
- ,gen_cos FLOAT
- ,gen_jac FLOAT
- ,gen_dic FLOAT )
- AS
- SELECT dgvc.id_dis1
- ,dgvc.id_dis2
- ,dgvc.gentot / (SQRT(dgv1.gentot) * SQRT(dgv2.gentot)) AS gen_cos
- ,dgvc.gentot / (dgv1.gentot + dgv2.gentot - dgvc.gentot) AS gen_jac
- ,dgvc.gentot * 2 / (dgv1.gentot + dgv2.gentot) AS gen_dic
- FROM (SELECT id_dis1, id_dis2, count(*) AS gentot
- FROM tmp_disgencomb
- GROUP BY id_dis1, id_dis2 ) AS dgvc
- ,tmp_disgentot AS dgv1
- ,tmp_disgentot AS dgv2
- WHERE dgvc.id_dis1 = dgv1.disease_cui
- AND dgvc.id_dis2 = dgv2.disease_cui;
- CREATE INDEX ix_tmp_disgencoef ON tmp_disgencoef ( id_dis1, id_dis2 );
- -- ----------------------------------------------------------------
- -- -- DisProtCoef --
- -- ----------------------------------------------------------------
- -- Relacion proteinas con enfermedades a traves de genes
- DROP TABLE IF EXISTS tmp_disprot;
- CREATE TABLE tmp_disprot AS
- SELECT DISTINCT disease_cui, prot_id
- FROM disease_gene gd INNER JOIN encodes en
- ON ( gd.gene_id = en.gene_id );
- CREATE INDEX ix_tmp_disprot1 ON tmp_disprot ( disease_cui, prot_id );
- CREATE INDEX ix_tmp_disprot2 ON tmp_disprot ( prot_id, disease_cui );
- -- Suma de proteinas por enfermedad
- DROP TABLE IF EXISTS tmp_disprottot;
- CREATE TABLE tmp_disprottot AS
- SELECT disease_cui, COUNT(*) AS prottot
- FROM tmp_disprot
- GROUP BY disease_cui;
- CREATE INDEX ix_tmp_disprottot ON tmp_disprottot ( disease_cui );
- -- Combinaciones de enfermedades con proteinas coincidentes sin agrupar
- DROP TABLE IF EXISTS tmp_disprotcomb;
- CREATE TABLE tmp_disprotcomb AS
- SELECT dp1.disease_cui AS id_dis1, dp2.disease_cui AS id_dis2
- FROM tmp_disprot dp1 INNER JOIN tmp_disprot dp2
- ON ( dp1.prot_id = dp2.prot_id AND dp2.disease_cui > dp1.disease_cui );
- CREATE INDEX ix_tmp_disprotcomb ON tmp_disprotcomb ( id_dis1, id_dis2 );
- -- Coeficientes de los vectores de enfermedades relacionadas por proteinas
- DROP TABLE IF EXISTS tmp_disprotcoef;
- CREATE TABLE tmp_disprotcoef
- ( id_dis1 CHAR(8)
- ,id_dis2 CHAR(8)
- ,prot_cos FLOAT
- ,prot_jac FLOAT
- ,prot_dic FLOAT )
- AS
- SELECT dpvc.id_dis1
- ,dpvc.id_dis2
- ,dpvc.prottot / (SQRT(dpv1.prottot) * SQRT(dpv2.prottot)) AS prot_cos
- ,dpvc.prottot / (dpv1.prottot + dpv2.prottot - dpvc.prottot) AS prot_jac
- ,dpvc.prottot * 2 / (dpv1.prottot + dpv2.prottot) AS prot_dic
- FROM (SELECT id_dis1, id_dis2, count(*) AS prottot
- FROM tmp_disprotcomb
- GROUP BY id_dis1, id_dis2 ) AS dpvc
- ,tmp_disprottot AS dpv1
- ,tmp_disprottot AS dpv2
- WHERE dpvc.id_dis1 = dpv1.disease_cui
- AND dpvc.id_dis2 = dpv2.disease_cui;
- CREATE INDEX ix_tmp_disprotcoef ON tmp_disprotcoef ( id_dis1, id_dis2 );
- -- ----------------------------------------------------------------
- -- -- DisPathCoef --
- -- ----------------------------------------------------------------
- -- Relacion rutas con enfermedades a traves de genes
- DROP TABLE IF EXISTS tmp_dispath;
- CREATE TABLE tmp_dispath AS
- SELECT DISTINCT disease_cui, path_id
- FROM disease_gene gd INNER JOIN gene_pathway gp
- ON ( gd.gene_id = gp.gen_id );
- CREATE INDEX ix_tmp_dispath1 ON tmp_dispath ( disease_cui, path_id );
- CREATE INDEX ix_tmp_dispath2 ON tmp_dispath ( path_id, disease_cui );
- -- Suma de rutas por enfermedad
- DROP TABLE IF EXISTS tmp_dispathtot;
- CREATE TABLE tmp_dispathtot AS
- SELECT disease_cui, COUNT(*) AS pathtot
- FROM tmp_dispath
- GROUP BY disease_cui;
- CREATE INDEX ix_tmp_dispathtot ON tmp_dispathtot ( disease_cui );
- -- Combinaciones de enfermedades con rutas coincidentes sin agrupar
- DROP TABLE IF EXISTS tmp_dispathcomb;
- CREATE TABLE tmp_dispathcomb AS
- SELECT dp1.disease_cui AS id_dis1, dp2.disease_cui AS id_dis2
- FROM tmp_dispath dp1 INNER JOIN tmp_dispath dp2
- ON ( dp1.path_id = dp2.path_id AND dp2.disease_cui > dp1.disease_cui );
- CREATE INDEX ix_tmp_dispathcomb ON tmp_dispathcomb ( id_dis1, id_dis2 );
- -- Coeficientes de los vectores de enfermedades relacionadas por rutas
- DROP TABLE IF EXISTS tmp_dispathcoef;
- CREATE TABLE tmp_dispathcoef
- ( id_dis1 CHAR(8)
- ,id_dis2 CHAR(8)
- ,path_cos FLOAT
- ,path_jac FLOAT
- ,path_dic FLOAT )
- AS
- SELECT dpvc.id_dis1
- ,dpvc.id_dis2
- ,dpvc.pathtot / (SQRT(dpv1.pathtot) * SQRT(dpv2.pathtot)) AS path_cos
- ,dpvc.pathtot / (dpv1.pathtot + dpv2.pathtot - dpvc.pathtot) AS path_jac
- ,dpvc.pathtot * 2 / (dpv1.pathtot + dpv2.pathtot) AS path_dic
- FROM (SELECT id_dis1, id_dis2, count(*) AS pathtot
- FROM tmp_dispathcomb
- GROUP BY id_dis1, id_dis2 ) AS dpvc
- ,tmp_dispathtot AS dpv1
- ,tmp_dispathtot AS dpv2
- WHERE dpvc.id_dis1 = dpv1.disease_cui
- AND dpvc.id_dis2 = dpv2.disease_cui;
- CREATE INDEX ix_tmp_dispathcoef ON tmp_dispathcoef ( id_dis1, id_dis2 );
- -- ----------------------------------------------------------------
- -- -- DisPPiCoef --
- -- ----------------------------------------------------------------
- -- Relacion interaccion proteinas con enfermedades a traves de genes
- DROP TABLE IF EXISTS tmp_disppi;
- CREATE TABLE tmp_disppi AS
- SELECT DISTINCT dp1.disease_cui, ppi.id_p1, ppi.id_p2
- FROM p_interacts_with_p ppi
- INNER JOIN edsssdb.tmp_disprot dp1
- ON ( dp1.prot_id = ppi.id_p1 )
- INNER JOIN edsssdb.tmp_disprot dp2
- ON ( dp2.prot_id = ppi.id_p2 AND dp1.disease_cui = dp2.disease_cui);
- CREATE INDEX ix_tmp_disppi1 ON tmp_disppi ( id_p1, id_p2, disease_cui );
- CREATE INDEX ix_tmp_disppi2 ON tmp_disppi ( disease_cui, id_p1, id_p2 );
- -- Suma de interaccion proteinas por enfermedad
- DROP TABLE IF EXISTS tmp_disppitot;
- CREATE TABLE tmp_disppitot AS
- SELECT disease_cui, COUNT(*) AS ppitot
- FROM tmp_disppi
- GROUP BY disease_cui;
- CREATE INDEX ix_tmp_disppitot ON tmp_disppitot ( disease_cui );
- -- Combinaciones de enfermedades con interaccion proteinas coincidentes sin agrupar
- DROP TABLE IF EXISTS tmp_disppicomb;
- CREATE TABLE tmp_disppicomb AS
- SELECT dp1.disease_cui AS id_dis1, dp2.disease_cui AS id_dis2
- FROM tmp_disppi dp1 INNER JOIN tmp_disppi dp2
- ON ( dp1.id_p1 = dp2.id_p1 AND dp1.id_p2 = dp2.id_p2 AND dp2.disease_cui > dp1.disease_cui );
- CREATE INDEX ix_tmp_disppicomb ON tmp_disppicomb ( id_dis1, id_dis2 );
- -- Coeficientes de los vectores de enfermedades relacionadas por interaccion proteinas
- DROP TABLE IF EXISTS tmp_disppicoef;
- CREATE TABLE tmp_disppicoef
- ( id_dis1 CHAR(8)
- ,id_dis2 CHAR(8)
- ,ppi_cos FLOAT
- ,ppi_jac FLOAT
- ,ppi_dic FLOAT )
- AS
- SELECT dpvc.id_dis1
- ,dpvc.id_dis2
- ,dpvc.ppitot / (SQRT(dpv1.ppitot) * SQRT(dpv2.ppitot)) AS ppi_cos
- ,dpvc.ppitot / (dpv1.ppitot + dpv2.ppitot - dpvc.ppitot) AS ppi_jac
- ,dpvc.ppitot * 2 / (dpv1.ppitot + dpv2.ppitot) AS ppi_dic
- FROM (SELECT id_dis1, id_dis2, count(*) AS ppitot
- FROM tmp_disppicomb
- GROUP BY id_dis1, id_dis2 ) AS dpvc
- ,tmp_disppitot AS dpv1
- ,tmp_disppitot AS dpv2
- WHERE dpvc.id_dis1 = dpv1.disease_cui
- AND dpvc.id_dis2 = dpv2.disease_cui;
- CREATE INDEX ix_tmp_disppicoef ON tmp_disppicoef ( id_dis1, id_dis2 );
- -- ----------------------------------------------------------------
- -- -- BioSimilarity --
- -- ----------------------------------------------------------------
- DROP TABLE IF EXISTS bio_similarity;
- CREATE TABLE bio_similarity (
- id_dis1 char(8) NOT NULL
- ,id_dis2 char(8) NOT NULL
- ,S_gen_cos float DEFAULT 0
- ,S_gen_jaccard float DEFAULT 0
- ,S_gen_dice float DEFAULT 0
- ,S_prot_cos float DEFAULT 0
- ,S_prot_jaccard float DEFAULT 0
- ,S_prot_dice float DEFAULT 0
- ,S_path_cos float DEFAULT 0
- ,S_path_jaccard float DEFAULT 0
- ,S_path_dice float DEFAULT 0
- ,S_ppi_cos float DEFAULT 0
- ,S_ppi_jaccard float DEFAULT 0
- ,S_ppi_dice float DEFAULT 0 )
- AS
- SELECT comb.id_dis1 AS id_dis1
- ,comb.id_dis2 AS id_dis2
- ,COALESCE(gen.gen_cos,0) AS S_gen_cos
- ,COALESCE(gen.gen_jac,0) AS S_gen_jaccard
- ,COALESCE(gen.gen_dic,0) AS S_gen_dice
- ,COALESCE(prot.prot_cos,0) AS S_prot_cos
- ,COALESCE(prot.prot_jac,0) AS S_prot_jaccard
- ,COALESCE(prot.prot_dic,0) AS S_prot_dice
- ,COALESCE(path.path_cos,0) AS S_path_cos
- ,COALESCE(path.path_jac,0) AS S_path_jaccard
- ,COALESCE(path.path_dic,0) AS S_path_dice
- ,COALESCE(ppi.ppi_cos,0) AS S_ppi_cos
- ,COALESCE(ppi.ppi_jac,0) AS S_ppi_jaccard
- ,COALESCE(ppi.ppi_dic,0) AS S_ppi_dice
- FROM ( SELECT a.cui AS id_dis1, b.cui AS id_dis2
- FROM (SELECT DISTINCT cui AS cui FROM edsssdb.has_biological_info) a,
- (SELECT DISTINCT cui AS cui FROM edsssdb.has_biological_info) b
- WHERE b.cui > a.cui ) comb
- LEFT JOIN tmp_disgencoef AS gen
- ON (comb.id_dis1 = gen.id_dis1 AND comb.id_dis2 = gen.id_dis2)
- LEFT JOIN tmp_disprotcoef AS prot
- ON (comb.id_dis1 = prot.id_dis1 AND comb.id_dis2 = prot.id_dis2)
- LEFT JOIN tmp_dispathcoef AS path
- ON (comb.id_dis1 = path.id_dis1 AND comb.id_dis2 = path.id_dis2)
- LEFT JOIN tmp_disppicoef AS ppi
- ON (comb.id_dis1 = ppi.id_dis1 AND comb.id_dis2 = ppi.id_dis2);
- ALTER TABLE bio_similarity
- ADD PRIMARY KEY (id_dis1, id_dis2);
- -- ----------------------------------------------------------------
- -- -- Housekeeping --
- -- ----------------------------------------------------------------
- DROP TABLE IF EXISTS tmp_disgentot;
- DROP TABLE IF EXISTS tmp_disgencomb;
- DROP TABLE IF EXISTS tmp_disgencoef;
- DROP TABLE IF EXISTS tmp_disprot;
- DROP TABLE IF EXISTS tmp_disprottot;
- DROP TABLE IF EXISTS tmp_disprotcomb;
- DROP TABLE IF EXISTS tmp_disprotcoef;
- DROP TABLE IF EXISTS tmp_dispath;
- DROP TABLE IF EXISTS tmp_dispathtot;
- DROP TABLE IF EXISTS tmp_dispathcomb;
- DROP TABLE IF EXISTS tmp_dispathcoef;
- DROP TABLE IF EXISTS tmp_disppi;
- DROP TABLE IF EXISTS tmp_disppitot;
- DROP TABLE IF EXISTS tmp_disppicomb;
- DROP TABLE IF EXISTS tmp_disppicoef;
- END;
- @@
- delimiter ;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement