Advertisement
LuciaPrieto

Untitled

Jun 16th, 2018
562
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
MySQL 11.39 KB | None | 0 0
  1. delimiter @@
  2.  
  3. DROP PROCEDURE IF EXISTS Build_Bio_Similarity() @@
  4.  
  5. CREATE PROCEDURE Build_Bio_Similarity()
  6. BEGIN
  7.  
  8. -- ----------------------------------------------------------------
  9. -- -- DisGenCoef                                                 --
  10. -- ----------------------------------------------------------------
  11.  
  12. -- Suma de genes por enfermedad
  13. DROP TABLE IF EXISTS tmp_disgentot;
  14. CREATE TABLE tmp_disgentot AS
  15. SELECT disease_cui, COUNT(*) AS gentot
  16.   FROM disease_gene
  17.  GROUP BY disease_cui;
  18. CREATE INDEX ix_tmp_disgentot ON tmp_disgentot ( disease_cui );
  19.  
  20. -- Combinaciones de enfermedades con genes coincidentes sin agrupar
  21. DROP TABLE IF EXISTS tmp_disgencomb;
  22. CREATE TABLE tmp_disgencomb AS
  23. SELECT dg1.disease_cui AS id_dis1, dg2.disease_cui AS id_dis2
  24.   FROM disease_gene dg1 INNER JOIN disease_gene dg2
  25.     ON ( dg1.gene_id = dg2.gene_id AND dg2.disease_cui > dg1.disease_cui );
  26. CREATE INDEX ix_tmp_disgencomb ON tmp_disgencomb ( id_dis1, id_dis2 );
  27.  
  28. -- Coeficientes de los vectores de enfermedades relacionadas por genes
  29. DROP TABLE IF EXISTS tmp_disgencoef;
  30. CREATE TABLE tmp_disgencoef
  31.        ( id_dis1 CHAR(8)
  32.         ,id_dis2 CHAR(8)
  33.         ,gen_cos FLOAT
  34.         ,gen_jac FLOAT
  35.         ,gen_dic FLOAT )
  36.     AS
  37. SELECT dgvc.id_dis1
  38.       ,dgvc.id_dis2
  39.       ,dgvc.gentot / (SQRT(dgv1.gentot) * SQRT(dgv2.gentot)) AS gen_cos
  40.       ,dgvc.gentot / (dgv1.gentot + dgv2.gentot - dgvc.gentot) AS gen_jac
  41.       ,dgvc.gentot * 2 / (dgv1.gentot + dgv2.gentot) AS gen_dic
  42.   FROM (SELECT id_dis1, id_dis2, count(*) AS gentot
  43.           FROM tmp_disgencomb
  44.          GROUP BY id_dis1, id_dis2 ) AS dgvc
  45.        ,tmp_disgentot AS dgv1
  46.        ,tmp_disgentot AS dgv2
  47.  WHERE dgvc.id_dis1 = dgv1.disease_cui
  48.    AND dgvc.id_dis2 = dgv2.disease_cui;
  49. CREATE INDEX ix_tmp_disgencoef ON tmp_disgencoef ( id_dis1, id_dis2 );
  50.  
  51. -- ----------------------------------------------------------------
  52. -- -- DisProtCoef                                                --
  53. -- ----------------------------------------------------------------
  54.  
  55. -- Relacion proteinas con enfermedades a traves de genes
  56. DROP TABLE IF EXISTS tmp_disprot;
  57. CREATE TABLE tmp_disprot AS
  58. SELECT DISTINCT disease_cui, prot_id
  59.   FROM disease_gene gd INNER JOIN encodes en
  60.     ON ( gd.gene_id = en.gene_id );
  61. CREATE INDEX ix_tmp_disprot1 ON tmp_disprot ( disease_cui, prot_id );
  62. CREATE INDEX ix_tmp_disprot2 ON tmp_disprot ( prot_id, disease_cui );
  63.  
  64. -- Suma de proteinas por enfermedad
  65. DROP TABLE IF EXISTS tmp_disprottot;
  66. CREATE TABLE tmp_disprottot AS
  67. SELECT disease_cui, COUNT(*) AS prottot
  68.   FROM tmp_disprot
  69.  GROUP BY disease_cui;
  70. CREATE INDEX ix_tmp_disprottot ON tmp_disprottot ( disease_cui );
  71.  
  72. -- Combinaciones de enfermedades con proteinas coincidentes sin agrupar
  73. DROP TABLE IF EXISTS tmp_disprotcomb;
  74. CREATE TABLE tmp_disprotcomb AS
  75. SELECT dp1.disease_cui AS id_dis1, dp2.disease_cui AS id_dis2
  76.   FROM tmp_disprot dp1 INNER JOIN tmp_disprot dp2
  77.     ON ( dp1.prot_id = dp2.prot_id AND dp2.disease_cui > dp1.disease_cui );
  78. CREATE INDEX ix_tmp_disprotcomb ON tmp_disprotcomb ( id_dis1, id_dis2 );
  79.  
  80. -- Coeficientes de los vectores de enfermedades relacionadas por proteinas
  81. DROP TABLE IF EXISTS tmp_disprotcoef;
  82. CREATE TABLE tmp_disprotcoef
  83.        ( id_dis1 CHAR(8)
  84.         ,id_dis2 CHAR(8)
  85.         ,prot_cos FLOAT
  86.         ,prot_jac FLOAT
  87.         ,prot_dic FLOAT )
  88.     AS
  89. SELECT dpvc.id_dis1
  90.       ,dpvc.id_dis2
  91.       ,dpvc.prottot / (SQRT(dpv1.prottot) * SQRT(dpv2.prottot)) AS prot_cos
  92.       ,dpvc.prottot / (dpv1.prottot + dpv2.prottot - dpvc.prottot) AS prot_jac
  93.       ,dpvc.prottot * 2 / (dpv1.prottot + dpv2.prottot) AS prot_dic
  94.   FROM (SELECT id_dis1, id_dis2, count(*) AS prottot
  95.           FROM tmp_disprotcomb
  96.          GROUP BY id_dis1, id_dis2 ) AS dpvc
  97.        ,tmp_disprottot AS dpv1
  98.        ,tmp_disprottot AS dpv2
  99.  WHERE dpvc.id_dis1 = dpv1.disease_cui
  100.    AND dpvc.id_dis2 = dpv2.disease_cui;
  101. CREATE INDEX ix_tmp_disprotcoef ON tmp_disprotcoef ( id_dis1, id_dis2 );
  102.  
  103. -- ----------------------------------------------------------------
  104. -- -- DisPathCoef                                                --
  105. -- ----------------------------------------------------------------
  106.  
  107. -- Relacion rutas con enfermedades a traves de genes
  108. DROP TABLE IF EXISTS tmp_dispath;
  109. CREATE TABLE tmp_dispath AS
  110. SELECT DISTINCT disease_cui, path_id
  111.   FROM disease_gene gd INNER JOIN gene_pathway gp
  112.     ON ( gd.gene_id = gp.gen_id );
  113. CREATE INDEX ix_tmp_dispath1 ON tmp_dispath ( disease_cui, path_id );
  114. CREATE INDEX ix_tmp_dispath2 ON tmp_dispath ( path_id, disease_cui );
  115.  
  116. -- Suma de rutas por enfermedad
  117. DROP TABLE IF EXISTS tmp_dispathtot;
  118. CREATE TABLE tmp_dispathtot AS
  119. SELECT disease_cui, COUNT(*) AS pathtot
  120.   FROM tmp_dispath
  121.  GROUP BY disease_cui;
  122. CREATE INDEX ix_tmp_dispathtot ON tmp_dispathtot ( disease_cui );
  123.  
  124. -- Combinaciones de enfermedades con rutas coincidentes sin agrupar
  125. DROP TABLE IF EXISTS tmp_dispathcomb;
  126. CREATE TABLE tmp_dispathcomb AS
  127. SELECT dp1.disease_cui AS id_dis1, dp2.disease_cui AS id_dis2
  128.   FROM tmp_dispath dp1 INNER JOIN tmp_dispath dp2
  129.     ON ( dp1.path_id = dp2.path_id AND dp2.disease_cui > dp1.disease_cui );
  130. CREATE INDEX ix_tmp_dispathcomb ON tmp_dispathcomb ( id_dis1, id_dis2 );
  131.  
  132. -- Coeficientes de los vectores de enfermedades relacionadas por rutas
  133. DROP TABLE IF EXISTS tmp_dispathcoef;
  134. CREATE TABLE tmp_dispathcoef
  135.        ( id_dis1 CHAR(8)
  136.         ,id_dis2 CHAR(8)
  137.         ,path_cos FLOAT
  138.         ,path_jac FLOAT
  139.         ,path_dic FLOAT )
  140.     AS
  141. SELECT dpvc.id_dis1
  142.       ,dpvc.id_dis2
  143.       ,dpvc.pathtot / (SQRT(dpv1.pathtot) * SQRT(dpv2.pathtot)) AS path_cos
  144.       ,dpvc.pathtot / (dpv1.pathtot + dpv2.pathtot - dpvc.pathtot) AS path_jac
  145.       ,dpvc.pathtot * 2 / (dpv1.pathtot + dpv2.pathtot) AS path_dic
  146.   FROM (SELECT id_dis1, id_dis2, count(*) AS pathtot
  147.           FROM tmp_dispathcomb
  148.          GROUP BY id_dis1, id_dis2 ) AS dpvc
  149.        ,tmp_dispathtot AS dpv1
  150.        ,tmp_dispathtot AS dpv2
  151.  WHERE dpvc.id_dis1 = dpv1.disease_cui
  152.    AND dpvc.id_dis2 = dpv2.disease_cui;
  153. CREATE INDEX ix_tmp_dispathcoef ON tmp_dispathcoef ( id_dis1, id_dis2 );
  154.  
  155. -- ----------------------------------------------------------------
  156. -- -- DisPPiCoef                                                 --
  157. -- ----------------------------------------------------------------
  158.  
  159. -- Relacion interaccion proteinas con enfermedades a traves de genes
  160. DROP TABLE IF EXISTS tmp_disppi;
  161. CREATE TABLE tmp_disppi AS
  162. SELECT DISTINCT dp1.disease_cui, ppi.id_p1, ppi.id_p2
  163.   FROM p_interacts_with_p ppi
  164.        INNER JOIN edsssdb.tmp_disprot dp1
  165.          ON ( dp1.prot_id = ppi.id_p1 )
  166.        INNER JOIN edsssdb.tmp_disprot dp2
  167.          ON ( dp2.prot_id = ppi.id_p2 AND dp1.disease_cui = dp2.disease_cui);
  168. CREATE INDEX ix_tmp_disppi1 ON tmp_disppi ( id_p1, id_p2, disease_cui );
  169. CREATE INDEX ix_tmp_disppi2 ON tmp_disppi ( disease_cui, id_p1, id_p2 );
  170.  
  171. -- Suma de interaccion proteinas por enfermedad
  172. DROP TABLE IF EXISTS tmp_disppitot;
  173. CREATE TABLE tmp_disppitot AS
  174. SELECT disease_cui, COUNT(*) AS ppitot
  175.   FROM tmp_disppi
  176.  GROUP BY disease_cui;
  177. CREATE INDEX ix_tmp_disppitot ON tmp_disppitot ( disease_cui );
  178.  
  179. -- Combinaciones de enfermedades con interaccion proteinas coincidentes sin agrupar
  180. DROP TABLE IF EXISTS tmp_disppicomb;
  181. CREATE TABLE tmp_disppicomb AS
  182. SELECT dp1.disease_cui AS id_dis1, dp2.disease_cui AS id_dis2
  183.   FROM tmp_disppi dp1 INNER JOIN tmp_disppi dp2
  184.     ON ( dp1.id_p1 = dp2.id_p1 AND dp1.id_p2 = dp2.id_p2 AND dp2.disease_cui > dp1.disease_cui );
  185. CREATE INDEX ix_tmp_disppicomb ON tmp_disppicomb ( id_dis1, id_dis2 );
  186.  
  187. -- Coeficientes de los vectores de enfermedades relacionadas por interaccion proteinas
  188. DROP TABLE IF EXISTS tmp_disppicoef;
  189. CREATE TABLE tmp_disppicoef
  190.        ( id_dis1 CHAR(8)
  191.         ,id_dis2 CHAR(8)
  192.         ,ppi_cos FLOAT
  193.         ,ppi_jac FLOAT
  194.         ,ppi_dic FLOAT )
  195.     AS
  196. SELECT dpvc.id_dis1
  197.       ,dpvc.id_dis2
  198.       ,dpvc.ppitot / (SQRT(dpv1.ppitot) * SQRT(dpv2.ppitot)) AS ppi_cos
  199.       ,dpvc.ppitot / (dpv1.ppitot + dpv2.ppitot - dpvc.ppitot) AS ppi_jac
  200.       ,dpvc.ppitot * 2 / (dpv1.ppitot + dpv2.ppitot) AS ppi_dic
  201.   FROM (SELECT id_dis1, id_dis2, count(*) AS ppitot
  202.           FROM tmp_disppicomb
  203.          GROUP BY id_dis1, id_dis2 ) AS dpvc
  204.        ,tmp_disppitot AS dpv1
  205.        ,tmp_disppitot AS dpv2
  206.  WHERE dpvc.id_dis1 = dpv1.disease_cui
  207.    AND dpvc.id_dis2 = dpv2.disease_cui;
  208. CREATE INDEX ix_tmp_disppicoef ON tmp_disppicoef ( id_dis1, id_dis2 );
  209.  
  210. -- ----------------------------------------------------------------
  211. -- -- BioSimilarity                                              --
  212. -- ----------------------------------------------------------------
  213.  
  214. DROP TABLE IF EXISTS bio_similarity;
  215. CREATE TABLE bio_similarity (
  216.            id_dis1 char(8) NOT NULL
  217.           ,id_dis2 char(8) NOT NULL
  218.           ,S_gen_cos float DEFAULT 0
  219.           ,S_gen_jaccard float DEFAULT 0
  220.           ,S_gen_dice float DEFAULT 0
  221.           ,S_prot_cos float DEFAULT 0
  222.           ,S_prot_jaccard float DEFAULT 0
  223.           ,S_prot_dice float DEFAULT 0
  224.           ,S_path_cos float DEFAULT 0
  225.           ,S_path_jaccard float DEFAULT 0
  226.           ,S_path_dice float DEFAULT 0
  227.           ,S_ppi_cos float DEFAULT 0
  228.           ,S_ppi_jaccard float DEFAULT 0
  229.           ,S_ppi_dice float DEFAULT 0  )
  230.     AS
  231. SELECT  comb.id_dis1 AS id_dis1
  232.        ,comb.id_dis2 AS id_dis2
  233.        ,COALESCE(gen.gen_cos,0) AS S_gen_cos
  234.        ,COALESCE(gen.gen_jac,0) AS S_gen_jaccard
  235.        ,COALESCE(gen.gen_dic,0) AS S_gen_dice
  236.        ,COALESCE(prot.prot_cos,0) AS S_prot_cos
  237.        ,COALESCE(prot.prot_jac,0) AS S_prot_jaccard
  238.        ,COALESCE(prot.prot_dic,0) AS S_prot_dice
  239.        ,COALESCE(path.path_cos,0) AS S_path_cos
  240.        ,COALESCE(path.path_jac,0) AS S_path_jaccard
  241.        ,COALESCE(path.path_dic,0) AS S_path_dice
  242.        ,COALESCE(ppi.ppi_cos,0) AS S_ppi_cos
  243.        ,COALESCE(ppi.ppi_jac,0) AS S_ppi_jaccard
  244.        ,COALESCE(ppi.ppi_dic,0) AS S_ppi_dice
  245.   FROM ( SELECT a.cui AS id_dis1, b.cui AS id_dis2
  246.            FROM (SELECT DISTINCT cui AS cui FROM edsssdb.has_biological_info) a,
  247.                 (SELECT DISTINCT cui AS cui FROM edsssdb.has_biological_info) b
  248.           WHERE b.cui > a.cui ) comb
  249.        LEFT JOIN tmp_disgencoef AS gen
  250.          ON (comb.id_dis1 = gen.id_dis1 AND comb.id_dis2 = gen.id_dis2)
  251.        LEFT JOIN tmp_disprotcoef AS prot
  252.          ON (comb.id_dis1 = prot.id_dis1 AND comb.id_dis2 = prot.id_dis2)
  253.        LEFT JOIN tmp_dispathcoef AS path
  254.          ON (comb.id_dis1 = path.id_dis1 AND comb.id_dis2 = path.id_dis2)
  255.        LEFT JOIN tmp_disppicoef AS ppi
  256.          ON (comb.id_dis1 = ppi.id_dis1 AND comb.id_dis2 = ppi.id_dis2);
  257. ALTER TABLE bio_similarity
  258. ADD PRIMARY KEY (id_dis1, id_dis2);
  259.  
  260. -- ----------------------------------------------------------------
  261. -- -- Housekeeping                                               --
  262. -- ----------------------------------------------------------------
  263.  
  264. DROP TABLE IF EXISTS tmp_disgentot;
  265. DROP TABLE IF EXISTS tmp_disgencomb;
  266. DROP TABLE IF EXISTS tmp_disgencoef;
  267. DROP TABLE IF EXISTS tmp_disprot;
  268. DROP TABLE IF EXISTS tmp_disprottot;
  269. DROP TABLE IF EXISTS tmp_disprotcomb;
  270. DROP TABLE IF EXISTS tmp_disprotcoef;
  271. DROP TABLE IF EXISTS tmp_dispath;
  272. DROP TABLE IF EXISTS tmp_dispathtot;
  273. DROP TABLE IF EXISTS tmp_dispathcomb;
  274. DROP TABLE IF EXISTS tmp_dispathcoef;
  275. DROP TABLE IF EXISTS tmp_disppi;
  276. DROP TABLE IF EXISTS tmp_disppitot;
  277. DROP TABLE IF EXISTS tmp_disppicomb;
  278. DROP TABLE IF EXISTS tmp_disppicoef;
  279.  
  280.  
  281. END;
  282.  
  283. @@
  284.  
  285. delimiter ;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement