Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "from string import ascii_letters\n",
- "from sklearn.feature_extraction.text import TfidfVectorizer\n",
- "import requests\n",
- "import re\n",
- "from sklearn.cluster import DBSCAN\n",
- "from tkinter import *"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "def parsString(s):\n",
- "    \"\"\"Lower-case `s` and split it into space-prefixed tokens.\n",
- "\n",
- "    A token is either a maximal run of ASCII letters or one single\n",
- "    non-letter character (digits, punctuation and whitespace included).\n",
- "    Each token is emitted with one leading space, so\n",
- "    parsString('Id=1') returns ' id = 1'.\n",
- "\n",
- "    Args:\n",
- "        s: raw text (str).\n",
- "\n",
- "    Returns:\n",
- "        The tokenised text (str); '' when `s` is empty.\n",
- "\n",
- "    NOTE(review): the pasted notebook lost its indentation, so the exact\n",
- "    nesting of the earlier if/else could not be recovered. This version\n",
- "    keeps every input character as a token; confirm that is the intent.\n",
- "    \"\"\"\n",
- "    tokens = []\n",
- "    word = []  # letters of the word currently being read\n",
- "    for ch in s.lower():\n",
- "        if ch in ascii_letters:\n",
- "            word.append(ch)\n",
- "            continue\n",
- "        # A non-letter ends the pending word and is a token of its own.\n",
- "        if word:\n",
- "            tokens.append(''.join(word))\n",
- "            word = []\n",
- "        tokens.append(ch)\n",
- "    # Flush a word that runs to the end of the input; testing for an\n",
- "    # empty buffer here would silently drop it.\n",
- "    if word:\n",
- "        tokens.append(''.join(word))\n",
- "    return ''.join(' ' + token for token in tokens)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "def readDataset(path='dataset'):\n",
- "    \"\"\"Read the dataset file and tokenise it line by line.\n",
- "\n",
- "    Args:\n",
- "        path: file to read; defaults to 'dataset' in the working directory.\n",
- "\n",
- "    Returns:\n",
- "        A list with parsString(line) for every line of the file, in file\n",
- "        order. Lines are passed on with their trailing newline.\n",
- "\n",
- "    Raises:\n",
- "        OSError: if the file cannot be opened.\n",
- "    \"\"\"\n",
- "    # `with` closes the file even if parsString raises part-way through.\n",
- "    with open(path, 'r') as f:\n",
- "        return [parsString(line) for line in f]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "def makeDataset(count=200, path='dataset'):\n",
- "    \"\"\"Download vectors from the service and write them to a file.\n",
- "\n",
- "    The file is opened in 'w' mode, so previous content is replaced. One\n",
- "    request is made per vector and each vector is written on its own line.\n",
- "\n",
- "    Args:\n",
- "        count: number of requests to make (default 200).\n",
- "        path: output file (default 'dataset').\n",
- "\n",
- "    NOTE(review): the vector is cut out of the raw response text by\n",
- "    position (10 characters after the first 'vector', up to 4 characters\n",
- "    before the first '\"id\":'), which assumes a fixed response layout.\n",
- "    The URL also embeds what looks like a personal user id; consider\n",
- "    moving it to configuration.\n",
- "    \"\"\"\n",
- "    url = 'https://slot-ml.ptsecurity.com/api/v1/users/ad828642388ee5a23f0c90483520309204ea3d53/vectors/?random'\n",
- "    # `with` guarantees the file is closed when a request fails mid-loop.\n",
- "    with open(path, 'w') as f:\n",
- "        for _ in range(count):\n",
- "            text = str(requests.get(url).text)\n",
- "            start = text.find('vector') + 10\n",
- "            stop = text.find('\"id\":') - 4\n",
- "            f.write(text[start:stop] + '\\n')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "def train(dataset):\n",
- "    \"\"\"Vectorise `dataset` with TF-IDF and return the resulting matrix.\n",
- "\n",
- "    A new TfidfVectorizer (word analyzer, unigrams only) is fitted on\n",
- "    every call. Only the output of fit_transform is returned; the fitted\n",
- "    vectorizer itself is discarded.\n",
- "\n",
- "    Args:\n",
- "        dataset: iterable of strings, one per sample.\n",
- "    \"\"\"\n",
- "    tfidf = TfidfVectorizer(analyzer='word', ngram_range=(1, 1))\n",
- "    return tfidf.fit_transform(dataset)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "def clusterization(vectors, eps=0.5):\n",
- "    \"\"\"Fit DBSCAN on `vectors` and return the fitted estimator.\n",
- "\n",
- "    Args:\n",
- "        vectors: feature matrix, one row per sample.\n",
- "        eps: DBSCAN neighbourhood radius; the default 0.5 is the value\n",
- "            that was previously hard-coded.\n",
- "\n",
- "    Returns:\n",
- "        The fitted DBSCAN model; per-sample labels are in `labels_`.\n",
- "    \"\"\"\n",
- "    return DBSCAN(eps=eps).fit(vectors)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "def sendSol(vectorsString):\n",
- " # Runs 100 rounds. Each round: download one vector, tokenise it with\n",
- " # parsString, store it in slot 999 of vectorsString, refit TF-IDF and\n",
- " # DBSCAN over the whole list, read the label of slot 999, turn it into a\n",
- " # class value and POST it to the results endpoint. Returns nothing.\n",
- " #\n",
- " # vectorsString is modified in place and must already hold at least 1000\n",
- " # items, otherwise the assignment to index 999 raises IndexError.\n",
- " #\n",
- " # NOTE(review): the indentation of this paste was lost, so the nesting of\n",
- " # the if/else chain that picks the class cannot be confirmed from here;\n",
- " # check it against the original notebook before changing this function.\n",
- " #f=open('dataset', 'w')\n",
- " # Number of POSTs so far whose response printed as \"<Response [200]>\".\n",
- " ok=0\n",
- " for i in range(100):\n",
- " responce=requests.get('https://slot-ml.ptsecurity.com/api/v1/users/c34f6b9b747ebe438da878a07a318892a988c06f/vectors/?random')\n",
- " # Both fields are cut out of the raw response text by position:\n",
- " # vect starts 10 characters after the first 'vector' and stops 4 before\n",
- " # the first '\"id\":'; id starts 7 characters after '\"id\":' and stops 4\n",
- " # characters before the end. Presumably tied to a fixed layout - confirm.\n",
- " vect=str(responce.text)[str(responce.text).find(\"vector\")+10:str(responce.text).find(\"\\\"id\\\":\")-4]\n",
- " # NOTE(review): `id` shadows the Python builtin of the same name.\n",
- " id=str(responce.text)[str(responce.text).find(\"\\\"id\\\":\")+7:-4]\n",
- " #print(responce.text)\n",
- " #print(vect)\n",
- " #print(id) \n",
- " rpost = ('https://slot-ml.ptsecurity.com/api/v1/users/c34f6b9b747ebe438da878a07a318892a988c06f/results/')\n",
- " #rpost=('5.8.180.70:80/api/v1/users/c34f6b9b747ebe438da878a07a318892a988c06f/results/')\n",
- " #mpost=('{ \\\"vector\\\": \\\"'+id+'\\\", \\\"class\\\": 1 }')\n",
- " #vectorsString[i]=parsString(vect)\n",
- " strv=parsString(vect)\n",
- " #vectorsString.append(parsString(strv))\n",
- " # The new sample always overwrites slot 999 of the caller's list.\n",
- " vectorsString[999]=strv\n",
- " # The vectorizer and the clustering are refitted from scratch each round.\n",
- " vectors=train(vectorsString)\n",
- " #print(vectors)\n",
- " model = clusterization(vectors)\n",
- " # NOTE(review): cnt is assigned but never read.\n",
- " cnt=0;\n",
- " clas=model.labels_[999]\n",
- " # -1 is the label DBSCAN assigns to noise samples. The branches below\n",
- " # use ok, i and len(strv) to set clas to -1, -2 or -4.\n",
- " if(clas==-1):\n",
- " if (ok*2<=i):\n",
- " if (len(strv)>100):\n",
- " clas=-1\n",
- " else:\n",
- " clas=-2\n",
- " else:\n",
- " clas=-4\n",
- " # Form fields sent to the results endpoint; the class sent is clas+3.\n",
- " mpost={}\n",
- " mpost[\"vector\"]=id\n",
- " mpost[\"class\"]=clas+3\n",
- " #print(clas)\n",
- " #print(rpost, mpost)\n",
- " msg=requests.post(rpost, data=mpost)\n",
- " # Success is detected by comparing the printed form of the response;\n",
- " # presumably equivalent to msg.status_code == 200 - confirm.\n",
- " if (str(msg)==\"<Response [200]>\"):\n",
- " ok+=1\n",
- " #if (i%10==0):\n",
- " # print(i, ok)\n",
- " #f.close()\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "def check(strfr):\n",
- "    \"\"\"Return the DBSCAN cluster label assigned to the text `strfr`.\n",
- "\n",
- "    Rebuilds the dataset file with makeDataset(), reads it back with\n",
- "    readDataset(), adds the tokenised query as the last sample, fits\n",
- "    TF-IDF and DBSCAN over all samples and returns the query's label.\n",
- "\n",
- "    Args:\n",
- "        strfr: the text to classify; it is converted with str().\n",
- "\n",
- "    Returns:\n",
- "        The entry of model.labels_ that belongs to the query.\n",
- "\n",
- "    NOTE(review): every call re-downloads the whole dataset through\n",
- "    makeDataset(), so a single check triggers many network requests.\n",
- "    \"\"\"\n",
- "    makeDataset()\n",
- "    samples = readDataset()\n",
- "    # Append instead of writing to a fixed slot: the dataset length is not\n",
- "    # guaranteed (makeDataset writes 200 lines), so a fixed index such as\n",
- "    # 999 can be out of range and raise IndexError.\n",
- "    samples.append(parsString(str(strfr)))\n",
- "    model = clusterization(train(samples))\n",
- "    # The query is the last sample, so its label is the last one.\n",
- "    return model.labels_[-1]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "def action(event):\n",
- "    \"\"\"Button handler: classify the text in `tf` and show the label in `ent`.\n",
- "\n",
- "    `event` is the Tk event object supplied by bind(); it is not used.\n",
- "    \"\"\"\n",
- "    # The result box is cleared first, so it stays empty if check() fails.\n",
- "    ent.delete(\"1.0\", END)\n",
- "    query = tf.get(\"1.0\", END)\n",
- "    ent.insert(\"1.0\", check(query))\n",
- "# Build the window: an 8x70 input box, a 1x3 result box and a button\n",
- "# whose left-click runs action().\n",
- "root = Tk()\n",
- "tf = Text(root, height=8, width=70)\n",
- "tf.pack()\n",
- "ent = Text(root, height=1, width=3)\n",
- "ent.pack()\n",
- "but = Button(root, text=\"Check\")\n",
- "but.bind(\"<Button-1>\", action)\n",
- "but.pack()\n",
- "# Blocks this cell until the window is closed.\n",
- "root.mainloop()\n",
- "# Batch alternative that was left disabled here:\n",
- "#   makeDataset(); sendSol(vectorsString)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.5.2"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 1
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement