Advertisement
here2share

# pure_python_neural_network.py

Feb 5th, 2019
299
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 10.37 KB | None | 0 0
  1. # pure_python_neural_network.py
  2.  
  3. import os, re, sys, math, random, time, datetime
  4. #from os import walk
  5. from copy import deepcopy
  6.  
  7. ### Activation Functions
  8.  
  9. def grad_activation(x):
  10.     fx = activation(x)
  11.     return (fx * (1.0 - fx))
  12.  
  13. def activation(x):
  14.     op = 1.0 / (1.0 + math.exp(-x))
  15.     return op
  16.  
  17. ### Build Initial Weights and Structure
  18.  
  19. def buildNN(Ninput, Nhidden, Noutput, w_scale):
  20.     random.seed()
  21.     w_dict = {}
  22.     w_dict['hidden'] = {}
  23.     w_dict['output'] = {}
  24.  
  25.     for j in xrange(0, Nhidden):
  26.         w_dict['hidden'][j] = {}
  27.         for i in xrange(0, Ninput+1): # add in extra bias weight
  28.             w_dict['hidden'][j][i] = w_scale * random.uniform(-1, 1)
  29.     for k in xrange(0, Noutput):
  30.         w_dict['output'][k] = {}
  31.         for j in xrange(0, Nhidden+1): # add in extra bias weight
  32.             w_dict['output'][k][j] = w_scale * random.uniform(-1, 1)   
  33.     #print w_dict  
  34.     return w_dict
  35.  
  36. def dotprod(A, B): # dictionaries
  37.     dp = 0.0
  38.     if len(A) != len(B):
  39.         print "Major error, vectors don't match..."
  40.         print len(A), len(B)
  41.     for a in A.keys(): ## loop over keys
  42.         dp+=A[a]*B[a]
  43.     return dp      
  44.  
  45.  
  46.  
  47. def computeFF(w_dict, input_dict):
  48.    
  49.     #tmp_input = deepcopy(input_dict)
  50.     act = {}
  51.     act['hidden'] = {}
  52.     act['output'] = {}
  53.     bias_i = len(w_dict['hidden'][0]) - 1 ## Add in pure bias here
  54.     #tmp_input[bias_i] = 1.0 ## FULL ACTIVATION
  55.     input_dict[bias_i] = 1.0
  56.     for j in w_dict['hidden']:
  57.         dp = dotprod(w_dict['hidden'][j], input_dict)
  58.         act['hidden'][j] = activation(dp)
  59.     bias_h = len(w_dict['output'][0]) - 1
  60.     act['hidden'][bias_h] = 1.0
  61.     for k in w_dict['output']:
  62.         dp = dotprod(w_dict['output'][k], act['hidden'])
  63.         act['output'][k] = activation(dp)
  64.     act['input'] = input_dict
  65.     del act['hidden'][bias_h]
  66.     del input_dict[bias_i]
  67.     return act
  68.  
  69.  
  70. def computeMSE(target, actual):
  71.     tot = 0.0
  72.     for i in target:
  73.         tot += (target[i] - actual[i])**2.0
  74.     #print tot, len(target)
  75.     return (tot / float(len(target)))
  76.    
  77.  
  78. # GRADIENT FUNCTIONS
  79.  
  80. def grad_output(act, target, grad_dict, w_dict):
  81.     bias_h = len(w_dict['hidden'])
  82.     #print "bias_h", bias_h
  83.     deltas = {}
  84.     for k in act['output']:
  85.         delta = (target[k] - act['output'][k]) * grad_activation(act['output'][k])
  86.         deltas[k] = delta
  87.         for j in w_dict['hidden']:
  88.            
  89.             grad_dict['output'][k][j] += delta * act['hidden'][j]
  90.         grad_dict['output'][k][bias_h] += delta
  91.     return grad_dict, deltas
  92.  
  93.  
  94. def grad_hidden(act, target, grad_dict, w_dict, deltas_o):
  95.  
  96.     bias_i = len(act['input'])
  97.     bias_h = len(act['hidden'])
  98.     act['input'][bias_i] = 1.0
  99.     act['hidden'][bias_h] = 1.0
  100.  
  101.     delta = {}
  102.     #w_dict['hidden'].keys()[0]
  103.     #for j in w_dict['hidden'][0]:
  104.     for j in w_dict['hidden']:
  105.  
  106.  
  107.         #print "j", j
  108.         sum_d = 0.0
  109.         for k in w_dict['output']:
  110.             sum_d += deltas_o[k] * w_dict['output'][k][j]
  111.  
  112.         delta[j] = sum_d * grad_activation(act['hidden'][j])
  113.        
  114.  
  115.     for j in w_dict['hidden']:
  116.  
  117.         for i in w_dict['hidden'][0]:
  118.             #print j, i
  119.             #print delta[j]
  120.            
  121.             grad_dict['hidden'][j][i] += delta[j] * act['input'][i]            
  122.        
  123.        
  124.         #grad_dict['hidden'][j][bias_i] += delta[j]
  125.    
  126.     del act['input'][bias_i]
  127.     del act['hidden'][bias_h]
  128.     return grad_dict
  129.  
  130.  
  131. def InitializeGrad(w_dict):
  132.    
  133.     grad = {}
  134.     grad['output'] = {}
  135.     grad['hidden'] = {}
  136.     for k in w_dict['output']:
  137.         grad['output'][k] = {}
  138.         for j in w_dict['output'][k]:
  139.             grad['output'][k][j]  = 0.0
  140.  
  141.     for j in w_dict['hidden']:
  142.         grad['hidden'][j] = {}
  143.         for i in w_dict['hidden'][j]:
  144.             grad['hidden'][j][i]  = 0.0
  145.    
  146.     return grad
  147.    
  148.  
  149. def UpdateWeights(w_dict, grad, pgrad, rate, mom):
  150.     ## do output first
  151.     max_grad = 0
  152.     for k in w_dict['output']:
  153.         for j in w_dict['output'][k]:
  154.             corr = rate * grad['output'][k][j] + mom * pgrad['output'][k][j]
  155.            
  156.             w_dict['output'][k][j] += corr
  157.             pgrad['output'][k][j] = corr
  158.             if abs(grad['output'][k][j]) > max_grad:
  159.                 max_grad = grad['output'][k][j]
  160.  
  161.     for j in w_dict['hidden']:
  162.         for i in w_dict['hidden'][j]:
  163.             corr = rate * grad['hidden'][j][i] + mom * pgrad['hidden'][j][i]
  164.            
  165.             w_dict['hidden'][j][i] += corr
  166.             pgrad['hidden'][j][i] = corr
  167.  
  168.             if abs(grad['hidden'][j][i]) > max_grad:
  169.                 max_grad = grad['hidden'][j][i]
  170.     return w_dict, abs(max_grad)
  171.  
  172.    
  173. def SumGrad(start, end, grad):
  174.     grad_sum = {}
  175.     grad_sum['output'] = {}
  176.     for k in grad[start]['output']:
  177.         grad_sum['output'][k] = {}
  178.         for j in grad[start]['output'][k]:
  179.             grad_sum['output'][k][j] = sum([float(grad[pres]['output'][k][j]) for pres in xrange(start, end)])
  180.            
  181.     grad_sum['hidden'] = {}
  182.     for j in grad[start]['hidden']:
  183.         grad_sum['hidden'][j] = {}
  184.         for i in grad[start]['hidden'][j]:
  185.             grad_sum['hidden'][j][i] = sum([float(grad[pres]['hidden'][j][i]) for pres in xrange(start, end)])
  186.    
  187.     return grad_sum
  188.  
  189. def printNN(w_dict):
  190.     # Print out our NN in a more pretty format
  191.     op_str = ""
  192.     for k in xrange(0,len(w_dict['output'])):
  193.         for j in xrange(0,len(w_dict['output'][k])):
  194.             op_str += "["+str(k)+","+str(j)+ "]: " + str("{:.3f}".format(float(w_dict['output'][k][j]))) + " "
  195.     print "OP: " + op_str
  196.     op_str = ""
  197.     for j in xrange(0,len(w_dict['hidden'])):
  198.         for i in xrange(0,len(w_dict['hidden'][j])):
  199.             op_str += "["+str(j)+","+str(i)+ "]: " + str("{:.3f}".format(float(w_dict['hidden'][j][i]))) + " "
  200.     print "HD: " + op_str
  201.  
  202.  
  203. def randomize(value, scale):
  204.     if value <= 0:
  205.         value += scale
  206.     else:
  207.         value -= scale
  208.     return value   
  209.  
  210.  
  211. def no_presentations(Npresentations, Nepochs):
  212.     epoch_dict = {}
  213.     current = Npresentations
  214.     average_per_epoch = Npresentations / Nepochs
  215.     print "Average presentations per Epoch: ", average_per_epoch
  216.     for epoch in xrange(0, Nepochs):
  217.         Nexamples = random.randint(1, average_per_epoch)
  218.        
  219.         if current - Nexamples > 0:
  220.             current -= Nexamples
  221.         else:
  222.             Nexamples = current
  223.             current = 0
  224.         epoch_dict[epoch] = Nexamples
  225.     return epoch_dict
  226.    
  227. def SplitEpoch(presentations, NoSplits):
  228.    
  229.     rand_lst = []
  230.     tot_lst = []
  231.    
  232.     for no in xrange(0, NoSplits-1):
  233.         rand = random.randint(0, presentations/NoSplits)
  234.         if sum(rand_lst) + rand > presentations:
  235.             rand_lst.append(0)
  236.         else:
  237.             rand_lst.append(rand)
  238.     rand_lst.append(presentations - sum(rand_lst))
  239.     random.shuffle(rand_lst)
  240.     total = 0
  241.     tot_lst.append(total)
  242.     for i in xrange(1, NoSplits+1):
  243.         total = sum(rand_lst[0:i])
  244.         tot_lst.append(total)
  245.  
  246.     return tot_lst
  247.  
  248.  
  249.  
  250. def build_xor_train(Npresentations, Nepochs):
  251.     scale = 0.1
  252.     train_dict = {}
  253.     epoch_dict = no_presentations(Npresentations, Nepochs)
  254.     for epoch in epoch_dict:
  255.         pres_per_epoch = epoch_dict[epoch]
  256.         train_dict[epoch] = {}
  257.  
  258.         split_epoch = SplitEpoch(pres_per_epoch, 4)
  259.         for i in xrange(0, len(split_epoch)-1):
  260.            
  261.             for pres in xrange(split_epoch[i], split_epoch[i+1]):
  262.                 a = random.randint(0,1)
  263.                 b = random.randint(0,1)
  264.                 c = a ^ b
  265.                 #print a, b, c
  266.                 train_dict[epoch][pres] = {}
  267.                 train_dict[epoch][pres]['input'] = {}
  268.                 train_dict[epoch][pres]['output'] = {}
  269.                 train_dict[epoch][pres]['input'][0] = randomize(a,scale)
  270.                 train_dict[epoch][pres]['input'][1] = randomize(b,scale)
  271.                 train_dict[epoch][pres]['output'][0] = randomize(c,scale)
  272.            
  273.     return train_dict      
  274.  
  275.  
  276. def find_grad(epoch_train_dict, w_dict, rate, mom, grad=None):
  277.    
  278.     error = 0.0
  279.    
  280.    
  281.     if grad == None:
  282.         grad = {}
  283.         grad = InitializeGrad(w_dict)
  284.         print "Initalized gradients..."
  285.        
  286.     mse_dict = {}
  287.     mse_dict[0] = {}
  288.     count = 0
  289.     for pres in epoch_train_dict:
  290.         pgrad = deepcopy(grad)     
  291.         count+=1
  292.         act_dict = {}
  293.         act_dict = computeFF(w_dict, epoch_train_dict[pres]['input'])
  294.         grad, deltas_o = grad_output(act_dict, epoch_train_dict[pres]['output'], grad, w_dict)
  295.         grad = grad_hidden(act_dict, epoch_train_dict[pres]['output'], grad, w_dict, deltas_o)
  296.  
  297.  
  298.         mse_dict[pres] = computeMSE(epoch_train_dict[pres]['output'], act_dict['output'])
  299.     if count > 0:
  300.         #print "Update Weights..."
  301.         w_dict, max_grad = UpdateWeights(w_dict, pgrad, grad, rate, mom)
  302.    
  303.     if len(mse_dict) > 0:
  304.         error = sum(mse_dict.values())/float(len(mse_dict))
  305.    
  306.     return float(error), grad, w_dict
  307.  
  308.  
  309.  
  310.  
  311. def evaluate_XOR(w_dict):
  312.     #net = {}
  313.     net = computeFF(w_dict, {0:0.0, 1:0.0})
  314.     print {0:0.0, 1:0.0}, net['output']
  315.     #net = {}  
  316.     net = computeFF(w_dict, {0:0.0, 1:1.0})
  317.     print {0:0.0, 1:1.0}, net['output']
  318.     #net = {}
  319.     net = computeFF(w_dict, {0:1.0, 1:0.0})
  320.     print {0:1.0, 1:0.0}, net['output']
  321.     #net = {}
  322.     net = computeFF(w_dict, {0:1.0, 1:1.0})
  323.     print {0:1.0, 1:1.0}, net['output']
  324.  
  325.  
  326.  
  327. if __name__ == "__main__":
  328.  
  329.     #### Neural Network Structure Parameters for three layers ####
  330.    
  331.     Ninput = 2
  332.     Nhidden = 2
  333.     Noutput = 1
  334.     Weight = 4 #Range of weights for each node, between -Weight and Weight
  335.  
  336.     ##############################################################
  337.  
  338.  
  339.     output = False
  340.     Npresentations = 4000000
  341.     Nepochs = 100000
  342.     grad_tol = 0.001
  343.     error_tol = 0.0001
  344.     rate = 0.001
  345.     mom = 0.1
  346.     fraction_epoch = 0.10
  347.  
  348.     min_epoch = fraction_epoch * Nepochs
  349.     batch_update = False # update after iteration
  350.  
  351.     w_dict = buildNN(Ninput, Nhidden, Noutput, Weight)
  352.     print w_dict
  353.     print "Press Enter..."
  354.     raw_input()
  355.     xor_train = build_xor_train(Npresentations, Nepochs)
  356.    
  357.    
  358.  
  359.     w_init = deepcopy(w_dict)
  360.     if output:
  361.         print "Initial Evaluation"
  362.         evaluate_XOR(w_dict)
  363.         print "============================"
  364.     grad = None
  365.     total_pres = 0
  366.     exit_type = 'NO'
  367.     Nepoch_exit = fraction_epoch*Nepochs
  368.     Nepoch_exit = min_epoch
  369.     with open("error.txt", 'w+') as fout:          
  370.         for epoch in xor_train:
  371.             if len(xor_train[epoch]) > 0:
  372.                 if output:
  373.                     print "Epoch: ", epoch, " Presentations: ", len(xor_train[epoch]), " Epoch: ", epoch, " of: ", len(xor_train)
  374.                    
  375.                     print "========BEFORE========="
  376.                     printNN(w_dict)
  377.                     print "========BEFORE========="    
  378.                 error, grad, w_dict = find_grad(xor_train[epoch], w_dict, rate, mom, grad)
  379.                 print "Error: ", error
  380.                 total_pres += len(xor_train[epoch])
  381.                 if output:
  382.                     print "========AFTER=========" 
  383.                     printNN(w_dict)
  384.                     print "========AFTER=========" 
  385.                     print "Error: ", error
  386.                
  387.                 fout.write(str(total_pres) + " " + str(error)+"\n")
  388.                 if error <= error_tol and epoch > Nepoch_exit:
  389.                     print "E: ", error,
  390.                     print total_pres
  391.                     print "==================================================================="
  392.                     print "Error Tolerance Reached"
  393.                     print "==================================================================="
  394.                     exit_type = 'MSE'
  395.                     break
  396.         if exit_type == 'MSE':
  397.             evaluate_XOR(w_dict)
  398.    
  399.     print "Initial"
  400.     evaluate_XOR(w_init)
  401.     print "Final"
  402.     evaluate_XOR(w_dict)
  403.  
  404.     print w_dict
  405.  
  406.     print time.ctime()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement