# Module-level slots for the most recently built optimizer op and
# prediction tensor; (re)populated inside train().
optimizer = None
prediction = None

def train(hidden, training_rate=0.1, decay_rate=0.96, beta=0.0):
    """Train a one-hidden-layer ReLU MLP regressor and return hold-out accuracy.

    Trains full-batch on the first 1000 rows of the module-level DataFrame
    ``df`` (last column is the target) and evaluates on the remaining rows.

    Args:
        hidden: Number of units in the hidden layer.
        training_rate: Initial learning rate for exponential decay.
        decay_rate: Multiplicative decay applied every 1000 steps (staircase).
        beta: L2 regularization strength. Defaults to 0.0, which reproduces
            the original behavior (the loss used ``0 * regularizers``).

    Returns:
        The value of the module-level ``accuracy`` function computed on the
        hold-out predictions vs. ``df.iloc[1000:, -1]``.

    NOTE(review): relies on module-level ``tf``, ``df``, ``FN`` (feature
    count), and ``accuracy`` — confirm these exist at call time.
    """
    graph = tf.Graph()
    with graph.as_default():
        # Placeholders: FN feature columns, scalar target per row.
        x = tf.placeholder(tf.float32, [None, FN])
        y = tf.placeholder(tf.float32, [None, 1])

        # Two-layer perceptron: FN -> hidden (ReLU) -> 1 (linear).
        W1 = tf.Variable(tf.truncated_normal([FN, hidden], stddev=0.001))
        b1 = tf.Variable(tf.zeros([hidden]))
        W2 = tf.Variable(tf.truncated_normal([hidden, 1], stddev=0.001))
        b2 = tf.Variable(tf.zeros([1]))

        inter1 = tf.nn.relu(tf.matmul(x, W1) + b1)
        logits = tf.matmul(inter1, W2) + b2

        regularizers = tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2)
        # Mean squared error; beta defaults to 0 so regularization is off
        # unless the caller opts in (matches the original 0*regularizers).
        loss = tf.reduce_mean(tf.square(logits - y)) + beta * regularizers

        global_step = tf.Variable(0, trainable=False)
        # Staircase decay: lr = training_rate * decay_rate ** (step // 1000).
        learning_rate = tf.train.exponential_decay(
            training_rate, global_step, 1000, decay_rate, staircase=True)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
            loss, global_step=global_step)
        prediction = logits

    ac = []  # accuracy samples taken every 5000 steps
    st = []  # the steps at which those samples were taken

    with tf.Session(graph=graph) as session:
        # initialize_all_variables() is deprecated; this is the TF1
        # replacement with identical semantics.
        session.run(tf.global_variables_initializer())

        # Full-batch training on the first 1000 rows; the feed dict is
        # loop-invariant, so build it once instead of 25000 times.
        train_feed = {
            x: df.iloc[:1000, :-1].values,
            y: df.iloc[:1000, -1].values[:, None],
        }
        for step in range(25000):
            _, train_loss, predict = session.run(
                [optimizer, loss, prediction], feed_dict=train_feed)
            if step % 5000 == 0:
                acc = accuracy(predict.flatten(),
                               df.iloc[:1000, -1].values[:1000])
                ac.append(acc)
                st.append(step)

        # Evaluate on the hold-out rows (index >= 1000).
        predict = session.run(
            prediction,
            feed_dict={x: df.iloc[1000:, :-1].values,
                       y: df.iloc[1000:, -1].values[:, None]})

    return accuracy(predict.flatten(), df.iloc[1000:, -1].values)