Reinforcement Learning w/ Keras+OpenAI: The Basics

Yash Patel
Jul 26, 2017
We start by gathering training data. The naive approach is to play many episodes with purely random actions, recording each observation together with the one-hot encoded action taken:

for _ in range(10000):
    observation = env.reset()
    training_sampleX, training_sampleY = [], []
    for step in range(sim_steps):
        # Pick a random action: 0 (push cart left) or 1 (push cart right)
        action = np.random.randint(0, 2)
        one_hot_action = np.zeros(2)
        one_hot_action[action] = 1
        training_sampleX.append(observation)
        training_sampleY.append(one_hot_action)

        observation, reward, done, _ = env.step(action)
        if done:
            break
    trainingX += training_sampleX
    trainingY += training_sampleY
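
As a quick sanity check (a minimal sketch; this post uses the pre-0.26 gym API throughout, where reset() returns just the observation and step() returns a 4-tuple), you can confirm the spaces the loop above relies on:

import gym

env = gym.make("CartPole-v0")
print(env.observation_space)  # Box(4,): cart position, cart velocity, pole angle, pole tip velocity
print(env.action_space)       # Discrete(2): 0 pushes the cart left, 1 pushes it right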
Random play mostly produces poor behavior, though, so we only keep episodes whose total reward clears a minimum score. The full data-gathering function:

import numpy as np

def gather_data(env):
    min_score = 50
    sim_steps = 500
    trainingX, trainingY = [], []
    scores = []
    for _ in range(10000):
        observation = env.reset()
        score = 0
        training_sampleX, training_sampleY = [], []
        for step in range(sim_steps):
            action = np.random.randint(0, 2)
            one_hot_action = np.zeros(2)
            one_hot_action[action] = 1
            training_sampleX.append(observation)
            training_sampleY.append(one_hot_action)

            observation, reward, done, _ = env.step(action)
            score += reward
            if done:
                break
        # Keep only episodes that performed well enough to learn from
        if score > min_score:
            scores.append(score)
            trainingX += training_sampleX
            trainingY += training_sampleY
    trainingX, trainingY = np.array(trainingX), np.array(trainingY)
    print("Average: {}".format(np.mean(scores)))
    print("Median: {}".format(np.median(scores)))
    return trainingX, trainingY
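
A quick usage sketch (assuming, as the training script below does, that gather_data lives in a file named data.py):

import gym
from data import gather_data

env = gym.make("CartPole-v0")
trainingX, trainingY = gather_data(env)
print(trainingX.shape, trainingY.shape)  # (N, 4) observations and (N, 2) one-hot actions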
With data in hand, we define a simple fully connected network that maps the 4-dimensional CartPole observation to a softmax distribution over the two actions. The heavy dropout guards against overfitting the noisy random-play data:

from keras.models import Sequential
from keras.layers import Dense, Dropout

def create_model():
    model = Sequential()
    # Input: the 4-dimensional CartPole observation
    model.add(Dense(128, input_shape=(4,), activation="relu"))
    model.add(Dropout(0.6))

    model.add(Dense(256, activation="relu"))
    model.add(Dropout(0.6))

    model.add(Dense(512, activation="relu"))
    model.add(Dropout(0.6))

    model.add(Dense(256, activation="relu"))
    model.add(Dropout(0.6))

    model.add(Dense(128, activation="relu"))
    model.add(Dropout(0.6))
    # Output: probability of each of the two actions
    model.add(Dense(2, activation="softmax"))

    model.compile(
        loss="categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy"])
    return model
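
To inspect the resulting architecture (a quick sketch, assuming create_model lives in model.py as the import below suggests):

from model import create_model

model = create_model()
model.summary()  # roughly 330k trainable parameters across the five hidden layers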
Finally, we tie it together: gather the filtered random-play data, fit the network on it, and evaluate the learned policy by greedily taking the action the model scores highest:

import gym
import numpy as np
from data import gather_data
from model import create_model

def predict():
    env = gym.make("CartPole-v0")
    trainingX, trainingY = gather_data(env)
    model = create_model()
    model.fit(trainingX, trainingY, epochs=5)

    scores = []
    num_trials = 50
    sim_steps = 500
    for trial in range(num_trials):
        observation = env.reset()
        score = 0
        for step in range(sim_steps):
            # Greedily pick the action the model assigns the highest probability
            action = np.argmax(model.predict(
                observation.reshape(1, 4)))
            observation, reward, done, _ = env.step(action)
            score += reward
            if done:
                break
        scores.append(score)

    print(np.mean(scores))
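
Once trained, you can also watch the policy balance the pole. A sketch, assuming env and the trained model from predict() are in scope (a hypothetical modification), and noting that gym 0.26+/gymnasium changed the API so that reset() returns (observation, info) and step() returns a 5-tuple:

# assumes `env` and the trained `model` from predict() above
observation = env.reset()
done = False
while not done:
    env.render()  # opens a window showing the cart and pole
    action = np.argmax(model.predict(observation.reshape(1, 4)))
    observation, reward, done, _ = env.step(action)
env.close()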
