Commit 13f62fdc authored by Yuxin Wu's avatar Yuxin Wu

discretizer

parent fdad5c4f
......@@ -11,4 +11,5 @@ To run:
./DQN.py --rom breakout.rom --gpu 0
```
This implementation reproduces the claimed performance on the games I've tested (curves will be available soon).
A demo trained with Double-DQN on breakout is available at [youtube](https://youtu.be/o21mddZtE5Y).
## ResNet
Implement the paper "Deep Residual Learning for Image Recognition", [http://arxiv.org/abs/1512.03385](http://arxiv.org/abs/1512.03385)
Reproduces the results of the paper "Deep Residual Learning for Image Recognition", [http://arxiv.org/abs/1512.03385](http://arxiv.org/abs/1512.03385)
with the variants proposed in "Identity Mappings in Deep Residual Networks", [https://arxiv.org/abs/1603.05027](https://arxiv.org/abs/1603.05027).
The train error shown here is a moving average of the error rate of each batch in training.
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# File: discretize.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
from . import logger, memoized
from abc import abstractmethod, ABCMeta
import numpy as np
from six.moves import range
__all__ = ['UniformDiscretizer1D']
@memoized
def log_once(s):
    """Emit ``s`` as a warning through the package logger.

    The function is wrapped in ``memoized``; presumably that caches the call
    per distinct message so a repeated warning is only logged the first
    time -- confirm against the ``memoized`` helper.

    Args:
        s: the warning message to log.
    """
    logger.warn(s)
# just placeholder
class Discretizer(object):
    """Interface for objects that map a value to one of a fixed set of bins."""

    # NOTE(review): the ``__metaclass__`` attribute is the Python 2 spelling;
    # Python 3 ignores it, so abstract methods are not enforced there.
    __metaclass__ = ABCMeta

    @abstractmethod
    def get_nr_bin(self):
        """Return the total number of bins."""

    @abstractmethod
    def get_bin(self, v):
        """Return the index of the bin that the value ``v`` falls into."""
class Discretizer1D(Discretizer):
    """Marker base class for discretizers that operate on a single scalar."""
class UniformDiscretizer1D(Discretizer1D):
    """Discretize a scalar into equal-width bins spanning ``[minv, maxv]``.

    Bin ``i`` starts at ``minv + i * spacing``. The number of bins is
    ``ceil((maxv - minv) / spacing)``, so the last bin may be only partially
    inside the range. Values outside the range are clamped to the first or
    last bin.
    """

    def __init__(self, minv, maxv, spacing):
        """
        Args:
            minv: lower bound of the range (converted to float).
            maxv: upper bound of the range (converted to float).
            spacing: width of each bin (converted to float).
        """
        self.minv = float(minv)
        self.maxv = float(maxv)
        self.spacing = float(spacing)
        # np.ceil returns a float, but the bin count is used both as an array
        # shape and as an index, so it has to be stored as a real int.
        self.nr_bin = int(np.ceil((self.maxv - self.minv) / self.spacing))

    def get_nr_bin(self):
        """Return the number of bins as an int."""
        return self.nr_bin

    def get_bin(self, v):
        """Return the integer index of the bin containing ``v``.

        Values below ``minv`` map to bin 0 and values above ``maxv`` map to
        the last bin; in both cases a warning is sent through ``log_once``.
        """
        if v < self.minv:
            log_once("UniformDiscretizer1D: value smaller than min!")
            return 0
        if v > self.maxv:
            log_once("UniformDiscretizer1D: value larger than max!")
            return self.nr_bin - 1
        # The clip guards against v == maxv landing one past the last bin.
        return int(np.clip(
            (v - self.minv) / self.spacing,
            0, self.nr_bin - 1))

    def get_distribution(self, v, smooth_factor=0.05, smooth_radius=2):
        """Return a smoothed one-hot distribution over bins for ``v``.

        The bin containing ``v`` gets weight 1 and each neighbour at distance
        ``k`` (up to ``smooth_radius``) gets ``smooth_factor ** k``; the
        result is then normalized to sum to 1. When ``v`` lies on or outside
        the range boundaries the plain one-hot vector is returned unsmoothed.

        Args:
            v: the value to encode.
            smooth_factor: base of the geometric weight given to neighbours.
            smooth_radius: how many neighbouring bins on each side get weight.

        Returns:
            A float32 numpy array of shape ``(nr_bin,)``.
        """
        b = self.get_bin(v)
        ret = np.zeros((self.nr_bin,), dtype='float32')
        ret[b] = 1.0
        if v >= self.maxv or v <= self.minv:
            return ret
        for k in range(1, smooth_radius + 1):
            weight = smooth_factor ** k
            # Neighbours that would fall off either end are simply skipped.
            if b + k < self.nr_bin:
                ret[b + k] = weight
            if b - k >= 0:
                ret[b - k] = weight
        ret /= ret.sum()
        return ret
if __name__ == '__main__':
    # Manual playground: build a sample discretizer and open an IPython
    # shell so it can be inspected interactively as ``u``.
    import IPython as IP
    u = UniformDiscretizer1D(-10, 10, 0.12)
    IP.embed(config=IP.terminal.ipapp.load_default_config())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment