% NOTE(review): removed GitHub page chrome (archive banner, fork count, and the
% rendered line-number gutter 1-66) that was captured by the web scrape.
% It is not part of the LaTeX source and would break compilation.
\section[games]{Playing Games / Reinforcement Learning}
\begin{frame}
\frametitle{Learning to play Go}
\begin{center}
\includegraphics[width=0.5\textwidth]{go.jpg}
\end{center}
% Naive game-tree search is infeasible: cost grows exponentially as b^d.
\textbf{Challenge:} Computational effort scales with $b^d$
\begin{itemize}
% Consistent "Label $x$:" form for both factors of the b^d estimate.
\item \textbf{Breadth $b$:} number of legal moves per position\\ (Go $\approx 250$; Chess $\approx 35$)
\item \textbf{Depth $d$:} length of the game (Go $\approx 150$; Chess $\approx 80$)
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Reinforcement Learning}
% Discounted return: gamma^t down-weights rewards t steps in the future.
\textbf{Optimize reward $R = \sum_t \gamma^t r_t$ given}
\begin{itemize}
\item the previous states of the environment and actions of the agent $S$,
\item and the currently legal actions $A$.
\end{itemize}
\begin{exampleblock}{Key Idea}
Approximate the expected reward
\setlength{\abovedisplayskip}{0pt}
\setlength{\belowdisplayskip}{0pt}
% Bellman optimality: Q* maps state-action pairs to the reals (\mathbb{R},
% not \mathcal{R}); \mid gives correct relation spacing for conditioning.
\begin{align*}
Q^* &\colon S \times A \rightarrow \mathbb{R}\\
Q^*(s,a) &= \mathrm{E} \left(r + \gamma \max_{a'} Q^*(s', a') \mid s,a\right)
\end{align*}
using \textbf{action-value network or Q network} $Q(s,a; \vec{w}) \approx Q^*(s,a)$, which minimizes the loss-function:
% DQN loss (Mnih et al. 2015): expectation of the SQUARED error, with the
% current weights w_i inside Q; the target y_i uses the frozen weights w_{i-1}.
\begin{align*}
\mathcal{L}(\vec{w}_i) &= \mathrm{E}\left(\left(y_i - Q(s,a; \vec{w}_i)\right)^2\right) \\
y_i &= \mathrm{E} \left(r + \gamma \max_{a'} Q(s', a'; \vec{w}_{i-1}) \mid s,a\right)
\end{align*}
\end{exampleblock}
\end{frame}
\begin{frame}
\frametitle{Example: Playing Atari 2600 Games}
\begin{center}
\vspace{-1em}
% Overlay of three screenshots stacked vertically via absolute tikz
% coordinates (y = 0, -1.8, -4); positions are hand-tuned to the slide.
\begin{tikzpicture}
% Paper title/header image
\node[anchor=center,inner sep=0] at (0, 0) {\includegraphics[height=0.2\textheight]{atari_paper.png}};
% Row of example game screens
\node[anchor=center,inner sep=0] at (0, -1.8) {\includegraphics[width=\textwidth]{atari_games.png}};
% Result chart (per-game performance)
\node[anchor=center,inner sep=0] at (0, -4) {\includegraphics[width=\textwidth]{atari_result.png}};
\end{tikzpicture}
\end{center}
\textbf{Plays (simple) computer games with fixed architecture using only raw pixels as input}
\end{frame}
\begin{frame}
\frametitle{Example: Alpha Go}
\begin{center}
\vspace{-1em}
% Same absolute-coordinate tikz layout as the Atari slide:
% paper header on top, network figure below.
\begin{tikzpicture}
% Paper title/header image
\node[anchor=center,inner sep=0] at (0, 0) {\includegraphics[height=0.2\textheight]{go_paper.png}};
% AlphaGo policy/value network figure
\node[anchor=center,inner sep=0] at (0, -4) {\includegraphics[width=\textwidth]{go_network.png}};
\end{tikzpicture}
\end{center}
\end{frame}