added more neuro and cogsci refs

eringrant · Feb 15, 2019 · db27d1d · db27d1d
1 parent 16f6685
commit db27d1d
Show file tree

Hide file tree

Showing 2 changed files with 105 additions and 4 deletions.
diff --git a/all.bib b/all.bib
@@ -16,6 +16,16 @@ @article{singh1992transfer
   url = {https://link.springer.com/article/10.1007/BF00992700},
 }
 
+@article{russell1994provably,
+  title = {Provably bounded-optimal agents},
+  author = {Russell, Stuart J and Subramanian, Devika},
+  volume = {2},
+  pages = {575--609},
+  journaltitle = {Journal of Artificial Intelligence Research},
+  year = {1995},
+  url = {https://arxiv.org/abs/cs/9505103},
+}
+
 @inproceedings{thrun1994finding,
   title = {Finding structure in reinforcement learning},
   author = {Thrun, Sebastian and Schwartz, Anton},
@@ -40,6 +50,18 @@ @inproceedings{parr1997reinforcement
   url = {https://papers.nips.cc/paper/1384-reinforcement-learning-with-hierarchies-of-machines},
 }
 
+@article{schultz1997neural,
+  author       = {Schultz, Wolfram and Dayan, Peter and Montague, P Read},
+  title        = {A neural substrate of prediction and reward},
+  journaltitle = {Science},
+  volume       = {275},
+  number       = {5306},
+  pages        = {1593--1599},
+  year         = {1997},
+  publisher    = {American Association for the Advancement of Science},
+  url          = {http://science.sciencemag.org/content/275/5306/1593},
+}
+
 @article{sutton1999between,
   title = {Between {MDPs} and semi-{MDPs}: A framework for temporal abstraction in reinforcement learning},
   author = {Sutton, Richard S and Precup, Doina and Singh, Satinder},
@@ -124,6 +146,18 @@ @inproceedings{levy2011unified
   url = {https://ewrl.files.wordpress.com/2011/08/ewrl2011_submission_21.pdf},
 }
 
+@article{ribas2011neural,
+  title = {A neural signature of hierarchical reinforcement learning},
+  author = {Ribas-Fernandes, Jose J. F. and Solway, Alec and Diuk, Carlos and McGuire, Joseph T and Barto, Andrew G and Niv, Yael and Botvinick, Matthew M},
+  volume = {71},
+  number = {2},
+  pages = {370--379},
+  journaltitle = {Neuron},
+  publisher = {Elsevier},
+  year = {2011},
+  url = {https://www.ncbi.nlm.nih.gov/pubmed/21791294},
+}
+
 @inproceedings{sutton2011horde,
   title = {Horde: A scalable real-time architecture for learning knowledge from unsupervised sensorimotor interaction},
   author = {Sutton, Richard S and Modayil, Joseph and Delp, Michael and Degris, Thomas and Pilarski, Patrick M and White, Adam and Precup, Doina},
@@ -150,6 +184,18 @@ @incollection{diuk2013divide
   url = {https://link.springer.com/chapter/10.1007%2F978-3-642-39875-9_12},
 }
 
+@article{solway2014optimal,
+  title = {Optimal behavioral hierarchy},
+  author = {Solway, Alec and Diuk, Carlos and Córdova, Natalia and Yee, Debbie and Barto, Andrew G and Niv, Yael and Botvinick, Matthew M},
+  volume = {10},
+  number = {8},
+  pages = {e1003779},
+  journaltitle = {PLoS Computational Biology},
+  publisher = {Public Library of Science},
+  year = {2014},
+  url = {https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1003779},
+}
+
 @article{boureau2015deciding,
   title = {Deciding how to decide: Self-control and meta-decision making},
   author = {Boureau, Y-Lan and Sokol-Hessner, Peter and Daw, Nathaniel D},
@@ -274,6 +320,30 @@ @inproceedings{hausman2017multi
   url = {https://arxiv.org/abs/1705.10479},
 }
 
+@article{lieder2017strategy,
+  title = {Strategy selection as rational metareasoning},
+  author = {Lieder, Falk and Griffiths, Thomas L},
+  volume = {124},
+  number = {6},
+  pages = {762--794},
+  journaltitle = {Psychological Review},
+  publisher = {American Psychological Association},
+  year = {2017},
+  url = {http://cocosci.princeton.edu/falk/Strategy%20selection%20as%20rational%20metareasoning.pdf},
+}
+
+@article{momennejad2017successor,
+  title = {The successor representation in human reinforcement learning},
+  author = {Momennejad, Ida and Russek, Evan M and Cheong, Jin H and Botvinick, Matthew M and Daw, Nathaniel D and Gershman, Samuel J},
+  volume = {1},
+  number = {9},
+  pages = {680--692},
+  journaltitle = {Nature Human Behaviour},
+  publisher = {Nature Publishing Group},
+  year = {2017},
+  url = {https://www.nature.com/articles/s41562-017-0180-8},
+}
+
 @inproceedings{silver2017predictron,
   title = {The predictron: End-to-end learning and planning},
   author = {Silver, David and van Hasselt, Hado and Hessel, Matteo and Schaul, Tom and Guez, Arthur and Harley, Tim and Dulac{-}Arnold, Gabriel and Reichert, David P. and Rabinowitz, Neil C. and Barreto, André and Degris, Thomas},
@@ -314,6 +384,18 @@ @inproceedings{frans2018meta
   url = {https://arxiv.org/abs/1710.09767},
 }
 
+@article{gershman2018successor,
+  title = {The successor representation: its computational logic and neural substrates},
+  author = {Gershman, Samuel J},
+  volume = {38},
+  number = {33},
+  pages = {7193--7200},
+  journaltitle = {Journal of Neuroscience},
+  publisher = {Society for Neuroscience},
+  year = {2018},
+  url = {http://www.jneurosci.org/content/38/33/7193},
+}
+
 @inproceedings{ghosh2018divide,
   title = {Divide-and-conquer reinforcement learning},
   author = {Ghosh, Dibya and Singh, Avi and Rajeswaran, Aravind and Kumar, Vikash and Levine, Sergey},
@@ -386,6 +468,17 @@ @inproceedings{chang2019automatically
   url = {https://arxiv.org/abs/1807.04640},
 }
 
+@article{konidaris2019necessity,
+  author       = {Konidaris, George},
+  title        = {On the necessity of abstraction},
+  journaltitle = {Current Opinion in Behavioral Sciences},
+  volume       = {29},
+  pages        = {1--7},
+  year         = {2019},
+  publisher    = {Elsevier},
+  url          = {https://www.sciencedirect.com/science/article/pii/S2352154618302080},
+}
+
 @inproceedings{lowrey2019plan,
   title = {Plan Online, Learn Offline: Efficient Learning and Exploration via Model-Based Control},
   author = {Lowrey, Kendall and Rajeswaran, Aravind and Kakade, Sham and Todorov, Emanuel and Mordatch, Igor},

diff --git a/readings.md b/readings.md
@@ -6,15 +6,25 @@ email us at [organizers@spirl.info](mailto:organizers@spirl.info) if there's rel
 
 #### Cognitive Science
 * [@trommershauser2008decision]
-* [@botvinick2009hierarchically]
 * [@diuk2013divide]
+* [@solway2014optimal]
 * [@boureau2015deciding]
 * [@gershman2015novelty]
+* [@lieder2017strategy]
+* [@momennejad2017successor]
 * [@dubey2018investigating]
+* [@konidaris2019necessity]
+
+#### Neuroscience
+* [@schultz1997neural]
+* [@botvinick2009hierarchically]
+* [@ribas2011neural]
+* [@gershman2018successor]
 
 #### Hierarchical RL
 * [@dayan1992feudal]
 * [@sutton1999between]
+* [@parr1997reinforcement]
 * [@dietterich2000hierarchical]
 * [@levy2011unified]
 * [@bacon2017option]
@@ -30,6 +40,7 @@ email us at [organizers@spirl.info](mailto:organizers@spirl.info) if there's rel
 * [@saemundsson2018meta]
 
 #### Modularity in RL
+* [@singh1992transfer]
 * [@heess2016learning]
 * [@andreas2017modular]
 * [@devin2017learning]
@@ -45,10 +56,8 @@ email us at [organizers@spirl.info](mailto:organizers@spirl.info) if there's rel
 * [@osband2018randomized]
 
 #### Structure in RL
-* [@singh1992transfer]
 * [@thrun1994finding]
 * [@sutton1995td]
-* [@parr1997reinforcement]
 * [@littman2001predictive]
 * [@ponsen2009abstraction]
 * [@sutton2011horde]
@@ -57,7 +66,6 @@ email us at [organizers@spirl.info](mailto:organizers@spirl.info) if there's rel
 * [@silver2017predictron]
 * [@ok2018exploration]
 * [@sanchez2018graph]
-* [@lowrey2019plan]
 
 #### Transfer, Multi-Task and Lifelong RL
 * [@taylor2009transfer]