### Bibliography on computational learning

*(Refs: 1072)*

```bibtex
@InProceedings{dabekk-ifasofa-92,
  author    = "T. Dean and D. Angluin and K. Basye and S. Engelson and L. Kaelbling and E. Kokkevis and O. Maron",
  title     = "Inferring finite automata with stochastic output functions and an application to map learning",
  booktitle = "Proceedings of AAAI-92",
  publisher = "AAAI",
  year      = "1992",
  pages     = "208--214",
  annote    = "San Jose, California, July 12-17, 1992",
}

@InProceedings{fkrrss-eltfarw-93,
  author    = "Y. Freund and M. Kearns and D. Ron and R. Rubinfeld and R. Schapire and L. Sellie",
  title     = "Efficient learning of typical finite automata from random walks",
  booktitle = "Proc. 25th Annu. ACM Sympos. Theory Comput.",
  publisher = "ACM Press, New York, NY",
  year      = "1993",
  pages     = "315--324",
}

@InProceedings{kv-cllbffa-89,
  author    = "M. Kearns and L. G. Valiant",
  title     = "Cryptographic limitations on learning {B}oolean formulae and finite automata",
  booktitle = "Proc. of the 21st Symposium on Theory of Computing",
  publisher = "ACM Press, New York, NY",
  year      = "1989",
  pages     = "433--444",
  comment   = "To appear in J. ACM",
}
```

### 1981

*(88 KB)*

```bibtex
@Book{IB-D82026,
  author     = "S. Lakshmivarahan",
  title      = "Learning Algorithms: Theory and Applications",
  publisher  = "Springer",
  address    = "New York",
  year       = "1981",
  ISBN       = "0-387-90640-1",
  descriptor = "Ergodic, Learning Algorithm, Markov Chain, Game Theory, Stochastic, Finite Automaton, Probability, Automaton, Artificial Intelligence, Simulation",
  annote     = "This book deals with a powerful class of learning algorithms that have been developed over the past two decades in the context of learning systems modelled by finite-state probabilistic automata. 1. Introduction 2. Ergodic learning algorithms 3. Absolutely expedient learning algorithms 4. Time-varying learning algorithms 5. Two-person zero-sum sequential stochastic games with imperfect and incomplete information -- game matrix with saddle-point in pure strategies 6. Two-person zero-sum sequential stochastic games with imperfect and incomplete information -- general case 7. Two-person decentralized team problem with incomplete information 8. Control of a Markov chain with unknown dynamics and cost-structure",
}
```
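The ergodic and absolutely expedient schemes covered by chapters 2-3 update a probability vector over the automaton's actions from a binary reinforcement signal. As a minimal illustration (not from the book; the learning rate and the two-armed environment are assumptions), here is the classic linear reward-inaction scheme from this family:

```python
import random

def l_ri_step(p, chosen, reward, a=0.1):
    """One linear reward-inaction update of the action probabilities p.

    On reward, probability mass moves toward the chosen action;
    on penalty, p is left unchanged (the 'inaction' part).
    """
    if reward:
        p = [(1 - a) * pi for pi in p]
        p[chosen] += a  # chosen action gets p + a*(1 - p)
    return p

# Toy two-armed environment: action 1 pays off more often.
payoff = [0.3, 0.7]
p = [0.5, 0.5]
for _ in range(2000):
    action = random.choices([0, 1], weights=p)[0]
    p = l_ri_step(p, action, random.random() < payoff[action])

print(p)  # p[1] should end up close to 1: the automaton settles on the better action
```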

### Bibliography

*(Refs: 1401)*

```bibtex
@InProceedings{Zhou86,
  author    = "H. Zhou and J. J. Grefenstette",
  title     = "Induction of finite automata by genetic algorithms",
  booktitle = "Proc. 1986 Int. Conf. on Systems, Man and Cybernetics",
  pages     = "170--174",
  year      = "1986",
  abstract  = "The construction of finite automata with n states from examples is known to be an NP-complete problem. This paper deals with the conceptual learning problem of constructing the finite automaton with fewest states intended by the teacher rather than just any finite-state automaton which accurately satisfies the examples. The process of learning is formulated as a search based on incomplete information for finite automata equivalent to the language specified by some regular expression. The goal of this research is to show the robustness and effectiveness of genetic algorithms as adaptive search methods.",
  note      = "ZHOU86",
}
```
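The paper's search can be pictured as a GA over transition tables scored against labelled example strings. A toy sketch of that loop; the genome encoding, mutation operator, and parity-language task are illustrative guesses, not Zhou and Grefenstette's exact setup:

```python
import random

N_STATES, ALPHABET = 4, "01"

def random_fsa():
    """Genome: transition table delta[state][symbol] plus accepting-state flags."""
    delta = [[random.randrange(N_STATES) for _ in ALPHABET] for _ in range(N_STATES)]
    accept = [random.random() < 0.5 for _ in range(N_STATES)]
    return delta, accept

def accepts(fsa, string):
    delta, accept = fsa
    state = 0
    for ch in string:
        state = delta[state][ALPHABET.index(ch)]
    return accept[state]

def fitness(fsa, examples):
    """Fraction of labelled example strings classified correctly."""
    return sum(accepts(fsa, s) == label for s, label in examples) / len(examples)

def mutate(fsa):
    delta, accept = [row[:] for row in fsa[0]], fsa[1][:]
    s = random.randrange(N_STATES)
    if random.random() < 0.8:
        delta[s][random.randrange(len(ALPHABET))] = random.randrange(N_STATES)
    else:
        accept[s] = not accept[s]
    return delta, accept

# Target language: binary strings with an even number of 1s.
examples = [(s, s.count("1") % 2 == 0)
            for s in ("", "0", "1", "01", "11", "101", "110", "0110", "1001")]
pop = [random_fsa() for _ in range(50)]
for gen in range(200):
    pop.sort(key=lambda f: fitness(f, examples), reverse=True)
    if fitness(pop[0], examples) == 1.0:
        break
    pop = pop[:10] + [mutate(random.choice(pop[:10])) for _ in range(40)]  # elitism + mutants

print(gen, fitness(pop[0], examples))
```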

### Lloyd Allison's Bibliography

*(Refs: 2960)*

```bibtex
@InProceedings{Zhou86,
  author    = "Ha-yong Zhou and J. J. Grefenstette",
  title     = "Induction of finite state automata by genetic algorithms.",
  booktitle = "Proc IEEE Conference on Systems Man and Cybernetics",
  volume    = "1",
  pages     = "170--174",
  month     = oct,
  year      = "1986",
  keywords  = "GA, genetic algorithm, FSA, inductive inference, II, learn, learning",
  abstract  = "Breeds FSA that describe given example binary strings. Limited to at most 8 states.",
}
```

### Description not available, full text in /Techreports/university-of-wisconsin

*(URL corrected by Y. K.)*

```bibtex
@TechReport{Maclin??,
  author      = "R. Maclin and J. W. Shavlik",
  title       = "Refining Domain Theories Expressed as Finite-State Automata",
  institution = "University of Wisconsin",
  number      = "MLW-91-MACLIN",
  url         = "ftp://ftp.cs.wisc.edu/machine-learning/shavlik-group/maclin.mlc91.ps.Z",
  abstract    = "The KBANN system uses neural networks to refine domain theories. Currently, domain knowledge in KBANN is expressed as non-recursive, propositional rules. We extend KBANN to domain theories expressed as finite-state automata. We apply finite-state KBANN to the task of predicting how proteins fold, producing a small but statistically significant gain in accuracy over both a standard neural network approach and a non-learning algorithm from the biological literature. Our method shows promise at solving this and other real-world problems that can be described in terms of state-dependent decisions.",
  note        = "(Proc. of the 1991 International Machine Learning Workshop.)",
}
```

(See this. -- A comment added by Y. K.)

*(URL corrected by Y. K.)*

```bibtex
@TechReport{Maclin??a,
  author      = "R. Maclin and J. W. Shavlik",
  title       = "Using Knowledge-Based Neural Networks to Improve Algorithms: Refining the Chou-Fasman Algorithm for Protein Folding",
  institution = "University of Wisconsin",
  number      = "AAAI-92-MACLIN",
  url         = "ftp://ftp.cs.wisc.edu/machine-learning/shavlik-group/maclin.aaai92.ps.Z",
  abstract    = "We describe a method for using machine learning to refine algorithms represented as generalized finite-state automata. The knowledge in an automaton is translated into an artificial neural network, and then refined with backpropagation on a set of examples. Our technique for translating an automaton into a network extends KBANN, a system that translates a set of propositional rules into a corresponding neural network. The extended system, FSKBANN, allows one to refine the large class of algorithms that can be represented as state-based processes. As a test, we use FSKBANN to refine the Chou-Fasman algorithm, a method for predicting how globular proteins fold. Empirical evidence shows that the refined algorithm FSKBANN produces is statistically significantly more accurate than both the original Chou-Fasman algorithm and a neural network trained using the standard approach.",
  note        = "(Proc. of the 1992 National Conference on AI.)",
}
```

The following article was added by Y. K.:

```bibtex
@article{maclin.mlj93,
  author  = "R. Maclin and J. Shavlik",
  title   = "Using knowledge-based neural networks to improve algorithms: Refining the {Chou-Fasman} algorithm for protein folding",
  journal = "Machine Learning",
  volume  = 11,
  pages   = "195--215",
  year    = 1993,
}
```

File name: maclin.mlj93.ps.Z

Abstract: This paper describes a connectionist method for refining algorithms represented as generalized finite-state automata. The method translates the rule-like knowledge in an automaton into a corresponding artificial neural network, and then refines the reformulated automaton by applying backpropagation to a set of examples. This technique for translating an automaton into a network extends the KBANN algorithm, a system that translates a set of propositional rules into a corresponding neural network. The extended system, FSKBANN, allows one to refine the large class of algorithms that can be represented as state-based processes. As a test, FSKBANN is used to improve the Chou-Fasman algorithm, a method for predicting how globular proteins fold. Empirical evidence shows that the multistrategy approach of FSKBANN leads to a statistically significantly more accurate solution than both the original Chou-Fasman algorithm and a neural network trained using the standard approach. Extensive statistics report the types of errors made by the Chou-Fasman algorithm, the standard neural network, and by the FSKBANN network.
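FSKBANN's central move, per the abstracts above, is to compile the automaton's rule-like transition knowledge into initial network weights and then refine those weights with backpropagation. A deliberately tiny sketch of compile-then-refine on a hypothetical one-unit "next state" rule; plain numpy gradient descent stands in for the real KBANN/FSKBANN translation and training machinery:

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

# KBANN-style translation of a rule into weights: the (assumed) domain
# theory says next_state = current_state OR input. Antecedents get large
# positive weights and the bias is set so either antecedent alone fires the unit.
w = np.array([4.0, 4.0])   # weights for [current_state, input]
b = -2.0

# Training examples (state, input) -> observed next state. The environment
# disagrees with the theory on one case: (1, 0) -> 0.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
y = np.array([0.0, 1.0, 0.0, 1.0])

# Refine the compiled weights with backpropagation (squared-error loss).
lr = 0.5
for _ in range(5000):
    out = sigmoid(X @ w + b)
    grad = (out - y) * out * (1 - out)   # dL/dz for each example
    w -= lr * X.T @ grad / len(X)
    b -= lr * grad.mean()

print(np.round(sigmoid(X @ w + b)))  # now matches the observed transitions
```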

```bibtex
@TechReport{Cherkauer??a,
  author      = "K. J. Cherkauer and J. W. Shavlik",
  title       = "Selecting Salient Features for Machine Learning from Large Candidate Pools through Parallel Decision-Tree Construction",
  institution = "University of Wisconsin",
  type        = "Technical Report",
  pages       = "28 pages",
  url         = "ftp://ftp.cs.wisc.edu/machine-learning/shavlik-group/cherkauer.mass-parallel-ai.ps.Z",
  abstract    = "The particular representation used to describe training and testing examples can have profound effects on an inductive algorithm's ability to learn. However, the space of possible representations is virtually infinite, so choosing a good representation is not a simple task. This chapter describes a method whereby the selection of a good input representation for classification tasks is automated. This technique, which we call DT-Select (``Decision Tree feature Selection''), builds decision trees, via a fast parallel implementation of ID3 (Quinlan, 1986), which attempt to correctly classify the training data. The internal nodes of the trees are features drawn from very large pools of complex general-purpose and domain-specific constructed features. Thus, the features included in the trees constitute compact and informative sets which can then be used as input representations for other learning algorithms attacking the same problem. We have implemented DT-Select on a parallel message-passing MIMD architecture, the Thinking Machines CM-5, enabling us to select from pools containing several hundred thousand features in reasonable time. We present here some work using this approach to produce augmentations of artificial neural network input representations for the molecular biology problem of predicting protein secondary structures.",
  note        = "(A version of this paper will appear in: Massively Parallel Artificial Intelligence, H. Kitano (ed). Menlo Park, CA: AAAI Press/The MIT Press (1993).)",
}
```
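DT-Select's recipe: grow a decision tree over a huge candidate feature pool, then keep only the features its internal nodes actually split on as the input representation for a downstream learner. A single-machine sketch with scikit-learn's tree standing in for the parallel ID3 implementation; the pool size, depth, and synthetic task are made up:

```python
import numpy as np
from sklearn.tree import DecisionTreeClassifier

rng = np.random.default_rng(0)

# A large pool of candidate features, most of them irrelevant noise.
n_examples, n_pool = 500, 1000
X = rng.normal(size=(n_examples, n_pool))
y = (X[:, 3] + X[:, 42] > 0).astype(int)  # only two pool features matter

# Grow a tree that tries to classify the training data; its internal
# nodes pick out a small, informative subset of the pool.
tree = DecisionTreeClassifier(max_depth=5, random_state=0).fit(X, y)
selected = np.unique(tree.tree_.feature[tree.tree_.feature >= 0])  # leaves are -2
print("selected features:", selected)

# Hand just those columns to another learner as its input representation.
X_compact = X[:, selected]
```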

### Technical Reports: Neuroprose

*(Refs: 105)*

```bibtex
@TechReport{Fahlman??,
  author      = "Scott E. Fahlman",
  title       = "The Recurrent Cascade-Correlation Architecture",
  institution = "Neuroprose",
  number      = "CMU-CS-91-100",
  url         = "ftp://archive.cis.ohio-state.edu/pub/neuroprose/fahlman.rcc.ps.Z",
  abstract    = "Recurrent Cascade-Correlation (RCC) is a recurrent version of the Cascade-Correlation learning architecture of Fahlman and Lebiere \cite{fahlman:cascor}. RCC can learn from examples to map a sequence of inputs into a desired sequence of outputs. New hidden units with recurrent connections are added to the network one at a time, as they are needed during training. In effect, the network builds up a finite-state machine tailored specifically for the current problem. RCC retains the advantages of Cascade-Correlation: fast learning, good generalization, automatic construction of a near-minimal multi-layered network, and the ability to learn complex behaviors through a sequence of simple lessons. The power of RCC is demonstrated on two tasks: learning a finite-state grammar from examples of legal strings, and learning to recognize characters in Morse code.",
  note        = "(NIPS 3 proceedings)",
}
```
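The heart of (R)CC is the candidate phase: each candidate hidden unit is trained to maximize the magnitude of covariance between its activations and the network's residual output errors, and the winner is installed with frozen input weights. A sketch of that score; the random data and candidate pool below are placeholders for real training:

```python
import numpy as np

def candidate_score(v, residuals):
    """Cascade-Correlation's S: magnitude of covariance between a candidate
    unit's activations v (one per training case) and each output's residual
    error, summed over outputs."""
    v_c = v - v.mean()
    e_c = residuals - residuals.mean(axis=0)   # residuals: (cases, outputs)
    return np.abs(v_c @ e_c).sum()

# Candidates are trained (e.g., by gradient ascent on S) and the best one is
# installed with frozen input weights; in RCC each candidate also carries a
# self-recurrent weight, so an installed unit can act as a state bit.
cases, outputs = 100, 2
rng = np.random.default_rng(1)
residuals = rng.normal(size=(cases, outputs))
candidates = rng.normal(size=(8, cases))       # activations of 8 candidate units
best = max(candidates, key=lambda v: candidate_score(v, residuals))
```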

### Technical Reports of the MIT Laboratory for Computer Science

*(Refs: 1170)*

```bibtex
@TechReport{MIT/LCS/TR-413,
  author      = "R. E. Schapire",
  title       = "{DIVERSITY}-{BASED} {INFERENCE} {OF} {FINITE} {AUTOMATA}",
  institution = "MIT Laboratory for Computer Science",
  number      = "MIT/LCS/TR-413",
  pages       = "49",
  month       = may,
  year        = "1988",
  price       = "USD 13.00",
  keywords    = "learning, automata, induction, inference of finite automata",
  abstract    = "We present a new procedure for inferring the structure of a finite-state automaton (FSA) from its input/output behavior, using access to the automaton to perform experiments. Our procedure uses a new representation for FSA's, based on the notion of equivalence between tests. We call the number of such equivalence classes the diversity of the automaton; the diversity may be as small as the logarithm of the number of states of the automaton. For the special class of permutation automata, we show that our inference procedure runs in time polynomial in the diversity and log(1/\delta), where \delta is a given upper bound on the probability that our procedure returns an incorrect result. (Since our procedure uses randomization to perform experiments, there is a certain controllable chance that it will return an erroneous result.) We also discuss techniques for handling more general automata. We present evidence for the practical efficiency of our approach. For example, our procedure is able to infer the structure of an automaton based on Rubik's Cube (which has approximately $10^{19}$ states) in about 2 minutes on a DEC MicroVax. This automaton is many orders of magnitude larger than possible with previous techniques, which would require time proportional at least to the number of global states. (Note that in this example, only a small fraction of the global states were even visited.) Finally, we present a new procedure for inferring automata of a special type in which the global state is composed of a vector of binary local state variables, all of which are observable (or visible) to the experimenter. Our inference procedure runs provably in time polynomial in the size of this vector (which happens to be the diversity of the automaton), even though the global state space may be exponentially larger. The procedure plans and executes experiments on the unknown automaton; we show that the number of input symbols given to the automaton during this process is (to within a constant factor) the best possible. Portions of this thesis are joint work with Ronald Rivest.",
}
```
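A test here is an input string whose value in a state is the output observed after feeding it; two tests are equivalent when they agree in every global state, and the diversity is the number of equivalence classes. A brute-force sketch on an assumed 3-bit shift register; it enumerates all states, so it only illustrates the definition, not Schapire's efficient procedure:

```python
from itertools import product

# Toy Moore machine: a 3-bit shift register over inputs {0, 1};
# the output is the oldest bit. States are bit-triples.
states = list(product([0, 1], repeat=3))
step = lambda s, a: (s[1], s[2], a)
out = lambda s: s[0]

def run(s, test):
    for a in test:
        s = step(s, a)
    return out(s)

# Two tests are equivalent iff they give the same output from every state.
# Tests of length >= 3 are constants here, so length <= 4 already covers
# every equivalence class of this machine.
tests = [t for n in range(5) for t in product([0, 1], repeat=n)]
classes = {tuple(run(s, t) for s in states) for t in tests}
print(len(states), len(classes))  # 8 states, diversity 5 (3 projections + 2 constants)
```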

```bibtex
@PhdThesis{MIT/LCS/TR-493,
  author   = "R. E. Schapire",
  title    = "{THE} {DESIGN} {AND} {ANALYSIS} {OF} {EFFICIENT} {LEARNING} {ALGORITHMS}",
  school   = "MIT Laboratory for Computer Science",
  type     = "Ph.{D}. Thesis",
  number   = "MIT/LCS/TR-493",
  pages    = "188",
  month    = feb,
  year     = "1991",
  price    = "USD 21.00",
  keywords = "machine learning, computational learning theory, concept learning, learning from examples, distribution-free learning, probably approximately correct learning, polynomial-time identification, exact identification, read-once formulas, probabilistic concepts, finite-state automata",
  abstract = "This thesis explores various theoretical aspects of machine learning with particular emphasis on techniques for designing and analyzing computationally efficient learning algorithms. Many of the results in this thesis are concerned with a model of concept learning proposed by Valiant. The thesis begins in Chapter 2 with a proof that any {"}weak{"} learning algorithm in this model that performs just slightly better than random guessing can be converted into one whose error can be made arbitrarily small. Several interesting consequences of this result are also described. Chapter 3 next explores in detail a simple but powerful technique for discovering the structure of an unknown read-once formula from random examples. An especially nice feature of this technique is its powerful resistance to noise. Chapter 4 considers a realistic extension of the PAC model to concepts that may exhibit uncertain or probabilistic behavior. A range of techniques are explored for designing efficient algorithms for learning such probabilistic concepts. In the last chapter, we present new algorithms for inferring an unknown finite-state automaton from its input-output behavior. This problem is motivated by that faced by a robot in unfamiliar surroundings who must, through experimentation, discover the {"}structure{"} of its environment. Portions of this thesis are joint work with Sally A. Goldman, Michael J. Kearns and Ronald L. Rivest.",
}
```
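Chapter 2 is the original hypothesis-boosting result: weak hypotheses that beat random guessing by some margin gamma can be combined into an arbitrarily accurate one. A quick numerical illustration of the amplification, under an independence assumption the actual construction does not need (it reweights and filters the data instead):

```python
from math import comb

def majority_error(n, gamma):
    """P(a majority vote of n independent hypotheses errs), where each
    hypothesis alone errs with probability 1/2 - gamma."""
    p = 0.5 - gamma
    return sum(comb(n, k) * p**k * (1 - p)**(n - k)
               for k in range(n // 2 + 1, n + 1))

# The vote's error falls off exponentially in n (a Chernoff bound).
for n in (1, 11, 101):
    print(n, round(majority_error(n, 0.1), 4))
```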

### ACM Symposium on Theory of Computing (STOC), 1969-1993, IEEE Symposium on Foundations of Computer Science (FOCS), 1960-1993

*(Refs: 2522)*

```bibtex
@InProceedings{KeaVal89,
  author    = "M. Kearns and L. G. Valiant",
  title     = "Cryptographic limitations on learning boolean formulae and finite automata",
  booktitle = "Proc.\ 21st ACM Symposium on Theory of Computing (STOC)",
  year      = "1989",
  pages     = "433--444",
}

@InProceedings{FreundEtAl93,
  author    = "Y. Freund and M. Kearns and D. Ron and R. Rubinfeld and R. E. Schapire and L. Sellie",
  title     = "Efficient learning of typical finite automata from random walks",
  booktitle = "Proc.\ 25th ACM Symposium on Theory of Computing (STOC)",
  year      = "1993",
  pages     = "315--324",
}
```

Search query: `"learning AND finite AND automat"`