@InProceedings{dabekk-ifasofa-92,
author = "T. Dean and D. Angluin and K. Basye and S. Engelson
and L. Kaelbling and E. Kokkevis and O. Maron",
title = "Inferring finite automata with stochastic output
functions and an application to map learning",
booktitle = "Proceedings of AAAI-92",
publisher = "AAAI",
year = "1992",
pages = "208--214",
annote = "San Jose, California, July 12-17, 1992",
}
@InProceedings{fkrrss-eltfarw-93,
author = "Y. Freund and M. Kearns and D. Ron and R. Rubinfeld
and R. Schapire and L. Sellie",
title = "Efficient learning of typical finite automata from
random walks",
booktitle = "Proc. 25th Annu. ACM Sympos. Theory Comput.",
publisher = "ACM Press, New York, NY",
year = "1993",
pages = "315--324",
}
@InProceedings{kv-cllbffa-89,
author = "M. Kearns and L. G. Valiant",
title = "Cryptographic limitations on learning {B}oolean
formulae and finite automata",
booktitle = "Proc. of the 21st Symposium on Theory of Computing",
publisher = "ACM Press, New York, NY",
year = "1989",
pages = "433--444",
comment = "To appear in J. ACM",
}
@Book{IB-D82026,
author = "S. Lakshmirvarahaan",
title = "Learning Algorithms Theory and Applications",
publisher = "Springer",
address = "New York",
year = "1981",
ISBN = "0-387-90640-1",
descriptor = "Ergodisch, Lernalgorithmus, Markovkette, Spieltheorie,
Stochastisch, Endlicher Automat, Wahrscheinlichkeit,
Automat, Kuenstliche Intelligenz, Simulation",
annote = "This book deals with a powerful class of learning
algorithms that have been developed over the past two
decades in the context of learning systems modelled by
finite state probabilistic automata. 1. Introduction 2.
Ergodic learning algorithms 3. Absolutely expediant
learning algorithms 4. Time varying learning algorithms
5. Two-person zero-sum sequential, stochastic games
with imperfect and incomplete information - game matrix
with suddle-point in pure strategies 6. Two-person
zero-sum sequential, stochastic games with imperfect
and incomplete information - general case 7. Two-person
decentralized team problem with incomplete information
8. Control of a Markov chain with unknown dynamics and
cost-structure",
}
@InProceedings{Zhou86,
author = "Ha-yong Zhou and J. J. Grefenstette",
title = "Induction of finite automata by genetic algorithms",
booktitle = "Proc. 1986 IEEE Int. Conf. on Systems, Man and
Cybernetics",
volume = "1",
pages = "170--174",
month = oct,
year = "1986",
keywords = "GA, genetic algorithm, FSA, inductive inference, II,
learn, learning",
abstract = "The construction of finite automata with n states from
examples is known to be an NP-complete problem. This
paper deals with the conceptual learning problem of
constructing the finite automaton with the fewest states
intended by the teacher, rather than just any finite
state automaton which accurately satisfies the
examples. The process of learning is formulated as a
search, based on incomplete information, for finite
automata equivalent to the language specified by some
regular expression. The goal of this research is to
show the robustness and effectiveness of genetic
algorithms as adaptive search methods.",
annote = "Breeds FSAs that describe given example binary strings;
limited to at most 8 states.",
}
@TechReport{Maclin??,
author = "R. Maclin and J. W. Shavlik",
title = "Refining Domain Theories Expressed as Finite-State
Automata",
institution = "University of Wisconsin",
number = "MLW-91-MACLIN",
url = "ftp://ftp.cs.wisc.edu/machine-learning/shavlik-group/maclin.mlc91.ps.Z",
abstract = "The KBANN system uses neural networks to refine domain
theories. Currently, domain knowledge in KBANN is
expressed as non-recursive, propositional rules. We
extend KBANN to domain theories expressed as
finite-state automata. We apply finite-state KBANN to
the task of predicting how proteins fold, producing a
small but statistically significant gain in accuracy
over both a standard neural network approach and a
non-learning algorithm from the biological literature.
Our method shows promise at solving this and other
real-world problems that can be described in terms of
state-dependent decisions.",
note = "(Proc. of the 1991 International Machine Learning
Workshop.)",
}
@TechReport{Maclin??a,
author = "R. Maclin and J. W. Shavlik",
title = "Using Knowledge-Based Neural Networks to Improve
Algorithms: Refining the Chou-Fasman Algorithm for
Protein Folding",
institution = "University of Wisconsin",
number = "AAAI-92-MACLIN",
url = "ftp://ftp.cs.wisc.edu/machine-learning/shavlik-group/maclin.aaai92.ps.Z",
abstract = "We describe a method for using machine learning to
refine algorithms represented as generalized
finite-state automata. The knowledge in an automaton is
translated into an artificial neural network, and then
refined with backpropagation on a set of examples. Our
technique for translating an automaton into a network
extends KBANN, a system that translates a set of
propositional rules into a corresponding neural
network. The extended system, FSKBANN, allows one to
refine the large class of algorithms that can be
represented as state-based processes. As a test, we use
FSKBANN to refine the Chou-Fasman algorithm, a method
for predicting how globular proteins fold. Empirical
evidence shows that the refined algorithm produced by
FSKBANN is statistically significantly more accurate than both
the original Chou-Fasman algorithm and a neural network
trained using the standard approach.",
note = "(Proc. of the 1992 National Conference on AI.)",
}
@Article{maclin.mlj93,
author = "R. Maclin and J. Shavlik",
title = "Using knowledge-based neural networks to improve
algorithms: Refining the {Chou-Fasman} algorithm for
protein folding",
journal = "Machine Learning",
volume = "11",
pages = "195--215",
year = "1993",
note = "File name: maclin.mlj93.ps.Z",
abstract = "This paper describes a connectionist method for refining
algorithms represented as generalized finite-state
automata. The method translates the rule-like knowledge
in an automaton into a corresponding artificial neural
network, and then refines the reformulated automaton by
applying backpropagation to a set of examples. This
technique for translating an automaton into a network
extends the KBANN algorithm, a system that translates a
set of propositional rules into a corresponding neural
network. The extended system, FSKBANN, allows one to
refine the large class of algorithms that can be
represented as state-based processes. As a test, FSKBANN
is used to improve the Chou-Fasman algorithm, a method
for predicting how globular proteins fold. Empirical
evidence shows that the multistrategy approach of FSKBANN
leads to a statistically significantly more accurate
solution than both the original Chou-Fasman algorithm and
a neural network trained using the standard approach.
Extensive statistics report the types of errors made by
the Chou-Fasman algorithm, the standard neural network,
and the FSKBANN network.",
}
@TechReport{Cherkauer??a,
author = "K. J. Cherkauer and J. W. Shavlik",
title = "Selecting Salient Features for Machine Learning from
Large Candidate Pools through Parallel Decision-Tree
Construction",
institution = "University of Wisconsin",
type = "Technical Report",
pages = "28 pages",
url = "ftp://ftp.cs.wisc.edu/machine-learning/shavlik-group/cherkauer.mass-parallel-ai.ps.Z",
abstract = "The particular representation used to describe
training and testing examples can have profound effects
on an inductive algorithm's ability to learn. However,
the space of possible representations is virtually
infinite, so choosing a good representation is not a
simple task. This chapter describes a method whereby
the selection of a good input representation for
classification tasks is automated. This technique,
which we call DT-Select (``Decision Tree feature
Selection''), builds decision trees, via a fast
parallel implementation of ID3 (Quinlan, 1986), which
attempt to correctly classify the training data. The
internal nodes of the trees are features drawn from
very large pools of complex general-purpose and
domain-specific constructed features. Thus, the
features included in the trees constitute compact and
informative sets which can then be used as input
representations for other learning algorithms attacking
the same problem. We have implemented DT-Select on a
parallel message-passing MIMD architecture, the
Thinking Machines CM-5, enabling us to select from
pools containing several hundred thousand features in
reasonable time. We present here some work using this
approach to produce augmentations of artificial neural
network input representations for the molecular biology
problem of predicting protein secondary structures.",
note = "(A version of this paper will appear in: Massively
Parallel Artificial Intelligence, H. Kitano (ed). Menlo
Park, CA: AAAI Press/The MIT Press (1993).)",
}
@TechReport{Fahlman??,
author = "Scott E. Fahlman",
title = "The Recurrent Cascade-Correlation Architecture",
institution = "Neuroprose",
number = "CMU-CS-91-100",
url = "ftp://archive.cis.ohio-state.edu/pub/neuroprose/fahlman.rcc.ps.Z",
abstract = "Recurrent Cascade-Correlation (RCC) is a recurrent
version of the Cascade-Correlation learning
architecture of Fahlman and Lebiere
\cite{fahlman:cascor}. RCC can learn from examples to
map a sequence of inputs into a desired sequence of
outputs. New hidden units with recurrent connections
are added to the network one at a time, as they are
needed during training. In effect, the network builds
up a finite-state machine tailored specifically for the
current problem. RCC retains the advantages of
Cascade-Correlation: fast learning, good
generalization, automatic construction of a
near-minimal multi-layered network, and the ability to
learn complex behaviors through a sequence of simple
lessons. The power of RCC is demonstrated on two tasks:
learning a finite-state grammar from examples of legal
strings, and learning to recognize characters in Morse
code.",
note = "(NIPS 3 proceedings)",
}
@TechReport{MIT/LCS/TR-413,
author = "R. E. Schapire",
title = "{DIVERSITY}-{BASED} {INFERENCE} {OF} {FINITE}
{AUTOMATA}",
institution = "MIT Laboratory for Computer Science",
number = "MIT/LCS/TR-413",
pages = "49",
month = may,
year = "1988",
price = "USD 13.00",
keywords = "learning, automata, induction, inference of finite
automata",
abstract = "We present a new procedure for inferring the structure
of a finite-state automaton (FSA) from its input/output
behavior, using access to the automaton to perform
experiments. Our procedure uses a new representation
for FSA's, based on the notion of equivalence between
tests. We call the number of such equivalence classes
the diversity of the automaton; the diversity may be as
small as the logarithm of the number of states of the
automaton. For the special class of permutation
automata, we show that our inference procedure runs in
time polynomial in the diversity and log (/ ), where is
a given upper bound on the probability that our
procedure returns an incorrect result. (Since our
procedure uses randomization to perform experiments,
there is a certain controllable chance that it will
return an erroneous result.) We also discuss techniques
for handling more general automata. We present evidence
for the practical efficiency of our approach. For
example, our procedure is able to infer the structure
of an automaton based on Rubik's Cube (which has
approximately 10 states) in about 2 minutes on a DEC
Micro Vax. This automaton is many orders of magnitude
larger than possible with previous techniques, which
would require time proportional at least to the number
of global states. (Note that in this example, only a
small fraction (10 ) of the global states were even
visited.) Finally, we present a new procedure for
inferring automata of a special type in which the
global state is composed of a vector of binary local
state variables, all of which are observable (or
visible) to the experimenter. Our inference procedure
runs provably in time polynomial in the size of this
vector (which happens to be the diversity of the
automaton), even though the global state space may be
exponentially larger. The procedure plans and executes
experiments on the unknown automaton; we show that the
number of input symbols given to the automaton during
this process is (to within a constant factor) the best
possible. Portions of this thesis are joint work with
Ronald Rivest.",
}
@PhdThesis{MIT/LCS/TR-493,
author = "R. E. Schapire",
title = "{THE} {DESIGN} {AND} {ANALYSIS} {OF} {EFFICIENT}
{LEARNING} {ALGORITHMS}",
school = "MIT Laboratory for Computer Science",
type = "Ph.{D}. Thesis",
number = "MIT/LCS/TR-493",
pages = "188",
month = feb,
year = "1991",
price = "USD 21.00",
keywords = "machine learning, computational learning theory,
concept learning, learning from examples,
distribution-free learning, probably approximately
correct learning, polynomial-time identification, exact
identification, read-once formulas, probabilistic
concepts, finite-state automata",
abstract = "This thesis explores various theoretical aspects of
machine learning with particular emphasis on techniques
for designing and analyzing computationally efficient
learning algorithms. Many of the results in this thesis
are concerned with a model of concept learning proposed
by Valiant. The thesis begins in Chapter 2 with a proof
that any {"}weak{"} learning algorithm in this model
that performs just slightly better than random guessing
can be converted into one whose error can be made
arbitrarily small. Several interesting consequences of
this result are also described. Chapter 3 next explores
in detail a simple but powerful technique for
discovering the structure of an unknown read-once
formula from random examples. An especially nice
feature of this technique is its powerful resistance to
noise. Chapter 4 considers a realistic extension of the
PAC model to concepts that may exhibit uncertain or
probabilistic behavior. A range of techniques are
explored for designing efficient algorithms for
learning such probabilistic concepts. In the last
chapter, we present new algorithms for inferring an
unknown finite-state automaton from its input-output
behavior. This problem is motivated by that faced by a
robot in unfamiliar surroundings who must, through
experimentation, discover the {"}structure{"} of its
environment. Portions of this thesis are joint work
with Sally A. Goldman, Michael J. Kearns and Ronald L.
Rivest.",
}
@InProceedings{KeaVal89,
author = "M. Kearns and L. G. Valiant",
title = "Cryptographic limitations on learning boolean formulae
and finite automata",
booktitle = "Proc.\ 21th ACM Symposium on Theory of Computing
(STOC)",
year = "1989",
pages = "433--444",
}
@InProceedings{FreundEtAl93,
author = "Y. Freund and M. Kearns and D. Ron and R. Rubinfeld
and R. E. Schapire and L. Sellie",
title = "Efficient learning of typical finite automata from
random walks",
booktitle = "Proc.\ 25th ACM Symposium on Theory of Computing
(STOC)",
year = "1993",
pages = "315--324",
}