mirror of
https://github.com/xerial/snappy-java.git
synced 2025-07-27 16:04:16 +02:00
add calgary test data set
This commit is contained in:
parent
0a4ee3c6fb
commit
b694432815
@ -60,11 +60,12 @@ public class SnappyInputStream extends InputStream
|
|||||||
protected void readHeader() throws IOException {
|
protected void readHeader() throws IOException {
|
||||||
byte[] header = new byte[SnappyCodec.headerSize()];
|
byte[] header = new byte[SnappyCodec.headerSize()];
|
||||||
int readBytes = in.read(header, 0, header.length);
|
int readBytes = in.read(header, 0, header.length);
|
||||||
if (readBytes < header.length) {
|
if (header[0] != SnappyCodec.MAGIC_HEADER[0]) {
|
||||||
// do the default uncompression
|
// do the default uncompression
|
||||||
readFully(header, readBytes);
|
readFully(header, readBytes);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
SnappyCodec codec = SnappyCodec.readHeader(new ByteArrayInputStream(header));
|
SnappyCodec codec = SnappyCodec.readHeader(new ByteArrayInputStream(header));
|
||||||
if (codec.isValidMagicHeader()) {
|
if (codec.isValidMagicHeader()) {
|
||||||
// compressed by SnappyOutputStream
|
// compressed by SnappyOutputStream
|
||||||
@ -113,22 +114,22 @@ public class SnappyInputStream extends InputStream
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int read(byte[] b, int off, int len) throws IOException {
|
public int read(byte[] b, int off, int len) throws IOException {
|
||||||
int wroteBytes = 0;
|
int writtenBytes = 0;
|
||||||
for (; wroteBytes < len;) {
|
for (; writtenBytes < len;) {
|
||||||
if (uncompressedCursor >= uncompressedLimit) {
|
if (uncompressedCursor >= uncompressedLimit) {
|
||||||
if (hasNextChunk())
|
if (hasNextChunk())
|
||||||
continue;
|
continue;
|
||||||
else {
|
else {
|
||||||
return wroteBytes == 0 ? -1 : wroteBytes;
|
return writtenBytes == 0 ? -1 : writtenBytes;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
int bytesToWrite = Math.min(uncompressedLimit - uncompressedCursor, len);
|
int bytesToWrite = Math.min(uncompressedLimit - uncompressedCursor, len);
|
||||||
System.arraycopy(uncompressed, uncompressedCursor, b, off + wroteBytes, bytesToWrite);
|
System.arraycopy(uncompressed, uncompressedCursor, b, off + writtenBytes, bytesToWrite);
|
||||||
wroteBytes += bytesToWrite;
|
writtenBytes += bytesToWrite;
|
||||||
uncompressedCursor += bytesToWrite;
|
uncompressedCursor += bytesToWrite;
|
||||||
}
|
}
|
||||||
|
|
||||||
return wroteBytes;
|
return writtenBytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected boolean hasNextChunk() throws IOException {
|
protected boolean hasNextChunk() throws IOException {
|
||||||
|
@ -98,7 +98,7 @@ public class SnappyInputStreamTest
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void biteWiseRead() throws Exception {
|
public void biteWiseRead() throws Exception {
|
||||||
byte[] orig = readResourceFile("alice29.txt");
|
byte[] orig = readResourceFile("testdata/calgary/paper6");
|
||||||
byte[] compressed = Snappy.compress(orig);
|
byte[] compressed = Snappy.compress(orig);
|
||||||
|
|
||||||
SnappyInputStream in = new SnappyInputStream(new ByteArrayInputStream(compressed));
|
SnappyInputStream in = new SnappyInputStream(new ByteArrayInputStream(compressed));
|
||||||
|
6280
src/test/java/org/xerial/snappy/testdata/calgary/bib
vendored
Normal file
6280
src/test/java/org/xerial/snappy/testdata/calgary/bib
vendored
Normal file
File diff suppressed because it is too large
Load Diff
16622
src/test/java/org/xerial/snappy/testdata/calgary/book1
vendored
Normal file
16622
src/test/java/org/xerial/snappy/testdata/calgary/book1
vendored
Normal file
File diff suppressed because it is too large
Load Diff
15634
src/test/java/org/xerial/snappy/testdata/calgary/book2
vendored
Normal file
15634
src/test/java/org/xerial/snappy/testdata/calgary/book2
vendored
Normal file
File diff suppressed because it is too large
Load Diff
BIN
src/test/java/org/xerial/snappy/testdata/calgary/geo
vendored
Normal file
BIN
src/test/java/org/xerial/snappy/testdata/calgary/geo
vendored
Normal file
Binary file not shown.
10059
src/test/java/org/xerial/snappy/testdata/calgary/news
vendored
Normal file
10059
src/test/java/org/xerial/snappy/testdata/calgary/news
vendored
Normal file
File diff suppressed because it is too large
Load Diff
BIN
src/test/java/org/xerial/snappy/testdata/calgary/obj1
vendored
Executable file
BIN
src/test/java/org/xerial/snappy/testdata/calgary/obj1
vendored
Executable file
Binary file not shown.
BIN
src/test/java/org/xerial/snappy/testdata/calgary/obj2
vendored
Normal file
BIN
src/test/java/org/xerial/snappy/testdata/calgary/obj2
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 241 KiB |
1250
src/test/java/org/xerial/snappy/testdata/calgary/paper1
vendored
Normal file
1250
src/test/java/org/xerial/snappy/testdata/calgary/paper1
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1731
src/test/java/org/xerial/snappy/testdata/calgary/paper2
vendored
Normal file
1731
src/test/java/org/xerial/snappy/testdata/calgary/paper2
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1100
src/test/java/org/xerial/snappy/testdata/calgary/paper3
vendored
Normal file
1100
src/test/java/org/xerial/snappy/testdata/calgary/paper3
vendored
Normal file
File diff suppressed because it is too large
Load Diff
294
src/test/java/org/xerial/snappy/testdata/calgary/paper4
vendored
Normal file
294
src/test/java/org/xerial/snappy/testdata/calgary/paper4
vendored
Normal file
@ -0,0 +1,294 @@
|
|||||||
|
.EQ
|
||||||
|
delim $$
|
||||||
|
.EN
|
||||||
|
.ls 1
|
||||||
|
.ce
|
||||||
|
PROGRAMMING BY EXAMPLE REVISITED
|
||||||
|
.sp
|
||||||
|
.ce
|
||||||
|
by John G. Cleary
|
||||||
|
.ce
|
||||||
|
Man-Machine Systems Laboratory
|
||||||
|
.ce
|
||||||
|
University of Calgary.
|
||||||
|
.sp
|
||||||
|
.sh "Introduction"
|
||||||
|
.pp
|
||||||
|
Efforts to construct an artificial intelligence have relied on
|
||||||
|
ever more complex and carefully prepared programs. While useful in
|
||||||
|
themselves, these programs
|
||||||
|
are unlikely to be useful in situations where ephemeral and
|
||||||
|
low value knowledge must be acquired. For example a person (or robot)
|
||||||
|
working in a normal domestic environment knows a lot about which
|
||||||
|
cupboards have sticky doors and where the marmalade is kept. It seems
|
||||||
|
unlikely that it will ever be economic to program such knowledge
|
||||||
|
whether this be via a language or a discourse with an expert system.
|
||||||
|
.pp
|
||||||
|
It is my thesis, then, that any flexible robot system working in the
|
||||||
|
real world must contain a component of control intermediate
|
||||||
|
between hard wired 'reflex' responses and complex intellectual
|
||||||
|
reasoning. Such an intermediate system must be adaptive, be able
|
||||||
|
to carry out complex patterned responses and be fast in operation.
|
||||||
|
It need not, however, carry out complex forward planning or be capable
|
||||||
|
of introspection (in the sense that expert systems are able to explain
|
||||||
|
their actions).
|
||||||
|
.pp
|
||||||
|
In this talk I will examine a system that acquires knowledge by
|
||||||
|
constructing a model of its input behaviour and uses this to select its
|
||||||
|
actions. It can be viewed either as an automatic adaptive system or
|
||||||
|
as an instance of 'programming by example'. Other workers have
|
||||||
|
attempted to do this, by constructing compact models in some appropriate
|
||||||
|
programming language: e.g. finite state automata [Bierman, 1972],
|
||||||
|
[Bierman and Feldman, 1972]; LISP [Bierman and Krishnaswamy, 1976];
|
||||||
|
finite non-deterministic
|
||||||
|
automata [Gaines,1976], [Gaines,1977],
|
||||||
|
[Witten,1980]; high level languages [Bauer, 1979], [Halbert, 1981].
|
||||||
|
These efforts, however, suffer from
|
||||||
|
the flaw that for some inputs their computing time is
|
||||||
|
super-exponential in the number
|
||||||
|
of inputs seen. This makes them totally impractical in any system which
|
||||||
|
is continuously receiving inputs over a long period of time.
|
||||||
|
.pp
|
||||||
|
The system I will examine comprises one or more simple independent
|
||||||
|
models. Because of their simplicity and because no attempt is made to
|
||||||
|
construct models which are minimal,
|
||||||
|
the time taken to store new information and to make
|
||||||
|
predictions is constant and independent of the amount of information stored
|
||||||
|
[Cleary, 1980]. This leads to a very integrated and responsive environment.
|
||||||
|
All actions by the programmer are immediately incorporated into the program
|
||||||
|
model. The actions are also acted upon so that their consequences are
|
||||||
|
immediately apparent.
|
||||||
|
However, the amount of memory used could grow
|
||||||
|
linearly with time. [Witten, 1977] introduces a modelling system related
|
||||||
|
to the one here which does not continually grow and which can be updated
|
||||||
|
incrementally.
|
||||||
|
.pp
|
||||||
|
It remains to be shown that the very simple models used are capable
|
||||||
|
of generating any
|
||||||
|
interestingly complex behaviour.
|
||||||
|
In the rest of this
|
||||||
|
talk I will use the problem of executing a subroutine to illustrate
|
||||||
|
the potential of such systems.
|
||||||
|
The example will also illustrate some of the techniques which have been
|
||||||
|
developed for combining multiple models, [Cleary, 1980], [Andreae
|
||||||
|
and Cleary, 1976], [Andreae, 1977], [Witten,1981]. It has also been
|
||||||
|
shown in [Cleary, 1980] and in [Andreae,1977] that such systems can
|
||||||
|
simulate any Turing machine when supplied with a suitable external memory.
|
||||||
|
.sh "The modelling system"
|
||||||
|
.pp
|
||||||
|
Fig. 1 shows the general layout of the modeller. Following the flow
|
||||||
|
of information through the system it first receives a number of inputs
|
||||||
|
from the external world. These are then used to update the current
|
||||||
|
contexts of a number of Markov models. Note, that each Markov model
|
||||||
|
may use different inputs to form its current context, and that they
|
||||||
|
may be attempting to predict different inputs. A simple robot
|
||||||
|
which can hear and move an arm might have two models; one, say, in
|
||||||
|
which the last three sounds it heard are used to predict the next
|
||||||
|
word to be spoken, and another in which the last three sounds and the last
|
||||||
|
three arm movements are used to predict the next arm movement.
|
||||||
|
.pp
|
||||||
|
When the inputs are received each such context and its associated
|
||||||
|
prediction (usually
|
||||||
|
an action) are added to the Markov model. (No
|
||||||
|
counts or statistics are maintained \(em they are not necessary.) When the
|
||||||
|
context recurs later it will be retrieved along with all the predictions
|
||||||
|
which have been stored with it.
|
||||||
|
.pp
|
||||||
|
After the contexts have been stored they
|
||||||
|
are updated by shifting in the new inputs. These new contexts are then
|
||||||
|
matched against the model and all the associated predictions are retrieved.
|
||||||
|
These independent predictions from the individual Markov models
|
||||||
|
are then combined into a single composite
|
||||||
|
prediction.
|
||||||
|
(A general theory of how to do this has been
|
||||||
|
developed in [Cleary, 1980]).
|
||||||
|
.pp
|
||||||
|
The final step is to present this
|
||||||
|
composite prediction to a device I have called the 'choice oracle'.
|
||||||
|
This uses whatever information it sees fit to choose the next action.
|
||||||
|
There are many possibilities for such a device. One might be to choose
|
||||||
|
from amongst the predicted actions if reward is expected and to choose
|
||||||
|
some other random action if reward is not expected. The whole system then
|
||||||
|
looks like
|
||||||
|
a reward seeking homeostat. At the other extreme the oracle might be
|
||||||
|
a human programmer who chooses the next action according to his own
|
||||||
|
principles. The system then functions more like a programming by
|
||||||
|
example system \(em [Witten, 1981] and [Witten, 1982] give examples of such
|
||||||
|
systems.
|
||||||
|
[Andreae, 1977] gives an example of a 'teachable' system lying between
|
||||||
|
these two extremes.
|
||||||
|
.pp
|
||||||
|
After an action is chosen this is
|
||||||
|
transmitted to the external world and the resultant inputs are used
|
||||||
|
to start the whole cycle again. Note that the chosen action will
|
||||||
|
be an input on the next cycle.
|
||||||
|
.sh "Subroutines"
|
||||||
|
.pp
|
||||||
|
An important part of any programming language is the ability to write a
|
||||||
|
fragment of a program and then have it used many times without it having
|
||||||
|
to be reprogrammed each time. A crucial feature of such shared code is
|
||||||
|
that after it has been executed the program should be controlled by the
|
||||||
|
situation which held before the subroutine was called. A subroutine can be
|
||||||
|
visualised as a black box with an unknown and arbitrarily complex interior.
|
||||||
|
There are many paths into the box but after passing through each splits again
|
||||||
|
and goes its own way, independent of what happened inside the box.
|
||||||
|
.np
|
||||||
|
Also, if there are $p$ paths using the subroutine and $q$ different sequences
|
||||||
|
within it then the amount of programming needed should be proportional to
|
||||||
|
$p + q$ and not $p * q$. The example to follow possesses both these properties
|
||||||
|
of a subroutine.
|
||||||
|
.rh "Modelling a Subroutine."
|
||||||
|
The actual model we will use is described in Fig. 2. There are two Markov
|
||||||
|
models (model-1 and model-2) each seeing and predicting different parts of
|
||||||
|
the inputs. The inputs are classified into four classes; ACTIONs that
|
||||||
|
move a robot (LEFT, RIGHT, FAST, SLOW), patterns that it 'sees' (danger,
|
||||||
|
moved, wall, stuck) and two types of special 'echo' actions, # actions
|
||||||
|
and * actions (*home, #turn). The # and * actions have no effect on the
|
||||||
|
environment,
|
||||||
|
their only purpose is to be inputs and act as place keepers for relevant
|
||||||
|
information. They may be viewed as comments which remind the system of
|
||||||
|
what it is doing. (The term echo was used in [Andreae,1977], where the
|
||||||
|
idea was first introduced, in analogy to spoken words of which one
|
||||||
|
hears an echo.)
|
||||||
|
.pp
|
||||||
|
Model-2 is a Markov model of order 2 and uses only # actions in its
|
||||||
|
context and seeks to predict only * actions. Model-1 is a Markov model
|
||||||
|
of order 3 and uses all four classes of inputs in its context. It
|
||||||
|
seeks to predict ACTIONs, # actions and * actions. However, * actions
|
||||||
|
are treated specially. Rather than attempt to predict the exact * action
|
||||||
|
it only stores * to indicate that some * action has occurred. This
|
||||||
|
special treatment is also reflected in the procedure for combining the
|
||||||
|
predictions of the two models. Then the prediction of model-2 is used,
|
||||||
|
only if model-1 predicts an *. That is, model-1 predicts that some
|
||||||
|
* action will occur and model-2 is used to select which one. If model-1
|
||||||
|
does not predict an * then its prediction is used as the combined prediction
|
||||||
|
and that from model-2 is ignored.
|
||||||
|
.pp
|
||||||
|
The choice oracle that is used for this example has two modes. In
|
||||||
|
programmer mode a human programmer is allowed to select any action
|
||||||
|
she wishes or to acquiesce with the current prediction, in which case
|
||||||
|
one of the actions in the combined prediction is selected. In
|
||||||
|
execution mode one of the predicted actions is selected and the
|
||||||
|
programmer is not involved at all.
|
||||||
|
.pp
|
||||||
|
Before embarking on the actual example some points about the predictions
|
||||||
|
extracted from the individual Markov models should be noted. First, if
|
||||||
|
no context can be found stored in the memory which equals the current
|
||||||
|
context then it is shortened by one input and a search is made for any
|
||||||
|
recorded contexts which are equal over the reduced length. If necessary
|
||||||
|
this is repeated until the length is zero whereupon all possible
|
||||||
|
allowed actions are predicted.
|
||||||
|
.pp
|
||||||
|
Fig. 3 shows the problem to be programmed. If a robot sees danger it
|
||||||
|
is to turn and flee quickly. If it sees a wall it is to turn and return
|
||||||
|
slowly. The turning is to be done by a subroutine which, if it gets
|
||||||
|
stuck when turning left, turns right instead.
|
||||||
|
.pp
|
||||||
|
Fig. 4 shows the contexts and predictions stored when this is programmed.
|
||||||
|
This is done by two passes through the problem in 'program' mode: once
|
||||||
|
to program the fleeing and turning left; the other to program the wall
|
||||||
|
sequence and the turning right. Fig. 5 then shows how this programming
|
||||||
|
is used in 'execute' mode for one of the combinations which had not been
|
||||||
|
explicitly programmed earlier (a wall sequence with a turn left). The
|
||||||
|
figure shows the contexts and associated predictions for each step.
|
||||||
|
(Note that predictions are made and new contexts are stored in both
|
||||||
|
modes. They have been omitted from the diagrams to preserve clarity.)
|
||||||
|
.sh "Conclusion"
|
||||||
|
.pp
|
||||||
|
The type of simple modelling system presented above is of interest for a
|
||||||
|
number of reasons. Seen as a programming by example system,
|
||||||
|
it is very closely
|
||||||
|
integrated. Because it can update its models incrementally in real time
|
||||||
|
functions such as input/output, programming, compilation and execution
|
||||||
|
are subsumed into a single mechanism. Interactive languages such as LISP
|
||||||
|
or BASIC gain much of their immediacy and usefulness by being interpretive
|
||||||
|
and not requiring a separate compilation step when altering the source
|
||||||
|
program. By making execution integral with the process of program entry
|
||||||
|
(some of) the consequences of new programming become immediately apparent.
|
||||||
|
.pp
|
||||||
|
Seen as an adaptive controller, the system has the advantage of being fast
|
||||||
|
and being able to encode any control strategy. Times to update the model
|
||||||
|
do not grow with memory size and so it can operate continuously in real time.
|
||||||
|
.pp
|
||||||
|
Seen as a paradigm for understanding natural control systems, it has the
|
||||||
|
advantage of having a very simple underlying storage mechanism. Also,
|
||||||
|
the ability to supply an arbitrary choice oracle allows for a wide
|
||||||
|
range of possible adaptive strategies.
|
||||||
|
.sh "References"
|
||||||
|
.in +4m
|
||||||
|
.sp
|
||||||
|
.ti -4m
|
||||||
|
ANDREAE, J.H. 1977
|
||||||
|
Thinking with the Teachable Machine. Academic Press.
|
||||||
|
.sp
|
||||||
|
.ti -4m
|
||||||
|
ANDREAE, J.H. and CLEARY, J.G. 1976
|
||||||
|
A New Mechanism for a Brain. Int. J. Man-Machine Studies
|
||||||
|
8(1):89-119.
|
||||||
|
.sp
|
||||||
|
.ti -4m
|
||||||
|
BAUER, M.A. 1979 Programming by examples. Artificial Intelligence 12:1-21.
|
||||||
|
.sp
|
||||||
|
.ti -4m
|
||||||
|
BIERMAN, A.W. 1972
|
||||||
|
On the Inference of Turing Machines from Sample Computations.
|
||||||
|
Artificial Intelligence 3(3):181-198.
|
||||||
|
.sp
|
||||||
|
.ti -4m
|
||||||
|
BIERMAN, A.W. and FELDMAN, J.A. 1972
|
||||||
|
On the Synthesis of Finite-State Machines from Samples of
|
||||||
|
their Behavior. IEEE Transactions on Computers C-21, June:
|
||||||
|
592-597.
|
||||||
|
.sp
|
||||||
|
.ti -4m
|
||||||
|
BIERMAN, A.W. and KRISHNASWAMY, R. 1976 Constructing programs from example
|
||||||
|
computations. IEEE transactions on Software Engineering SE-2:141-153.
|
||||||
|
.sp
|
||||||
|
.ti -4m
|
||||||
|
CLEARY, J.G. 1980
|
||||||
|
An Associative and Impressible Computer. PhD thesis, University
|
||||||
|
of Canterbury, Christchurch, New Zealand.
|
||||||
|
.sp
|
||||||
|
.ti -4m
|
||||||
|
GAINES, B.R. 1976
|
||||||
|
Behaviour/structure transformations under uncertainty.
|
||||||
|
Int. J. Man-Machine Studies 8:337-365.
|
||||||
|
.sp
|
||||||
|
.ti -4m
|
||||||
|
GAINES, B.R. 1977
|
||||||
|
System identification, approximation and complexity.
|
||||||
|
Int. J. General Systems, 3:145-174.
|
||||||
|
.sp
|
||||||
|
.ti -4m
|
||||||
|
HALBERT, D.C. 1981
|
||||||
|
An example of programming by example. Xerox Corporation, Palo Alto,
|
||||||
|
California.
|
||||||
|
.sp
|
||||||
|
.ti -4m
|
||||||
|
WITTEN, I.H. 1977
|
||||||
|
An adaptive optimal controller for discrete-time Markov
|
||||||
|
environments. Information and Control, 34, August: 286-295.
|
||||||
|
.sp
|
||||||
|
.ti -4m
|
||||||
|
WITTEN, I.H. 1979
|
||||||
|
Approximate, non-deterministic modelling of behaviour
|
||||||
|
sequences. Int. J. General Systems, 5, January: 1-12.
|
||||||
|
.sp
|
||||||
|
.ti -4m
|
||||||
|
WITTEN, I.H. 1980
|
||||||
|
Probabilistic behaviour/structure transformations using
|
||||||
|
transitive Moore models. Int. J. General Systems, 6(3):
|
||||||
|
129-137.
|
||||||
|
.sp
|
||||||
|
.ti -4m
|
||||||
|
WITTEN, I.H. 1981
|
||||||
|
Programming by example for the casual user: a case study.
|
||||||
|
Proc. Canadian Man-Computer Communication Conference, Waterloo,
|
||||||
|
Ontario, 105-113.
|
||||||
|
.sp
|
||||||
|
.ti -4m
|
||||||
|
WITTEN, I.H. 1982
|
||||||
|
An interactive computer terminal interface which predicts user
|
||||||
|
entries. Proc. IEE Conference on Man-Machine Interaction,
|
||||||
|
Manchester, England.
|
||||||
|
.in -4m
|
320
src/test/java/org/xerial/snappy/testdata/calgary/paper5
vendored
Normal file
320
src/test/java/org/xerial/snappy/testdata/calgary/paper5
vendored
Normal file
@ -0,0 +1,320 @@
|
|||||||
|
.pn 0
|
||||||
|
.EQ
|
||||||
|
delim $$
|
||||||
|
define RR 'bold R'
|
||||||
|
define SS 'bold S'
|
||||||
|
define II 'bold I'
|
||||||
|
define mo '"\(mo"'
|
||||||
|
define EXIST ?"\z\-\d\z\-\r\-\d\v'0.2m'\(br\v'-0.2m'"?
|
||||||
|
define NEXIST ?"\z\-\d\z\o'\-\(sl'\r\-\d\v'0.2m'\(br\v'-0.2m'"?
|
||||||
|
define ALL ?"\o'V-'"?
|
||||||
|
define subset '\(sb'
|
||||||
|
define subeq '\(ib'
|
||||||
|
define supset '\(sp'
|
||||||
|
define supeq '\(ip'
|
||||||
|
define mo '\(mo'
|
||||||
|
define nm ?"\o'\(mo\(sl'"?
|
||||||
|
define li '\& sup ['
|
||||||
|
define lo '\& sup ('
|
||||||
|
define hi '\& sup ]'
|
||||||
|
define ho '\& sup )'
|
||||||
|
.EN
|
||||||
|
.ls 1
|
||||||
|
.ce
|
||||||
|
A LOGICAL IMPLEMENTATION OF ARITHMETIC
|
||||||
|
.sp 3
|
||||||
|
.ce
|
||||||
|
John G. Cleary
|
||||||
|
.ce
|
||||||
|
The University of Calgary, Alberta, Canada.
|
||||||
|
.sp 20
|
||||||
|
\u1\dAuthor's Present Address: Man-Machine Systems Group, Department of
|
||||||
|
Computer Science, The University of Calgary, 2500 University Drive NW
|
||||||
|
Calgary, Canada T2N 1N4. Phone: (403)220-6087.
|
||||||
|
.br
|
||||||
|
.nf
|
||||||
|
UUCP: ...!{ihnp4,ubc-vision}!alberta!calgary!cleary
|
||||||
|
...!nrl-css!calgary!cleary
|
||||||
|
ARPA: cleary.calgary.ubc@csnet-relay
|
||||||
|
CDN: cleary@calgary
|
||||||
|
.fi
|
||||||
|
.sp 2
|
||||||
|
.ls 2
|
||||||
|
.bp 0
|
||||||
|
.ls 2
|
||||||
|
.ce
|
||||||
|
Abstract
|
||||||
|
.pp
|
||||||
|
So far implementations of real arithmetic within logic programming
|
||||||
|
have been non-logical. A logical description of the behaviour of arithmetic
|
||||||
|
on actual
|
||||||
|
machines using finite precision numbers is not readily available.
|
||||||
|
Using interval analysis a simple description of real arithmetic is possible.
|
||||||
|
This can be translated to an implementation within Prolog.
|
||||||
|
As well as having a sound logical basis the resulting system
|
||||||
|
allows a very concise and powerful programming style and is potentially
|
||||||
|
very efficient.
|
||||||
|
.bp
|
||||||
|
.sh "1 Introduction"
|
||||||
|
.pp
|
||||||
|
Logic programming aims to use sets of logical formulae as
|
||||||
|
statements in a programming language.
|
||||||
|
Because of many practical difficulties the full generality of logic
|
||||||
|
cannot (yet) be used in this way. However, by restricting the
|
||||||
|
class of formulae used to Horn clauses practical and efficient
|
||||||
|
languages such as PROLOG are obtained.
|
||||||
|
One of the main problems in logic programming is to extend this area
|
||||||
|
of practicality and efficiency to an ever wider range of formulae and
|
||||||
|
applications.
|
||||||
|
This paper considers such an implementation for arithmetic.
|
||||||
|
.pp
|
||||||
|
To see why arithmetic as it is commonly implemented in PROLOG systems
|
||||||
|
is not logical consider the following example:
|
||||||
|
.sp
|
||||||
|
.nf
|
||||||
|
X = 0.67, Y = 0.45, Z is X*Y, Z = 0.30
|
||||||
|
.fi
|
||||||
|
.sp
|
||||||
|
This uses the notation of the 'Edinburgh style' Prologs.
|
||||||
|
(For the moment we assume an underlying floating point
|
||||||
|
decimal arithmetic with two significant places.)
|
||||||
|
The predicate 'is' assumes its righthand side is an arithmetic
|
||||||
|
statement, computes its value, and unifies the result with its lefthand side.
|
||||||
|
In this case the entire sequence succeeds, however, there are some serious
|
||||||
|
problems.
|
||||||
|
.pp
|
||||||
|
In a pure logic program the order of statements should be irrelevant to
|
||||||
|
the correctness of the result (at worst termination or efficiency might be
|
||||||
|
affected). This is not true of the example above. The direction of execution
|
||||||
|
of 'is' is strictly one way so that
|
||||||
|
.sp
|
||||||
|
Y = 0.45, Z = 0.30, Z is X*Y
|
||||||
|
.sp
|
||||||
|
will deliver an error when X is found to be uninstantiated inside 'is'.
|
||||||
|
.pp
|
||||||
|
The second problem is that the answer Z = 0.30 is incorrect!\
|
||||||
|
The correct infinite precision answer is Z = 0.3015. This inaccuracy
|
||||||
|
is caused by the finite precision implemented in the floating point
|
||||||
|
arithmetic of modern computers.
|
||||||
|
It becomes very problematic to say what if anything it means when
|
||||||
|
Z is bound to 0.30 by 'is'. This problem is exacerbated by long sequences
|
||||||
|
of arithmetic operations where the propagation of such errors can lead the
|
||||||
|
final result to have little or no resemblance to the correct answer.
|
||||||
|
.pp
|
||||||
|
There is a further class of errors, which is illustrated by the fact that the
|
||||||
|
following two sequences will both succeed if the underlying arithmetic rounds:
|
||||||
|
.sp
|
||||||
|
X = 0.66, Y = 0.45, Z = 0.30, Z is X*Y
|
||||||
|
.br
|
||||||
|
X = 0.67, Y = 0.45, Z = 0.30, Z is X*Y
|
||||||
|
.sp
|
||||||
|
This means that even if some invertible form of arithmetic were devised
|
||||||
|
capable of binding X when:
|
||||||
|
.sp
|
||||||
|
Y = 0.45, Z = 0.30, Z is X*Y
|
||||||
|
.sp
|
||||||
|
it is unclear which value should be given to it.
|
||||||
|
.pp
|
||||||
|
The problem then, is to implement arithmetic in as logical a manner
|
||||||
|
as possible while still making use of efficient floating point arithmetic.
|
||||||
|
The solution to this problem has three major parts.
|
||||||
|
The first is to represent PROLOG's
|
||||||
|
arithmetic variables internally as intervals of real numbers.
|
||||||
|
So the result of 'Z is 0.45*0.67' would be to bind Z to the
|
||||||
|
open interval (0.30,0.31).
|
||||||
|
This says that Z lies somewhere in the interval
|
||||||
|
$0.30 < Z < 0.31$, which is certainly true, and probably as informative
|
||||||
|
as possible given finite precision arithmetic.
|
||||||
|
(Note that Z is NOT bound to the data structure (0.30,0.31), this
|
||||||
|
is a hidden representation in much the same way that pointers are used
|
||||||
|
to implement logical variables in PROLOG but are not explicitly visible
|
||||||
|
to the user. Throughout this paper brackets such as (...) or [...] will
|
||||||
|
be used to represent open and closed intervals not Prolog data structures.)
|
||||||
|
.pp
|
||||||
|
The second part of the solution is to translate expressions such as
|
||||||
|
\&'Z is (X*Y)/2' to the relational form 'multiply(X,Y,T0), multiply(2,Z,T0)'.
|
||||||
|
Note that both the * and / operators have been translated to 'multiply'
|
||||||
|
(with parameters in a different order). This relational form will be seen to
|
||||||
|
be insensitive to which parameters are instantiated and which are not,
|
||||||
|
thus providing invertibility.
|
||||||
|
.pp
|
||||||
|
The third part is to provide a small number of control 'predicates' able
|
||||||
|
to guide the search for solutions.
|
||||||
|
The resulting system is sufficiently powerful to be able to
|
||||||
|
solve equations such as '0 is X*(X-2)+1' directly.
|
||||||
|
.pp
|
||||||
|
The next section gives a somewhat more formal description of arithmetic
|
||||||
|
implemented this way. Section III gives examples of its use and of the
|
||||||
|
types of equations that are soluble within it. Section IV compares our
|
||||||
|
approach here with that of other interval arithmetic systems and with
|
||||||
|
constraint networks. Section V notes some possibilities for a parallel
|
||||||
|
dataflow implementation which avoids many of the difficulties of traditional
|
||||||
|
dataflow execution.
|
||||||
|
.sh "II. Interval Representation"
|
||||||
|
.pp
|
||||||
|
Define $II(RR)$ to be the set of intervals over the real numbers, $RR$.
|
||||||
|
So that the lower and upper bounds of each interval can be operated on as
|
||||||
|
single entities they will be treated as pairs of values.
|
||||||
|
Each value having an attribute of being open or closed
|
||||||
|
and an associated number. For example the interval (0.31,0.33] will be
|
||||||
|
treated as the pair $lo 0.31$ and $hi 0.33$.
|
||||||
|
The brackets are superscripted to minimize visual confusion when writing
|
||||||
|
bounds not in pairs.
|
||||||
|
As well as the usual real numbers
|
||||||
|
$- inf$ and $inf$, will be used as part of bounds,
|
||||||
|
with the properties that $ALL x mo RR~- inf < x < inf$
|
||||||
|
The set of all upper bounds is defined as:
|
||||||
|
.sp
|
||||||
|
$H(RR)~==~\{ x sup b : x mo RR union \{ inf \},~b mo \{ hi , ho \} \} $
|
||||||
|
.sp
|
||||||
|
and the set of lower bounds as:
|
||||||
|
.sp
|
||||||
|
$L(RR)~==~\{ \& sup b x : x mo RR union \{ -inf \},~b mo \{ li , lo \} \} $
|
||||||
|
.sp
|
||||||
|
The set of all intervals is then defined by:
|
||||||
|
.sp
|
||||||
|
$II(RR)~==~L(RR) times H(RR)$
|
||||||
|
.sp
|
||||||
|
Using this notation rather loosely intervals will be identified
|
||||||
|
with the appropriate subset of the reals. For example the following
|
||||||
|
identifications will be made:
|
||||||
|
.sp
|
||||||
|
$[0.31,15)~=~< li 0.31, ho 15 >~=~ \{ x mo RR: 0.31 <= x < 15 \}$
|
||||||
|
.br
|
||||||
|
$[-inf,inf]~=~< li -inf , hi inf> ~=~ RR$
|
||||||
|
.br
|
||||||
|
and $(-0.51,inf]~=~< lo -0.51 , hi inf >~=~ \{ x mo RR: -0.51 < x \}$
|
||||||
|
.sp
|
||||||
|
The definition above carefully excludes 'intervals' such as $[inf,inf]$
|
||||||
|
in the interests of simplifying some of the later development.
|
||||||
|
.pp
|
||||||
|
The finite arithmetic available on computers is represented by a
|
||||||
|
finite subset, $SS$, of $RR$. It is assumed that
|
||||||
|
$0,1 mo SS$. The set of intervals allowed over $SS$ is $II(SS)$ defined as
|
||||||
|
above for $RR$. $SS$ might be a bounded set of integers or some more complex
|
||||||
|
set representable by floating point numbers.
|
||||||
|
.pp
|
||||||
|
There is a useful mapping from $II(RR)$ to $II(SS)$ which associates
|
||||||
|
with each real interval the best approximation to it:
|
||||||
|
.nf
|
||||||
|
.sp
|
||||||
|
$approx(<l,h>)~==~<l prime, h prime >$
|
||||||
|
.br
|
||||||
|
where $l prime mo L(SS), l prime <= l, and NEXIST x mo L(SS)~l prime <x<l$
|
||||||
|
.br
|
||||||
|
$h prime mo H(SS), h prime >= h, and NEXIST x mo H(SS)~h prime >x>h$.
|
||||||
|
.pp
|
||||||
|
The ordering on the bounds is defined as follows:
|
||||||
|
.sp
|
||||||
|
$l < h, ~ l,h mo II(RR)~ <->~l= \& sup u x and h = \& sup v y$
|
||||||
|
and $x<y$ or $x=y$ and $u<v$
|
||||||
|
where $ ho, li, hi, lo$ occur in this order and $x<y$ is the usual ordering
|
||||||
|
on the reals extended to include $-inf$ and $inf$.
|
||||||
|
The ordering on the brackets is carefully chosen so that intervals such as
|
||||||
|
(3.1,3.1) map to the empty set.
|
||||||
|
Given this definition it is easily verified that 'approx' gives
|
||||||
|
the smallest interval in $II(SS)$ enclosing the original interval in $II(RR)$.
|
||||||
|
The definition also allows the intersection of two intervals to be readily
|
||||||
|
computed:
|
||||||
|
.sp
|
||||||
|
$<l sub 1,h sub 1> inter <l sub 2, h sub 2>~=~$
|
||||||
|
$< max(l sub 1 , l sub 2), min(h sub 1 , h sub 2 )>$
|
||||||
|
.sp
|
||||||
|
Also an interval $<l,h>$ will be empty if $l > h$. For example, according
|
||||||
|
to the definition above $lo 3.1 > ho 3.1$ so (3.1,3.1) is correctly computed
|
||||||
|
as being empty.
|
||||||
|
.pp
|
||||||
|
Intervals are introduced into logic by extending the notion of
|
||||||
|
unification. A logical variable I can be bound to an interval $I$,
|
||||||
|
written I:$I$. Unification of I to any other value J gives the following
|
||||||
|
results:
|
||||||
|
.LB
|
||||||
|
.NP
|
||||||
|
if J is unbound then it is bound to the interval, J:$I$;
|
||||||
|
.NP
|
||||||
|
if J is bound to the interval J:$J$ then
|
||||||
|
I and J are bound to the same interval $I inter J$.
|
||||||
|
The unification fails if $I inter J$ is empty.
|
||||||
|
.NP
|
||||||
|
a constant C is equivalent to $approx([C,C])$;
|
||||||
|
.NP
|
||||||
|
if J is bound to anything other than an interval the unification fails.
|
||||||
|
.LE
|
||||||
|
.pp
|
||||||
|
Below are some simple Prolog programs and the bindings that result when
|
||||||
|
they are run (assuming as usual two decimal places of accuracy).
|
||||||
|
.sp
|
||||||
|
.nf
|
||||||
|
X = 3.141592
|
||||||
|
X:(3.1,3.2)
|
||||||
|
|
||||||
|
X > -5.22, Y <= 31, X=Y
|
||||||
|
X:(-5.3,31] Y:(-5.3,31]
|
||||||
|
.fi
|
||||||
|
.sp
|
||||||
|
.rh "Addition"
|
||||||
|
.pp
|
||||||
|
Addition is implemented by the relation 'add(I,J,K)'
|
||||||
|
which says that K is the sum of I and J.
|
||||||
|
\&'add' can be viewed as a relation on $RR times RR times RR$ defined
|
||||||
|
by:
|
||||||
|
.sp
|
||||||
|
$add ~==~ \{<x,y,z>:x,y,z mo RR,~x+y=z\}$
|
||||||
|
.sp
|
||||||
|
Given that I,J, and K are initially bound to the intervals $I,J,K$
|
||||||
|
respectively, the fully correct set of solutions with the additional
|
||||||
|
constraint 'add(I,J,K)' is given by all triples in the set
|
||||||
|
$add inter I times J times K$.
|
||||||
|
This set is, however, infinite; to get an effectively computable procedure
|
||||||
|
I will approximate the additional constraint by binding I, J and K
|
||||||
|
to smaller intervals.
|
||||||
|
So as not to exclude any possible triples the new bindings,
|
||||||
|
$I prime, J prime roman ~and~ K prime$ must obey:
|
||||||
|
.sp
|
||||||
|
$add inter I times J times K ~subeq~ I prime times J prime times K prime$
|
||||||
|
.sp
|
||||||
|
Figure 1 illustrates this process of
|
||||||
|
.ul
|
||||||
|
narrowing.
|
||||||
|
The initial bindings are I:[0,2], J:[1,3]
|
||||||
|
and K:[4,6]. After applying 'add(I,J,K)' the smallest possible bindings
|
||||||
|
are I:[1,2], J:[2,3] and K:[4,5]. Note that all three intervals have been
|
||||||
|
narrowed.
|
||||||
|
.pp
|
||||||
|
It can easily be seen that:
|
||||||
|
.sp
|
||||||
|
$I prime supeq \{x:<x,y,z> ~mo~ add inter I times J times K \}$
|
||||||
|
.br
|
||||||
|
$J prime supeq \{y:<x,y,z> ~mo~ add inter I times J times K \}$
|
||||||
|
.br
|
||||||
|
$K prime supeq \{z:<x,y,z> ~mo~ add inter I times J times K \}$
|
||||||
|
.sp
|
||||||
|
If there are 'holes' in the projected set then $I prime$ will be a strict
|
||||||
|
superset of the projection, however, $I prime$ will still
|
||||||
|
be uniquely determined by the projection. This will be true of any
|
||||||
|
subset of $RR sup n$ not just $add$.
|
||||||
|
.pp
|
||||||
|
In general for
|
||||||
|
.sp
|
||||||
|
$R subeq RR sup n,~ I sub 1 , I sub 2 , ... , I sub n mo II(RR)$
|
||||||
|
and $I prime sub 1 , I prime sub 2 , ... , I prime sub n mo II(RR)$
|
||||||
|
.sp
|
||||||
|
I will write
|
||||||
|
.br
|
||||||
|
$R inter I sub 1 times I sub 2 times ... times I sub n nar
|
||||||
|
I prime sub 1 times I prime sub 2 times ... times I prime sub n $
|
||||||
|
.br
|
||||||
|
when the intervals $I prime sub 1 , I prime sub 2 , ... , I prime sub n $
|
||||||
|
are the uniquely determined smallest intervals including all solutions.
|
||||||
|
|
||||||
|
.sh "IV. Comparison with Interval Arithmetic"
|
||||||
|
.pp
|
||||||
|
.sh "V. Implementation"
|
||||||
|
.pp
|
||||||
|
.sh "VI. Summary"
|
||||||
|
.sh "Acknowledgements"
|
||||||
|
.sh "References"
|
||||||
|
.ls 1
|
||||||
|
.[
|
||||||
|
$LIST$
|
||||||
|
.]
|
1019
src/test/java/org/xerial/snappy/testdata/calgary/paper6
vendored
Normal file
1019
src/test/java/org/xerial/snappy/testdata/calgary/paper6
vendored
Normal file
File diff suppressed because it is too large
Load Diff
BIN
src/test/java/org/xerial/snappy/testdata/calgary/pic
vendored
Normal file
BIN
src/test/java/org/xerial/snappy/testdata/calgary/pic
vendored
Normal file
Binary file not shown.
1487
src/test/java/org/xerial/snappy/testdata/calgary/progc
vendored
Normal file
1487
src/test/java/org/xerial/snappy/testdata/calgary/progc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2244
src/test/java/org/xerial/snappy/testdata/calgary/progl
vendored
Normal file
2244
src/test/java/org/xerial/snappy/testdata/calgary/progl
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1966
src/test/java/org/xerial/snappy/testdata/calgary/progp
vendored
Normal file
1966
src/test/java/org/xerial/snappy/testdata/calgary/progp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
BIN
src/test/java/org/xerial/snappy/testdata/calgary/trans
vendored
Normal file
BIN
src/test/java/org/xerial/snappy/testdata/calgary/trans
vendored
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user