mirror of
https://github.com/xerial/snappy-java.git
synced 2025-07-27 07:54:17 +02:00
add calgary test data set
This commit is contained in:
parent
0a4ee3c6fb
commit
b694432815
@ -60,11 +60,12 @@ public class SnappyInputStream extends InputStream
|
||||
protected void readHeader() throws IOException {
|
||||
byte[] header = new byte[SnappyCodec.headerSize()];
|
||||
int readBytes = in.read(header, 0, header.length);
|
||||
if (readBytes < header.length) {
|
||||
if (header[0] != SnappyCodec.MAGIC_HEADER[0]) {
|
||||
// do the default uncompression
|
||||
readFully(header, readBytes);
|
||||
return;
|
||||
}
|
||||
|
||||
SnappyCodec codec = SnappyCodec.readHeader(new ByteArrayInputStream(header));
|
||||
if (codec.isValidMagicHeader()) {
|
||||
// compressed by SnappyOutputStream
|
||||
@ -113,22 +114,22 @@ public class SnappyInputStream extends InputStream
|
||||
|
||||
@Override
|
||||
public int read(byte[] b, int off, int len) throws IOException {
|
||||
int wroteBytes = 0;
|
||||
for (; wroteBytes < len;) {
|
||||
int writtenBytes = 0;
|
||||
for (; writtenBytes < len;) {
|
||||
if (uncompressedCursor >= uncompressedLimit) {
|
||||
if (hasNextChunk())
|
||||
continue;
|
||||
else {
|
||||
return wroteBytes == 0 ? -1 : wroteBytes;
|
||||
return writtenBytes == 0 ? -1 : writtenBytes;
|
||||
}
|
||||
}
|
||||
int bytesToWrite = Math.min(uncompressedLimit - uncompressedCursor, len);
|
||||
System.arraycopy(uncompressed, uncompressedCursor, b, off + wroteBytes, bytesToWrite);
|
||||
wroteBytes += bytesToWrite;
|
||||
System.arraycopy(uncompressed, uncompressedCursor, b, off + writtenBytes, bytesToWrite);
|
||||
writtenBytes += bytesToWrite;
|
||||
uncompressedCursor += bytesToWrite;
|
||||
}
|
||||
|
||||
return wroteBytes;
|
||||
return writtenBytes;
|
||||
}
|
||||
|
||||
protected boolean hasNextChunk() throws IOException {
|
||||
|
@ -98,7 +98,7 @@ public class SnappyInputStreamTest
|
||||
|
||||
@Test
|
||||
public void biteWiseRead() throws Exception {
|
||||
byte[] orig = readResourceFile("alice29.txt");
|
||||
byte[] orig = readResourceFile("testdata/calgary/paper6");
|
||||
byte[] compressed = Snappy.compress(orig);
|
||||
|
||||
SnappyInputStream in = new SnappyInputStream(new ByteArrayInputStream(compressed));
|
||||
|
6280
src/test/java/org/xerial/snappy/testdata/calgary/bib
vendored
Normal file
6280
src/test/java/org/xerial/snappy/testdata/calgary/bib
vendored
Normal file
File diff suppressed because it is too large
Load Diff
16622
src/test/java/org/xerial/snappy/testdata/calgary/book1
vendored
Normal file
16622
src/test/java/org/xerial/snappy/testdata/calgary/book1
vendored
Normal file
File diff suppressed because it is too large
Load Diff
15634
src/test/java/org/xerial/snappy/testdata/calgary/book2
vendored
Normal file
15634
src/test/java/org/xerial/snappy/testdata/calgary/book2
vendored
Normal file
File diff suppressed because it is too large
Load Diff
BIN
src/test/java/org/xerial/snappy/testdata/calgary/geo
vendored
Normal file
BIN
src/test/java/org/xerial/snappy/testdata/calgary/geo
vendored
Normal file
Binary file not shown.
10059
src/test/java/org/xerial/snappy/testdata/calgary/news
vendored
Normal file
10059
src/test/java/org/xerial/snappy/testdata/calgary/news
vendored
Normal file
File diff suppressed because it is too large
Load Diff
BIN
src/test/java/org/xerial/snappy/testdata/calgary/obj1
vendored
Executable file
BIN
src/test/java/org/xerial/snappy/testdata/calgary/obj1
vendored
Executable file
Binary file not shown.
BIN
src/test/java/org/xerial/snappy/testdata/calgary/obj2
vendored
Normal file
BIN
src/test/java/org/xerial/snappy/testdata/calgary/obj2
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 241 KiB |
1250
src/test/java/org/xerial/snappy/testdata/calgary/paper1
vendored
Normal file
1250
src/test/java/org/xerial/snappy/testdata/calgary/paper1
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1731
src/test/java/org/xerial/snappy/testdata/calgary/paper2
vendored
Normal file
1731
src/test/java/org/xerial/snappy/testdata/calgary/paper2
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1100
src/test/java/org/xerial/snappy/testdata/calgary/paper3
vendored
Normal file
1100
src/test/java/org/xerial/snappy/testdata/calgary/paper3
vendored
Normal file
File diff suppressed because it is too large
Load Diff
294
src/test/java/org/xerial/snappy/testdata/calgary/paper4
vendored
Normal file
294
src/test/java/org/xerial/snappy/testdata/calgary/paper4
vendored
Normal file
@ -0,0 +1,294 @@
|
||||
.EQ
|
||||
delim $$
|
||||
.EN
|
||||
.ls 1
|
||||
.ce
|
||||
PROGRAMMING BY EXAMPLE REVISITED
|
||||
.sp
|
||||
.ce
|
||||
by John G. Cleary
|
||||
.ce
|
||||
Man-Machine Systems Laboratory
|
||||
.ce
|
||||
University of Calgary.
|
||||
.sp
|
||||
.sh "Introduction"
|
||||
.pp
|
||||
Efforts to construct an artificial intelligence have relied on
|
||||
ever more complex and carefully prepared programs. While useful in
|
||||
themselves, these programs
|
||||
are unlikely to be useful in situations where ephemeral and
|
||||
low value knowledge must be acquired. For example a person (or robot)
|
||||
working in a normal domestic environment knows a lot about which
|
||||
cupboards have sticky doors and where the marmalade is kept. It seems
|
||||
unlikely that it will ever be economic to program such knowledge
|
||||
whether this be via a language or a discourse with an expert system.
|
||||
.pp
|
||||
It is my thesis, then, that any flexible robot system working in the
|
||||
real world must contain a component of control intermediate
|
||||
between hard wired 'reflex' responses and complex intellectual
|
||||
reasoning. Such an intermediate system must be adaptive, be able
|
||||
to carry out complex patterned responses and be fast in operation.
|
||||
It need not, however, carry out complex forward planning or be capable
|
||||
of introspection (in the sense that expert systems are able to explain
|
||||
their actions).
|
||||
.pp
|
||||
In this talk I will examine a system that acquires knowledge by
|
||||
constructing a model of its input behaviour and uses this to select its
|
||||
actions. It can be viewed either as an automatic adaptive system or
|
||||
as an instance of 'programming by example'. Other workers have
|
||||
attempted to do this, by constructing compact models in some appropriate
|
||||
programming language: e.g. finite state automata [Bierman, 1972],
|
||||
[Bierman and Feldman, 1972]; LISP [Bierman and Krishnaswamy, 1976];
|
||||
finite non-deterministic
|
||||
automata [Gaines,1976], [Gaines,1977],
|
||||
[Witten,1980]; high level languages [Bauer, 1979], [Halbert, 1981].
|
||||
These efforts, however, suffer from
|
||||
the flaw that for some inputs their computing time is
|
||||
super-exponential in the number
|
||||
of inputs seen. This makes them totally impractical in any system which
|
||||
is continuously receiving inputs over a long period of time.
|
||||
.pp
|
||||
The system I will examine comprises one or more simple independent
|
||||
models. Because of their simplicity and because no attempt is made to
|
||||
construct models which are minimal,
|
||||
the time taken to store new information and to make
|
||||
predictions is constant and independent of the amount of information stored
|
||||
[Cleary, 1980]. This leads to a very integrated and responsive environment.
|
||||
All actions by the programmer are immediately incorporated into the program
|
||||
model. The actions are also acted upon so that their consequences are
|
||||
immediately apparent.
|
||||
However, the amount of memory used could grow
|
||||
linearly with time. [Witten, 1977] introduces a modelling system related
|
||||
to the one here which does not continually grow and which can be updated
|
||||
incrementally.
|
||||
.pp
|
||||
It remains to be shown that the very simple models used are capable
|
||||
of generating any
|
||||
interestingly complex behaviour.
|
||||
In the rest of this
|
||||
talk I will use the problem of executing a subroutine to illustrate
|
||||
the potential of such systems.
|
||||
The example will also illustrate some of the techniques which have been
|
||||
developed for combining multiple models, [Cleary, 1980], [Andreae
|
||||
and Cleary, 1976], [Andreae, 1977], [Witten,1981]. It has also been
|
||||
shown in [Cleary, 1980] and in [Andreae,1977] that such systems can
|
||||
simulate any Turing machine when supplied with a suitable external memory.
|
||||
.sh "The modelling system"
|
||||
.pp
|
||||
Fig. 1 shows the general layout of the modeller. Following the flow
|
||||
of information through the system it first receives a number of inputs
|
||||
from the external world. These are then used to update the current
|
||||
contexts of a number of Markov models. Note, that each Markov model
|
||||
may use different inputs to form its current context, and that they
|
||||
may be attempting to predict different inputs. A simple robot
|
||||
which can hear and move an arm might have two models; one, say, in
|
||||
which the last three sounds it heard are used to predict the next
|
||||
word to be spoken, and another in which the last three sounds and the last
|
||||
three arm movements are used to predict the next arm movement.
|
||||
.pp
|
||||
When the inputs are received each such context and its associated
|
||||
prediction (usually
|
||||
an action) are added to the Markov model. (No
|
||||
counts or statistics are maintained \(em they are not necessary.) When the
|
||||
context recurs later it will be retrieved along with all the predictions
|
||||
which have been stored with it.
|
||||
.pp
|
||||
After the contexts have been stored they
|
||||
are updated by shifting in the new inputs. These new contexts are then
|
||||
matched against the model and all the associated predictions are retrieved.
|
||||
These independent predictions from the individual Markov models
|
||||
are then combined into a single composite
|
||||
prediction.
|
||||
(A general theory of how to do this has been
|
||||
developed in [Cleary, 1980]).
|
||||
.pp
|
||||
The final step is to present this
|
||||
composite prediction to a device I have called the 'choice oracle'.
|
||||
This uses whatever information it sees fit to choose the next action.
|
||||
There are many possibilities for such a device. One might be to choose
|
||||
from amongst the predicted actions if reward is expected and to choose
|
||||
some other random action if reward is not expected. The whole system then
|
||||
looks like
|
||||
a reward seeking homeostat. At the other extreme the oracle might be
|
||||
a human programmer who chooses the next action according to his own
|
||||
principles. The system then functions more like a programming by
|
||||
example system \(em [Witten, 1981] and [Witten, 1982] give examples of such
|
||||
systems.
|
||||
[Andreae, 1977] gives an example of a 'teachable' system lying between
|
||||
these two extremes.
|
||||
.pp
|
||||
After an action is chosen this is
|
||||
transmitted to the external world and the resultant inputs are used
|
||||
to start the whole cycle again. Note that the chosen action will
|
||||
be an input on the next cycle.
|
||||
.sh "Subroutines"
|
||||
.pp
|
||||
An important part of any programming language is the ability to write a
|
||||
fragment of a program and then have it used many times without it having
|
||||
to be reprogrammed each time. A crucial feature of such shared code is
|
||||
that after it has been executed the program should be controlled by the
|
||||
situation which held before the subroutine was called. A subroutine can be
|
||||
visualised as a black box with an unknown and arbitrarily complex interior.
|
||||
There are many paths into the box but after passing through each splits again
|
||||
and goes its own way, independent of what happened inside the box.
|
||||
.np
|
||||
Also, if there are $p$ paths using the subroutine and $q$ different sequences
|
||||
within it then the amount of programming needed should be proportional to
|
||||
$p + q$ and not $p * q$. The example to follow possesses both these properties
|
||||
of a subroutine.
|
||||
.rh "Modelling a Subroutine."
|
||||
The actual model we will use is described in Fig. 2. There are two Markov
|
||||
models (model-1 and model-2) each seeing and predicting different parts of
|
||||
the inputs. The inputs are classified into four classes; ACTIONs that
|
||||
move a robot (LEFT, RIGHT, FAST, SLOW), patterns that it 'sees' (danger,
|
||||
moved, wall, stuck) and two types of special 'echo' actions, # actions
|
||||
and * actions (*home, #turn). The # and * actions have no effect on the
|
||||
environment,
|
||||
their only purpose is to be inputs and act as place keepers for relevant
|
||||
information. They may be viewed as comments which remind the system of
|
||||
what it is doing. (The term echo was used in [Andreae,1977], where the
|
||||
idea was first introduced, in analogy to spoken words of which one
|
||||
hears an echo.)
|
||||
.pp
|
||||
Model-2 is a Markov model of order 2 and uses only # actions in its
|
||||
context and seeks to predict only * actions. Model-1 is a Markov model
|
||||
of order 3 and uses all four classes of inputs in its context. It
|
||||
seeks to predict ACTIONs, # actions and * actions. However, * actions
|
||||
are treated specially. Rather than attempt to predict the exact * action
|
||||
it only stores * to indicate that some * action has occurred. This
|
||||
special treatment is also reflected in the procedure for combining the
|
||||
predictions of the two models. Then the prediction of model-2 is used,
|
||||
only if model-1 predicts an *. That is, model-1 predicts that some
|
||||
* action will occur and model-2 is used to select which one. If model-1
|
||||
does not predict an * then its prediction is used as the combined prediction
|
||||
and that from model-2 is ignored.
|
||||
.pp
|
||||
The choice oracle that is used for this example has two modes. In
|
||||
programmer mode a human programmer is allowed to select any action
|
||||
she wishes or to acquiesce with the current prediction, in which case
|
||||
one of the actions in the combined prediction is selected. In
|
||||
execution mode one of the predicted actions is selected and the
|
||||
programmer is not involved at all.
|
||||
.pp
|
||||
Before embarking on the actual example some points about the predictions
|
||||
extracted from the individual Markov models should be noted. First, if
|
||||
no context can be found stored in the memory which equals the current
|
||||
context then it is shortened by one input and a search is made for any
|
||||
recorded contexts which are equal over the reduced length. If necessary
|
||||
this is repeated until the length is zero whereupon all possible
|
||||
allowed actions are predicted.
|
||||
.pp
|
||||
Fig. 3 shows the problem to be programmed. If a robot sees danger it
|
||||
is to turn and flee quickly. If it sees a wall it is to turn and return
|
||||
slowly. The turning is to be done by a subroutine which, if it gets
|
||||
stuck when turning left, turns right instead.
|
||||
.pp
|
||||
Fig. 4 shows the contexts and predictions stored when this is programmed.
|
||||
This is done by two passes through the problem in 'program' mode: once
|
||||
to program the fleeing and turning left; the other to program the wall
|
||||
sequence and the turning right. Fig. 5 then shows how this programming
|
||||
is used in 'execute' mode for one of the combinations which had not been
|
||||
explicitly programmed earlier (a wall sequence with a turn left). The
|
||||
figure shows the contexts and associated predictions for each step.
|
||||
(Note that predictions are made and new contexts are stored in both
|
||||
modes. They have been omitted from the diagrams to preserve clarity.)
|
||||
.sh "Conclusion"
|
||||
.pp
|
||||
The type of simple modelling system presented above is of interest for a
|
||||
number of reasons. Seen as a programming by example system,
|
||||
it is very closely
|
||||
integrated. Because it can update its models incrementally in real time
|
||||
functions such as input/output, programming, compilation and execution
|
||||
are subsumed into a single mechanism. Interactive languages such as LISP
|
||||
or BASIC gain much of their immediacy and usefulness by being interpretive
|
||||
and not requiring a separate compilation step when altering the source
|
||||
program. By making execution integral with the process of program entry
|
||||
(some of) the consequences of new programming become immediately apparent.
|
||||
.pp
|
||||
Seen as an adaptive controller, the system has the advantage of being fast
|
||||
and being able to encode any control strategy. Times to update the model
|
||||
do not grow with memory size and so it can operate continuously in real time.
|
||||
.pp
|
||||
Seen as a paradigm for understanding natural control systems, it has the
|
||||
advantage of having a very simple underlying storage mechanism. Also,
|
||||
the ability to supply an arbitrary choice oracle allows for a wide
|
||||
range of possible adaptive strategies.
|
||||
.sh "References"
|
||||
.in +4m
|
||||
.sp
|
||||
.ti -4m
|
||||
ANDREAE, J.H. 1977
|
||||
Thinking with the Teachable Machine. Academic Press.
|
||||
.sp
|
||||
.ti -4m
|
||||
ANDREAE, J.H. and CLEARY, J.G. 1976
|
||||
A New Mechanism for a Brain. Int. J. Man-Machine Studies
|
||||
8(1):89-119.
|
||||
.sp
|
||||
.ti -4m
|
||||
BAUER, M.A. 1979 Programming by examples. Artificial Intelligence 12:1-21.
|
||||
.sp
|
||||
.ti -4m
|
||||
BIERMAN, A.W. 1972
|
||||
On the Inference of Turing Machines from Sample Computations.
|
||||
Artificial Intelligence 3(3):181-198.
|
||||
.sp
|
||||
.ti -4m
|
||||
BIERMAN, A.W. and FELDMAN, J.A. 1972
|
||||
On the Synthesis of Finite-State Machines from Samples of
|
||||
their Behavior. IEEE Transactions on Computers C-21, June:
|
||||
592-597.
|
||||
.sp
|
||||
.ti -4m
|
||||
BIERMAN, A.W. and KRISHNASWAMY, R. 1976 Constructing programs from example
|
||||
computations. IEEE Transactions on Software Engineering SE-2:141-153.
|
||||
.sp
|
||||
.ti -4m
|
||||
CLEARY, J.G. 1980
|
||||
An Associative and Impressible Computer. PhD thesis, University
|
||||
of Canterbury, Christchurch, New Zealand.
|
||||
.sp
|
||||
.ti -4m
|
||||
GAINES, B.R. 1976
|
||||
Behaviour/structure transformations under uncertainty.
|
||||
Int. J. Man-Machine Studies 8:337-365.
|
||||
.sp
|
||||
.ti -4m
|
||||
GAINES, B.R. 1977
|
||||
System identification, approximation and complexity.
|
||||
Int. J. General Systems, 3:145-174.
|
||||
.sp
|
||||
.ti -4m
|
||||
HALBERT, D.C. 1981
|
||||
An example of programming by example. Xerox Corporation, Palo Alto,
|
||||
California.
|
||||
.sp
|
||||
.ti -4m
|
||||
WITTEN, I.H. 1977
|
||||
An adaptive optimal controller for discrete-time Markov
|
||||
environments. Information and Control, 34, August: 286-295.
|
||||
.sp
|
||||
.ti -4m
|
||||
WITTEN, I.H. 1979
|
||||
Approximate, non-deterministic modelling of behaviour
|
||||
sequences. Int. J. General Systems, 5, January: 1-12.
|
||||
.sp
|
||||
.ti -4m
|
||||
WITTEN, I.H. 1980
|
||||
Probabilistic behaviour/structure transformations using
|
||||
transitive Moore models. Int. J. General Systems, 6(3):
|
||||
129-137.
|
||||
.sp
|
||||
.ti -4m
|
||||
WITTEN, I.H. 1981
|
||||
Programming by example for the casual user: a case study.
|
||||
Proc. Canadian Man-Computer Communication Conference, Waterloo,
|
||||
Ontario, 105-113.
|
||||
.sp
|
||||
.ti -4m
|
||||
WITTEN, I.H. 1982
|
||||
An interactive computer terminal interface which predicts user
|
||||
entries. Proc. IEE Conference on Man-Machine Interaction,
|
||||
Manchester, England.
|
||||
.in -4m
|
320
src/test/java/org/xerial/snappy/testdata/calgary/paper5
vendored
Normal file
320
src/test/java/org/xerial/snappy/testdata/calgary/paper5
vendored
Normal file
@ -0,0 +1,320 @@
|
||||
.pn 0
|
||||
.EQ
|
||||
delim $$
|
||||
define RR 'bold R'
|
||||
define SS 'bold S'
|
||||
define II 'bold I'
|
||||
define mo '"\(mo"'
|
||||
define EXIST ?"\z\-\d\z\-\r\-\d\v'0.2m'\(br\v'-0.2m'"?
|
||||
define NEXIST ?"\z\-\d\z\o'\-\(sl'\r\-\d\v'0.2m'\(br\v'-0.2m'"?
|
||||
define ALL ?"\o'V-'"?
|
||||
define subset '\(sb'
|
||||
define subeq '\(ib'
|
||||
define supset '\(sp'
|
||||
define supeq '\(ip'
|
||||
define mo '\(mo'
|
||||
define nm ?"\o'\(mo\(sl'"?
|
||||
define li '\& sup ['
|
||||
define lo '\& sup ('
|
||||
define hi '\& sup ]'
|
||||
define ho '\& sup )'
|
||||
.EN
|
||||
.ls 1
|
||||
.ce
|
||||
A LOGICAL IMPLEMENTATION OF ARITHMETIC
|
||||
.sp 3
|
||||
.ce
|
||||
John G. Cleary
|
||||
.ce
|
||||
The University of Calgary, Alberta, Canada.
|
||||
.sp 20
|
||||
\u1\dAuthor's Present Address: Man-Machine Systems Group, Department of
|
||||
Computer Science, The University of Calgary, 2500 University Drive NW
|
||||
Calgary, Canada T2N 1N4. Phone: (403)220-6087.
|
||||
.br
|
||||
.nf
|
||||
UUCP: ...!{ihnp4,ubc-vision}!alberta!calgary!cleary
|
||||
...!nrl-css!calgary!cleary
|
||||
ARPA: cleary.calgary.ubc@csnet-relay
|
||||
CDN: cleary@calgary
|
||||
.fi
|
||||
.sp 2
|
||||
.ls 2
|
||||
.bp 0
|
||||
.ls 2
|
||||
.ce
|
||||
Abstract
|
||||
.pp
|
||||
So far implementations of real arithmetic within logic programming
|
||||
have been non-logical. A logical description of the behaviour of arithmetic
|
||||
on actual
|
||||
machines using finite precision numbers is not readily available.
|
||||
Using interval analysis a simple description of real arithmetic is possible.
|
||||
This can be translated to an implementation within Prolog.
|
||||
As well as having a sound logical basis the resulting system
|
||||
allows a very concise and powerful programming style and is potentially
|
||||
very efficient.
|
||||
.bp
|
||||
.sh "1 Introduction"
|
||||
.pp
|
||||
Logic programming aims to use sets of logical formulae as
|
||||
statements in a programming language.
|
||||
Because of many practical difficulties the full generality of logic
|
||||
cannot (yet) be used in this way. However, by restricting the
|
||||
class of formulae used to Horn clauses practical and efficient
|
||||
languages such as PROLOG are obtained.
|
||||
One of the main problems in logic programming is to extend this area
|
||||
of practicality and efficiency to an ever wider range of formulae and
|
||||
applications.
|
||||
This paper considers such an implementation for arithmetic.
|
||||
.pp
|
||||
To see why arithmetic as it is commonly implemented in PROLOG systems
|
||||
is not logical consider the following example:
|
||||
.sp
|
||||
.nf
|
||||
X = 0.67, Y = 0.45, Z is X*Y, Z = 0.30
|
||||
.fi
|
||||
.sp
|
||||
This uses the notation of the 'Edinburgh style' Prologs.
|
||||
(For the moment we assume an underlying floating point
|
||||
decimal arithmetic with two significant places.)
|
||||
The predicate 'is' assumes its righthand side is an arithmetic
|
||||
statement, computes its value, and unifies the result with its lefthand side.
|
||||
In this case the entire sequence succeeds, however, there are some serious
|
||||
problems.
|
||||
.pp
|
||||
In a pure logic program the order of statements should be irrelevant to
|
||||
the correctness of the result (at worst termination or efficiency might be
|
||||
affected). This is not true of the example above. The direction of execution
|
||||
of 'is' is strictly one way so that
|
||||
.sp
|
||||
Y = 0.45, Z = 0.30, Z is X*Y
|
||||
.sp
|
||||
will deliver an error when X is found to be uninstantiated inside 'is'.
|
||||
.pp
|
||||
The second problem is that the answer Z = 0.30 is incorrect!\
|
||||
The correct infinite precision answer is Z = 0.3015. This inaccuracy
|
||||
is caused by the finite precision implemented in the floating point
|
||||
arithmetic of modern computers.
|
||||
It becomes very problematic to say what if anything it means when
|
||||
Z is bound to 0.30 by 'is'. This problem is exacerbated by long sequences
|
||||
of arithmetic operations where the propagation of such errors can lead the
|
||||
final result to have little or no resemblance to the correct answer.
|
||||
.pp
|
||||
There is a further class of errors, which is illustrated by the fact that the
|
||||
following two sequences will both succeed if the underlying arithmetic rounds:
|
||||
.sp
|
||||
X = 0.66, Y = 0.45, Z = 0.30, Z is X*Y
|
||||
.br
|
||||
X = 0.67, Y = 0.45, Z = 0.30, Z is X*Y
|
||||
.sp
|
||||
This means that even if some invertible form of arithmetic were devised
|
||||
capable of binding X when:
|
||||
.sp
|
||||
Y = 0.45, Z = 0.30, Z is X*Y
|
||||
.sp
|
||||
it is unclear which value should be given to it.
|
||||
.pp
|
||||
The problem then, is to implement arithmetic in as logical a manner
|
||||
as possible while still making use of efficient floating point arithmetic.
|
||||
The solution to this problem has three major parts.
|
||||
The first is to represent PROLOG's
|
||||
arithmetic variables internally as intervals of real numbers.
|
||||
So the result of 'Z is 0.45*0.67' would be to bind Z to the
|
||||
open interval (0.30,0.31).
|
||||
This says that Z lies somewhere in the interval
|
||||
$0.30 < Z < 0.31$, which is certainly true, and probably as informative
|
||||
as possible given finite precision arithmetic.
|
||||
(Note that Z is NOT bound to the data structure (0.30,0.31), this
|
||||
is a hidden representation in much the same way that pointers are used
|
||||
to implement logical variables in PROLOG but are not explicitly visible
|
||||
to the user. Throughout this paper brackets such as (...) or [...] will
|
||||
be used to represent open and closed intervals not Prolog data structures.)
|
||||
.pp
|
||||
The second part of the solution is to translate expressions such as
|
||||
\&'Z is (X*Y)/2' to the relational form 'multiply(X,Y,T0), multiply(2,Z,T0)'.
|
||||
Note that both the * and / operators have been translated to 'multiply'
|
||||
(with parameters in a different order). This relational form will be seen to
|
||||
be insensitive to which parameters are instantiated and which are not,
|
||||
thus providing invertibility.
|
||||
.pp
|
||||
The third part is to provide a small number of control 'predicates' able
|
||||
to guide the search for solutions.
|
||||
The resulting system is sufficiently powerful to be able to
|
||||
solve equations such as '0 is X*(X-2)+1' directly.
|
||||
.pp
|
||||
The next section gives a somewhat more formal description of arithmetic
|
||||
implemented this way. Section III gives examples of its use and of the
|
||||
types of equations that are soluble within it. Section IV compares our
|
||||
approach here with that of other interval arithmetic systems and with
|
||||
constraint networks. Section V notes some possibilities for a parallel
|
||||
dataflow implementation which avoids many of the difficulties of traditional
|
||||
dataflow execution.
|
||||
.sh "II. Interval Representation"
|
||||
.pp
|
||||
Define $II(RR)$ to be the set of intervals over the real numbers, $RR$.
|
||||
So that the lower and upper bounds of each interval can be operated on as
|
||||
single entities they will be treated as pairs of values.
|
||||
Each value having an attribute of being open or closed
|
||||
and an associated number. For example the interval (0.31,0.33] will be
|
||||
treated as the pair $lo 0.31$ and $hi 0.33$.
|
||||
The brackets are superscripted to minimize visual confusion when writing
|
||||
bounds not in pairs.
|
||||
As well as the usual real numbers
|
||||
$- inf$ and $inf$, will be used as part of bounds,
|
||||
with the properties that $ALL x mo RR~- inf < x < inf$
|
||||
The set of all upper bounds is defined as:
|
||||
.sp
|
||||
$H(RR)~==~\{ x sup b : x mo RR union \{ inf \},~b mo \{ hi , ho \} \} $
|
||||
.sp
|
||||
and the set of lower bounds as:
|
||||
.sp
|
||||
$L(RR)~==~\{ \& sup b x : x mo RR union \{ -inf \},~b mo \{ li , lo \} \} $
|
||||
.sp
|
||||
The set of all intervals is then defined by:
|
||||
.sp
|
||||
$II(RR)~==~L(RR) times H(RR)$
|
||||
.sp
|
||||
Using this notation rather loosely intervals will be identified
|
||||
with the appropriate subset of the reals. For example the following
|
||||
identifications will be made:
|
||||
.sp
|
||||
$[0.31,15)~=~< li 0.31, ho 15 >~=~ \{ x mo RR: 0.31 <= x < 15 \}$
|
||||
.br
|
||||
$[-inf,inf]~=~< li -inf , hi inf> ~=~ RR$
|
||||
.br
|
||||
and $(-0.51,inf]~=~< lo -0.51 , hi inf >~=~ \{ x mo RR: -0.51 < x \}$
|
||||
.sp
|
||||
The definition above carefully excludes 'intervals' such as $[inf,inf]$
|
||||
in the interests of simplifying some of the later development.
|
||||
.pp
|
||||
The finite arithmetic available on computers is represented by a
|
||||
finite subset, $SS$, of $RR$. It is assumed that
|
||||
$0,1 mo SS$. The set of intervals allowed over $SS$ is $II(SS)$ defined as
|
||||
above for $RR$. $SS$ might be a bounded set of integers or some more complex
|
||||
set representable by floating point numbers.
|
||||
.pp
|
||||
There is a useful mapping from $II(RR)$ to $II(SS)$ which associates
|
||||
with each real interval the best approximation to it:
|
||||
.nf
|
||||
.sp
|
||||
$approx(<l,h>)~==~<l prime, h prime >$
|
||||
.br
|
||||
where $l prime mo L(SS), l prime <= l, and NEXIST x mo L(SS)~l prime <x<l$
|
||||
.br
|
||||
$h prime mo H(SS), h prime >= h, and NEXIST x mo H(SS)~h prime >x>h$.
|
||||
.pp
|
||||
The ordering on the bounds is defined as follows:
|
||||
.sp
|
||||
$l < h, ~ l,h mo II(RR)~ <->~l= \& sup u x and h = \& sup v y$
|
||||
and $x<y$ or $x=y$ and $u<v$
|
||||
where $ ho, li, hi, lo$ occur in this order and $x<y$ is the usual ordering
|
||||
on the reals extended to include $-inf$ and $inf$.
|
||||
The ordering on the brackets is carefully chosen so that intervals such as
|
||||
(3.1,3.1) map to the empty set.
|
||||
Given this definition it is easily verified that 'approx' gives
|
||||
the smallest interval in $II(SS)$ enclosing the original interval in $II(RR)$.
|
||||
The definition also allows the intersection of two intervals to be readily
|
||||
computed:
|
||||
.sp
|
||||
$<l sub 1,h sub 1> inter <l sub 2, h sub 2>~=~$
|
||||
$< max(l sub 1 , l sub 2), min(h sub 1 , h sub 2 )>$
|
||||
.sp
|
||||
Also an interval $<l,h>$ will be empty if $l > h$. For example, according
|
||||
to the definition above $lo 3.1 > ho 3.1$ so (3.1,3.1) is correctly computed
|
||||
as being empty.
|
||||
.pp
|
||||
Intervals are introduced into logic by extending the notion of
|
||||
unification. A logical variable I can be bound to an interval $I$,
|
||||
written I:$I$. Unification of I to any other value J gives the following
|
||||
results:
|
||||
.LB
|
||||
.NP
|
||||
if J is unbound then it is bound to the interval, J:$I$;
|
||||
.NP
|
||||
if J is bound to the interval J:$J$ then
|
||||
I and J are bound to the same interval $I inter J$.
|
||||
The unification fails if $I inter J$ is empty.
|
||||
.NP
|
||||
a constant C is equivalent to $approx([C,C])$;
|
||||
.NP
|
||||
if J is bound to anything other than an interval the unification fails.
|
||||
.LE
|
||||
.pp
|
||||
Below are some simple Prolog programs and the bindings that result when
|
||||
they are run (assuming as usual two decimal places of accuracy).
|
||||
.sp
|
||||
.nf
|
||||
X = 3.141592
|
||||
X:(3.1,3.2)
|
||||
|
||||
X > -5.22, Y <= 31, X=Y
|
||||
X:(-5.3,31] Y:(-5.3,31]
|
||||
.fi
|
||||
.sp
|
||||
.rh "Addition"
|
||||
.pp
|
||||
Addition is implemented by the relation 'add(I,J,K)'
|
||||
which says that K is the sum of I and J.
|
||||
\&'add' can be viewed as a relation on $RR times RR times RR$ defined
|
||||
by:
|
||||
.sp
|
||||
$add ~==~ \{<x,y,z>:x,y,z mo RR,~x+y=z\}$
|
||||
.sp
|
||||
Given that I,J, and K are initially bound to the intervals $I,J,K$
|
||||
respectively, the fully correct set of solutions with the additional
|
||||
constraint 'add(I,J,K)' is given by all triples in the set
|
||||
$add inter I times J times K$.
|
||||
This set is however infinite, to get an effectively computable procedure
|
||||
I will approximate the additional constraint by binding I, J and K
|
||||
to smaller intervals.
|
||||
So as not to exclude any possible triples the new bindings,
|
||||
$I prime, J prime roman ~and~ K prime$ must obey:
|
||||
.sp
|
||||
$add inter I times J times K ~subeq~ I prime times J prime times K prime$
|
||||
.sp
|
||||
Figure 1 illustrates this process of
|
||||
.ul
|
||||
narrowing.
|
||||
The initial bindings are I:[0,2], J:[1,3]
|
||||
and K:[4,6]. After applying 'add(I,J,K)' the smallest possible bindings
|
||||
are I:[1,2], J:[2,3] and K:[4,5]. Note that all three intervals have been
|
||||
narrowed.
|
||||
.pp
|
||||
It can easily be seen that:
|
||||
.sp
|
||||
$I prime supeq \{x:<x,y,z> ~mo~ add inter I times J times K \}$
|
||||
.br
|
||||
$J prime supeq \{y:<x,y,z> ~mo~ add inter I times J times K \}$
|
||||
.br
|
||||
$K prime supeq \{z:<x,y,z> ~mo~ add inter I times J times K \}$
|
||||
.sp
|
||||
If there are 'holes' in the projected set then $I prime$ will be a strict
|
||||
superset of the projection, however, $I prime$ will still
|
||||
be uniquely determined by the projection. This will be true of any
|
||||
subset of $RR sup n$ not just $add$.
|
||||
.pp
|
||||
In general for
|
||||
.sp
|
||||
$R subeq RR sup n,~ I sub 1 , I sub 2 , ... , I sub n mo II(RR)$
|
||||
and $I prime sub 1 , I prime sub 2 , ... , I prime sub n mo II(RR)$
|
||||
.sp
|
||||
I will write
|
||||
.br
|
||||
$R inter I sub 1 times I sub 2 times ... times I sub n nar
|
||||
I prime sub 1 times I prime sub 2 times ... times I prime sub n $
|
||||
.br
|
||||
when the intervals $I prime sub 1 , I prime sub 2 , ... , I prime sub n $
|
||||
are the uniquely determined smallest intervals including all solutions.
|
||||
|
||||
.sh "IV. Comparison with Interval Arithmetic"
|
||||
.pp
|
||||
.sh "V. Implementation"
|
||||
.pp
|
||||
.sh "VI. Summary"
|
||||
.sh "Acknowledgements"
|
||||
.sh "References"
|
||||
.ls 1
|
||||
.[
|
||||
$LIST$
|
||||
.]
|
1019
src/test/java/org/xerial/snappy/testdata/calgary/paper6
vendored
Normal file
1019
src/test/java/org/xerial/snappy/testdata/calgary/paper6
vendored
Normal file
File diff suppressed because it is too large
Load Diff
BIN
src/test/java/org/xerial/snappy/testdata/calgary/pic
vendored
Normal file
BIN
src/test/java/org/xerial/snappy/testdata/calgary/pic
vendored
Normal file
Binary file not shown.
1487
src/test/java/org/xerial/snappy/testdata/calgary/progc
vendored
Normal file
1487
src/test/java/org/xerial/snappy/testdata/calgary/progc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2244
src/test/java/org/xerial/snappy/testdata/calgary/progl
vendored
Normal file
2244
src/test/java/org/xerial/snappy/testdata/calgary/progl
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1966
src/test/java/org/xerial/snappy/testdata/calgary/progp
vendored
Normal file
1966
src/test/java/org/xerial/snappy/testdata/calgary/progp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
BIN
src/test/java/org/xerial/snappy/testdata/calgary/trans
vendored
Normal file
BIN
src/test/java/org/xerial/snappy/testdata/calgary/trans
vendored
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user