add calgary test data set

2025-07-27 16:04:16 +02:00 · 2011-04-03 17:51:00 +09:00 · 2011-04-03 17:51:00 +09:00 · b694432815
commit b694432815
parent 0a4ee3c6fb
20 changed files with 60015 additions and 8 deletions
--- a/src/main/java/org/xerial/snappy/SnappyInputStream.java
+++ b/src/main/java/org/xerial/snappy/SnappyInputStream.java
@ -60,11 +60,12 @@ public class SnappyInputStream extends InputStream
    protected void readHeader() throws IOException {
        byte[] header = new byte[SnappyCodec.headerSize()];
        int readBytes = in.read(header, 0, header.length);
-        if (readBytes < header.length) {
+        if (header[0] != SnappyCodec.MAGIC_HEADER[0]) {
            // do the default uncompression
            readFully(header, readBytes);
            return;
        }
        SnappyCodec codec = SnappyCodec.readHeader(new ByteArrayInputStream(header));
        if (codec.isValidMagicHeader()) {
            // compressed by SnappyOutputStream
@ -113,22 +114,22 @@ public class SnappyInputStream extends InputStream
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
-        int wroteBytes = 0;
+        int writtenBytes = 0;
-        for (; wroteBytes < len;) {
+        for (; writtenBytes < len;) {
            if (uncompressedCursor >= uncompressedLimit) {
                if (hasNextChunk())
                    continue;
                else {
-                    return wroteBytes == 0 ? -1 : wroteBytes;
+                    return writtenBytes == 0 ? -1 : writtenBytes;
                }
            }
            int bytesToWrite = Math.min(uncompressedLimit - uncompressedCursor, len);
-            System.arraycopy(uncompressed, uncompressedCursor, b, off + wroteBytes, bytesToWrite);
+            System.arraycopy(uncompressed, uncompressedCursor, b, off + writtenBytes, bytesToWrite);
-            wroteBytes += bytesToWrite;
+            writtenBytes += bytesToWrite;
            uncompressedCursor += bytesToWrite;
        }
-        return wroteBytes;
+        return writtenBytes;
    }
    protected boolean hasNextChunk() throws IOException {
--- a/src/test/java/org/xerial/snappy/SnappyInputStreamTest.java
+++ b/src/test/java/org/xerial/snappy/SnappyInputStreamTest.java
@ -98,7 +98,7 @@ public class SnappyInputStreamTest
    @Test
    public void biteWiseRead() throws Exception {
-        byte[] orig = readResourceFile("alice29.txt");
+        byte[] orig = readResourceFile("testdata/calgary/paper6");
        byte[] compressed = Snappy.compress(orig);
        SnappyInputStream in = new SnappyInputStream(new ByteArrayInputStream(compressed));
--- a/src/test/java/org/xerial/snappy/testdata/calgary/bib
+++ b/src/test/java/org/xerial/snappy/testdata/calgary/bib
--- a/src/test/java/org/xerial/snappy/testdata/calgary/book1
+++ b/src/test/java/org/xerial/snappy/testdata/calgary/book1
--- a/src/test/java/org/xerial/snappy/testdata/calgary/book2
+++ b/src/test/java/org/xerial/snappy/testdata/calgary/book2
--- a/src/test/java/org/xerial/snappy/testdata/calgary/geo
+++ b/src/test/java/org/xerial/snappy/testdata/calgary/geo
--- a/src/test/java/org/xerial/snappy/testdata/calgary/news
+++ b/src/test/java/org/xerial/snappy/testdata/calgary/news
--- a/src/test/java/org/xerial/snappy/testdata/calgary/obj1
+++ b/src/test/java/org/xerial/snappy/testdata/calgary/obj1
--- a/src/test/java/org/xerial/snappy/testdata/calgary/obj2
+++ b/src/test/java/org/xerial/snappy/testdata/calgary/obj2
--- a/src/test/java/org/xerial/snappy/testdata/calgary/paper1
+++ b/src/test/java/org/xerial/snappy/testdata/calgary/paper1
--- a/src/test/java/org/xerial/snappy/testdata/calgary/paper2
+++ b/src/test/java/org/xerial/snappy/testdata/calgary/paper2
--- a/src/test/java/org/xerial/snappy/testdata/calgary/paper3
+++ b/src/test/java/org/xerial/snappy/testdata/calgary/paper3
--- a/src/test/java/org/xerial/snappy/testdata/calgary/paper4
+++ b/src/test/java/org/xerial/snappy/testdata/calgary/paper4
@ -0,0 +1,294 @@
 .EQ
 delim $$
 .EN
 .ls 1
 .ce
 PROGRAMMING BY EXAMPLE REVISITED
 .sp
 .ce
 by John G. Cleary
 .ce
 Man-Machine Systems Laboratory
 .ce
 University of Calgary.
 .sp
 .sh "Introduction"
 .pp
 Efforts to construct an artificial intelligence have relied on
 ever more complex and carefully prepared programs.  While useful in
 themselves, these programs
 are unlikely to be useful in situations where ephemeral and
 low value knowledge must be acquired.  For example a person (or robot)
 working in a normal domestic environment knows a lot about which
 cupboards have sticky doors and where the marmalade is kept.  It seems
 unlikely that it will ever be economic to program such knowledge 
 whether this be via a language or a discourse with an expert system.
 .pp
 It is my thesis, then, that any flexible robot system working in the
 real world must contain a component of control intermediate
 between hard wired 'reflex' responses and complex intellectual 
 reasoning.  Such an intermediate system must be adaptive, be able
 to carry out complex patterned responses and be fast in operation.
 It need not, however, carry out complex forward planning or be capable
 of introspection (in the sense that expert systems are able to explain
 their actions).
 .pp
 In this talk I will examine a system that acquires knowledge by 
 constructing a model of its input behaviour and uses this to select its
 actions.  It can be viewed either as an automatic adaptive system  or
 as an instance of 'programming by example'.  Other workers have
 attempted to do this, by constructing compact models in some appropriate
 programming language: e.g. finite state automata [Bierman, 1972], 
 [Bierman and Feldman, 1972]; LISP [Bierman and Krishnaswamy, 1976]; 
 finite non-deterministic
 automata [Gaines,1976], [Gaines,1977],
 [Witten,1980]; high level languages [Bauer, 1979], [Halbert, 1981].
 These efforts, however, suffer from
 the flaw that for some inputs their computing time is 
 super-exponential in the number
 of inputs seen.  This makes them totally impractical in any system which
 is continuously receiving inputs over a long period of time.
 .pp
 The system I will examine comprises one or more simple independent
 models.  Because of their simplicity and because no attempt is made to 
 construct models which are minimal,
 the time taken to store new information and to make 
 predictions is constant and independent of the amount of information stored
 [Cleary, 1980].  This leads to a very integrated and responsive environment.
 All actions by the programmer are immediately incorporated into the program
 model. The actions are also acted upon so that their consequences are 
 immediately apparent.
 However, the amount of memory used could grow 
 linearly with time. [Witten, 1977] introduces a modelling system related
 to the one here which does not continually grow and which can be updated
 incrementally.
 .pp
 It remains to be shown that the very simple models used are capable 
 of generating any
 interestingly complex behaviour.
 In the rest of this
 talk I will use the problem of executing a subroutine to illustrate
 the potential of such systems.
 The example will also illustrate some of the techniques which have been
 developed for combining multiple models, [Cleary, 1980], [Andreae
 and Cleary, 1976], [Andreae, 1977], [Witten,1981].  It has also been
 shown in [Cleary, 1980] and in [Andreae,1977] that such systems can
 simulate any Turing machine when supplied with a suitable external memory.
 .sh "The modelling system"
 .pp
 Fig. 1 shows the general layout of the modeller.  Following the flow
 of information through the system it first receives a number of inputs
 from the external world.  These are then used to update the current
 contexts of a number of Markov models.  Note, that each Markov model
 may use different inputs to form its current context, and that they
 may be attempting to predict different inputs.  A simple robot
 which can hear and move an arm might have two models; one, say, in
 which the last three sounds it heard are used to predict the next
 word to be spoken, and another in which the last three sounds and the last
 three arm movements are used to predict the next arm movement. 
 .pp
 When the inputs are received each such context and its associated 
 prediction (usually
 an action) are added to the Markov model.  (No
 counts or statistics are maintained \(em they are not necessary.)  When the
 context recurs later it will be retrieved along with all the predictions
 which have been stored with it.
 .pp
 After the contexts have been stored they 
 are updated by shifting in the new inputs. These new contexts are then
 matched against the model and all the associated predictions are retrieved.
 These independent predictions from the individual Markov models
 are then combined into a single composite 
 prediction.
 (A general theory of how to do this has been
 developed in [Cleary, 1980]).  
 .pp
 The final step is to present this 
 composite prediction to a device I have called the 'choice oracle'.
 This uses whatever information it sees fit to choose the next action.
 There are many possibilities for such a device.  One might be to choose
 from amongst the predicted actions if reward is expected and to choose
 some other random action if reward is not expected.  The whole system then 
 looks like
 a reward seeking homeostat.  At the other extreme the oracle might be
 a human programmer who chooses the next action according to his own
 principles.  The system then functions more like a programming by
 example system \(em [Witten, 1981] and [Witten, 1982] give examples of such 
 systems.
 [Andreae, 1977] gives an example of a 'teachable' system lying between
 these two extremes.
 .pp
 After an action is chosen this is
 transmitted to the external world and the resultant inputs are used
 to start the whole cycle again.  Note that the chosen action will
 be an input on the next cycle.
 .sh "Subroutines"
 .pp
 An important part of any programming language is the ability to write a 
 fragment of a program and then have it used many times without it having
 to be reprogrammed each time.  A crucial feature of such shared code is
 that after it has been executed the program should be controlled by the
 situation which held before the subroutine was called. A subroutine can be 
 visualised as a black box with an unknown and arbitrarily complex interior.
 There are many paths into the box but after passing through each splits again
 and goes its own way, independent of what happened inside the box.
 .np
 Also, if there are $p$ paths using the subroutine and $q$ different sequences
 within it then the amount of programming needed should be proportional to
 $p + q$ and not $p * q$.  The example to follow possesses both these properties
 of a subroutine.
 .rh "Modelling a Subroutine."
 The actual model we will use is described in Fig. 2.  There are two Markov
 models (model-1 and model-2) each seeing and predicting different parts of
 the inputs.  The inputs are classified into four classes; ACTIONs that
 move a robot (LEFT, RIGHT, FAST, SLOW), patterns that it 'sees' (danger,
 moved, wall, stuck) and two types of special 'echo' actions, # actions
 and * actions (*home, #turn).  The # and * actions have no effect on the 
 environment,
 their only purpose is to be inputs and act as place keepers for relevant
 information.  They may be viewed as comments which remind the system of
 what it is doing.  (The term echo was used in [Andreae,1977], where the
 idea was first introduced, in analogy to spoken words of which one
 hears an echo.)
 .pp
 Model-2 is a Markov model of order 2 and uses only # actions in its
 context and seeks to predict only * actions.  Model-1 is a Markov model 
 of order 3 and uses all four classes of inputs in its context.  It
 seeks to predict ACTIONs, # actions and * actions.  However, * actions
 are treated specially.  Rather than attempt to predict the exact * action
 it only stores * to indicate that some * action has occurred.  This
 special treatment is also reflected in the procedure for combining the
 predictions of the two models.  Then the prediction of model-2 is used,
 only if model-1 predicts an *.  That is, model-1 predicts that some 
 * action will occur and model-2 is used to select which one. If model-1
 does not predict an * then its prediction is used as the combined prediction
 and that from model-2 is ignored.
 .pp
 The choice oracle that is used for this example has two modes.  In
 programmer mode a human programmer is allowed to select any action
 she wishes or to acquiesce with the current prediction, in which case
 one of the actions in the combined prediction is selected.  In
 execution mode one of the predicted actions is selected and the
 programmer is not involved at all.
 .pp
 Before embarking on the actual example some points about the predictions
 extracted from the individual Markov models should be noted.  First, if 
 no context can be found stored in the memory which equals the current
 context then it is shortened by one input and a search is made for any
 recorded contexts which are equal over the reduced length.  If necessary
 this is repeated until the length is zero whereupon all possible
 allowed actions are predicted.
 .pp
 Fig. 3 shows the problem to be programmed.  If a robot sees danger it
 is to turn and flee quickly.  If it sees a wall it is to turn and return
 slowly.  The turning is to be done by a subroutine which, if it gets 
 stuck when turning left, turns right instead.
 .pp
 Fig. 4 shows the contexts and predictions stored when this is programmed.
 This is done by two passes through the problem in 'program' mode: once
 to program the fleeing and turning left; the other to program the wall
 sequence and the turning right.  Fig. 5 then shows how this programming
 is used in 'execute' mode for one of the combinations which had not been
 explicitly programmed earlier (a wall sequence with a turn left).  The
 figure shows the contexts and associated predictions for each step.
 (Note that predictions are made and new contexts are stored in both
 modes.  They have been omitted from the diagrams to preserve clarity.)
 .sh "Conclusion"
 .pp
 The type of simple modelling system presented above is of interest for a
 number of reasons.  Seen as a programming by example system, 
 it is very closely 
 integrated. Because it can update its models incrementally in real time
 functions such as input/output, programming, compilation and execution
 are subsumed into a single mechanism. Interactive languages such as LISP
 or BASIC gain much of their immediacy and usefulness by being interpretive 
 and not requiring a separate compilation step when altering the source
 program. By making execution integral with the process of program entry
 (some of) the consequences of new programming become immediately apparent.
 .pp
 Seen as an adaptive controller, the system has the advantage of being fast
 and being able to encode any control strategy. Times to update the model
 do not grow with memory size and so it can operate continuously in real time.
 .pp
 Seen as a paradigm for understanding natural control systems, it has the
 advantage of having a very simple underlying storage mechanism. Also,
 the ability to supply an arbitrary choice oracle allows for a wide
 range of possible adaptive strategies.
 .sh "References"
 .in +4m
 .sp
 .ti -4m
 ANDREAE, J.H. 1977
 Thinking with the Teachable Machine.  Academic Press.
 .sp
 .ti -4m
 ANDREAE, J.H. and CLEARY, J.G. 1976
 A New Mechanism for a Brain.  Int. J. Man-Machine Studies
 8(1):89-119.
 .sp
 .ti -4m
 BAUER, M.A. 1979 Programming by examples. Artificial Intelligence 12:1-21.
 .sp
 .ti -4m
 BIERMAN, A.W. 1972
 On the Inference of Turing Machines from Sample Computations.
 Artificial Intelligence 3(3):181-198.
 .sp
 .ti -4m
 BIERMAN, A.W. and FELDMAN, J.A. 1972
 On the Synthesis of Finite-State Machines from Samples of
 their Behavior.  IEEE Transactions on Computers C-21, June:
 592-597.
 .sp
 .ti -4m
 BIERMAN, A.W. and KRISHNASWAMY, R. 1976 Constructing programs from example 
 computations. IEEE transactions on Software Engineering SE-2:141-153.
 .sp
 .ti -4m
 CLEARY, J.G. 1980
 An Associative and Impressible Computer. PhD thesis, University
 of Canterbury, Christchurch, New Zealand.
 .sp
 .ti -4m
 GAINES, B.R. 1976
 Behaviour/structure transformations under uncertainty.
 Int. J. Man-Machine Studies 8:337-365.
 .sp
 .ti -4m
 GAINES, B.R. 1977
 System identification, approximation and complexity.
 Int. J. General Systems, 3:145-174.
 .sp
 .ti -4m
 HALBERT, D.C. 1981
 An example of programming by example. Xerox Corporation, Palo Alto, 
 California.
 .sp
 .ti -4m
 WITTEN, I.H. 1977
 An adaptive optimal controller for discrete-time Markov
 environments.  Information and Control, 34, August: 286-295.
 .sp
 .ti -4m
 WITTEN, I.H. 1979
 Approximate, non-deterministic modelling of behaviour
 sequences.  Int. J. General Systems, 5, January: 1-12.
 .sp
 .ti -4m
 WITTEN, I.H. 1980
 Probabilistic behaviour/structure transformations using
 transitive Moore models.  Int. J. General Systems, 6(3):
 129-137.
 .sp
 .ti -4m
 WITTEN, I.H. 1981
 Programming by example for the casual user: a case study.
 Proc. Canadian Man-Computer Communication Conference, Waterloo,
 Ontario, 105-113.
 .sp
 .ti -4m
 WITTEN, I.H. 1982
 An interactive computer terminal interface which predicts user 
 entries. Proc. IEE Conference on Man-Machine Interaction,
 Manchester, England.
 .in -4m
--- a/src/test/java/org/xerial/snappy/testdata/calgary/paper5
+++ b/src/test/java/org/xerial/snappy/testdata/calgary/paper5
@ -0,0 +1,320 @@
 .pn 0
 .EQ
 delim $$
 define RR 'bold R'
 define SS 'bold S'
 define II 'bold I'
 define mo '"\(mo"'
 define EXIST ?"\z\-\d\z\-\r\-\d\v'0.2m'\(br\v'-0.2m'"?
 define NEXIST ?"\z\-\d\z\o'\-\(sl'\r\-\d\v'0.2m'\(br\v'-0.2m'"?
 define ALL ?"\o'V-'"?
 define subset '\(sb'
 define subeq  '\(ib'
 define supset '\(sp'
 define supeq  '\(ip'
 define mo '\(mo'
 define nm ?"\o'\(mo\(sl'"?
 define li '\& sup ['
 define lo '\& sup ('
 define hi '\& sup ]'
 define ho '\& sup )'
 .EN
 .ls 1	
 .ce
 A LOGICAL IMPLEMENTATION OF ARITHMETIC 
 .sp 3
 .ce
 John G. Cleary 
 .ce
 The University of Calgary, Alberta, Canada.
 .sp 20
 \u1\dAuthor's Present Address: Man-Machine Systems Group, Department of
 Computer Science, The University of Calgary, 2500 University Drive NW
 Calgary, Canada T2N 1N4. Phone: (403)220-6087.  
 .br
 .nf
 UUCP:  ...!{ihnp4,ubc-vision}!alberta!calgary!cleary
       ...!nrl-css!calgary!cleary
 ARPA:  cleary.calgary.ubc@csnet-relay
 CDN:   cleary@calgary
 .fi
 .sp 2
 .ls 2
 .bp 0
 .ls 2
 .ce
 Abstract
 .pp
 So far implementations of real arithmetic within logic programming
 have been non-logical.  A logical description of the behaviour of arithmetic
 on actual
 machines using finite precision numbers is not readily available.  
 Using interval analysis a simple description of real arithmetic is possible.
 This can be translated to an implementation within Prolog.
 As well as having a sound logical basis the resulting system 
 allows a very concise and powerful programming style and is potentially
 very efficient.
 .bp
 .sh "I. Introduction"
 .pp
 Logic programming aims to use sets of logical formulae as
 statements in a programming language.
 Because of many practical difficulties the full generality of logic
 cannot (yet) be used in this way.   However, by restricting the
 class of formulae used to Horn clauses practical and efficient
 languages such as PROLOG are obtained.
 One of the main problems in logic programming is to extend this area
 of practicality and efficiency to an ever wider range of formulae and 
 applications.  
 This paper considers such an implementation for arithmetic.
 .pp
 To see why arithmetic as it is commonly implemented in PROLOG systems
 is not logical consider the following example:
 .sp
 .nf
 	X = 0.67, Y = 0.45, Z is X*Y, Z = 0.30
 .fi
 .sp
 This uses the notation of the 'Edinburgh style' Prologs.
 (For the moment we assume an underlying floating point
 decimal arithmetic with two significant places.)
 The predicate 'is' assumes its righthand side is an arithmetic
 statement, computes its value, and unifies the result with its lefthand side.
 In this case the entire sequence succeeds, however, there are some serious 
 problems.
 .pp
 In a pure logic program the order of statements should be irrelevant to
 the correctness of the result (at worst termination or efficiency might be
 affected).  This is not true of the example above.  The direction of execution
 of 'is' is strictly one way so that
 .sp
 	Y = 0.45, Z = 0.30, Z is X*Y
 .sp
 will deliver an error when X is found to be uninstantiated inside 'is'.
 .pp
 The second problem is that the answer Z = 0.30 is incorrect!\ 
 The correct infinite precision answer is Z = 0.3015.  This inaccuracy
 is caused by the finite precision implemented in the floating point
 arithmetic of modern computers.
 It becomes very problematic to say what if anything it means when
 Z is bound to 0.30 by 'is'.  This problem is exacerbated by long sequences
 of arithmetic operations where the propagation of such errors can lead the
 final result to have little or no resemblance to the correct answer.
 .pp
 There is a further class of errors, which is illustrated by the fact that the
 following two sequences will both succeed if the underlying arithmetic rounds:
 .sp
 	X = 0.66, Y = 0.45, Z = 0.30, Z is X*Y
 .br
 	X = 0.67, Y = 0.45, Z = 0.30, Z is X*Y
 .sp
 This means that even if some invertible form of arithmetic were devised
 capable of binding X when:
 .sp
 	Y = 0.45, Z = 0.30, Z is X*Y
 .sp
 it is unclear which value should be given to it.
 .pp
 The problem then, is to implement arithmetic in as logical a manner
 as possible while still making use of efficient floating point arithmetic.
 The solution to this problem has three major parts.
 The first is to represent PROLOG's 
 arithmetic variables internally as intervals of real numbers.
 So the result of 'Z is 0.45*0.67' would be to bind Z to the 
 open interval (0.30,0.31).  
 This says that Z lies somewhere in the interval
 $0.30 < Z < 0.31$, which is certainly true, and probably as informative
 as possible given finite precision arithmetic.
 (Note that Z is NOT bound to the data structure (0.30,0.31), this
 is a hidden representation in much the same way that pointers are used
 to implement logical variables in PROLOG but are not explicitly visible
 to the user.  Throughout this paper brackets such as (...) or [...] will
 be used to represent open and closed intervals not Prolog data structures.)
 .pp
 The second part of the solution is to translate expressions such as
 \&'Z is (X*Y)/2' to the relational form 'multiply(X,Y,T0), multiply(2,Z,T0)'.
 Note that both the * and / operators have been translated to 'multiply'
 (with parameters in a different order).  This relational form will be seen to 
 be insensitive to which parameters are instantiated and which are not,
 thus providing invertibility.
 .pp
 The third part is to provide a small number of control 'predicates' able
 to guide the search for solutions.
 The resulting system is sufficiently powerful to be able to
 solve equations such as '0 is X*(X-2)+1' directly.
 .pp
 The next section gives a somewhat more formal description of arithmetic
 implemented this way.  Section III gives examples of its use and of the
 types of equations that are soluble within it.  Section IV compares our 
 approach here with that of other interval arithmetic systems and with
 constraint networks.  Section V notes some possibilities for a parallel 
 dataflow implementation which avoids many of the difficulties of traditional
 dataflow execution.
 .sh "II. Interval Representation"
 .pp
 Define $II(RR)$ to be the set of intervals over the real numbers, $RR$.
 So that the lower and upper bounds of each interval can be operated on as 
 single entities they will be treated as pairs of values.  
 Each value having an attribute of being open or closed 
 and an associated number.  For example the interval (0.31,0.33] will be
 treated as the pair $lo 0.31$ and $hi 0.33$.  
 The brackets are superscripted to minimize visual confusion when writing 
 bounds not in pairs.
 As well as the usual real numbers 
 $- inf$ and $inf$, will be used as part of bounds,
 with the properties that $ALL x mo RR~- inf < x < inf$ 
 The set of all upper bounds is defined as:
 .sp
 	$H(RR)~==~\{ x sup b : x mo RR union \{ inf \},~b mo \{ hi , ho \} \} $
 .sp
 and the set of lower bounds as:
 .sp
 	$L(RR)~==~\{ \& sup b x : x mo RR union \{ -inf \},~b mo \{ li , lo \} \} $
 .sp
 The set of all intervals is then defined by:
 .sp
 	$II(RR)~==~L(RR) times H(RR)$
 .sp
 Using this notation rather loosely intervals will be identified 
 with the appropriate subset of the reals.  For example the following 
 identifications will be made:
 .sp
 	$[0.31,15)~=~< li 0.31, ho 15 >~=~ \{ x mo RR: 0.31 <= x < 15 \}$
 .br
 	$[-inf,inf]~=~< li -inf , hi inf> ~=~ RR$
 .br
 and	$(-0.51,inf]~=~< lo -0.51 , hi inf >~=~ \{ x mo RR: -0.51 < x \}$
 .sp
 The definition above carefully excludes 'intervals' such as $[inf,inf]$
 in the interests of simplifying some of the later development.
 .pp
 The finite arithmetic available on computers is represented by a
 finite subset, $SS$, of $RR$.  It is assumed that 
 $0,1 mo SS$.  The set of intervals allowed over $SS$ is $II(SS)$ defined as 
 above for $RR$.  $SS$ might be a bounded set of integers or some more complex
 set representable by floating point numbers.
 .pp
 There is a useful mapping from $II(RR)$ to $II(SS)$ which associates
 with each real interval the best approximation to it:
 .nf
 .sp
 	$approx(<l,h>)~==~<l prime, h prime >$
 .br
 where	$l prime mo L(SS), l prime <= l, and NEXIST x mo L(SS)~l prime <x<l$
 .br
 	$h prime mo H(SS), h prime >= h, and NEXIST x mo H(SS)~h prime >x>h$.
 .pp
 The ordering on the bounds is defined as follows:
 .sp
 	$l < h, ~ l,h mo II(RR)~ <->~l= \& sup u x and h = \& sup v y$
 			and $x<y$ or $x=y$ and $u<v$
 where 	$ ho, li, hi, lo$ occur in this order and $x<y$ is the usual ordering 
 on the reals extended to include $-inf$ and $inf$.  
 The ordering on the brackets is carefully chosen so that intervals such as
 (3.1,3.1) map to the empty set.
 Given this definition it is easily verified that 'approx' gives
 the smallest interval in $II(SS)$ enclosing the original interval in $II(RR)$.
 The definition also allows the intersection of two intervals to be readily 
 computed:
 .sp
 	$<l sub 1,h sub 1> inter <l sub 2, h sub 2>~=~$
 		$< max(l sub 1 , l sub 2), min(h sub 1 , h sub 2 )>$
 .sp
 Also an interval $<l,h>$ will be empty if $l > h$.  For example, according
 to the definition above $lo 3.1 > ho 3.1$ so (3.1,3.1) is correctly computed
 as being empty.
 .pp
 Intervals are introduced into logic by extending the notion of 
 unification.  A logical variable I can be bound to an interval $I$,
 written I:$I$.  Unification of I to any other value J gives the following
 results:
 .LB
 .NP
 if J is unbound then it is bound to the interval, J:$I$;
 .NP
 if J is bound to the interval J:$J$ then
 I and J are bound to the same interval $I inter J$.
 The unification fails if $I inter J$ is empty.
 .NP
 a constant C is equivalent to $approx([C,C])$;
 .NP
 if J is bound to anything other than an interval the unification fails.
 .LE
 .pp
 Below are some simple Prolog programs and the bindings that result when
 they are run (assuming as usual two decimal places of accuracy).
 .sp
 .nf
 	X = 3.141592
 	X:(3.1,3.2)
 	X > -5.22, Y <= 31, X=Y
 	X:(-5.3,31]  Y:(-5.3,31]
 .fi
 .sp
 .rh "Addition"
 .pp
 Addition is implemented by the relation 'add(I,J,K)'
 which says that K is the sum of I and J.
 \&'add' can be viewed as a relation on $RR times RR times RR$ defined
 by:
 .sp
 	$add ~==~ \{<x,y,z>:x,y,z mo  RR,~x+y=z\}$
 .sp
 Given that I,J, and K are initially bound to the intervals $I,J,K$ 
 respectively, the fully correct set of solutions with the additional
 constraint 'add(I,J,K)' is given by all triples in the set 
 $add inter I times J times K$.  
 This set is however infinite, to get an effectively computable procedure
 I will approximate the additional constraint by binding I, J and K
 to smaller intervals.  
 So as not to exclude any possible triples the new bindings, 
 $I prime, J prime roman ~and~ K prime$ must obey:
 .sp
 	$add inter I times J times K ~subeq~ I prime times J prime times K prime$
 .sp
 Figure 1 illustrates this process of
 .ul
 narrowing.
 The initial bindings are I:[0,2], J:[1,3]
 and K:[4,6].  After applying 'add(I,J,K)' the smallest possible bindings
 are I:[1,2], J:[2,3] and K:[4,5].  Note that all three intervals have been
 narrowed.
 .pp
 It can easily be seen that:
 .sp
 	$I prime supeq \{x:<x,y,z> ~mo~ add inter I times J times K \}$
 .br
 	$J prime supeq \{y:<x,y,z> ~mo~ add inter I times J times K \}$
 .br
 	$K prime supeq \{z:<x,y,z> ~mo~ add inter I times J times K \}$
 .sp
 If there are 'holes' in the projected set then $I prime$ will be a strict
 superset of the projection, however, $I prime$ will still 
 be uniquely determined by the projection.  This will be true of any
 subset of $RR sup n$ not just $add$.
 .pp
 In general for
 .sp
 	$R subeq RR sup n,~ I sub 1 , I sub 2 , ... , I sub n mo II(RR)$
 and $I prime  sub 1 , I prime  sub 2 , ... , I prime  sub n mo II(RR)$
 .sp
 I will write 
 .br
 	$R inter I sub 1 times I sub 2 times ... times I sub n nar 
 I prime sub 1 times I prime sub 2 times ... times I prime sub n$
 .br 
 when the intervals $I prime sub 1 , I prime sub 2 , ... , I prime sub n$
 are the uniquely determined smallest intervals including all solutions.
 .sh "IV. Comparison with Interval Arithmetic"
 .pp
 .sh "V.  Implementation"
 .pp
 .sh "VI. Summary"
 .sh "Acknowledgements"
 .sh "References"
 .ls 1
 .[
 $LIST$
 .]
--- a/src/test/java/org/xerial/snappy/testdata/calgary/paper6
+++ b/src/test/java/org/xerial/snappy/testdata/calgary/paper6
--- a/src/test/java/org/xerial/snappy/testdata/calgary/pic
+++ b/src/test/java/org/xerial/snappy/testdata/calgary/pic
--- a/src/test/java/org/xerial/snappy/testdata/calgary/progc
+++ b/src/test/java/org/xerial/snappy/testdata/calgary/progc
--- a/src/test/java/org/xerial/snappy/testdata/calgary/progl
+++ b/src/test/java/org/xerial/snappy/testdata/calgary/progl
--- a/src/test/java/org/xerial/snappy/testdata/calgary/progp
+++ b/src/test/java/org/xerial/snappy/testdata/calgary/progp
--- a/src/test/java/org/xerial/snappy/testdata/calgary/trans
+++ b/src/test/java/org/xerial/snappy/testdata/calgary/trans