mirror of https://github.com/tc39/test262.git
Update Intl.Segmenter to latest spec
Remove tests for methods that were removed in the latest revision of the spec. Tests still need to be added for: 1.5.2.1 %SegmentsPrototype%.containing ( index ); 1.6.2.1 %SegmentIteratorPrototype%.next ( ); 1.6.2.2 %SegmentIteratorPrototype% [ @@toStringTag ].
This commit is contained in:
parent
290ceba31f
commit
c6fd7eb602
|
@ -1,26 +0,0 @@
|
|||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-segment-iterator-prototype
|
||||
description: Verifies that next, following and preceding throw a TypeError when called on an incompatible receiver.
|
||||
features: [Intl.Segmenter]
|
||||
---*/
|
||||
|
||||
let seg = new Intl.Segmenter();
|
||||
let segmentIterator = seg.segment('text');
|
||||
let prototype = Object.getPrototypeOf(segmentIterator);
|
||||
const otherReceivers = [
|
||||
1, 123.45, undefined, null, "string", true, false,
|
||||
Intl, Intl.Segmenter, Intl.Segmenter.prototype,
|
||||
prototype,
|
||||
new Intl.Segmenter(),
|
||||
new Intl.Collator(),
|
||||
new Intl.DateTimeFormat(),
|
||||
new Intl.NumberFormat(),
|
||||
];
|
||||
for (const rec of otherReceivers) {
|
||||
assert.throws(TypeError, () => prototype.next.call(rec));
|
||||
assert.throws(TypeError, () => prototype.following.call(rec));
|
||||
assert.throws(TypeError, () => prototype.preceding.call(rec));
|
||||
}
|
|
@ -1,14 +0,0 @@
|
|||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-segment-iterator-prototype
|
||||
description: Verifies that breakType is undefined on a newly created segment iterator.
|
||||
features: [Intl.Segmenter]
|
||||
---*/
|
||||
|
||||
const segmenter = new Intl.Segmenter();
|
||||
const text = "Hello World, Test 123! Foo Bar. How are you?";
|
||||
const iter = segmenter.segment(text);
|
||||
|
||||
assert.sameValue(iter.breakType, undefined);
|
|
@ -1,36 +0,0 @@
|
|||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-segment-iterator-prototype
|
||||
description: Verifies the behavior of %SegmentIteratorPrototype%.following for coercible and out-of-range arguments.
|
||||
features: [Intl.Segmenter]
|
||||
---*/
|
||||
|
||||
const segmenter = new Intl.Segmenter();
|
||||
const text = "Hello World, Test 123! Foo Bar. How are you?";
|
||||
const iter = segmenter.segment(text);
|
||||
|
||||
assert.sameValue("function", typeof iter.following);
|
||||
|
||||
const tests = [
|
||||
["3", 4],
|
||||
["ABC", 1],
|
||||
[null, 1],
|
||||
[true, 2],
|
||||
[1.4, 2],
|
||||
[{ valueOf() { return 5; } }, 6],
|
||||
[0, 1],
|
||||
[text.length - 1, text.length],
|
||||
];
|
||||
|
||||
for (const [input, index] of tests) {
|
||||
assert.sameValue(iter.following(input), false);
|
||||
assert.sameValue(iter.index, index, String(input));
|
||||
}
|
||||
|
||||
assert.throws(RangeError, () => iter.following(-3));
|
||||
// 1.5.3.2 %SegmentIteratorPrototype%.following( [ from ] )
|
||||
// 3.b If from >= the length of iterator.[[SegmentIteratorString]], throw a RangeError exception.
|
||||
assert.throws(RangeError, () => iter.following(text.length));
|
||||
assert.throws(RangeError, () => iter.following(text.length + 1));
|
|
@ -1,18 +0,0 @@
|
|||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-segment-iterator-prototype
|
||||
description: Verifies that a new segment iterator starts at index 0 with an undefined breakType for every granularity.
|
||||
features: [Intl.Segmenter]
|
||||
---*/
|
||||
|
||||
const text = "Hello World, Test 123! Foo Bar. How are you?";
|
||||
for (const granularity of ["grapheme", "word", "sentence"]) {
|
||||
const segmenter = new Intl.Segmenter("en", { granularity });
|
||||
const iter = segmenter.segment(text);
|
||||
|
||||
assert.sameValue(typeof iter.index, "number");
|
||||
assert.sameValue(iter.index, 0);
|
||||
assert.sameValue(iter.breakType, undefined);
|
||||
}
|
|
@ -1,14 +0,0 @@
|
|||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-segment-iterator-prototype
|
||||
description: Verifies that a segment iterator exposes a next function.
|
||||
features: [Intl.Segmenter]
|
||||
---*/
|
||||
|
||||
const segmenter = new Intl.Segmenter();
|
||||
const text = "Hello World, Test 123! Foo Bar. How are you?";
|
||||
const iter = segmenter.segment(text);
|
||||
|
||||
assert.sameValue(typeof iter.next, "function");
|
|
@ -1,14 +0,0 @@
|
|||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-segment-iterator-prototype
|
||||
description: Verifies that the initial index of a segment iterator is 0.
|
||||
features: [Intl.Segmenter]
|
||||
---*/
|
||||
|
||||
const segmenter = new Intl.Segmenter();
|
||||
const text = "Hello World, Test 123! Foo Bar. How are you?";
|
||||
const iter = segmenter.segment(text);
|
||||
|
||||
assert.sameValue(iter.index, 0);
|
|
@ -1,40 +0,0 @@
|
|||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-segment-iterator-prototype
|
||||
description: Verifies the behavior of %SegmentIteratorPrototype%.preceding for coercible and out-of-range arguments.
|
||||
features: [Intl.Segmenter]
|
||||
---*/
|
||||
|
||||
const segmenter = new Intl.Segmenter();
|
||||
const text = "Hello World, Test 123! Foo Bar. How are you?";
|
||||
const iter = segmenter.segment(text);
|
||||
|
||||
assert.sameValue("function", typeof iter.following);
|
||||
|
||||
const tests = [
|
||||
["3", 2],
|
||||
[true, 0],
|
||||
[1.4, 0],
|
||||
[{ valueOf() { return 5; } }, 4],
|
||||
[text.length - 1, text.length - 2],
|
||||
[text.length, text.length - 1],
|
||||
];
|
||||
|
||||
for (const [input, index] of tests) {
|
||||
assert.sameValue(iter.preceding(input), false);
|
||||
assert.sameValue(iter.index, index, String(input));
|
||||
}
|
||||
|
||||
assert.throws(RangeError, () => iter.preceding("ABC"));
|
||||
assert.throws(RangeError, () => iter.preceding(null));
|
||||
assert.throws(RangeError, () => iter.preceding(-3));
|
||||
|
||||
// 1.5.3.3 %SegmentIteratorPrototype%.preceding( [ from ] )
|
||||
// 3.b If ... from = 0, throw a RangeError exception.
|
||||
assert.throws(RangeError, () => iter.preceding(0));
|
||||
|
||||
// 1.5.3.3 %SegmentIteratorPrototype%.preceding( [ from ] )
|
||||
// 3.b If from > the length of iterator.[[SegmentIteratorString]] ..., throw a RangeError exception.
|
||||
assert.throws(RangeError, () => iter.preceding(text.length + 1));
|
|
@ -1,27 +0,0 @@
|
|||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-segment-iterator-prototype
|
||||
description: Verifies the property descriptors of the %SegmentIteratorPrototype% methods and accessor properties.
|
||||
includes: [propertyHelper.js]
|
||||
features: [Intl.Segmenter]
|
||||
---*/
|
||||
|
||||
const prototype = Object.getPrototypeOf((new Intl.Segmenter()).segment('text'));
|
||||
for (const func of ["next", "following", "preceding"]) {
|
||||
verifyProperty(prototype, func, {
|
||||
writable: true,
|
||||
enumerable: false,
|
||||
configurable: true,
|
||||
});
|
||||
}
|
||||
|
||||
for (const property of ["index", "breakType"]) {
|
||||
let desc = Object.getOwnPropertyDescriptor(prototype, property);
|
||||
assert.sameValue(desc.get.name, `get ${property}`);
|
||||
assert.sameValue(typeof desc.get, "function")
|
||||
assert.sameValue(desc.set, undefined);
|
||||
assert.sameValue(desc.enumerable, false);
|
||||
assert.sameValue(desc.configurable, true);
|
||||
}
|
|
@ -1,43 +0,0 @@
|
|||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-Intl.Segmenter.prototype.segment
|
||||
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
|
||||
info: |
|
||||
Intl.Segmenter.prototype.segment( string )
|
||||
features: [Intl.Segmenter]
|
||||
---*/
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "grapheme"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let prev = 0;
|
||||
let segments = [];
|
||||
while (!iter.following()) {
|
||||
assert.sameValue(undefined, iter.breakType);
|
||||
assert(iter.index >= 0);
|
||||
assert(iter.index <= text.length);
|
||||
assert(iter.index > prev);
|
||||
segments.push(text.substring(prev, iter.index));
|
||||
prev = iter.index;
|
||||
}
|
||||
assert.sameValue(text, segments.join(""));
|
||||
}
|
|
@ -29,22 +29,11 @@ for (const text of [
|
|||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
let segments = [];
|
||||
// Create another %SegmentIterator% to compare with result from the one that
|
||||
// is created by the for-of loop.
|
||||
let iter = seg.segment(text);
|
||||
let prev = 0;
|
||||
for (const v of seg.segment(text)) {
|
||||
assert.sameValue(undefined, v.breakType);
|
||||
assert.sameValue(undefined, v.isWordLike);
|
||||
assert.sameValue("string", typeof v.segment);
|
||||
assert(v.segment.length > 0);
|
||||
segments.push(v.segment);
|
||||
|
||||
// manually advance the iter.
|
||||
assert.sameValue(iter.following(), false);
|
||||
assert.sameValue(iter.breakType, v.breakType);
|
||||
assert.sameValue(text.substring(prev, iter.index), v.segment);
|
||||
prev = iter.index;
|
||||
}
|
||||
assert(iter.following());
|
||||
assert.sameValue(text, segments.join(''));
|
||||
}
|
||||
|
|
|
@ -1,45 +0,0 @@
|
|||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-Intl.Segmenter.prototype.segment
|
||||
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
|
||||
info: |
|
||||
Intl.Segmenter.prototype.segment( string )
|
||||
features: [Intl.Segmenter]
|
||||
---*/
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "grapheme"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let segments = [];
|
||||
let oldPos = -1;
|
||||
for (let result = iter.next(); !result.done; result = iter.next()) {
|
||||
const v = result.value;
|
||||
assert.sameValue(undefined, v.breakType);
|
||||
assert.sameValue("string", typeof v.segment);
|
||||
assert(v.segment.length > 0);
|
||||
segments.push(v.segment);
|
||||
assert.sameValue(typeof v.index, "number");
|
||||
assert(oldPos < v.index);
|
||||
oldPos = v.index;
|
||||
}
|
||||
assert.sameValue(text, segments.join(''));
|
||||
}
|
|
@ -1,49 +0,0 @@
|
|||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-Intl.Segmenter.prototype.segment
|
||||
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
|
||||
info: |
|
||||
Intl.Segmenter.prototype.segment( string )
|
||||
features: [Intl.Segmenter]
|
||||
---*/
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "grapheme"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let prev = text.length;
|
||||
let segments = [];
|
||||
iter.preceding(prev)
|
||||
assert.sameValue(undefined, iter.breakType)
|
||||
assert(iter.index >= 0);
|
||||
assert(iter.index < prev);
|
||||
segments.push(text.substring(iter.index, prev));
|
||||
prev = iter.index;
|
||||
while (!iter.preceding()) {
|
||||
assert.sameValue(undefined, iter.breakType);
|
||||
assert(iter.index >= 0);
|
||||
assert(iter.index <= text.length);
|
||||
assert(iter.index < prev);
|
||||
segments.push(text.substring(iter.index, prev));
|
||||
prev = iter.index;
|
||||
}
|
||||
assert.sameValue(text, segments.reverse().join(""));
|
||||
}
|
|
@ -1,43 +0,0 @@
|
|||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-Intl.Segmenter.prototype.segment
|
||||
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
|
||||
info: |
|
||||
Intl.Segmenter.prototype.segment( string )
|
||||
features: [Intl.Segmenter]
|
||||
---*/
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "sentence"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let prev = 0;
|
||||
let segments = [];
|
||||
while (!iter.following()) {
|
||||
assert(["sep", "term"].includes(iter.breakType), iter.breakType);
|
||||
assert(iter.index >= 0);
|
||||
assert(iter.index <= text.length);
|
||||
assert(iter.index > prev);
|
||||
segments.push(text.substring(prev, iter.index));
|
||||
prev = iter.index;
|
||||
}
|
||||
assert.sameValue(text, segments.join(""));
|
||||
}
|
|
@ -29,22 +29,11 @@ for (const text of [
|
|||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
let segments = [];
|
||||
// Create another %SegmentIterator% to compare with result from the one that
|
||||
// is created by the for-of loop.
|
||||
let iter = seg.segment(text);
|
||||
let prev = 0;
|
||||
for (const v of seg.segment(text)) {
|
||||
assert(["sep", "term"].includes(v.breakType), v.breakType);
|
||||
assert.sameValue(undefined, v.isWordLike);
|
||||
assert.sameValue("string", typeof v.segment);
|
||||
assert(v.segment.length > 0);
|
||||
segments.push(v.segment);
|
||||
|
||||
// manually advance the iter.
|
||||
assert.sameValue(iter.following(), false);
|
||||
assert.sameValue(iter.breakType, v.breakType);
|
||||
assert.sameValue(text.substring(prev, iter.index), v.segment);
|
||||
prev = iter.index;
|
||||
}
|
||||
assert(iter.following());
|
||||
assert.sameValue(text, segments.join(''));
|
||||
}
|
||||
|
|
|
@ -1,45 +0,0 @@
|
|||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-Intl.Segmenter.prototype.segment
|
||||
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
|
||||
info: |
|
||||
Intl.Segmenter.prototype.segment( string )
|
||||
features: [Intl.Segmenter]
|
||||
---*/
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "sentence"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let segments = [];
|
||||
let oldPos = -1;
|
||||
for (let result = iter.next(); !result.done; result = iter.next()) {
|
||||
const v = result.value;
|
||||
assert(["sep", "term"].includes(iter.breakType), iter.breakType);
|
||||
assert.sameValue("string", typeof v.segment);
|
||||
assert(v.segment.length > 0);
|
||||
segments.push(v.segment);
|
||||
assert.sameValue(typeof v.index, "number");
|
||||
assert(oldPos < v.index);
|
||||
oldPos = v.index;
|
||||
}
|
||||
assert.sameValue(text, segments.join(''));
|
||||
}
|
|
@ -1,49 +0,0 @@
|
|||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-Intl.Segmenter.prototype.segment
|
||||
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
|
||||
info: |
|
||||
Intl.Segmenter.prototype.segment( string )
|
||||
features: [Intl.Segmenter]
|
||||
---*/
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "sentence"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let prev = text.length;
|
||||
let segments = [];
|
||||
iter.preceding(prev);
|
||||
assert(["sep", "term"].includes(iter.breakType), iter.breakType);
|
||||
assert(iter.index >= 0);
|
||||
assert(iter.index < prev);
|
||||
segments.push(text.substring(iter.index, prev));
|
||||
prev = iter.index;
|
||||
while (!iter.preceding()) {
|
||||
assert(["sep", "term"].includes(iter.breakType), iter.breakType);
|
||||
assert(iter.index >= 0);
|
||||
assert(iter.index <= text.length);
|
||||
assert(iter.index < prev);
|
||||
segments.push(text.substring(iter.index, prev));
|
||||
prev = iter.index;
|
||||
}
|
||||
assert.sameValue(text, segments.reverse().join(""));
|
||||
}
|
|
@ -25,11 +25,8 @@ const tests = [
|
|||
|
||||
const segmenter = new Intl.Segmenter("en", { "granularity": "word" });
|
||||
for (const [args, expected] of tests) {
|
||||
const iterator = segmenter.segment(...args);
|
||||
const result = iterator.next().value;
|
||||
assert.sameValue(result.segment, expected, `Expected segment "${expected}", found "${result.segment}" for arguments ${args}`);
|
||||
assert(["word", "none"].includes(result.breakType), `Expected valid breakType, found "${result.breakType}" for arguments ${args}`);
|
||||
assert.sameValue(result.index, expected.length, `Expected index ${expected.length}, found ${result.index} for arguments ${args}`);
|
||||
const segments = segmenter.segment(...args);
|
||||
assert.sameValue(segments.string, expected, `Expected segment "${expected}", found "${segments.segment}" for arguments ${args}`);
|
||||
}
|
||||
|
||||
const symbol = Symbol();
|
||||
|
|
|
@ -1,43 +0,0 @@
|
|||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-Intl.Segmenter.prototype.segment
|
||||
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
|
||||
info: |
|
||||
Intl.Segmenter.prototype.segment( string )
|
||||
features: [Intl.Segmenter]
|
||||
---*/
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "word"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let prev = 0;
|
||||
let segments = [];
|
||||
while (!iter.following()) {
|
||||
assert(["word", "none"].includes(iter.breakType), iter.breakType);
|
||||
assert(iter.index >= 0);
|
||||
assert(iter.index <= text.length);
|
||||
assert(iter.index > prev);
|
||||
segments.push(text.substring(prev, iter.index));
|
||||
prev = iter.index;
|
||||
}
|
||||
assert.sameValue(text, segments.join(""));
|
||||
}
|
|
@ -29,22 +29,12 @@ for (const text of [
|
|||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
let segments = [];
|
||||
// Create another %SegmentIterator% to compare with result from the one that
|
||||
// is created by the for-of loop.
|
||||
let iter = seg.segment(text);
|
||||
let prev = 0;
|
||||
for (const v of seg.segment(text)) {
|
||||
assert(["word", "none"].includes(v.breakType), v.breakType);
|
||||
assert.sameValue("boolean", typeof v.isWordLike);
|
||||
assert([true, false].includes(v.isWordLike), v.isWordLike);
|
||||
assert.sameValue("string", typeof v.segment);
|
||||
assert(v.segment.length > 0);
|
||||
segments.push(v.segment);
|
||||
|
||||
// manually advance the iter.
|
||||
assert.sameValue(iter.following(), false);
|
||||
assert.sameValue(iter.breakType, v.breakType);
|
||||
assert.sameValue(text.substring(prev, iter.index), v.segment);
|
||||
prev = iter.index;
|
||||
}
|
||||
assert(iter.following());
|
||||
assert.sameValue(text, segments.join(''));
|
||||
}
|
||||
|
|
|
@ -1,45 +0,0 @@
|
|||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-Intl.Segmenter.prototype.segment
|
||||
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
|
||||
info: |
|
||||
Intl.Segmenter.prototype.segment( string )
|
||||
features: [Intl.Segmenter]
|
||||
---*/
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "word"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let segments = [];
|
||||
let oldPos = -1;
|
||||
for (let result = iter.next(); !result.done; result = iter.next()) {
|
||||
const v = result.value;
|
||||
assert(["word", "none"].includes(iter.breakType), iter.breakType);
|
||||
assert.sameValue("string", typeof v.segment);
|
||||
assert(v.segment.length > 0);
|
||||
segments.push(v.segment);
|
||||
assert.sameValue(typeof v.index, "number");
|
||||
assert(oldPos < v.index);
|
||||
oldPos = v.index;
|
||||
}
|
||||
assert.sameValue(text, segments.join(''));
|
||||
}
|
|
@ -1,49 +0,0 @@
|
|||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// This code is governed by the BSD license found in the LICENSE file.
|
||||
|
||||
/*---
|
||||
esid: sec-Intl.Segmenter.prototype.segment
|
||||
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
|
||||
info: |
|
||||
Intl.Segmenter.prototype.segment( string )
|
||||
features: [Intl.Segmenter]
|
||||
---*/
|
||||
|
||||
const seg = new Intl.Segmenter([], {granularity: "word"})
|
||||
for (const text of [
|
||||
"Hello world!", // English
|
||||
" Hello world! ", // English with space before/after
|
||||
" Hello world? Foo bar!", // English
|
||||
"Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
|
||||
"Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
|
||||
"Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
|
||||
"Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
|
||||
"הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew,
|
||||
"ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
|
||||
"भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
|
||||
"ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
|
||||
"'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్సైట్లో పెట్టవద్దు'", // Telugu
|
||||
"台北》抹黑柯P失敗?朱學恒酸:姚文智氣pupu嗆大老闆", // Chinese
|
||||
"วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
|
||||
"九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
|
||||
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
|
||||
]) {
|
||||
const iter = seg.segment(text);
|
||||
let prev = text.length;
|
||||
let segments = [];
|
||||
iter.preceding(prev);
|
||||
assert(["word", "none"].includes(iter.breakType), iter.breakType);
|
||||
assert(iter.index >= 0);
|
||||
assert(iter.index < prev);
|
||||
segments.push(text.substring(iter.index, prev));
|
||||
prev = iter.index;
|
||||
while (!iter.preceding()) {
|
||||
assert(["word", "none"].includes(iter.breakType), iter.breakType);
|
||||
assert(iter.index >= 0);
|
||||
assert(iter.index <= text.length);
|
||||
assert(iter.index < prev);
|
||||
segments.push(text.substring(iter.index, prev));
|
||||
prev = iter.index;
|
||||
}
|
||||
assert.sameValue(text, segments.reverse().join(""));
|
||||
}
|
Loading…
Reference in New Issue