Update Intl.Segmenter to latest spec

Remove tests for methods that were removed in the latest revision of the spec.

Still need to add tests for:
1.5.2.1 %SegmentsPrototype%.containing ( index )
1.6.2.1 %SegmentIteratorPrototype%.next ()
1.6.2.2 %SegmentIteratorPrototype% [ @@toStringTag ]
This commit is contained in:
Frank Tang 2020-06-01 23:38:46 -07:00 committed by Rick Waldron
parent 290ceba31f
commit c6fd7eb602
21 changed files with 6 additions and 641 deletions

View File

@ -1,26 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-segment-iterator-prototype
description: Verifies the behavior for the iterators.
features: [Intl.Segmenter]
---*/
// Each method looked up on the segment iterator's prototype must throw a
// TypeError when invoked on a receiver from the list below (which includes
// the prototype object itself).
const seg = new Intl.Segmenter();
const segmentIterator = seg.segment('text');
const prototype = Object.getPrototypeOf(segmentIterator);
const methodNames = ["next", "following", "preceding"];

// Guard: if a method were missing, `prototype[name].call` would itself raise a
// TypeError and the receiver checks below would pass without testing anything.
for (const name of methodNames) {
  assert.sameValue(typeof prototype[name], "function", `${name} is a function`);
}

const otherReceivers = [
  1, 123.45, undefined, null, "string", true, false,
  Intl, Intl.Segmenter, Intl.Segmenter.prototype,
  prototype,
  new Intl.Segmenter(),
  new Intl.Collator(),
  new Intl.DateTimeFormat(),
  new Intl.NumberFormat(),
];
for (const rec of otherReceivers) {
  for (const name of methodNames) {
    assert.throws(TypeError, () => prototype[name].call(rec), `${name} with a foreign receiver`);
  }
}

View File

@ -1,14 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-segment-iterator-prototype
description: Verifies the behavior for the iterators.
features: [Intl.Segmenter]
---*/
// Before any movement, the object returned by segment() reads `breakType`
// as undefined.
const text = "Hello World, Test 123! Foo Bar. How are you?";
const iter = new Intl.Segmenter().segment(text);
const initialBreakType = iter.breakType;
assert.sameValue(initialBreakType, undefined);

View File

@ -1,36 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-segment-iterator-prototype
description: Verifies the behavior for the iterators.
features: [Intl.Segmenter]
---*/
// Exercises following( [ from ] ) with explicit `from` arguments and checks
// both its return value and where `iter.index` lands afterwards.
const segmenter = new Intl.Segmenter();
const text = "Hello World, Test 123! Foo Bar. How are you?";
const iter = segmenter.segment(text);
assert.sameValue(typeof iter.following, "function");

// Each entry is [from, expected iter.index after following(from)]. In every
// row the expected index is one past the integer value `from` converts to.
const tests = [
  ["3", 4],
  ["ABC", 1],
  [null, 1],
  [true, 2],
  [1.4, 2],
  [{ valueOf() { return 5; } }, 6],
  [0, 1],
  [text.length - 1, text.length],
];
for (const [input, index] of tests) {
  assert.sameValue(iter.following(input), false);
  assert.sameValue(iter.index, index, String(input));
}

// Negative positions are rejected.
assert.throws(RangeError, () => iter.following(-3));
// 1.5.3.2 %SegmentIteratorPrototype%.following( [ from ] )
// 3.b If from >= the length of iterator.[[SegmentIteratorString]], throw a RangeError exception.
assert.throws(RangeError, () => iter.following(text.length));
assert.throws(RangeError, () => iter.following(text.length + 1));

View File

@ -1,18 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-segment-iterator-prototype
description: Verifies the behavior for the iterators.
features: [Intl.Segmenter]
---*/
// For each granularity, a freshly created iterator reports a numeric index of
// 0 and an undefined breakType.
const text = "Hello World, Test 123! Foo Bar. How are you?";
const granularities = ["grapheme", "word", "sentence"];
granularities.forEach((granularity) => {
  const iter = new Intl.Segmenter("en", { granularity }).segment(text);
  assert.sameValue(typeof iter.index, "number");
  assert.sameValue(iter.index, 0);
  assert.sameValue(iter.breakType, undefined);
});

View File

@ -1,14 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-segment-iterator-prototype
description: Verifies the behavior for the iterators.
features: [Intl.Segmenter]
---*/
// The object returned by segment() exposes `next` as a function.
const text = "Hello World, Test 123! Foo Bar. How are you?";
const iter = new Intl.Segmenter().segment(text);
const nextType = typeof iter.next;
assert.sameValue(nextType, "function");

View File

@ -1,14 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-segment-iterator-prototype
description: Verifies the behavior for the iterators.
features: [Intl.Segmenter]
---*/
// Before any movement, the object returned by segment() reports index 0.
const text = "Hello World, Test 123! Foo Bar. How are you?";
const iter = new Intl.Segmenter().segment(text);
const initialIndex = iter.index;
assert.sameValue(initialIndex, 0);

View File

@ -1,40 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-segment-iterator-prototype
description: Verifies the behavior for the iterators.
features: [Intl.Segmenter]
---*/
// Exercises preceding( [ from ] ) with explicit `from` arguments and checks
// both its return value and where `iter.index` lands afterwards.
const segmenter = new Intl.Segmenter();
const text = "Hello World, Test 123! Foo Bar. How are you?";
const iter = segmenter.segment(text);
// This file tests preceding(), so that is the method whose presence is checked.
assert.sameValue(typeof iter.preceding, "function");

// Each entry is [from, expected iter.index after preceding(from)]. In every
// row the expected index is one before the integer value `from` converts to.
const tests = [
  ["3", 2],
  [true, 0],
  [1.4, 0],
  [{ valueOf() { return 5; } }, 4],
  [text.length - 1, text.length - 2],
  [text.length, text.length - 1],
];
for (const [input, index] of tests) {
  assert.sameValue(iter.preceding(input), false);
  assert.sameValue(iter.index, index, String(input));
}

// These inputs are all expected to be rejected with a RangeError.
assert.throws(RangeError, () => iter.preceding("ABC"));
assert.throws(RangeError, () => iter.preceding(null));
assert.throws(RangeError, () => iter.preceding(-3));
// 1.5.3.3 %SegmentIteratorPrototype%.preceding( [ from ] )
// 3.b If ... from = 0, throw a RangeError exception.
assert.throws(RangeError, () => iter.preceding(0));
// 1.5.3.3 %SegmentIteratorPrototype%.preceding( [ from ] )
// 3.b If from > the length of iterator.[[SegmentIteratorString]] ... , throw a RangeError exception.
assert.throws(RangeError, () => iter.preceding(text.length + 1));

View File

@ -1,27 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-segment-iterator-prototype
description: Verifies the behavior for the iterators.
includes: [propertyHelper.js]
features: [Intl.Segmenter]
---*/
// Checks the property descriptors found on the prototype of the object
// returned by segment(): three methods and two getter-only accessors.
const prototype = Object.getPrototypeOf((new Intl.Segmenter()).segment('text'));

// Methods: writable, non-enumerable, configurable.
for (const func of ["next", "following", "preceding"]) {
  verifyProperty(prototype, func, {
    writable: true,
    enumerable: false,
    configurable: true,
  });
}

// Accessors: a named getter, no setter, non-enumerable, configurable.
for (const property of ["index", "breakType"]) {
  const desc = Object.getOwnPropertyDescriptor(prototype, property);
  // Validate the descriptor and its getter before dereferencing them, so a
  // missing property fails as an assertion instead of an unrelated TypeError.
  assert.notSameValue(desc, undefined, `${property} is an own property`);
  assert.sameValue(typeof desc.get, "function");
  assert.sameValue(desc.get.name, `get ${property}`);
  assert.sameValue(desc.set, undefined);
  assert.sameValue(desc.enumerable, false);
  assert.sameValue(desc.configurable, true);
}

View File

@ -1,43 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-Intl.Segmenter.prototype.segment
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
info: |
Intl.Segmenter.prototype.segment( string )
features: [Intl.Segmenter]
---*/
// Walks each sample text forward with following() on a grapheme segmenter and
// checks that the reported indices strictly increase and partition the text.
const seg = new Intl.Segmenter([], {granularity: "grapheme"});
for (const text of [
  "Hello world!", // English
  " Hello world! ", // English with space before/after
  " Hello world? Foo bar!", // English
  "Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
  "Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
  "Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
  "Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
  "הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew
  "ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
  "भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
  "ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
  "'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
  "台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
  "วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
  "九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
  "법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
  const iter = seg.segment(text);
  const segments = [];
  let prev = 0;
  // The loop continues for as long as following() returns a falsy value.
  while (!iter.following()) {
    assert.sameValue(iter.breakType, undefined);
    assert(iter.index >= 0);
    assert(iter.index <= text.length);
    assert(iter.index > prev);
    segments.push(text.substring(prev, iter.index));
    prev = iter.index;
  }
  // The collected pieces must reassemble into the original text.
  assert.sameValue(segments.join(""), text);
}

View File

@ -29,22 +29,11 @@ for (const text of [
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
let segments = [];
// Create another %SegmentIterator% to compare with result from the one that
// created in the for of loop.
let iter = seg.segment(text);
let prev = 0;
for (const v of seg.segment(text)) {
assert.sameValue(undefined, v.breakType);
assert.sameValue(undefined, v.isWordLike);
assert.sameValue("string", typeof v.segment);
assert(v.segment.length > 0);
segments.push(v.segment);
// manually advance the iter.
assert.sameValue(iter.following(), false);
assert.sameValue(iter.breakType, v.breakType);
assert.sameValue(text.substring(prev, iter.index), v.segment);
prev = iter.index;
}
assert(iter.following());
assert.sameValue(text, segments.join(''));
}

View File

@ -1,45 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-Intl.Segmenter.prototype.segment
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
info: |
Intl.Segmenter.prototype.segment( string )
features: [Intl.Segmenter]
---*/
// Drives a grapheme segmenter's iterator with explicit next() calls and checks
// each yielded value plus that the segments reassemble into the input text.
const seg = new Intl.Segmenter([], {granularity: "grapheme"});
for (const text of [
  "Hello world!", // English
  " Hello world! ", // English with space before/after
  " Hello world? Foo bar!", // English
  "Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
  "Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
  "Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
  "Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
  "הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew
  "ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
  "भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
  "ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
  "'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
  "台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
  "วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
  "九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
  "법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
  const iter = seg.segment(text);
  const segments = [];
  // Starts below any valid index so the first yielded index passes the check.
  let oldPos = -1;
  for (let result = iter.next(); !result.done; result = iter.next()) {
    const v = result.value;
    assert.sameValue(v.breakType, undefined);
    assert.sameValue(typeof v.segment, "string");
    assert(v.segment.length > 0);
    segments.push(v.segment);
    assert.sameValue(typeof v.index, "number");
    // Indices must strictly increase from one yielded value to the next.
    assert(oldPos < v.index);
    oldPos = v.index;
  }
  assert.sameValue(segments.join(''), text);
}

View File

@ -1,49 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-Intl.Segmenter.prototype.segment
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
info: |
Intl.Segmenter.prototype.segment( string )
features: [Intl.Segmenter]
---*/
// Walks each sample text backward with preceding() on a grapheme segmenter,
// starting from the end of the text, and checks that the reported indices
// strictly decrease and partition the text.
const seg = new Intl.Segmenter([], {granularity: "grapheme"});
for (const text of [
  "Hello world!", // English
  " Hello world! ", // English with space before/after
  " Hello world? Foo bar!", // English
  "Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
  "Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
  "Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
  "Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
  "הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew
  "ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
  "भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
  "ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
  "'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
  "台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
  "วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
  "九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
  "법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
  const iter = seg.segment(text);
  const segments = [];
  let prev = text.length;
  // First step: position the iterator relative to the end of the text.
  iter.preceding(prev);
  assert.sameValue(iter.breakType, undefined);
  assert(iter.index >= 0);
  assert(iter.index < prev);
  segments.push(text.substring(iter.index, prev));
  prev = iter.index;
  // Remaining steps: continue for as long as preceding() returns a falsy value.
  while (!iter.preceding()) {
    assert.sameValue(iter.breakType, undefined);
    assert(iter.index >= 0);
    assert(iter.index <= text.length);
    assert(iter.index < prev);
    segments.push(text.substring(iter.index, prev));
    prev = iter.index;
  }
  // Pieces were collected back to front, so reverse them before joining.
  assert.sameValue(segments.reverse().join(""), text);
}

View File

@ -1,43 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-Intl.Segmenter.prototype.segment
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
info: |
Intl.Segmenter.prototype.segment( string )
features: [Intl.Segmenter]
---*/
// Walks each sample text forward with following() on a sentence segmenter and
// checks the break type plus that the reported indices partition the text.
const seg = new Intl.Segmenter([], {granularity: "sentence"});
for (const text of [
  "Hello world!", // English
  " Hello world! ", // English with space before/after
  " Hello world? Foo bar!", // English
  "Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
  "Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
  "Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
  "Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
  "הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew
  "ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
  "भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
  "ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
  "'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
  "台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
  "วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
  "九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
  "법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
  const iter = seg.segment(text);
  const segments = [];
  let prev = 0;
  // The loop continues for as long as following() returns a falsy value.
  while (!iter.following()) {
    // Only these two break types are accepted for sentence granularity.
    assert(["sep", "term"].includes(iter.breakType), iter.breakType);
    assert(iter.index >= 0);
    assert(iter.index <= text.length);
    assert(iter.index > prev);
    segments.push(text.substring(prev, iter.index));
    prev = iter.index;
  }
  // The collected pieces must reassemble into the original text.
  assert.sameValue(segments.join(""), text);
}

View File

@ -29,22 +29,11 @@ for (const text of [
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
let segments = [];
// Create another %SegmentIterator% to compare with result from the one that
// created in the for of loop.
let iter = seg.segment(text);
let prev = 0;
for (const v of seg.segment(text)) {
assert(["sep", "term"].includes(v.breakType), v.breakType);
assert.sameValue(undefined, v.isWordLike);
assert.sameValue("string", typeof v.segment);
assert(v.segment.length > 0);
segments.push(v.segment);
// manually advance the iter.
assert.sameValue(iter.following(), false);
assert.sameValue(iter.breakType, v.breakType);
assert.sameValue(text.substring(prev, iter.index), v.segment);
prev = iter.index;
}
assert(iter.following());
assert.sameValue(text, segments.join(''));
}

View File

@ -1,45 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-Intl.Segmenter.prototype.segment
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
info: |
Intl.Segmenter.prototype.segment( string )
features: [Intl.Segmenter]
---*/
// Drives a sentence segmenter's iterator with explicit next() calls and checks
// each yielded value plus that the segments reassemble into the input text.
const seg = new Intl.Segmenter([], {granularity: "sentence"});
for (const text of [
  "Hello world!", // English
  " Hello world! ", // English with space before/after
  " Hello world? Foo bar!", // English
  "Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
  "Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
  "Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
  "Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
  "הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew
  "ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
  "भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
  "ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
  "'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
  "台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
  "วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
  "九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
  "법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
  const iter = seg.segment(text);
  const segments = [];
  // Starts below any valid index so the first yielded index passes the check.
  let oldPos = -1;
  for (let result = iter.next(); !result.done; result = iter.next()) {
    const v = result.value;
    // The break type is read from the iterator itself after each next() call.
    assert(["sep", "term"].includes(iter.breakType), iter.breakType);
    assert.sameValue(typeof v.segment, "string");
    assert(v.segment.length > 0);
    segments.push(v.segment);
    assert.sameValue(typeof v.index, "number");
    // Indices must strictly increase from one yielded value to the next.
    assert(oldPos < v.index);
    oldPos = v.index;
  }
  assert.sameValue(segments.join(''), text);
}

View File

@ -1,49 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-Intl.Segmenter.prototype.segment
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
info: |
Intl.Segmenter.prototype.segment( string )
features: [Intl.Segmenter]
---*/
// Walks each sample text backward with preceding() on a sentence segmenter,
// starting from the end of the text, and checks the break type plus that the
// reported indices strictly decrease and partition the text.
const seg = new Intl.Segmenter([], {granularity: "sentence"});
for (const text of [
  "Hello world!", // English
  " Hello world! ", // English with space before/after
  " Hello world? Foo bar!", // English
  "Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
  "Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
  "Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
  "Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
  "הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew
  "ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
  "भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
  "ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
  "'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
  "台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
  "วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
  "九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
  "법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
  const iter = seg.segment(text);
  const segments = [];
  let prev = text.length;
  // First step: position the iterator relative to the end of the text.
  iter.preceding(prev);
  assert(["sep", "term"].includes(iter.breakType), iter.breakType);
  assert(iter.index >= 0);
  assert(iter.index < prev);
  segments.push(text.substring(iter.index, prev));
  prev = iter.index;
  // Remaining steps: continue for as long as preceding() returns a falsy value.
  while (!iter.preceding()) {
    assert(["sep", "term"].includes(iter.breakType), iter.breakType);
    assert(iter.index >= 0);
    assert(iter.index <= text.length);
    assert(iter.index < prev);
    segments.push(text.substring(iter.index, prev));
    prev = iter.index;
  }
  // Pieces were collected back to front, so reverse them before joining.
  assert.sameValue(segments.reverse().join(""), text);
}

View File

@ -25,11 +25,8 @@ const tests = [
const segmenter = new Intl.Segmenter("en", { "granularity": "word" });
// NOTE(review): this span comes from a diff hunk whose +/- markers are not
// visible, so it may interleave removed and added lines — confirm against the
// real file before relying on it. Only the last failure message is changed.
for (const [args, expected] of tests) {
  const iterator = segmenter.segment(...args);
  const result = iterator.next().value;
  assert.sameValue(result.segment, expected, `Expected segment "${expected}", found "${result.segment}" for arguments ${args}`);
  assert(["word", "none"].includes(result.breakType), `Expected valid breakType, found "${result.breakType}" for arguments ${args}`);
  assert.sameValue(result.index, expected.length, `Expected index ${expected.length}, found ${result.index} for arguments ${args}`);
  const segments = segmenter.segment(...args);
  // Report the property that is actually compared (`string`); the previous
  // message interpolated `segments.segment` instead.
  assert.sameValue(segments.string, expected, `Expected string "${expected}", found "${segments.string}" for arguments ${args}`);
}
const symbol = Symbol();

View File

@ -1,43 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-Intl.Segmenter.prototype.segment
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
info: |
Intl.Segmenter.prototype.segment( string )
features: [Intl.Segmenter]
---*/
// Walks each sample text forward with following() on a word segmenter and
// checks the break type plus that the reported indices partition the text.
const seg = new Intl.Segmenter([], {granularity: "word"});
for (const text of [
  "Hello world!", // English
  " Hello world! ", // English with space before/after
  " Hello world? Foo bar!", // English
  "Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
  "Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
  "Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
  "Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
  "הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew
  "ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
  "भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
  "ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
  "'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
  "台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
  "วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
  "九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
  "법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
  const iter = seg.segment(text);
  const segments = [];
  let prev = 0;
  // The loop continues for as long as following() returns a falsy value.
  while (!iter.following()) {
    // Only these two break types are accepted for word granularity.
    assert(["word", "none"].includes(iter.breakType), iter.breakType);
    assert(iter.index >= 0);
    assert(iter.index <= text.length);
    assert(iter.index > prev);
    segments.push(text.substring(prev, iter.index));
    prev = iter.index;
  }
  // The collected pieces must reassemble into the original text.
  assert.sameValue(segments.join(""), text);
}

View File

@ -29,22 +29,12 @@ for (const text of [
"법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
let segments = [];
// Create another %SegmentIterator% to compare with result from the one that
// created in the for of loop.
let iter = seg.segment(text);
let prev = 0;
for (const v of seg.segment(text)) {
assert(["word", "none"].includes(v.breakType), v.breakType);
assert.sameValue("boolean", typeof v.isWordLike);
assert([true, false].includes(v.isWordLike), v.isWordLike);
assert.sameValue("string", typeof v.segment);
assert(v.segment.length > 0);
segments.push(v.segment);
// manually advance the iter.
assert.sameValue(iter.following(), false);
assert.sameValue(iter.breakType, v.breakType);
assert.sameValue(text.substring(prev, iter.index), v.segment);
prev = iter.index;
}
assert(iter.following());
assert.sameValue(text, segments.join(''));
}

View File

@ -1,45 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-Intl.Segmenter.prototype.segment
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
info: |
Intl.Segmenter.prototype.segment( string )
features: [Intl.Segmenter]
---*/
// Drives a word segmenter's iterator with explicit next() calls and checks
// each yielded value plus that the segments reassemble into the input text.
const seg = new Intl.Segmenter([], {granularity: "word"});
for (const text of [
  "Hello world!", // English
  " Hello world! ", // English with space before/after
  " Hello world? Foo bar!", // English
  "Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
  "Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
  "Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
  "Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
  "הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew
  "ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
  "भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
  "ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
  "'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
  "台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
  "วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
  "九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
  "법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
  const iter = seg.segment(text);
  const segments = [];
  // Starts below any valid index so the first yielded index passes the check.
  let oldPos = -1;
  for (let result = iter.next(); !result.done; result = iter.next()) {
    const v = result.value;
    // The break type is read from the iterator itself after each next() call.
    assert(["word", "none"].includes(iter.breakType), iter.breakType);
    assert.sameValue(typeof v.segment, "string");
    assert(v.segment.length > 0);
    segments.push(v.segment);
    assert.sameValue(typeof v.index, "number");
    // Indices must strictly increase from one yielded value to the next.
    assert(oldPos < v.index);
    oldPos = v.index;
  }
  assert.sameValue(segments.join(''), text);
}

View File

@ -1,49 +0,0 @@
// Copyright 2018 the V8 project authors. All rights reserved.
// This code is governed by the BSD license found in the LICENSE file.
/*---
esid: sec-Intl.Segmenter.prototype.segment
description: Verifies the behavior for the "segment" function of the Segmenter prototype object.
info: |
Intl.Segmenter.prototype.segment( string )
features: [Intl.Segmenter]
---*/
// Walks each sample text backward with preceding() on a word segmenter,
// starting from the end of the text, and checks the break type plus that the
// reported indices strictly decrease and partition the text.
const seg = new Intl.Segmenter([], {granularity: "word"});
for (const text of [
  "Hello world!", // English
  " Hello world! ", // English with space before/after
  " Hello world? Foo bar!", // English
  "Jedovatou mambu objevila žena v zahrádkářské kolonii.", // Czech
  "Việt Nam: Nhất thể hóa sẽ khác Trung Quốc?", // Vietnamese
  "Σοβαρές ενστάσεις Κομισιόν για τον προϋπολογισμό της Ιταλίας", // Greek
  "Решение Индии о покупке российских С-400 расценили как вызов США", // Russian
  "הרופא שהציל נשים והנערה ששועבדה ע", // Hebrew
  "ترامب للملك سلمان: أنا جاد للغاية.. عليك دفع المزيد", // Arabic
  "भारत की एस 400 मिसाइल के मुकाबले पाक की थाड, जानें कौन कितना ताकतवर", // Hindi
  "ரெட் அலர்ட் எச்சரிக்கை; புதுச்சேரியில் நாளை அரசு விடுமுறை!", // Tamil
  "'ఉత్తర్వులు అందే వరకు ఓటర్ల తుది జాబితాను వెబ్‌సైట్లో పెట్టవద్దు'", // Telugu
  "台北》抹黑柯P失敗朱學恒酸姚文智氣pupu嗆大老闆", // Chinese
  "วัดไทรตีระฆังเบาลงช่วงเข้าพรรษา เจ้าอาวาสเผยคนร้องเรียนรับผลกรรมแล้ว", // Thai
  "九州北部の一部が暴風域に入りました(日直予報士 2018年10月06日) - 日本気象協会 tenki.jp", // Japanese
  "법원 “다스 지분 처분권·수익권 모두 MB가 보유”", // Korean
]) {
  const iter = seg.segment(text);
  const segments = [];
  let prev = text.length;
  // First step: position the iterator relative to the end of the text.
  iter.preceding(prev);
  assert(["word", "none"].includes(iter.breakType), iter.breakType);
  assert(iter.index >= 0);
  assert(iter.index < prev);
  segments.push(text.substring(iter.index, prev));
  prev = iter.index;
  // Remaining steps: continue for as long as preceding() returns a falsy value.
  while (!iter.preceding()) {
    assert(["word", "none"].includes(iter.breakType), iter.breakType);
    assert(iter.index >= 0);
    assert(iter.index <= text.length);
    assert(iter.index < prev);
    segments.push(text.substring(iter.index, prev));
    prev = iter.index;
  }
  // Pieces were collected back to front, so reverse them before joining.
  assert.sameValue(segments.reverse().join(""), text);
}