Skip to content

Commit

Permalink
internal/language: bump script types to uint16 and update registry
Browse files Browse the repository at this point in the history
The IANA language-subtag-registry now contains more than 256 scripts,
causing the uint8 script types to overflow during table generation. The
internal script types are bumped to uint16, which should be more than
enough.

Fixes golang/go#45093

Change-Id: I58184902e6652f488521d084fce6e0b424121825
Reviewed-on: https://go-review.googlesource.com/c/text/+/304029
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Than McIntosh <[email protected]>
Run-TryBot: Roland Shoemaker <[email protected]>
Reviewed-by: Damien Neil <[email protected]>
Auto-Submit: Roland Shoemaker <[email protected]>
  • Loading branch information
rolandshoemaker authored and gopherbot committed Aug 17, 2022
1 parent ba9b0e1 commit b0ca10f
Show file tree
Hide file tree
Showing 10 changed files with 507 additions and 478 deletions.
9 changes: 9 additions & 0 deletions encoding/htmlindex/tables.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

53 changes: 30 additions & 23 deletions encoding/ianaindex/tables.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions encoding/internal/identifier/mib.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions internal/language/compact/tables.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 14 additions & 14 deletions internal/language/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -1209,12 +1209,12 @@ func (b *builder) writeLikelyData() {
type ( // generated types
likelyScriptRegion struct {
region uint16
script uint8
script uint16
flags uint8
}
likelyLangScript struct {
lang uint16
script uint8
script uint16
flags uint8
}
likelyLangRegion struct {
Expand All @@ -1226,7 +1226,7 @@ func (b *builder) writeLikelyData() {
likelyTag struct {
lang uint16
region uint16
script uint8
script uint16
}
)
var ( // generated variables
Expand Down Expand Up @@ -1279,7 +1279,7 @@ func (b *builder) writeLikelyData() {
log.Fatalf("region changed unexpectedly: %s -> %s", from, to)
}
likelyRegionGroup[id].lang = uint16(b.langIndex(to[0]))
likelyRegionGroup[id].script = uint8(b.script.index(to[1]))
likelyRegionGroup[id].script = uint16(b.script.index(to[1]))
likelyRegionGroup[id].region = uint16(b.region.index(to[2]))
} else {
regionToOther[r] = append(regionToOther[r], fromTo{from, to})
Expand All @@ -1293,11 +1293,11 @@ func (b *builder) writeLikelyData() {
list := langToOther[id]
if len(list) == 1 {
likelyLang[id].region = uint16(b.region.index(list[0].to[2]))
likelyLang[id].script = uint8(b.script.index(list[0].to[1]))
likelyLang[id].script = uint16(b.script.index(list[0].to[1]))
} else if len(list) > 1 {
likelyLang[id].flags = isList
likelyLang[id].region = uint16(len(likelyLangList))
likelyLang[id].script = uint8(len(list))
likelyLang[id].script = uint16(len(list))
for _, x := range list {
flags := uint8(0)
if len(x.from) > 1 {
Expand All @@ -1309,7 +1309,7 @@ func (b *builder) writeLikelyData() {
}
likelyLangList = append(likelyLangList, likelyScriptRegion{
region: uint16(b.region.index(x.to[2])),
script: uint8(b.script.index(x.to[1])),
script: uint16(b.script.index(x.to[1])),
flags: flags,
})
}
Expand All @@ -1324,21 +1324,21 @@ func (b *builder) writeLikelyData() {
list := regionToOther[id]
if len(list) == 1 {
likelyRegion[id].lang = uint16(b.langIndex(list[0].to[0]))
likelyRegion[id].script = uint8(b.script.index(list[0].to[1]))
likelyRegion[id].script = uint16(b.script.index(list[0].to[1]))
if len(list[0].from) > 2 {
likelyRegion[id].flags = scriptInFrom
}
} else if len(list) > 1 {
likelyRegion[id].flags = isList
likelyRegion[id].lang = uint16(len(likelyRegionList))
likelyRegion[id].script = uint8(len(list))
likelyRegion[id].script = uint16(len(list))
for i, x := range list {
if len(x.from) == 2 && i != 0 || i > 0 && len(x.from) != 3 {
log.Fatalf("unspecified script must be first in list: %v at %d", x.from, i)
}
x := likelyLangScript{
lang: uint16(b.langIndex(x.to[0])),
script: uint8(b.script.index(x.to[1])),
script: uint16(b.script.index(x.to[1])),
}
if len(list[0].from) > 2 {
x.flags = scriptInFrom
Expand Down Expand Up @@ -1453,8 +1453,8 @@ func (b *builder) writeRegionInclusionData() {

type parentRel struct {
lang uint16
script uint8
maxScript uint8
script uint16
maxScript uint16
toRegion uint16
fromRegion []uint16
}
Expand All @@ -1477,10 +1477,10 @@ func (b *builder) writeParents() {
if len(sub) == 2 {
// TODO: check that all undefined scripts are indeed Latn in these
// cases.
parent.maxScript = uint8(b.script.index("Latn"))
parent.maxScript = uint16(b.script.index("Latn"))
parent.toRegion = uint16(b.region.index(sub[1]))
} else {
parent.script = uint8(b.script.index(sub[1]))
parent.script = uint16(b.script.index(sub[1]))
parent.maxScript = parent.script
parent.toRegion = uint16(b.region.index(sub[2]))
}
Expand Down
4 changes: 2 additions & 2 deletions internal/language/language_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ import (
func TestTagSize(t *testing.T) {
id := Tag{}
typ := reflect.TypeOf(id)
if typ.Size() > 24 {
t.Errorf("size of Tag was %d; want 24", typ.Size())
if typ.Size() > 32 {
t.Errorf("size of Tag was %d; want <= 32", typ.Size())
}
}

Expand Down
2 changes: 1 addition & 1 deletion internal/language/lookup.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ func (r Region) IsPrivateUse() bool {
return r.typ()&iso3166UserAssigned != 0
}

type Script uint8
type Script uint16

// getScriptID returns the script id for string s. It assumes that s
// is of the format [A-Z][a-z]{3}.
Expand Down
Loading

0 comments on commit b0ca10f

Please sign in to comment.