update go-git to v4.12.0 - fixes #7248 (#7249)

2019-06-18 22:14:15 -04:00
parent b209531959
commit 33ad554800
270 changed files with 71049 additions and 14434 deletions
@@ -124,7 +124,7 @@ func (e *Encoder) Writer(w io.Writer) io.Writer {
 }

 // ASCIISub is the ASCII substitute character, as recommended by
-// http://unicode.org/reports/tr36/#Text_Comparison
+// https://unicode.org/reports/tr36/#Text_Comparison
 const ASCIISub = '\x1a'

 // Nop is the nop encoding. Its transformed bytes are the same as the source
@@ -306,6 +306,7 @@ var nameMap = map[string]htmlEncoding{
 	"iso-2022-cn":         replacement,
 	"iso-2022-cn-ext":     replacement,
 	"iso-2022-kr":         replacement,
+	"replacement":         replacement,
 	"utf-16be":            utf16be,
 	"utf-16":              utf16le,
 	"utf-16le":            utf16le,
@@ -109,7 +109,12 @@ func main() {
 								use = use || a.Value != "person"
 							}
 							if a.Name.Local == "data" && use {
-								attr = a.Value + " "
+								// Patch up URLs to use https. From some links, the
+								// https version is different from the http one.
+								s := a.Value
+								s = strings.Replace(s, "http://", "https://", -1)
+								s = strings.Replace(s, "/unicode/", "/", -1)
+								attr = s + " "
 							}
 						}
 					}
@@ -34,7 +34,7 @@ package identifier
 // - http://www.iana.org/assignments/character-sets/character-sets.xhtml
 // - http://www.iana.org/assignments/ianacharset-mib/ianacharset-mib
 // - http://www.ietf.org/rfc/rfc2978.txt
-// - http://www.unicode.org/reports/tr22/
+// - https://www.unicode.org/reports/tr22/
 // - http://www.w3.org/TR/encoding/
 // - https://encoding.spec.whatwg.org/
 // - https://encoding.spec.whatwg.org/encodings.json
@@ -538,8 +538,6 @@ const (
 	// ISO111ECMACyrillic is the MIB identifier with IANA name ECMA-cyrillic.
 	//
 	// ISO registry
-	// (formerly ECMA
-	// registry )
 	ISO111ECMACyrillic MIB = 77

 	// ISO121Canadian1 is the MIB identifier with IANA name CSA_Z243.4-1985-1.
@@ -732,18 +730,18 @@ const (

 	// ISO885913 is the MIB identifier with IANA name ISO-8859-13.
 	//
-	// ISO See http://www.iana.org/assignments/charset-reg/ISO-8859-13 http://www.iana.org/assignments/charset-reg/ISO-8859-13
+	// ISO See https://www.iana.org/assignments/charset-reg/ISO-8859-13 https://www.iana.org/assignments/charset-reg/ISO-8859-13
 	ISO885913 MIB = 109

 	// ISO885914 is the MIB identifier with IANA name ISO-8859-14.
 	//
-	// ISO See http://www.iana.org/assignments/charset-reg/ISO-8859-14
+	// ISO See https://www.iana.org/assignments/charset-reg/ISO-8859-14
 	ISO885914 MIB = 110

 	// ISO885915 is the MIB identifier with IANA name ISO-8859-15.
 	//
 	// ISO
-	// Please see: http://www.iana.org/assignments/charset-reg/ISO-8859-15
+	// Please see: https://www.iana.org/assignments/charset-reg/ISO-8859-15
 	ISO885915 MIB = 111

 	// ISO885916 is the MIB identifier with IANA name ISO-8859-16.
@@ -754,41 +752,41 @@ const (
 	// GBK is the MIB identifier with IANA name GBK.
 	//
 	// Chinese IT Standardization Technical Committee
-	// Please see: http://www.iana.org/assignments/charset-reg/GBK
+	// Please see: https://www.iana.org/assignments/charset-reg/GBK
 	GBK MIB = 113

 	// GB18030 is the MIB identifier with IANA name GB18030.
 	//
 	// Chinese IT Standardization Technical Committee
-	// Please see: http://www.iana.org/assignments/charset-reg/GB18030
+	// Please see: https://www.iana.org/assignments/charset-reg/GB18030
 	GB18030 MIB = 114

 	// OSDEBCDICDF0415 is the MIB identifier with IANA name OSD_EBCDIC_DF04_15.
 	//
 	// Fujitsu-Siemens standard mainframe EBCDIC encoding
-	// Please see: http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15
+	// Please see: https://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15
 	OSDEBCDICDF0415 MIB = 115

 	// OSDEBCDICDF03IRV is the MIB identifier with IANA name OSD_EBCDIC_DF03_IRV.
 	//
 	// Fujitsu-Siemens standard mainframe EBCDIC encoding
-	// Please see: http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV
+	// Please see: https://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV
 	OSDEBCDICDF03IRV MIB = 116

 	// OSDEBCDICDF041 is the MIB identifier with IANA name OSD_EBCDIC_DF04_1.
 	//
 	// Fujitsu-Siemens standard mainframe EBCDIC encoding
-	// Please see: http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1
+	// Please see: https://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1
 	OSDEBCDICDF041 MIB = 117

 	// ISO115481 is the MIB identifier with IANA name ISO-11548-1.
 	//
-	// See http://www.iana.org/assignments/charset-reg/ISO-11548-1
+	// See https://www.iana.org/assignments/charset-reg/ISO-11548-1
 	ISO115481 MIB = 118

 	// KZ1048 is the MIB identifier with IANA name KZ-1048.
 	//
-	// See http://www.iana.org/assignments/charset-reg/KZ-1048
+	// See https://www.iana.org/assignments/charset-reg/KZ-1048
 	KZ1048 MIB = 119

 	// Unicode is the MIB identifier with IANA name ISO-10646-UCS-2.
@@ -855,7 +853,7 @@ const (

 	// SCSU is the MIB identifier with IANA name SCSU.
 	//
-	// SCSU See http://www.iana.org/assignments/charset-reg/SCSU
+	// SCSU See https://www.iana.org/assignments/charset-reg/SCSU
 	SCSU MIB = 1011

 	// UTF7 is the MIB identifier with IANA name UTF-7.
@@ -884,27 +882,27 @@ const (

 	// CESU8 is the MIB identifier with IANA name CESU-8.
 	//
-	// http://www.unicode.org/unicode/reports/tr26
+	// https://www.unicode.org/reports/tr26
 	CESU8 MIB = 1016

 	// UTF32 is the MIB identifier with IANA name UTF-32.
 	//
-	// http://www.unicode.org/unicode/reports/tr19/
+	// https://www.unicode.org/reports/tr19/
 	UTF32 MIB = 1017

 	// UTF32BE is the MIB identifier with IANA name UTF-32BE.
 	//
-	// http://www.unicode.org/unicode/reports/tr19/
+	// https://www.unicode.org/reports/tr19/
 	UTF32BE MIB = 1018

 	// UTF32LE is the MIB identifier with IANA name UTF-32LE.
 	//
-	// http://www.unicode.org/unicode/reports/tr19/
+	// https://www.unicode.org/reports/tr19/
 	UTF32LE MIB = 1019

 	// BOCU1 is the MIB identifier with IANA name BOCU-1.
 	//
-	// http://www.unicode.org/notes/tn6/
+	// https://www.unicode.org/notes/tn6/
 	BOCU1 MIB = 1020

 	// Windows30Latin1 is the MIB identifier with IANA name ISO-8859-1-Windows-3.0-Latin-1.
@@ -1461,152 +1459,152 @@ const (

 	// IBM00858 is the MIB identifier with IANA name IBM00858.
 	//
-	// IBM See http://www.iana.org/assignments/charset-reg/IBM00858
+	// IBM See https://www.iana.org/assignments/charset-reg/IBM00858
 	IBM00858 MIB = 2089

 	// IBM00924 is the MIB identifier with IANA name IBM00924.
 	//
-	// IBM See http://www.iana.org/assignments/charset-reg/IBM00924
+	// IBM See https://www.iana.org/assignments/charset-reg/IBM00924
 	IBM00924 MIB = 2090

 	// IBM01140 is the MIB identifier with IANA name IBM01140.
 	//
-	// IBM See http://www.iana.org/assignments/charset-reg/IBM01140
+	// IBM See https://www.iana.org/assignments/charset-reg/IBM01140
 	IBM01140 MIB = 2091

 	// IBM01141 is the MIB identifier with IANA name IBM01141.
 	//
-	// IBM See http://www.iana.org/assignments/charset-reg/IBM01141
+	// IBM See https://www.iana.org/assignments/charset-reg/IBM01141
 	IBM01141 MIB = 2092

 	// IBM01142 is the MIB identifier with IANA name IBM01142.
 	//
-	// IBM See http://www.iana.org/assignments/charset-reg/IBM01142
+	// IBM See https://www.iana.org/assignments/charset-reg/IBM01142
 	IBM01142 MIB = 2093

 	// IBM01143 is the MIB identifier with IANA name IBM01143.
 	//
-	// IBM See http://www.iana.org/assignments/charset-reg/IBM01143
+	// IBM See https://www.iana.org/assignments/charset-reg/IBM01143
 	IBM01143 MIB = 2094

 	// IBM01144 is the MIB identifier with IANA name IBM01144.
 	//
-	// IBM See http://www.iana.org/assignments/charset-reg/IBM01144
+	// IBM See https://www.iana.org/assignments/charset-reg/IBM01144
 	IBM01144 MIB = 2095

 	// IBM01145 is the MIB identifier with IANA name IBM01145.
 	//
-	// IBM See http://www.iana.org/assignments/charset-reg/IBM01145
+	// IBM See https://www.iana.org/assignments/charset-reg/IBM01145
 	IBM01145 MIB = 2096

 	// IBM01146 is the MIB identifier with IANA name IBM01146.
 	//
-	// IBM See http://www.iana.org/assignments/charset-reg/IBM01146
+	// IBM See https://www.iana.org/assignments/charset-reg/IBM01146
 	IBM01146 MIB = 2097

 	// IBM01147 is the MIB identifier with IANA name IBM01147.
 	//
-	// IBM See http://www.iana.org/assignments/charset-reg/IBM01147
+	// IBM See https://www.iana.org/assignments/charset-reg/IBM01147
 	IBM01147 MIB = 2098

 	// IBM01148 is the MIB identifier with IANA name IBM01148.
 	//
-	// IBM See http://www.iana.org/assignments/charset-reg/IBM01148
+	// IBM See https://www.iana.org/assignments/charset-reg/IBM01148
 	IBM01148 MIB = 2099

 	// IBM01149 is the MIB identifier with IANA name IBM01149.
 	//
-	// IBM See http://www.iana.org/assignments/charset-reg/IBM01149
+	// IBM See https://www.iana.org/assignments/charset-reg/IBM01149
 	IBM01149 MIB = 2100

 	// Big5HKSCS is the MIB identifier with IANA name Big5-HKSCS.
 	//
-	// See http://www.iana.org/assignments/charset-reg/Big5-HKSCS
+	// See https://www.iana.org/assignments/charset-reg/Big5-HKSCS
 	Big5HKSCS MIB = 2101

 	// IBM1047 is the MIB identifier with IANA name IBM1047.
 	//
-	// IBM1047 (EBCDIC Latin 1/Open Systems) http://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf
+	// IBM1047 (EBCDIC Latin 1/Open Systems) https://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf
 	IBM1047 MIB = 2102

 	// PTCP154 is the MIB identifier with IANA name PTCP154.
 	//
-	// See http://www.iana.org/assignments/charset-reg/PTCP154
+	// See https://www.iana.org/assignments/charset-reg/PTCP154
 	PTCP154 MIB = 2103

 	// Amiga1251 is the MIB identifier with IANA name Amiga-1251.
 	//
-	// See http://www.amiga.ultranet.ru/Amiga-1251.html
+	// See https://www.amiga.ultranet.ru/Amiga-1251.html
 	Amiga1251 MIB = 2104

 	// KOI7switched is the MIB identifier with IANA name KOI7-switched.
 	//
-	// See http://www.iana.org/assignments/charset-reg/KOI7-switched
+	// See https://www.iana.org/assignments/charset-reg/KOI7-switched
 	KOI7switched MIB = 2105

 	// BRF is the MIB identifier with IANA name BRF.
 	//
-	// See http://www.iana.org/assignments/charset-reg/BRF
+	// See https://www.iana.org/assignments/charset-reg/BRF
 	BRF MIB = 2106

 	// TSCII is the MIB identifier with IANA name TSCII.
 	//
-	// See http://www.iana.org/assignments/charset-reg/TSCII
+	// See https://www.iana.org/assignments/charset-reg/TSCII
 	TSCII MIB = 2107

 	// CP51932 is the MIB identifier with IANA name CP51932.
 	//
-	// See http://www.iana.org/assignments/charset-reg/CP51932
+	// See https://www.iana.org/assignments/charset-reg/CP51932
 	CP51932 MIB = 2108

 	// Windows874 is the MIB identifier with IANA name windows-874.
 	//
-	// See http://www.iana.org/assignments/charset-reg/windows-874
+	// See https://www.iana.org/assignments/charset-reg/windows-874
 	Windows874 MIB = 2109

 	// Windows1250 is the MIB identifier with IANA name windows-1250.
 	//
-	// Microsoft http://www.iana.org/assignments/charset-reg/windows-1250
+	// Microsoft https://www.iana.org/assignments/charset-reg/windows-1250
 	Windows1250 MIB = 2250

 	// Windows1251 is the MIB identifier with IANA name windows-1251.
 	//
-	// Microsoft http://www.iana.org/assignments/charset-reg/windows-1251
+	// Microsoft https://www.iana.org/assignments/charset-reg/windows-1251
 	Windows1251 MIB = 2251

 	// Windows1252 is the MIB identifier with IANA name windows-1252.
 	//
-	// Microsoft http://www.iana.org/assignments/charset-reg/windows-1252
+	// Microsoft https://www.iana.org/assignments/charset-reg/windows-1252
 	Windows1252 MIB = 2252

 	// Windows1253 is the MIB identifier with IANA name windows-1253.
 	//
-	// Microsoft http://www.iana.org/assignments/charset-reg/windows-1253
+	// Microsoft https://www.iana.org/assignments/charset-reg/windows-1253
 	Windows1253 MIB = 2253

 	// Windows1254 is the MIB identifier with IANA name windows-1254.
 	//
-	// Microsoft http://www.iana.org/assignments/charset-reg/windows-1254
+	// Microsoft https://www.iana.org/assignments/charset-reg/windows-1254
 	Windows1254 MIB = 2254

 	// Windows1255 is the MIB identifier with IANA name windows-1255.
 	//
-	// Microsoft http://www.iana.org/assignments/charset-reg/windows-1255
+	// Microsoft https://www.iana.org/assignments/charset-reg/windows-1255
 	Windows1255 MIB = 2255

 	// Windows1256 is the MIB identifier with IANA name windows-1256.
 	//
-	// Microsoft http://www.iana.org/assignments/charset-reg/windows-1256
+	// Microsoft https://www.iana.org/assignments/charset-reg/windows-1256
 	Windows1256 MIB = 2256

 	// Windows1257 is the MIB identifier with IANA name windows-1257.
 	//
-	// Microsoft http://www.iana.org/assignments/charset-reg/windows-1257
+	// Microsoft https://www.iana.org/assignments/charset-reg/windows-1257
 	Windows1257 MIB = 2257

 	// Windows1258 is the MIB identifier with IANA name windows-1258.
 	//
-	// Microsoft http://www.iana.org/assignments/charset-reg/windows-1258
+	// Microsoft https://www.iana.org/assignments/charset-reg/windows-1258
 	Windows1258 MIB = 2258

 	// TIS620 is the MIB identifier with IANA name TIS-620.
@@ -1616,6 +1614,6 @@ const (

 	// CP50220 is the MIB identifier with IANA name CP50220.
 	//
-	// See http://www.iana.org/assignments/charset-reg/CP50220
+	// See https://www.iana.org/assignments/charset-reg/CP50220
 	CP50220 MIB = 2260
 )
@@ -10,8 +10,8 @@ package main
 //	go run maketables.go | gofmt > tables.go

 // TODO: Emoji extensions?
-// http://www.unicode.org/faq/emoji_dingbats.html
-// http://www.unicode.org/Public/UNIDATA/EmojiSources.txt
+// https://www.unicode.org/faq/emoji_dingbats.html
+// https://www.unicode.org/Public/UNIDATA/EmojiSources.txt

 import (
 	"bufio"
@@ -145,7 +145,7 @@ func (utf8Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err e
 // and consumed in a greater context that implies a certain endianness, use
 // IgnoreBOM. Otherwise, use ExpectBOM and always produce and consume a BOM.
 //
-// In the language of http://www.unicode.org/faq/utf_bom.html#bom10, IgnoreBOM
+// In the language of https://www.unicode.org/faq/utf_bom.html#bom10, IgnoreBOM
 // corresponds to "Where the precise type of the data stream is known... the
 // BOM should not be used" and ExpectBOM corresponds to "A particular
 // protocol... may require use of the BOM".
@@ -4,13 +4,13 @@ package language

 // This file contains code common to the maketables.go and the package code.

-// langAliasType is the type of an alias in langAliasMap.
-type langAliasType int8
+// AliasType is the type of an alias in AliasMap.
+type AliasType int8

 const (
-	langDeprecated langAliasType = iota
-	langMacro
-	langLegacy
+	Deprecated AliasType = iota
+	Macro
+	Legacy

-	langAliasTypeUnknown langAliasType = -1
+	AliasTypeUnknown AliasType = -1
 )
@@ -0,0 +1,29 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+// CompactCoreInfo is a compact integer with the three core tags encoded.
+type CompactCoreInfo uint32
+
+// GetCompactCore generates a uint32 value that is guaranteed to be unique for
+// different language, region, and script values.
+func GetCompactCore(t Tag) (cci CompactCoreInfo, ok bool) {
+	if t.LangID > langNoIndexOffset {
+		return 0, false
+	}
+	cci |= CompactCoreInfo(t.LangID) << (8 + 12)
+	cci |= CompactCoreInfo(t.ScriptID) << 12
+	cci |= CompactCoreInfo(t.RegionID)
+	return cci, true
+}
+
+// Tag generates a tag from c.
+func (c CompactCoreInfo) Tag() Tag {
+	return Tag{
+		LangID:   Language(c >> 20),
+		RegionID: Region(c & 0x3ff),
+		ScriptID: Script(c>>12) & 0xff,
+	}
+}
@@ -0,0 +1,61 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package compact defines a compact representation of language tags.
+//
+// Common language tags (at least all for which locale information is defined
+// in CLDR) are assigned a unique index. Each Tag is associated with such an
+// ID for selecting language-related resources (such as translations) as well
+// as one for selecting regional defaults (currency, number formatting, etc.)
+//
+// It may want to export this functionality at some point, but at this point
+// this is only available for use within x/text.
+package compact // import "golang.org/x/text/internal/language/compact"
+
+import (
+	"sort"
+	"strings"
+
+	"golang.org/x/text/internal/language"
+)
+
+// ID is an integer identifying a single tag.
+type ID uint16
+
+func getCoreIndex(t language.Tag) (id ID, ok bool) {
+	cci, ok := language.GetCompactCore(t)
+	if !ok {
+		return 0, false
+	}
+	i := sort.Search(len(coreTags), func(i int) bool {
+		return cci <= coreTags[i]
+	})
+	if i == len(coreTags) || coreTags[i] != cci {
+		return 0, false
+	}
+	return ID(i), true
+}
+
+// Parent returns the ID of the parent or the root ID if id is already the root.
+func (id ID) Parent() ID {
+	return parents[id]
+}
+
+// Tag converts id to an internal language Tag.
+func (id ID) Tag() language.Tag {
+	if int(id) >= len(coreTags) {
+		return specialTags[int(id)-len(coreTags)]
+	}
+	return coreTags[id].Tag()
+}
+
+var specialTags []language.Tag
+
+func init() {
+	tags := strings.Split(specialTagsStr, " ")
+	specialTags = make([]language.Tag, len(tags))
+	for i, t := range tags {
+		specialTags[i] = language.MustParse(t)
+	}
+}
@@ -0,0 +1,64 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// Language tag table generator.
+// Data read from the web.
+
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+
+	"golang.org/x/text/internal/gen"
+	"golang.org/x/text/unicode/cldr"
+)
+
+var (
+	test = flag.Bool("test",
+		false,
+		"test existing tables; can be used to compare web data with package data.")
+	outputFile = flag.String("output",
+		"tables.go",
+		"output file for generated tables")
+)
+
+func main() {
+	gen.Init()
+
+	w := gen.NewCodeWriter()
+	defer w.WriteGoFile("tables.go", "compact")
+
+	fmt.Fprintln(w, `import "golang.org/x/text/internal/language"`)
+
+	b := newBuilder(w)
+	gen.WriteCLDRVersion(w)
+
+	b.writeCompactIndex()
+}
+
+type builder struct {
+	w    *gen.CodeWriter
+	data *cldr.CLDR
+	supp *cldr.SupplementalData
+}
+
+func newBuilder(w *gen.CodeWriter) *builder {
+	r := gen.OpenCLDRCoreZip()
+	defer r.Close()
+	d := &cldr.Decoder{}
+	data, err := d.DecodeZip(r)
+	if err != nil {
+		log.Fatal(err)
+	}
+	b := builder{
+		w:    w,
+		data: data,
+		supp: data.Supplemental(),
+	}
+	return &b
+}
@@ -0,0 +1,113 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+// This file generates derivative tables based on the language package itself.
+
+import (
+	"fmt"
+	"log"
+	"sort"
+	"strings"
+
+	"golang.org/x/text/internal/language"
+)
+
+// Compact indices:
+// Note -va-X variants only apply to localization variants.
+// BCP variants only ever apply to language.
+// The only ambiguity between tags is with regions.
+
+func (b *builder) writeCompactIndex() {
+	// Collect all language tags for which we have any data in CLDR.
+	m := map[language.Tag]bool{}
+	for _, lang := range b.data.Locales() {
+		// We include all locales unconditionally to be consistent with en_US.
+		// We want en_US, even though it has no data associated with it.
+
+		// TODO: put any of the languages for which no data exists at the end
+		// of the index. This allows all components based on ICU to use that
+		// as the cutoff point.
+		// if x := data.RawLDML(lang); false ||
+		// 	x.LocaleDisplayNames != nil ||
+		// 	x.Characters != nil ||
+		// 	x.Delimiters != nil ||
+		// 	x.Measurement != nil ||
+		// 	x.Dates != nil ||
+		// 	x.Numbers != nil ||
+		// 	x.Units != nil ||
+		// 	x.ListPatterns != nil ||
+		// 	x.Collations != nil ||
+		// 	x.Segmentations != nil ||
+		// 	x.Rbnf != nil ||
+		// 	x.Annotations != nil ||
+		// 	x.Metadata != nil {
+
+		// TODO: support POSIX natively, albeit non-standard.
+		tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
+		m[tag] = true
+		// }
+	}
+
+	// TODO: plural rules are also defined for the deprecated tags:
+	//    iw mo sh tl
+	// Consider removing these as compact tags.
+
+	// Include locales for plural rules, which uses a different structure.
+	for _, plurals := range b.supp.Plurals {
+		for _, rules := range plurals.PluralRules {
+			for _, lang := range strings.Split(rules.Locales, " ") {
+				m[language.Make(lang)] = true
+			}
+		}
+	}
+
+	var coreTags []language.CompactCoreInfo
+	var special []string
+
+	for t := range m {
+		if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
+			log.Fatalf("Unexpected extension %v in %v", x, t)
+		}
+		if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
+			cci, ok := language.GetCompactCore(t)
+			if !ok {
+				log.Fatalf("Locale for non-basic language %q", t)
+			}
+			coreTags = append(coreTags, cci)
+		} else {
+			special = append(special, t.String())
+		}
+	}
+
+	w := b.w
+
+	sort.Slice(coreTags, func(i, j int) bool { return coreTags[i] < coreTags[j] })
+	sort.Strings(special)
+
+	w.WriteComment(`
+	NumCompactTags is the number of common tags. The maximum tag is
+	NumCompactTags-1.`)
+	w.WriteConst("NumCompactTags", len(m))
+
+	fmt.Fprintln(w, "const (")
+	for i, t := range coreTags {
+		fmt.Fprintf(w, "%s ID = %d\n", ident(t.Tag().String()), i)
+	}
+	for i, t := range special {
+		fmt.Fprintf(w, "%s ID = %d\n", ident(t), i+len(coreTags))
+	}
+	fmt.Fprintln(w, ")")
+
+	w.WriteVar("coreTags", coreTags)
+
+	w.WriteConst("specialTagsStr", strings.Join(special, " "))
+}
+
+func ident(s string) string {
+	return strings.Replace(s, "-", "", -1) + "Index"
+}
@@ -0,0 +1,54 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+import (
+	"log"
+
+	"golang.org/x/text/internal/gen"
+	"golang.org/x/text/internal/language"
+	"golang.org/x/text/internal/language/compact"
+	"golang.org/x/text/unicode/cldr"
+)
+
+func main() {
+	r := gen.OpenCLDRCoreZip()
+	defer r.Close()
+
+	d := &cldr.Decoder{}
+	data, err := d.DecodeZip(r)
+	if err != nil {
+		log.Fatalf("DecodeZip: %v", err)
+	}
+
+	w := gen.NewCodeWriter()
+	defer w.WriteGoFile("parents.go", "compact")
+
+	// Create parents table.
+	type ID uint16
+	parents := make([]ID, compact.NumCompactTags)
+	for _, loc := range data.Locales() {
+		tag := language.MustParse(loc)
+		index, ok := compact.FromTag(tag)
+		if !ok {
+			continue
+		}
+		parentIndex := compact.ID(0) // und
+		for p := tag.Parent(); p != language.Und; p = p.Parent() {
+			if x, ok := compact.FromTag(p); ok {
+				parentIndex = x
+				break
+			}
+		}
+		parents[index] = ID(parentIndex)
+	}
+
+	w.WriteComment(`
+	parents maps a compact index of a tag to the compact index of the parent of
+	this tag.`)
+	w.WriteVar("parents", parents)
+}
@@ -0,0 +1,260 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:generate go run gen.go gen_index.go -output tables.go
+//go:generate go run gen_parents.go
+
+package compact
+
+// TODO: Remove above NOTE after:
+// - verifying that tables are dropped correctly (most notably matcher tables).
+
+import (
+	"strings"
+
+	"golang.org/x/text/internal/language"
+)
+
+// Tag represents a BCP 47 language tag. It is used to specify an instance of a
+// specific language or locale. All language tag values are guaranteed to be
+// well-formed.
+type Tag struct {
+	// NOTE: exported tags will become part of the public API.
+	language ID
+	locale   ID
+	full     fullTag // always a language.Tag for now.
+}
+
+const _und = 0
+
+type fullTag interface {
+	IsRoot() bool
+	Parent() language.Tag
+}
+
+// Make a compact Tag from a fully specified internal language Tag.
+func Make(t language.Tag) (tag Tag) {
+	if region := t.TypeForKey("rg"); len(region) == 6 && region[2:] == "zzzz" {
+		if r, err := language.ParseRegion(region[:2]); err == nil {
+			tFull := t
+			t, _ = t.SetTypeForKey("rg", "")
+			// TODO: should we not consider "va" for the language tag?
+			var exact1, exact2 bool
+			tag.language, exact1 = FromTag(t)
+			t.RegionID = r
+			tag.locale, exact2 = FromTag(t)
+			if !exact1 || !exact2 {
+				tag.full = tFull
+			}
+			return tag
+		}
+	}
+	lang, ok := FromTag(t)
+	tag.language = lang
+	tag.locale = lang
+	if !ok {
+		tag.full = t
+	}
+	return tag
+}
+
+// Tag returns an internal language Tag version of this tag.
+func (t Tag) Tag() language.Tag {
+	if t.full != nil {
+		return t.full.(language.Tag)
+	}
+	tag := t.language.Tag()
+	if t.language != t.locale {
+		loc := t.locale.Tag()
+		tag, _ = tag.SetTypeForKey("rg", strings.ToLower(loc.RegionID.String())+"zzzz")
+	}
+	return tag
+}
+
+// IsCompact reports whether this tag is fully defined in terms of ID.
+func (t *Tag) IsCompact() bool {
+	return t.full == nil
+}
+
+// MayHaveVariants reports whether a tag may have variants. If it returns false
+// it is guaranteed the tag does not have variants.
+func (t Tag) MayHaveVariants() bool {
+	return t.full != nil || int(t.language) >= len(coreTags)
+}
+
+// MayHaveExtensions reports whether a tag may have extensions. If it returns
+// false it is guaranteed the tag does not have them.
+func (t Tag) MayHaveExtensions() bool {
+	return t.full != nil ||
+		int(t.language) >= len(coreTags) ||
+		t.language != t.locale
+}
+
+// IsRoot returns true if t is equal to language "und".
+func (t Tag) IsRoot() bool {
+	if t.full != nil {
+		return t.full.IsRoot()
+	}
+	return t.language == _und
+}
+
+// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
+// specific language are substituted with fields from the parent language.
+// The parent for a language may change for newer versions of CLDR.
+func (t Tag) Parent() Tag {
+	if t.full != nil {
+		return Make(t.full.Parent())
+	}
+	if t.language != t.locale {
+		// Simulate stripping -u-rg-xxxxxx
+		return Tag{language: t.language, locale: t.language}
+	}
+	// TODO: use parent lookup table once cycle from internal package is
+	// removed. Probably by internalizing the table and declaring this fast
+	// enough.
+	// lang := compactID(internal.Parent(uint16(t.language)))
+	lang, _ := FromTag(t.language.Tag().Parent())
+	return Tag{language: lang, locale: lang}
+}
+
+// returns token t and the rest of the string.
+func nextToken(s string) (t, tail string) {
+	p := strings.Index(s[1:], "-")
+	if p == -1 {
+		return s[1:], ""
+	}
+	p++
+	return s[1:p], s[p:]
+}
+
+// LanguageID returns an index, where 0 <= index < NumCompactTags, for tags
+// for which data exists in the text repository.The index will change over time
+// and should not be stored in persistent storage. If t does not match a compact
+// index, exact will be false and the compact index will be returned for the
+// first match after repeatedly taking the Parent of t.
+func LanguageID(t Tag) (id ID, exact bool) {
+	return t.language, t.full == nil
+}
+
+// RegionalID returns the ID for the regional variant of this tag. This index is
+// used to indicate region-specific overrides, such as default currency, default
+// calendar and week data, default time cycle, and default measurement system
+// and unit preferences.
+//
+// For instance, the tag en-GB-u-rg-uszzzz specifies British English with US
+// settings for currency, number formatting, etc. The CompactIndex for this tag
+// will be that for en-GB, while the RegionalID will be the one corresponding to
+// en-US.
+func RegionalID(t Tag) (id ID, exact bool) {
+	return t.locale, t.full == nil
+}
+
+// LanguageTag returns t stripped of regional variant indicators.
+//
+// At the moment this means it is stripped of a regional and variant subtag "rg"
+// and "va" in the "u" extension.
+func (t Tag) LanguageTag() Tag {
+	if t.full == nil {
+		return Tag{language: t.language, locale: t.language}
+	}
+	tt := t.Tag()
+	tt.SetTypeForKey("rg", "")
+	tt.SetTypeForKey("va", "")
+	return Make(tt)
+}
+
+// RegionalTag returns the regional variant of the tag.
+//
+// At the moment this means that the region is set from the regional subtag
+// "rg" in the "u" extension.
+func (t Tag) RegionalTag() Tag {
+	rt := Tag{language: t.locale, locale: t.locale}
+	if t.full == nil {
+		return rt
+	}
+	b := language.Builder{}
+	tag := t.Tag()
+	// tag, _ = tag.SetTypeForKey("rg", "")
+	b.SetTag(t.locale.Tag())
+	if v := tag.Variants(); v != "" {
+		for _, v := range strings.Split(v, "-") {
+			b.AddVariant(v)
+		}
+	}
+	for _, e := range tag.Extensions() {
+		b.AddExt(e)
+	}
+	return t
+}
+
+// FromTag reports closest matching ID for an internal language Tag.
+func FromTag(t language.Tag) (id ID, exact bool) {
+	// TODO: perhaps give more frequent tags a lower index.
+	// TODO: we could make the indexes stable. This will excluded some
+	//       possibilities for optimization, so don't do this quite yet.
+	exact = true
+
+	b, s, r := t.Raw()
+	if t.HasString() {
+		if t.IsPrivateUse() {
+			// We have no entries for user-defined tags.
+			return 0, false
+		}
+		hasExtra := false
+		if t.HasVariants() {
+			if t.HasExtensions() {
+				build := language.Builder{}
+				build.SetTag(language.Tag{LangID: b, ScriptID: s, RegionID: r})
+				build.AddVariant(t.Variants())
+				exact = false
+				t = build.Make()
+			}
+			hasExtra = true
+		} else if _, ok := t.Extension('u'); ok {
+			// TODO: va may mean something else. Consider not considering it.
+			// Strip all but the 'va' entry.
+			old := t
+			variant := t.TypeForKey("va")
+			t = language.Tag{LangID: b, ScriptID: s, RegionID: r}
+			if variant != "" {
+				t, _ = t.SetTypeForKey("va", variant)
+				hasExtra = true
+			}
+			exact = old == t
+		} else {
+			exact = false
+		}
+		if hasExtra {
+			// We have some variants.
+			for i, s := range specialTags {
+				if s == t {
+					return ID(i + len(coreTags)), exact
+				}
+			}
+			exact = false
+		}
+	}
+	if x, ok := getCoreIndex(t); ok {
+		return x, exact
+	}
+	exact = false
+	if r != 0 && s == 0 {
+		// Deal with cases where an extra script is inserted for the region.
+		t, _ := t.Maximize()
+		if x, ok := getCoreIndex(t); ok {
+			return x, exact
+		}
+	}
+	for t = t.Parent(); t != root; t = t.Parent() {
+		// No variants specified: just compare core components.
+		// The key has the form lllssrrr, where l, s, and r are nibbles for
+		// respectively the langID, scriptID, and regionID.
+		if x, ok := getCoreIndex(t); ok {
+			return x, exact
+		}
+	}
+	return 0, exact
+}
+
+var root = language.Tag{}
@@ -0,0 +1,120 @@
+// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
+
+package compact
+
+// parents maps a compact index of a tag to the compact index of the parent of
+// this tag.
+var parents = []ID{ // 775 elements
+	// Entry 0 - 3F
+	0x0000, 0x0000, 0x0001, 0x0001, 0x0000, 0x0004, 0x0000, 0x0006,
+	0x0000, 0x0008, 0x0000, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
+	0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
+	0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
+	0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x0000,
+	0x0000, 0x0028, 0x0000, 0x002a, 0x0000, 0x002c, 0x0000, 0x0000,
+	0x002f, 0x002e, 0x002e, 0x0000, 0x0033, 0x0000, 0x0035, 0x0000,
+	0x0037, 0x0000, 0x0039, 0x0000, 0x003b, 0x0000, 0x0000, 0x003e,
+	// Entry 40 - 7F
+	0x0000, 0x0040, 0x0040, 0x0000, 0x0043, 0x0043, 0x0000, 0x0046,
+	0x0000, 0x0048, 0x0000, 0x0000, 0x004b, 0x004a, 0x004a, 0x0000,
+	0x004f, 0x004f, 0x004f, 0x004f, 0x0000, 0x0054, 0x0054, 0x0000,
+	0x0057, 0x0000, 0x0059, 0x0000, 0x005b, 0x0000, 0x005d, 0x005d,
+	0x0000, 0x0060, 0x0000, 0x0062, 0x0000, 0x0064, 0x0000, 0x0066,
+	0x0066, 0x0000, 0x0069, 0x0000, 0x006b, 0x006b, 0x006b, 0x006b,
+	0x006b, 0x006b, 0x006b, 0x0000, 0x0073, 0x0000, 0x0075, 0x0000,
+	0x0077, 0x0000, 0x0000, 0x007a, 0x0000, 0x007c, 0x0000, 0x007e,
+	// Entry 80 - BF
+	0x0000, 0x0080, 0x0080, 0x0000, 0x0083, 0x0083, 0x0000, 0x0086,
+	0x0087, 0x0087, 0x0087, 0x0086, 0x0088, 0x0087, 0x0087, 0x0087,
+	0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088,
+	0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087, 0x0088, 0x0087,
+	0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
+	0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087,
+	0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
+	0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0086,
+	// Entry C0 - FF
+	0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
+	0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
+	0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087,
+	0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
+	0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0086, 0x0087,
+	0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0000,
+	0x00ef, 0x0000, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2,
+	0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f1, 0x00f1,
+	// Entry 100 - 13F
+	0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1,
+	0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x0000, 0x010e,
+	0x0000, 0x0110, 0x0000, 0x0112, 0x0000, 0x0114, 0x0114, 0x0000,
+	0x0117, 0x0117, 0x0117, 0x0117, 0x0000, 0x011c, 0x0000, 0x011e,
+	0x0000, 0x0120, 0x0120, 0x0000, 0x0123, 0x0123, 0x0123, 0x0123,
+	0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
+	0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
+	0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
+	// Entry 140 - 17F
+	0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
+	0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
+	0x0123, 0x0123, 0x0000, 0x0152, 0x0000, 0x0154, 0x0000, 0x0156,
+	0x0000, 0x0158, 0x0000, 0x015a, 0x0000, 0x015c, 0x015c, 0x015c,
+	0x0000, 0x0160, 0x0000, 0x0000, 0x0163, 0x0000, 0x0165, 0x0000,
+	0x0167, 0x0167, 0x0167, 0x0000, 0x016b, 0x0000, 0x016d, 0x0000,
+	0x016f, 0x0000, 0x0171, 0x0171, 0x0000, 0x0174, 0x0000, 0x0176,
+	0x0000, 0x0178, 0x0000, 0x017a, 0x0000, 0x017c, 0x0000, 0x017e,
+	// Entry 180 - 1BF
+	0x0000, 0x0000, 0x0000, 0x0182, 0x0000, 0x0184, 0x0184, 0x0184,
+	0x0184, 0x0000, 0x0000, 0x0000, 0x018b, 0x0000, 0x0000, 0x018e,
+	0x0000, 0x0000, 0x0191, 0x0000, 0x0000, 0x0000, 0x0195, 0x0000,
+	0x0197, 0x0000, 0x0000, 0x019a, 0x0000, 0x0000, 0x019d, 0x0000,
+	0x019f, 0x0000, 0x01a1, 0x0000, 0x01a3, 0x0000, 0x01a5, 0x0000,
+	0x01a7, 0x0000, 0x01a9, 0x0000, 0x01ab, 0x0000, 0x01ad, 0x0000,
+	0x01af, 0x0000, 0x01b1, 0x01b1, 0x0000, 0x01b4, 0x0000, 0x01b6,
+	0x0000, 0x01b8, 0x0000, 0x01ba, 0x0000, 0x01bc, 0x0000, 0x0000,
+	// Entry 1C0 - 1FF
+	0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x0000, 0x01c5, 0x0000,
+	0x01c7, 0x0000, 0x01c9, 0x0000, 0x01cb, 0x01cb, 0x01cb, 0x01cb,
+	0x0000, 0x01d0, 0x0000, 0x01d2, 0x01d2, 0x0000, 0x01d5, 0x0000,
+	0x01d7, 0x0000, 0x01d9, 0x0000, 0x01db, 0x0000, 0x01dd, 0x0000,
+	0x01df, 0x01df, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
+	0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee,
+	0x0000, 0x01f0, 0x0000, 0x0000, 0x01f3, 0x0000, 0x01f5, 0x01f5,
+	0x01f5, 0x0000, 0x01f9, 0x0000, 0x01fb, 0x0000, 0x01fd, 0x0000,
+	// Entry 200 - 23F
+	0x01ff, 0x0000, 0x0000, 0x0202, 0x0000, 0x0204, 0x0204, 0x0000,
+	0x0207, 0x0000, 0x0209, 0x0209, 0x0000, 0x020c, 0x020c, 0x0000,
+	0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x0000,
+	0x0217, 0x0000, 0x0219, 0x0000, 0x021b, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0221, 0x0000, 0x0000, 0x0224, 0x0000, 0x0226,
+	0x0226, 0x0000, 0x0229, 0x0000, 0x022b, 0x022b, 0x0000, 0x0000,
+	0x022f, 0x022e, 0x022e, 0x0000, 0x0000, 0x0234, 0x0000, 0x0236,
+	0x0000, 0x0238, 0x0000, 0x0244, 0x023a, 0x0244, 0x0244, 0x0244,
+	// Entry 240 - 27F
+	0x0244, 0x0244, 0x0244, 0x0244, 0x023a, 0x0244, 0x0244, 0x0000,
+	0x0247, 0x0247, 0x0247, 0x0000, 0x024b, 0x0000, 0x024d, 0x0000,
+	0x024f, 0x024f, 0x0000, 0x0252, 0x0000, 0x0254, 0x0254, 0x0254,
+	0x0254, 0x0254, 0x0254, 0x0000, 0x025b, 0x0000, 0x025d, 0x0000,
+	0x025f, 0x0000, 0x0261, 0x0000, 0x0263, 0x0000, 0x0265, 0x0000,
+	0x0000, 0x0268, 0x0268, 0x0268, 0x0000, 0x026c, 0x0000, 0x026e,
+	0x0000, 0x0270, 0x0000, 0x0000, 0x0000, 0x0274, 0x0273, 0x0273,
+	0x0000, 0x0278, 0x0000, 0x027a, 0x0000, 0x027c, 0x0000, 0x0000,
+	// Entry 280 - 2BF
+	0x0000, 0x0000, 0x0281, 0x0000, 0x0000, 0x0284, 0x0000, 0x0286,
+	0x0286, 0x0286, 0x0286, 0x0000, 0x028b, 0x028b, 0x028b, 0x0000,
+	0x028f, 0x028f, 0x028f, 0x028f, 0x028f, 0x0000, 0x0295, 0x0295,
+	0x0295, 0x0295, 0x0000, 0x0000, 0x0000, 0x0000, 0x029d, 0x029d,
+	0x029d, 0x0000, 0x02a1, 0x02a1, 0x02a1, 0x02a1, 0x0000, 0x0000,
+	0x02a7, 0x02a7, 0x02a7, 0x02a7, 0x0000, 0x02ac, 0x0000, 0x02ae,
+	0x02ae, 0x0000, 0x02b1, 0x0000, 0x02b3, 0x0000, 0x02b5, 0x02b5,
+	0x0000, 0x0000, 0x02b9, 0x0000, 0x0000, 0x0000, 0x02bd, 0x0000,
+	// Entry 2C0 - 2FF
+	0x02bf, 0x02bf, 0x0000, 0x0000, 0x02c3, 0x0000, 0x02c5, 0x0000,
+	0x02c7, 0x0000, 0x02c9, 0x0000, 0x02cb, 0x0000, 0x02cd, 0x02cd,
+	0x0000, 0x0000, 0x02d1, 0x0000, 0x02d3, 0x02d0, 0x02d0, 0x0000,
+	0x0000, 0x02d8, 0x02d7, 0x02d7, 0x0000, 0x0000, 0x02dd, 0x0000,
+	0x02df, 0x0000, 0x02e1, 0x0000, 0x0000, 0x02e4, 0x0000, 0x02e6,
+	0x0000, 0x0000, 0x02e9, 0x0000, 0x02eb, 0x0000, 0x02ed, 0x0000,
+	0x02ef, 0x02ef, 0x0000, 0x0000, 0x02f3, 0x02f2, 0x02f2, 0x0000,
+	0x02f7, 0x0000, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x0000,
+	// Entry 300 - 33F
+	0x02ff, 0x0300, 0x02ff, 0x0000, 0x0303, 0x0051, 0x00e6,
+} // Size: 1574 bytes
+
+// Total table size 1574 bytes (1KiB); checksum: 895AAF0B
@@ -0,0 +1,91 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package compact
+
+var (
+	und = Tag{}
+
+	Und Tag = Tag{}
+
+	Afrikaans            Tag = Tag{language: afIndex, locale: afIndex}
+	Amharic              Tag = Tag{language: amIndex, locale: amIndex}
+	Arabic               Tag = Tag{language: arIndex, locale: arIndex}
+	ModernStandardArabic Tag = Tag{language: ar001Index, locale: ar001Index}
+	Azerbaijani          Tag = Tag{language: azIndex, locale: azIndex}
+	Bulgarian            Tag = Tag{language: bgIndex, locale: bgIndex}
+	Bengali              Tag = Tag{language: bnIndex, locale: bnIndex}
+	Catalan              Tag = Tag{language: caIndex, locale: caIndex}
+	Czech                Tag = Tag{language: csIndex, locale: csIndex}
+	Danish               Tag = Tag{language: daIndex, locale: daIndex}
+	German               Tag = Tag{language: deIndex, locale: deIndex}
+	Greek                Tag = Tag{language: elIndex, locale: elIndex}
+	English              Tag = Tag{language: enIndex, locale: enIndex}
+	AmericanEnglish      Tag = Tag{language: enUSIndex, locale: enUSIndex}
+	BritishEnglish       Tag = Tag{language: enGBIndex, locale: enGBIndex}
+	Spanish              Tag = Tag{language: esIndex, locale: esIndex}
+	EuropeanSpanish      Tag = Tag{language: esESIndex, locale: esESIndex}
+	LatinAmericanSpanish Tag = Tag{language: es419Index, locale: es419Index}
+	Estonian             Tag = Tag{language: etIndex, locale: etIndex}
+	Persian              Tag = Tag{language: faIndex, locale: faIndex}
+	Finnish              Tag = Tag{language: fiIndex, locale: fiIndex}
+	Filipino             Tag = Tag{language: filIndex, locale: filIndex}
+	French               Tag = Tag{language: frIndex, locale: frIndex}
+	CanadianFrench       Tag = Tag{language: frCAIndex, locale: frCAIndex}
+	Gujarati             Tag = Tag{language: guIndex, locale: guIndex}
+	Hebrew               Tag = Tag{language: heIndex, locale: heIndex}
+	Hindi                Tag = Tag{language: hiIndex, locale: hiIndex}
+	Croatian             Tag = Tag{language: hrIndex, locale: hrIndex}
+	Hungarian            Tag = Tag{language: huIndex, locale: huIndex}
+	Armenian             Tag = Tag{language: hyIndex, locale: hyIndex}
+	Indonesian           Tag = Tag{language: idIndex, locale: idIndex}
+	Icelandic            Tag = Tag{language: isIndex, locale: isIndex}
+	Italian              Tag = Tag{language: itIndex, locale: itIndex}
+	Japanese             Tag = Tag{language: jaIndex, locale: jaIndex}
+	Georgian             Tag = Tag{language: kaIndex, locale: kaIndex}
+	Kazakh               Tag = Tag{language: kkIndex, locale: kkIndex}
+	Khmer                Tag = Tag{language: kmIndex, locale: kmIndex}
+	Kannada              Tag = Tag{language: knIndex, locale: knIndex}
+	Korean               Tag = Tag{language: koIndex, locale: koIndex}
+	Kirghiz              Tag = Tag{language: kyIndex, locale: kyIndex}
+	Lao                  Tag = Tag{language: loIndex, locale: loIndex}
+	Lithuanian           Tag = Tag{language: ltIndex, locale: ltIndex}
+	Latvian              Tag = Tag{language: lvIndex, locale: lvIndex}
+	Macedonian           Tag = Tag{language: mkIndex, locale: mkIndex}
+	Malayalam            Tag = Tag{language: mlIndex, locale: mlIndex}
+	Mongolian            Tag = Tag{language: mnIndex, locale: mnIndex}
+	Marathi              Tag = Tag{language: mrIndex, locale: mrIndex}
+	Malay                Tag = Tag{language: msIndex, locale: msIndex}
+	Burmese              Tag = Tag{language: myIndex, locale: myIndex}
+	Nepali               Tag = Tag{language: neIndex, locale: neIndex}
+	Dutch                Tag = Tag{language: nlIndex, locale: nlIndex}
+	Norwegian            Tag = Tag{language: noIndex, locale: noIndex}
+	Punjabi              Tag = Tag{language: paIndex, locale: paIndex}
+	Polish               Tag = Tag{language: plIndex, locale: plIndex}
+	Portuguese           Tag = Tag{language: ptIndex, locale: ptIndex}
+	BrazilianPortuguese  Tag = Tag{language: ptBRIndex, locale: ptBRIndex}
+	EuropeanPortuguese   Tag = Tag{language: ptPTIndex, locale: ptPTIndex}
+	Romanian             Tag = Tag{language: roIndex, locale: roIndex}
+	Russian              Tag = Tag{language: ruIndex, locale: ruIndex}
+	Sinhala              Tag = Tag{language: siIndex, locale: siIndex}
+	Slovak               Tag = Tag{language: skIndex, locale: skIndex}
+	Slovenian            Tag = Tag{language: slIndex, locale: slIndex}
+	Albanian             Tag = Tag{language: sqIndex, locale: sqIndex}
+	Serbian              Tag = Tag{language: srIndex, locale: srIndex}
+	SerbianLatin         Tag = Tag{language: srLatnIndex, locale: srLatnIndex}
+	Swedish              Tag = Tag{language: svIndex, locale: svIndex}
+	Swahili              Tag = Tag{language: swIndex, locale: swIndex}
+	Tamil                Tag = Tag{language: taIndex, locale: taIndex}
+	Telugu               Tag = Tag{language: teIndex, locale: teIndex}
+	Thai                 Tag = Tag{language: thIndex, locale: thIndex}
+	Turkish              Tag = Tag{language: trIndex, locale: trIndex}
+	Ukrainian            Tag = Tag{language: ukIndex, locale: ukIndex}
+	Urdu                 Tag = Tag{language: urIndex, locale: urIndex}
+	Uzbek                Tag = Tag{language: uzIndex, locale: uzIndex}
+	Vietnamese           Tag = Tag{language: viIndex, locale: viIndex}
+	Chinese              Tag = Tag{language: zhIndex, locale: zhIndex}
+	SimplifiedChinese    Tag = Tag{language: zhHansIndex, locale: zhHansIndex}
+	TraditionalChinese   Tag = Tag{language: zhHantIndex, locale: zhHantIndex}
+	Zulu                 Tag = Tag{language: zuIndex, locale: zuIndex}
+)
@@ -0,0 +1,167 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+	"sort"
+	"strings"
+)
+
+// A Builder allows constructing a Tag from individual components.
+// Its main user is Compose in the top-level language package.
+type Builder struct {
+	Tag Tag
+
+	private    string // the x extension
+	variants   []string
+	extensions []string
+}
+
+// Make returns a new Tag from the current settings.
+func (b *Builder) Make() Tag {
+	t := b.Tag
+
+	if len(b.extensions) > 0 || len(b.variants) > 0 {
+		sort.Sort(sortVariants(b.variants))
+		sort.Strings(b.extensions)
+
+		if b.private != "" {
+			b.extensions = append(b.extensions, b.private)
+		}
+		n := maxCoreSize + tokenLen(b.variants...) + tokenLen(b.extensions...)
+		buf := make([]byte, n)
+		p := t.genCoreBytes(buf)
+		t.pVariant = byte(p)
+		p += appendTokens(buf[p:], b.variants...)
+		t.pExt = uint16(p)
+		p += appendTokens(buf[p:], b.extensions...)
+		t.str = string(buf[:p])
+		// We may not always need to remake the string, but when or when not
+		// to do so is rather tricky.
+		scan := makeScanner(buf[:p])
+		t, _ = parse(&scan, "")
+		return t
+
+	} else if b.private != "" {
+		t.str = b.private
+		t.RemakeString()
+	}
+	return t
+}
+
+// SetTag copies all the settings from a given Tag. Any previously set values
+// are discarded.
+func (b *Builder) SetTag(t Tag) {
+	b.Tag.LangID = t.LangID
+	b.Tag.RegionID = t.RegionID
+	b.Tag.ScriptID = t.ScriptID
+	// TODO: optimize
+	b.variants = b.variants[:0]
+	if variants := t.Variants(); variants != "" {
+		for _, vr := range strings.Split(variants[1:], "-") {
+			b.variants = append(b.variants, vr)
+		}
+	}
+	b.extensions, b.private = b.extensions[:0], ""
+	for _, e := range t.Extensions() {
+		b.AddExt(e)
+	}
+}
+
+// AddExt adds extension e to the tag. e must be a valid extension as returned
+// by Tag.Extension. If the extension already exists, it will be discarded,
+// except for a -u extension, where non-existing key-type pairs will added.
+func (b *Builder) AddExt(e string) {
+	if e[0] == 'x' {
+		if b.private == "" {
+			b.private = e
+		}
+		return
+	}
+	for i, s := range b.extensions {
+		if s[0] == e[0] {
+			if e[0] == 'u' {
+				b.extensions[i] += e[1:]
+			}
+			return
+		}
+	}
+	b.extensions = append(b.extensions, e)
+}
+
+// SetExt sets the extension e to the tag. e must be a valid extension as
+// returned by Tag.Extension. If the extension already exists, it will be
+// overwritten, except for a -u extension, where the individual key-type pairs
+// will be set.
+func (b *Builder) SetExt(e string) {
+	if e[0] == 'x' {
+		b.private = e
+		return
+	}
+	for i, s := range b.extensions {
+		if s[0] == e[0] {
+			if e[0] == 'u' {
+				b.extensions[i] = e + s[1:]
+			} else {
+				b.extensions[i] = e
+			}
+			return
+		}
+	}
+	b.extensions = append(b.extensions, e)
+}
+
+// AddVariant adds any number of variants.
+func (b *Builder) AddVariant(v ...string) {
+	for _, v := range v {
+		if v != "" {
+			b.variants = append(b.variants, v)
+		}
+	}
+}
+
+// ClearVariants removes any variants previously added, including those
+// copied from a Tag in SetTag.
+func (b *Builder) ClearVariants() {
+	b.variants = b.variants[:0]
+}
+
+// ClearExtensions removes any extensions previously added, including those
+// copied from a Tag in SetTag.
+func (b *Builder) ClearExtensions() {
+	b.private = ""
+	b.extensions = b.extensions[:0]
+}
+
+func tokenLen(token ...string) (n int) {
+	for _, t := range token {
+		n += len(t) + 1
+	}
+	return
+}
+
+func appendTokens(b []byte, token ...string) int {
+	p := 0
+	for _, t := range token {
+		b[p] = '-'
+		copy(b[p+1:], t)
+		p += 1 + len(t)
+	}
+	return p
+}
+
+type sortVariants []string
+
+func (s sortVariants) Len() int {
+	return len(s)
+}
+
+func (s sortVariants) Swap(i, j int) {
+	s[j], s[i] = s[i], s[j]
+}
+
+func (s sortVariants) Less(i, j int) bool {
+	return variantIndex[s[i]] < variantIndex[s[j]]
+}
@@ -0,0 +1,28 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+// BaseLanguages returns the list of all supported base languages. It generates
+// the list by traversing the internal structures.
+func BaseLanguages() []Language {
+	base := make([]Language, 0, NumLanguages)
+	for i := 0; i < langNoIndexOffset; i++ {
+		// We included "und" already for the value 0.
+		if i != nonCanonicalUnd {
+			base = append(base, Language(i))
+		}
+	}
+	i := langNoIndexOffset
+	for _, v := range langNoIndex {
+		for k := 0; k < 8; k++ {
+			if v&1 == 1 {
+				base = append(base, Language(i))
+			}
+			v >>= 1
+			i++
+		}
+	}
+	return base
+}
@@ -8,13 +8,13 @@ package main

 // This file contains code common to the maketables.go and the package code.

-// langAliasType is the type of an alias in langAliasMap.
-type langAliasType int8
+// AliasType is the type of an alias in AliasMap.
+type AliasType int8

 const (
-	langDeprecated langAliasType = iota
-	langMacro
-	langLegacy
+	Deprecated AliasType = iota
+	Macro
+	Legacy

-	langAliasTypeUnknown langAliasType = -1
+	AliasTypeUnknown AliasType = -1
 )
@@ -0,0 +1,596 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:generate go run gen.go gen_common.go -output tables.go
+
+package language // import "golang.org/x/text/internal/language"
+
+// TODO: Remove above NOTE after:
+// - verifying that tables are dropped correctly (most notably matcher tables).
+
+import (
+	"errors"
+	"fmt"
+	"strings"
+)
+
+const (
+	// maxCoreSize is the maximum size of a BCP 47 tag without variants and
+	// extensions. Equals max lang (3) + script (4) + max reg (3) + 2 dashes.
+	maxCoreSize = 12
+
+	// max99thPercentileSize is a somewhat arbitrary buffer size that presumably
+	// is large enough to hold at least 99% of the BCP 47 tags.
+	max99thPercentileSize = 32
+
+	// maxSimpleUExtensionSize is the maximum size of a -u extension with one
+	// key-type pair. Equals len("-u-") + key (2) + dash + max value (8).
+	maxSimpleUExtensionSize = 14
+)
+
+// Tag represents a BCP 47 language tag. It is used to specify an instance of a
+// specific language or locale. All language tag values are guaranteed to be
+// well-formed. The zero value of Tag is Und.
+type Tag struct {
+	// TODO: the following fields have the form TagTypeID. This name is chosen
+	// to allow refactoring the public package without conflicting with its
+	// Base, Script, and Region methods. Once the transition is fully completed
+	// the ID can be stripped from the name.
+
+	LangID   Language
+	RegionID Region
+	// TODO: we will soon run out of positions for ScriptID. Idea: instead of
+	// storing lang, region, and ScriptID codes, store only the compact index and
+	// have a lookup table from this code to its expansion. This greatly speeds
+	// up table lookup, speed up common variant cases.
+	// This will also immediately free up 3 extra bytes. Also, the pVariant
+	// field can now be moved to the lookup table, as the compact index uniquely
+	// determines the offset of a possible variant.
+	ScriptID Script
+	pVariant byte   // offset in str, includes preceding '-'
+	pExt     uint16 // offset of first extension, includes preceding '-'
+
+	// str is the string representation of the Tag. It will only be used if the
+	// tag has variants or extensions.
+	str string
+}
+
+// Make is a convenience wrapper for Parse that omits the error.
+// In case of an error, a sensible default is returned.
+func Make(s string) Tag {
+	t, _ := Parse(s)
+	return t
+}
+
+// Raw returns the raw base language, script and region, without making an
+// attempt to infer their values.
+// TODO: consider removing
+func (t Tag) Raw() (b Language, s Script, r Region) {
+	return t.LangID, t.ScriptID, t.RegionID
+}
+
+// equalTags compares language, script and region subtags only.
+func (t Tag) equalTags(a Tag) bool {
+	return t.LangID == a.LangID && t.ScriptID == a.ScriptID && t.RegionID == a.RegionID
+}
+
+// IsRoot returns true if t is equal to language "und".
+func (t Tag) IsRoot() bool {
+	if int(t.pVariant) < len(t.str) {
+		return false
+	}
+	return t.equalTags(Und)
+}
+
+// IsPrivateUse reports whether the Tag consists solely of an IsPrivateUse use
+// tag.
+func (t Tag) IsPrivateUse() bool {
+	return t.str != "" && t.pVariant == 0
+}
+
+// RemakeString is used to update t.str in case lang, script or region changed.
+// It is assumed that pExt and pVariant still point to the start of the
+// respective parts.
+func (t *Tag) RemakeString() {
+	if t.str == "" {
+		return
+	}
+	extra := t.str[t.pVariant:]
+	if t.pVariant > 0 {
+		extra = extra[1:]
+	}
+	if t.equalTags(Und) && strings.HasPrefix(extra, "x-") {
+		t.str = extra
+		t.pVariant = 0
+		t.pExt = 0
+		return
+	}
+	var buf [max99thPercentileSize]byte // avoid extra memory allocation in most cases.
+	b := buf[:t.genCoreBytes(buf[:])]
+	if extra != "" {
+		diff := len(b) - int(t.pVariant)
+		b = append(b, '-')
+		b = append(b, extra...)
+		t.pVariant = uint8(int(t.pVariant) + diff)
+		t.pExt = uint16(int(t.pExt) + diff)
+	} else {
+		t.pVariant = uint8(len(b))
+		t.pExt = uint16(len(b))
+	}
+	t.str = string(b)
+}
+
+// genCoreBytes writes a string for the base languages, script and region tags
+// to the given buffer and returns the number of bytes written. It will never
+// write more than maxCoreSize bytes.
+func (t *Tag) genCoreBytes(buf []byte) int {
+	n := t.LangID.StringToBuf(buf[:])
+	if t.ScriptID != 0 {
+		n += copy(buf[n:], "-")
+		n += copy(buf[n:], t.ScriptID.String())
+	}
+	if t.RegionID != 0 {
+		n += copy(buf[n:], "-")
+		n += copy(buf[n:], t.RegionID.String())
+	}
+	return n
+}
+
+// String returns the canonical string representation of the language tag.
+func (t Tag) String() string {
+	if t.str != "" {
+		return t.str
+	}
+	if t.ScriptID == 0 && t.RegionID == 0 {
+		return t.LangID.String()
+	}
+	buf := [maxCoreSize]byte{}
+	return string(buf[:t.genCoreBytes(buf[:])])
+}
+
+// MarshalText implements encoding.TextMarshaler.
+func (t Tag) MarshalText() (text []byte, err error) {
+	if t.str != "" {
+		text = append(text, t.str...)
+	} else if t.ScriptID == 0 && t.RegionID == 0 {
+		text = append(text, t.LangID.String()...)
+	} else {
+		buf := [maxCoreSize]byte{}
+		text = buf[:t.genCoreBytes(buf[:])]
+	}
+	return text, nil
+}
+
+// UnmarshalText implements encoding.TextUnmarshaler.
+func (t *Tag) UnmarshalText(text []byte) error {
+	tag, err := Parse(string(text))
+	*t = tag
+	return err
+}
+
+// Variants returns the part of the tag holding all variants or the empty string
+// if there are no variants defined.
+func (t Tag) Variants() string {
+	if t.pVariant == 0 {
+		return ""
+	}
+	return t.str[t.pVariant:t.pExt]
+}
+
+// VariantOrPrivateUseTags returns variants or private use tags.
+func (t Tag) VariantOrPrivateUseTags() string {
+	if t.pExt > 0 {
+		return t.str[t.pVariant:t.pExt]
+	}
+	return t.str[t.pVariant:]
+}
+
+// HasString reports whether this tag defines more than just the raw
+// components.
+func (t Tag) HasString() bool {
+	return t.str != ""
+}
+
+// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
+// specific language are substituted with fields from the parent language.
+// The parent for a language may change for newer versions of CLDR.
+func (t Tag) Parent() Tag {
+	if t.str != "" {
+		// Strip the variants and extensions.
+		b, s, r := t.Raw()
+		t = Tag{LangID: b, ScriptID: s, RegionID: r}
+		if t.RegionID == 0 && t.ScriptID != 0 && t.LangID != 0 {
+			base, _ := addTags(Tag{LangID: t.LangID})
+			if base.ScriptID == t.ScriptID {
+				return Tag{LangID: t.LangID}
+			}
+		}
+		return t
+	}
+	if t.LangID != 0 {
+		if t.RegionID != 0 {
+			maxScript := t.ScriptID
+			if maxScript == 0 {
+				max, _ := addTags(t)
+				maxScript = max.ScriptID
+			}
+
+			for i := range parents {
+				if Language(parents[i].lang) == t.LangID && Script(parents[i].maxScript) == maxScript {
+					for _, r := range parents[i].fromRegion {
+						if Region(r) == t.RegionID {
+							return Tag{
+								LangID:   t.LangID,
+								ScriptID: Script(parents[i].script),
+								RegionID: Region(parents[i].toRegion),
+							}
+						}
+					}
+				}
+			}
+
+			// Strip the script if it is the default one.
+			base, _ := addTags(Tag{LangID: t.LangID})
+			if base.ScriptID != maxScript {
+				return Tag{LangID: t.LangID, ScriptID: maxScript}
+			}
+			return Tag{LangID: t.LangID}
+		} else if t.ScriptID != 0 {
+			// The parent for an base-script pair with a non-default script is
+			// "und" instead of the base language.
+			base, _ := addTags(Tag{LangID: t.LangID})
+			if base.ScriptID != t.ScriptID {
+				return Und
+			}
+			return Tag{LangID: t.LangID}
+		}
+	}
+	return Und
+}
+
+// ParseExtension parses s as an extension and returns it on success.
+func ParseExtension(s string) (ext string, err error) {
+	scan := makeScannerString(s)
+	var end int
+	if n := len(scan.token); n != 1 {
+		return "", ErrSyntax
+	}
+	scan.toLower(0, len(scan.b))
+	end = parseExtension(&scan)
+	if end != len(s) {
+		return "", ErrSyntax
+	}
+	return string(scan.b), nil
+}
+
+// HasVariants reports whether t has variants.
+func (t Tag) HasVariants() bool {
+	return uint16(t.pVariant) < t.pExt
+}
+
+// HasExtensions reports whether t has extensions.
+func (t Tag) HasExtensions() bool {
+	return int(t.pExt) < len(t.str)
+}
+
+// Extension returns the extension of type x for tag t. It will return
+// false for ok if t does not have the requested extension. The returned
+// extension will be invalid in this case.
+func (t Tag) Extension(x byte) (ext string, ok bool) {
+	for i := int(t.pExt); i < len(t.str)-1; {
+		var ext string
+		i, ext = getExtension(t.str, i)
+		if ext[0] == x {
+			return ext, true
+		}
+	}
+	return "", false
+}
+
+// Extensions returns all extensions of t.
+func (t Tag) Extensions() []string {
+	e := []string{}
+	for i := int(t.pExt); i < len(t.str)-1; {
+		var ext string
+		i, ext = getExtension(t.str, i)
+		e = append(e, ext)
+	}
+	return e
+}
+
+// TypeForKey returns the type associated with the given key, where key and type
+// are of the allowed values defined for the Unicode locale extension ('u') in
+// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+// TypeForKey will traverse the inheritance chain to get the correct value.
+func (t Tag) TypeForKey(key string) string {
+	if start, end, _ := t.findTypeForKey(key); end != start {
+		return t.str[start:end]
+	}
+	return ""
+}
+
+var (
+	errPrivateUse       = errors.New("cannot set a key on a private use tag")
+	errInvalidArguments = errors.New("invalid key or type")
+)
+
+// SetTypeForKey returns a new Tag with the key set to type, where key and type
+// are of the allowed values defined for the Unicode locale extension ('u') in
+// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+// An empty value removes an existing pair with the same key.
+func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
+	if t.IsPrivateUse() {
+		return t, errPrivateUse
+	}
+	if len(key) != 2 {
+		return t, errInvalidArguments
+	}
+
+	// Remove the setting if value is "".
+	if value == "" {
+		start, end, _ := t.findTypeForKey(key)
+		if start != end {
+			// Remove key tag and leading '-'.
+			start -= 4
+
+			// Remove a possible empty extension.
+			if (end == len(t.str) || t.str[end+2] == '-') && t.str[start-2] == '-' {
+				start -= 2
+			}
+			if start == int(t.pVariant) && end == len(t.str) {
+				t.str = ""
+				t.pVariant, t.pExt = 0, 0
+			} else {
+				t.str = fmt.Sprintf("%s%s", t.str[:start], t.str[end:])
+			}
+		}
+		return t, nil
+	}
+
+	if len(value) < 3 || len(value) > 8 {
+		return t, errInvalidArguments
+	}
+
+	var (
+		buf    [maxCoreSize + maxSimpleUExtensionSize]byte
+		uStart int // start of the -u extension.
+	)
+
+	// Generate the tag string if needed.
+	if t.str == "" {
+		uStart = t.genCoreBytes(buf[:])
+		buf[uStart] = '-'
+		uStart++
+	}
+
+	// Create new key-type pair and parse it to verify.
+	b := buf[uStart:]
+	copy(b, "u-")
+	copy(b[2:], key)
+	b[4] = '-'
+	b = b[:5+copy(b[5:], value)]
+	scan := makeScanner(b)
+	if parseExtensions(&scan); scan.err != nil {
+		return t, scan.err
+	}
+
+	// Assemble the replacement string.
+	if t.str == "" {
+		t.pVariant, t.pExt = byte(uStart-1), uint16(uStart-1)
+		t.str = string(buf[:uStart+len(b)])
+	} else {
+		s := t.str
+		start, end, hasExt := t.findTypeForKey(key)
+		if start == end {
+			if hasExt {
+				b = b[2:]
+			}
+			t.str = fmt.Sprintf("%s-%s%s", s[:start], b, s[end:])
+		} else {
+			t.str = fmt.Sprintf("%s%s%s", s[:start], value, s[end:])
+		}
+	}
+	return t, nil
+}
+
+// findKeyAndType returns the start and end position for the type corresponding
+// to key or the point at which to insert the key-value pair if the type
+// wasn't found. The hasExt return value reports whether an -u extension was present.
+// Note: the extensions are typically very small and are likely to contain
+// only one key-type pair.
+func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
+	p := int(t.pExt)
+	if len(key) != 2 || p == len(t.str) || p == 0 {
+		return p, p, false
+	}
+	s := t.str
+
+	// Find the correct extension.
+	for p++; s[p] != 'u'; p++ {
+		if s[p] > 'u' {
+			p--
+			return p, p, false
+		}
+		if p = nextExtension(s, p); p == len(s) {
+			return len(s), len(s), false
+		}
+	}
+	// Proceed to the hyphen following the extension name.
+	p++
+
+	// curKey is the key currently being processed.
+	curKey := ""
+
+	// Iterate over keys until we get the end of a section.
+	for {
+		// p points to the hyphen preceding the current token.
+		if p3 := p + 3; s[p3] == '-' {
+			// Found a key.
+			// Check whether we just processed the key that was requested.
+			if curKey == key {
+				return start, p, true
+			}
+			// Set to the next key and continue scanning type tokens.
+			curKey = s[p+1 : p3]
+			if curKey > key {
+				return p, p, true
+			}
+			// Start of the type token sequence.
+			start = p + 4
+			// A type is at least 3 characters long.
+			p += 7 // 4 + 3
+		} else {
+			// Attribute or type, which is at least 3 characters long.
+			p += 4
+		}
+		// p points past the third character of a type or attribute.
+		max := p + 5 // maximum length of token plus hyphen.
+		if len(s) < max {
+			max = len(s)
+		}
+		for ; p < max && s[p] != '-'; p++ {
+		}
+		// Bail if we have exhausted all tokens or if the next token starts
+		// a new extension.
+		if p == len(s) || s[p+2] == '-' {
+			if curKey == key {
+				return start, p, true
+			}
+			return p, p, true
+		}
+	}
+}
+
+// ParseBase parses a 2- or 3-letter ISO 639 code.
+// It returns a ValueError if s is a well-formed but unknown language identifier
+// or another error if another error occurred.
+func ParseBase(s string) (Language, error) {
+	if n := len(s); n < 2 || 3 < n {
+		return 0, ErrSyntax
+	}
+	var buf [3]byte
+	return getLangID(buf[:copy(buf[:], s)])
+}
+
+// ParseScript parses a 4-letter ISO 15924 code.
+// It returns a ValueError if s is a well-formed but unknown script identifier
+// or another error if another error occurred.
+func ParseScript(s string) (Script, error) {
+	if len(s) != 4 {
+		return 0, ErrSyntax
+	}
+	var buf [4]byte
+	return getScriptID(script, buf[:copy(buf[:], s)])
+}
+
+// EncodeM49 returns the Region for the given UN M.49 code.
+// It returns an error if r is not a valid code.
+func EncodeM49(r int) (Region, error) {
+	return getRegionM49(r)
+}
+
+// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
+// It returns a ValueError if s is a well-formed but unknown region identifier
+// or another error if another error occurred.
+func ParseRegion(s string) (Region, error) {
+	if n := len(s); n < 2 || 3 < n {
+		return 0, ErrSyntax
+	}
+	var buf [3]byte
+	return getRegionID(buf[:copy(buf[:], s)])
+}
+
+// IsCountry returns whether this region is a country or autonomous area. This
+// includes non-standard definitions from CLDR.
+func (r Region) IsCountry() bool {
+	if r == 0 || r.IsGroup() || r.IsPrivateUse() && r != _XK {
+		return false
+	}
+	return true
+}
+
+// IsGroup returns whether this region defines a collection of regions. This
+// includes non-standard definitions from CLDR.
+func (r Region) IsGroup() bool {
+	if r == 0 {
+		return false
+	}
+	return int(regionInclusion[r]) < len(regionContainment)
+}
+
+// Contains returns whether Region c is contained by Region r. It returns true
+// if c == r.
+func (r Region) Contains(c Region) bool {
+	if r == c {
+		return true
+	}
+	g := regionInclusion[r]
+	if g >= nRegionGroups {
+		return false
+	}
+	m := regionContainment[g]
+
+	d := regionInclusion[c]
+	b := regionInclusionBits[d]
+
+	// A contained country may belong to multiple disjoint groups. Matching any
+	// of these indicates containment. If the contained region is a group, it
+	// must strictly be a subset.
+	if d >= nRegionGroups {
+		return b&m != 0
+	}
+	return b&^m == 0
+}
+
+var errNoTLD = errors.New("language: region is not a valid ccTLD")
+
+// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
+// In all other cases it returns either the region itself or an error.
+//
+// This method may return an error for a region for which there exists a
+// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
+// region will already be canonicalized it was obtained from a Tag that was
+// obtained using any of the default methods.
+func (r Region) TLD() (Region, error) {
+	// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the
+	// difference between ISO 3166-1 and IANA ccTLD.
+	if r == _GB {
+		r = _UK
+	}
+	if (r.typ() & ccTLD) == 0 {
+		return 0, errNoTLD
+	}
+	return r, nil
+}
+
+// Canonicalize returns the region or a possible replacement if the region is
+// deprecated. It will not return a replacement for deprecated regions that
+// are split into multiple regions.
+func (r Region) Canonicalize() Region {
+	if cr := normRegion(r); cr != 0 {
+		return cr
+	}
+	return r
+}
+
+// Variant represents a registered variant of a language as defined by BCP 47.
+type Variant struct {
+	ID  uint8
+	str string
+}
+
+// ParseVariant parses and returns a Variant. An error is returned if s is not
+// a valid variant.
+func ParseVariant(s string) (Variant, error) {
+	s = strings.ToLower(s)
+	if id, ok := variantIndex[s]; ok {
+		return Variant{id, s}, nil
+	}
+	return Variant{}, NewValueError([]byte(s))
+}
+
+// String returns the string representation of the variant.
+func (v Variant) String() string {
+	return v.str
+}
@@ -17,11 +17,11 @@ import (
 // if it could not be found.
 func findIndex(idx tag.Index, key []byte, form string) (index int, err error) {
 	if !tag.FixCase(form, key) {
-		return 0, errSyntax
+		return 0, ErrSyntax
 	}
 	i := idx.Index(key)
 	if i == -1 {
-		return 0, mkErrInvalid(key)
+		return 0, NewValueError(key)
 	}
 	return i, nil
 }
@@ -32,38 +32,45 @@ func searchUint(imap []uint16, key uint16) int {
 	})
 }

-type langID uint16
+type Language uint16

 // getLangID returns the langID of s if s is a canonical subtag
 // or langUnknown if s is not a canonical subtag.
-func getLangID(s []byte) (langID, error) {
+func getLangID(s []byte) (Language, error) {
 	if len(s) == 2 {
 		return getLangISO2(s)
 	}
 	return getLangISO3(s)
 }

+// TODO language normalization as well as the AliasMaps could be moved to the
+// higher level package, but it is a bit tricky to separate the generation.
+
+func (id Language) Canonicalize() (Language, AliasType) {
+	return normLang(id)
+}
+
 // mapLang returns the mapped langID of id according to mapping m.
-func normLang(id langID) (langID, langAliasType) {
-	k := sort.Search(len(langAliasMap), func(i int) bool {
-		return langAliasMap[i].from >= uint16(id)
+func normLang(id Language) (Language, AliasType) {
+	k := sort.Search(len(AliasMap), func(i int) bool {
+		return AliasMap[i].From >= uint16(id)
 	})
-	if k < len(langAliasMap) && langAliasMap[k].from == uint16(id) {
-		return langID(langAliasMap[k].to), langAliasTypes[k]
+	if k < len(AliasMap) && AliasMap[k].From == uint16(id) {
+		return Language(AliasMap[k].To), AliasTypes[k]
 	}
-	return id, langAliasTypeUnknown
+	return id, AliasTypeUnknown
 }

 // getLangISO2 returns the langID for the given 2-letter ISO language code
 // or unknownLang if this does not exist.
-func getLangISO2(s []byte) (langID, error) {
+func getLangISO2(s []byte) (Language, error) {
 	if !tag.FixCase("zz", s) {
-		return 0, errSyntax
+		return 0, ErrSyntax
 	}
 	if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 {
-		return langID(i), nil
+		return Language(i), nil
 	}
-	return 0, mkErrInvalid(s)
+	return 0, NewValueError(s)
 }

 const base = 'z' - 'a' + 1
@@ -88,7 +95,7 @@ func intToStr(v uint, s []byte) {

 // getLangISO3 returns the langID for the given 3-letter ISO language code
 // or unknownLang if this does not exist.
-func getLangISO3(s []byte) (langID, error) {
+func getLangISO3(s []byte) (Language, error) {
 	if tag.FixCase("und", s) {
 		// first try to match canonical 3-letter entries
 		for i := lang.Index(s[:2]); i != -1; i = lang.Next(s[:2], i) {
@@ -96,7 +103,7 @@ func getLangISO3(s []byte) (langID, error) {
 				// We treat "und" as special and always translate it to "unspecified".
 				// Note that ZZ and Zzzz are private use and are not treated as
 				// unspecified by default.
-				id := langID(i)
+				id := Language(i)
 				if id == nonCanonicalUnd {
 					return 0, nil
 				}
@@ -104,26 +111,26 @@ func getLangISO3(s []byte) (langID, error) {
 			}
 		}
 		if i := altLangISO3.Index(s); i != -1 {
-			return langID(altLangIndex[altLangISO3.Elem(i)[3]]), nil
+			return Language(altLangIndex[altLangISO3.Elem(i)[3]]), nil
 		}
 		n := strToInt(s)
 		if langNoIndex[n/8]&(1<<(n%8)) != 0 {
-			return langID(n) + langNoIndexOffset, nil
+			return Language(n) + langNoIndexOffset, nil
 		}
 		// Check for non-canonical uses of ISO3.
 		for i := lang.Index(s[:1]); i != -1; i = lang.Next(s[:1], i) {
 			if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] {
-				return langID(i), nil
+				return Language(i), nil
 			}
 		}
-		return 0, mkErrInvalid(s)
+		return 0, NewValueError(s)
 	}
-	return 0, errSyntax
+	return 0, ErrSyntax
 }

-// stringToBuf writes the string to b and returns the number of bytes
+// StringToBuf writes the string to b and returns the number of bytes
 // written.  cap(b) must be >= 3.
-func (id langID) stringToBuf(b []byte) int {
+func (id Language) StringToBuf(b []byte) int {
 	if id >= langNoIndexOffset {
 		intToStr(uint(id)-langNoIndexOffset, b[:3])
 		return 3
@@ -140,7 +147,7 @@ func (id langID) stringToBuf(b []byte) int {
 // String returns the BCP 47 representation of the langID.
 // Use b as variable name, instead of id, to ensure the variable
 // used is consistent with that of Base in which this type is embedded.
-func (b langID) String() string {
+func (b Language) String() string {
 	if b == 0 {
 		return "und"
 	} else if b >= langNoIndexOffset {
@@ -157,7 +164,7 @@ func (b langID) String() string {
 }

 // ISO3 returns the ISO 639-3 language code.
-func (b langID) ISO3() string {
+func (b Language) ISO3() string {
 	if b == 0 || b >= langNoIndexOffset {
 		return b.String()
 	}
@@ -173,15 +180,24 @@ func (b langID) ISO3() string {
 }

 // IsPrivateUse reports whether this language code is reserved for private use.
-func (b langID) IsPrivateUse() bool {
+func (b Language) IsPrivateUse() bool {
 	return langPrivateStart <= b && b <= langPrivateEnd
 }

-type regionID uint16
+// SuppressScript returns the script marked as SuppressScript in the IANA
+// language tag repository, or 0 if there is no such script.
+func (b Language) SuppressScript() Script {
+	if b < langNoIndexOffset {
+		return Script(suppressScript[b])
+	}
+	return 0
+}
+
+type Region uint16

 // getRegionID returns the region id for s if s is a valid 2-letter region code
 // or unknownRegion.
-func getRegionID(s []byte) (regionID, error) {
+func getRegionID(s []byte) (Region, error) {
 	if len(s) == 3 {
 		if isAlpha(s[0]) {
 			return getRegionISO3(s)
@@ -195,34 +211,34 @@ func getRegionID(s []byte) (regionID, error) {

 // getRegionISO2 returns the regionID for the given 2-letter ISO country code
 // or unknownRegion if this does not exist.
-func getRegionISO2(s []byte) (regionID, error) {
+func getRegionISO2(s []byte) (Region, error) {
 	i, err := findIndex(regionISO, s, "ZZ")
 	if err != nil {
 		return 0, err
 	}
-	return regionID(i) + isoRegionOffset, nil
+	return Region(i) + isoRegionOffset, nil
 }

 // getRegionISO3 returns the regionID for the given 3-letter ISO country code
 // or unknownRegion if this does not exist.
-func getRegionISO3(s []byte) (regionID, error) {
+func getRegionISO3(s []byte) (Region, error) {
 	if tag.FixCase("ZZZ", s) {
 		for i := regionISO.Index(s[:1]); i != -1; i = regionISO.Next(s[:1], i) {
 			if e := regionISO.Elem(i); e[2] == s[1] && e[3] == s[2] {
-				return regionID(i) + isoRegionOffset, nil
+				return Region(i) + isoRegionOffset, nil
 			}
 		}
 		for i := 0; i < len(altRegionISO3); i += 3 {
 			if tag.Compare(altRegionISO3[i:i+3], s) == 0 {
-				return regionID(altRegionIDs[i/3]), nil
+				return Region(altRegionIDs[i/3]), nil
 			}
 		}
-		return 0, mkErrInvalid(s)
+		return 0, NewValueError(s)
 	}
-	return 0, errSyntax
+	return 0, ErrSyntax
 }

-func getRegionM49(n int) (regionID, error) {
+func getRegionM49(n int) (Region, error) {
 	if 0 < n && n <= 999 {
 		const (
 			searchBits = 7
@@ -236,7 +252,7 @@ func getRegionM49(n int) (regionID, error) {
 			return buf[i] >= val
 		})
 		if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val {
-			return regionID(r & regionMask), nil
+			return Region(r & regionMask), nil
 		}
 	}
 	var e ValueError
@@ -247,13 +263,13 @@ func getRegionM49(n int) (regionID, error) {
 // normRegion returns a region if r is deprecated or 0 otherwise.
 // TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ).
 // TODO: consider mapping split up regions to new most populous one (like CLDR).
-func normRegion(r regionID) regionID {
+func normRegion(r Region) Region {
 	m := regionOldMap
 	k := sort.Search(len(m), func(i int) bool {
-		return m[i].from >= uint16(r)
+		return m[i].From >= uint16(r)
 	})
-	if k < len(m) && m[k].from == uint16(r) {
-		return regionID(m[k].to)
+	if k < len(m) && m[k].From == uint16(r) {
+		return Region(m[k].To)
 	}
 	return 0
 }
@@ -264,13 +280,13 @@ const (
 	bcp47Region
 )

-func (r regionID) typ() byte {
+func (r Region) typ() byte {
 	return regionTypes[r]
 }

 // String returns the BCP 47 representation for the region.
 // It returns "ZZ" for an unspecified region.
-func (r regionID) String() string {
+func (r Region) String() string {
 	if r < isoRegionOffset {
 		if r == 0 {
 			return "ZZ"
@@ -284,7 +300,7 @@ func (r regionID) String() string {
 // ISO3 returns the 3-letter ISO code of r.
 // Note that not all regions have a 3-letter ISO code.
 // In such cases this method returns "ZZZ".
-func (r regionID) ISO3() string {
+func (r Region) ISO3() string {
 	if r < isoRegionOffset {
 		return "ZZZ"
 	}
@@ -301,29 +317,29 @@ func (r regionID) ISO3() string {

 // M49 returns the UN M.49 encoding of r, or 0 if this encoding
 // is not defined for r.
-func (r regionID) M49() int {
+func (r Region) M49() int {
 	return int(m49[r])
 }

 // IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
 // may include private-use tags that are assigned by CLDR and used in this
 // implementation. So IsPrivateUse and IsCountry can be simultaneously true.
-func (r regionID) IsPrivateUse() bool {
+func (r Region) IsPrivateUse() bool {
 	return r.typ()&iso3166UserAssigned != 0
 }

-type scriptID uint8
+type Script uint8

 // getScriptID returns the script id for string s. It assumes that s
 // is of the format [A-Z][a-z]{3}.
-func getScriptID(idx tag.Index, s []byte) (scriptID, error) {
+func getScriptID(idx tag.Index, s []byte) (Script, error) {
 	i, err := findIndex(idx, s, "Zzzz")
-	return scriptID(i), err
+	return Script(i), err
 }

 // String returns the script code in title case.
 // It returns "Zzzz" for an unspecified script.
-func (s scriptID) String() string {
+func (s Script) String() string {
 	if s == 0 {
 		return "Zzzz"
 	}
@@ -331,7 +347,7 @@ func (s scriptID) String() string {
 }

 // IsPrivateUse reports whether this script code is reserved for private use.
-func (s scriptID) IsPrivateUse() bool {
+func (s Script) IsPrivateUse() bool {
 	return _Qaaa <= s && s <= _Qabx
 }

@@ -389,7 +405,7 @@ func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool) {
 		if v < 0 {
 			return Make(altTags[altTagIndex[-v-1]:altTagIndex[-v]]), true
 		}
-		t.lang = langID(v)
+		t.LangID = Language(v)
 		return t, true
 	}
 	return t, false
@@ -0,0 +1,226 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import "errors"
+
+type scriptRegionFlags uint8
+
+const (
+	isList = 1 << iota
+	scriptInFrom
+	regionInFrom
+)
+
+func (t *Tag) setUndefinedLang(id Language) {
+	if t.LangID == 0 {
+		t.LangID = id
+	}
+}
+
+func (t *Tag) setUndefinedScript(id Script) {
+	if t.ScriptID == 0 {
+		t.ScriptID = id
+	}
+}
+
+func (t *Tag) setUndefinedRegion(id Region) {
+	if t.RegionID == 0 || t.RegionID.Contains(id) {
+		t.RegionID = id
+	}
+}
+
+// ErrMissingLikelyTagsData indicates no information was available
+// to compute likely values of missing tags.
+var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
+
+// addLikelySubtags sets subtags to their most likely value, given the locale.
+// In most cases this means setting fields for unknown values, but in some
+// cases it may alter a value.  It returns an ErrMissingLikelyTagsData error
+// if the given locale cannot be expanded.
+func (t Tag) addLikelySubtags() (Tag, error) {
+	id, err := addTags(t)
+	if err != nil {
+		return t, err
+	} else if id.equalTags(t) {
+		return t, nil
+	}
+	id.RemakeString()
+	return id, nil
+}
+
+// specializeRegion attempts to specialize a group region.
+func specializeRegion(t *Tag) bool {
+	if i := regionInclusion[t.RegionID]; i < nRegionGroups {
+		x := likelyRegionGroup[i]
+		if Language(x.lang) == t.LangID && Script(x.script) == t.ScriptID {
+			t.RegionID = Region(x.region)
+		}
+		return true
+	}
+	return false
+}
+
+// Maximize returns a new tag with missing tags filled in.
+func (t Tag) Maximize() (Tag, error) {
+	return addTags(t)
+}
+
+func addTags(t Tag) (Tag, error) {
+	// We leave private use identifiers alone.
+	if t.IsPrivateUse() {
+		return t, nil
+	}
+	if t.ScriptID != 0 && t.RegionID != 0 {
+		if t.LangID != 0 {
+			// already fully specified
+			specializeRegion(&t)
+			return t, nil
+		}
+		// Search matches for und-script-region. Note that for these cases
+		// region will never be a group so there is no need to check for this.
+		list := likelyRegion[t.RegionID : t.RegionID+1]
+		if x := list[0]; x.flags&isList != 0 {
+			list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
+		}
+		for _, x := range list {
+			// Deviating from the spec. See match_test.go for details.
+			if Script(x.script) == t.ScriptID {
+				t.setUndefinedLang(Language(x.lang))
+				return t, nil
+			}
+		}
+	}
+	if t.LangID != 0 {
+		// Search matches for lang-script and lang-region, where lang != und.
+		if t.LangID < langNoIndexOffset {
+			x := likelyLang[t.LangID]
+			if x.flags&isList != 0 {
+				list := likelyLangList[x.region : x.region+uint16(x.script)]
+				if t.ScriptID != 0 {
+					for _, x := range list {
+						if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 {
+							t.setUndefinedRegion(Region(x.region))
+							return t, nil
+						}
+					}
+				} else if t.RegionID != 0 {
+					count := 0
+					goodScript := true
+					tt := t
+					for _, x := range list {
+						// We visit all entries for which the script was not
+						// defined, including the ones where the region was not
+						// defined. This allows for proper disambiguation within
+						// regions.
+						if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) {
+							tt.RegionID = Region(x.region)
+							tt.setUndefinedScript(Script(x.script))
+							goodScript = goodScript && tt.ScriptID == Script(x.script)
+							count++
+						}
+					}
+					if count == 1 {
+						return tt, nil
+					}
+					// Even if we fail to find a unique Region, we might have
+					// an unambiguous script.
+					if goodScript {
+						t.ScriptID = tt.ScriptID
+					}
+				}
+			}
+		}
+	} else {
+		// Search matches for und-script.
+		if t.ScriptID != 0 {
+			x := likelyScript[t.ScriptID]
+			if x.region != 0 {
+				t.setUndefinedRegion(Region(x.region))
+				t.setUndefinedLang(Language(x.lang))
+				return t, nil
+			}
+		}
+		// Search matches for und-region. If und-script-region exists, it would
+		// have been found earlier.
+		if t.RegionID != 0 {
+			if i := regionInclusion[t.RegionID]; i < nRegionGroups {
+				x := likelyRegionGroup[i]
+				if x.region != 0 {
+					t.setUndefinedLang(Language(x.lang))
+					t.setUndefinedScript(Script(x.script))
+					t.RegionID = Region(x.region)
+				}
+			} else {
+				x := likelyRegion[t.RegionID]
+				if x.flags&isList != 0 {
+					x = likelyRegionList[x.lang]
+				}
+				if x.script != 0 && x.flags != scriptInFrom {
+					t.setUndefinedLang(Language(x.lang))
+					t.setUndefinedScript(Script(x.script))
+					return t, nil
+				}
+			}
+		}
+	}
+
+	// Search matches for lang.
+	if t.LangID < langNoIndexOffset {
+		x := likelyLang[t.LangID]
+		if x.flags&isList != 0 {
+			x = likelyLangList[x.region]
+		}
+		if x.region != 0 {
+			t.setUndefinedScript(Script(x.script))
+			t.setUndefinedRegion(Region(x.region))
+		}
+		specializeRegion(&t)
+		if t.LangID == 0 {
+			t.LangID = _en // default language
+		}
+		return t, nil
+	}
+	return t, ErrMissingLikelyTagsData
+}
+
+func (t *Tag) setTagsFrom(id Tag) {
+	t.LangID = id.LangID
+	t.ScriptID = id.ScriptID
+	t.RegionID = id.RegionID
+}
+
+// minimize removes the region or script subtags from t such that
+// t.addLikelySubtags() == t.minimize().addLikelySubtags().
+func (t Tag) minimize() (Tag, error) {
+	t, err := minimizeTags(t)
+	if err != nil {
+		return t, err
+	}
+	t.RemakeString()
+	return t, nil
+}
+
+// minimizeTags mimics the behavior of the ICU 51 C implementation.
+func minimizeTags(t Tag) (Tag, error) {
+	if t.equalTags(Und) {
+		return t, nil
+	}
+	max, err := addTags(t)
+	if err != nil {
+		return t, err
+	}
+	for _, id := range [...]Tag{
+		{LangID: t.LangID},
+		{LangID: t.LangID, RegionID: t.RegionID},
+		{LangID: t.LangID, ScriptID: t.ScriptID},
+	} {
+		if x, err := addTags(id); err == nil && max.equalTags(x) {
+			t.setTagsFrom(id)
+			break
+		}
+	}
+	return t, nil
+}
@@ -0,0 +1,594 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"sort"
+
+	"golang.org/x/text/internal/tag"
+)
+
+// isAlpha returns true if the byte is not a digit.
+// b must be an ASCII letter or digit.
+func isAlpha(b byte) bool {
+	return b > '9'
+}
+
+// isAlphaNum returns true if the string contains only ASCII letters or digits.
+func isAlphaNum(s []byte) bool {
+	for _, c := range s {
+		if !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9') {
+			return false
+		}
+	}
+	return true
+}
+
+// ErrSyntax is returned by any of the parsing functions when the
+// input is not well-formed, according to BCP 47.
+// TODO: return the position at which the syntax error occurred?
+var ErrSyntax = errors.New("language: tag is not well-formed")
+
+// ErrDuplicateKey is returned when a tag contains the same key twice with
+// different values in the -u section.
+var ErrDuplicateKey = errors.New("language: different values for same key in -u extension")
+
+// ValueError is returned by any of the parsing functions when the
+// input is well-formed but the respective subtag is not recognized
+// as a valid value.
+type ValueError struct {
+	v [8]byte
+}
+
+// NewValueError creates a new ValueError.
+func NewValueError(tag []byte) ValueError {
+	var e ValueError
+	copy(e.v[:], tag)
+	return e
+}
+
+func (e ValueError) tag() []byte {
+	n := bytes.IndexByte(e.v[:], 0)
+	if n == -1 {
+		n = 8
+	}
+	return e.v[:n]
+}
+
+// Error implements the error interface.
+func (e ValueError) Error() string {
+	return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag())
+}
+
+// Subtag returns the subtag for which the error occurred.
+func (e ValueError) Subtag() string {
+	return string(e.tag())
+}
+
+// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
+type scanner struct {
+	b     []byte
+	bytes [max99thPercentileSize]byte
+	token []byte
+	start int // start position of the current token
+	end   int // end position of the current token
+	next  int // next point for scan
+	err   error
+	done  bool
+}
+
+func makeScannerString(s string) scanner {
+	scan := scanner{}
+	if len(s) <= len(scan.bytes) {
+		scan.b = scan.bytes[:copy(scan.bytes[:], s)]
+	} else {
+		scan.b = []byte(s)
+	}
+	scan.init()
+	return scan
+}
+
+// makeScanner returns a scanner using b as the input buffer.
+// b is not copied and may be modified by the scanner routines.
+func makeScanner(b []byte) scanner {
+	scan := scanner{b: b}
+	scan.init()
+	return scan
+}
+
+func (s *scanner) init() {
+	for i, c := range s.b {
+		if c == '_' {
+			s.b[i] = '-'
+		}
+	}
+	s.scan()
+}
+
+// restToLower converts the string between start and end to lower case.
+func (s *scanner) toLower(start, end int) {
+	for i := start; i < end; i++ {
+		c := s.b[i]
+		if 'A' <= c && c <= 'Z' {
+			s.b[i] += 'a' - 'A'
+		}
+	}
+}
+
+func (s *scanner) setError(e error) {
+	if s.err == nil || (e == ErrSyntax && s.err != ErrSyntax) {
+		s.err = e
+	}
+}
+
+// resizeRange shrinks or grows the array at position oldStart such that
+// a new string of size newSize can fit between oldStart and oldEnd.
+// Sets the scan point to after the resized range.
+func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
+	s.start = oldStart
+	if end := oldStart + newSize; end != oldEnd {
+		diff := end - oldEnd
+		if end < cap(s.b) {
+			b := make([]byte, len(s.b)+diff)
+			copy(b, s.b[:oldStart])
+			copy(b[end:], s.b[oldEnd:])
+			s.b = b
+		} else {
+			s.b = append(s.b[end:], s.b[oldEnd:]...)
+		}
+		s.next = end + (s.next - s.end)
+		s.end = end
+	}
+}
+
+// replace replaces the current token with repl.
+func (s *scanner) replace(repl string) {
+	s.resizeRange(s.start, s.end, len(repl))
+	copy(s.b[s.start:], repl)
+}
+
+// gobble removes the current token from the input.
+// Caller must call scan after calling gobble.
+func (s *scanner) gobble(e error) {
+	s.setError(e)
+	if s.start == 0 {
+		s.b = s.b[:+copy(s.b, s.b[s.next:])]
+		s.end = 0
+	} else {
+		s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
+		s.end = s.start - 1
+	}
+	s.next = s.start
+}
+
+// deleteRange removes the given range from s.b before the current token.
+func (s *scanner) deleteRange(start, end int) {
+	s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
+	diff := end - start
+	s.next -= diff
+	s.start -= diff
+	s.end -= diff
+}
+
+// scan parses the next token of a BCP 47 string.  Tokens that are larger
+// than 8 characters or include non-alphanumeric characters result in an error
+// and are gobbled and removed from the output.
+// It returns the end position of the last token consumed.
+func (s *scanner) scan() (end int) {
+	end = s.end
+	s.token = nil
+	for s.start = s.next; s.next < len(s.b); {
+		i := bytes.IndexByte(s.b[s.next:], '-')
+		if i == -1 {
+			s.end = len(s.b)
+			s.next = len(s.b)
+			i = s.end - s.start
+		} else {
+			s.end = s.next + i
+			s.next = s.end + 1
+		}
+		token := s.b[s.start:s.end]
+		if i < 1 || i > 8 || !isAlphaNum(token) {
+			s.gobble(ErrSyntax)
+			continue
+		}
+		s.token = token
+		return end
+	}
+	if n := len(s.b); n > 0 && s.b[n-1] == '-' {
+		s.setError(ErrSyntax)
+		s.b = s.b[:len(s.b)-1]
+	}
+	s.done = true
+	return end
+}
+
+// acceptMinSize parses multiple tokens of the given size or greater.
+// It returns the end position of the last token consumed.
+func (s *scanner) acceptMinSize(min int) (end int) {
+	end = s.end
+	s.scan()
+	for ; len(s.token) >= min; s.scan() {
+		end = s.end
+	}
+	return end
+}
+
+// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
+// failed it returns an error and any part of the tag that could be parsed.
+// If parsing succeeded but an unknown value was found, it returns
+// ValueError. The Tag returned in this case is just stripped of the unknown
+// value. All other values are preserved. It accepts tags in the BCP 47 format
+// and extensions to this standard defined in
+// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+func Parse(s string) (t Tag, err error) {
+	// TODO: consider supporting old-style locale key-value pairs.
+	if s == "" {
+		return Und, ErrSyntax
+	}
+	if len(s) <= maxAltTaglen {
+		b := [maxAltTaglen]byte{}
+		for i, c := range s {
+			// Generating invalid UTF-8 is okay as it won't match.
+			if 'A' <= c && c <= 'Z' {
+				c += 'a' - 'A'
+			} else if c == '_' {
+				c = '-'
+			}
+			b[i] = byte(c)
+		}
+		if t, ok := grandfathered(b); ok {
+			return t, nil
+		}
+	}
+	scan := makeScannerString(s)
+	return parse(&scan, s)
+}
+
+func parse(scan *scanner, s string) (t Tag, err error) {
+	t = Und
+	var end int
+	if n := len(scan.token); n <= 1 {
+		scan.toLower(0, len(scan.b))
+		if n == 0 || scan.token[0] != 'x' {
+			return t, ErrSyntax
+		}
+		end = parseExtensions(scan)
+	} else if n >= 4 {
+		return Und, ErrSyntax
+	} else { // the usual case
+		t, end = parseTag(scan)
+		if n := len(scan.token); n == 1 {
+			t.pExt = uint16(end)
+			end = parseExtensions(scan)
+		} else if end < len(scan.b) {
+			scan.setError(ErrSyntax)
+			scan.b = scan.b[:end]
+		}
+	}
+	if int(t.pVariant) < len(scan.b) {
+		if end < len(s) {
+			s = s[:end]
+		}
+		if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
+			t.str = s
+		} else {
+			t.str = string(scan.b)
+		}
+	} else {
+		t.pVariant, t.pExt = 0, 0
+	}
+	return t, scan.err
+}
+
+// parseTag parses language, script, region and variants.
+// It returns a Tag and the end position in the input that was parsed.
+func parseTag(scan *scanner) (t Tag, end int) {
+	var e error
+	// TODO: set an error if an unknown lang, script or region is encountered.
+	t.LangID, e = getLangID(scan.token)
+	scan.setError(e)
+	scan.replace(t.LangID.String())
+	langStart := scan.start
+	end = scan.scan()
+	for len(scan.token) == 3 && isAlpha(scan.token[0]) {
+		// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
+		// to a tag of the form <extlang>.
+		lang, e := getLangID(scan.token)
+		if lang != 0 {
+			t.LangID = lang
+			copy(scan.b[langStart:], lang.String())
+			scan.b[langStart+3] = '-'
+			scan.start = langStart + 4
+		}
+		scan.gobble(e)
+		end = scan.scan()
+	}
+	if len(scan.token) == 4 && isAlpha(scan.token[0]) {
+		t.ScriptID, e = getScriptID(script, scan.token)
+		if t.ScriptID == 0 {
+			scan.gobble(e)
+		}
+		end = scan.scan()
+	}
+	if n := len(scan.token); n >= 2 && n <= 3 {
+		t.RegionID, e = getRegionID(scan.token)
+		if t.RegionID == 0 {
+			scan.gobble(e)
+		} else {
+			scan.replace(t.RegionID.String())
+		}
+		end = scan.scan()
+	}
+	scan.toLower(scan.start, len(scan.b))
+	t.pVariant = byte(end)
+	end = parseVariants(scan, end, t)
+	t.pExt = uint16(end)
+	return t, end
+}
+
+var separator = []byte{'-'}
+
+// parseVariants scans tokens as long as each token is a valid variant string.
+// Duplicate variants are removed.
+func parseVariants(scan *scanner, end int, t Tag) int {
+	start := scan.start
+	varIDBuf := [4]uint8{}
+	variantBuf := [4][]byte{}
+	varID := varIDBuf[:0]
+	variant := variantBuf[:0]
+	last := -1
+	needSort := false
+	for ; len(scan.token) >= 4; scan.scan() {
+		// TODO: measure the impact of needing this conversion and redesign
+		// the data structure if there is an issue.
+		v, ok := variantIndex[string(scan.token)]
+		if !ok {
+			// unknown variant
+			// TODO: allow user-defined variants?
+			scan.gobble(NewValueError(scan.token))
+			continue
+		}
+		varID = append(varID, v)
+		variant = append(variant, scan.token)
+		if !needSort {
+			if last < int(v) {
+				last = int(v)
+			} else {
+				needSort = true
+				// There is no legal combinations of more than 7 variants
+				// (and this is by no means a useful sequence).
+				const maxVariants = 8
+				if len(varID) > maxVariants {
+					break
+				}
+			}
+		}
+		end = scan.end
+	}
+	if needSort {
+		sort.Sort(variantsSort{varID, variant})
+		k, l := 0, -1
+		for i, v := range varID {
+			w := int(v)
+			if l == w {
+				// Remove duplicates.
+				continue
+			}
+			varID[k] = varID[i]
+			variant[k] = variant[i]
+			k++
+			l = w
+		}
+		if str := bytes.Join(variant[:k], separator); len(str) == 0 {
+			end = start - 1
+		} else {
+			scan.resizeRange(start, end, len(str))
+			copy(scan.b[scan.start:], str)
+			end = scan.end
+		}
+	}
+	return end
+}
+
+type variantsSort struct {
+	i []uint8
+	v [][]byte
+}
+
+func (s variantsSort) Len() int {
+	return len(s.i)
+}
+
+func (s variantsSort) Swap(i, j int) {
+	s.i[i], s.i[j] = s.i[j], s.i[i]
+	s.v[i], s.v[j] = s.v[j], s.v[i]
+}
+
+func (s variantsSort) Less(i, j int) bool {
+	return s.i[i] < s.i[j]
+}
+
+type bytesSort struct {
+	b [][]byte
+	n int // first n bytes to compare
+}
+
+func (b bytesSort) Len() int {
+	return len(b.b)
+}
+
+func (b bytesSort) Swap(i, j int) {
+	b.b[i], b.b[j] = b.b[j], b.b[i]
+}
+
+func (b bytesSort) Less(i, j int) bool {
+	for k := 0; k < b.n; k++ {
+		if b.b[i][k] == b.b[j][k] {
+			continue
+		}
+		return b.b[i][k] < b.b[j][k]
+	}
+	return false
+}
+
+// parseExtensions parses and normalizes the extensions in the buffer.
+// It returns the last position of scan.b that is part of any extension.
+// It also trims scan.b to remove excess parts accordingly.
+func parseExtensions(scan *scanner) int {
+	start := scan.start
+	exts := [][]byte{}
+	private := []byte{}
+	end := scan.end
+	for len(scan.token) == 1 {
+		extStart := scan.start
+		ext := scan.token[0]
+		end = parseExtension(scan)
+		extension := scan.b[extStart:end]
+		if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
+			scan.setError(ErrSyntax)
+			end = extStart
+			continue
+		} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
+			scan.b = scan.b[:end]
+			return end
+		} else if ext == 'x' {
+			private = extension
+			break
+		}
+		exts = append(exts, extension)
+	}
+	sort.Sort(bytesSort{exts, 1})
+	if len(private) > 0 {
+		exts = append(exts, private)
+	}
+	scan.b = scan.b[:start]
+	if len(exts) > 0 {
+		scan.b = append(scan.b, bytes.Join(exts, separator)...)
+	} else if start > 0 {
+		// Strip trailing '-'.
+		scan.b = scan.b[:start-1]
+	}
+	return end
+}
+
+// parseExtension parses a single extension and returns the position of
+// the extension end.
+func parseExtension(scan *scanner) int {
+	start, end := scan.start, scan.end
+	switch scan.token[0] {
+	case 'u':
+		attrStart := end
+		scan.scan()
+		for last := []byte{}; len(scan.token) > 2; scan.scan() {
+			if bytes.Compare(scan.token, last) != -1 {
+				// Attributes are unsorted. Start over from scratch.
+				p := attrStart + 1
+				scan.next = p
+				attrs := [][]byte{}
+				for scan.scan(); len(scan.token) > 2; scan.scan() {
+					attrs = append(attrs, scan.token)
+					end = scan.end
+				}
+				sort.Sort(bytesSort{attrs, 3})
+				copy(scan.b[p:], bytes.Join(attrs, separator))
+				break
+			}
+			last = scan.token
+			end = scan.end
+		}
+		var last, key []byte
+		for attrEnd := end; len(scan.token) == 2; last = key {
+			key = scan.token
+			keyEnd := scan.end
+			end = scan.acceptMinSize(3)
+			// TODO: check key value validity
+			if keyEnd == end || bytes.Compare(key, last) != 1 {
+				// We have an invalid key or the keys are not sorted.
+				// Start scanning keys from scratch and reorder.
+				p := attrEnd + 1
+				scan.next = p
+				keys := [][]byte{}
+				for scan.scan(); len(scan.token) == 2; {
+					keyStart, keyEnd := scan.start, scan.end
+					end = scan.acceptMinSize(3)
+					if keyEnd != end {
+						keys = append(keys, scan.b[keyStart:end])
+					} else {
+						scan.setError(ErrSyntax)
+						end = keyStart
+					}
+				}
+				sort.Stable(bytesSort{keys, 2})
+				if n := len(keys); n > 0 {
+					k := 0
+					for i := 1; i < n; i++ {
+						if !bytes.Equal(keys[k][:2], keys[i][:2]) {
+							k++
+							keys[k] = keys[i]
+						} else if !bytes.Equal(keys[k], keys[i]) {
+							scan.setError(ErrDuplicateKey)
+						}
+					}
+					keys = keys[:k+1]
+				}
+				reordered := bytes.Join(keys, separator)
+				if e := p + len(reordered); e < end {
+					scan.deleteRange(e, end)
+					end = e
+				}
+				copy(scan.b[p:], reordered)
+				break
+			}
+		}
+	case 't':
+		scan.scan()
+		if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
+			_, end = parseTag(scan)
+			scan.toLower(start, end)
+		}
+		for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
+			end = scan.acceptMinSize(3)
+		}
+	case 'x':
+		end = scan.acceptMinSize(1)
+	default:
+		end = scan.acceptMinSize(2)
+	}
+	return end
+}
+
+// getExtension returns the name, body and end position of the extension.
+func getExtension(s string, p int) (end int, ext string) {
+	if s[p] == '-' {
+		p++
+	}
+	if s[p] == 'x' {
+		return len(s), s[p:]
+	}
+	end = nextExtension(s, p)
+	return end, s[p:end]
+}
+
+// nextExtension finds the next extension within the string, searching
+// for the -<char>- pattern from position p.
+// In the fast majority of cases, language tags will have at most
+// one extension and extensions tend to be small.
+func nextExtension(s string, p int) int {
+	for n := len(s) - 3; p < n; {
+		if s[p] == '-' {
+			if s[p+2] == '-' {
+				return p
+			}
+			p += 3
+		} else {
+			p++
+		}
+	}
+	return len(s)
+}
@@ -0,0 +1,48 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package language
+
+// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
+// It simplifies safe initialization of Tag values.
+func MustParse(s string) Tag {
+	t, err := Parse(s)
+	if err != nil {
+		panic(err)
+	}
+	return t
+}
+
+// MustParseBase is like ParseBase, but panics if the given base cannot be parsed.
+// It simplifies safe initialization of Base values.
+func MustParseBase(s string) Language {
+	b, err := ParseBase(s)
+	if err != nil {
+		panic(err)
+	}
+	return b
+}
+
+// MustParseScript is like ParseScript, but panics if the given script cannot be
+// parsed. It simplifies safe initialization of Script values.
+func MustParseScript(s string) Script {
+	scr, err := ParseScript(s)
+	if err != nil {
+		panic(err)
+	}
+	return scr
+}
+
+// MustParseRegion is like ParseRegion, but panics if the given region cannot be
+// parsed. It simplifies safe initialization of Region values.
+func MustParseRegion(s string) Region {
+	r, err := ParseRegion(s)
+	if err != nil {
+		panic(err)
+	}
+	return r
+}
+
+// Und is the root language.
+var Und Tag
@@ -1,16 +0,0 @@
-# Copyright 2013 The Go Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file.
-
-CLEANFILES+=maketables
-
-maketables: maketables.go
-	go build $^
-
-tables:	maketables
-	./maketables > tables.go
-	gofmt -w -s tables.go
-
-# Build (but do not run) maketables during testing,
-# just to make sure it still compiles.
-testshort: maketables
@@ -7,6 +7,8 @@ package language
 import (
 	"fmt"
 	"sort"
+
+	"golang.org/x/text/internal/language"
 )

 // The Coverage interface is used to define the level of coverage of an
@@ -44,9 +46,9 @@ type allSubtags struct{}
 // consecutive range, it simply returns a slice of numbers in increasing order.
 // The "undefined" region is not returned.
 func (s allSubtags) Regions() []Region {
-	reg := make([]Region, numRegions)
+	reg := make([]Region, language.NumRegions)
 	for i := range reg {
-		reg[i] = Region{regionID(i + 1)}
+		reg[i] = Region{language.Region(i + 1)}
 	}
 	return reg
 }
@@ -55,9 +57,9 @@ func (s allSubtags) Regions() []Region {
 // consecutive range, it simply returns a slice of numbers in increasing order.
 // The "undefined" script is not returned.
 func (s allSubtags) Scripts() []Script {
-	scr := make([]Script, numScripts)
+	scr := make([]Script, language.NumScripts)
 	for i := range scr {
-		scr[i] = Script{scriptID(i + 1)}
+		scr[i] = Script{language.Script(i + 1)}
 	}
 	return scr
 }
@@ -65,22 +67,10 @@ func (s allSubtags) Scripts() []Script {
 // BaseLanguages returns the list of all supported base languages. It generates
 // the list by traversing the internal structures.
 func (s allSubtags) BaseLanguages() []Base {
-	base := make([]Base, 0, numLanguages)
-	for i := 0; i < langNoIndexOffset; i++ {
-		// We included "und" already for the value 0.
-		if i != nonCanonicalUnd {
-			base = append(base, Base{langID(i)})
-		}
-	}
-	i := langNoIndexOffset
-	for _, v := range langNoIndex {
-		for k := 0; k < 8; k++ {
-			if v&1 == 1 {
-				base = append(base, Base{langID(i)})
-			}
-			v >>= 1
-			i++
-		}
+	bs := language.BaseLanguages()
+	base := make([]Base, len(bs))
+	for i, b := range bs {
+		base[i] = Base{b}
 	}
 	return base
 }
@@ -90,7 +80,7 @@ func (s allSubtags) Tags() []Tag {
 	return nil
 }

-// coverage is used used by NewCoverage which is used as a convenient way for
+// coverage is used by NewCoverage which is used as a convenient way for
 // creating Coverage implementations for partially defined data. Very often a
 // package will only need to define a subset of slices. coverage provides a
 // convenient way to do this. Moreover, packages using NewCoverage, instead of
@@ -134,7 +124,7 @@ func (s *coverage) BaseLanguages() []Base {
 		}
 		a := make([]Base, len(tags))
 		for i, t := range tags {
-			a[i] = Base{langID(t.lang)}
+			a[i] = Base{language.Language(t.lang())}
 		}
 		sort.Sort(bases(a))
 		k := 0
@@ -1,162 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-// This file generates derivative tables based on the language package itself.
-
-import (
-	"bytes"
-	"flag"
-	"fmt"
-	"io/ioutil"
-	"log"
-	"reflect"
-	"sort"
-	"strings"
-
-	"golang.org/x/text/internal/gen"
-	"golang.org/x/text/language"
-	"golang.org/x/text/unicode/cldr"
-)
-
-var (
-	test = flag.Bool("test", false,
-		"test existing tables; can be used to compare web data with package data.")
-
-	draft = flag.String("draft",
-		"contributed",
-		`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
-)
-
-func main() {
-	gen.Init()
-
-	// Read the CLDR zip file.
-	r := gen.OpenCLDRCoreZip()
-	defer r.Close()
-
-	d := &cldr.Decoder{}
-	data, err := d.DecodeZip(r)
-	if err != nil {
-		log.Fatalf("DecodeZip: %v", err)
-	}
-
-	w := gen.NewCodeWriter()
-	defer func() {
-		buf := &bytes.Buffer{}
-
-		if _, err = w.WriteGo(buf, "language", ""); err != nil {
-			log.Fatalf("Error formatting file index.go: %v", err)
-		}
-
-		// Since we're generating a table for our own package we need to rewrite
-		// doing the equivalent of go fmt -r 'language.b -> b'. Using
-		// bytes.Replace will do.
-		out := bytes.Replace(buf.Bytes(), []byte("language."), nil, -1)
-		if err := ioutil.WriteFile("index.go", out, 0600); err != nil {
-			log.Fatalf("Could not create file index.go: %v", err)
-		}
-	}()
-
-	m := map[language.Tag]bool{}
-	for _, lang := range data.Locales() {
-		// We include all locales unconditionally to be consistent with en_US.
-		// We want en_US, even though it has no data associated with it.
-
-		// TODO: put any of the languages for which no data exists at the end
-		// of the index. This allows all components based on ICU to use that
-		// as the cutoff point.
-		// if x := data.RawLDML(lang); false ||
-		// 	x.LocaleDisplayNames != nil ||
-		// 	x.Characters != nil ||
-		// 	x.Delimiters != nil ||
-		// 	x.Measurement != nil ||
-		// 	x.Dates != nil ||
-		// 	x.Numbers != nil ||
-		// 	x.Units != nil ||
-		// 	x.ListPatterns != nil ||
-		// 	x.Collations != nil ||
-		// 	x.Segmentations != nil ||
-		// 	x.Rbnf != nil ||
-		// 	x.Annotations != nil ||
-		// 	x.Metadata != nil {
-
-		// TODO: support POSIX natively, albeit non-standard.
-		tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
-		m[tag] = true
-		// }
-	}
-	// Include locales for plural rules, which uses a different structure.
-	for _, plurals := range data.Supplemental().Plurals {
-		for _, rules := range plurals.PluralRules {
-			for _, lang := range strings.Split(rules.Locales, " ") {
-				m[language.Make(lang)] = true
-			}
-		}
-	}
-
-	var core, special []language.Tag
-
-	for t := range m {
-		if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
-			log.Fatalf("Unexpected extension %v in %v", x, t)
-		}
-		if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
-			core = append(core, t)
-		} else {
-			special = append(special, t)
-		}
-	}
-
-	w.WriteComment(`
-	NumCompactTags is the number of common tags. The maximum tag is
-	NumCompactTags-1.`)
-	w.WriteConst("NumCompactTags", len(core)+len(special))
-
-	sort.Sort(byAlpha(special))
-	w.WriteVar("specialTags", special)
-
-	// TODO: order by frequency?
-	sort.Sort(byAlpha(core))
-
-	// Size computations are just an estimate.
-	w.Size += int(reflect.TypeOf(map[uint32]uint16{}).Size())
-	w.Size += len(core) * 6 // size of uint32 and uint16
-
-	fmt.Fprintln(w)
-	fmt.Fprintln(w, "var coreTags = map[uint32]uint16{")
-	fmt.Fprintln(w, "0x0: 0, // und")
-	i := len(special) + 1 // Und and special tags already written.
-	for _, t := range core {
-		if t == language.Und {
-			continue
-		}
-		fmt.Fprint(w.Hash, t, i)
-		b, s, r := t.Raw()
-		fmt.Fprintf(w, "0x%s%s%s: %d, // %s\n",
-			getIndex(b, 3), // 3 is enough as it is guaranteed to be a compact number
-			getIndex(s, 2),
-			getIndex(r, 3),
-			i, t)
-		i++
-	}
-	fmt.Fprintln(w, "}")
-}
-
-// getIndex prints the subtag type and extracts its index of size nibble.
-// If the index is less than n nibbles, the result is prefixed with 0s.
-func getIndex(x interface{}, n int) string {
-	s := fmt.Sprintf("%#v", x) // s is of form Type{typeID: 0x00}
-	s = s[strings.Index(s, "0x")+2 : len(s)-1]
-	return strings.Repeat("0", n-len(s)) + s
-}
-
-type byAlpha []language.Tag
-
-func (a byAlpha) Len() int           { return len(a) }
-func (a byAlpha) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
-func (a byAlpha) Less(i, j int) bool { return a[i].String() < a[j].String() }
@@ -1,783 +0,0 @@
-// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
-
-package language
-
-// NumCompactTags is the number of common tags. The maximum tag is
-// NumCompactTags-1.
-const NumCompactTags = 768
-
-var specialTags = []Tag{ // 2 elements
-	0: {lang: 0xd7, region: 0x6e, script: 0x0, pVariant: 0x5, pExt: 0xe, str: "ca-ES-valencia"},
-	1: {lang: 0x139, region: 0x135, script: 0x0, pVariant: 0x5, pExt: 0x5, str: "en-US-u-va-posix"},
-} // Size: 72 bytes
-
-var coreTags = map[uint32]uint16{
-	0x0:        0,   // und
-	0x01600000: 3,   // af
-	0x016000d2: 4,   // af-NA
-	0x01600161: 5,   // af-ZA
-	0x01c00000: 6,   // agq
-	0x01c00052: 7,   // agq-CM
-	0x02100000: 8,   // ak
-	0x02100080: 9,   // ak-GH
-	0x02700000: 10,  // am
-	0x0270006f: 11,  // am-ET
-	0x03a00000: 12,  // ar
-	0x03a00001: 13,  // ar-001
-	0x03a00023: 14,  // ar-AE
-	0x03a00039: 15,  // ar-BH
-	0x03a00062: 16,  // ar-DJ
-	0x03a00067: 17,  // ar-DZ
-	0x03a0006b: 18,  // ar-EG
-	0x03a0006c: 19,  // ar-EH
-	0x03a0006d: 20,  // ar-ER
-	0x03a00097: 21,  // ar-IL
-	0x03a0009b: 22,  // ar-IQ
-	0x03a000a1: 23,  // ar-JO
-	0x03a000a8: 24,  // ar-KM
-	0x03a000ac: 25,  // ar-KW
-	0x03a000b0: 26,  // ar-LB
-	0x03a000b9: 27,  // ar-LY
-	0x03a000ba: 28,  // ar-MA
-	0x03a000c9: 29,  // ar-MR
-	0x03a000e1: 30,  // ar-OM
-	0x03a000ed: 31,  // ar-PS
-	0x03a000f3: 32,  // ar-QA
-	0x03a00108: 33,  // ar-SA
-	0x03a0010b: 34,  // ar-SD
-	0x03a00115: 35,  // ar-SO
-	0x03a00117: 36,  // ar-SS
-	0x03a0011c: 37,  // ar-SY
-	0x03a00120: 38,  // ar-TD
-	0x03a00128: 39,  // ar-TN
-	0x03a0015e: 40,  // ar-YE
-	0x04000000: 41,  // ars
-	0x04300000: 42,  // as
-	0x04300099: 43,  // as-IN
-	0x04400000: 44,  // asa
-	0x0440012f: 45,  // asa-TZ
-	0x04800000: 46,  // ast
-	0x0480006e: 47,  // ast-ES
-	0x05800000: 48,  // az
-	0x0581f000: 49,  // az-Cyrl
-	0x0581f032: 50,  // az-Cyrl-AZ
-	0x05857000: 51,  // az-Latn
-	0x05857032: 52,  // az-Latn-AZ
-	0x05e00000: 53,  // bas
-	0x05e00052: 54,  // bas-CM
-	0x07100000: 55,  // be
-	0x07100047: 56,  // be-BY
-	0x07500000: 57,  // bem
-	0x07500162: 58,  // bem-ZM
-	0x07900000: 59,  // bez
-	0x0790012f: 60,  // bez-TZ
-	0x07e00000: 61,  // bg
-	0x07e00038: 62,  // bg-BG
-	0x08200000: 63,  // bh
-	0x0a000000: 64,  // bm
-	0x0a0000c3: 65,  // bm-ML
-	0x0a500000: 66,  // bn
-	0x0a500035: 67,  // bn-BD
-	0x0a500099: 68,  // bn-IN
-	0x0a900000: 69,  // bo
-	0x0a900053: 70,  // bo-CN
-	0x0a900099: 71,  // bo-IN
-	0x0b200000: 72,  // br
-	0x0b200078: 73,  // br-FR
-	0x0b500000: 74,  // brx
-	0x0b500099: 75,  // brx-IN
-	0x0b700000: 76,  // bs
-	0x0b71f000: 77,  // bs-Cyrl
-	0x0b71f033: 78,  // bs-Cyrl-BA
-	0x0b757000: 79,  // bs-Latn
-	0x0b757033: 80,  // bs-Latn-BA
-	0x0d700000: 81,  // ca
-	0x0d700022: 82,  // ca-AD
-	0x0d70006e: 83,  // ca-ES
-	0x0d700078: 84,  // ca-FR
-	0x0d70009e: 85,  // ca-IT
-	0x0db00000: 86,  // ccp
-	0x0db00035: 87,  // ccp-BD
-	0x0db00099: 88,  // ccp-IN
-	0x0dc00000: 89,  // ce
-	0x0dc00106: 90,  // ce-RU
-	0x0df00000: 91,  // cgg
-	0x0df00131: 92,  // cgg-UG
-	0x0e500000: 93,  // chr
-	0x0e500135: 94,  // chr-US
-	0x0e900000: 95,  // ckb
-	0x0e90009b: 96,  // ckb-IQ
-	0x0e90009c: 97,  // ckb-IR
-	0x0fa00000: 98,  // cs
-	0x0fa0005e: 99,  // cs-CZ
-	0x0fe00000: 100, // cu
-	0x0fe00106: 101, // cu-RU
-	0x10000000: 102, // cy
-	0x1000007b: 103, // cy-GB
-	0x10100000: 104, // da
-	0x10100063: 105, // da-DK
-	0x10100082: 106, // da-GL
-	0x10800000: 107, // dav
-	0x108000a4: 108, // dav-KE
-	0x10d00000: 109, // de
-	0x10d0002e: 110, // de-AT
-	0x10d00036: 111, // de-BE
-	0x10d0004e: 112, // de-CH
-	0x10d00060: 113, // de-DE
-	0x10d0009e: 114, // de-IT
-	0x10d000b2: 115, // de-LI
-	0x10d000b7: 116, // de-LU
-	0x11700000: 117, // dje
-	0x117000d4: 118, // dje-NE
-	0x11f00000: 119, // dsb
-	0x11f00060: 120, // dsb-DE
-	0x12400000: 121, // dua
-	0x12400052: 122, // dua-CM
-	0x12800000: 123, // dv
-	0x12b00000: 124, // dyo
-	0x12b00114: 125, // dyo-SN
-	0x12d00000: 126, // dz
-	0x12d00043: 127, // dz-BT
-	0x12f00000: 128, // ebu
-	0x12f000a4: 129, // ebu-KE
-	0x13000000: 130, // ee
-	0x13000080: 131, // ee-GH
-	0x13000122: 132, // ee-TG
-	0x13600000: 133, // el
-	0x1360005d: 134, // el-CY
-	0x13600087: 135, // el-GR
-	0x13900000: 136, // en
-	0x13900001: 137, // en-001
-	0x1390001a: 138, // en-150
-	0x13900025: 139, // en-AG
-	0x13900026: 140, // en-AI
-	0x1390002d: 141, // en-AS
-	0x1390002e: 142, // en-AT
-	0x1390002f: 143, // en-AU
-	0x13900034: 144, // en-BB
-	0x13900036: 145, // en-BE
-	0x1390003a: 146, // en-BI
-	0x1390003d: 147, // en-BM
-	0x13900042: 148, // en-BS
-	0x13900046: 149, // en-BW
-	0x13900048: 150, // en-BZ
-	0x13900049: 151, // en-CA
-	0x1390004a: 152, // en-CC
-	0x1390004e: 153, // en-CH
-	0x13900050: 154, // en-CK
-	0x13900052: 155, // en-CM
-	0x1390005c: 156, // en-CX
-	0x1390005d: 157, // en-CY
-	0x13900060: 158, // en-DE
-	0x13900061: 159, // en-DG
-	0x13900063: 160, // en-DK
-	0x13900064: 161, // en-DM
-	0x1390006d: 162, // en-ER
-	0x13900072: 163, // en-FI
-	0x13900073: 164, // en-FJ
-	0x13900074: 165, // en-FK
-	0x13900075: 166, // en-FM
-	0x1390007b: 167, // en-GB
-	0x1390007c: 168, // en-GD
-	0x1390007f: 169, // en-GG
-	0x13900080: 170, // en-GH
-	0x13900081: 171, // en-GI
-	0x13900083: 172, // en-GM
-	0x1390008a: 173, // en-GU
-	0x1390008c: 174, // en-GY
-	0x1390008d: 175, // en-HK
-	0x13900096: 176, // en-IE
-	0x13900097: 177, // en-IL
-	0x13900098: 178, // en-IM
-	0x13900099: 179, // en-IN
-	0x1390009a: 180, // en-IO
-	0x1390009f: 181, // en-JE
-	0x139000a0: 182, // en-JM
-	0x139000a4: 183, // en-KE
-	0x139000a7: 184, // en-KI
-	0x139000a9: 185, // en-KN
-	0x139000ad: 186, // en-KY
-	0x139000b1: 187, // en-LC
-	0x139000b4: 188, // en-LR
-	0x139000b5: 189, // en-LS
-	0x139000bf: 190, // en-MG
-	0x139000c0: 191, // en-MH
-	0x139000c6: 192, // en-MO
-	0x139000c7: 193, // en-MP
-	0x139000ca: 194, // en-MS
-	0x139000cb: 195, // en-MT
-	0x139000cc: 196, // en-MU
-	0x139000ce: 197, // en-MW
-	0x139000d0: 198, // en-MY
-	0x139000d2: 199, // en-NA
-	0x139000d5: 200, // en-NF
-	0x139000d6: 201, // en-NG
-	0x139000d9: 202, // en-NL
-	0x139000dd: 203, // en-NR
-	0x139000df: 204, // en-NU
-	0x139000e0: 205, // en-NZ
-	0x139000e6: 206, // en-PG
-	0x139000e7: 207, // en-PH
-	0x139000e8: 208, // en-PK
-	0x139000eb: 209, // en-PN
-	0x139000ec: 210, // en-PR
-	0x139000f0: 211, // en-PW
-	0x13900107: 212, // en-RW
-	0x13900109: 213, // en-SB
-	0x1390010a: 214, // en-SC
-	0x1390010b: 215, // en-SD
-	0x1390010c: 216, // en-SE
-	0x1390010d: 217, // en-SG
-	0x1390010e: 218, // en-SH
-	0x1390010f: 219, // en-SI
-	0x13900112: 220, // en-SL
-	0x13900117: 221, // en-SS
-	0x1390011b: 222, // en-SX
-	0x1390011d: 223, // en-SZ
-	0x1390011f: 224, // en-TC
-	0x13900125: 225, // en-TK
-	0x13900129: 226, // en-TO
-	0x1390012c: 227, // en-TT
-	0x1390012d: 228, // en-TV
-	0x1390012f: 229, // en-TZ
-	0x13900131: 230, // en-UG
-	0x13900133: 231, // en-UM
-	0x13900135: 232, // en-US
-	0x13900139: 233, // en-VC
-	0x1390013c: 234, // en-VG
-	0x1390013d: 235, // en-VI
-	0x1390013f: 236, // en-VU
-	0x13900142: 237, // en-WS
-	0x13900161: 238, // en-ZA
-	0x13900162: 239, // en-ZM
-	0x13900164: 240, // en-ZW
-	0x13c00000: 241, // eo
-	0x13c00001: 242, // eo-001
-	0x13e00000: 243, // es
-	0x13e0001f: 244, // es-419
-	0x13e0002c: 245, // es-AR
-	0x13e0003f: 246, // es-BO
-	0x13e00041: 247, // es-BR
-	0x13e00048: 248, // es-BZ
-	0x13e00051: 249, // es-CL
-	0x13e00054: 250, // es-CO
-	0x13e00056: 251, // es-CR
-	0x13e00059: 252, // es-CU
-	0x13e00065: 253, // es-DO
-	0x13e00068: 254, // es-EA
-	0x13e00069: 255, // es-EC
-	0x13e0006e: 256, // es-ES
-	0x13e00086: 257, // es-GQ
-	0x13e00089: 258, // es-GT
-	0x13e0008f: 259, // es-HN
-	0x13e00094: 260, // es-IC
-	0x13e000cf: 261, // es-MX
-	0x13e000d8: 262, // es-NI
-	0x13e000e2: 263, // es-PA
-	0x13e000e4: 264, // es-PE
-	0x13e000e7: 265, // es-PH
-	0x13e000ec: 266, // es-PR
-	0x13e000f1: 267, // es-PY
-	0x13e0011a: 268, // es-SV
-	0x13e00135: 269, // es-US
-	0x13e00136: 270, // es-UY
-	0x13e0013b: 271, // es-VE
-	0x14000000: 272, // et
-	0x1400006a: 273, // et-EE
-	0x14500000: 274, // eu
-	0x1450006e: 275, // eu-ES
-	0x14600000: 276, // ewo
-	0x14600052: 277, // ewo-CM
-	0x14800000: 278, // fa
-	0x14800024: 279, // fa-AF
-	0x1480009c: 280, // fa-IR
-	0x14e00000: 281, // ff
-	0x14e00052: 282, // ff-CM
-	0x14e00084: 283, // ff-GN
-	0x14e000c9: 284, // ff-MR
-	0x14e00114: 285, // ff-SN
-	0x15100000: 286, // fi
-	0x15100072: 287, // fi-FI
-	0x15300000: 288, // fil
-	0x153000e7: 289, // fil-PH
-	0x15800000: 290, // fo
-	0x15800063: 291, // fo-DK
-	0x15800076: 292, // fo-FO
-	0x15e00000: 293, // fr
-	0x15e00036: 294, // fr-BE
-	0x15e00037: 295, // fr-BF
-	0x15e0003a: 296, // fr-BI
-	0x15e0003b: 297, // fr-BJ
-	0x15e0003c: 298, // fr-BL
-	0x15e00049: 299, // fr-CA
-	0x15e0004b: 300, // fr-CD
-	0x15e0004c: 301, // fr-CF
-	0x15e0004d: 302, // fr-CG
-	0x15e0004e: 303, // fr-CH
-	0x15e0004f: 304, // fr-CI
-	0x15e00052: 305, // fr-CM
-	0x15e00062: 306, // fr-DJ
-	0x15e00067: 307, // fr-DZ
-	0x15e00078: 308, // fr-FR
-	0x15e0007a: 309, // fr-GA
-	0x15e0007e: 310, // fr-GF
-	0x15e00084: 311, // fr-GN
-	0x15e00085: 312, // fr-GP
-	0x15e00086: 313, // fr-GQ
-	0x15e00091: 314, // fr-HT
-	0x15e000a8: 315, // fr-KM
-	0x15e000b7: 316, // fr-LU
-	0x15e000ba: 317, // fr-MA
-	0x15e000bb: 318, // fr-MC
-	0x15e000be: 319, // fr-MF
-	0x15e000bf: 320, // fr-MG
-	0x15e000c3: 321, // fr-ML
-	0x15e000c8: 322, // fr-MQ
-	0x15e000c9: 323, // fr-MR
-	0x15e000cc: 324, // fr-MU
-	0x15e000d3: 325, // fr-NC
-	0x15e000d4: 326, // fr-NE
-	0x15e000e5: 327, // fr-PF
-	0x15e000ea: 328, // fr-PM
-	0x15e00102: 329, // fr-RE
-	0x15e00107: 330, // fr-RW
-	0x15e0010a: 331, // fr-SC
-	0x15e00114: 332, // fr-SN
-	0x15e0011c: 333, // fr-SY
-	0x15e00120: 334, // fr-TD
-	0x15e00122: 335, // fr-TG
-	0x15e00128: 336, // fr-TN
-	0x15e0013f: 337, // fr-VU
-	0x15e00140: 338, // fr-WF
-	0x15e0015f: 339, // fr-YT
-	0x16900000: 340, // fur
-	0x1690009e: 341, // fur-IT
-	0x16d00000: 342, // fy
-	0x16d000d9: 343, // fy-NL
-	0x16e00000: 344, // ga
-	0x16e00096: 345, // ga-IE
-	0x17e00000: 346, // gd
-	0x17e0007b: 347, // gd-GB
-	0x19000000: 348, // gl
-	0x1900006e: 349, // gl-ES
-	0x1a300000: 350, // gsw
-	0x1a30004e: 351, // gsw-CH
-	0x1a300078: 352, // gsw-FR
-	0x1a3000b2: 353, // gsw-LI
-	0x1a400000: 354, // gu
-	0x1a400099: 355, // gu-IN
-	0x1a900000: 356, // guw
-	0x1ab00000: 357, // guz
-	0x1ab000a4: 358, // guz-KE
-	0x1ac00000: 359, // gv
-	0x1ac00098: 360, // gv-IM
-	0x1b400000: 361, // ha
-	0x1b400080: 362, // ha-GH
-	0x1b4000d4: 363, // ha-NE
-	0x1b4000d6: 364, // ha-NG
-	0x1b800000: 365, // haw
-	0x1b800135: 366, // haw-US
-	0x1bc00000: 367, // he
-	0x1bc00097: 368, // he-IL
-	0x1be00000: 369, // hi
-	0x1be00099: 370, // hi-IN
-	0x1d100000: 371, // hr
-	0x1d100033: 372, // hr-BA
-	0x1d100090: 373, // hr-HR
-	0x1d200000: 374, // hsb
-	0x1d200060: 375, // hsb-DE
-	0x1d500000: 376, // hu
-	0x1d500092: 377, // hu-HU
-	0x1d700000: 378, // hy
-	0x1d700028: 379, // hy-AM
-	0x1e100000: 380, // id
-	0x1e100095: 381, // id-ID
-	0x1e700000: 382, // ig
-	0x1e7000d6: 383, // ig-NG
-	0x1ea00000: 384, // ii
-	0x1ea00053: 385, // ii-CN
-	0x1f500000: 386, // io
-	0x1f800000: 387, // is
-	0x1f80009d: 388, // is-IS
-	0x1f900000: 389, // it
-	0x1f90004e: 390, // it-CH
-	0x1f90009e: 391, // it-IT
-	0x1f900113: 392, // it-SM
-	0x1f900138: 393, // it-VA
-	0x1fa00000: 394, // iu
-	0x20000000: 395, // ja
-	0x200000a2: 396, // ja-JP
-	0x20300000: 397, // jbo
-	0x20700000: 398, // jgo
-	0x20700052: 399, // jgo-CM
-	0x20a00000: 400, // jmc
-	0x20a0012f: 401, // jmc-TZ
-	0x20e00000: 402, // jv
-	0x21000000: 403, // ka
-	0x2100007d: 404, // ka-GE
-	0x21200000: 405, // kab
-	0x21200067: 406, // kab-DZ
-	0x21600000: 407, // kaj
-	0x21700000: 408, // kam
-	0x217000a4: 409, // kam-KE
-	0x21f00000: 410, // kcg
-	0x22300000: 411, // kde
-	0x2230012f: 412, // kde-TZ
-	0x22700000: 413, // kea
-	0x2270005a: 414, // kea-CV
-	0x23400000: 415, // khq
-	0x234000c3: 416, // khq-ML
-	0x23900000: 417, // ki
-	0x239000a4: 418, // ki-KE
-	0x24200000: 419, // kk
-	0x242000ae: 420, // kk-KZ
-	0x24400000: 421, // kkj
-	0x24400052: 422, // kkj-CM
-	0x24500000: 423, // kl
-	0x24500082: 424, // kl-GL
-	0x24600000: 425, // kln
-	0x246000a4: 426, // kln-KE
-	0x24a00000: 427, // km
-	0x24a000a6: 428, // km-KH
-	0x25100000: 429, // kn
-	0x25100099: 430, // kn-IN
-	0x25400000: 431, // ko
-	0x254000aa: 432, // ko-KP
-	0x254000ab: 433, // ko-KR
-	0x25600000: 434, // kok
-	0x25600099: 435, // kok-IN
-	0x26a00000: 436, // ks
-	0x26a00099: 437, // ks-IN
-	0x26b00000: 438, // ksb
-	0x26b0012f: 439, // ksb-TZ
-	0x26d00000: 440, // ksf
-	0x26d00052: 441, // ksf-CM
-	0x26e00000: 442, // ksh
-	0x26e00060: 443, // ksh-DE
-	0x27400000: 444, // ku
-	0x28100000: 445, // kw
-	0x2810007b: 446, // kw-GB
-	0x28a00000: 447, // ky
-	0x28a000a5: 448, // ky-KG
-	0x29100000: 449, // lag
-	0x2910012f: 450, // lag-TZ
-	0x29500000: 451, // lb
-	0x295000b7: 452, // lb-LU
-	0x2a300000: 453, // lg
-	0x2a300131: 454, // lg-UG
-	0x2af00000: 455, // lkt
-	0x2af00135: 456, // lkt-US
-	0x2b500000: 457, // ln
-	0x2b50002a: 458, // ln-AO
-	0x2b50004b: 459, // ln-CD
-	0x2b50004c: 460, // ln-CF
-	0x2b50004d: 461, // ln-CG
-	0x2b800000: 462, // lo
-	0x2b8000af: 463, // lo-LA
-	0x2bf00000: 464, // lrc
-	0x2bf0009b: 465, // lrc-IQ
-	0x2bf0009c: 466, // lrc-IR
-	0x2c000000: 467, // lt
-	0x2c0000b6: 468, // lt-LT
-	0x2c200000: 469, // lu
-	0x2c20004b: 470, // lu-CD
-	0x2c400000: 471, // luo
-	0x2c4000a4: 472, // luo-KE
-	0x2c500000: 473, // luy
-	0x2c5000a4: 474, // luy-KE
-	0x2c700000: 475, // lv
-	0x2c7000b8: 476, // lv-LV
-	0x2d100000: 477, // mas
-	0x2d1000a4: 478, // mas-KE
-	0x2d10012f: 479, // mas-TZ
-	0x2e900000: 480, // mer
-	0x2e9000a4: 481, // mer-KE
-	0x2ed00000: 482, // mfe
-	0x2ed000cc: 483, // mfe-MU
-	0x2f100000: 484, // mg
-	0x2f1000bf: 485, // mg-MG
-	0x2f200000: 486, // mgh
-	0x2f2000d1: 487, // mgh-MZ
-	0x2f400000: 488, // mgo
-	0x2f400052: 489, // mgo-CM
-	0x2ff00000: 490, // mk
-	0x2ff000c2: 491, // mk-MK
-	0x30400000: 492, // ml
-	0x30400099: 493, // ml-IN
-	0x30b00000: 494, // mn
-	0x30b000c5: 495, // mn-MN
-	0x31b00000: 496, // mr
-	0x31b00099: 497, // mr-IN
-	0x31f00000: 498, // ms
-	0x31f0003e: 499, // ms-BN
-	0x31f000d0: 500, // ms-MY
-	0x31f0010d: 501, // ms-SG
-	0x32000000: 502, // mt
-	0x320000cb: 503, // mt-MT
-	0x32500000: 504, // mua
-	0x32500052: 505, // mua-CM
-	0x33100000: 506, // my
-	0x331000c4: 507, // my-MM
-	0x33a00000: 508, // mzn
-	0x33a0009c: 509, // mzn-IR
-	0x34100000: 510, // nah
-	0x34500000: 511, // naq
-	0x345000d2: 512, // naq-NA
-	0x34700000: 513, // nb
-	0x347000da: 514, // nb-NO
-	0x34700110: 515, // nb-SJ
-	0x34e00000: 516, // nd
-	0x34e00164: 517, // nd-ZW
-	0x35000000: 518, // nds
-	0x35000060: 519, // nds-DE
-	0x350000d9: 520, // nds-NL
-	0x35100000: 521, // ne
-	0x35100099: 522, // ne-IN
-	0x351000db: 523, // ne-NP
-	0x36700000: 524, // nl
-	0x36700030: 525, // nl-AW
-	0x36700036: 526, // nl-BE
-	0x36700040: 527, // nl-BQ
-	0x3670005b: 528, // nl-CW
-	0x367000d9: 529, // nl-NL
-	0x36700116: 530, // nl-SR
-	0x3670011b: 531, // nl-SX
-	0x36800000: 532, // nmg
-	0x36800052: 533, // nmg-CM
-	0x36a00000: 534, // nn
-	0x36a000da: 535, // nn-NO
-	0x36c00000: 536, // nnh
-	0x36c00052: 537, // nnh-CM
-	0x36f00000: 538, // no
-	0x37500000: 539, // nqo
-	0x37600000: 540, // nr
-	0x37a00000: 541, // nso
-	0x38000000: 542, // nus
-	0x38000117: 543, // nus-SS
-	0x38700000: 544, // ny
-	0x38900000: 545, // nyn
-	0x38900131: 546, // nyn-UG
-	0x39000000: 547, // om
-	0x3900006f: 548, // om-ET
-	0x390000a4: 549, // om-KE
-	0x39500000: 550, // or
-	0x39500099: 551, // or-IN
-	0x39800000: 552, // os
-	0x3980007d: 553, // os-GE
-	0x39800106: 554, // os-RU
-	0x39d00000: 555, // pa
-	0x39d05000: 556, // pa-Arab
-	0x39d050e8: 557, // pa-Arab-PK
-	0x39d33000: 558, // pa-Guru
-	0x39d33099: 559, // pa-Guru-IN
-	0x3a100000: 560, // pap
-	0x3b300000: 561, // pl
-	0x3b3000e9: 562, // pl-PL
-	0x3bd00000: 563, // prg
-	0x3bd00001: 564, // prg-001
-	0x3be00000: 565, // ps
-	0x3be00024: 566, // ps-AF
-	0x3c000000: 567, // pt
-	0x3c00002a: 568, // pt-AO
-	0x3c000041: 569, // pt-BR
-	0x3c00004e: 570, // pt-CH
-	0x3c00005a: 571, // pt-CV
-	0x3c000086: 572, // pt-GQ
-	0x3c00008b: 573, // pt-GW
-	0x3c0000b7: 574, // pt-LU
-	0x3c0000c6: 575, // pt-MO
-	0x3c0000d1: 576, // pt-MZ
-	0x3c0000ee: 577, // pt-PT
-	0x3c000118: 578, // pt-ST
-	0x3c000126: 579, // pt-TL
-	0x3c400000: 580, // qu
-	0x3c40003f: 581, // qu-BO
-	0x3c400069: 582, // qu-EC
-	0x3c4000e4: 583, // qu-PE
-	0x3d400000: 584, // rm
-	0x3d40004e: 585, // rm-CH
-	0x3d900000: 586, // rn
-	0x3d90003a: 587, // rn-BI
-	0x3dc00000: 588, // ro
-	0x3dc000bc: 589, // ro-MD
-	0x3dc00104: 590, // ro-RO
-	0x3de00000: 591, // rof
-	0x3de0012f: 592, // rof-TZ
-	0x3e200000: 593, // ru
-	0x3e200047: 594, // ru-BY
-	0x3e2000a5: 595, // ru-KG
-	0x3e2000ae: 596, // ru-KZ
-	0x3e2000bc: 597, // ru-MD
-	0x3e200106: 598, // ru-RU
-	0x3e200130: 599, // ru-UA
-	0x3e500000: 600, // rw
-	0x3e500107: 601, // rw-RW
-	0x3e600000: 602, // rwk
-	0x3e60012f: 603, // rwk-TZ
-	0x3eb00000: 604, // sah
-	0x3eb00106: 605, // sah-RU
-	0x3ec00000: 606, // saq
-	0x3ec000a4: 607, // saq-KE
-	0x3f300000: 608, // sbp
-	0x3f30012f: 609, // sbp-TZ
-	0x3fa00000: 610, // sd
-	0x3fa000e8: 611, // sd-PK
-	0x3fc00000: 612, // sdh
-	0x3fd00000: 613, // se
-	0x3fd00072: 614, // se-FI
-	0x3fd000da: 615, // se-NO
-	0x3fd0010c: 616, // se-SE
-	0x3ff00000: 617, // seh
-	0x3ff000d1: 618, // seh-MZ
-	0x40100000: 619, // ses
-	0x401000c3: 620, // ses-ML
-	0x40200000: 621, // sg
-	0x4020004c: 622, // sg-CF
-	0x40800000: 623, // shi
-	0x40857000: 624, // shi-Latn
-	0x408570ba: 625, // shi-Latn-MA
-	0x408dc000: 626, // shi-Tfng
-	0x408dc0ba: 627, // shi-Tfng-MA
-	0x40c00000: 628, // si
-	0x40c000b3: 629, // si-LK
-	0x41200000: 630, // sk
-	0x41200111: 631, // sk-SK
-	0x41600000: 632, // sl
-	0x4160010f: 633, // sl-SI
-	0x41c00000: 634, // sma
-	0x41d00000: 635, // smi
-	0x41e00000: 636, // smj
-	0x41f00000: 637, // smn
-	0x41f00072: 638, // smn-FI
-	0x42200000: 639, // sms
-	0x42300000: 640, // sn
-	0x42300164: 641, // sn-ZW
-	0x42900000: 642, // so
-	0x42900062: 643, // so-DJ
-	0x4290006f: 644, // so-ET
-	0x429000a4: 645, // so-KE
-	0x42900115: 646, // so-SO
-	0x43100000: 647, // sq
-	0x43100027: 648, // sq-AL
-	0x431000c2: 649, // sq-MK
-	0x4310014d: 650, // sq-XK
-	0x43200000: 651, // sr
-	0x4321f000: 652, // sr-Cyrl
-	0x4321f033: 653, // sr-Cyrl-BA
-	0x4321f0bd: 654, // sr-Cyrl-ME
-	0x4321f105: 655, // sr-Cyrl-RS
-	0x4321f14d: 656, // sr-Cyrl-XK
-	0x43257000: 657, // sr-Latn
-	0x43257033: 658, // sr-Latn-BA
-	0x432570bd: 659, // sr-Latn-ME
-	0x43257105: 660, // sr-Latn-RS
-	0x4325714d: 661, // sr-Latn-XK
-	0x43700000: 662, // ss
-	0x43a00000: 663, // ssy
-	0x43b00000: 664, // st
-	0x44400000: 665, // sv
-	0x44400031: 666, // sv-AX
-	0x44400072: 667, // sv-FI
-	0x4440010c: 668, // sv-SE
-	0x44500000: 669, // sw
-	0x4450004b: 670, // sw-CD
-	0x445000a4: 671, // sw-KE
-	0x4450012f: 672, // sw-TZ
-	0x44500131: 673, // sw-UG
-	0x44e00000: 674, // syr
-	0x45000000: 675, // ta
-	0x45000099: 676, // ta-IN
-	0x450000b3: 677, // ta-LK
-	0x450000d0: 678, // ta-MY
-	0x4500010d: 679, // ta-SG
-	0x46100000: 680, // te
-	0x46100099: 681, // te-IN
-	0x46400000: 682, // teo
-	0x464000a4: 683, // teo-KE
-	0x46400131: 684, // teo-UG
-	0x46700000: 685, // tg
-	0x46700124: 686, // tg-TJ
-	0x46b00000: 687, // th
-	0x46b00123: 688, // th-TH
-	0x46f00000: 689, // ti
-	0x46f0006d: 690, // ti-ER
-	0x46f0006f: 691, // ti-ET
-	0x47100000: 692, // tig
-	0x47600000: 693, // tk
-	0x47600127: 694, // tk-TM
-	0x48000000: 695, // tn
-	0x48200000: 696, // to
-	0x48200129: 697, // to-TO
-	0x48a00000: 698, // tr
-	0x48a0005d: 699, // tr-CY
-	0x48a0012b: 700, // tr-TR
-	0x48e00000: 701, // ts
-	0x49400000: 702, // tt
-	0x49400106: 703, // tt-RU
-	0x4a400000: 704, // twq
-	0x4a4000d4: 705, // twq-NE
-	0x4a900000: 706, // tzm
-	0x4a9000ba: 707, // tzm-MA
-	0x4ac00000: 708, // ug
-	0x4ac00053: 709, // ug-CN
-	0x4ae00000: 710, // uk
-	0x4ae00130: 711, // uk-UA
-	0x4b400000: 712, // ur
-	0x4b400099: 713, // ur-IN
-	0x4b4000e8: 714, // ur-PK
-	0x4bc00000: 715, // uz
-	0x4bc05000: 716, // uz-Arab
-	0x4bc05024: 717, // uz-Arab-AF
-	0x4bc1f000: 718, // uz-Cyrl
-	0x4bc1f137: 719, // uz-Cyrl-UZ
-	0x4bc57000: 720, // uz-Latn
-	0x4bc57137: 721, // uz-Latn-UZ
-	0x4be00000: 722, // vai
-	0x4be57000: 723, // vai-Latn
-	0x4be570b4: 724, // vai-Latn-LR
-	0x4bee3000: 725, // vai-Vaii
-	0x4bee30b4: 726, // vai-Vaii-LR
-	0x4c000000: 727, // ve
-	0x4c300000: 728, // vi
-	0x4c30013e: 729, // vi-VN
-	0x4c900000: 730, // vo
-	0x4c900001: 731, // vo-001
-	0x4cc00000: 732, // vun
-	0x4cc0012f: 733, // vun-TZ
-	0x4ce00000: 734, // wa
-	0x4cf00000: 735, // wae
-	0x4cf0004e: 736, // wae-CH
-	0x4e500000: 737, // wo
-	0x4e500114: 738, // wo-SN
-	0x4f200000: 739, // xh
-	0x4fb00000: 740, // xog
-	0x4fb00131: 741, // xog-UG
-	0x50900000: 742, // yav
-	0x50900052: 743, // yav-CM
-	0x51200000: 744, // yi
-	0x51200001: 745, // yi-001
-	0x51800000: 746, // yo
-	0x5180003b: 747, // yo-BJ
-	0x518000d6: 748, // yo-NG
-	0x51f00000: 749, // yue
-	0x51f38000: 750, // yue-Hans
-	0x51f38053: 751, // yue-Hans-CN
-	0x51f39000: 752, // yue-Hant
-	0x51f3908d: 753, // yue-Hant-HK
-	0x52800000: 754, // zgh
-	0x528000ba: 755, // zgh-MA
-	0x52900000: 756, // zh
-	0x52938000: 757, // zh-Hans
-	0x52938053: 758, // zh-Hans-CN
-	0x5293808d: 759, // zh-Hans-HK
-	0x529380c6: 760, // zh-Hans-MO
-	0x5293810d: 761, // zh-Hans-SG
-	0x52939000: 762, // zh-Hant
-	0x5293908d: 763, // zh-Hant-HK
-	0x529390c6: 764, // zh-Hant-MO
-	0x5293912e: 765, // zh-Hant-TW
-	0x52f00000: 766, // zu
-	0x52f00161: 767, // zu-ZA
-}
-
-// Total table size 4676 bytes (4KiB); checksum: 17BE3673
@@ -2,8 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-//go:generate go run gen.go gen_common.go -output tables.go
-//go:generate go run gen_index.go
+//go:generate go run gen.go -output tables.go

 package language

@@ -11,47 +10,34 @@ package language
 // - verifying that tables are dropped correctly (most notably matcher tables).

 import (
-	"errors"
-	"fmt"
 	"strings"
-)

-const (
-	// maxCoreSize is the maximum size of a BCP 47 tag without variants and
-	// extensions. Equals max lang (3) + script (4) + max reg (3) + 2 dashes.
-	maxCoreSize = 12
-
-	// max99thPercentileSize is a somewhat arbitrary buffer size that presumably
-	// is large enough to hold at least 99% of the BCP 47 tags.
-	max99thPercentileSize = 32
-
-	// maxSimpleUExtensionSize is the maximum size of a -u extension with one
-	// key-type pair. Equals len("-u-") + key (2) + dash + max value (8).
-	maxSimpleUExtensionSize = 14
+	"golang.org/x/text/internal/language"
+	"golang.org/x/text/internal/language/compact"
 )

 // Tag represents a BCP 47 language tag. It is used to specify an instance of a
 // specific language or locale. All language tag values are guaranteed to be
 // well-formed.
-type Tag struct {
-	lang   langID
-	region regionID
-	// TODO: we will soon run out of positions for script. Idea: instead of
-	// storing lang, region, and script codes, store only the compact index and
-	// have a lookup table from this code to its expansion. This greatly speeds
-	// up table lookup, speed up common variant cases.
-	// This will also immediately free up 3 extra bytes. Also, the pVariant
-	// field can now be moved to the lookup table, as the compact index uniquely
-	// determines the offset of a possible variant.
-	script   scriptID
-	pVariant byte   // offset in str, includes preceding '-'
-	pExt     uint16 // offset of first extension, includes preceding '-'
+type Tag compact.Tag

-	// str is the string representation of the Tag. It will only be used if the
-	// tag has variants or extensions.
-	str string
+func makeTag(t language.Tag) (tag Tag) {
+	return Tag(compact.Make(t))
 }

+func (t *Tag) tag() language.Tag {
+	return (*compact.Tag)(t).Tag()
+}
+
+func (t *Tag) isCompact() bool {
+	return (*compact.Tag)(t).IsCompact()
+}
+
+// TODO: improve performance.
+func (t *Tag) lang() language.Language { return t.tag().LangID }
+func (t *Tag) region() language.Region { return t.tag().RegionID }
+func (t *Tag) script() language.Script { return t.tag().ScriptID }
+
 // Make is a convenience wrapper for Parse that omits the error.
 // In case of an error, a sensible default is returned.
 func Make(s string) Tag {
@@ -68,25 +54,13 @@ func (c CanonType) Make(s string) Tag {
 // Raw returns the raw base language, script and region, without making an
 // attempt to infer their values.
 func (t Tag) Raw() (b Base, s Script, r Region) {
-	return Base{t.lang}, Script{t.script}, Region{t.region}
-}
-
-// equalTags compares language, script and region subtags only.
-func (t Tag) equalTags(a Tag) bool {
-	return t.lang == a.lang && t.script == a.script && t.region == a.region
+	tt := t.tag()
+	return Base{tt.LangID}, Script{tt.ScriptID}, Region{tt.RegionID}
 }

 // IsRoot returns true if t is equal to language "und".
 func (t Tag) IsRoot() bool {
-	if int(t.pVariant) < len(t.str) {
-		return false
-	}
-	return t.equalTags(und)
-}
-
-// private reports whether the Tag consists solely of a private use tag.
-func (t Tag) private() bool {
-	return t.str != "" && t.pVariant == 0
+	return compact.Tag(t).IsRoot()
 }

 // CanonType can be used to enable or disable various types of canonicalization.
@@ -138,73 +112,73 @@ const (

 // canonicalize returns the canonicalized equivalent of the tag and
 // whether there was any change.
-func (t Tag) canonicalize(c CanonType) (Tag, bool) {
+func canonicalize(c CanonType, t language.Tag) (language.Tag, bool) {
 	if c == Raw {
 		return t, false
 	}
 	changed := false
 	if c&SuppressScript != 0 {
-		if t.lang < langNoIndexOffset && uint8(t.script) == suppressScript[t.lang] {
-			t.script = 0
+		if t.LangID.SuppressScript() == t.ScriptID {
+			t.ScriptID = 0
 			changed = true
 		}
 	}
 	if c&canonLang != 0 {
 		for {
-			if l, aliasType := normLang(t.lang); l != t.lang {
+			if l, aliasType := t.LangID.Canonicalize(); l != t.LangID {
 				switch aliasType {
-				case langLegacy:
+				case language.Legacy:
 					if c&Legacy != 0 {
-						if t.lang == _sh && t.script == 0 {
-							t.script = _Latn
+						if t.LangID == _sh && t.ScriptID == 0 {
+							t.ScriptID = _Latn
 						}
-						t.lang = l
+						t.LangID = l
 						changed = true
 					}
-				case langMacro:
+				case language.Macro:
 					if c&Macro != 0 {
 						// We deviate here from CLDR. The mapping "nb" -> "no"
 						// qualifies as a typical Macro language mapping.  However,
 						// for legacy reasons, CLDR maps "no", the macro language
 						// code for Norwegian, to the dominant variant "nb". This
 						// change is currently under consideration for CLDR as well.
-						// See http://unicode.org/cldr/trac/ticket/2698 and also
-						// http://unicode.org/cldr/trac/ticket/1790 for some of the
+						// See https://unicode.org/cldr/trac/ticket/2698 and also
+						// https://unicode.org/cldr/trac/ticket/1790 for some of the
 						// practical implications. TODO: this check could be removed
 						// if CLDR adopts this change.
-						if c&CLDR == 0 || t.lang != _nb {
+						if c&CLDR == 0 || t.LangID != _nb {
 							changed = true
-							t.lang = l
+							t.LangID = l
 						}
 					}
-				case langDeprecated:
+				case language.Deprecated:
 					if c&DeprecatedBase != 0 {
-						if t.lang == _mo && t.region == 0 {
-							t.region = _MD
+						if t.LangID == _mo && t.RegionID == 0 {
+							t.RegionID = _MD
 						}
-						t.lang = l
+						t.LangID = l
 						changed = true
 						// Other canonicalization types may still apply.
 						continue
 					}
 				}
-			} else if c&Legacy != 0 && t.lang == _no && c&CLDR != 0 {
-				t.lang = _nb
+			} else if c&Legacy != 0 && t.LangID == _no && c&CLDR != 0 {
+				t.LangID = _nb
 				changed = true
 			}
 			break
 		}
 	}
 	if c&DeprecatedScript != 0 {
-		if t.script == _Qaai {
+		if t.ScriptID == _Qaai {
 			changed = true
-			t.script = _Zinh
+			t.ScriptID = _Zinh
 		}
 	}
 	if c&DeprecatedRegion != 0 {
-		if r := normRegion(t.region); r != 0 {
+		if r := t.RegionID.Canonicalize(); r != t.RegionID {
 			changed = true
-			t.region = r
+			t.RegionID = r
 		}
 	}
 	return t, changed
@@ -212,11 +186,20 @@ func (t Tag) canonicalize(c CanonType) (Tag, bool) {

 // Canonicalize returns the canonicalized equivalent of the tag.
 func (c CanonType) Canonicalize(t Tag) (Tag, error) {
-	t, changed := t.canonicalize(c)
-	if changed {
-		t.remakeString()
+	// First try fast path.
+	if t.isCompact() {
+		if _, changed := canonicalize(c, compact.Tag(t).Tag()); !changed {
+			return t, nil
+		}
+	}
+	// It is unlikely that one will canonicalize a tag after matching. So do
+	// a slow but simple approach here.
+	if tag, changed := canonicalize(c, t.tag()); changed {
+		tag.RemakeString()
+		return makeTag(tag), nil
 	}
 	return t, nil
+
 }

 // Confidence indicates the level of certainty for a given return value.
@@ -239,83 +222,21 @@ func (c Confidence) String() string {
 	return confName[c]
 }

-// remakeString is used to update t.str in case lang, script or region changed.
-// It is assumed that pExt and pVariant still point to the start of the
-// respective parts.
-func (t *Tag) remakeString() {
-	if t.str == "" {
-		return
-	}
-	extra := t.str[t.pVariant:]
-	if t.pVariant > 0 {
-		extra = extra[1:]
-	}
-	if t.equalTags(und) && strings.HasPrefix(extra, "x-") {
-		t.str = extra
-		t.pVariant = 0
-		t.pExt = 0
-		return
-	}
-	var buf [max99thPercentileSize]byte // avoid extra memory allocation in most cases.
-	b := buf[:t.genCoreBytes(buf[:])]
-	if extra != "" {
-		diff := len(b) - int(t.pVariant)
-		b = append(b, '-')
-		b = append(b, extra...)
-		t.pVariant = uint8(int(t.pVariant) + diff)
-		t.pExt = uint16(int(t.pExt) + diff)
-	} else {
-		t.pVariant = uint8(len(b))
-		t.pExt = uint16(len(b))
-	}
-	t.str = string(b)
-}
-
-// genCoreBytes writes a string for the base languages, script and region tags
-// to the given buffer and returns the number of bytes written. It will never
-// write more than maxCoreSize bytes.
-func (t *Tag) genCoreBytes(buf []byte) int {
-	n := t.lang.stringToBuf(buf[:])
-	if t.script != 0 {
-		n += copy(buf[n:], "-")
-		n += copy(buf[n:], t.script.String())
-	}
-	if t.region != 0 {
-		n += copy(buf[n:], "-")
-		n += copy(buf[n:], t.region.String())
-	}
-	return n
-}
-
 // String returns the canonical string representation of the language tag.
 func (t Tag) String() string {
-	if t.str != "" {
-		return t.str
-	}
-	if t.script == 0 && t.region == 0 {
-		return t.lang.String()
-	}
-	buf := [maxCoreSize]byte{}
-	return string(buf[:t.genCoreBytes(buf[:])])
+	return t.tag().String()
 }

 // MarshalText implements encoding.TextMarshaler.
 func (t Tag) MarshalText() (text []byte, err error) {
-	if t.str != "" {
-		text = append(text, t.str...)
-	} else if t.script == 0 && t.region == 0 {
-		text = append(text, t.lang.String()...)
-	} else {
-		buf := [maxCoreSize]byte{}
-		text = buf[:t.genCoreBytes(buf[:])]
-	}
-	return text, nil
+	return t.tag().MarshalText()
 }

 // UnmarshalText implements encoding.TextUnmarshaler.
 func (t *Tag) UnmarshalText(text []byte) error {
-	tag, err := Raw.Parse(string(text))
-	*t = tag
+	var tag language.Tag
+	err := tag.UnmarshalText(text)
+	*t = makeTag(tag)
 	return err
 }

@@ -323,15 +244,16 @@ func (t *Tag) UnmarshalText(text []byte) error {
 // unspecified, an attempt will be made to infer it from the context.
 // It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
 func (t Tag) Base() (Base, Confidence) {
-	if t.lang != 0 {
-		return Base{t.lang}, Exact
+	if b := t.lang(); b != 0 {
+		return Base{b}, Exact
 	}
+	tt := t.tag()
 	c := High
-	if t.script == 0 && !(Region{t.region}).IsCountry() {
+	if tt.ScriptID == 0 && !tt.RegionID.IsCountry() {
 		c = Low
 	}
-	if tag, err := addTags(t); err == nil && tag.lang != 0 {
-		return Base{tag.lang}, c
+	if tag, err := tt.Maximize(); err == nil && tag.LangID != 0 {
+		return Base{tag.LangID}, c
 	}
 	return Base{0}, No
 }
@@ -344,35 +266,34 @@ func (t Tag) Base() (Base, Confidence) {
 // If a script cannot be inferred (Zzzz, No) is returned. We do not use Zyyy (undetermined)
 // as one would suspect from the IANA registry for BCP 47. In a Unicode context Zyyy marks
 // common characters (like 1, 2, 3, '.', etc.) and is therefore more like multiple scripts.
-// See http://www.unicode.org/reports/tr24/#Values for more details. Zzzz is also used for
+// See https://www.unicode.org/reports/tr24/#Values for more details. Zzzz is also used for
 // unknown value in CLDR.  (Zzzz, Exact) is returned if Zzzz was explicitly specified.
 // Note that an inferred script is never guaranteed to be the correct one. Latin is
 // almost exclusively used for Afrikaans, but Arabic has been used for some texts
 // in the past.  Also, the script that is commonly used may change over time.
 // It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
 func (t Tag) Script() (Script, Confidence) {
-	if t.script != 0 {
-		return Script{t.script}, Exact
+	if scr := t.script(); scr != 0 {
+		return Script{scr}, Exact
 	}
-	sc, c := scriptID(_Zzzz), No
-	if t.lang < langNoIndexOffset {
-		if scr := scriptID(suppressScript[t.lang]); scr != 0 {
-			// Note: it is not always the case that a language with a suppress
-			// script value is only written in one script (e.g. kk, ms, pa).
-			if t.region == 0 {
-				return Script{scriptID(scr)}, High
-			}
-			sc, c = scr, High
+	tt := t.tag()
+	sc, c := language.Script(_Zzzz), No
+	if scr := tt.LangID.SuppressScript(); scr != 0 {
+		// Note: it is not always the case that a language with a suppress
+		// script value is only written in one script (e.g. kk, ms, pa).
+		if tt.RegionID == 0 {
+			return Script{scr}, High
 		}
+		sc, c = scr, High
 	}
-	if tag, err := addTags(t); err == nil {
-		if tag.script != sc {
-			sc, c = tag.script, Low
+	if tag, err := tt.Maximize(); err == nil {
+		if tag.ScriptID != sc {
+			sc, c = tag.ScriptID, Low
 		}
 	} else {
-		t, _ = (Deprecated | Macro).Canonicalize(t)
-		if tag, err := addTags(t); err == nil && tag.script != sc {
-			sc, c = tag.script, Low
+		tt, _ = canonicalize(Deprecated|Macro, tt)
+		if tag, err := tt.Maximize(); err == nil && tag.ScriptID != sc {
+			sc, c = tag.ScriptID, Low
 		}
 	}
 	return Script{sc}, c
@@ -382,28 +303,31 @@ func (t Tag) Script() (Script, Confidence) {
 // infer a most likely candidate from the context.
 // It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
 func (t Tag) Region() (Region, Confidence) {
-	if t.region != 0 {
-		return Region{t.region}, Exact
+	if r := t.region(); r != 0 {
+		return Region{r}, Exact
 	}
-	if t, err := addTags(t); err == nil {
-		return Region{t.region}, Low // TODO: differentiate between high and low.
+	tt := t.tag()
+	if tt, err := tt.Maximize(); err == nil {
+		return Region{tt.RegionID}, Low // TODO: differentiate between high and low.
 	}
-	t, _ = (Deprecated | Macro).Canonicalize(t)
-	if tag, err := addTags(t); err == nil {
-		return Region{tag.region}, Low
+	tt, _ = canonicalize(Deprecated|Macro, tt)
+	if tag, err := tt.Maximize(); err == nil {
+		return Region{tag.RegionID}, Low
 	}
 	return Region{_ZZ}, No // TODO: return world instead of undetermined?
 }

-// Variant returns the variants specified explicitly for this language tag.
+// Variants returns the variants specified explicitly for this language tag.
 // or nil if no variant was specified.
 func (t Tag) Variants() []Variant {
+	if !compact.Tag(t).MayHaveVariants() {
+		return nil
+	}
 	v := []Variant{}
-	if int(t.pVariant) < int(t.pExt) {
-		for x, str := "", t.str[t.pVariant:t.pExt]; str != ""; {
-			x, str = nextToken(str)
-			v = append(v, Variant{x})
-		}
+	x, str := "", t.tag().Variants()
+	for str != "" {
+		x, str = nextToken(str)
+		v = append(v, Variant{x})
 	}
 	return v
 }
@@ -411,57 +335,13 @@ func (t Tag) Variants() []Variant {
 // Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
 // specific language are substituted with fields from the parent language.
 // The parent for a language may change for newer versions of CLDR.
+//
+// Parent returns a tag for a less specific language that is mutually
+// intelligible or Und if there is no such language. This may not be the same as
+// simply stripping the last BCP 47 subtag. For instance, the parent of "zh-TW"
+// is "zh-Hant", and the parent of "zh-Hant" is "und".
 func (t Tag) Parent() Tag {
-	if t.str != "" {
-		// Strip the variants and extensions.
-		t, _ = Raw.Compose(t.Raw())
-		if t.region == 0 && t.script != 0 && t.lang != 0 {
-			base, _ := addTags(Tag{lang: t.lang})
-			if base.script == t.script {
-				return Tag{lang: t.lang}
-			}
-		}
-		return t
-	}
-	if t.lang != 0 {
-		if t.region != 0 {
-			maxScript := t.script
-			if maxScript == 0 {
-				max, _ := addTags(t)
-				maxScript = max.script
-			}
-
-			for i := range parents {
-				if langID(parents[i].lang) == t.lang && scriptID(parents[i].maxScript) == maxScript {
-					for _, r := range parents[i].fromRegion {
-						if regionID(r) == t.region {
-							return Tag{
-								lang:   t.lang,
-								script: scriptID(parents[i].script),
-								region: regionID(parents[i].toRegion),
-							}
-						}
-					}
-				}
-			}
-
-			// Strip the script if it is the default one.
-			base, _ := addTags(Tag{lang: t.lang})
-			if base.script != maxScript {
-				return Tag{lang: t.lang, script: maxScript}
-			}
-			return Tag{lang: t.lang}
-		} else if t.script != 0 {
-			// The parent for an base-script pair with a non-default script is
-			// "und" instead of the base language.
-			base, _ := addTags(Tag{lang: t.lang})
-			if base.script != t.script {
-				return und
-			}
-			return Tag{lang: t.lang}
-		}
-	}
-	return und
+	return Tag(compact.Tag(t).Parent())
 }

 // returns token t and the rest of the string.
@@ -487,17 +367,8 @@ func (e Extension) String() string {

 // ParseExtension parses s as an extension and returns it on success.
 func ParseExtension(s string) (e Extension, err error) {
-	scan := makeScannerString(s)
-	var end int
-	if n := len(scan.token); n != 1 {
-		return Extension{}, errSyntax
-	}
-	scan.toLower(0, len(scan.b))
-	end = parseExtension(&scan)
-	if end != len(s) {
-		return Extension{}, errSyntax
-	}
-	return Extension{string(scan.b)}, nil
+	ext, err := language.ParseExtension(s)
+	return Extension{ext}, err
 }

 // Type returns the one-byte extension type of e. It returns 0 for the zero
@@ -518,22 +389,20 @@ func (e Extension) Tokens() []string {
 // false for ok if t does not have the requested extension. The returned
 // extension will be invalid in this case.
 func (t Tag) Extension(x byte) (ext Extension, ok bool) {
-	for i := int(t.pExt); i < len(t.str)-1; {
-		var ext string
-		i, ext = getExtension(t.str, i)
-		if ext[0] == x {
-			return Extension{ext}, true
-		}
+	if !compact.Tag(t).MayHaveExtensions() {
+		return Extension{}, false
 	}
-	return Extension{}, false
+	e, ok := t.tag().Extension(x)
+	return Extension{e}, ok
 }

 // Extensions returns all extensions of t.
 func (t Tag) Extensions() []Extension {
+	if !compact.Tag(t).MayHaveExtensions() {
+		return nil
+	}
 	e := []Extension{}
-	for i := int(t.pExt); i < len(t.str)-1; {
-		var ext string
-		i, ext = getExtension(t.str, i)
+	for _, ext := range t.tag().Extensions() {
 		e = append(e, Extension{ext})
 	}
 	return e
@@ -541,259 +410,105 @@ func (t Tag) Extensions() []Extension {

 // TypeForKey returns the type associated with the given key, where key and type
 // are of the allowed values defined for the Unicode locale extension ('u') in
-// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
 // TypeForKey will traverse the inheritance chain to get the correct value.
 func (t Tag) TypeForKey(key string) string {
-	if start, end, _ := t.findTypeForKey(key); end != start {
-		return t.str[start:end]
+	if !compact.Tag(t).MayHaveExtensions() {
+		if key != "rg" && key != "va" {
+			return ""
+		}
 	}
-	return ""
+	return t.tag().TypeForKey(key)
 }

-var (
-	errPrivateUse       = errors.New("cannot set a key on a private use tag")
-	errInvalidArguments = errors.New("invalid key or type")
-)
-
 // SetTypeForKey returns a new Tag with the key set to type, where key and type
 // are of the allowed values defined for the Unicode locale extension ('u') in
-// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
 // An empty value removes an existing pair with the same key.
 func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
-	if t.private() {
-		return t, errPrivateUse
-	}
-	if len(key) != 2 {
-		return t, errInvalidArguments
-	}
-
-	// Remove the setting if value is "".
-	if value == "" {
-		start, end, _ := t.findTypeForKey(key)
-		if start != end {
-			// Remove key tag and leading '-'.
-			start -= 4
-
-			// Remove a possible empty extension.
-			if (end == len(t.str) || t.str[end+2] == '-') && t.str[start-2] == '-' {
-				start -= 2
-			}
-			if start == int(t.pVariant) && end == len(t.str) {
-				t.str = ""
-				t.pVariant, t.pExt = 0, 0
-			} else {
-				t.str = fmt.Sprintf("%s%s", t.str[:start], t.str[end:])
-			}
-		}
-		return t, nil
-	}
-
-	if len(value) < 3 || len(value) > 8 {
-		return t, errInvalidArguments
-	}
-
-	var (
-		buf    [maxCoreSize + maxSimpleUExtensionSize]byte
-		uStart int // start of the -u extension.
-	)
-
-	// Generate the tag string if needed.
-	if t.str == "" {
-		uStart = t.genCoreBytes(buf[:])
-		buf[uStart] = '-'
-		uStart++
-	}
-
-	// Create new key-type pair and parse it to verify.
-	b := buf[uStart:]
-	copy(b, "u-")
-	copy(b[2:], key)
-	b[4] = '-'
-	b = b[:5+copy(b[5:], value)]
-	scan := makeScanner(b)
-	if parseExtensions(&scan); scan.err != nil {
-		return t, scan.err
-	}
-
-	// Assemble the replacement string.
-	if t.str == "" {
-		t.pVariant, t.pExt = byte(uStart-1), uint16(uStart-1)
-		t.str = string(buf[:uStart+len(b)])
-	} else {
-		s := t.str
-		start, end, hasExt := t.findTypeForKey(key)
-		if start == end {
-			if hasExt {
-				b = b[2:]
-			}
-			t.str = fmt.Sprintf("%s-%s%s", s[:start], b, s[end:])
-		} else {
-			t.str = fmt.Sprintf("%s%s%s", s[:start], value, s[end:])
-		}
-	}
-	return t, nil
+	tt, err := t.tag().SetTypeForKey(key, value)
+	return makeTag(tt), err
 }

-// findKeyAndType returns the start and end position for the type corresponding
-// to key or the point at which to insert the key-value pair if the type
-// wasn't found. The hasExt return value reports whether an -u extension was present.
-// Note: the extensions are typically very small and are likely to contain
-// only one key-type pair.
-func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
-	p := int(t.pExt)
-	if len(key) != 2 || p == len(t.str) || p == 0 {
-		return p, p, false
-	}
-	s := t.str
-
-	// Find the correct extension.
-	for p++; s[p] != 'u'; p++ {
-		if s[p] > 'u' {
-			p--
-			return p, p, false
-		}
-		if p = nextExtension(s, p); p == len(s) {
-			return len(s), len(s), false
-		}
-	}
-	// Proceed to the hyphen following the extension name.
-	p++
-
-	// curKey is the key currently being processed.
-	curKey := ""
-
-	// Iterate over keys until we get the end of a section.
-	for {
-		// p points to the hyphen preceding the current token.
-		if p3 := p + 3; s[p3] == '-' {
-			// Found a key.
-			// Check whether we just processed the key that was requested.
-			if curKey == key {
-				return start, p, true
-			}
-			// Set to the next key and continue scanning type tokens.
-			curKey = s[p+1 : p3]
-			if curKey > key {
-				return p, p, true
-			}
-			// Start of the type token sequence.
-			start = p + 4
-			// A type is at least 3 characters long.
-			p += 7 // 4 + 3
-		} else {
-			// Attribute or type, which is at least 3 characters long.
-			p += 4
-		}
-		// p points past the third character of a type or attribute.
-		max := p + 5 // maximum length of token plus hyphen.
-		if len(s) < max {
-			max = len(s)
-		}
-		for ; p < max && s[p] != '-'; p++ {
-		}
-		// Bail if we have exhausted all tokens or if the next token starts
-		// a new extension.
-		if p == len(s) || s[p+2] == '-' {
-			if curKey == key {
-				return start, p, true
-			}
-			return p, p, true
-		}
-	}
-}
+// NumCompactTags is the number of compact tags. The maximum tag is
+// NumCompactTags-1.
+const NumCompactTags = compact.NumCompactTags

 // CompactIndex returns an index, where 0 <= index < NumCompactTags, for tags
-// for which data exists in the text repository. The index will change over time
-// and should not be stored in persistent storage. Extensions, except for the
-// 'va' type of the 'u' extension, are ignored. It will return 0, false if no
-// compact tag exists, where 0 is the index for the root language (Und).
-func CompactIndex(t Tag) (index int, ok bool) {
-	// TODO: perhaps give more frequent tags a lower index.
-	// TODO: we could make the indexes stable. This will excluded some
-	//       possibilities for optimization, so don't do this quite yet.
-	b, s, r := t.Raw()
-	if len(t.str) > 0 {
-		if strings.HasPrefix(t.str, "x-") {
-			// We have no entries for user-defined tags.
-			return 0, false
-		}
-		if uint16(t.pVariant) != t.pExt {
-			// There are no tags with variants and an u-va type.
-			if t.TypeForKey("va") != "" {
-				return 0, false
-			}
-			t, _ = Raw.Compose(b, s, r, t.Variants())
-		} else if _, ok := t.Extension('u'); ok {
-			// Strip all but the 'va' entry.
-			variant := t.TypeForKey("va")
-			t, _ = Raw.Compose(b, s, r)
-			t, _ = t.SetTypeForKey("va", variant)
-		}
-		if len(t.str) > 0 {
-			// We have some variants.
-			for i, s := range specialTags {
-				if s == t {
-					return i + 1, true
-				}
-			}
-			return 0, false
-		}
-	}
-	// No variants specified: just compare core components.
-	// The key has the form lllssrrr, where l, s, and r are nibbles for
-	// respectively the langID, scriptID, and regionID.
-	key := uint32(b.langID) << (8 + 12)
-	key |= uint32(s.scriptID) << 12
-	key |= uint32(r.regionID)
-	x, ok := coreTags[key]
-	return int(x), ok
+// for which data exists in the text repository.The index will change over time
+// and should not be stored in persistent storage. If t does not match a compact
+// index, exact will be false and the compact index will be returned for the
+// first match after repeatedly taking the Parent of t.
+func CompactIndex(t Tag) (index int, exact bool) {
+	id, exact := compact.LanguageID(compact.Tag(t))
+	return int(id), exact
 }

+var root = language.Tag{}
+
 // Base is an ISO 639 language code, used for encoding the base language
 // of a language tag.
 type Base struct {
-	langID
+	langID language.Language
 }

 // ParseBase parses a 2- or 3-letter ISO 639 code.
 // It returns a ValueError if s is a well-formed but unknown language identifier
 // or another error if another error occurred.
 func ParseBase(s string) (Base, error) {
-	if n := len(s); n < 2 || 3 < n {
-		return Base{}, errSyntax
-	}
-	var buf [3]byte
-	l, err := getLangID(buf[:copy(buf[:], s)])
+	l, err := language.ParseBase(s)
 	return Base{l}, err
 }

+// String returns the BCP 47 representation of the base language.
+func (b Base) String() string {
+	return b.langID.String()
+}
+
+// ISO3 returns the ISO 639-3 language code.
+func (b Base) ISO3() string {
+	return b.langID.ISO3()
+}
+
+// IsPrivateUse reports whether this language code is reserved for private use.
+func (b Base) IsPrivateUse() bool {
+	return b.langID.IsPrivateUse()
+}
+
 // Script is a 4-letter ISO 15924 code for representing scripts.
 // It is idiomatically represented in title case.
 type Script struct {
-	scriptID
+	scriptID language.Script
 }

 // ParseScript parses a 4-letter ISO 15924 code.
 // It returns a ValueError if s is a well-formed but unknown script identifier
 // or another error if another error occurred.
 func ParseScript(s string) (Script, error) {
-	if len(s) != 4 {
-		return Script{}, errSyntax
-	}
-	var buf [4]byte
-	sc, err := getScriptID(script, buf[:copy(buf[:], s)])
+	sc, err := language.ParseScript(s)
 	return Script{sc}, err
 }

+// String returns the script code in title case.
+// It returns "Zzzz" for an unspecified script.
+func (s Script) String() string {
+	return s.scriptID.String()
+}
+
+// IsPrivateUse reports whether this script code is reserved for private use.
+func (s Script) IsPrivateUse() bool {
+	return s.scriptID.IsPrivateUse()
+}
+
 // Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
 type Region struct {
-	regionID
+	regionID language.Region
 }

 // EncodeM49 returns the Region for the given UN M.49 code.
 // It returns an error if r is not a valid code.
 func EncodeM49(r int) (Region, error) {
-	rid, err := getRegionM49(r)
+	rid, err := language.EncodeM49(r)
 	return Region{rid}, err
 }

@@ -801,62 +516,54 @@ func EncodeM49(r int) (Region, error) {
 // It returns a ValueError if s is a well-formed but unknown region identifier
 // or another error if another error occurred.
 func ParseRegion(s string) (Region, error) {
-	if n := len(s); n < 2 || 3 < n {
-		return Region{}, errSyntax
-	}
-	var buf [3]byte
-	r, err := getRegionID(buf[:copy(buf[:], s)])
+	r, err := language.ParseRegion(s)
 	return Region{r}, err
 }

+// String returns the BCP 47 representation for the region.
+// It returns "ZZ" for an unspecified region.
+func (r Region) String() string {
+	return r.regionID.String()
+}
+
+// ISO3 returns the 3-letter ISO code of r.
+// Note that not all regions have a 3-letter ISO code.
+// In such cases this method returns "ZZZ".
+func (r Region) ISO3() string {
+	return r.regionID.ISO3()
+}
+
+// M49 returns the UN M.49 encoding of r, or 0 if this encoding
+// is not defined for r.
+func (r Region) M49() int {
+	return r.regionID.M49()
+}
+
+// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
+// may include private-use tags that are assigned by CLDR and used in this
+// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
+func (r Region) IsPrivateUse() bool {
+	return r.regionID.IsPrivateUse()
+}
+
 // IsCountry returns whether this region is a country or autonomous area. This
 // includes non-standard definitions from CLDR.
 func (r Region) IsCountry() bool {
-	if r.regionID == 0 || r.IsGroup() || r.IsPrivateUse() && r.regionID != _XK {
-		return false
-	}
-	return true
+	return r.regionID.IsCountry()
 }

 // IsGroup returns whether this region defines a collection of regions. This
 // includes non-standard definitions from CLDR.
 func (r Region) IsGroup() bool {
-	if r.regionID == 0 {
-		return false
-	}
-	return int(regionInclusion[r.regionID]) < len(regionContainment)
+	return r.regionID.IsGroup()
 }

 // Contains returns whether Region c is contained by Region r. It returns true
 // if c == r.
 func (r Region) Contains(c Region) bool {
-	return r.regionID.contains(c.regionID)
+	return r.regionID.Contains(c.regionID)
 }

-func (r regionID) contains(c regionID) bool {
-	if r == c {
-		return true
-	}
-	g := regionInclusion[r]
-	if g >= nRegionGroups {
-		return false
-	}
-	m := regionContainment[g]
-
-	d := regionInclusion[c]
-	b := regionInclusionBits[d]
-
-	// A contained country may belong to multiple disjoint groups. Matching any
-	// of these indicates containment. If the contained region is a group, it
-	// must strictly be a subset.
-	if d >= nRegionGroups {
-		return b&m != 0
-	}
-	return b&^m == 0
-}
-
-var errNoTLD = errors.New("language: region is not a valid ccTLD")
-
 // TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
 // In all other cases it returns either the region itself or an error.
 //
@@ -865,25 +572,15 @@ var errNoTLD = errors.New("language: region is not a valid ccTLD")
 // region will already be canonicalized it was obtained from a Tag that was
 // obtained using any of the default methods.
 func (r Region) TLD() (Region, error) {
-	// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the
-	// difference between ISO 3166-1 and IANA ccTLD.
-	if r.regionID == _GB {
-		r = Region{_UK}
-	}
-	if (r.typ() & ccTLD) == 0 {
-		return Region{}, errNoTLD
-	}
-	return r, nil
+	tld, err := r.regionID.TLD()
+	return Region{tld}, err
 }

 // Canonicalize returns the region or a possible replacement if the region is
 // deprecated. It will not return a replacement for deprecated regions that
 // are split into multiple regions.
 func (r Region) Canonicalize() Region {
-	if cr := normRegion(r.regionID); cr != 0 {
-		return Region{cr}
-	}
-	return r
+	return Region{r.regionID.Canonicalize()}
 }

 // Variant represents a registered variant of a language as defined by BCP 47.
@@ -894,11 +591,8 @@ type Variant struct {
 // ParseVariant parses and returns a Variant. An error is returned if s is not
 // a valid variant.
 func ParseVariant(s string) (Variant, error) {
-	s = strings.ToLower(s)
-	if _, ok := variantIndex[s]; ok {
-		return Variant{s}, nil
-	}
-	return Variant{}, mkErrInvalid([]byte(s))
+	v, err := language.ParseVariant(s)
+	return Variant{v.String()}, err
 }

 // String returns the string representation of the variant.
@@ -4,7 +4,12 @@

 package language

-import "errors"
+import (
+	"errors"
+	"strings"
+
+	"golang.org/x/text/internal/language"
+)

 // A MatchOption configures a Matcher.
 type MatchOption func(*matcher)
@@ -74,12 +79,13 @@ func NewMatcher(t []Tag, options ...MatchOption) Matcher {
 }

 func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
+	var tt language.Tag
 	match, w, c := m.getBest(want...)
 	if match != nil {
-		t, index = match.tag, match.index
+		tt, index = match.tag, match.index
 	} else {
 		// TODO: this should be an option
-		t = m.default_.tag
+		tt = m.default_.tag
 		if m.preferSameScript {
 		outer:
 			for _, w := range want {
@@ -91,7 +97,7 @@ func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
 				}
 				for i, h := range m.supported {
 					if script.scriptID == h.maxScript {
-						t, index = h.tag, i
+						tt, index = h.tag, i
 						break outer
 					}
 				}
@@ -99,238 +105,45 @@ func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
 		}
 		// TODO: select first language tag based on script.
 	}
-	if w.region != 0 && t.region != 0 && t.region.contains(w.region) {
-		t, _ = Raw.Compose(t, Region{w.region})
+	if w.RegionID != tt.RegionID && w.RegionID != 0 {
+		if w.RegionID != 0 && tt.RegionID != 0 && tt.RegionID.Contains(w.RegionID) {
+			tt.RegionID = w.RegionID
+			tt.RemakeString()
+		} else if r := w.RegionID.String(); len(r) == 2 {
+			// TODO: also filter macro and deprecated.
+			tt, _ = tt.SetTypeForKey("rg", strings.ToLower(r)+"zzzz")
+		}
 	}
 	// Copy options from the user-provided tag into the result tag. This is hard
 	// to do after the fact, so we do it here.
 	// TODO: add in alternative variants to -u-va-.
 	// TODO: add preferred region to -u-rg-.
 	if e := w.Extensions(); len(e) > 0 {
-		t, _ = Raw.Compose(t, e)
-	}
-	return t, index, c
-}
-
-type scriptRegionFlags uint8
-
-const (
-	isList = 1 << iota
-	scriptInFrom
-	regionInFrom
-)
-
-func (t *Tag) setUndefinedLang(id langID) {
-	if t.lang == 0 {
-		t.lang = id
-	}
-}
-
-func (t *Tag) setUndefinedScript(id scriptID) {
-	if t.script == 0 {
-		t.script = id
-	}
-}
-
-func (t *Tag) setUndefinedRegion(id regionID) {
-	if t.region == 0 || t.region.contains(id) {
-		t.region = id
+		b := language.Builder{}
+		b.SetTag(tt)
+		for _, e := range e {
+			b.AddExt(e)
+		}
+		tt = b.Make()
 	}
+	return makeTag(tt), index, c
 }

 // ErrMissingLikelyTagsData indicates no information was available
 // to compute likely values of missing tags.
 var ErrMissingLikelyTagsData = errors.New("missing likely tags data")

-// addLikelySubtags sets subtags to their most likely value, given the locale.
-// In most cases this means setting fields for unknown values, but in some
-// cases it may alter a value.  It returns an ErrMissingLikelyTagsData error
-// if the given locale cannot be expanded.
-func (t Tag) addLikelySubtags() (Tag, error) {
-	id, err := addTags(t)
-	if err != nil {
-		return t, err
-	} else if id.equalTags(t) {
-		return t, nil
-	}
-	id.remakeString()
-	return id, nil
-}
-
-// specializeRegion attempts to specialize a group region.
-func specializeRegion(t *Tag) bool {
-	if i := regionInclusion[t.region]; i < nRegionGroups {
-		x := likelyRegionGroup[i]
-		if langID(x.lang) == t.lang && scriptID(x.script) == t.script {
-			t.region = regionID(x.region)
-		}
-		return true
-	}
-	return false
-}
-
-func addTags(t Tag) (Tag, error) {
-	// We leave private use identifiers alone.
-	if t.private() {
-		return t, nil
-	}
-	if t.script != 0 && t.region != 0 {
-		if t.lang != 0 {
-			// already fully specified
-			specializeRegion(&t)
-			return t, nil
-		}
-		// Search matches for und-script-region. Note that for these cases
-		// region will never be a group so there is no need to check for this.
-		list := likelyRegion[t.region : t.region+1]
-		if x := list[0]; x.flags&isList != 0 {
-			list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
-		}
-		for _, x := range list {
-			// Deviating from the spec. See match_test.go for details.
-			if scriptID(x.script) == t.script {
-				t.setUndefinedLang(langID(x.lang))
-				return t, nil
-			}
-		}
-	}
-	if t.lang != 0 {
-		// Search matches for lang-script and lang-region, where lang != und.
-		if t.lang < langNoIndexOffset {
-			x := likelyLang[t.lang]
-			if x.flags&isList != 0 {
-				list := likelyLangList[x.region : x.region+uint16(x.script)]
-				if t.script != 0 {
-					for _, x := range list {
-						if scriptID(x.script) == t.script && x.flags&scriptInFrom != 0 {
-							t.setUndefinedRegion(regionID(x.region))
-							return t, nil
-						}
-					}
-				} else if t.region != 0 {
-					count := 0
-					goodScript := true
-					tt := t
-					for _, x := range list {
-						// We visit all entries for which the script was not
-						// defined, including the ones where the region was not
-						// defined. This allows for proper disambiguation within
-						// regions.
-						if x.flags&scriptInFrom == 0 && t.region.contains(regionID(x.region)) {
-							tt.region = regionID(x.region)
-							tt.setUndefinedScript(scriptID(x.script))
-							goodScript = goodScript && tt.script == scriptID(x.script)
-							count++
-						}
-					}
-					if count == 1 {
-						return tt, nil
-					}
-					// Even if we fail to find a unique Region, we might have
-					// an unambiguous script.
-					if goodScript {
-						t.script = tt.script
-					}
-				}
-			}
-		}
-	} else {
-		// Search matches for und-script.
-		if t.script != 0 {
-			x := likelyScript[t.script]
-			if x.region != 0 {
-				t.setUndefinedRegion(regionID(x.region))
-				t.setUndefinedLang(langID(x.lang))
-				return t, nil
-			}
-		}
-		// Search matches for und-region. If und-script-region exists, it would
-		// have been found earlier.
-		if t.region != 0 {
-			if i := regionInclusion[t.region]; i < nRegionGroups {
-				x := likelyRegionGroup[i]
-				if x.region != 0 {
-					t.setUndefinedLang(langID(x.lang))
-					t.setUndefinedScript(scriptID(x.script))
-					t.region = regionID(x.region)
-				}
-			} else {
-				x := likelyRegion[t.region]
-				if x.flags&isList != 0 {
-					x = likelyRegionList[x.lang]
-				}
-				if x.script != 0 && x.flags != scriptInFrom {
-					t.setUndefinedLang(langID(x.lang))
-					t.setUndefinedScript(scriptID(x.script))
-					return t, nil
-				}
-			}
-		}
-	}
-
-	// Search matches for lang.
-	if t.lang < langNoIndexOffset {
-		x := likelyLang[t.lang]
-		if x.flags&isList != 0 {
-			x = likelyLangList[x.region]
-		}
-		if x.region != 0 {
-			t.setUndefinedScript(scriptID(x.script))
-			t.setUndefinedRegion(regionID(x.region))
-		}
-		specializeRegion(&t)
-		if t.lang == 0 {
-			t.lang = _en // default language
-		}
-		return t, nil
-	}
-	return t, ErrMissingLikelyTagsData
-}
-
-func (t *Tag) setTagsFrom(id Tag) {
-	t.lang = id.lang
-	t.script = id.script
-	t.region = id.region
-}
-
-// minimize removes the region or script subtags from t such that
-// t.addLikelySubtags() == t.minimize().addLikelySubtags().
-func (t Tag) minimize() (Tag, error) {
-	t, err := minimizeTags(t)
-	if err != nil {
-		return t, err
-	}
-	t.remakeString()
-	return t, nil
-}
-
-// minimizeTags mimics the behavior of the ICU 51 C implementation.
-func minimizeTags(t Tag) (Tag, error) {
-	if t.equalTags(und) {
-		return t, nil
-	}
-	max, err := addTags(t)
-	if err != nil {
-		return t, err
-	}
-	for _, id := range [...]Tag{
-		{lang: t.lang},
-		{lang: t.lang, region: t.region},
-		{lang: t.lang, script: t.script},
-	} {
-		if x, err := addTags(id); err == nil && max.equalTags(x) {
-			t.setTagsFrom(id)
-			break
-		}
-	}
-	return t, nil
-}
+// func (t *Tag) setTagsFrom(id Tag) {
+// 	t.LangID = id.LangID
+// 	t.ScriptID = id.ScriptID
+// 	t.RegionID = id.RegionID
+// }

 // Tag Matching
 // CLDR defines an algorithm for finding the best match between two sets of language
 // tags. The basic algorithm defines how to score a possible match and then find
 // the match with the best score
-// (see http://www.unicode.org/reports/tr35/#LanguageMatching).
+// (see https://www.unicode.org/reports/tr35/#LanguageMatching).
 // Using scoring has several disadvantages. The scoring obfuscates the importance of
 // the various factors considered, making the algorithm harder to understand. Using
 // scoring also requires the full score to be computed for each pair of tags.
@@ -441,7 +254,7 @@ func minimizeTags(t Tag) (Tag, error) {
 type matcher struct {
 	default_         *haveTag
 	supported        []*haveTag
-	index            map[langID]*matchHeader
+	index            map[language.Language]*matchHeader
 	passSettings     bool
 	preferSameScript bool
 }
@@ -456,7 +269,7 @@ type matchHeader struct {
 // haveTag holds a supported Tag and its maximized script and region. The maximized
 // or canonicalized language is not stored as it is not needed during matching.
 type haveTag struct {
-	tag Tag
+	tag language.Tag

 	// index of this tag in the original list of supported tags.
 	index int
@@ -466,37 +279,37 @@ type haveTag struct {
 	conf Confidence

 	// Maximized region and script.
-	maxRegion regionID
-	maxScript scriptID
+	maxRegion language.Region
+	maxScript language.Script

 	// altScript may be checked as an alternative match to maxScript. If altScript
 	// matches, the confidence level for this match is Low. Theoretically there
 	// could be multiple alternative scripts. This does not occur in practice.
-	altScript scriptID
+	altScript language.Script

 	// nextMax is the index of the next haveTag with the same maximized tags.
 	nextMax uint16
 }

-func makeHaveTag(tag Tag, index int) (haveTag, langID) {
+func makeHaveTag(tag language.Tag, index int) (haveTag, language.Language) {
 	max := tag
-	if tag.lang != 0 || tag.region != 0 || tag.script != 0 {
-		max, _ = max.canonicalize(All)
-		max, _ = addTags(max)
-		max.remakeString()
+	if tag.LangID != 0 || tag.RegionID != 0 || tag.ScriptID != 0 {
+		max, _ = canonicalize(All, max)
+		max, _ = max.Maximize()
+		max.RemakeString()
 	}
-	return haveTag{tag, index, Exact, max.region, max.script, altScript(max.lang, max.script), 0}, max.lang
+	return haveTag{tag, index, Exact, max.RegionID, max.ScriptID, altScript(max.LangID, max.ScriptID), 0}, max.LangID
 }

 // altScript returns an alternative script that may match the given script with
 // a low confidence.  At the moment, the langMatch data allows for at most one
 // script to map to another and we rely on this to keep the code simple.
-func altScript(l langID, s scriptID) scriptID {
+func altScript(l language.Language, s language.Script) language.Script {
 	for _, alt := range matchScript {
 		// TODO: also match cases where language is not the same.
-		if (langID(alt.wantLang) == l || langID(alt.haveLang) == l) &&
-			scriptID(alt.haveScript) == s {
-			return scriptID(alt.wantScript)
+		if (language.Language(alt.wantLang) == l || language.Language(alt.haveLang) == l) &&
+			language.Script(alt.haveScript) == s {
+			return language.Script(alt.wantScript)
 		}
 	}
 	return 0
@@ -508,7 +321,7 @@ func (h *matchHeader) addIfNew(n haveTag, exact bool) {
 	h.original = h.original || exact
 	// Don't add new exact matches.
 	for _, v := range h.haveTags {
-		if v.tag.equalsRest(n.tag) {
+		if equalsRest(v.tag, n.tag) {
 			return
 		}
 	}
@@ -517,7 +330,7 @@ func (h *matchHeader) addIfNew(n haveTag, exact bool) {
 	for i, v := range h.haveTags {
 		if v.maxScript == n.maxScript &&
 			v.maxRegion == n.maxRegion &&
-			v.tag.variantOrPrivateTagStr() == n.tag.variantOrPrivateTagStr() {
+			v.tag.VariantOrPrivateUseTags() == n.tag.VariantOrPrivateUseTags() {
 			for h.haveTags[i].nextMax != 0 {
 				i = int(h.haveTags[i].nextMax)
 			}
@@ -530,7 +343,7 @@ func (h *matchHeader) addIfNew(n haveTag, exact bool) {

 // header returns the matchHeader for the given language. It creates one if
 // it doesn't already exist.
-func (m *matcher) header(l langID) *matchHeader {
+func (m *matcher) header(l language.Language) *matchHeader {
 	if h := m.index[l]; h != nil {
 		return h
 	}
@@ -554,7 +367,7 @@ func toConf(d uint8) Confidence {
 // for a given tag.
 func newMatcher(supported []Tag, options []MatchOption) *matcher {
 	m := &matcher{
-		index:            make(map[langID]*matchHeader),
+		index:            make(map[language.Language]*matchHeader),
 		preferSameScript: true,
 	}
 	for _, o := range options {
@@ -567,16 +380,18 @@ func newMatcher(supported []Tag, options []MatchOption) *matcher {
 	// Add supported languages to the index. Add exact matches first to give
 	// them precedence.
 	for i, tag := range supported {
-		pair, _ := makeHaveTag(tag, i)
-		m.header(tag.lang).addIfNew(pair, true)
+		tt := tag.tag()
+		pair, _ := makeHaveTag(tt, i)
+		m.header(tt.LangID).addIfNew(pair, true)
 		m.supported = append(m.supported, &pair)
 	}
-	m.default_ = m.header(supported[0].lang).haveTags[0]
+	m.default_ = m.header(supported[0].lang()).haveTags[0]
 	// Keep these in two different loops to support the case that two equivalent
 	// languages are distinguished, such as iw and he.
 	for i, tag := range supported {
-		pair, max := makeHaveTag(tag, i)
-		if max != tag.lang {
+		tt := tag.tag()
+		pair, max := makeHaveTag(tt, i)
+		if max != tt.LangID {
 			m.header(max).addIfNew(pair, true)
 		}
 	}
@@ -585,11 +400,11 @@ func newMatcher(supported []Tag, options []MatchOption) *matcher {
 	// update will only add entries to original indexes, thus not computing any
 	// transitive relations.
 	update := func(want, have uint16, conf Confidence) {
-		if hh := m.index[langID(have)]; hh != nil {
+		if hh := m.index[language.Language(have)]; hh != nil {
 			if !hh.original {
 				return
 			}
-			hw := m.header(langID(want))
+			hw := m.header(language.Language(want))
 			for _, ht := range hh.haveTags {
 				v := *ht
 				if conf < v.conf {
@@ -597,7 +412,7 @@ func newMatcher(supported []Tag, options []MatchOption) *matcher {
 				}
 				v.nextMax = 0 // this value needs to be recomputed
 				if v.altScript != 0 {
-					v.altScript = altScript(langID(want), v.maxScript)
+					v.altScript = altScript(language.Language(want), v.maxScript)
 				}
 				hw.addIfNew(v, conf == Exact && hh.original)
 			}
@@ -618,66 +433,67 @@ func newMatcher(supported []Tag, options []MatchOption) *matcher {
 	// First we match deprecated equivalents. If they are perfect equivalents
 	// (their canonicalization simply substitutes a different language code, but
 	// nothing else), the match confidence is Exact, otherwise it is High.
-	for i, lm := range langAliasMap {
+	for i, lm := range language.AliasMap {
 		// If deprecated codes match and there is no fiddling with the script or
 		// or region, we consider it an exact match.
 		conf := Exact
-		if langAliasTypes[i] != langMacro {
-			if !isExactEquivalent(langID(lm.from)) {
+		if language.AliasTypes[i] != language.Macro {
+			if !isExactEquivalent(language.Language(lm.From)) {
 				conf = High
 			}
-			update(lm.to, lm.from, conf)
+			update(lm.To, lm.From, conf)
 		}
-		update(lm.from, lm.to, conf)
+		update(lm.From, lm.To, conf)
 	}
 	return m
 }

 // getBest gets the best matching tag in m for any of the given tags, taking into
 // account the order of preference of the given tags.
-func (m *matcher) getBest(want ...Tag) (got *haveTag, orig Tag, c Confidence) {
+func (m *matcher) getBest(want ...Tag) (got *haveTag, orig language.Tag, c Confidence) {
 	best := bestMatch{}
-	for i, w := range want {
-		var max Tag
+	for i, ww := range want {
+		w := ww.tag()
+		var max language.Tag
 		// Check for exact match first.
-		h := m.index[w.lang]
-		if w.lang != 0 {
+		h := m.index[w.LangID]
+		if w.LangID != 0 {
 			if h == nil {
 				continue
 			}
 			// Base language is defined.
-			max, _ = w.canonicalize(Legacy | Deprecated | Macro)
+			max, _ = canonicalize(Legacy|Deprecated|Macro, w)
 			// A region that is added through canonicalization is stronger than
 			// a maximized region: set it in the original (e.g. mo -> ro-MD).
-			if w.region != max.region {
-				w.region = max.region
+			if w.RegionID != max.RegionID {
+				w.RegionID = max.RegionID
 			}
 			// TODO: should we do the same for scripts?
 			// See test case: en, sr, nl ; sh ; sr
-			max, _ = addTags(max)
+			max, _ = max.Maximize()
 		} else {
 			// Base language is not defined.
 			if h != nil {
 				for i := range h.haveTags {
 					have := h.haveTags[i]
-					if have.tag.equalsRest(w) {
+					if equalsRest(have.tag, w) {
 						return have, w, Exact
 					}
 				}
 			}
-			if w.script == 0 && w.region == 0 {
+			if w.ScriptID == 0 && w.RegionID == 0 {
 				// We skip all tags matching und for approximate matching, including
 				// private tags.
 				continue
 			}
-			max, _ = addTags(w)
-			if h = m.index[max.lang]; h == nil {
+			max, _ = w.Maximize()
+			if h = m.index[max.LangID]; h == nil {
 				continue
 			}
 		}
 		pin := true
 		for _, t := range want[i+1:] {
-			if w.lang == t.lang {
+			if w.LangID == t.lang() {
 				pin = false
 				break
 			}
@@ -685,11 +501,11 @@ func (m *matcher) getBest(want ...Tag) (got *haveTag, orig Tag, c Confidence) {
 		// Check for match based on maximized tag.
 		for i := range h.haveTags {
 			have := h.haveTags[i]
-			best.update(have, w, max.script, max.region, pin)
+			best.update(have, w, max.ScriptID, max.RegionID, pin)
 			if best.conf == Exact {
 				for have.nextMax != 0 {
 					have = h.haveTags[have.nextMax]
-					best.update(have, w, max.script, max.region, pin)
+					best.update(have, w, max.ScriptID, max.RegionID, pin)
 				}
 				return best.have, best.want, best.conf
 			}
@@ -697,9 +513,9 @@ func (m *matcher) getBest(want ...Tag) (got *haveTag, orig Tag, c Confidence) {
 	}
 	if best.conf <= No {
 		if len(want) != 0 {
-			return nil, want[0], No
+			return nil, want[0].tag(), No
 		}
-		return nil, Tag{}, No
+		return nil, language.Tag{}, No
 	}
 	return best.have, best.want, best.conf
 }
@@ -707,9 +523,9 @@ func (m *matcher) getBest(want ...Tag) (got *haveTag, orig Tag, c Confidence) {
 // bestMatch accumulates the best match so far.
 type bestMatch struct {
 	have            *haveTag
-	want            Tag
+	want            language.Tag
 	conf            Confidence
-	pinnedRegion    regionID
+	pinnedRegion    language.Region
 	pinLanguage     bool
 	sameRegionGroup bool
 	// Cached results from applying tie-breaking rules.
@@ -734,19 +550,19 @@ type bestMatch struct {
 // still prefer a second language over a dialect of the preferred language by
 // explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should
 // be false.
-func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion regionID, pin bool) {
+func (m *bestMatch) update(have *haveTag, tag language.Tag, maxScript language.Script, maxRegion language.Region, pin bool) {
 	// Bail if the maximum attainable confidence is below that of the current best match.
 	c := have.conf
 	if c < m.conf {
 		return
 	}
 	// Don't change the language once we already have found an exact match.
-	if m.pinLanguage && tag.lang != m.want.lang {
+	if m.pinLanguage && tag.LangID != m.want.LangID {
 		return
 	}
 	// Pin the region group if we are comparing tags for the same language.
-	if tag.lang == m.want.lang && m.sameRegionGroup {
-		_, sameGroup := regionGroupDist(m.pinnedRegion, have.maxRegion, have.maxScript, m.want.lang)
+	if tag.LangID == m.want.LangID && m.sameRegionGroup {
+		_, sameGroup := regionGroupDist(m.pinnedRegion, have.maxRegion, have.maxScript, m.want.LangID)
 		if !sameGroup {
 			return
 		}
@@ -756,7 +572,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
 		// don't pin anything, otherwise pin the language.
 		m.pinLanguage = pin
 	}
-	if have.tag.equalsRest(tag) {
+	if equalsRest(have.tag, tag) {
 	} else if have.maxScript != maxScript {
 		// There is usually very little comprehension between different scripts.
 		// In a few cases there may still be Low comprehension. This possibility
@@ -786,7 +602,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion

 	// Tie-breaker rules:
 	// We prefer if the pre-maximized language was specified and identical.
-	origLang := have.tag.lang == tag.lang && tag.lang != 0
+	origLang := have.tag.LangID == tag.LangID && tag.LangID != 0
 	if !beaten && m.origLang != origLang {
 		if m.origLang {
 			return
@@ -795,7 +611,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
 	}

 	// We prefer if the pre-maximized region was specified and identical.
-	origReg := have.tag.region == tag.region && tag.region != 0
+	origReg := have.tag.RegionID == tag.RegionID && tag.RegionID != 0
 	if !beaten && m.origReg != origReg {
 		if m.origReg {
 			return
@@ -803,7 +619,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
 		beaten = true
 	}

-	regGroupDist, sameGroup := regionGroupDist(have.maxRegion, maxRegion, maxScript, tag.lang)
+	regGroupDist, sameGroup := regionGroupDist(have.maxRegion, maxRegion, maxScript, tag.LangID)
 	if !beaten && m.regGroupDist != regGroupDist {
 		if regGroupDist > m.regGroupDist {
 			return
@@ -811,7 +627,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
 		beaten = true
 	}

-	paradigmReg := isParadigmLocale(tag.lang, have.maxRegion)
+	paradigmReg := isParadigmLocale(tag.LangID, have.maxRegion)
 	if !beaten && m.paradigmReg != paradigmReg {
 		if !paradigmReg {
 			return
@@ -820,7 +636,7 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
 	}

 	// Next we prefer if the pre-maximized script was specified and identical.
-	origScript := have.tag.script == tag.script && tag.script != 0
+	origScript := have.tag.ScriptID == tag.ScriptID && tag.ScriptID != 0
 	if !beaten && m.origScript != origScript {
 		if m.origScript {
 			return
@@ -843,9 +659,9 @@ func (m *bestMatch) update(have *haveTag, tag Tag, maxScript scriptID, maxRegion
 	}
 }

-func isParadigmLocale(lang langID, r regionID) bool {
+func isParadigmLocale(lang language.Language, r language.Region) bool {
 	for _, e := range paradigmLocales {
-		if langID(e[0]) == lang && (r == regionID(e[1]) || r == regionID(e[2])) {
+		if language.Language(e[0]) == lang && (r == language.Region(e[1]) || r == language.Region(e[2])) {
 			return true
 		}
 	}
@@ -854,13 +670,13 @@ func isParadigmLocale(lang langID, r regionID) bool {

 // regionGroupDist computes the distance between two regions based on their
 // CLDR grouping.
-func regionGroupDist(a, b regionID, script scriptID, lang langID) (dist uint8, same bool) {
+func regionGroupDist(a, b language.Region, script language.Script, lang language.Language) (dist uint8, same bool) {
 	const defaultDistance = 4

 	aGroup := uint(regionToGroups[a]) << 1
 	bGroup := uint(regionToGroups[b]) << 1
 	for _, ri := range matchRegion {
-		if langID(ri.lang) == lang && (ri.script == 0 || scriptID(ri.script) == script) {
+		if language.Language(ri.lang) == lang && (ri.script == 0 || language.Script(ri.script) == script) {
 			group := uint(1 << (ri.group &^ 0x80))
 			if 0x80&ri.group == 0 {
 				if aGroup&bGroup&group != 0 { // Both regions are in the group.
@@ -876,31 +692,16 @@ func regionGroupDist(a, b regionID, script scriptID, lang langID) (dist uint8, s
 	return defaultDistance, true
 }

-func (t Tag) variants() string {
-	if t.pVariant == 0 {
-		return ""
-	}
-	return t.str[t.pVariant:t.pExt]
-}
-
-// variantOrPrivateTagStr returns variants or private use tags.
-func (t Tag) variantOrPrivateTagStr() string {
-	if t.pExt > 0 {
-		return t.str[t.pVariant:t.pExt]
-	}
-	return t.str[t.pVariant:]
-}
-
 // equalsRest compares everything except the language.
-func (a Tag) equalsRest(b Tag) bool {
+func equalsRest(a, b language.Tag) bool {
 	// TODO: don't include extensions in this comparison. To do this efficiently,
 	// though, we should handle private tags separately.
-	return a.script == b.script && a.region == b.region && a.variantOrPrivateTagStr() == b.variantOrPrivateTagStr()
+	return a.ScriptID == b.ScriptID && a.RegionID == b.RegionID && a.VariantOrPrivateUseTags() == b.VariantOrPrivateUseTags()
 }

 // isExactEquivalent returns true if canonicalizing the language will not alter
 // the script or region of a tag.
-func isExactEquivalent(l langID) bool {
+func isExactEquivalent(l language.Language) bool {
 	for _, o := range notEquivalent {
 		if o == l {
 			return false
@@ -909,25 +710,26 @@ func isExactEquivalent(l langID) bool {
 	return true
 }

-var notEquivalent []langID
+var notEquivalent []language.Language

 func init() {
 	// Create a list of all languages for which canonicalization may alter the
 	// script or region.
-	for _, lm := range langAliasMap {
-		tag := Tag{lang: langID(lm.from)}
-		if tag, _ = tag.canonicalize(All); tag.script != 0 || tag.region != 0 {
-			notEquivalent = append(notEquivalent, langID(lm.from))
+	for _, lm := range language.AliasMap {
+		tag := language.Tag{LangID: language.Language(lm.From)}
+		if tag, _ = canonicalize(All, tag); tag.ScriptID != 0 || tag.RegionID != 0 {
+			notEquivalent = append(notEquivalent, language.Language(lm.From))
 		}
 	}
 	// Maximize undefined regions of paradigm locales.
 	for i, v := range paradigmLocales {
-		max, _ := addTags(Tag{lang: langID(v[0])})
+		t := language.Tag{LangID: language.Language(v[0])}
+		max, _ := t.Maximize()
 		if v[1] == 0 {
-			paradigmLocales[i][1] = uint16(max.region)
+			paradigmLocales[i][1] = uint16(max.RegionID)
 		}
 		if v[2] == 0 {
-			paradigmLocales[i][2] = uint16(max.region)
+			paradigmLocales[i][2] = uint16(max.RegionID)
 		}
 	}
 }
@@ -5,216 +5,21 @@
 package language

 import (
-	"bytes"
 	"errors"
-	"fmt"
-	"sort"
 	"strconv"
 	"strings"

-	"golang.org/x/text/internal/tag"
+	"golang.org/x/text/internal/language"
 )

-// isAlpha returns true if the byte is not a digit.
-// b must be an ASCII letter or digit.
-func isAlpha(b byte) bool {
-	return b > '9'
-}
-
-// isAlphaNum returns true if the string contains only ASCII letters or digits.
-func isAlphaNum(s []byte) bool {
-	for _, c := range s {
-		if !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9') {
-			return false
-		}
-	}
-	return true
-}
-
-// errSyntax is returned by any of the parsing functions when the
-// input is not well-formed, according to BCP 47.
-// TODO: return the position at which the syntax error occurred?
-var errSyntax = errors.New("language: tag is not well-formed")
-
 // ValueError is returned by any of the parsing functions when the
 // input is well-formed but the respective subtag is not recognized
 // as a valid value.
-type ValueError struct {
-	v [8]byte
-}
+type ValueError interface {
+	error

-func mkErrInvalid(s []byte) error {
-	var e ValueError
-	copy(e.v[:], s)
-	return e
-}
-
-func (e ValueError) tag() []byte {
-	n := bytes.IndexByte(e.v[:], 0)
-	if n == -1 {
-		n = 8
-	}
-	return e.v[:n]
-}
-
-// Error implements the error interface.
-func (e ValueError) Error() string {
-	return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag())
-}
-
-// Subtag returns the subtag for which the error occurred.
-func (e ValueError) Subtag() string {
-	return string(e.tag())
-}
-
-// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
-type scanner struct {
-	b     []byte
-	bytes [max99thPercentileSize]byte
-	token []byte
-	start int // start position of the current token
-	end   int // end position of the current token
-	next  int // next point for scan
-	err   error
-	done  bool
-}
-
-func makeScannerString(s string) scanner {
-	scan := scanner{}
-	if len(s) <= len(scan.bytes) {
-		scan.b = scan.bytes[:copy(scan.bytes[:], s)]
-	} else {
-		scan.b = []byte(s)
-	}
-	scan.init()
-	return scan
-}
-
-// makeScanner returns a scanner using b as the input buffer.
-// b is not copied and may be modified by the scanner routines.
-func makeScanner(b []byte) scanner {
-	scan := scanner{b: b}
-	scan.init()
-	return scan
-}
-
-func (s *scanner) init() {
-	for i, c := range s.b {
-		if c == '_' {
-			s.b[i] = '-'
-		}
-	}
-	s.scan()
-}
-
-// restToLower converts the string between start and end to lower case.
-func (s *scanner) toLower(start, end int) {
-	for i := start; i < end; i++ {
-		c := s.b[i]
-		if 'A' <= c && c <= 'Z' {
-			s.b[i] += 'a' - 'A'
-		}
-	}
-}
-
-func (s *scanner) setError(e error) {
-	if s.err == nil || (e == errSyntax && s.err != errSyntax) {
-		s.err = e
-	}
-}
-
-// resizeRange shrinks or grows the array at position oldStart such that
-// a new string of size newSize can fit between oldStart and oldEnd.
-// Sets the scan point to after the resized range.
-func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
-	s.start = oldStart
-	if end := oldStart + newSize; end != oldEnd {
-		diff := end - oldEnd
-		if end < cap(s.b) {
-			b := make([]byte, len(s.b)+diff)
-			copy(b, s.b[:oldStart])
-			copy(b[end:], s.b[oldEnd:])
-			s.b = b
-		} else {
-			s.b = append(s.b[end:], s.b[oldEnd:]...)
-		}
-		s.next = end + (s.next - s.end)
-		s.end = end
-	}
-}
-
-// replace replaces the current token with repl.
-func (s *scanner) replace(repl string) {
-	s.resizeRange(s.start, s.end, len(repl))
-	copy(s.b[s.start:], repl)
-}
-
-// gobble removes the current token from the input.
-// Caller must call scan after calling gobble.
-func (s *scanner) gobble(e error) {
-	s.setError(e)
-	if s.start == 0 {
-		s.b = s.b[:+copy(s.b, s.b[s.next:])]
-		s.end = 0
-	} else {
-		s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
-		s.end = s.start - 1
-	}
-	s.next = s.start
-}
-
-// deleteRange removes the given range from s.b before the current token.
-func (s *scanner) deleteRange(start, end int) {
-	s.setError(errSyntax)
-	s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
-	diff := end - start
-	s.next -= diff
-	s.start -= diff
-	s.end -= diff
-}
-
-// scan parses the next token of a BCP 47 string.  Tokens that are larger
-// than 8 characters or include non-alphanumeric characters result in an error
-// and are gobbled and removed from the output.
-// It returns the end position of the last token consumed.
-func (s *scanner) scan() (end int) {
-	end = s.end
-	s.token = nil
-	for s.start = s.next; s.next < len(s.b); {
-		i := bytes.IndexByte(s.b[s.next:], '-')
-		if i == -1 {
-			s.end = len(s.b)
-			s.next = len(s.b)
-			i = s.end - s.start
-		} else {
-			s.end = s.next + i
-			s.next = s.end + 1
-		}
-		token := s.b[s.start:s.end]
-		if i < 1 || i > 8 || !isAlphaNum(token) {
-			s.gobble(errSyntax)
-			continue
-		}
-		s.token = token
-		return end
-	}
-	if n := len(s.b); n > 0 && s.b[n-1] == '-' {
-		s.setError(errSyntax)
-		s.b = s.b[:len(s.b)-1]
-	}
-	s.done = true
-	return end
-}
-
-// acceptMinSize parses multiple tokens of the given size or greater.
-// It returns the end position of the last token consumed.
-func (s *scanner) acceptMinSize(min int) (end int) {
-	end = s.end
-	s.scan()
-	for ; len(s.token) >= min; s.scan() {
-		end = s.end
-	}
-	return end
+	// Subtag returns the subtag for which the error occurred.
+	Subtag() string
 }

 // Parse parses the given BCP 47 string and returns a valid Tag. If parsing
@@ -223,7 +28,7 @@ func (s *scanner) acceptMinSize(min int) (end int) {
 // ValueError. The Tag returned in this case is just stripped of the unknown
 // value. All other values are preserved. It accepts tags in the BCP 47 format
 // and extensions to this standard defined in
-// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
 // The resulting tag is canonicalized using the default canonicalization type.
 func Parse(s string) (t Tag, err error) {
 	return Default.Parse(s)
@@ -235,327 +40,18 @@ func Parse(s string) (t Tag, err error) {
 // ValueError. The Tag returned in this case is just stripped of the unknown
 // value. All other values are preserved. It accepts tags in the BCP 47 format
 // and extensions to this standard defined in
-// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
-// The resulting tag is canonicalized using the the canonicalization type c.
+// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
+// The resulting tag is canonicalized using the canonicalization type c.
 func (c CanonType) Parse(s string) (t Tag, err error) {
-	// TODO: consider supporting old-style locale key-value pairs.
-	if s == "" {
-		return und, errSyntax
+	tt, err := language.Parse(s)
+	if err != nil {
+		return makeTag(tt), err
 	}
-	if len(s) <= maxAltTaglen {
-		b := [maxAltTaglen]byte{}
-		for i, c := range s {
-			// Generating invalid UTF-8 is okay as it won't match.
-			if 'A' <= c && c <= 'Z' {
-				c += 'a' - 'A'
-			} else if c == '_' {
-				c = '-'
-			}
-			b[i] = byte(c)
-		}
-		if t, ok := grandfathered(b); ok {
-			return t, nil
-		}
-	}
-	scan := makeScannerString(s)
-	t, err = parse(&scan, s)
-	t, changed := t.canonicalize(c)
+	tt, changed := canonicalize(c, tt)
 	if changed {
-		t.remakeString()
+		tt.RemakeString()
 	}
-	return t, err
-}
-
-func parse(scan *scanner, s string) (t Tag, err error) {
-	t = und
-	var end int
-	if n := len(scan.token); n <= 1 {
-		scan.toLower(0, len(scan.b))
-		if n == 0 || scan.token[0] != 'x' {
-			return t, errSyntax
-		}
-		end = parseExtensions(scan)
-	} else if n >= 4 {
-		return und, errSyntax
-	} else { // the usual case
-		t, end = parseTag(scan)
-		if n := len(scan.token); n == 1 {
-			t.pExt = uint16(end)
-			end = parseExtensions(scan)
-		} else if end < len(scan.b) {
-			scan.setError(errSyntax)
-			scan.b = scan.b[:end]
-		}
-	}
-	if int(t.pVariant) < len(scan.b) {
-		if end < len(s) {
-			s = s[:end]
-		}
-		if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
-			t.str = s
-		} else {
-			t.str = string(scan.b)
-		}
-	} else {
-		t.pVariant, t.pExt = 0, 0
-	}
-	return t, scan.err
-}
-
-// parseTag parses language, script, region and variants.
-// It returns a Tag and the end position in the input that was parsed.
-func parseTag(scan *scanner) (t Tag, end int) {
-	var e error
-	// TODO: set an error if an unknown lang, script or region is encountered.
-	t.lang, e = getLangID(scan.token)
-	scan.setError(e)
-	scan.replace(t.lang.String())
-	langStart := scan.start
-	end = scan.scan()
-	for len(scan.token) == 3 && isAlpha(scan.token[0]) {
-		// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
-		// to a tag of the form <extlang>.
-		lang, e := getLangID(scan.token)
-		if lang != 0 {
-			t.lang = lang
-			copy(scan.b[langStart:], lang.String())
-			scan.b[langStart+3] = '-'
-			scan.start = langStart + 4
-		}
-		scan.gobble(e)
-		end = scan.scan()
-	}
-	if len(scan.token) == 4 && isAlpha(scan.token[0]) {
-		t.script, e = getScriptID(script, scan.token)
-		if t.script == 0 {
-			scan.gobble(e)
-		}
-		end = scan.scan()
-	}
-	if n := len(scan.token); n >= 2 && n <= 3 {
-		t.region, e = getRegionID(scan.token)
-		if t.region == 0 {
-			scan.gobble(e)
-		} else {
-			scan.replace(t.region.String())
-		}
-		end = scan.scan()
-	}
-	scan.toLower(scan.start, len(scan.b))
-	t.pVariant = byte(end)
-	end = parseVariants(scan, end, t)
-	t.pExt = uint16(end)
-	return t, end
-}
-
-var separator = []byte{'-'}
-
-// parseVariants scans tokens as long as each token is a valid variant string.
-// Duplicate variants are removed.
-func parseVariants(scan *scanner, end int, t Tag) int {
-	start := scan.start
-	varIDBuf := [4]uint8{}
-	variantBuf := [4][]byte{}
-	varID := varIDBuf[:0]
-	variant := variantBuf[:0]
-	last := -1
-	needSort := false
-	for ; len(scan.token) >= 4; scan.scan() {
-		// TODO: measure the impact of needing this conversion and redesign
-		// the data structure if there is an issue.
-		v, ok := variantIndex[string(scan.token)]
-		if !ok {
-			// unknown variant
-			// TODO: allow user-defined variants?
-			scan.gobble(mkErrInvalid(scan.token))
-			continue
-		}
-		varID = append(varID, v)
-		variant = append(variant, scan.token)
-		if !needSort {
-			if last < int(v) {
-				last = int(v)
-			} else {
-				needSort = true
-				// There is no legal combinations of more than 7 variants
-				// (and this is by no means a useful sequence).
-				const maxVariants = 8
-				if len(varID) > maxVariants {
-					break
-				}
-			}
-		}
-		end = scan.end
-	}
-	if needSort {
-		sort.Sort(variantsSort{varID, variant})
-		k, l := 0, -1
-		for i, v := range varID {
-			w := int(v)
-			if l == w {
-				// Remove duplicates.
-				continue
-			}
-			varID[k] = varID[i]
-			variant[k] = variant[i]
-			k++
-			l = w
-		}
-		if str := bytes.Join(variant[:k], separator); len(str) == 0 {
-			end = start - 1
-		} else {
-			scan.resizeRange(start, end, len(str))
-			copy(scan.b[scan.start:], str)
-			end = scan.end
-		}
-	}
-	return end
-}
-
-type variantsSort struct {
-	i []uint8
-	v [][]byte
-}
-
-func (s variantsSort) Len() int {
-	return len(s.i)
-}
-
-func (s variantsSort) Swap(i, j int) {
-	s.i[i], s.i[j] = s.i[j], s.i[i]
-	s.v[i], s.v[j] = s.v[j], s.v[i]
-}
-
-func (s variantsSort) Less(i, j int) bool {
-	return s.i[i] < s.i[j]
-}
-
-type bytesSort [][]byte
-
-func (b bytesSort) Len() int {
-	return len(b)
-}
-
-func (b bytesSort) Swap(i, j int) {
-	b[i], b[j] = b[j], b[i]
-}
-
-func (b bytesSort) Less(i, j int) bool {
-	return bytes.Compare(b[i], b[j]) == -1
-}
-
-// parseExtensions parses and normalizes the extensions in the buffer.
-// It returns the last position of scan.b that is part of any extension.
-// It also trims scan.b to remove excess parts accordingly.
-func parseExtensions(scan *scanner) int {
-	start := scan.start
-	exts := [][]byte{}
-	private := []byte{}
-	end := scan.end
-	for len(scan.token) == 1 {
-		extStart := scan.start
-		ext := scan.token[0]
-		end = parseExtension(scan)
-		extension := scan.b[extStart:end]
-		if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
-			scan.setError(errSyntax)
-			end = extStart
-			continue
-		} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
-			scan.b = scan.b[:end]
-			return end
-		} else if ext == 'x' {
-			private = extension
-			break
-		}
-		exts = append(exts, extension)
-	}
-	sort.Sort(bytesSort(exts))
-	if len(private) > 0 {
-		exts = append(exts, private)
-	}
-	scan.b = scan.b[:start]
-	if len(exts) > 0 {
-		scan.b = append(scan.b, bytes.Join(exts, separator)...)
-	} else if start > 0 {
-		// Strip trailing '-'.
-		scan.b = scan.b[:start-1]
-	}
-	return end
-}
-
-// parseExtension parses a single extension and returns the position of
-// the extension end.
-func parseExtension(scan *scanner) int {
-	start, end := scan.start, scan.end
-	switch scan.token[0] {
-	case 'u':
-		attrStart := end
-		scan.scan()
-		for last := []byte{}; len(scan.token) > 2; scan.scan() {
-			if bytes.Compare(scan.token, last) != -1 {
-				// Attributes are unsorted. Start over from scratch.
-				p := attrStart + 1
-				scan.next = p
-				attrs := [][]byte{}
-				for scan.scan(); len(scan.token) > 2; scan.scan() {
-					attrs = append(attrs, scan.token)
-					end = scan.end
-				}
-				sort.Sort(bytesSort(attrs))
-				copy(scan.b[p:], bytes.Join(attrs, separator))
-				break
-			}
-			last = scan.token
-			end = scan.end
-		}
-		var last, key []byte
-		for attrEnd := end; len(scan.token) == 2; last = key {
-			key = scan.token
-			keyEnd := scan.end
-			end = scan.acceptMinSize(3)
-			// TODO: check key value validity
-			if keyEnd == end || bytes.Compare(key, last) != 1 {
-				// We have an invalid key or the keys are not sorted.
-				// Start scanning keys from scratch and reorder.
-				p := attrEnd + 1
-				scan.next = p
-				keys := [][]byte{}
-				for scan.scan(); len(scan.token) == 2; {
-					keyStart, keyEnd := scan.start, scan.end
-					end = scan.acceptMinSize(3)
-					if keyEnd != end {
-						keys = append(keys, scan.b[keyStart:end])
-					} else {
-						scan.setError(errSyntax)
-						end = keyStart
-					}
-				}
-				sort.Sort(bytesSort(keys))
-				reordered := bytes.Join(keys, separator)
-				if e := p + len(reordered); e < end {
-					scan.deleteRange(e, end)
-					end = e
-				}
-				copy(scan.b[p:], bytes.Join(keys, separator))
-				break
-			}
-		}
-	case 't':
-		scan.scan()
-		if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
-			_, end = parseTag(scan)
-			scan.toLower(start, end)
-		}
-		for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
-			end = scan.acceptMinSize(3)
-		}
-	case 'x':
-		end = scan.acceptMinSize(1)
-	default:
-		end = scan.acceptMinSize(2)
-	}
-	return end
+	return makeTag(tt), err
 }

 // Compose creates a Tag from individual parts, which may be of type Tag, Base,
@@ -563,10 +59,11 @@ func parseExtension(scan *scanner) int {
 // Base, Script or Region or slice of type Variant or Extension is passed more
 // than once, the latter will overwrite the former. Variants and Extensions are
 // accumulated, but if two extensions of the same type are passed, the latter
-// will replace the former. A Tag overwrites all former values and typically
-// only makes sense as the first argument. The resulting tag is returned after
-// canonicalizing using the Default CanonType. If one or more errors are
-// encountered, one of the errors is returned.
+// will replace the former. For -u extensions, though, the key-type pairs are
+// added, where later values overwrite older ones. A Tag overwrites all former
+// values and typically only makes sense as the first argument. The resulting
+// tag is returned after canonicalizing using the Default CanonType. If one or
+// more errors are encountered, one of the errors is returned.
 func Compose(part ...interface{}) (t Tag, err error) {
 	return Default.Compose(part...)
 }
@@ -576,191 +73,63 @@ func Compose(part ...interface{}) (t Tag, err error) {
 // Base, Script or Region or slice of type Variant or Extension is passed more
 // than once, the latter will overwrite the former. Variants and Extensions are
 // accumulated, but if two extensions of the same type are passed, the latter
-// will replace the former. A Tag overwrites all former values and typically
-// only makes sense as the first argument. The resulting tag is returned after
-// canonicalizing using CanonType c. If one or more errors are encountered,
-// one of the errors is returned.
+// will replace the former. For -u extensions, though, the key-type pairs are
+// added, where later values overwrite older ones. A Tag overwrites all former
+// values and typically only makes sense as the first argument. The resulting
+// tag is returned after canonicalizing using CanonType c. If one or more errors
+// are encountered, one of the errors is returned.
 func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
-	var b builder
-	if err = b.update(part...); err != nil {
+	var b language.Builder
+	if err = update(&b, part...); err != nil {
 		return und, err
 	}
-	t, _ = b.tag.canonicalize(c)
-
-	if len(b.ext) > 0 || len(b.variant) > 0 {
-		sort.Sort(sortVariant(b.variant))
-		sort.Strings(b.ext)
-		if b.private != "" {
-			b.ext = append(b.ext, b.private)
-		}
-		n := maxCoreSize + tokenLen(b.variant...) + tokenLen(b.ext...)
-		buf := make([]byte, n)
-		p := t.genCoreBytes(buf)
-		t.pVariant = byte(p)
-		p += appendTokens(buf[p:], b.variant...)
-		t.pExt = uint16(p)
-		p += appendTokens(buf[p:], b.ext...)
-		t.str = string(buf[:p])
-	} else if b.private != "" {
-		t.str = b.private
-		t.remakeString()
-	}
-	return
-}
-
-type builder struct {
-	tag Tag
-
-	private string // the x extension
-	ext     []string
-	variant []string
-
-	err error
-}
-
-func (b *builder) addExt(e string) {
-	if e == "" {
-	} else if e[0] == 'x' {
-		b.private = e
-	} else {
-		b.ext = append(b.ext, e)
-	}
+	b.Tag, _ = canonicalize(c, b.Tag)
+	return makeTag(b.Make()), err
 }

 var errInvalidArgument = errors.New("invalid Extension or Variant")

-func (b *builder) update(part ...interface{}) (err error) {
-	replace := func(l *[]string, s string, eq func(a, b string) bool) bool {
-		if s == "" {
-			b.err = errInvalidArgument
-			return true
-		}
-		for i, v := range *l {
-			if eq(v, s) {
-				(*l)[i] = s
-				return true
-			}
-		}
-		return false
-	}
+func update(b *language.Builder, part ...interface{}) (err error) {
 	for _, x := range part {
 		switch v := x.(type) {
 		case Tag:
-			b.tag.lang = v.lang
-			b.tag.region = v.region
-			b.tag.script = v.script
-			if v.str != "" {
-				b.variant = nil
-				for x, s := "", v.str[v.pVariant:v.pExt]; s != ""; {
-					x, s = nextToken(s)
-					b.variant = append(b.variant, x)
-				}
-				b.ext, b.private = nil, ""
-				for i, e := int(v.pExt), ""; i < len(v.str); {
-					i, e = getExtension(v.str, i)
-					b.addExt(e)
-				}
-			}
+			b.SetTag(v.tag())
 		case Base:
-			b.tag.lang = v.langID
+			b.Tag.LangID = v.langID
 		case Script:
-			b.tag.script = v.scriptID
+			b.Tag.ScriptID = v.scriptID
 		case Region:
-			b.tag.region = v.regionID
+			b.Tag.RegionID = v.regionID
 		case Variant:
-			if !replace(&b.variant, v.variant, func(a, b string) bool { return a == b }) {
-				b.variant = append(b.variant, v.variant)
+			if v.variant == "" {
+				err = errInvalidArgument
+				break
 			}
+			b.AddVariant(v.variant)
 		case Extension:
-			if !replace(&b.ext, v.s, func(a, b string) bool { return a[0] == b[0] }) {
-				b.addExt(v.s)
+			if v.s == "" {
+				err = errInvalidArgument
+				break
 			}
+			b.SetExt(v.s)
 		case []Variant:
-			b.variant = nil
-			for _, x := range v {
-				b.update(x)
+			b.ClearVariants()
+			for _, v := range v {
+				b.AddVariant(v.variant)
 			}
 		case []Extension:
-			b.ext, b.private = nil, ""
+			b.ClearExtensions()
 			for _, e := range v {
-				b.update(e)
+				b.SetExt(e.s)
 			}
 		// TODO: support parsing of raw strings based on morphology or just extensions?
 		case error:
-			err = v
-		}
-	}
-	return
-}
-
-func tokenLen(token ...string) (n int) {
-	for _, t := range token {
-		n += len(t) + 1
-	}
-	return
-}
-
-func appendTokens(b []byte, token ...string) int {
-	p := 0
-	for _, t := range token {
-		b[p] = '-'
-		copy(b[p+1:], t)
-		p += 1 + len(t)
-	}
-	return p
-}
-
-type sortVariant []string
-
-func (s sortVariant) Len() int {
-	return len(s)
-}
-
-func (s sortVariant) Swap(i, j int) {
-	s[j], s[i] = s[i], s[j]
-}
-
-func (s sortVariant) Less(i, j int) bool {
-	return variantIndex[s[i]] < variantIndex[s[j]]
-}
-
-func findExt(list []string, x byte) int {
-	for i, e := range list {
-		if e[0] == x {
-			return i
-		}
-	}
-	return -1
-}
-
-// getExtension returns the name, body and end position of the extension.
-func getExtension(s string, p int) (end int, ext string) {
-	if s[p] == '-' {
-		p++
-	}
-	if s[p] == 'x' {
-		return len(s), s[p:]
-	}
-	end = nextExtension(s, p)
-	return end, s[p:end]
-}
-
-// nextExtension finds the next extension within the string, searching
-// for the -<char>- pattern from position p.
-// In the fast majority of cases, language tags will have at most
-// one extension and extensions tend to be small.
-func nextExtension(s string, p int) int {
-	for n := len(s) - 3; p < n; {
-		if s[p] == '-' {
-			if s[p+2] == '-' {
-				return p
+			if v != nil {
+				err = v
 			}
-			p += 3
-		} else {
-			p++
 		}
 	}
-	return len(s)
+	return
 }

 var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
@@ -788,7 +157,7 @@ func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
 			if !ok {
 				return nil, nil, err
 			}
-			t = Tag{lang: id}
+			t = makeTag(language.Tag{LangID: id})
 		}

 		// Scan the optional weight.
@@ -830,9 +199,9 @@ func split(s string, c byte) (head, tail string) {
 	return strings.TrimSpace(s), ""
 }

-// Add hack mapping to deal with a small number of cases that that occur
+// Add hack mapping to deal with a small number of cases that occur
 // in Accept-Language (with reasonable frequency).
-var acceptFallback = map[string]langID{
+var acceptFallback = map[string]language.Language{
 	"english": _en,
 	"deutsch": _de,
 	"italian": _it,
@@ -4,6 +4,8 @@

 package language

+import "golang.org/x/text/internal/language/compact"
+
 // TODO: Various sets of commonly use tags and regions.

 // MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
@@ -61,83 +63,83 @@ var (

 	Und Tag = Tag{}

-	Afrikaans            Tag = Tag{lang: _af}                //  af
-	Amharic              Tag = Tag{lang: _am}                //  am
-	Arabic               Tag = Tag{lang: _ar}                //  ar
-	ModernStandardArabic Tag = Tag{lang: _ar, region: _001}  //  ar-001
-	Azerbaijani          Tag = Tag{lang: _az}                //  az
-	Bulgarian            Tag = Tag{lang: _bg}                //  bg
-	Bengali              Tag = Tag{lang: _bn}                //  bn
-	Catalan              Tag = Tag{lang: _ca}                //  ca
-	Czech                Tag = Tag{lang: _cs}                //  cs
-	Danish               Tag = Tag{lang: _da}                //  da
-	German               Tag = Tag{lang: _de}                //  de
-	Greek                Tag = Tag{lang: _el}                //  el
-	English              Tag = Tag{lang: _en}                //  en
-	AmericanEnglish      Tag = Tag{lang: _en, region: _US}   //  en-US
-	BritishEnglish       Tag = Tag{lang: _en, region: _GB}   //  en-GB
-	Spanish              Tag = Tag{lang: _es}                //  es
-	EuropeanSpanish      Tag = Tag{lang: _es, region: _ES}   //  es-ES
-	LatinAmericanSpanish Tag = Tag{lang: _es, region: _419}  //  es-419
-	Estonian             Tag = Tag{lang: _et}                //  et
-	Persian              Tag = Tag{lang: _fa}                //  fa
-	Finnish              Tag = Tag{lang: _fi}                //  fi
-	Filipino             Tag = Tag{lang: _fil}               //  fil
-	French               Tag = Tag{lang: _fr}                //  fr
-	CanadianFrench       Tag = Tag{lang: _fr, region: _CA}   //  fr-CA
-	Gujarati             Tag = Tag{lang: _gu}                //  gu
-	Hebrew               Tag = Tag{lang: _he}                //  he
-	Hindi                Tag = Tag{lang: _hi}                //  hi
-	Croatian             Tag = Tag{lang: _hr}                //  hr
-	Hungarian            Tag = Tag{lang: _hu}                //  hu
-	Armenian             Tag = Tag{lang: _hy}                //  hy
-	Indonesian           Tag = Tag{lang: _id}                //  id
-	Icelandic            Tag = Tag{lang: _is}                //  is
-	Italian              Tag = Tag{lang: _it}                //  it
-	Japanese             Tag = Tag{lang: _ja}                //  ja
-	Georgian             Tag = Tag{lang: _ka}                //  ka
-	Kazakh               Tag = Tag{lang: _kk}                //  kk
-	Khmer                Tag = Tag{lang: _km}                //  km
-	Kannada              Tag = Tag{lang: _kn}                //  kn
-	Korean               Tag = Tag{lang: _ko}                //  ko
-	Kirghiz              Tag = Tag{lang: _ky}                //  ky
-	Lao                  Tag = Tag{lang: _lo}                //  lo
-	Lithuanian           Tag = Tag{lang: _lt}                //  lt
-	Latvian              Tag = Tag{lang: _lv}                //  lv
-	Macedonian           Tag = Tag{lang: _mk}                //  mk
-	Malayalam            Tag = Tag{lang: _ml}                //  ml
-	Mongolian            Tag = Tag{lang: _mn}                //  mn
-	Marathi              Tag = Tag{lang: _mr}                //  mr
-	Malay                Tag = Tag{lang: _ms}                //  ms
-	Burmese              Tag = Tag{lang: _my}                //  my
-	Nepali               Tag = Tag{lang: _ne}                //  ne
-	Dutch                Tag = Tag{lang: _nl}                //  nl
-	Norwegian            Tag = Tag{lang: _no}                //  no
-	Punjabi              Tag = Tag{lang: _pa}                //  pa
-	Polish               Tag = Tag{lang: _pl}                //  pl
-	Portuguese           Tag = Tag{lang: _pt}                //  pt
-	BrazilianPortuguese  Tag = Tag{lang: _pt, region: _BR}   //  pt-BR
-	EuropeanPortuguese   Tag = Tag{lang: _pt, region: _PT}   //  pt-PT
-	Romanian             Tag = Tag{lang: _ro}                //  ro
-	Russian              Tag = Tag{lang: _ru}                //  ru
-	Sinhala              Tag = Tag{lang: _si}                //  si
-	Slovak               Tag = Tag{lang: _sk}                //  sk
-	Slovenian            Tag = Tag{lang: _sl}                //  sl
-	Albanian             Tag = Tag{lang: _sq}                //  sq
-	Serbian              Tag = Tag{lang: _sr}                //  sr
-	SerbianLatin         Tag = Tag{lang: _sr, script: _Latn} //  sr-Latn
-	Swedish              Tag = Tag{lang: _sv}                //  sv
-	Swahili              Tag = Tag{lang: _sw}                //  sw
-	Tamil                Tag = Tag{lang: _ta}                //  ta
-	Telugu               Tag = Tag{lang: _te}                //  te
-	Thai                 Tag = Tag{lang: _th}                //  th
-	Turkish              Tag = Tag{lang: _tr}                //  tr
-	Ukrainian            Tag = Tag{lang: _uk}                //  uk
-	Urdu                 Tag = Tag{lang: _ur}                //  ur
-	Uzbek                Tag = Tag{lang: _uz}                //  uz
-	Vietnamese           Tag = Tag{lang: _vi}                //  vi
-	Chinese              Tag = Tag{lang: _zh}                //  zh
-	SimplifiedChinese    Tag = Tag{lang: _zh, script: _Hans} //  zh-Hans
-	TraditionalChinese   Tag = Tag{lang: _zh, script: _Hant} //  zh-Hant
-	Zulu                 Tag = Tag{lang: _zu}                //  zu
+	Afrikaans            Tag = Tag(compact.Afrikaans)
+	Amharic              Tag = Tag(compact.Amharic)
+	Arabic               Tag = Tag(compact.Arabic)
+	ModernStandardArabic Tag = Tag(compact.ModernStandardArabic)
+	Azerbaijani          Tag = Tag(compact.Azerbaijani)
+	Bulgarian            Tag = Tag(compact.Bulgarian)
+	Bengali              Tag = Tag(compact.Bengali)
+	Catalan              Tag = Tag(compact.Catalan)
+	Czech                Tag = Tag(compact.Czech)
+	Danish               Tag = Tag(compact.Danish)
+	German               Tag = Tag(compact.German)
+	Greek                Tag = Tag(compact.Greek)
+	English              Tag = Tag(compact.English)
+	AmericanEnglish      Tag = Tag(compact.AmericanEnglish)
+	BritishEnglish       Tag = Tag(compact.BritishEnglish)
+	Spanish              Tag = Tag(compact.Spanish)
+	EuropeanSpanish      Tag = Tag(compact.EuropeanSpanish)
+	LatinAmericanSpanish Tag = Tag(compact.LatinAmericanSpanish)
+	Estonian             Tag = Tag(compact.Estonian)
+	Persian              Tag = Tag(compact.Persian)
+	Finnish              Tag = Tag(compact.Finnish)
+	Filipino             Tag = Tag(compact.Filipino)
+	French               Tag = Tag(compact.French)
+	CanadianFrench       Tag = Tag(compact.CanadianFrench)
+	Gujarati             Tag = Tag(compact.Gujarati)
+	Hebrew               Tag = Tag(compact.Hebrew)
+	Hindi                Tag = Tag(compact.Hindi)
+	Croatian             Tag = Tag(compact.Croatian)
+	Hungarian            Tag = Tag(compact.Hungarian)
+	Armenian             Tag = Tag(compact.Armenian)
+	Indonesian           Tag = Tag(compact.Indonesian)
+	Icelandic            Tag = Tag(compact.Icelandic)
+	Italian              Tag = Tag(compact.Italian)
+	Japanese             Tag = Tag(compact.Japanese)
+	Georgian             Tag = Tag(compact.Georgian)
+	Kazakh               Tag = Tag(compact.Kazakh)
+	Khmer                Tag = Tag(compact.Khmer)
+	Kannada              Tag = Tag(compact.Kannada)
+	Korean               Tag = Tag(compact.Korean)
+	Kirghiz              Tag = Tag(compact.Kirghiz)
+	Lao                  Tag = Tag(compact.Lao)
+	Lithuanian           Tag = Tag(compact.Lithuanian)
+	Latvian              Tag = Tag(compact.Latvian)
+	Macedonian           Tag = Tag(compact.Macedonian)
+	Malayalam            Tag = Tag(compact.Malayalam)
+	Mongolian            Tag = Tag(compact.Mongolian)
+	Marathi              Tag = Tag(compact.Marathi)
+	Malay                Tag = Tag(compact.Malay)
+	Burmese              Tag = Tag(compact.Burmese)
+	Nepali               Tag = Tag(compact.Nepali)
+	Dutch                Tag = Tag(compact.Dutch)
+	Norwegian            Tag = Tag(compact.Norwegian)
+	Punjabi              Tag = Tag(compact.Punjabi)
+	Polish               Tag = Tag(compact.Polish)
+	Portuguese           Tag = Tag(compact.Portuguese)
+	BrazilianPortuguese  Tag = Tag(compact.BrazilianPortuguese)
+	EuropeanPortuguese   Tag = Tag(compact.EuropeanPortuguese)
+	Romanian             Tag = Tag(compact.Romanian)
+	Russian              Tag = Tag(compact.Russian)
+	Sinhala              Tag = Tag(compact.Sinhala)
+	Slovak               Tag = Tag(compact.Slovak)
+	Slovenian            Tag = Tag(compact.Slovenian)
+	Albanian             Tag = Tag(compact.Albanian)
+	Serbian              Tag = Tag(compact.Serbian)
+	SerbianLatin         Tag = Tag(compact.SerbianLatin)
+	Swedish              Tag = Tag(compact.Swedish)
+	Swahili              Tag = Tag(compact.Swahili)
+	Tamil                Tag = Tag(compact.Tamil)
+	Telugu               Tag = Tag(compact.Telugu)
+	Thai                 Tag = Tag(compact.Thai)
+	Turkish              Tag = Tag(compact.Turkish)
+	Ukrainian            Tag = Tag(compact.Ukrainian)
+	Urdu                 Tag = Tag(compact.Urdu)
+	Uzbek                Tag = Tag(compact.Uzbek)
+	Vietnamese           Tag = Tag(compact.Vietnamese)
+	Chinese              Tag = Tag(compact.Chinese)
+	SimplifiedChinese    Tag = Tag(compact.SimplifiedChinese)
+	TraditionalChinese   Tag = Tag(compact.TraditionalChinese)
+	Zulu                 Tag = Tag(compact.Zulu)
 )
@@ -0,0 +1,336 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package bidirule implements the Bidi Rule defined by RFC 5893.
+//
+// This package is under development. The API may change without notice and
+// without preserving backward compatibility.
+package bidirule
+
+import (
+	"errors"
+	"unicode/utf8"
+
+	"golang.org/x/text/transform"
+	"golang.org/x/text/unicode/bidi"
+)
+
+// This file contains an implementation of RFC 5893: Right-to-Left Scripts for
+// Internationalized Domain Names for Applications (IDNA)
+//
+// A label is an individual component of a domain name.  Labels are usually
+// shown separated by dots; for example, the domain name "www.example.com" is
+// composed of three labels: "www", "example", and "com".
+//
+// An RTL label is a label that contains at least one character of class R, AL,
+// or AN. An LTR label is any label that is not an RTL label.
+//
+// A "Bidi domain name" is a domain name that contains at least one RTL label.
+//
+//  The following guarantees can be made based on the above:
+//
+//  o  In a domain name consisting of only labels that satisfy the rule,
+//     the requirements of Section 3 are satisfied.  Note that even LTR
+//     labels and pure ASCII labels have to be tested.
+//
+//  o  In a domain name consisting of only LDH labels (as defined in the
+//     Definitions document [RFC5890]) and labels that satisfy the rule,
+//     the requirements of Section 3 are satisfied as long as a label
+//     that starts with an ASCII digit does not come after a
+//     right-to-left label.
+//
+//  No guarantee is given for other combinations.
+
+// ErrInvalid indicates a label is invalid according to the Bidi Rule.
+var ErrInvalid = errors.New("bidirule: failed Bidi Rule")
+
+type ruleState uint8
+
+const (
+	ruleInitial ruleState = iota
+	ruleLTR
+	ruleLTRFinal
+	ruleRTL
+	ruleRTLFinal
+	ruleInvalid
+)
+
+type ruleTransition struct {
+	next ruleState
+	mask uint16
+}
+
+var transitions = [...][2]ruleTransition{
+	// [2.1] The first character must be a character with Bidi property L, R, or
+	// AL. If it has the R or AL property, it is an RTL label; if it has the L
+	// property, it is an LTR label.
+	ruleInitial: {
+		{ruleLTRFinal, 1 << bidi.L},
+		{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL},
+	},
+	ruleRTL: {
+		// [2.3] In an RTL label, the end of the label must be a character with
+		// Bidi property R, AL, EN, or AN, followed by zero or more characters
+		// with Bidi property NSM.
+		{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN},
+
+		// [2.2] In an RTL label, only characters with the Bidi properties R,
+		// AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
+		// We exclude the entries from [2.3]
+		{ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM},
+	},
+	ruleRTLFinal: {
+		// [2.3] In an RTL label, the end of the label must be a character with
+		// Bidi property R, AL, EN, or AN, followed by zero or more characters
+		// with Bidi property NSM.
+		{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN | 1<<bidi.NSM},
+
+		// [2.2] In an RTL label, only characters with the Bidi properties R,
+		// AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
+		// We exclude the entries from [2.3] and NSM.
+		{ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN},
+	},
+	ruleLTR: {
+		// [2.6] In an LTR label, the end of the label must be a character with
+		// Bidi property L or EN, followed by zero or more characters with Bidi
+		// property NSM.
+		{ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN},
+
+		// [2.5] In an LTR label, only characters with the Bidi properties L,
+		// EN, ES, CS, ET, ON, BN, or NSM are allowed.
+		// We exclude the entries from [2.6].
+		{ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM},
+	},
+	ruleLTRFinal: {
+		// [2.6] In an LTR label, the end of the label must be a character with
+		// Bidi property L or EN, followed by zero or more characters with Bidi
+		// property NSM.
+		{ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN | 1<<bidi.NSM},
+
+		// [2.5] In an LTR label, only characters with the Bidi properties L,
+		// EN, ES, CS, ET, ON, BN, or NSM are allowed.
+		// We exclude the entries from [2.6].
+		{ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN},
+	},
+	ruleInvalid: {
+		{ruleInvalid, 0},
+		{ruleInvalid, 0},
+	},
+}
+
+// [2.4] In an RTL label, if an EN is present, no AN may be present, and
+// vice versa.
+const exclusiveRTL = uint16(1<<bidi.EN | 1<<bidi.AN)
+
+// From RFC 5893
+// An RTL label is a label that contains at least one character of type
+// R, AL, or AN.
+//
+// An LTR label is any label that is not an RTL label.
+
+// Direction reports the direction of the given label as defined by RFC 5893.
+// The Bidi Rule does not have to be applied to labels of the category
+// LeftToRight.
+func Direction(b []byte) bidi.Direction {
+	for i := 0; i < len(b); {
+		e, sz := bidi.Lookup(b[i:])
+		if sz == 0 {
+			i++
+		}
+		c := e.Class()
+		if c == bidi.R || c == bidi.AL || c == bidi.AN {
+			return bidi.RightToLeft
+		}
+		i += sz
+	}
+	return bidi.LeftToRight
+}
+
+// DirectionString reports the direction of the given label as defined by RFC
+// 5893. The Bidi Rule does not have to be applied to labels of the category
+// LeftToRight.
+func DirectionString(s string) bidi.Direction {
+	for i := 0; i < len(s); {
+		e, sz := bidi.LookupString(s[i:])
+		if sz == 0 {
+			i++
+			continue
+		}
+		c := e.Class()
+		if c == bidi.R || c == bidi.AL || c == bidi.AN {
+			return bidi.RightToLeft
+		}
+		i += sz
+	}
+	return bidi.LeftToRight
+}
+
+// Valid reports whether b conforms to the BiDi rule.
+func Valid(b []byte) bool {
+	var t Transformer
+	if n, ok := t.advance(b); !ok || n < len(b) {
+		return false
+	}
+	return t.isFinal()
+}
+
+// ValidString reports whether s conforms to the BiDi rule.
+func ValidString(s string) bool {
+	var t Transformer
+	if n, ok := t.advanceString(s); !ok || n < len(s) {
+		return false
+	}
+	return t.isFinal()
+}
+
+// New returns a Transformer that verifies that input adheres to the Bidi Rule.
+func New() *Transformer {
+	return &Transformer{}
+}
+
+// Transformer implements transform.Transform.
+type Transformer struct {
+	state  ruleState
+	hasRTL bool
+	seen   uint16
+}
+
+// A rule can only be violated for "Bidi Domain names", meaning if one of the
+// following categories has been observed.
+func (t *Transformer) isRTL() bool {
+	const isRTL = 1<<bidi.R | 1<<bidi.AL | 1<<bidi.AN
+	return t.seen&isRTL != 0
+}
+
+// Reset implements transform.Transformer.
+func (t *Transformer) Reset() { *t = Transformer{} }
+
+// Transform implements transform.Transformer. This Transformer has state and
+// needs to be reset between uses.
+func (t *Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	if len(dst) < len(src) {
+		src = src[:len(dst)]
+		atEOF = false
+		err = transform.ErrShortDst
+	}
+	n, err1 := t.Span(src, atEOF)
+	copy(dst, src[:n])
+	if err == nil || err1 != nil && err1 != transform.ErrShortSrc {
+		err = err1
+	}
+	return n, n, err
+}
+
+// Span returns the first n bytes of src that conform to the Bidi rule.
+func (t *Transformer) Span(src []byte, atEOF bool) (n int, err error) {
+	if t.state == ruleInvalid && t.isRTL() {
+		return 0, ErrInvalid
+	}
+	n, ok := t.advance(src)
+	switch {
+	case !ok:
+		err = ErrInvalid
+	case n < len(src):
+		if !atEOF {
+			err = transform.ErrShortSrc
+			break
+		}
+		err = ErrInvalid
+	case !t.isFinal():
+		err = ErrInvalid
+	}
+	return n, err
+}
+
+// Precomputing the ASCII values decreases running time for the ASCII fast path
+// by about 30%.
+var asciiTable [128]bidi.Properties
+
+func init() {
+	for i := range asciiTable {
+		p, _ := bidi.LookupRune(rune(i))
+		asciiTable[i] = p
+	}
+}
+
+func (t *Transformer) advance(s []byte) (n int, ok bool) {
+	var e bidi.Properties
+	var sz int
+	for n < len(s) {
+		if s[n] < utf8.RuneSelf {
+			e, sz = asciiTable[s[n]], 1
+		} else {
+			e, sz = bidi.Lookup(s[n:])
+			if sz <= 1 {
+				if sz == 1 {
+					// We always consider invalid UTF-8 to be invalid, even if
+					// the string has not yet been determined to be RTL.
+					// TODO: is this correct?
+					return n, false
+				}
+				return n, true // incomplete UTF-8 encoding
+			}
+		}
+		// TODO: using CompactClass would result in noticeable speedup.
+		// See unicode/bidi/prop.go:Properties.CompactClass.
+		c := uint16(1 << e.Class())
+		t.seen |= c
+		if t.seen&exclusiveRTL == exclusiveRTL {
+			t.state = ruleInvalid
+			return n, false
+		}
+		switch tr := transitions[t.state]; {
+		case tr[0].mask&c != 0:
+			t.state = tr[0].next
+		case tr[1].mask&c != 0:
+			t.state = tr[1].next
+		default:
+			t.state = ruleInvalid
+			if t.isRTL() {
+				return n, false
+			}
+		}
+		n += sz
+	}
+	return n, true
+}
+
+func (t *Transformer) advanceString(s string) (n int, ok bool) {
+	var e bidi.Properties
+	var sz int
+	for n < len(s) {
+		if s[n] < utf8.RuneSelf {
+			e, sz = asciiTable[s[n]], 1
+		} else {
+			e, sz = bidi.LookupString(s[n:])
+			if sz <= 1 {
+				if sz == 1 {
+					return n, false // invalid UTF-8
+				}
+				return n, true // incomplete UTF-8 encoding
+			}
+		}
+		// TODO: using CompactClass results in noticeable speedup.
+		// See unicode/bidi/prop.go:Properties.CompactClass.
+		c := uint16(1 << e.Class())
+		t.seen |= c
+		if t.seen&exclusiveRTL == exclusiveRTL {
+			t.state = ruleInvalid
+			return n, false
+		}
+		switch tr := transitions[t.state]; {
+		case tr[0].mask&c != 0:
+			t.state = tr[0].next
+		case tr[1].mask&c != 0:
+			t.state = tr[1].next
+		default:
+			t.state = ruleInvalid
+			if t.isRTL() {
+				return n, false
+			}
+		}
+		n += sz
+	}
+	return n, true
+}
@@ -0,0 +1,11 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build go1.10
+
+package bidirule
+
+func (t *Transformer) isFinal() bool {
+	return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial
+}
@@ -0,0 +1,14 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !go1.10
+
+package bidirule
+
+func (t *Transformer) isFinal() bool {
+	if !t.isRTL() {
+		return true
+	}
+	return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial
+}
@@ -78,8 +78,8 @@ type SpanningTransformer interface {
 	// considering the error err.
 	//
 	// A nil error means that all input bytes are known to be identical to the
-	// output produced by the Transformer. A nil error can be be returned
-	// regardless of whether atEOF is true. If err is nil, then then n must
+	// output produced by the Transformer. A nil error can be returned
+	// regardless of whether atEOF is true. If err is nil, then n must
 	// equal len(src); the converse is not necessarily true.
 	//
 	// ErrEndOfSpan means that the Transformer output may differ from the
@@ -493,7 +493,7 @@ func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err erro
 	return dstL.n, srcL.p, err
 }

-// Deprecated: use runes.Remove instead.
+// Deprecated: Use runes.Remove instead.
 func RemoveFunc(f func(r rune) bool) Transformer {
 	return removeF(f)
 }
@@ -0,0 +1,198 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:generate go run gen.go gen_trieval.go gen_ranges.go
+
+// Package bidi contains functionality for bidirectional text support.
+//
+// See https://www.unicode.org/reports/tr9.
+//
+// NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
+// and without notice.
+package bidi // import "golang.org/x/text/unicode/bidi"
+
+// TODO:
+// The following functionality would not be hard to implement, but hinges on
+// the definition of a Segmenter interface. For now this is up to the user.
+// - Iterate over paragraphs
+// - Segmenter to iterate over runs directly from a given text.
+// Also:
+// - Transformer for reordering?
+// - Transformer (validator, really) for Bidi Rule.
+
+// This API tries to avoid dealing with embedding levels for now. Under the hood
+// these will be computed, but the question is to which extent the user should
+// know they exist. We should at some point allow the user to specify an
+// embedding hierarchy, though.
+
+// A Direction indicates the overall flow of text.
+type Direction int
+
+const (
+	// LeftToRight indicates the text contains no right-to-left characters and
+	// that either there are some left-to-right characters or the option
+	// DefaultDirection(LeftToRight) was passed.
+	LeftToRight Direction = iota
+
+	// RightToLeft indicates the text contains no left-to-right characters and
+	// that either there are some right-to-left characters or the option
+	// DefaultDirection(RightToLeft) was passed.
+	RightToLeft
+
+	// Mixed indicates text contains both left-to-right and right-to-left
+	// characters.
+	Mixed
+
+	// Neutral means that text contains no left-to-right and right-to-left
+	// characters and that no default direction has been set.
+	Neutral
+)
+
+type options struct{}
+
+// An Option is an option for Bidi processing.
+type Option func(*options)
+
+// ICU allows the user to define embedding levels. This may be used, for example,
+// to use hierarchical structure of markup languages to define embeddings.
+// The following option may be a way to expose this functionality in this API.
+// // LevelFunc sets a function that associates nesting levels with the given text.
+// // The levels function will be called with monotonically increasing values for p.
+// func LevelFunc(levels func(p int) int) Option {
+// 	panic("unimplemented")
+// }
+
+// DefaultDirection sets the default direction for a Paragraph. The direction is
+// overridden if the text contains directional characters.
+func DefaultDirection(d Direction) Option {
+	panic("unimplemented")
+}
+
+// A Paragraph holds a single Paragraph for Bidi processing.
+type Paragraph struct {
+	// buffers
+}
+
+// SetBytes configures p for the given paragraph text. It replaces text
+// previously set by SetBytes or SetString. If b contains a paragraph separator
+// it will only process the first paragraph and report the number of bytes
+// consumed from b including this separator. Error may be non-nil if options are
+// given.
+func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) {
+	panic("unimplemented")
+}
+
+// SetString configures p for the given paragraph text. It replaces text
+// previously set by SetBytes or SetString. If b contains a paragraph separator
+// it will only process the first paragraph and report the number of bytes
+// consumed from b including this separator. Error may be non-nil if options are
+// given.
+func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) {
+	panic("unimplemented")
+}
+
+// IsLeftToRight reports whether the principle direction of rendering for this
+// paragraphs is left-to-right. If this returns false, the principle direction
+// of rendering is right-to-left.
+func (p *Paragraph) IsLeftToRight() bool {
+	panic("unimplemented")
+}
+
+// Direction returns the direction of the text of this paragraph.
+//
+// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
+func (p *Paragraph) Direction() Direction {
+	panic("unimplemented")
+}
+
+// RunAt reports the Run at the given position of the input text.
+//
+// This method can be used for computing line breaks on paragraphs.
+func (p *Paragraph) RunAt(pos int) Run {
+	panic("unimplemented")
+}
+
+// Order computes the visual ordering of all the runs in a Paragraph.
+func (p *Paragraph) Order() (Ordering, error) {
+	panic("unimplemented")
+}
+
+// Line computes the visual ordering of runs for a single line starting and
+// ending at the given positions in the original text.
+func (p *Paragraph) Line(start, end int) (Ordering, error) {
+	panic("unimplemented")
+}
+
+// An Ordering holds the computed visual order of runs of a Paragraph. Calling
+// SetBytes or SetString on the originating Paragraph invalidates an Ordering.
+// The methods of an Ordering should only be called by one goroutine at a time.
+type Ordering struct{}
+
+// Direction reports the directionality of the runs.
+//
+// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
+func (o *Ordering) Direction() Direction {
+	panic("unimplemented")
+}
+
+// NumRuns returns the number of runs.
+func (o *Ordering) NumRuns() int {
+	panic("unimplemented")
+}
+
+// Run returns the ith run within the ordering.
+func (o *Ordering) Run(i int) Run {
+	panic("unimplemented")
+}
+
+// TODO: perhaps with options.
+// // Reorder creates a reader that reads the runes in visual order per character.
+// // Modifiers remain after the runes they modify.
+// func (l *Runs) Reorder() io.Reader {
+// 	panic("unimplemented")
+// }
+
+// A Run is a continuous sequence of characters of a single direction.
+type Run struct {
+}
+
+// String returns the text of the run in its original order.
+func (r *Run) String() string {
+	panic("unimplemented")
+}
+
+// Bytes returns the text of the run in its original order.
+func (r *Run) Bytes() []byte {
+	panic("unimplemented")
+}
+
+// TODO: methods for
+// - Display order
+// - headers and footers
+// - bracket replacement.
+
+// Direction reports the direction of the run.
+func (r *Run) Direction() Direction {
+	panic("unimplemented")
+}
+
+// Position of the Run within the text passed to SetBytes or SetString of the
+// originating Paragraph value.
+func (r *Run) Pos() (start, end int) {
+	panic("unimplemented")
+}
+
+// AppendReverse reverses the order of characters of in, appends them to out,
+// and returns the result. Modifiers will still follow the runes they modify.
+// Brackets are replaced with their counterparts.
+func AppendReverse(out, in []byte) []byte {
+	panic("unimplemented")
+}
+
+// ReverseString reverses the order of characters in s and returns a new string.
+// Modifiers will still follow the runes they modify. Brackets are replaced with
+// their counterparts.
+func ReverseString(s string) string {
+	panic("unimplemented")
+}
@@ -0,0 +1,335 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bidi
+
+import (
+	"container/list"
+	"fmt"
+	"sort"
+)
+
+// This file contains a port of the reference implementation of the
+// Bidi Parentheses Algorithm:
+// https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java
+//
+// The implementation in this file covers definitions BD14-BD16 and rule N0
+// of UAX#9.
+//
+// Some preprocessing is done for each rune before data is passed to this
+// algorithm:
+//  - opening and closing brackets are identified
+//  - a bracket pair type, like '(' and ')' is assigned a unique identifier that
+//    is identical for the opening and closing bracket. It is left to do these
+//    mappings.
+//  - The BPA algorithm requires that bracket characters that are canonical
+//    equivalents of each other be able to be substituted for each other.
+//    It is the responsibility of the caller to do this canonicalization.
+//
+// In implementing BD16, this implementation departs slightly from the "logical"
+// algorithm defined in UAX#9. In particular, the stack referenced there
+// supports operations that go beyond a "basic" stack. An equivalent
+// implementation based on a linked list is used here.
+
+// Bidi_Paired_Bracket_Type
+// BD14. An opening paired bracket is a character whose
+// Bidi_Paired_Bracket_Type property value is Open.
+//
+// BD15. A closing paired bracket is a character whose
+// Bidi_Paired_Bracket_Type property value is Close.
+type bracketType byte
+
+const (
+	bpNone bracketType = iota
+	bpOpen
+	bpClose
+)
+
+// bracketPair holds a pair of index values for opening and closing bracket
+// location of a bracket pair.
+type bracketPair struct {
+	opener int
+	closer int
+}
+
+func (b *bracketPair) String() string {
+	return fmt.Sprintf("(%v, %v)", b.opener, b.closer)
+}
+
+// bracketPairs is a slice of bracketPairs with a sort.Interface implementation.
+type bracketPairs []bracketPair
+
+func (b bracketPairs) Len() int           { return len(b) }
+func (b bracketPairs) Swap(i, j int)      { b[i], b[j] = b[j], b[i] }
+func (b bracketPairs) Less(i, j int) bool { return b[i].opener < b[j].opener }
+
+// resolvePairedBrackets runs the paired bracket part of the UBA algorithm.
+//
+// For each rune, it takes the indexes into the original string, the class the
+// bracket type (in pairTypes) and the bracket identifier (pairValues). It also
+// takes the direction type for the start-of-sentence and the embedding level.
+//
+// The identifiers for bracket types are the rune of the canonicalized opening
+// bracket for brackets (open or close) or 0 for runes that are not brackets.
+func resolvePairedBrackets(s *isolatingRunSequence) {
+	p := bracketPairer{
+		sos:              s.sos,
+		openers:          list.New(),
+		codesIsolatedRun: s.types,
+		indexes:          s.indexes,
+	}
+	dirEmbed := L
+	if s.level&1 != 0 {
+		dirEmbed = R
+	}
+	p.locateBrackets(s.p.pairTypes, s.p.pairValues)
+	p.resolveBrackets(dirEmbed, s.p.initialTypes)
+}
+
+type bracketPairer struct {
+	sos Class // direction corresponding to start of sequence
+
+	// The following is a restatement of BD 16 using non-algorithmic language.
+	//
+	// A bracket pair is a pair of characters consisting of an opening
+	// paired bracket and a closing paired bracket such that the
+	// Bidi_Paired_Bracket property value of the former equals the latter,
+	// subject to the following constraints.
+	// - both characters of a pair occur in the same isolating run sequence
+	// - the closing character of a pair follows the opening character
+	// - any bracket character can belong at most to one pair, the earliest possible one
+	// - any bracket character not part of a pair is treated like an ordinary character
+	// - pairs may nest properly, but their spans may not overlap otherwise
+
+	// Bracket characters with canonical decompositions are supposed to be
+	// treated as if they had been normalized, to allow normalized and non-
+	// normalized text to give the same result. In this implementation that step
+	// is pushed out to the caller. The caller has to ensure that the pairValue
+	// slices contain the rune of the opening bracket after normalization for
+	// any opening or closing bracket.
+
+	openers *list.List // list of positions for opening brackets
+
+	// bracket pair positions sorted by location of opening bracket
+	pairPositions bracketPairs
+
+	codesIsolatedRun []Class // directional bidi codes for an isolated run
+	indexes          []int   // array of index values into the original string
+
+}
+
+// matchOpener reports whether characters at given positions form a matching
+// bracket pair.
+func (p *bracketPairer) matchOpener(pairValues []rune, opener, closer int) bool {
+	return pairValues[p.indexes[opener]] == pairValues[p.indexes[closer]]
+}
+
+const maxPairingDepth = 63
+
+// locateBrackets locates matching bracket pairs according to BD16.
+//
+// This implementation uses a linked list instead of a stack, because, while
+// elements are added at the front (like a push) they are not generally removed
+// in atomic 'pop' operations, reducing the benefit of the stack archetype.
+func (p *bracketPairer) locateBrackets(pairTypes []bracketType, pairValues []rune) {
+	// traverse the run
+	// do that explicitly (not in a for-each) so we can record position
+	for i, index := range p.indexes {
+
+		// look at the bracket type for each character
+		if pairTypes[index] == bpNone || p.codesIsolatedRun[i] != ON {
+			// continue scanning
+			continue
+		}
+		switch pairTypes[index] {
+		case bpOpen:
+			// check if maximum pairing depth reached
+			if p.openers.Len() == maxPairingDepth {
+				p.openers.Init()
+				return
+			}
+			// remember opener location, most recent first
+			p.openers.PushFront(i)
+
+		case bpClose:
+			// see if there is a match
+			count := 0
+			for elem := p.openers.Front(); elem != nil; elem = elem.Next() {
+				count++
+				opener := elem.Value.(int)
+				if p.matchOpener(pairValues, opener, i) {
+					// if the opener matches, add nested pair to the ordered list
+					p.pairPositions = append(p.pairPositions, bracketPair{opener, i})
+					// remove up to and including matched opener
+					for ; count > 0; count-- {
+						p.openers.Remove(p.openers.Front())
+					}
+					break
+				}
+			}
+			sort.Sort(p.pairPositions)
+			// if we get here, the closing bracket matched no openers
+			// and gets ignored
+		}
+	}
+}
+
+// Bracket pairs within an isolating run sequence are processed as units so
+// that both the opening and the closing paired bracket in a pair resolve to
+// the same direction.
+//
+// N0. Process bracket pairs in an isolating run sequence sequentially in
+// the logical order of the text positions of the opening paired brackets
+// using the logic given below. Within this scope, bidirectional types EN
+// and AN are treated as R.
+//
+// Identify the bracket pairs in the current isolating run sequence
+// according to BD16. For each bracket-pair element in the list of pairs of
+// text positions:
+//
+// a Inspect the bidirectional types of the characters enclosed within the
+// bracket pair.
+//
+// b If any strong type (either L or R) matching the embedding direction is
+// found, set the type for both brackets in the pair to match the embedding
+// direction.
+//
+// o [ e ] o -> o e e e o
+//
+// o [ o e ] -> o e o e e
+//
+// o [ NI e ] -> o e NI e e
+//
+// c Otherwise, if a strong type (opposite the embedding direction) is
+// found, test for adjacent strong types as follows: 1 First, check
+// backwards before the opening paired bracket until the first strong type
+// (L, R, or sos) is found. If that first preceding strong type is opposite
+// the embedding direction, then set the type for both brackets in the pair
+// to that type. 2 Otherwise, set the type for both brackets in the pair to
+// the embedding direction.
+//
+// o [ o ] e -> o o o o e
+//
+// o [ o NI ] o -> o o o NI o o
+//
+// e [ o ] o -> e e o e o
+//
+// e [ o ] e -> e e o e e
+//
+// e ( o [ o ] NI ) e -> e e o o o o NI e e
+//
+// d Otherwise, do not set the type for the current bracket pair. Note that
+// if the enclosed text contains no strong types the paired brackets will
+// both resolve to the same level when resolved individually using rules N1
+// and N2.
+//
+// e ( NI ) o -> e ( NI ) o
+
+// getStrongTypeN0 maps character's directional code to strong type as required
+// by rule N0.
+//
+// TODO: have separate type for "strong" directionality.
+func (p *bracketPairer) getStrongTypeN0(index int) Class {
+	switch p.codesIsolatedRun[index] {
+	// in the scope of N0, number types are treated as R
+	case EN, AN, AL, R:
+		return R
+	case L:
+		return L
+	default:
+		return ON
+	}
+}
+
+// classifyPairContent reports the strong types contained inside a Bracket Pair,
+// assuming the given embedding direction.
+//
+// It returns ON if no strong type is found. If a single strong type is found,
+// it returns this type. Otherwise it returns the embedding direction.
+//
+// TODO: use separate type for "strong" directionality.
+func (p *bracketPairer) classifyPairContent(loc bracketPair, dirEmbed Class) Class {
+	dirOpposite := ON
+	for i := loc.opener + 1; i < loc.closer; i++ {
+		dir := p.getStrongTypeN0(i)
+		if dir == ON {
+			continue
+		}
+		if dir == dirEmbed {
+			return dir // type matching embedding direction found
+		}
+		dirOpposite = dir
+	}
+	// return ON if no strong type found, or class opposite to dirEmbed
+	return dirOpposite
+}
+
+// classBeforePair determines which strong types are present before a Bracket
+// Pair. Return R or L if strong type found, otherwise ON.
+func (p *bracketPairer) classBeforePair(loc bracketPair) Class {
+	for i := loc.opener - 1; i >= 0; i-- {
+		if dir := p.getStrongTypeN0(i); dir != ON {
+			return dir
+		}
+	}
+	// no strong types found, return sos
+	return p.sos
+}
+
+// assignBracketType implements rule N0 for a single bracket pair.
+func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed Class, initialTypes []Class) {
+	// rule "N0, a", inspect contents of pair
+	dirPair := p.classifyPairContent(loc, dirEmbed)
+
+	// dirPair is now L, R, or N (no strong type found)
+
+	// the following logical tests are performed out of order compared to
+	// the statement of the rules but yield the same results
+	if dirPair == ON {
+		return // case "d" - nothing to do
+	}
+
+	if dirPair != dirEmbed {
+		// case "c": strong type found, opposite - check before (c.1)
+		dirPair = p.classBeforePair(loc)
+		if dirPair == dirEmbed || dirPair == ON {
+			// no strong opposite type found before - use embedding (c.2)
+			dirPair = dirEmbed
+		}
+	}
+	// else: case "b", strong type found matching embedding,
+	// no explicit action needed, as dirPair is already set to embedding
+	// direction
+
+	// set the bracket types to the type found
+	p.setBracketsToType(loc, dirPair, initialTypes)
+}
+
+func (p *bracketPairer) setBracketsToType(loc bracketPair, dirPair Class, initialTypes []Class) {
+	p.codesIsolatedRun[loc.opener] = dirPair
+	p.codesIsolatedRun[loc.closer] = dirPair
+
+	for i := loc.opener + 1; i < loc.closer; i++ {
+		index := p.indexes[i]
+		if initialTypes[index] != NSM {
+			break
+		}
+		p.codesIsolatedRun[i] = dirPair
+	}
+
+	for i := loc.closer + 1; i < len(p.indexes); i++ {
+		index := p.indexes[i]
+		if initialTypes[index] != NSM {
+			break
+		}
+		p.codesIsolatedRun[i] = dirPair
+	}
+}
+
+// resolveBrackets implements rule N0 for a list of pairs.
+func (p *bracketPairer) resolveBrackets(dirEmbed Class, initialTypes []Class) {
+	for _, loc := range p.pairPositions {
+		p.assignBracketType(loc, dirEmbed, initialTypes)
+	}
+}
@@ -0,0 +1,133 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+import (
+	"flag"
+	"log"
+
+	"golang.org/x/text/internal/gen"
+	"golang.org/x/text/internal/triegen"
+	"golang.org/x/text/internal/ucd"
+)
+
+var outputFile = flag.String("out", "tables.go", "output file")
+
+func main() {
+	gen.Init()
+	gen.Repackage("gen_trieval.go", "trieval.go", "bidi")
+	gen.Repackage("gen_ranges.go", "ranges_test.go", "bidi")
+
+	genTables()
+}
+
+// bidiClass names and codes taken from class "bc" in
+// https://www.unicode.org/Public/8.0.0/ucd/PropertyValueAliases.txt
+var bidiClass = map[string]Class{
+	"AL":  AL,  // ArabicLetter
+	"AN":  AN,  // ArabicNumber
+	"B":   B,   // ParagraphSeparator
+	"BN":  BN,  // BoundaryNeutral
+	"CS":  CS,  // CommonSeparator
+	"EN":  EN,  // EuropeanNumber
+	"ES":  ES,  // EuropeanSeparator
+	"ET":  ET,  // EuropeanTerminator
+	"L":   L,   // LeftToRight
+	"NSM": NSM, // NonspacingMark
+	"ON":  ON,  // OtherNeutral
+	"R":   R,   // RightToLeft
+	"S":   S,   // SegmentSeparator
+	"WS":  WS,  // WhiteSpace
+
+	"FSI": Control,
+	"PDF": Control,
+	"PDI": Control,
+	"LRE": Control,
+	"LRI": Control,
+	"LRO": Control,
+	"RLE": Control,
+	"RLI": Control,
+	"RLO": Control,
+}
+
+func genTables() {
+	if numClass > 0x0F {
+		log.Fatalf("Too many Class constants (%#x > 0x0F).", numClass)
+	}
+	w := gen.NewCodeWriter()
+	defer w.WriteVersionedGoFile(*outputFile, "bidi")
+
+	gen.WriteUnicodeVersion(w)
+
+	t := triegen.NewTrie("bidi")
+
+	// Build data about bracket mapping. These bits need to be or-ed with
+	// any other bits.
+	orMask := map[rune]uint64{}
+
+	xorMap := map[rune]int{}
+	xorMasks := []rune{0} // First value is no-op.
+
+	ucd.Parse(gen.OpenUCDFile("BidiBrackets.txt"), func(p *ucd.Parser) {
+		r1 := p.Rune(0)
+		r2 := p.Rune(1)
+		xor := r1 ^ r2
+		if _, ok := xorMap[xor]; !ok {
+			xorMap[xor] = len(xorMasks)
+			xorMasks = append(xorMasks, xor)
+		}
+		entry := uint64(xorMap[xor]) << xorMaskShift
+		switch p.String(2) {
+		case "o":
+			entry |= openMask
+		case "c", "n":
+		default:
+			log.Fatalf("Unknown bracket class %q.", p.String(2))
+		}
+		orMask[r1] = entry
+	})
+
+	w.WriteComment(`
+	xorMasks contains masks to be xor-ed with brackets to get the reverse
+	version.`)
+	w.WriteVar("xorMasks", xorMasks)
+
+	done := map[rune]bool{}
+
+	insert := func(r rune, c Class) {
+		if !done[r] {
+			t.Insert(r, orMask[r]|uint64(c))
+			done[r] = true
+		}
+	}
+
+	// Insert the derived BiDi properties.
+	ucd.Parse(gen.OpenUCDFile("extracted/DerivedBidiClass.txt"), func(p *ucd.Parser) {
+		r := p.Rune(0)
+		class, ok := bidiClass[p.String(1)]
+		if !ok {
+			log.Fatalf("%U: Unknown BiDi class %q", r, p.String(1))
+		}
+		insert(r, class)
+	})
+	visitDefaults(insert)
+
+	// TODO: use sparse blocks. This would reduce table size considerably
+	// from the looks of it.
+
+	sz, err := t.Gen(w)
+	if err != nil {
+		log.Fatal(err)
+	}
+	w.Size += sz
+}
+
+// dummy values to make methods in gen_common compile. The real versions
+// will be generated by this file to tables.go.
+var (
+	xorMasks []rune
+)
@@ -0,0 +1,57 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+import (
+	"unicode"
+
+	"golang.org/x/text/internal/gen"
+	"golang.org/x/text/internal/ucd"
+	"golang.org/x/text/unicode/rangetable"
+)
+
+// These tables are hand-extracted from:
+// https://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedBidiClass.txt
+func visitDefaults(fn func(r rune, c Class)) {
+	// first write default values for ranges listed above.
+	visitRunes(fn, AL, []rune{
+		0x0600, 0x07BF, // Arabic
+		0x08A0, 0x08FF, // Arabic Extended-A
+		0xFB50, 0xFDCF, // Arabic Presentation Forms
+		0xFDF0, 0xFDFF,
+		0xFE70, 0xFEFF,
+		0x0001EE00, 0x0001EEFF, // Arabic Mathematical Alpha Symbols
+	})
+	visitRunes(fn, R, []rune{
+		0x0590, 0x05FF, // Hebrew
+		0x07C0, 0x089F, // Nko et al.
+		0xFB1D, 0xFB4F,
+		0x00010800, 0x00010FFF, // Cypriot Syllabary et. al.
+		0x0001E800, 0x0001EDFF,
+		0x0001EF00, 0x0001EFFF,
+	})
+	visitRunes(fn, ET, []rune{ // European Terminator
+		0x20A0, 0x20Cf, // Currency symbols
+	})
+	rangetable.Visit(unicode.Noncharacter_Code_Point, func(r rune) {
+		fn(r, BN) // Boundary Neutral
+	})
+	ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
+		if p.String(1) == "Default_Ignorable_Code_Point" {
+			fn(p.Rune(0), BN) // Boundary Neutral
+		}
+	})
+}
+
+func visitRunes(fn func(r rune, c Class), c Class, runes []rune) {
+	for i := 0; i < len(runes); i += 2 {
+		lo, hi := runes[i], runes[i+1]
+		for j := lo; j <= hi; j++ {
+			fn(j, c)
+		}
+	}
+}
@@ -0,0 +1,64 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+// Class is the Unicode BiDi class. Each rune has a single class.
+type Class uint
+
+const (
+	L       Class = iota // LeftToRight
+	R                    // RightToLeft
+	EN                   // EuropeanNumber
+	ES                   // EuropeanSeparator
+	ET                   // EuropeanTerminator
+	AN                   // ArabicNumber
+	CS                   // CommonSeparator
+	B                    // ParagraphSeparator
+	S                    // SegmentSeparator
+	WS                   // WhiteSpace
+	ON                   // OtherNeutral
+	BN                   // BoundaryNeutral
+	NSM                  // NonspacingMark
+	AL                   // ArabicLetter
+	Control              // Control LRO - PDI
+
+	numClass
+
+	LRO // LeftToRightOverride
+	RLO // RightToLeftOverride
+	LRE // LeftToRightEmbedding
+	RLE // RightToLeftEmbedding
+	PDF // PopDirectionalFormat
+	LRI // LeftToRightIsolate
+	RLI // RightToLeftIsolate
+	FSI // FirstStrongIsolate
+	PDI // PopDirectionalIsolate
+
+	unknownClass = ^Class(0)
+)
+
+var controlToClass = map[rune]Class{
+	0x202D: LRO, // LeftToRightOverride,
+	0x202E: RLO, // RightToLeftOverride,
+	0x202A: LRE, // LeftToRightEmbedding,
+	0x202B: RLE, // RightToLeftEmbedding,
+	0x202C: PDF, // PopDirectionalFormat,
+	0x2066: LRI, // LeftToRightIsolate,
+	0x2067: RLI, // RightToLeftIsolate,
+	0x2068: FSI, // FirstStrongIsolate,
+	0x2069: PDI, // PopDirectionalIsolate,
+}
+
+// A trie entry has the following bits:
+// 7..5  XOR mask for brackets
+// 4     1: Bracket open, 0: Bracket close
+// 3..0  Class type
+
+const (
+	openMask     = 0x10
+	xorMaskShift = 5
+)
@@ -0,0 +1,206 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bidi
+
+import "unicode/utf8"
+
+// Properties provides access to BiDi properties of runes.
+type Properties struct {
+	entry uint8
+	last  uint8
+}
+
+var trie = newBidiTrie(0)
+
+// TODO: using this for bidirule reduces the running time by about 5%. Consider
+// if this is worth exposing or if we can find a way to speed up the Class
+// method.
+//
+// // CompactClass is like Class, but maps all of the BiDi control classes
+// // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control.
+// func (p Properties) CompactClass() Class {
+// 	return Class(p.entry & 0x0F)
+// }
+
+// Class returns the Bidi class for p.
+func (p Properties) Class() Class {
+	c := Class(p.entry & 0x0F)
+	if c == Control {
+		c = controlByteToClass[p.last&0xF]
+	}
+	return c
+}
+
+// IsBracket reports whether the rune is a bracket.
+func (p Properties) IsBracket() bool { return p.entry&0xF0 != 0 }
+
+// IsOpeningBracket reports whether the rune is an opening bracket.
+// IsBracket must return true.
+func (p Properties) IsOpeningBracket() bool { return p.entry&openMask != 0 }
+
+// TODO: find a better API and expose.
+func (p Properties) reverseBracket(r rune) rune {
+	return xorMasks[p.entry>>xorMaskShift] ^ r
+}
+
+var controlByteToClass = [16]Class{
+	0xD: LRO, // U+202D LeftToRightOverride,
+	0xE: RLO, // U+202E RightToLeftOverride,
+	0xA: LRE, // U+202A LeftToRightEmbedding,
+	0xB: RLE, // U+202B RightToLeftEmbedding,
+	0xC: PDF, // U+202C PopDirectionalFormat,
+	0x6: LRI, // U+2066 LeftToRightIsolate,
+	0x7: RLI, // U+2067 RightToLeftIsolate,
+	0x8: FSI, // U+2068 FirstStrongIsolate,
+	0x9: PDI, // U+2069 PopDirectionalIsolate,
+}
+
+// LookupRune returns properties for r.
+func LookupRune(r rune) (p Properties, size int) {
+	var buf [4]byte
+	n := utf8.EncodeRune(buf[:], r)
+	return Lookup(buf[:n])
+}
+
+// TODO: these lookup methods are based on the generated trie code. The returned
+// sizes have slightly different semantics from the generated code, in that it
+// always returns size==1 for an illegal UTF-8 byte (instead of the length
+// of the maximum invalid subsequence). Most Transformers, like unicode/norm,
+// leave invalid UTF-8 untouched, in which case it has performance benefits to
+// do so (without changing the semantics). Bidi requires the semantics used here
+// for the bidirule implementation to be compatible with the Go semantics.
+//  They ultimately should perhaps be adopted by all trie implementations, for
+// convenience sake.
+// This unrolled code also boosts performance of the secure/bidirule package by
+// about 30%.
+// So, to remove this code:
+//   - add option to trie generator to define return type.
+//   - always return 1 byte size for ill-formed UTF-8 runes.
+
+// Lookup returns properties for the first rune in s and the width in bytes of
+// its encoding. The size will be 0 if s does not hold enough bytes to complete
+// the encoding.
+func Lookup(s []byte) (p Properties, sz int) {
+	c0 := s[0]
+	switch {
+	case c0 < 0x80: // is ASCII
+		return Properties{entry: bidiValues[c0]}, 1
+	case c0 < 0xC2:
+		return Properties{}, 1
+	case c0 < 0xE0: // 2-byte UTF-8
+		if len(s) < 2 {
+			return Properties{}, 0
+		}
+		i := bidiIndex[c0]
+		c1 := s[1]
+		if c1 < 0x80 || 0xC0 <= c1 {
+			return Properties{}, 1
+		}
+		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
+	case c0 < 0xF0: // 3-byte UTF-8
+		if len(s) < 3 {
+			return Properties{}, 0
+		}
+		i := bidiIndex[c0]
+		c1 := s[1]
+		if c1 < 0x80 || 0xC0 <= c1 {
+			return Properties{}, 1
+		}
+		o := uint32(i)<<6 + uint32(c1)
+		i = bidiIndex[o]
+		c2 := s[2]
+		if c2 < 0x80 || 0xC0 <= c2 {
+			return Properties{}, 1
+		}
+		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
+	case c0 < 0xF8: // 4-byte UTF-8
+		if len(s) < 4 {
+			return Properties{}, 0
+		}
+		i := bidiIndex[c0]
+		c1 := s[1]
+		if c1 < 0x80 || 0xC0 <= c1 {
+			return Properties{}, 1
+		}
+		o := uint32(i)<<6 + uint32(c1)
+		i = bidiIndex[o]
+		c2 := s[2]
+		if c2 < 0x80 || 0xC0 <= c2 {
+			return Properties{}, 1
+		}
+		o = uint32(i)<<6 + uint32(c2)
+		i = bidiIndex[o]
+		c3 := s[3]
+		if c3 < 0x80 || 0xC0 <= c3 {
+			return Properties{}, 1
+		}
+		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
+	}
+	// Illegal rune
+	return Properties{}, 1
+}
+
+// LookupString returns properties for the first rune in s and the width in
+// bytes of its encoding. The size will be 0 if s does not hold enough bytes to
+// complete the encoding.
+func LookupString(s string) (p Properties, sz int) {
+	c0 := s[0]
+	switch {
+	case c0 < 0x80: // is ASCII
+		return Properties{entry: bidiValues[c0]}, 1
+	case c0 < 0xC2:
+		return Properties{}, 1
+	case c0 < 0xE0: // 2-byte UTF-8
+		if len(s) < 2 {
+			return Properties{}, 0
+		}
+		i := bidiIndex[c0]
+		c1 := s[1]
+		if c1 < 0x80 || 0xC0 <= c1 {
+			return Properties{}, 1
+		}
+		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
+	case c0 < 0xF0: // 3-byte UTF-8
+		if len(s) < 3 {
+			return Properties{}, 0
+		}
+		i := bidiIndex[c0]
+		c1 := s[1]
+		if c1 < 0x80 || 0xC0 <= c1 {
+			return Properties{}, 1
+		}
+		o := uint32(i)<<6 + uint32(c1)
+		i = bidiIndex[o]
+		c2 := s[2]
+		if c2 < 0x80 || 0xC0 <= c2 {
+			return Properties{}, 1
+		}
+		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
+	case c0 < 0xF8: // 4-byte UTF-8
+		if len(s) < 4 {
+			return Properties{}, 0
+		}
+		i := bidiIndex[c0]
+		c1 := s[1]
+		if c1 < 0x80 || 0xC0 <= c1 {
+			return Properties{}, 1
+		}
+		o := uint32(i)<<6 + uint32(c1)
+		i = bidiIndex[o]
+		c2 := s[2]
+		if c2 < 0x80 || 0xC0 <= c2 {
+			return Properties{}, 1
+		}
+		o = uint32(i)<<6 + uint32(c2)
+		i = bidiIndex[o]
+		c3 := s[3]
+		if c3 < 0x80 || 0xC0 <= c3 {
+			return Properties{}, 1
+		}
+		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
+	}
+	// Illegal rune
+	return Properties{}, 1
+}
@@ -0,0 +1,60 @@
+// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
+
+package bidi
+
+// Class is the Unicode BiDi class. Each rune has a single class.
+type Class uint
+
+const (
+	L       Class = iota // LeftToRight
+	R                    // RightToLeft
+	EN                   // EuropeanNumber
+	ES                   // EuropeanSeparator
+	ET                   // EuropeanTerminator
+	AN                   // ArabicNumber
+	CS                   // CommonSeparator
+	B                    // ParagraphSeparator
+	S                    // SegmentSeparator
+	WS                   // WhiteSpace
+	ON                   // OtherNeutral
+	BN                   // BoundaryNeutral
+	NSM                  // NonspacingMark
+	AL                   // ArabicLetter
+	Control              // Control LRO - PDI
+
+	numClass
+
+	LRO // LeftToRightOverride
+	RLO // RightToLeftOverride
+	LRE // LeftToRightEmbedding
+	RLE // RightToLeftEmbedding
+	PDF // PopDirectionalFormat
+	LRI // LeftToRightIsolate
+	RLI // RightToLeftIsolate
+	FSI // FirstStrongIsolate
+	PDI // PopDirectionalIsolate
+
+	unknownClass = ^Class(0)
+)
+
+var controlToClass = map[rune]Class{
+	0x202D: LRO, // LeftToRightOverride,
+	0x202E: RLO, // RightToLeftOverride,
+	0x202A: LRE, // LeftToRightEmbedding,
+	0x202B: RLE, // RightToLeftEmbedding,
+	0x202C: PDF, // PopDirectionalFormat,
+	0x2066: LRI, // LeftToRightIsolate,
+	0x2067: RLI, // RightToLeftIsolate,
+	0x2068: FSI, // FirstStrongIsolate,
+	0x2069: PDI, // PopDirectionalIsolate,
+}
+
+// A trie entry has the following bits:
+// 7..5  XOR mask for brackets
+// 4     1: Bracket open, 0: Bracket close
+// 3..0  Class type
+
+const (
+	openMask     = 0x10
+	xorMaskShift = 5
+)
@@ -407,7 +407,7 @@ func decomposeHangul(buf []byte, r rune) int {

 // decomposeHangul algorithmically decomposes a Hangul rune into
 // its Jamo components.
-// See http://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
+// See https://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
 func (rb *reorderBuffer) decomposeHangul(r rune) {
 	r -= hangulBase
 	x := r % jamoTCount
@@ -420,7 +420,7 @@ func (rb *reorderBuffer) decomposeHangul(r rune) {
 }

 // combineHangul algorithmically combines Jamo character components into Hangul.
-// See http://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
+// See https://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
 func (rb *reorderBuffer) combineHangul(s, i, k int) {
 	b := rb.rune[:]
 	bn := rb.nrune
@@ -461,6 +461,10 @@ func (rb *reorderBuffer) combineHangul(s, i, k int) {
 // It should only be used to recompose a single segment, as it will not
 // handle alternations between Hangul and non-Hangul characters correctly.
 func (rb *reorderBuffer) compose() {
+	// Lazily load the map used by the combine func below, but do
+	// it outside of the loop.
+	recompMapOnce.Do(buildRecompMap)
+
 	// UAX #15, section X5 , including Corrigendum #5
 	// "In any character sequence beginning with starter S, a character C is
 	//  blocked from S if and only if there is some character B between S
@@ -4,6 +4,8 @@

 package norm

+import "encoding/binary"
+
 // This file contains Form-specific logic and wrappers for data in tables.go.

 // Rune info is stored in a separate trie per composing form. A composing form
@@ -178,6 +180,17 @@ func (p Properties) TrailCCC() uint8 {
 	return ccc[p.tccc]
 }

+func buildRecompMap() {
+	recompMap = make(map[uint32]rune, len(recompMapPacked)/8)
+	var buf [8]byte
+	for i := 0; i < len(recompMapPacked); i += 8 {
+		copy(buf[:], recompMapPacked[i:i+8])
+		key := binary.BigEndian.Uint32(buf[:4])
+		val := binary.BigEndian.Uint32(buf[4:])
+		recompMap[key] = rune(val)
+	}
+}
+
 // Recomposition
 // We use 32-bit keys instead of 64-bit for the two codepoint keys.
 // This clips off the bits of three entries, but we know this will not
@@ -186,8 +199,14 @@ func (p Properties) TrailCCC() uint8 {
 // Note that the recomposition map for NFC and NFKC are identical.

 // combine returns the combined rune or 0 if it doesn't exist.
+//
+// The caller is responsible for calling
+// recompMapOnce.Do(buildRecompMap) sometime before this is called.
 func combine(a, b rune) rune {
 	key := uint32(uint16(a))<<16 + uint32(uint16(b))
+	if recompMap == nil {
+		panic("caller error") // see func comment
+	}
 	return recompMap[key]
 }

@@ -128,8 +128,9 @@ func (i *Iter) Next() []byte {
 func nextASCIIBytes(i *Iter) []byte {
 	p := i.p + 1
 	if p >= i.rb.nsrc {
+		p0 := i.p
 		i.setDone()
-		return i.rb.src.bytes[i.p:p]
+		return i.rb.src.bytes[p0:p]
 	}
 	if i.rb.src.bytes[p] < utf8.RuneSelf {
 		p0 := i.p
@@ -12,6 +12,7 @@ package main

 import (
 	"bytes"
+	"encoding/binary"
 	"flag"
 	"fmt"
 	"io"
@@ -261,7 +262,7 @@ func compactCCC() {

 // CompositionExclusions.txt has form:
 // 0958    # ...
-// See http://unicode.org/reports/tr44/ for full explanation
+// See https://unicode.org/reports/tr44/ for full explanation
 func loadCompositionExclusions() {
 	f := gen.OpenUCDFile("CompositionExclusions.txt")
 	defer f.Close()
@@ -735,6 +736,8 @@ func makeTables() {
 			max = n
 		}
 	}
+	fmt.Fprintln(w, `import "sync"`)
+	fmt.Fprintln(w)

 	fmt.Fprintln(w, "const (")
 	fmt.Fprintln(w, "\t// Version is the Unicode edition from which the tables are derived.")
@@ -782,16 +785,23 @@ func makeTables() {
 		sz := nrentries * 8
 		size += sz
 		fmt.Fprintf(w, "// recompMap: %d bytes (entries only)\n", sz)
-		fmt.Fprintln(w, "var recompMap = map[uint32]rune{")
+		fmt.Fprintln(w, "var recompMap map[uint32]rune")
+		fmt.Fprintln(w, "var recompMapOnce sync.Once\n")
+		fmt.Fprintln(w, `const recompMapPacked = "" +`)
+		var buf [8]byte
 		for i, c := range chars {
 			f := c.forms[FCanonical]
 			d := f.decomp
 			if !f.isOneWay && len(d) > 0 {
 				key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1]))
-				fmt.Fprintf(w, "0x%.8X: 0x%.4X,\n", key, i)
+				binary.BigEndian.PutUint32(buf[:4], key)
+				binary.BigEndian.PutUint32(buf[4:], uint32(i))
+				fmt.Fprintf(w, "\t\t%q + // 0x%.8X: 0x%.8X\n", string(buf[:]), key, uint32(i))
 			}
 		}
-		fmt.Fprintf(w, "}\n\n")
+		// hack so we don't have to special case the trailing plus sign
+		fmt.Fprintf(w, `	""`)
+		fmt.Fprintln(w)
 	}

 	fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size)
@@ -857,7 +867,7 @@ func verifyComputed() {
 // DerivedNormalizationProps.txt has form:
 // 00C0..00C5    ; NFD_QC; N # ...
 // 0374          ; NFD_QC; N # ...
-// See http://unicode.org/reports/tr44/ for full explanation
+// See https://unicode.org/reports/tr44/ for full explanation
 func testDerived() {
 	f := gen.OpenUCDFile("DerivedNormalizationProps.txt")
 	defer f.Close()
@@ -29,8 +29,8 @@ import (
 // proceed independently on both sides:
 //   f(x) == append(f(x[0:n]), f(x[n:])...)
 //
-// References: http://unicode.org/reports/tr15/ and
-// http://unicode.org/notes/tn5/.
+// References: https://unicode.org/reports/tr15/ and
+// https://unicode.org/notes/tn5/.
 type Form int

 const (
@@ -60,8 +60,8 @@ func (w *normWriter) Close() error {
 }

 // Writer returns a new writer that implements Write(b)
-// by writing f(b) to w.  The returned writer may use an
-// an internal buffer to maintain state across Write calls.
+// by writing f(b) to w. The returned writer may use an
+// internal buffer to maintain state across Write calls.
 // Calling its Close method writes any buffered data to w.
 func (f Form) Writer(w io.Writer) io.WriteCloser {
 	wr := &normWriter{rb: reorderBuffer{}, w: w}
@@ -18,7 +18,6 @@ func (Form) Reset() {}
 // Users should either catch ErrShortDst and allow dst to grow or have dst be at
 // least of size MaxTransformChunkSize to be guaranteed of progress.
 func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
-	n := 0
 	// Cap the maximum number of src bytes to check.
 	b := src
 	eof := atEOF
@@ -27,13 +26,14 @@ func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
 		eof = false
 		b = b[:ns]
 	}
-	i, ok := formTable[f].quickSpan(inputBytes(b), n, len(b), eof)
-	n += copy(dst[n:], b[n:i])
+	i, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), eof)
+	n := copy(dst, b[:i])
 	if !ok {
 		nDst, nSrc, err = f.transform(dst[n:], src[n:], atEOF)
 		return nDst + n, nSrc + n, err
 	}
-	if n < len(src) && !atEOF {
+
+	if err == nil && n < len(src) && !atEOF {
 		err = transform.ErrShortSrc
 	}
 	return n, n, err
@@ -79,7 +79,7 @@ func (f Form) transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
 		nSrc += n
 		nDst += n
 		if ok {
-			if n < rb.nsrc && !atEOF {
+			if err == nil && n < rb.nsrc && !atEOF {
 				err = transform.ErrShortSrc
 			}
 			return nDst, nSrc, err