encoding.go (5189B)
1 // Copyright 2022 The TCell Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use file except in compliance with the License. 5 // You may obtain a copy of the license at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tcell 16 17 import ( 18 "strings" 19 "sync" 20 21 "golang.org/x/text/encoding" 22 23 gencoding "github.com/gdamore/encoding" 24 ) 25 26 var encodings map[string]encoding.Encoding 27 var encodingLk sync.Mutex 28 var encodingFallback EncodingFallback = EncodingFallbackFail 29 30 // RegisterEncoding may be called by the application to register an encoding. 31 // The presence of additional encodings will facilitate application usage with 32 // terminal environments where the I/O subsystem does not support Unicode. 33 // 34 // Windows systems use Unicode natively, and do not need any of the encoding 35 // subsystem when using Windows Console screens. 36 // 37 // Please see the Go documentation for golang.org/x/text/encoding -- most of 38 // the common ones exist already as stock variables. For example, ISO8859-15 39 // can be registered using the following code: 40 // 41 // import "golang.org/x/text/encoding/charmap" 42 // 43 // ... 44 // RegisterEncoding("ISO8859-15", charmap.ISO8859_15) 45 // 46 // Aliases can be registered as well, for example "8859-15" could be an alias 47 // for "ISO8859-15". 48 // 49 // For POSIX systems, this package will check the environment variables 50 // LC_ALL, LC_CTYPE, and LANG (in that order) to determine the character set. 51 // These are expected to have the following pattern: 52 // 53 // $language[.$codeset[@$variant] 54 // 55 // We extract only the $codeset part, which will usually be something like 56 // UTF-8 or ISO8859-15 or KOI8-R. Note that if the locale is either "POSIX" 57 // or "C", then we assume US-ASCII (the POSIX 'portable character set' 58 // and assume all other characters are somehow invalid.) 59 // 60 // Modern POSIX systems and terminal emulators may use UTF-8, and for those 61 // systems, this API is also unnecessary. For example, Darwin (MacOS X) and 62 // modern Linux running modern xterm generally will out of the box without 63 // any of this. Use of UTF-8 is recommended when possible, as it saves 64 // quite a lot processing overhead. 65 // 66 // Note that some encodings are quite large (for example GB18030 which is a 67 // superset of Unicode) and so the application size can be expected to 68 // increase quite a bit as each encoding is added. 69 70 // The East Asian encodings have been seen to add 100-200K per encoding to the 71 // size of the resulting binary. 72 func RegisterEncoding(charset string, enc encoding.Encoding) { 73 encodingLk.Lock() 74 charset = strings.ToLower(charset) 75 encodings[charset] = enc 76 encodingLk.Unlock() 77 } 78 79 // EncodingFallback describes how the system behaves when the locale 80 // requires a character set that we do not support. The system always 81 // supports UTF-8 and US-ASCII. On Windows consoles, UTF-16LE is also 82 // supported automatically. Other character sets must be added using the 83 // RegisterEncoding API. (A large group of nearly all of them can be 84 // added using the RegisterAll function in the encoding sub package.) 85 type EncodingFallback int 86 87 const ( 88 // EncodingFallbackFail behavior causes GetEncoding to fail 89 // when it cannot find an encoding. 90 EncodingFallbackFail = iota 91 92 // EncodingFallbackASCII behavior causes GetEncoding to fall back 93 // to a 7-bit ASCII encoding, if no other encoding can be found. 94 EncodingFallbackASCII 95 96 // EncodingFallbackUTF8 behavior causes GetEncoding to assume 97 // UTF8 can pass unmodified upon failure. Note that this behavior 98 // is not recommended, unless you are sure your terminal can cope 99 // with real UTF8 sequences. 100 EncodingFallbackUTF8 101 ) 102 103 // SetEncodingFallback changes the behavior of GetEncoding when a suitable 104 // encoding is not found. The default is EncodingFallbackFail, which 105 // causes GetEncoding to simply return nil. 106 func SetEncodingFallback(fb EncodingFallback) { 107 encodingLk.Lock() 108 encodingFallback = fb 109 encodingLk.Unlock() 110 } 111 112 // GetEncoding is used by Screen implementors who want to locate an encoding 113 // for the given character set name. Note that this will return nil for 114 // either the Unicode (UTF-8) or ASCII encodings, since we don't use 115 // encodings for them but instead have our own native methods. 116 func GetEncoding(charset string) encoding.Encoding { 117 charset = strings.ToLower(charset) 118 encodingLk.Lock() 119 defer encodingLk.Unlock() 120 if enc, ok := encodings[charset]; ok { 121 return enc 122 } 123 switch encodingFallback { 124 case EncodingFallbackASCII: 125 return gencoding.ASCII 126 case EncodingFallbackUTF8: 127 return encoding.Nop 128 } 129 return nil 130 } 131 132 func init() { 133 // We always support UTF-8 and ASCII. 134 encodings = make(map[string]encoding.Encoding) 135 encodings["utf-8"] = gencoding.UTF8 136 encodings["utf8"] = gencoding.UTF8 137 encodings["us-ascii"] = gencoding.ASCII 138 encodings["ascii"] = gencoding.ASCII 139 encodings["iso646"] = gencoding.ASCII 140 }