nt

A sensible note-taking program
git clone git://git.laack.co/nt.git
Log | Files | Refs | README

encoding.go (5189B)


      1 // Copyright 2022 The TCell Authors
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the license at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 package tcell
     16 
     17 import (
     18 	"strings"
     19 	"sync"
     20 
     21 	"golang.org/x/text/encoding"
     22 
     23 	gencoding "github.com/gdamore/encoding"
     24 )
     25 
     26 var encodings map[string]encoding.Encoding
     27 var encodingLk sync.Mutex
     28 var encodingFallback EncodingFallback = EncodingFallbackFail
     29 
     30 // RegisterEncoding may be called by the application to register an encoding.
     31 // The presence of additional encodings will facilitate application usage with
     32 // terminal environments where the I/O subsystem does not support Unicode.
     33 //
     34 // Windows systems use Unicode natively, and do not need any of the encoding
     35 // subsystem when using Windows Console screens.
     36 //
     37 // Please see the Go documentation for golang.org/x/text/encoding -- most of
     38 // the common ones exist already as stock variables.  For example, ISO8859-15
     39 // can be registered using the following code:
     40 //
     41 //	import "golang.org/x/text/encoding/charmap"
     42 //
     43 //	  ...
     44 //	  RegisterEncoding("ISO8859-15", charmap.ISO8859_15)
     45 //
     46 // Aliases can be registered as well, for example "8859-15" could be an alias
     47 // for "ISO8859-15".
     48 //
     49 // For POSIX systems, this package will check the environment variables
     50 // LC_ALL, LC_CTYPE,  and LANG (in that order) to determine the character set.
     51 // These are expected to have the following pattern:
     52 //
     53 //	$language[.$codeset[@$variant]
     54 //
     55 // We extract only the $codeset part, which will usually be something like
     56 // UTF-8 or ISO8859-15 or KOI8-R.  Note that if the locale is either "POSIX"
     57 // or "C", then we assume US-ASCII (the POSIX 'portable character set'
     58 // and assume all other characters are somehow invalid.)
     59 //
     60 // Modern POSIX systems and terminal emulators may use UTF-8, and for those
     61 // systems, this API is also unnecessary.  For example, Darwin (MacOS X) and
     62 // modern Linux running modern xterm generally will out of the box without
     63 // any of this.  Use of UTF-8 is recommended when possible, as it saves
     64 // quite a lot processing overhead.
     65 //
     66 // Note that some encodings are quite large (for example GB18030 which is a
     67 // superset of Unicode) and so the application size can be expected to
     68 // increase quite a bit as each encoding is added.
     69 
     70 // The East Asian encodings have been seen to add 100-200K per encoding to the
     71 // size of the resulting binary.
     72 func RegisterEncoding(charset string, enc encoding.Encoding) {
     73 	encodingLk.Lock()
     74 	charset = strings.ToLower(charset)
     75 	encodings[charset] = enc
     76 	encodingLk.Unlock()
     77 }
     78 
     79 // EncodingFallback describes how the system behaves when the locale
     80 // requires a character set that we do not support.  The system always
     81 // supports UTF-8 and US-ASCII. On Windows consoles, UTF-16LE is also
     82 // supported automatically.  Other character sets must be added using the
     83 // RegisterEncoding API.  (A large group of nearly all of them can be
     84 // added using the RegisterAll function in the encoding sub package.)
     85 type EncodingFallback int
     86 
     87 const (
     88 	// EncodingFallbackFail behavior causes GetEncoding to fail
     89 	// when it cannot find an encoding.
     90 	EncodingFallbackFail = iota
     91 
     92 	// EncodingFallbackASCII behavior causes GetEncoding to fall back
     93 	// to a 7-bit ASCII encoding, if no other encoding can be found.
     94 	EncodingFallbackASCII
     95 
     96 	// EncodingFallbackUTF8 behavior causes GetEncoding to assume
     97 	// UTF8 can pass unmodified upon failure.  Note that this behavior
     98 	// is not recommended, unless you are sure your terminal can cope
     99 	// with real UTF8 sequences.
    100 	EncodingFallbackUTF8
    101 )
    102 
    103 // SetEncodingFallback changes the behavior of GetEncoding when a suitable
    104 // encoding is not found.  The default is EncodingFallbackFail, which
    105 // causes GetEncoding to simply return nil.
    106 func SetEncodingFallback(fb EncodingFallback) {
    107 	encodingLk.Lock()
    108 	encodingFallback = fb
    109 	encodingLk.Unlock()
    110 }
    111 
    112 // GetEncoding is used by Screen implementors who want to locate an encoding
    113 // for the given character set name.  Note that this will return nil for
    114 // either the Unicode (UTF-8) or ASCII encodings, since we don't use
    115 // encodings for them but instead have our own native methods.
    116 func GetEncoding(charset string) encoding.Encoding {
    117 	charset = strings.ToLower(charset)
    118 	encodingLk.Lock()
    119 	defer encodingLk.Unlock()
    120 	if enc, ok := encodings[charset]; ok {
    121 		return enc
    122 	}
    123 	switch encodingFallback {
    124 	case EncodingFallbackASCII:
    125 		return gencoding.ASCII
    126 	case EncodingFallbackUTF8:
    127 		return encoding.Nop
    128 	}
    129 	return nil
    130 }
    131 
    132 func init() {
    133 	// We always support UTF-8 and ASCII.
    134 	encodings = make(map[string]encoding.Encoding)
    135 	encodings["utf-8"] = gencoding.UTF8
    136 	encodings["utf8"] = gencoding.UTF8
    137 	encodings["us-ascii"] = gencoding.ASCII
    138 	encodings["ascii"] = gencoding.ASCII
    139 	encodings["iso646"] = gencoding.ASCII
    140 }