nt

A sensible note-taking program
git clone git://git.laack.co/nt.git
Log | Files | Refs | README

strings.go (18970B)


      1 package tview
      2 
      3 import (
      4 	"math/rand"
      5 	"regexp"
      6 	"strconv"
      7 	"strings"
      8 	"unicode/utf8"
      9 
     10 	"github.com/gdamore/tcell/v2"
     11 	"github.com/rivo/uniseg"
     12 )
     13 
     14 // escapedTagPattern matches an escaped tag, e.g. "[red[]", at the beginning of
     15 // a string. In detail: an opening bracket, one or more characters which are
        // not brackets, one or more opening brackets, and one closing bracket. The
        // pattern is anchored at the start of the string only, so anything may
        // follow the match.
     16 var escapedTagPattern = regexp.MustCompile(`^\[[^\[\]]+\[+\]`)
     17 
     18 // stepOptions is a bit field of options for [step]. A value of 0 results in
     19 // [step] having the same behavior as uniseg.StepString, i.e. no tview-related
     20 // parsing is performed.
     21 type stepOptions int
     22 
     23 // Bit fields for [stepOptions].
        //
        // Note that iota counts constant specifications from the start of the block,
        // so it is already 1 on the stepOptionsStyle line. This makes
        // stepOptionsStyle equal to 2 and stepOptionsRegion (which implicitly repeats
        // "1 << iota" with iota == 2) equal to 4. Only the fact that the values are
        // distinct, non-zero bits matters to the code in this file.
     24 const (
     25 	stepOptionsNone   stepOptions = 0
     26 	stepOptionsStyle  stepOptions = 1 << iota // Parse style tags.
     27 	stepOptionsRegion                         // Parse region tags.
     28 )
     29 
     30 // stepState represents the current state of the parser implemented in [step].
        // A state is created by [step] when nil is passed to it, is modified in place
        // by each call, and is handed back to the caller as the returned new state.
     31 type stepState struct {
     32 	unisegState     int         // The state of the uniseg parser. A negative value marks the first call to [step].
     33 	boundaries      int         // Information about boundaries, as returned by uniseg.StepString.
     34 	style           tcell.Style // The current style.
     35 	region          string      // The current region.
     36 	escapedTagState int         // States for parsing escaped tags (defined in [step]).
     37 	grossLength     int         // The length of the cluster, including any tags not returned.
     38 
     39 	// The styles for the initial call to [step]. They are captured from the
        	// style field while unisegState is negative and are what a "-" in a style
        	// tag resets the foreground, background, or attributes to.
     40 	initialForeground tcell.Color
     41 	initialBackground tcell.Color
     42 	initialAttributes tcell.AttrMask
     43 }
     44 
     45 // IsWordBoundary returns true if the boundary between the returned grapheme
     46 // cluster and the one following it is a word boundary.
     47 func (s *stepState) IsWordBoundary() bool {
     48 	return s.boundaries&uniseg.MaskWord != 0
     49 }
     50 
     51 // IsSentenceBoundary returns true if the boundary between the returned grapheme
     52 // cluster and the one following it is a sentence boundary.
     53 func (s *stepState) IsSentenceBoundary() bool {
     54 	return s.boundaries&uniseg.MaskSentence != 0
     55 }
     56 
     57 // LineBreak returns whether the string can be broken into the next line after
     58 // the returned grapheme cluster. If optional is true, the line break is
     59 // optional. If false, the line break is mandatory, e.g. after a newline
     60 // character.
     61 func (s *stepState) LineBreak() (lineBreak, optional bool) {
     62 	switch s.boundaries & uniseg.MaskLine {
     63 	case uniseg.LineCanBreak:
     64 		return true, true
     65 	case uniseg.LineMustBreak:
     66 		return true, false
     67 	}
     68 	return false, false // uniseg.LineDontBreak.
     69 }
     70 
     71 // Width returns the grapheme cluster's width in cells.
     72 func (s *stepState) Width() int {
     73 	return s.boundaries >> uniseg.ShiftWidth
     74 }
     75 
     76 // GrossLength returns the grapheme cluster's length in bytes, including any
     77 // tags that were parsed but not explicitly returned.
     78 func (s *stepState) GrossLength() int {
     79 	return s.grossLength
     80 }
     81 
     82 // Style returns the style for the grapheme cluster.
     83 func (s *stepState) Style() tcell.Style {
     84 	return s.style
     85 }
     86 
     87 // step uses uniseg.StepString to iterate over the grapheme clusters of a string
     88 // but (optionally) also parses the string for style or region tags.
     89 //
     90 // This function can be called consecutively to extract all grapheme clusters
     91 // from str, without returning any contained (parsed) tags. The return values
     92 // are the first grapheme cluster, the remaining string, and the new state. Pass
     93 // the remaining string and the returned state to the next call. If the rest
     94 // string is empty, parsing is complete. Call the returned state's methods for
     95 // boundary and cluster width information.
     96 //
     97 // The returned cluster may be empty if the given string consists of only
     98 // (parsed) tags. The boundary and width information will be meaningless in
     99 // this case but the style will describe the style at the end of the string.
    100 //
    101 // Pass nil for state on the first call. This will assume an initial style with
    102 // [Styles.PrimitiveBackgroundColor] as the background color and
    103 // [Styles.PrimaryTextColor] as the text color, no current region. If you want
    104 // to start with a different style or region, you can set the state accordingly
    105 // but you must then set [state.unisegState] to -1.
    106 //
    107 // There is no need to call uniseg.HasTrailingLineBreakInString on the last
    108 // non-empty cluster as this function will do this for you and adjust the
    109 // returned boundaries accordingly.
        //
        // A non-nil state is modified in place and is also what is returned as
        // newState. Note that opts is only tested for being non-zero: as soon as any
        // option is set, tags are handed to [parseTag], which recognizes both style
        // and region tags.
    110 func step(str string, state *stepState, opts stepOptions) (cluster, rest string, newState *stepState) {
    111 	// Set up initial state.
    112 	if state == nil {
    113 		state = &stepState{
    114 			unisegState: -1,
    115 			style:       tcell.StyleDefault.Background(Styles.PrimitiveBackgroundColor).Foreground(Styles.PrimaryTextColor),
    116 		}
    117 	}
        	// A negative uniseg state marks the first call. Remember the starting
        	// style so that "-" in a tag can reset to it later (see parseTag).
    118 	if state.unisegState < 0 {
    119 		state.initialForeground, state.initialBackground, state.initialAttributes = state.style.Decompose()
    120 	}
        	// Nothing to parse. Return the state with an empty cluster and rest.
    121 	if len(str) == 0 {
    122 		newState = state
    123 		return
    124 	}
    125 
    126 	// Get a grapheme cluster.
        	// preState is kept because the tag handling below steps again, each time
        	// starting from the uniseg state this call was entered with.
    127 	preState := state.unisegState
    128 	cluster, rest, state.boundaries, state.unisegState = uniseg.StepString(str, preState)
    129 	state.grossLength = len(cluster)
        	// This is the last cluster. Drop the line break information unless the
        	// cluster really ends with a line break.
    130 	if rest == "" {
    131 		if !uniseg.HasTrailingLineBreakInString(cluster) {
    132 			state.boundaries &^= uniseg.MaskLine
    133 		}
    134 	}
    135 
    136 	// Parse tags.
    137 	if opts != 0 {
        		// States of the escaped tag parser (stored in state.escapedTagState).
    138 		const (
    139 			etNone int = iota
    140 			etStart
    141 			etChar
    142 			etClosing
    143 		)
    144 
    145 		// Finite state machine for escaped tags.
        		// (cluster is not empty here because str is not empty.)
    146 		switch state.escapedTagState {
    147 		case etStart:
    148 			if cluster[0] == '[' || cluster[0] == ']' { // Invalid escaped tag.
    149 				state.escapedTagState = etNone
    150 			} else { // Other characters are allowed.
    151 				state.escapedTagState = etChar
    152 			}
    153 		case etChar:
    154 			if cluster[0] == ']' { // In theory, this should not happen.
    155 				state.escapedTagState = etNone
    156 			} else if cluster[0] == '[' { // Starting closing sequence.
    157 				// Swallow the first one.
        				// The cluster following it is returned instead and the
        				// gross length accounts for both.
    158 				cluster, rest, state.boundaries, state.unisegState = uniseg.StepString(rest, preState)
    159 				state.grossLength += len(cluster)
    160 				if cluster[0] == ']' {
    161 					state.escapedTagState = etNone
    162 				} else {
    163 					state.escapedTagState = etClosing
    164 				}
    165 			} // More characters. Remain in etChar.
    166 		case etClosing:
    167 			if cluster[0] != '[' {
    168 				state.escapedTagState = etNone
    169 			}
    170 		}
    171 
    172 		// Regular tags.
    173 		if state.escapedTagState == etNone {
    174 			if cluster[0] == '[' {
    175 				// We've already opened a tag. Parse it.
    176 				length, style, region := parseTag(str, state)
    177 				if length > 0 {
        					// Valid tag(s). Adopt the new style and region and
        					// return the cluster which follows the tag(s).
    178 					state.style = style
    179 					state.region = region
    180 					cluster, rest, state.boundaries, state.unisegState = uniseg.StepString(str[length:], preState)
    181 					state.grossLength = len(cluster) + length
    182 					if rest == "" {
    183 						if !uniseg.HasTrailingLineBreakInString(cluster) {
    184 							state.boundaries &^= uniseg.MaskLine
    185 						}
    186 					}
    187 				}
    188 				// Is this an escaped tag?
        				// (Checked on what follows any tags consumed above.)
    189 				if escapedTagPattern.MatchString(str[length:]) {
    190 					state.escapedTagState = etStart
    191 				}
    192 			}
    193 			if len(rest) > 0 && rest[0] == '[' {
    194 				// A tag might follow the cluster. If so, we need to fix the state
    195 				// for the boundaries to be correct.
    196 				if length, _, _ := parseTag(rest, state); length > 0 {
        					// Append the first rune after the tag(s) to the cluster
        					// and step again so that the boundaries are determined
        					// as if the tag(s) were not there.
    197 					if len(rest) > length {
    198 						_, l := utf8.DecodeRuneInString(rest[length:])
    199 						cluster += rest[length : length+l]
    200 					}
    201 					var taglessRest string
    202 					cluster, taglessRest, state.boundaries, state.unisegState = uniseg.StepString(cluster, preState)
    203 					if taglessRest == "" {
    204 						if !uniseg.HasTrailingLineBreakInString(cluster) {
    205 							state.boundaries &^= uniseg.MaskLine
    206 						}
    207 					}
    208 				}
    209 			}
    210 		}
    211 	}
    212 
    213 	newState = state
    214 	return
    215 }
    216 
    217 // parseTag parses str for consecutive style and/or region tags, assuming that
    218 // str starts with the opening bracket for the first tag. It returns the string
    219 // length of all valid tags (0 if the first tag is not valid) and the updated
    220 // style and region for valid tags (based on the provided state).
        //
        // The results are only committed when the closing bracket of a tag is
        // reached. If a later tag turns out to be invalid or incomplete, or if some
        // other text follows, the values committed for the preceding tags are
        // returned. The provided state is only read, never modified.
    221 func parseTag(str string, state *stepState) (length int, style tcell.Style, region string) {
    222 	// Automata states for parsing tags.
    223 	const (
    224 		tagStateNone = iota
    225 		tagStateDoneTag
    226 		tagStateStart
    227 		tagStateRegionStart
    228 		tagStateEndForeground
    229 		tagStateStartBackground
    230 		tagStateNumericForeground
    231 		tagStateNameForeground
    232 		tagStateEndBackground
    233 		tagStateStartAttributes
    234 		tagStateNumericBackground
    235 		tagStateNameBackground
    236 		tagStateAttributes
    237 		tagStateRegionEnd
    238 		tagStateRegionName
    239 		tagStateEndAttributes
    240 		tagStateStartURL
    241 		tagStateEndURL
    242 		tagStateURL
    243 	)
    244 
    245 	// Helper function which checks if the given byte is one of a list of
    246 	// characters, including letters and digits.
        	// ASCII letters and digits are always accepted, chars only lists the
        	// additional characters.
    247 	isOneOf := func(b byte, chars string) bool {
    248 		if b >= 'a' && b <= 'z' || b >= 'A' && b <= 'Z' || b >= '0' && b <= '9' {
    249 			return true
    250 		}
    251 		return strings.IndexByte(chars, b) >= 0
    252 	}
    253 
    254 	// Attribute map.
        	// Keyed by the upper-case flag. Underline ('u'/'U') is not part of this
        	// map, it is handled separately below.
    255 	attrs := map[byte]tcell.AttrMask{
    256 		'B': tcell.AttrBold,
    257 		'I': tcell.AttrItalic,
    258 		'L': tcell.AttrBlink,
    259 		'D': tcell.AttrDim,
    260 		'S': tcell.AttrStrikeThrough,
    261 		'R': tcell.AttrReverse,
    262 	}
    263 
    264 	var (
    265 		tagState, tagLength int
    266 		tempStr             strings.Builder
    267 	)
        	// Working copies. They only become return values when a tag is complete.
    268 	tStyle := state.style
    269 	tRegion := state.region
    270 
    271 	// Process state transitions.
        	// One byte is consumed per iteration. The naked returns below hand back
        	// whatever was committed by the last complete tag.
    272 	for len(str) > 0 {
    273 		ch := str[0]
    274 		str = str[1:]
    275 		tagLength++
    276 
    277 		// Transition.
    278 		switch tagState {
    279 		case tagStateNone:
    280 			if ch == '[' { // Start of a tag.
    281 				tagState = tagStateStart
    282 			} else { // Not a tag. We're done.
    283 				return
    284 			}
    285 		case tagStateStart:
    286 			switch {
    287 			case ch == '"': // Start of a region tag.
    288 				tempStr.Reset()
    289 				tagState = tagStateRegionStart
    290 			case !isOneOf(ch, "#:-"): // Invalid style tag.
    291 				return
    292 			case ch == '-': // Reset foreground color.
    293 				tStyle = tStyle.Foreground(state.initialForeground)
    294 				tagState = tagStateEndForeground
    295 			case ch == ':': // No foreground color.
    296 				tagState = tagStateStartBackground
    297 			default:
    298 				tempStr.Reset()
    299 				tempStr.WriteByte(ch)
    300 				if ch == '#' { // Numeric foreground color.
    301 					tagState = tagStateNumericForeground
    302 				} else { // Letters or numbers.
    303 					tagState = tagStateNameForeground
    304 				}
    305 			}
    306 		case tagStateEndForeground:
    307 			switch ch {
    308 			case ']': // End of tag.
    309 				tagState = tagStateDoneTag
    310 			case ':':
    311 				tagState = tagStateStartBackground
    312 			default: // Invalid tag.
    313 				return
    314 			}
    315 		case tagStateNumericForeground:
    316 			if ch == ']' || ch == ':' {
    317 				if tempStr.Len() != 7 { // Must be #rrggbb.
    318 					return
    319 				}
    320 				tStyle = tStyle.Foreground(tcell.GetColor(tempStr.String()))
    321 			}
    322 			switch {
    323 			case ch == ']': // End of tag.
    324 				tagState = tagStateDoneTag
    325 			case ch == ':': // Start of background color.
    326 				tagState = tagStateStartBackground
    327 			case strings.IndexByte("0123456789abcdefABCDEF", ch) >= 0: // Hex digit.
    328 				tempStr.WriteByte(ch)
    329 				tagState = tagStateNumericForeground
    330 			default: // Invalid tag.
    331 				return
    332 			}
    333 		case tagStateNameForeground:
    334 			if ch == ']' || ch == ':' {
    335 				name := tempStr.String()
    336 				if name[0] >= '0' && name[0] <= '9' { // Must not start with a digit.
    337 					return
    338 				}
    339 				tStyle = tStyle.Foreground(tcell.ColorNames[name])
    340 			}
    341 			switch {
    342 			case !isOneOf(ch, "]:"): // Invalid tag.
    343 				return
    344 			case ch == ']': // End of tag.
    345 				tagState = tagStateDoneTag
    346 			case ch == ':': // Start of background color.
    347 				tagState = tagStateStartBackground
    348 			default: // Letters or numbers.
    349 				tempStr.WriteByte(ch)
    350 			}
    351 		case tagStateStartBackground:
    352 			switch {
    353 			case !isOneOf(ch, "#:-]"): // Invalid style tag.
    354 				return
    355 			case ch == ']': // End of tag.
    356 				tagState = tagStateDoneTag
    357 			case ch == '-': // Reset background color.
    358 				tStyle = tStyle.Background(state.initialBackground)
    359 				tagState = tagStateEndBackground
    360 			case ch == ':': // No background color.
    361 				tagState = tagStateStartAttributes
    362 			default:
    363 				tempStr.Reset()
    364 				tempStr.WriteByte(ch)
    365 				if ch == '#' { // Numeric background color.
    366 					tagState = tagStateNumericBackground
    367 				} else { // Letters or numbers.
    368 					tagState = tagStateNameBackground
    369 				}
    370 			}
    371 		case tagStateEndBackground:
    372 			switch ch {
    373 			case ']': // End of tag.
    374 				tagState = tagStateDoneTag
    375 			case ':': // Start of attributes.
    376 				tagState = tagStateStartAttributes
    377 			default: // Invalid tag.
    378 				return
    379 			}
    380 		case tagStateNumericBackground:
    381 			if ch == ']' || ch == ':' {
    382 				if tempStr.Len() != 7 { // Must be #rrggbb.
    383 					return
    384 				}
    385 				tStyle = tStyle.Background(tcell.GetColor(tempStr.String()))
    386 			}
    387 			if ch == ']' { // End of tag.
    388 				tagState = tagStateDoneTag
    389 			} else if ch == ':' { // Start of attributes.
    390 				tagState = tagStateStartAttributes
    391 			} else if strings.IndexByte("0123456789abcdefABCDEF", ch) >= 0 { // Hex digit.
    392 				tempStr.WriteByte(ch)
    393 				tagState = tagStateNumericBackground
    394 			} else { // Invalid tag.
    395 				return
    396 			}
    397 		case tagStateNameBackground:
    398 			if ch == ']' || ch == ':' {
    399 				name := tempStr.String()
    400 				if name[0] >= '0' && name[0] <= '9' { // Must not start with a digit.
    401 					return
    402 				}
    403 				tStyle = tStyle.Background(tcell.ColorNames[name])
    404 			}
    405 			switch {
    406 			case !isOneOf(ch, "]:"): // Invalid tag.
    407 				return
    408 			case ch == ']': // End of tag.
    409 				tagState = tagStateDoneTag
    410 			case ch == ':': // Start of attributes.
    411 				tagState = tagStateStartAttributes
    412 			default: // Letters or numbers.
    413 				tempStr.WriteByte(ch)
    414 			}
    415 		case tagStateStartAttributes:
    416 			switch {
    417 			case ch == ']': // End of tag.
    418 				tagState = tagStateDoneTag
    419 			case ch == '-': // Reset attributes.
    420 				tStyle = tStyle.Attributes(state.initialAttributes)
    421 				tagState = tagStateEndAttributes
    422 			case ch == ':': // Start of URL.
    423 				tagState = tagStateStartURL
    424 			case strings.IndexByte("buildsrBUILDSR", ch) >= 0: // Attribute tag.
    425 				tempStr.Reset()
    426 				tempStr.WriteByte(ch)
    427 				tagState = tagStateAttributes
    428 			default: // Invalid tag.
    429 				return
    430 			}
    431 		case tagStateAttributes:
    432 			if ch == ']' || ch == ':' {
        				// End of the attribute flags. Apply them in order: lower-case
        				// flags switch an attribute on, upper-case flags switch it
        				// off.
    433 				flags := tempStr.String()
    434 				_, _, a := tStyle.Decompose()
    435 				for index := 0; index < len(flags); index++ {
    436 					ch := flags[index]
    437 					switch {
    438 					case ch == 'u':
    439 						tStyle = tStyle.Underline(true)
    440 					case ch == 'U':
    441 						tStyle = tStyle.Underline(false)
    442 					case ch >= 'a' && ch <= 'z':
    443 						a |= attrs[ch-('a'-'A')]
    444 					default:
    445 						a &^= attrs[ch]
    446 					}
    447 				}
    448 				tStyle = tStyle.Attributes(a)
    449 			}
    450 			switch {
    451 			case ch == ']': // End of tag.
    452 				tagState = tagStateDoneTag
    453 			case ch == ':': // Start of URL.
    454 				tagState = tagStateStartURL
    455 			case strings.IndexByte("buildsrBUILDSR", ch) >= 0: // Attribute tag.
    456 				tempStr.WriteByte(ch)
    457 			default: // Invalid tag.
    458 				return
    459 			}
    460 		case tagStateEndAttributes:
    461 			switch ch {
    462 			case ']': // End of tag.
    463 				tagState = tagStateDoneTag
    464 			case ':': // Start of URL.
    465 				tagState = tagStateStartURL
    466 			default: // Invalid tag.
    467 				return
    468 			}
    469 		case tagStateStartURL:
    470 			switch ch {
    471 			case ']': // End of tag.
    472 				tagState = tagStateDoneTag
    473 			case '-': // Reset URL.
    474 				tStyle = tStyle.Url("").UrlId("")
    475 				tagState = tagStateEndURL
    476 			default: // URL character.
    477 				tempStr.Reset()
    478 				tempStr.WriteByte(ch)
    479 				tStyle = tStyle.UrlId(strconv.Itoa(int(rand.Uint32()))) // Generate a unique ID for this URL.
    480 				tagState = tagStateURL
    481 			}
    482 		case tagStateEndURL:
    483 			if ch == ']' { // End of tag.
    484 				tagState = tagStateDoneTag
    485 			} else { // Invalid tag.
    486 				return
    487 			}
    488 		case tagStateURL:
    489 			if ch == ']' { // End of tag.
    490 				tStyle = tStyle.Url(tempStr.String())
    491 				tagState = tagStateDoneTag
    492 			} else { // URL character.
    493 				tempStr.WriteByte(ch)
    494 			}
    495 		case tagStateRegionStart:
    496 			switch {
    497 			case ch == '"': // End of region tag.
    498 				tagState = tagStateRegionEnd
    499 			case isOneOf(ch, "_,;: -."): // Region name.
    500 				tempStr.WriteByte(ch)
    501 				tagState = tagStateRegionName
    502 			default: // Invalid tag.
    503 				return
    504 			}
    505 		case tagStateRegionEnd:
    506 			if ch == ']' { // End of tag.
    507 				tRegion = tempStr.String()
    508 				tagState = tagStateDoneTag
    509 			} else { // Invalid tag.
    510 				return
    511 			}
    512 		case tagStateRegionName:
    513 			switch {
    514 			case ch == '"': // End of region tag.
    515 				tagState = tagStateRegionEnd
    516 			case isOneOf(ch, "_,;: -."): // Region name.
    517 				tempStr.WriteByte(ch)
    518 			default: // Invalid tag.
    519 				return
    520 			}
    521 		}
    522 
    523 		// The last transition led to a tag end. Make the tag permanent.
    524 		if tagState == tagStateDoneTag {
    525 			length, style, region = tagLength, tStyle, tRegion
    526 			tagState = tagStateNone // Reset state.
    527 		}
    528 	}
    529 
    530 	return
    531 }
    532 
    533 // TaggedStringWidth returns the width of the given string needed to print it on
    534 // screen. The text may contain style tags which are not counted.
    535 func TaggedStringWidth(text string) (width int) {
    536 	var state *stepState
    537 	for len(text) > 0 {
    538 		_, text, state = step(text, state, stepOptionsStyle)
    539 		width += state.Width()
    540 	}
    541 	return
    542 }
    543 
    544 // WordWrap splits a text such that each resulting line does not exceed the
    545 // given screen width. Split points are determined using the algorithm described
    546 // in [Unicode Standard Annex #14].
    547 //
    548 // This function considers style tags to have no width.
    549 //
    550 // [Unicode Standard Annex #14]: https://www.unicode.org/reports/tr14/
    551 func WordWrap(text string, width int) (lines []string) {
    552 	if width <= 0 {
    553 		return
    554 	}
    555 
    556 	var (
    557 		state                                              *stepState
    558 		lineWidth, lineLength, lastOption, lastOptionWidth int
    559 	)
    560 	str := text
    561 	for len(str) > 0 {
    562 		// Parse the next character.
    563 		_, str, state = step(str, state, stepOptionsStyle)
    564 		cWidth := state.Width()
    565 
    566 		// Would it exceed the line width?
    567 		if lineWidth+cWidth > width {
    568 			if lastOptionWidth == 0 {
    569 				// No split point so far. Just split at the current position.
    570 				lines = append(lines, text[:lineLength])
    571 				text = text[lineLength:]
    572 				lineWidth, lineLength, lastOption, lastOptionWidth = 0, 0, 0, 0
    573 			} else {
    574 				// Split at the last split point.
    575 				lines = append(lines, text[:lastOption])
    576 				text = text[lastOption:]
    577 				lineWidth -= lastOptionWidth
    578 				lineLength -= lastOption
    579 				lastOption, lastOptionWidth = 0, 0
    580 			}
    581 		}
    582 
    583 		// Move ahead.
    584 		lineWidth += cWidth
    585 		lineLength += state.GrossLength()
    586 
    587 		// Check for split points.
    588 		if lineBreak, optional := state.LineBreak(); lineBreak {
    589 			if optional {
    590 				// Remember this split point.
    591 				lastOption = lineLength
    592 				lastOptionWidth = lineWidth
    593 			} else {
    594 				// We must split here.
    595 				lines = append(lines, strings.TrimRight(text[:lineLength], "\n\r"))
    596 				text = text[lineLength:]
    597 				lineWidth, lineLength, lastOption, lastOptionWidth = 0, 0, 0, 0
    598 			}
    599 		}
    600 	}
    601 	lines = append(lines, text)
    602 
    603 	return
    604 }
    605 
    606 // Escape escapes the given text such that color and/or region tags are not
    607 // recognized and substituted by the print functions of this package. For
    608 // example, to include a tag-like string in a box title or in a TextView:
    609 //
    610 //	box.SetTitle(tview.Escape("[squarebrackets]"))
    611 //	fmt.Fprint(textView, tview.Escape(`["quoted"]`))
    612 func Escape(text string) string {
    613 	return escapePattern.ReplaceAllString(text, "$1[]")
    614 }
    615 
    616 // Unescape unescapes text previously escaped with [Escape].
    617 func Unescape(text string) string {
    618 	return unescapePattern.ReplaceAllString(text, "$1]")
    619 }
    620 
    621 // stripTags strips style tags from the given string. (Region tags are not
    622 // stripped.)
    623 func stripTags(text string) string {
    624 	var (
    625 		str   strings.Builder
    626 		state *stepState
    627 	)
    628 	for len(text) > 0 {
    629 		var c string
    630 		c, text, state = step(text, state, stepOptionsStyle)
    631 		str.WriteString(c)
    632 	}
    633 	return str.String()
    634 }