strings.go (18970B)
1 package tview 2 3 import ( 4 "math/rand" 5 "regexp" 6 "strconv" 7 "strings" 8 "unicode/utf8" 9 10 "github.com/gdamore/tcell/v2" 11 "github.com/rivo/uniseg" 12 ) 13 14 // escapedTagPattern matches an escaped tag, e.g. "[red[]", at the beginning of 15 // a string. 16 var escapedTagPattern = regexp.MustCompile(`^\[[^\[\]]+\[+\]`) 17 18 // stepOptions is a bit field of options for [step]. A value of 0 results in 19 // [step] having the same behavior as uniseg.Step, i.e. no tview-related parsing 20 // is performed. 21 type stepOptions int 22 23 // Bit fields for [stepOptions]. 24 const ( 25 stepOptionsNone stepOptions = 0 26 stepOptionsStyle stepOptions = 1 << iota // Parse style tags. 27 stepOptionsRegion // Parse region tags. 28 ) 29 30 // stepState represents the current state of the parser implemented in [step]. 31 type stepState struct { 32 unisegState int // The state of the uniseg parser. 33 boundaries int // Information about boundaries, as returned by uniseg.Step. 34 style tcell.Style // The current style. 35 region string // The current region. 36 escapedTagState int // States for parsing escaped tags (defined in [step]). 37 grossLength int // The length of the cluster, including any tags not returned. 38 39 // The styles for the initial call to [step]. 40 initialForeground tcell.Color 41 initialBackground tcell.Color 42 initialAttributes tcell.AttrMask 43 } 44 45 // IsWordBoundary returns true if the boundary between the returned grapheme 46 // cluster and the one following it is a word boundary. 47 func (s *stepState) IsWordBoundary() bool { 48 return s.boundaries&uniseg.MaskWord != 0 49 } 50 51 // IsSentenceBoundary returns true if the boundary between the returned grapheme 52 // cluster and the one following it is a sentence boundary. 53 func (s *stepState) IsSentenceBoundary() bool { 54 return s.boundaries&uniseg.MaskSentence != 0 55 } 56 57 // LineBreak returns whether the string can be broken into the next line after 58 // the returned grapheme cluster. If optional is true, the line break is 59 // optional. If false, the line break is mandatory, e.g. after a newline 60 // character. 61 func (s *stepState) LineBreak() (lineBreak, optional bool) { 62 switch s.boundaries & uniseg.MaskLine { 63 case uniseg.LineCanBreak: 64 return true, true 65 case uniseg.LineMustBreak: 66 return true, false 67 } 68 return false, false // uniseg.LineDontBreak. 69 } 70 71 // Width returns the grapheme cluster's width in cells. 72 func (s *stepState) Width() int { 73 return s.boundaries >> uniseg.ShiftWidth 74 } 75 76 // GrossLength returns the grapheme cluster's length in bytes, including any 77 // tags that were parsed but not explicitly returned. 78 func (s *stepState) GrossLength() int { 79 return s.grossLength 80 } 81 82 // Style returns the style for the grapheme cluster. 83 func (s *stepState) Style() tcell.Style { 84 return s.style 85 } 86 87 // step uses uniseg.Step to iterate over the grapheme clusters of a string but 88 // (optionally) also parses the string for style or region tags. 89 // 90 // This function can be called consecutively to extract all grapheme clusters 91 // from str, without returning any contained (parsed) tags. The return values 92 // are the first grapheme cluster, the remaining string, and the new state. Pass 93 // the remaining string and the returned state to the next call. If the rest 94 // string is empty, parsing is complete. Call the returned state's methods for 95 // boundary and cluster width information. 96 // 97 // The returned cluster may be empty if the given string consists of only 98 // (parsed) tags. The boundary and width information will be meaningless in 99 // this case but the style will describe the style at the end of the string. 100 // 101 // Pass nil for state on the first call. This will assume an initial style with 102 // [Styles.PrimitiveBackgroundColor] as the background color and 103 // [Styles.PrimaryTextColor] as the text color, no current region. If you want 104 // to start with a different style or region, you can set the state accordingly 105 // but you must then set [state.unisegState] to -1. 106 // 107 // There is no need to call uniseg.HasTrailingLineBreakInString on the last 108 // non-empty cluster as this function will do this for you and adjust the 109 // returned boundaries accordingly. 110 func step(str string, state *stepState, opts stepOptions) (cluster, rest string, newState *stepState) { 111 // Set up initial state. 112 if state == nil { 113 state = &stepState{ 114 unisegState: -1, 115 style: tcell.StyleDefault.Background(Styles.PrimitiveBackgroundColor).Foreground(Styles.PrimaryTextColor), 116 } 117 } 118 if state.unisegState < 0 { 119 state.initialForeground, state.initialBackground, state.initialAttributes = state.style.Decompose() 120 } 121 if len(str) == 0 { 122 newState = state 123 return 124 } 125 126 // Get a grapheme cluster. 127 preState := state.unisegState 128 cluster, rest, state.boundaries, state.unisegState = uniseg.StepString(str, preState) 129 state.grossLength = len(cluster) 130 if rest == "" { 131 if !uniseg.HasTrailingLineBreakInString(cluster) { 132 state.boundaries &^= uniseg.MaskLine 133 } 134 } 135 136 // Parse tags. 137 if opts != 0 { 138 const ( 139 etNone int = iota 140 etStart 141 etChar 142 etClosing 143 ) 144 145 // Finite state machine for escaped tags. 146 switch state.escapedTagState { 147 case etStart: 148 if cluster[0] == '[' || cluster[0] == ']' { // Invalid escaped tag. 149 state.escapedTagState = etNone 150 } else { // Other characters are allowed. 151 state.escapedTagState = etChar 152 } 153 case etChar: 154 if cluster[0] == ']' { // In theory, this should not happen. 155 state.escapedTagState = etNone 156 } else if cluster[0] == '[' { // Starting closing sequence. 157 // Swallow the first one. 158 cluster, rest, state.boundaries, state.unisegState = uniseg.StepString(rest, preState) 159 state.grossLength += len(cluster) 160 if cluster[0] == ']' { 161 state.escapedTagState = etNone 162 } else { 163 state.escapedTagState = etClosing 164 } 165 } // More characters. Remain in etChar. 166 case etClosing: 167 if cluster[0] != '[' { 168 state.escapedTagState = etNone 169 } 170 } 171 172 // Regular tags. 173 if state.escapedTagState == etNone { 174 if cluster[0] == '[' { 175 // We've already opened a tag. Parse it. 176 length, style, region := parseTag(str, state) 177 if length > 0 { 178 state.style = style 179 state.region = region 180 cluster, rest, state.boundaries, state.unisegState = uniseg.StepString(str[length:], preState) 181 state.grossLength = len(cluster) + length 182 if rest == "" { 183 if !uniseg.HasTrailingLineBreakInString(cluster) { 184 state.boundaries &^= uniseg.MaskLine 185 } 186 } 187 } 188 // Is this an escaped tag? 189 if escapedTagPattern.MatchString(str[length:]) { 190 state.escapedTagState = etStart 191 } 192 } 193 if len(rest) > 0 && rest[0] == '[' { 194 // A tag might follow the cluster. If so, we need to fix the state 195 // for the boundaries to be correct. 196 if length, _, _ := parseTag(rest, state); length > 0 { 197 if len(rest) > length { 198 _, l := utf8.DecodeRuneInString(rest[length:]) 199 cluster += rest[length : length+l] 200 } 201 var taglessRest string 202 cluster, taglessRest, state.boundaries, state.unisegState = uniseg.StepString(cluster, preState) 203 if taglessRest == "" { 204 if !uniseg.HasTrailingLineBreakInString(cluster) { 205 state.boundaries &^= uniseg.MaskLine 206 } 207 } 208 } 209 } 210 } 211 } 212 213 newState = state 214 return 215 } 216 217 // parseTag parses str for consecutive style and/or region tags, assuming that 218 // str starts with the opening bracket for the first tag. It returns the string 219 // length of all valid tags (0 if the first tag is not valid) and the updated 220 // style and region for valid tags (based on the provided state). 221 func parseTag(str string, state *stepState) (length int, style tcell.Style, region string) { 222 // Automata states for parsing tags. 223 const ( 224 tagStateNone = iota 225 tagStateDoneTag 226 tagStateStart 227 tagStateRegionStart 228 tagStateEndForeground 229 tagStateStartBackground 230 tagStateNumericForeground 231 tagStateNameForeground 232 tagStateEndBackground 233 tagStateStartAttributes 234 tagStateNumericBackground 235 tagStateNameBackground 236 tagStateAttributes 237 tagStateRegionEnd 238 tagStateRegionName 239 tagStateEndAttributes 240 tagStateStartURL 241 tagStateEndURL 242 tagStateURL 243 ) 244 245 // Helper function which checks if the given byte is one of a list of 246 // characters, including letters and digits. 247 isOneOf := func(b byte, chars string) bool { 248 if b >= 'a' && b <= 'z' || b >= 'A' && b <= 'Z' || b >= '0' && b <= '9' { 249 return true 250 } 251 return strings.IndexByte(chars, b) >= 0 252 } 253 254 // Attribute map. 255 attrs := map[byte]tcell.AttrMask{ 256 'B': tcell.AttrBold, 257 'I': tcell.AttrItalic, 258 'L': tcell.AttrBlink, 259 'D': tcell.AttrDim, 260 'S': tcell.AttrStrikeThrough, 261 'R': tcell.AttrReverse, 262 } 263 264 var ( 265 tagState, tagLength int 266 tempStr strings.Builder 267 ) 268 tStyle := state.style 269 tRegion := state.region 270 271 // Process state transitions. 272 for len(str) > 0 { 273 ch := str[0] 274 str = str[1:] 275 tagLength++ 276 277 // Transition. 278 switch tagState { 279 case tagStateNone: 280 if ch == '[' { // Start of a tag. 281 tagState = tagStateStart 282 } else { // Not a tag. We're done. 283 return 284 } 285 case tagStateStart: 286 switch { 287 case ch == '"': // Start of a region tag. 288 tempStr.Reset() 289 tagState = tagStateRegionStart 290 case !isOneOf(ch, "#:-"): // Invalid style tag. 291 return 292 case ch == '-': // Reset foreground color. 293 tStyle = tStyle.Foreground(state.initialForeground) 294 tagState = tagStateEndForeground 295 case ch == ':': // No foreground color. 296 tagState = tagStateStartBackground 297 default: 298 tempStr.Reset() 299 tempStr.WriteByte(ch) 300 if ch == '#' { // Numeric foreground color. 301 tagState = tagStateNumericForeground 302 } else { // Letters or numbers. 303 tagState = tagStateNameForeground 304 } 305 } 306 case tagStateEndForeground: 307 switch ch { 308 case ']': // End of tag. 309 tagState = tagStateDoneTag 310 case ':': 311 tagState = tagStateStartBackground 312 default: // Invalid tag. 313 return 314 } 315 case tagStateNumericForeground: 316 if ch == ']' || ch == ':' { 317 if tempStr.Len() != 7 { // Must be #rrggbb. 318 return 319 } 320 tStyle = tStyle.Foreground(tcell.GetColor(tempStr.String())) 321 } 322 switch { 323 case ch == ']': // End of tag. 324 tagState = tagStateDoneTag 325 case ch == ':': // Start of background color. 326 tagState = tagStateStartBackground 327 case strings.IndexByte("0123456789abcdefABCDEF", ch) >= 0: // Hex digit. 328 tempStr.WriteByte(ch) 329 tagState = tagStateNumericForeground 330 default: // Invalid tag. 331 return 332 } 333 case tagStateNameForeground: 334 if ch == ']' || ch == ':' { 335 name := tempStr.String() 336 if name[0] >= '0' && name[0] <= '9' { // Must not start with a digit. 337 return 338 } 339 tStyle = tStyle.Foreground(tcell.ColorNames[name]) 340 } 341 switch { 342 case !isOneOf(ch, "]:"): // Invalid tag. 343 return 344 case ch == ']': // End of tag. 345 tagState = tagStateDoneTag 346 case ch == ':': // Start of background color. 347 tagState = tagStateStartBackground 348 default: // Letters or numbers. 349 tempStr.WriteByte(ch) 350 } 351 case tagStateStartBackground: 352 switch { 353 case !isOneOf(ch, "#:-]"): // Invalid style tag. 354 return 355 case ch == ']': // End of tag. 356 tagState = tagStateDoneTag 357 case ch == '-': // Reset background color. 358 tStyle = tStyle.Background(state.initialBackground) 359 tagState = tagStateEndBackground 360 case ch == ':': // No background color. 361 tagState = tagStateStartAttributes 362 default: 363 tempStr.Reset() 364 tempStr.WriteByte(ch) 365 if ch == '#' { // Numeric background color. 366 tagState = tagStateNumericBackground 367 } else { // Letters or numbers. 368 tagState = tagStateNameBackground 369 } 370 } 371 case tagStateEndBackground: 372 switch ch { 373 case ']': // End of tag. 374 tagState = tagStateDoneTag 375 case ':': // Start of attributes. 376 tagState = tagStateStartAttributes 377 default: // Invalid tag. 378 return 379 } 380 case tagStateNumericBackground: 381 if ch == ']' || ch == ':' { 382 if tempStr.Len() != 7 { // Must be #rrggbb. 383 return 384 } 385 tStyle = tStyle.Background(tcell.GetColor(tempStr.String())) 386 } 387 if ch == ']' { // End of tag. 388 tagState = tagStateDoneTag 389 } else if ch == ':' { // Start of attributes. 390 tagState = tagStateStartAttributes 391 } else if strings.IndexByte("0123456789abcdefABCDEF", ch) >= 0 { // Hex digit. 392 tempStr.WriteByte(ch) 393 tagState = tagStateNumericBackground 394 } else { // Invalid tag. 395 return 396 } 397 case tagStateNameBackground: 398 if ch == ']' || ch == ':' { 399 name := tempStr.String() 400 if name[0] >= '0' && name[0] <= '9' { // Must not start with a digit. 401 return 402 } 403 tStyle = tStyle.Background(tcell.ColorNames[name]) 404 } 405 switch { 406 case !isOneOf(ch, "]:"): // Invalid tag. 407 return 408 case ch == ']': // End of tag. 409 tagState = tagStateDoneTag 410 case ch == ':': // Start of background color. 411 tagState = tagStateStartAttributes 412 default: // Letters or numbers. 413 tempStr.WriteByte(ch) 414 } 415 case tagStateStartAttributes: 416 switch { 417 case ch == ']': // End of tag. 418 tagState = tagStateDoneTag 419 case ch == '-': // Reset attributes. 420 tStyle = tStyle.Attributes(state.initialAttributes) 421 tagState = tagStateEndAttributes 422 case ch == ':': // Start of URL. 423 tagState = tagStateStartURL 424 case strings.IndexByte("buildsrBUILDSR", ch) >= 0: // Attribute tag. 425 tempStr.Reset() 426 tempStr.WriteByte(ch) 427 tagState = tagStateAttributes 428 default: // Invalid tag. 429 return 430 } 431 case tagStateAttributes: 432 if ch == ']' || ch == ':' { 433 flags := tempStr.String() 434 _, _, a := tStyle.Decompose() 435 for index := 0; index < len(flags); index++ { 436 ch := flags[index] 437 switch { 438 case ch == 'u': 439 tStyle = tStyle.Underline(true) 440 case ch == 'U': 441 tStyle = tStyle.Underline(false) 442 case ch >= 'a' && ch <= 'z': 443 a |= attrs[ch-('a'-'A')] 444 default: 445 a &^= attrs[ch] 446 } 447 } 448 tStyle = tStyle.Attributes(a) 449 } 450 switch { 451 case ch == ']': // End of tag. 452 tagState = tagStateDoneTag 453 case ch == ':': // Start of URL. 454 tagState = tagStateStartURL 455 case strings.IndexByte("buildsrBUILDSR", ch) >= 0: // Attribute tag. 456 tempStr.WriteByte(ch) 457 default: // Invalid tag. 458 return 459 } 460 case tagStateEndAttributes: 461 switch ch { 462 case ']': // End of tag. 463 tagState = tagStateDoneTag 464 case ':': // Start of URL. 465 tagState = tagStateStartURL 466 default: // Invalid tag. 467 return 468 } 469 case tagStateStartURL: 470 switch ch { 471 case ']': // End of tag. 472 tagState = tagStateDoneTag 473 case '-': // Reset URL. 474 tStyle = tStyle.Url("").UrlId("") 475 tagState = tagStateEndURL 476 default: // URL character. 477 tempStr.Reset() 478 tempStr.WriteByte(ch) 479 tStyle = tStyle.UrlId(strconv.Itoa(int(rand.Uint32()))) // Generate a unique ID for this URL. 480 tagState = tagStateURL 481 } 482 case tagStateEndURL: 483 if ch == ']' { // End of tag. 484 tagState = tagStateDoneTag 485 } else { // Invalid tag. 486 return 487 } 488 case tagStateURL: 489 if ch == ']' { // End of tag. 490 tStyle = tStyle.Url(tempStr.String()) 491 tagState = tagStateDoneTag 492 } else { // URL character. 493 tempStr.WriteByte(ch) 494 } 495 case tagStateRegionStart: 496 switch { 497 case ch == '"': // End of region tag. 498 tagState = tagStateRegionEnd 499 case isOneOf(ch, "_,;: -."): // Region name. 500 tempStr.WriteByte(ch) 501 tagState = tagStateRegionName 502 default: // Invalid tag. 503 return 504 } 505 case tagStateRegionEnd: 506 if ch == ']' { // End of tag. 507 tRegion = tempStr.String() 508 tagState = tagStateDoneTag 509 } else { // Invalid tag. 510 return 511 } 512 case tagStateRegionName: 513 switch { 514 case ch == '"': // End of region tag. 515 tagState = tagStateRegionEnd 516 case isOneOf(ch, "_,;: -."): // Region name. 517 tempStr.WriteByte(ch) 518 default: // Invalid tag. 519 return 520 } 521 } 522 523 // The last transition led to a tag end. Make the tag permanent. 524 if tagState == tagStateDoneTag { 525 length, style, region = tagLength, tStyle, tRegion 526 tagState = tagStateNone // Reset state. 527 } 528 } 529 530 return 531 } 532 533 // TaggedStringWidth returns the width of the given string needed to print it on 534 // screen. The text may contain style tags which are not counted. 535 func TaggedStringWidth(text string) (width int) { 536 var state *stepState 537 for len(text) > 0 { 538 _, text, state = step(text, state, stepOptionsStyle) 539 width += state.Width() 540 } 541 return 542 } 543 544 // WordWrap splits a text such that each resulting line does not exceed the 545 // given screen width. Split points are determined using the algorithm described 546 // in [Unicode Standard Annex #14]. 547 // 548 // This function considers style tags to have no width. 549 // 550 // [Unicode Standard Annex #14]: https://www.unicode.org/reports/tr14/ 551 func WordWrap(text string, width int) (lines []string) { 552 if width <= 0 { 553 return 554 } 555 556 var ( 557 state *stepState 558 lineWidth, lineLength, lastOption, lastOptionWidth int 559 ) 560 str := text 561 for len(str) > 0 { 562 // Parse the next character. 563 _, str, state = step(str, state, stepOptionsStyle) 564 cWidth := state.Width() 565 566 // Would it exceed the line width? 567 if lineWidth+cWidth > width { 568 if lastOptionWidth == 0 { 569 // No split point so far. Just split at the current position. 570 lines = append(lines, text[:lineLength]) 571 text = text[lineLength:] 572 lineWidth, lineLength, lastOption, lastOptionWidth = 0, 0, 0, 0 573 } else { 574 // Split at the last split point. 575 lines = append(lines, text[:lastOption]) 576 text = text[lastOption:] 577 lineWidth -= lastOptionWidth 578 lineLength -= lastOption 579 lastOption, lastOptionWidth = 0, 0 580 } 581 } 582 583 // Move ahead. 584 lineWidth += cWidth 585 lineLength += state.GrossLength() 586 587 // Check for split points. 588 if lineBreak, optional := state.LineBreak(); lineBreak { 589 if optional { 590 // Remember this split point. 591 lastOption = lineLength 592 lastOptionWidth = lineWidth 593 } else { 594 // We must split here. 595 lines = append(lines, strings.TrimRight(text[:lineLength], "\n\r")) 596 text = text[lineLength:] 597 lineWidth, lineLength, lastOption, lastOptionWidth = 0, 0, 0, 0 598 } 599 } 600 } 601 lines = append(lines, text) 602 603 return 604 } 605 606 // Escape escapes the given text such that color and/or region tags are not 607 // recognized and substituted by the print functions of this package. For 608 // example, to include a tag-like string in a box title or in a TextView: 609 // 610 // box.SetTitle(tview.Escape("[squarebrackets]")) 611 // fmt.Fprint(textView, tview.Escape(`["quoted"]`)) 612 func Escape(text string) string { 613 return escapePattern.ReplaceAllString(text, "$1[]") 614 } 615 616 // Unescape unescapes text previously escaped with [Escape]. 617 func Unescape(text string) string { 618 return unescapePattern.ReplaceAllString(text, "$1]") 619 } 620 621 // stripTags strips style tags from the given string. (Region tags are not 622 // stripped.) 623 func stripTags(text string) string { 624 var ( 625 str strings.Builder 626 state *stepState 627 ) 628 for len(text) > 0 { 629 var c string 630 c, text, state = step(text, state, stepOptionsStyle) 631 str.WriteString(c) 632 } 633 return str.String() 634 }