gemini-browser

A text-based gemini browser
git clone git://git.laack.co/gemini-browser.git
Log | Files | Refs | README

core.go (30096B)


      1 // Copyright 2015 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package bidi
      6 
      7 import (
      8 	"fmt"
      9 	"log"
     10 )
     11 
     12 // This implementation is a port based on the reference implementation found at:
     13 // https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
     14 //
     15 // described in Unicode Bidirectional Algorithm (UAX #9).
     16 //
     17 // Input:
     18 // There are two levels of input to the algorithm, since clients may prefer to
     19 // supply some information from out-of-band sources rather than relying on the
     20 // default behavior.
     21 //
     22 // - Bidi class array
     23 // - Bidi class array, with externally supplied base line direction
     24 //
     25 // Output:
     26 // Output is separated into several stages:
     27 //
     28 //  - levels array over entire paragraph
     29 //  - reordering array over entire paragraph
     30 //  - levels array over line
     31 //  - reordering array over line
     32 //
     33 // Note that for conformance to the Unicode Bidirectional Algorithm,
     34 // implementations are only required to generate correct reordering and
     35 // character directionality (odd or even levels) over a line. Generating
     36 // identical level arrays over a line is not required. Bidi explicit format
     37 // codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and
     38 // positions as long as the rest of the input is properly reordered.
     39 //
     40 // As the algorithm is defined to operate on a single paragraph at a time, this
     41 // implementation is written to handle single paragraphs. Thus rule P1 is
     42 // presumed by this implementation-- the data provided to the implementation is
     43 // assumed to be a single paragraph, and either contains no 'B' codes, or a
     44 // single 'B' code at the end of the input. 'B' is allowed as input to
     45 // illustrate how the algorithm assigns it a level.
     46 //
     47 // Also note that rules L3 and L4 depend on the rendering engine that uses the
     48 // result of the bidi algorithm. This implementation assumes that the rendering
     49 // engine expects combining marks in visual order (e.g. to the left of their
     50 // base character in RTL runs) and that it adjusts the glyphs used to render
     51 // mirrored characters that are in RTL runs so that they render appropriately.
     52 
     53 // level is the embedding level of a character. Even embedding levels indicate
     54 // left-to-right order and odd levels indicate right-to-left order. The special
     55 // level of -1 is reserved for undefined order.
     56 type level int8
     57 
     58 const implicitLevel level = -1
     59 
     60 // in returns if x is equal to any of the values in set.
     61 func (c Class) in(set ...Class) bool {
     62 	for _, s := range set {
     63 		if c == s {
     64 			return true
     65 		}
     66 	}
     67 	return false
     68 }
     69 
     70 // A paragraph contains the state of a paragraph.
     71 type paragraph struct {
     72 	initialTypes []Class
     73 
     74 	// Arrays of properties needed for paired bracket evaluation in N0
     75 	pairTypes  []bracketType // paired Bracket types for paragraph
     76 	pairValues []rune        // rune for opening bracket or pbOpen and pbClose; 0 for pbNone
     77 
     78 	embeddingLevel level // default: = implicitLevel;
     79 
     80 	// at the paragraph levels
     81 	resultTypes  []Class
     82 	resultLevels []level
     83 
     84 	// Index of matching PDI for isolate initiator characters. For other
     85 	// characters, the value of matchingPDI will be set to -1. For isolate
     86 	// initiators with no matching PDI, matchingPDI will be set to the length of
     87 	// the input string.
     88 	matchingPDI []int
     89 
     90 	// Index of matching isolate initiator for PDI characters. For other
     91 	// characters, and for PDIs with no matching isolate initiator, the value of
     92 	// matchingIsolateInitiator will be set to -1.
     93 	matchingIsolateInitiator []int
     94 }
     95 
     96 // newParagraph initializes a paragraph. The user needs to supply a few arrays
     97 // corresponding to the preprocessed text input. The types correspond to the
     98 // Unicode BiDi classes for each rune. pairTypes indicates the bracket type for
     99 // each rune. pairValues provides a unique bracket class identifier for each
    100 // rune (suggested is the rune of the open bracket for opening and matching
    101 // close brackets, after normalization). The embedding levels are optional, but
    102 // may be supplied to encode embedding levels of styled text.
    103 func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) (*paragraph, error) {
    104 	var err error
    105 	if err = validateTypes(types); err != nil {
    106 		return nil, err
    107 	}
    108 	if err = validatePbTypes(pairTypes); err != nil {
    109 		return nil, err
    110 	}
    111 	if err = validatePbValues(pairValues, pairTypes); err != nil {
    112 		return nil, err
    113 	}
    114 	if err = validateParagraphEmbeddingLevel(levels); err != nil {
    115 		return nil, err
    116 	}
    117 
    118 	p := &paragraph{
    119 		initialTypes:   append([]Class(nil), types...),
    120 		embeddingLevel: levels,
    121 
    122 		pairTypes:  pairTypes,
    123 		pairValues: pairValues,
    124 
    125 		resultTypes: append([]Class(nil), types...),
    126 	}
    127 	p.run()
    128 	return p, nil
    129 }
    130 
    131 func (p *paragraph) Len() int { return len(p.initialTypes) }
    132 
    133 // The algorithm. Does not include line-based processing (Rules L1, L2).
    134 // These are applied later in the line-based phase of the algorithm.
    135 func (p *paragraph) run() {
    136 	p.determineMatchingIsolates()
    137 
    138 	// 1) determining the paragraph level
    139 	// Rule P1 is the requirement for entering this algorithm.
    140 	// Rules P2, P3.
    141 	// If no externally supplied paragraph embedding level, use default.
    142 	if p.embeddingLevel == implicitLevel {
    143 		p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len())
    144 	}
    145 
    146 	// Initialize result levels to paragraph embedding level.
    147 	p.resultLevels = make([]level, p.Len())
    148 	setLevels(p.resultLevels, p.embeddingLevel)
    149 
    150 	// 2) Explicit levels and directions
    151 	// Rules X1-X8.
    152 	p.determineExplicitEmbeddingLevels()
    153 
    154 	// Rule X9.
    155 	// We do not remove the embeddings, the overrides, the PDFs, and the BNs
    156 	// from the string explicitly. But they are not copied into isolating run
    157 	// sequences when they are created, so they are removed for all
    158 	// practical purposes.
    159 
    160 	// Rule X10.
    161 	// Run remainder of algorithm one isolating run sequence at a time
    162 	for _, seq := range p.determineIsolatingRunSequences() {
    163 		// 3) resolving weak types
    164 		// Rules W1-W7.
    165 		seq.resolveWeakTypes()
    166 
    167 		// 4a) resolving paired brackets
    168 		// Rule N0
    169 		resolvePairedBrackets(seq)
    170 
    171 		// 4b) resolving neutral types
    172 		// Rules N1-N3.
    173 		seq.resolveNeutralTypes()
    174 
    175 		// 5) resolving implicit embedding levels
    176 		// Rules I1, I2.
    177 		seq.resolveImplicitLevels()
    178 
    179 		// Apply the computed levels and types
    180 		seq.applyLevelsAndTypes()
    181 	}
    182 
    183 	// Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and
    184 	// BNs. This is for convenience, so the resulting level array will have
    185 	// a value for every character.
    186 	p.assignLevelsToCharactersRemovedByX9()
    187 }
    188 
    189 // determineMatchingIsolates determines the matching PDI for each isolate
    190 // initiator and vice versa.
    191 //
    192 // Definition BD9.
    193 //
    194 // At the end of this function:
    195 //
    196 //   - The member variable matchingPDI is set to point to the index of the
    197 //     matching PDI character for each isolate initiator character. If there is
    198 //     no matching PDI, it is set to the length of the input text. For other
    199 //     characters, it is set to -1.
    200 //   - The member variable matchingIsolateInitiator is set to point to the
    201 //     index of the matching isolate initiator character for each PDI character.
    202 //     If there is no matching isolate initiator, or the character is not a PDI,
    203 //     it is set to -1.
    204 func (p *paragraph) determineMatchingIsolates() {
    205 	p.matchingPDI = make([]int, p.Len())
    206 	p.matchingIsolateInitiator = make([]int, p.Len())
    207 
    208 	for i := range p.matchingIsolateInitiator {
    209 		p.matchingIsolateInitiator[i] = -1
    210 	}
    211 
    212 	for i := range p.matchingPDI {
    213 		p.matchingPDI[i] = -1
    214 
    215 		if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) {
    216 			depthCounter := 1
    217 			for j := i + 1; j < p.Len(); j++ {
    218 				if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) {
    219 					depthCounter++
    220 				} else if u == PDI {
    221 					if depthCounter--; depthCounter == 0 {
    222 						p.matchingPDI[i] = j
    223 						p.matchingIsolateInitiator[j] = i
    224 						break
    225 					}
    226 				}
    227 			}
    228 			if p.matchingPDI[i] == -1 {
    229 				p.matchingPDI[i] = p.Len()
    230 			}
    231 		}
    232 	}
    233 }
    234 
    235 // determineParagraphEmbeddingLevel reports the resolved paragraph direction of
    236 // the substring limited by the given range [start, end).
    237 //
    238 // Determines the paragraph level based on rules P2, P3. This is also used
    239 // in rule X5c to find if an FSI should resolve to LRI or RLI.
    240 func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level {
    241 	var strongType Class = unknownClass
    242 
    243 	// Rule P2.
    244 	for i := start; i < end; i++ {
    245 		if t := p.resultTypes[i]; t.in(L, AL, R) {
    246 			strongType = t
    247 			break
    248 		} else if t.in(FSI, LRI, RLI) {
    249 			i = p.matchingPDI[i] // skip over to the matching PDI
    250 			if i > end {
    251 				log.Panic("assert (i <= end)")
    252 			}
    253 		}
    254 	}
    255 	// Rule P3.
    256 	switch strongType {
    257 	case unknownClass: // none found
    258 		// default embedding level when no strong types found is 0.
    259 		return 0
    260 	case L:
    261 		return 0
    262 	default: // AL, R
    263 		return 1
    264 	}
    265 }
    266 
    267 const maxDepth = 125
    268 
    269 // This stack will store the embedding levels and override and isolated
    270 // statuses
    271 type directionalStatusStack struct {
    272 	stackCounter        int
    273 	embeddingLevelStack [maxDepth + 1]level
    274 	overrideStatusStack [maxDepth + 1]Class
    275 	isolateStatusStack  [maxDepth + 1]bool
    276 }
    277 
    278 func (s *directionalStatusStack) empty()     { s.stackCounter = 0 }
    279 func (s *directionalStatusStack) pop()       { s.stackCounter-- }
    280 func (s *directionalStatusStack) depth() int { return s.stackCounter }
    281 
    282 func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) {
    283 	s.embeddingLevelStack[s.stackCounter] = level
    284 	s.overrideStatusStack[s.stackCounter] = overrideStatus
    285 	s.isolateStatusStack[s.stackCounter] = isolateStatus
    286 	s.stackCounter++
    287 }
    288 
    289 func (s *directionalStatusStack) lastEmbeddingLevel() level {
    290 	return s.embeddingLevelStack[s.stackCounter-1]
    291 }
    292 
    293 func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class {
    294 	return s.overrideStatusStack[s.stackCounter-1]
    295 }
    296 
    297 func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool {
    298 	return s.isolateStatusStack[s.stackCounter-1]
    299 }
    300 
    301 // Determine explicit levels using rules X1 - X8
    302 func (p *paragraph) determineExplicitEmbeddingLevels() {
    303 	var stack directionalStatusStack
    304 	var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int
    305 
    306 	// Rule X1.
    307 	stack.push(p.embeddingLevel, ON, false)
    308 
    309 	for i, t := range p.resultTypes {
    310 		// Rules X2, X3, X4, X5, X5a, X5b, X5c
    311 		switch t {
    312 		case RLE, LRE, RLO, LRO, RLI, LRI, FSI:
    313 			isIsolate := t.in(RLI, LRI, FSI)
    314 			isRTL := t.in(RLE, RLO, RLI)
    315 
    316 			// override if this is an FSI that resolves to RLI
    317 			if t == FSI {
    318 				isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1)
    319 			}
    320 			if isIsolate {
    321 				p.resultLevels[i] = stack.lastEmbeddingLevel()
    322 				if stack.lastDirectionalOverrideStatus() != ON {
    323 					p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
    324 				}
    325 			}
    326 
    327 			var newLevel level
    328 			if isRTL {
    329 				// least greater odd
    330 				newLevel = (stack.lastEmbeddingLevel() + 1) | 1
    331 			} else {
    332 				// least greater even
    333 				newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1
    334 			}
    335 
    336 			if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 {
    337 				if isIsolate {
    338 					validIsolateCount++
    339 				}
    340 				// Push new embedding level, override status, and isolated
    341 				// status.
    342 				// No check for valid stack counter, since the level check
    343 				// suffices.
    344 				switch t {
    345 				case LRO:
    346 					stack.push(newLevel, L, isIsolate)
    347 				case RLO:
    348 					stack.push(newLevel, R, isIsolate)
    349 				default:
    350 					stack.push(newLevel, ON, isIsolate)
    351 				}
    352 				// Not really part of the spec
    353 				if !isIsolate {
    354 					p.resultLevels[i] = newLevel
    355 				}
    356 			} else {
    357 				// This is an invalid explicit formatting character,
    358 				// so apply the "Otherwise" part of rules X2-X5b.
    359 				if isIsolate {
    360 					overflowIsolateCount++
    361 				} else { // !isIsolate
    362 					if overflowIsolateCount == 0 {
    363 						overflowEmbeddingCount++
    364 					}
    365 				}
    366 			}
    367 
    368 		// Rule X6a
    369 		case PDI:
    370 			if overflowIsolateCount > 0 {
    371 				overflowIsolateCount--
    372 			} else if validIsolateCount == 0 {
    373 				// do nothing
    374 			} else {
    375 				overflowEmbeddingCount = 0
    376 				for !stack.lastDirectionalIsolateStatus() {
    377 					stack.pop()
    378 				}
    379 				stack.pop()
    380 				validIsolateCount--
    381 			}
    382 			p.resultLevels[i] = stack.lastEmbeddingLevel()
    383 
    384 		// Rule X7
    385 		case PDF:
    386 			// Not really part of the spec
    387 			p.resultLevels[i] = stack.lastEmbeddingLevel()
    388 
    389 			if overflowIsolateCount > 0 {
    390 				// do nothing
    391 			} else if overflowEmbeddingCount > 0 {
    392 				overflowEmbeddingCount--
    393 			} else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 {
    394 				stack.pop()
    395 			}
    396 
    397 		case B: // paragraph separator.
    398 			// Rule X8.
    399 
    400 			// These values are reset for clarity, in this implementation B
    401 			// can only occur as the last code in the array.
    402 			stack.empty()
    403 			overflowIsolateCount = 0
    404 			overflowEmbeddingCount = 0
    405 			validIsolateCount = 0
    406 			p.resultLevels[i] = p.embeddingLevel
    407 
    408 		default:
    409 			p.resultLevels[i] = stack.lastEmbeddingLevel()
    410 			if stack.lastDirectionalOverrideStatus() != ON {
    411 				p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
    412 			}
    413 		}
    414 	}
    415 }
    416 
    417 type isolatingRunSequence struct {
    418 	p *paragraph
    419 
    420 	indexes []int // indexes to the original string
    421 
    422 	types          []Class // type of each character using the index
    423 	resolvedLevels []level // resolved levels after application of rules
    424 	level          level
    425 	sos, eos       Class
    426 }
    427 
    428 func (i *isolatingRunSequence) Len() int { return len(i.indexes) }
    429 
    430 func maxLevel(a, b level) level {
    431 	if a > b {
    432 		return a
    433 	}
    434 	return b
    435 }
    436 
    437 // Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types,
    438 // either L or R, for each isolating run sequence.
    439 func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence {
    440 	length := len(indexes)
    441 	types := make([]Class, length)
    442 	for i, x := range indexes {
    443 		types[i] = p.resultTypes[x]
    444 	}
    445 
    446 	// assign level, sos and eos
    447 	prevChar := indexes[0] - 1
    448 	for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) {
    449 		prevChar--
    450 	}
    451 	prevLevel := p.embeddingLevel
    452 	if prevChar >= 0 {
    453 		prevLevel = p.resultLevels[prevChar]
    454 	}
    455 
    456 	var succLevel level
    457 	lastType := types[length-1]
    458 	if lastType.in(LRI, RLI, FSI) {
    459 		succLevel = p.embeddingLevel
    460 	} else {
    461 		// the first character after the end of run sequence
    462 		limit := indexes[length-1] + 1
    463 		for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ {
    464 
    465 		}
    466 		succLevel = p.embeddingLevel
    467 		if limit < p.Len() {
    468 			succLevel = p.resultLevels[limit]
    469 		}
    470 	}
    471 	level := p.resultLevels[indexes[0]]
    472 	return &isolatingRunSequence{
    473 		p:       p,
    474 		indexes: indexes,
    475 		types:   types,
    476 		level:   level,
    477 		sos:     typeForLevel(maxLevel(prevLevel, level)),
    478 		eos:     typeForLevel(maxLevel(succLevel, level)),
    479 	}
    480 }
    481 
    482 // Resolving weak types Rules W1-W7.
    483 //
    484 // Note that some weak types (EN, AN) remain after this processing is
    485 // complete.
    486 func (s *isolatingRunSequence) resolveWeakTypes() {
    487 
    488 	// on entry, only these types remain
    489 	s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI)
    490 
    491 	// Rule W1.
    492 	// Changes all NSMs.
    493 	precedingCharacterType := s.sos
    494 	for i, t := range s.types {
    495 		if t == NSM {
    496 			s.types[i] = precedingCharacterType
    497 		} else {
    498 			// if t.in(LRI, RLI, FSI, PDI) {
    499 			// 	precedingCharacterType = ON
    500 			// }
    501 			precedingCharacterType = t
    502 		}
    503 	}
    504 
    505 	// Rule W2.
    506 	// EN does not change at the start of the run, because sos != AL.
    507 	for i, t := range s.types {
    508 		if t == EN {
    509 			for j := i - 1; j >= 0; j-- {
    510 				if t := s.types[j]; t.in(L, R, AL) {
    511 					if t == AL {
    512 						s.types[i] = AN
    513 					}
    514 					break
    515 				}
    516 			}
    517 		}
    518 	}
    519 
    520 	// Rule W3.
    521 	for i, t := range s.types {
    522 		if t == AL {
    523 			s.types[i] = R
    524 		}
    525 	}
    526 
    527 	// Rule W4.
    528 	// Since there must be values on both sides for this rule to have an
    529 	// effect, the scan skips the first and last value.
    530 	//
    531 	// Although the scan proceeds left to right, and changes the type
    532 	// values in a way that would appear to affect the computations
    533 	// later in the scan, there is actually no problem. A change in the
    534 	// current value can only affect the value to its immediate right,
    535 	// and only affect it if it is ES or CS. But the current value can
    536 	// only change if the value to its right is not ES or CS. Thus
    537 	// either the current value will not change, or its change will have
    538 	// no effect on the remainder of the analysis.
    539 
    540 	for i := 1; i < s.Len()-1; i++ {
    541 		t := s.types[i]
    542 		if t == ES || t == CS {
    543 			prevSepType := s.types[i-1]
    544 			succSepType := s.types[i+1]
    545 			if prevSepType == EN && succSepType == EN {
    546 				s.types[i] = EN
    547 			} else if s.types[i] == CS && prevSepType == AN && succSepType == AN {
    548 				s.types[i] = AN
    549 			}
    550 		}
    551 	}
    552 
    553 	// Rule W5.
    554 	for i, t := range s.types {
    555 		if t == ET {
    556 			// locate end of sequence
    557 			runStart := i
    558 			runEnd := s.findRunLimit(runStart, ET)
    559 
    560 			// check values at ends of sequence
    561 			t := s.sos
    562 			if runStart > 0 {
    563 				t = s.types[runStart-1]
    564 			}
    565 			if t != EN {
    566 				t = s.eos
    567 				if runEnd < len(s.types) {
    568 					t = s.types[runEnd]
    569 				}
    570 			}
    571 			if t == EN {
    572 				setTypes(s.types[runStart:runEnd], EN)
    573 			}
    574 			// continue at end of sequence
    575 			i = runEnd
    576 		}
    577 	}
    578 
    579 	// Rule W6.
    580 	for i, t := range s.types {
    581 		if t.in(ES, ET, CS) {
    582 			s.types[i] = ON
    583 		}
    584 	}
    585 
    586 	// Rule W7.
    587 	for i, t := range s.types {
    588 		if t == EN {
    589 			// set default if we reach start of run
    590 			prevStrongType := s.sos
    591 			for j := i - 1; j >= 0; j-- {
    592 				t = s.types[j]
    593 				if t == L || t == R { // AL's have been changed to R
    594 					prevStrongType = t
    595 					break
    596 				}
    597 			}
    598 			if prevStrongType == L {
    599 				s.types[i] = L
    600 			}
    601 		}
    602 	}
    603 }
    604 
    605 // 6) resolving neutral types Rules N1-N2.
    606 func (s *isolatingRunSequence) resolveNeutralTypes() {
    607 
    608 	// on entry, only these types can be in resultTypes
    609 	s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI)
    610 
    611 	for i, t := range s.types {
    612 		switch t {
    613 		case WS, ON, B, S, RLI, LRI, FSI, PDI:
    614 			// find bounds of run of neutrals
    615 			runStart := i
    616 			runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI)
    617 
    618 			// determine effective types at ends of run
    619 			var leadType, trailType Class
    620 
    621 			// Note that the character found can only be L, R, AN, or
    622 			// EN.
    623 			if runStart == 0 {
    624 				leadType = s.sos
    625 			} else {
    626 				leadType = s.types[runStart-1]
    627 				if leadType.in(AN, EN) {
    628 					leadType = R
    629 				}
    630 			}
    631 			if runEnd == len(s.types) {
    632 				trailType = s.eos
    633 			} else {
    634 				trailType = s.types[runEnd]
    635 				if trailType.in(AN, EN) {
    636 					trailType = R
    637 				}
    638 			}
    639 
    640 			var resolvedType Class
    641 			if leadType == trailType {
    642 				// Rule N1.
    643 				resolvedType = leadType
    644 			} else {
    645 				// Rule N2.
    646 				// Notice the embedding level of the run is used, not
    647 				// the paragraph embedding level.
    648 				resolvedType = typeForLevel(s.level)
    649 			}
    650 
    651 			setTypes(s.types[runStart:runEnd], resolvedType)
    652 
    653 			// skip over run of (former) neutrals
    654 			i = runEnd
    655 		}
    656 	}
    657 }
    658 
    659 func setLevels(levels []level, newLevel level) {
    660 	for i := range levels {
    661 		levels[i] = newLevel
    662 	}
    663 }
    664 
    665 func setTypes(types []Class, newType Class) {
    666 	for i := range types {
    667 		types[i] = newType
    668 	}
    669 }
    670 
    671 // 7) resolving implicit embedding levels Rules I1, I2.
    672 func (s *isolatingRunSequence) resolveImplicitLevels() {
    673 
    674 	// on entry, only these types can be in resultTypes
    675 	s.assertOnly(L, R, EN, AN)
    676 
    677 	s.resolvedLevels = make([]level, len(s.types))
    678 	setLevels(s.resolvedLevels, s.level)
    679 
    680 	if (s.level & 1) == 0 { // even level
    681 		for i, t := range s.types {
    682 			// Rule I1.
    683 			if t == L {
    684 				// no change
    685 			} else if t == R {
    686 				s.resolvedLevels[i] += 1
    687 			} else { // t == AN || t == EN
    688 				s.resolvedLevels[i] += 2
    689 			}
    690 		}
    691 	} else { // odd level
    692 		for i, t := range s.types {
    693 			// Rule I2.
    694 			if t == R {
    695 				// no change
    696 			} else { // t == L || t == AN || t == EN
    697 				s.resolvedLevels[i] += 1
    698 			}
    699 		}
    700 	}
    701 }
    702 
    703 // Applies the levels and types resolved in rules W1-I2 to the
    704 // resultLevels array.
    705 func (s *isolatingRunSequence) applyLevelsAndTypes() {
    706 	for i, x := range s.indexes {
    707 		s.p.resultTypes[x] = s.types[i]
    708 		s.p.resultLevels[x] = s.resolvedLevels[i]
    709 	}
    710 }
    711 
    712 // Return the limit of the run consisting only of the types in validSet
    713 // starting at index. This checks the value at index, and will return
    714 // index if that value is not in validSet.
    715 func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int {
    716 loop:
    717 	for ; index < len(s.types); index++ {
    718 		t := s.types[index]
    719 		for _, valid := range validSet {
    720 			if t == valid {
    721 				continue loop
    722 			}
    723 		}
    724 		return index // didn't find a match in validSet
    725 	}
    726 	return len(s.types)
    727 }
    728 
    729 // Algorithm validation. Assert that all values in types are in the
    730 // provided set.
    731 func (s *isolatingRunSequence) assertOnly(codes ...Class) {
    732 loop:
    733 	for i, t := range s.types {
    734 		for _, c := range codes {
    735 			if t == c {
    736 				continue loop
    737 			}
    738 		}
    739 		log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i])
    740 	}
    741 }
    742 
    743 // determineLevelRuns returns an array of level runs. Each level run is
    744 // described as an array of indexes into the input string.
    745 //
    746 // Determines the level runs. Rule X9 will be applied in determining the
    747 // runs, in the way that makes sure the characters that are supposed to be
    748 // removed are not included in the runs.
    749 func (p *paragraph) determineLevelRuns() [][]int {
    750 	run := []int{}
    751 	allRuns := [][]int{}
    752 	currentLevel := implicitLevel
    753 
    754 	for i := range p.initialTypes {
    755 		if !isRemovedByX9(p.initialTypes[i]) {
    756 			if p.resultLevels[i] != currentLevel {
    757 				// we just encountered a new run; wrap up last run
    758 				if currentLevel >= 0 { // only wrap it up if there was a run
    759 					allRuns = append(allRuns, run)
    760 					run = nil
    761 				}
    762 				// Start new run
    763 				currentLevel = p.resultLevels[i]
    764 			}
    765 			run = append(run, i)
    766 		}
    767 	}
    768 	// Wrap up the final run, if any
    769 	if len(run) > 0 {
    770 		allRuns = append(allRuns, run)
    771 	}
    772 	return allRuns
    773 }
    774 
    775 // Definition BD13. Determine isolating run sequences.
    776 func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence {
    777 	levelRuns := p.determineLevelRuns()
    778 
    779 	// Compute the run that each character belongs to
    780 	runForCharacter := make([]int, p.Len())
    781 	for i, run := range levelRuns {
    782 		for _, index := range run {
    783 			runForCharacter[index] = i
    784 		}
    785 	}
    786 
    787 	sequences := []*isolatingRunSequence{}
    788 
    789 	var currentRunSequence []int
    790 
    791 	for _, run := range levelRuns {
    792 		first := run[0]
    793 		if p.initialTypes[first] != PDI || p.matchingIsolateInitiator[first] == -1 {
    794 			currentRunSequence = nil
    795 			// int run = i;
    796 			for {
    797 				// Copy this level run into currentRunSequence
    798 				currentRunSequence = append(currentRunSequence, run...)
    799 
    800 				last := currentRunSequence[len(currentRunSequence)-1]
    801 				lastT := p.initialTypes[last]
    802 				if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() {
    803 					run = levelRuns[runForCharacter[p.matchingPDI[last]]]
    804 				} else {
    805 					break
    806 				}
    807 			}
    808 			sequences = append(sequences, p.isolatingRunSequence(currentRunSequence))
    809 		}
    810 	}
    811 	return sequences
    812 }
    813 
    814 // Assign level information to characters removed by rule X9. This is for
    815 // ease of relating the level information to the original input data. Note
    816 // that the levels assigned to these codes are arbitrary, they're chosen so
    817 // as to avoid breaking level runs.
    818 func (p *paragraph) assignLevelsToCharactersRemovedByX9() {
    819 	for i, t := range p.initialTypes {
    820 		if t.in(LRE, RLE, LRO, RLO, PDF, BN) {
    821 			p.resultTypes[i] = t
    822 			p.resultLevels[i] = -1
    823 		}
    824 	}
    825 	// now propagate forward the levels information (could have
    826 	// propagated backward, the main thing is not to introduce a level
    827 	// break where one doesn't already exist).
    828 
    829 	if p.resultLevels[0] == -1 {
    830 		p.resultLevels[0] = p.embeddingLevel
    831 	}
    832 	for i := 1; i < len(p.initialTypes); i++ {
    833 		if p.resultLevels[i] == -1 {
    834 			p.resultLevels[i] = p.resultLevels[i-1]
    835 		}
    836 	}
    837 	// Embedding information is for informational purposes only so need not be
    838 	// adjusted.
    839 }
    840 
    841 //
    842 // Output
    843 //
    844 
    845 // getLevels computes levels array breaking lines at offsets in linebreaks.
    846 // Rule L1.
    847 //
    848 // The linebreaks array must include at least one value. The values must be
    849 // in strictly increasing order (no duplicates) between 1 and the length of
    850 // the text, inclusive. The last value must be the length of the text.
    851 func (p *paragraph) getLevels(linebreaks []int) []level {
    852 	// Note that since the previous processing has removed all
    853 	// P, S, and WS values from resultTypes, the values referred to
    854 	// in these rules are the initial types, before any processing
    855 	// has been applied (including processing of overrides).
    856 	//
    857 	// This example implementation has reinserted explicit format codes
    858 	// and BN, in order that the levels array correspond to the
    859 	// initial text. Their final placement is not normative.
    860 	// These codes are treated like WS in this implementation,
    861 	// so they don't interrupt sequences of WS.
    862 
    863 	validateLineBreaks(linebreaks, p.Len())
    864 
    865 	result := append([]level(nil), p.resultLevels...)
    866 
    867 	// don't worry about linebreaks since if there is a break within
    868 	// a series of WS values preceding S, the linebreak itself
    869 	// causes the reset.
    870 	for i, t := range p.initialTypes {
    871 		if t.in(B, S) {
    872 			// Rule L1, clauses one and two.
    873 			result[i] = p.embeddingLevel
    874 
    875 			// Rule L1, clause three.
    876 			for j := i - 1; j >= 0; j-- {
    877 				if isWhitespace(p.initialTypes[j]) { // including format codes
    878 					result[j] = p.embeddingLevel
    879 				} else {
    880 					break
    881 				}
    882 			}
    883 		}
    884 	}
    885 
    886 	// Rule L1, clause four.
    887 	start := 0
    888 	for _, limit := range linebreaks {
    889 		for j := limit - 1; j >= start; j-- {
    890 			if isWhitespace(p.initialTypes[j]) { // including format codes
    891 				result[j] = p.embeddingLevel
    892 			} else {
    893 				break
    894 			}
    895 		}
    896 		start = limit
    897 	}
    898 
    899 	return result
    900 }
    901 
    902 // getReordering returns the reordering of lines from a visual index to a
    903 // logical index for line breaks at the given offsets.
    904 //
    905 // Lines are concatenated from left to right. So for example, the fifth
    906 // character from the left on the third line is
    907 //
    908 //	getReordering(linebreaks)[linebreaks[1] + 4]
    909 //
    910 // (linebreaks[1] is the position after the last character of the second
    911 // line, which is also the index of the first character on the third line,
    912 // and adding four gets the fifth character from the left).
    913 //
    914 // The linebreaks array must include at least one value. The values must be
    915 // in strictly increasing order (no duplicates) between 1 and the length of
    916 // the text, inclusive. The last value must be the length of the text.
    917 func (p *paragraph) getReordering(linebreaks []int) []int {
    918 	validateLineBreaks(linebreaks, p.Len())
    919 
    920 	return computeMultilineReordering(p.getLevels(linebreaks), linebreaks)
    921 }
    922 
    923 // Return multiline reordering array for a given level array. Reordering
    924 // does not occur across a line break.
    925 func computeMultilineReordering(levels []level, linebreaks []int) []int {
    926 	result := make([]int, len(levels))
    927 
    928 	start := 0
    929 	for _, limit := range linebreaks {
    930 		tempLevels := make([]level, limit-start)
    931 		copy(tempLevels, levels[start:])
    932 
    933 		for j, order := range computeReordering(tempLevels) {
    934 			result[start+j] = order + start
    935 		}
    936 		start = limit
    937 	}
    938 	return result
    939 }
    940 
    941 // Return reordering array for a given level array. This reorders a single
    942 // line. The reordering is a visual to logical map. For example, the
    943 // leftmost char is string.charAt(order[0]). Rule L2.
    944 func computeReordering(levels []level) []int {
    945 	result := make([]int, len(levels))
    946 	// initialize order
    947 	for i := range result {
    948 		result[i] = i
    949 	}
    950 
    951 	// locate highest level found on line.
    952 	// Note the rules say text, but no reordering across line bounds is
    953 	// performed, so this is sufficient.
    954 	highestLevel := level(0)
    955 	lowestOddLevel := level(maxDepth + 2)
    956 	for _, level := range levels {
    957 		if level > highestLevel {
    958 			highestLevel = level
    959 		}
    960 		if level&1 != 0 && level < lowestOddLevel {
    961 			lowestOddLevel = level
    962 		}
    963 	}
    964 
    965 	for level := highestLevel; level >= lowestOddLevel; level-- {
    966 		for i := 0; i < len(levels); i++ {
    967 			if levels[i] >= level {
    968 				// find range of text at or above this level
    969 				start := i
    970 				limit := i + 1
    971 				for limit < len(levels) && levels[limit] >= level {
    972 					limit++
    973 				}
    974 
    975 				for j, k := start, limit-1; j < k; j, k = j+1, k-1 {
    976 					result[j], result[k] = result[k], result[j]
    977 				}
    978 				// skip to end of level run
    979 				i = limit
    980 			}
    981 		}
    982 	}
    983 
    984 	return result
    985 }
    986 
    987 // isWhitespace reports whether the type is considered a whitespace type for the
    988 // line break rules.
    989 func isWhitespace(c Class) bool {
    990 	switch c {
    991 	case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS:
    992 		return true
    993 	}
    994 	return false
    995 }
    996 
    997 // isRemovedByX9 reports whether the type is one of the types removed in X9.
    998 func isRemovedByX9(c Class) bool {
    999 	switch c {
   1000 	case LRE, RLE, LRO, RLO, PDF, BN:
   1001 		return true
   1002 	}
   1003 	return false
   1004 }
   1005 
   1006 // typeForLevel reports the strong type (L or R) corresponding to the level.
   1007 func typeForLevel(level level) Class {
   1008 	if (level & 0x1) == 0 {
   1009 		return L
   1010 	}
   1011 	return R
   1012 }
   1013 
   1014 func validateTypes(types []Class) error {
   1015 	if len(types) == 0 {
   1016 		return fmt.Errorf("types is null")
   1017 	}
   1018 	for i, t := range types[:len(types)-1] {
   1019 		if t == B {
   1020 			return fmt.Errorf("B type before end of paragraph at index: %d", i)
   1021 		}
   1022 	}
   1023 	return nil
   1024 }
   1025 
   1026 func validateParagraphEmbeddingLevel(embeddingLevel level) error {
   1027 	if embeddingLevel != implicitLevel &&
   1028 		embeddingLevel != 0 &&
   1029 		embeddingLevel != 1 {
   1030 		return fmt.Errorf("illegal paragraph embedding level: %d", embeddingLevel)
   1031 	}
   1032 	return nil
   1033 }
   1034 
   1035 func validateLineBreaks(linebreaks []int, textLength int) error {
   1036 	prev := 0
   1037 	for i, next := range linebreaks {
   1038 		if next <= prev {
   1039 			return fmt.Errorf("bad linebreak: %d at index: %d", next, i)
   1040 		}
   1041 		prev = next
   1042 	}
   1043 	if prev != textLength {
   1044 		return fmt.Errorf("last linebreak was %d, want %d", prev, textLength)
   1045 	}
   1046 	return nil
   1047 }
   1048 
   1049 func validatePbTypes(pairTypes []bracketType) error {
   1050 	if len(pairTypes) == 0 {
   1051 		return fmt.Errorf("pairTypes is null")
   1052 	}
   1053 	for i, pt := range pairTypes {
   1054 		switch pt {
   1055 		case bpNone, bpOpen, bpClose:
   1056 		default:
   1057 			return fmt.Errorf("illegal pairType value at %d: %v", i, pairTypes[i])
   1058 		}
   1059 	}
   1060 	return nil
   1061 }
   1062 
   1063 func validatePbValues(pairValues []rune, pairTypes []bracketType) error {
   1064 	if pairValues == nil {
   1065 		return fmt.Errorf("pairValues is null")
   1066 	}
   1067 	if len(pairTypes) != len(pairValues) {
   1068 		return fmt.Errorf("pairTypes is different length from pairValues")
   1069 	}
   1070 	return nil
   1071 }