Source file src/unicode/graphic.go

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package unicode
     6  
     7  // Bit masks for each code point under U+0100, for fast lookup.
     8  const (
     9  	pC     = 1 << iota // a control character.
    10  	pP                 // a punctuation character.
    11  	pN                 // a numeral.
    12  	pS                 // a symbolic character.
    13  	pZ                 // a spacing character.
    14  	pLu                // an upper-case letter.
    15  	pLl                // a lower-case letter.
    16  	pp                 // a printable character according to Go's definition.
    17  	pg     = pp | pZ   // a graphical character according to the Unicode definition.
    18  	pLo    = pLl | pLu // a letter that is neither upper nor lower case.
    19  	pLmask = pLo
    20  )
    21  
    22  // GraphicRanges defines the set of graphic characters according to Unicode.
    23  var GraphicRanges = []*RangeTable{
    24  	L, M, N, P, S, Zs,
    25  }
    26  
    27  // PrintRanges defines the set of printable characters according to Go.
    28  // ASCII space, U+0020, is handled separately.
    29  var PrintRanges = []*RangeTable{
    30  	L, M, N, P, S,
    31  }
    32  
    33  // IsGraphic reports whether the rune is defined as a Graphic by Unicode.
    34  // Such characters include letters, marks, numbers, punctuation, symbols, and
    35  // spaces, from categories L, M, N, P, S, Zs.
    36  func IsGraphic(r rune) bool {
    37  	// We convert to uint32 to avoid the extra test for negative,
    38  	// and in the index we convert to uint8 to avoid the range check.
    39  	if uint32(r) <= MaxLatin1 {
    40  		return properties[uint8(r)]&pg != 0
    41  	}
    42  	return In(r, GraphicRanges...)
    43  }
    44  
    45  // IsPrint reports whether the rune is defined as printable by Go. Such
    46  // characters include letters, marks, numbers, punctuation, symbols, and the
    47  // ASCII space character, from categories L, M, N, P, S and the ASCII space
    48  // character. This categorization is the same as IsGraphic except that the
    49  // only spacing character is ASCII space, U+0020.
    50  func IsPrint(r rune) bool {
    51  	if uint32(r) <= MaxLatin1 {
    52  		return properties[uint8(r)]&pp != 0
    53  	}
    54  	return In(r, PrintRanges...)
    55  }
    56  
    57  // IsOneOf reports whether the rune is a member of one of the ranges.
    58  // The function "In" provides a nicer signature and should be used in preference to IsOneOf.
    59  func IsOneOf(ranges []*RangeTable, r rune) bool {
    60  	for _, inside := range ranges {
    61  		if Is(inside, r) {
    62  			return true
    63  		}
    64  	}
    65  	return false
    66  }
    67  
    68  // In reports whether the rune is a member of one of the ranges.
    69  func In(r rune, ranges ...*RangeTable) bool {
    70  	for _, inside := range ranges {
    71  		if Is(inside, r) {
    72  			return true
    73  		}
    74  	}
    75  	return false
    76  }
    77  
    78  // IsControl reports whether the rune is a control character.
    79  // The C (Other) Unicode category includes more code points
    80  // such as surrogates; use Is(C, r) to test for them.
    81  func IsControl(r rune) bool {
    82  	if uint32(r) <= MaxLatin1 {
    83  		return properties[uint8(r)]&pC != 0
    84  	}
    85  	// All control characters are < MaxLatin1.
    86  	return false
    87  }
    88  
    89  // IsLetter reports whether the rune is a letter (category L).
    90  func IsLetter(r rune) bool {
    91  	if uint32(r) <= MaxLatin1 {
    92  		return properties[uint8(r)]&(pLmask) != 0
    93  	}
    94  	return isExcludingLatin(Letter, r)
    95  }
    96  
    97  // IsMark reports whether the rune is a mark character (category M).
    98  func IsMark(r rune) bool {
    99  	// There are no mark characters in Latin-1.
   100  	return isExcludingLatin(Mark, r)
   101  }
   102  
   103  // IsNumber reports whether the rune is a number (category N).
   104  func IsNumber(r rune) bool {
   105  	if uint32(r) <= MaxLatin1 {
   106  		return properties[uint8(r)]&pN != 0
   107  	}
   108  	return isExcludingLatin(Number, r)
   109  }
   110  
   111  // IsPunct reports whether the rune is a Unicode punctuation character
   112  // (category P).
   113  func IsPunct(r rune) bool {
   114  	if uint32(r) <= MaxLatin1 {
   115  		return properties[uint8(r)]&pP != 0
   116  	}
   117  	return Is(Punct, r)
   118  }
   119  
   120  // IsSpace reports whether the rune is a space character as defined
   121  // by Unicode's White Space property; in the Latin-1 space
   122  // this is
   123  //	'\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
   124  // Other definitions of spacing characters are set by category
   125  // Z and property Pattern_White_Space.
   126  func IsSpace(r rune) bool {
   127  	// This property isn't the same as Z; special-case it.
   128  	if uint32(r) <= MaxLatin1 {
   129  		switch r {
   130  		case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
   131  			return true
   132  		}
   133  		return false
   134  	}
   135  	return isExcludingLatin(White_Space, r)
   136  }
   137  
   138  // IsSymbol reports whether the rune is a symbolic character.
   139  func IsSymbol(r rune) bool {
   140  	if uint32(r) <= MaxLatin1 {
   141  		return properties[uint8(r)]&pS != 0
   142  	}
   143  	return isExcludingLatin(Symbol, r)
   144  }
   145  

View as plain text