forked from ebhomengo/niki
1
0
Fork 0
niki/vendor/github.com/mailru/easyjson/jlexer/lexer.go

2147 lines
25 KiB
Go
Raw Normal View History

2024-05-14 13:07:09 +00:00
// Package jlexer contains a JSON lexer implementation.
2024-05-14 13:07:09 +00:00
//
2024-05-14 13:07:09 +00:00
// It is expected that it is mostly used with generated parser code, so the interface is tuned
2024-05-14 13:07:09 +00:00
// for a parser that knows what kind of data is expected.
2024-05-14 13:07:09 +00:00
package jlexer
import (
"bytes"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"io"
"strconv"
"unicode"
"unicode/utf16"
"unicode/utf8"
"github.com/josharian/intern"
)
// tokenKind determines type of a token.
type tokenKind byte

const (
	tokenUndef  tokenKind = iota // No token.
	tokenDelim                   // Delimiter: one of '{', '}', '[' or ']'.
	tokenString                  // A string literal, e.g. "abc\u1234"
	tokenNumber                  // Number literal, e.g. 1.5e5
	tokenBool                    // Boolean literal: true or false.
	tokenNull                    // null keyword.
)
// token describes a single token: type, position in the input and value.
type token struct {
	kind            tokenKind // Type of a token.
	boolValue       bool      // Value if a boolean literal token.
	byteValueCloned bool      // true if byteValue was allocated and does not refer to original json body
	byteValue       []byte    // Raw value of a token.
	delimValue      byte      // Delimiter character if kind == tokenDelim, 0 otherwise.
}
// Lexer is a JSON lexer: it iterates over JSON tokens in a byte slice.
type Lexer struct {
	Data []byte // Input data given to the lexer.

	start int // Start of the current token.
	pos   int // Current unscanned position in the input stream.

	token        token // Last scanned token, if token.kind != tokenUndef.
	firstElement bool  // Whether current element is the first in array or an object.
	wantSep      byte  // A comma or a colon character, which needs to occur before a token.

	UseMultipleErrors bool          // If we want to use multiple errors.
	fatalError        error         // Fatal error occurred during lexing. It is usually a syntax error.
	multipleErrors    []*LexerError // Semantic errors occurred during lexing. Marshalling will be continued after finding these errors.
}
// FetchToken scans the input for the next token.
//
// It skips leading whitespace and separators (validating that an expected
// ',' or ':' is present), then dispatches on the first significant character
// to the specialized fetch* helper. On success r.token describes the new
// token; on exhausted input r.fatalError is set to io.EOF.
func (r *Lexer) FetchToken() {
	r.token.kind = tokenUndef
	r.start = r.pos

	// Check if r.Data has r.pos element
	// If it doesn't, it mean corrupted input data
	if len(r.Data) < r.pos {
		r.errParse("Unexpected end of data")
		return
	}

	// Determine the type of a token by skipping whitespace and reading the
	// first character.
	for _, c := range r.Data[r.pos:] {
		switch c {
		case ':', ',':
			// A separator is consumed only when one is currently expected.
			if r.wantSep == c {
				r.pos++
				r.start++
				r.wantSep = 0
			} else {
				r.errSyntax()
			}

		case ' ', '\t', '\r', '\n':
			// Skip whitespace.
			r.pos++
			r.start++

		case '"':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenString
			r.fetchString()
			return

		case '{', '[':
			if r.wantSep != 0 {
				r.errSyntax()
			}
			// Opening a container: the next element needs no separator.
			r.firstElement = true
			r.token.kind = tokenDelim
			r.token.delimValue = r.Data[r.pos]
			r.pos++
			return

		case '}', ']':
			// A closing delimiter is valid right after the opening one
			// (empty container) or when a comma would otherwise be expected
			// (i.e. after a complete value).
			if !r.firstElement && (r.wantSep != ',') {
				r.errSyntax()
			}
			r.wantSep = 0
			r.token.kind = tokenDelim
			r.token.delimValue = r.Data[r.pos]
			r.pos++
			return

		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-':
			if r.wantSep != 0 {
				r.errSyntax()
			}
			r.token.kind = tokenNumber
			r.fetchNumber()
			return

		case 'n':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenNull
			r.fetchNull()
			return

		case 't':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenBool
			r.token.boolValue = true
			r.fetchTrue()
			return

		case 'f':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenBool
			r.token.boolValue = false
			r.fetchFalse()
			return

		default:
			r.errSyntax()
			return
		}
	}
	// Only whitespace (or nothing) remained in the input.
	r.fatalError = io.EOF
	return
}
// isTokenEnd returns true if the char can follow a non-delimiter token:
// JSON whitespace or one of the structural characters.
func isTokenEnd(c byte) bool {
	switch c {
	case ' ', '\t', '\r', '\n', '[', ']', '{', '}', ',', ':':
		return true
	}
	return false
}
// fetchNull fetches and checks remaining bytes of null keyword.
2024-05-14 13:07:09 +00:00
func (r *Lexer) fetchNull() {
2024-05-14 13:07:09 +00:00
r.pos += 4
2024-05-14 13:07:09 +00:00
if r.pos > len(r.Data) ||
2024-05-14 13:07:09 +00:00
r.Data[r.pos-3] != 'u' ||
2024-05-14 13:07:09 +00:00
r.Data[r.pos-2] != 'l' ||
2024-05-14 13:07:09 +00:00
r.Data[r.pos-1] != 'l' ||
2024-05-14 13:07:09 +00:00
(r.pos != len(r.Data) && !isTokenEnd(r.Data[r.pos])) {
r.pos -= 4
2024-05-14 13:07:09 +00:00
r.errSyntax()
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
}
// fetchTrue fetches and checks remaining bytes of true keyword.
2024-05-14 13:07:09 +00:00
func (r *Lexer) fetchTrue() {
2024-05-14 13:07:09 +00:00
r.pos += 4
2024-05-14 13:07:09 +00:00
if r.pos > len(r.Data) ||
2024-05-14 13:07:09 +00:00
r.Data[r.pos-3] != 'r' ||
2024-05-14 13:07:09 +00:00
r.Data[r.pos-2] != 'u' ||
2024-05-14 13:07:09 +00:00
r.Data[r.pos-1] != 'e' ||
2024-05-14 13:07:09 +00:00
(r.pos != len(r.Data) && !isTokenEnd(r.Data[r.pos])) {
r.pos -= 4
2024-05-14 13:07:09 +00:00
r.errSyntax()
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
}
// fetchFalse fetches and checks remaining bytes of false keyword.
2024-05-14 13:07:09 +00:00
func (r *Lexer) fetchFalse() {
2024-05-14 13:07:09 +00:00
r.pos += 5
2024-05-14 13:07:09 +00:00
if r.pos > len(r.Data) ||
2024-05-14 13:07:09 +00:00
r.Data[r.pos-4] != 'a' ||
2024-05-14 13:07:09 +00:00
r.Data[r.pos-3] != 'l' ||
2024-05-14 13:07:09 +00:00
r.Data[r.pos-2] != 's' ||
2024-05-14 13:07:09 +00:00
r.Data[r.pos-1] != 'e' ||
2024-05-14 13:07:09 +00:00
(r.pos != len(r.Data) && !isTokenEnd(r.Data[r.pos])) {
r.pos -= 5
2024-05-14 13:07:09 +00:00
r.errSyntax()
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
}
// fetchNumber scans a number literal token.
//
// Only coarse shape checks are done here: at most one dot, at most one
// exponent marker, and a sign only immediately after the exponent. Precise
// validation happens later when the value is actually parsed (strconv).
func (r *Lexer) fetchNumber() {
	hasE := false   // an 'e'/'E' exponent marker was already seen
	afterE := false // previous char was the exponent marker (so '+'/'-' is allowed)
	hasDot := false // a '.' was seen; also set by 'e' to forbid dots in the exponent

	r.pos++ // the first char (digit or '-') was already inspected by FetchToken
	for i, c := range r.Data[r.pos:] {
		switch {
		case c >= '0' && c <= '9':
			afterE = false
		case c == '.' && !hasDot:
			hasDot = true
		case (c == 'e' || c == 'E') && !hasE:
			hasE = true
			hasDot = true
			afterE = true
		case (c == '+' || c == '-') && afterE:
			afterE = false
		default:
			// Char is not part of the number: it must be a valid token
			// terminator, otherwise the literal is malformed.
			r.pos += i
			if !isTokenEnd(c) {
				r.errSyntax()
			} else {
				r.token.byteValue = r.Data[r.start:r.pos]
			}
			return
		}
	}
	// The number runs to the end of the input.
	r.pos = len(r.Data)
	r.token.byteValue = r.Data[r.start:]
}
// findStringLen tries to scan into the string literal for ending quote char to determine required size.
// The size will be exact if no escapes are present and may be inexact if there are escaped chars.
//
// A quote preceded by an odd number of backslashes is escaped and does not
// terminate the literal. On a missing terminator the returned length is the
// number of bytes remaining after the last escaped quote (matching the
// historical behavior used for error positioning).
func findStringLen(data []byte) (isValid bool, length int) {
	offset := 0
	for {
		rel := bytes.IndexByte(data[offset:], '"')
		if rel < 0 {
			return false, len(data) - offset
		}
		quote := offset + rel

		// Count the backslashes immediately preceding the quote; an even
		// count means they escape each other and the quote is real.
		slashes := 0
		for j := quote - 1; j >= offset && data[j] == '\\'; j-- {
			slashes++
		}
		if slashes%2 == 0 {
			return true, quote
		}

		// Escaped quote: keep scanning past it.
		offset = quote + 1
	}
}
// unescapeStringToken performs unescaping of string token.
// If no escaping is needed, the original slice (aliasing the input) is kept;
// otherwise a new buffer is allocated and byteValueCloned is set so callers
// know the bytes are private.
func (r *Lexer) unescapeStringToken() (err error) {
	data := r.token.byteValue
	var unescapedData []byte

	for {
		i := bytes.IndexByte(data, '\\')
		if i == -1 {
			break
		}

		escapedRune, escapedBytes, err := decodeEscape(data[i:])
		if err != nil {
			r.errParse(err.Error())
			return err
		}

		if unescapedData == nil {
			// Allocate lazily, only when the first escape is found.
			unescapedData = make([]byte, 0, len(r.token.byteValue))
		}

		// Append the literal prefix, then the UTF-8 encoding of the rune.
		var d [4]byte
		s := utf8.EncodeRune(d[:], escapedRune)
		unescapedData = append(unescapedData, data[:i]...)
		unescapedData = append(unescapedData, d[:s]...)
		data = data[i+escapedBytes:]
	}

	if unescapedData != nil {
		r.token.byteValue = append(unescapedData, data...)
		r.token.byteValueCloned = true
	}
	return
}
// getu4 decodes \uXXXX from the beginning of s, returning the hex value,
// or it returns -1 when s does not start with a well-formed sequence.
func getu4(s []byte) rune {
	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
		return -1
	}

	var val rune
	for _, c := range s[2:6] {
		var digit rune
		switch {
		case c >= '0' && c <= '9':
			digit = rune(c - '0')
		case c >= 'a' && c <= 'f':
			digit = rune(c-'a') + 10
		case c >= 'A' && c <= 'F':
			digit = rune(c-'A') + 10
		default:
			return -1
		}
		val = val<<4 | digit
	}
	return val
}
// decodeEscape processes a single escape sequence starting at data[0]
// (expected to be '\\') and returns the decoded rune plus the number of
// input bytes consumed.
func decodeEscape(data []byte) (decoded rune, bytesProcessed int, err error) {
	if len(data) < 2 {
		return 0, 0, errors.New("incorrect escape symbol \\ at the end of token")
	}

	c := data[1]
	switch c {
	case '"', '/', '\\':
		return rune(c), 2, nil
	case 'b':
		return '\b', 2, nil
	case 'f':
		return '\f', 2, nil
	case 'n':
		return '\n', 2, nil
	case 'r':
		return '\r', 2, nil
	case 't':
		return '\t', 2, nil
	case 'u':
		rr := getu4(data)
		if rr < 0 {
			return 0, 0, errors.New("incorrectly escaped \\uXXXX sequence")
		}

		read := 6
		if utf16.IsSurrogate(rr) {
			// Try to combine with a following \uXXXX as a surrogate pair;
			// if the pair does not decode, keep U+FFFD for the lone half.
			rr1 := getu4(data[read:])
			if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
				read += 6
				rr = dec
			} else {
				rr = unicode.ReplacementChar
			}
		}
		return rr, read, nil
	}
	return 0, 0, errors.New("incorrectly escaped bytes")
}
// fetchString scans a string literal token.
2024-05-14 13:07:09 +00:00
func (r *Lexer) fetchString() {
2024-05-14 13:07:09 +00:00
r.pos++
2024-05-14 13:07:09 +00:00
data := r.Data[r.pos:]
isValid, length := findStringLen(data)
2024-05-14 13:07:09 +00:00
if !isValid {
2024-05-14 13:07:09 +00:00
r.pos += length
2024-05-14 13:07:09 +00:00
r.errParse("unterminated string literal")
2024-05-14 13:07:09 +00:00
return
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
r.token.byteValue = data[:length]
2024-05-14 13:07:09 +00:00
r.pos += length + 1 // skip closing '"' as well
2024-05-14 13:07:09 +00:00
}
// scanToken scans the next token if no token is currently available in the lexer.
2024-05-14 13:07:09 +00:00
func (r *Lexer) scanToken() {
2024-05-14 13:07:09 +00:00
if r.token.kind != tokenUndef || r.fatalError != nil {
2024-05-14 13:07:09 +00:00
return
2024-05-14 13:07:09 +00:00
}
r.FetchToken()
2024-05-14 13:07:09 +00:00
}
// consume resets the current token to allow scanning the next one.
2024-05-14 13:07:09 +00:00
func (r *Lexer) consume() {
2024-05-14 13:07:09 +00:00
r.token.kind = tokenUndef
2024-05-14 13:07:09 +00:00
r.token.byteValueCloned = false
2024-05-14 13:07:09 +00:00
r.token.delimValue = 0
2024-05-14 13:07:09 +00:00
}
// Ok returns true if no error (including io.EOF) was encountered during scanning.
func (r *Lexer) Ok() bool {
	return r.fatalError == nil
}
// maxErrorContextLen limits how much surrounding input is embedded in a
// LexerError's Data field.
const maxErrorContextLen = 13

// errParse records a fatal parse error at the current position; the first
// fatal error wins and later ones are dropped.
func (r *Lexer) errParse(what string) {
	if r.fatalError == nil {
		var str string
		if len(r.Data)-r.pos <= maxErrorContextLen {
			// NOTE(review): for short remainders the context is the ENTIRE
			// input, not just the tail from r.pos — presumably intentional
			// for small documents; confirm before changing.
			str = string(r.Data)
		} else {
			str = string(r.Data[r.pos:r.pos+maxErrorContextLen-3]) + "..."
		}
		r.fatalError = &LexerError{
			Reason: what,
			Offset: r.pos,
			Data:   str,
		}
	}
}
// errSyntax records a generic fatal syntax error at the current position.
func (r *Lexer) errSyntax() {
	r.errParse("syntax error")
}
// errInvalidToken reports that the current token is not what the caller
// expected. With UseMultipleErrors the error is recorded as non-fatal, the
// offending value is skipped and lexing continues; otherwise a fatal
// LexerError is stored.
func (r *Lexer) errInvalidToken(expected string) {
	if r.fatalError != nil {
		return
	}
	if r.UseMultipleErrors {
		// Rewind to the token start, then skip the whole offending value.
		r.pos = r.start
		r.consume()
		r.SkipRecursive()
		switch expected {
		case "[":
			// Pretend the expected container was present and is already
			// closed, so a caller's element loop terminates cleanly.
			r.token.delimValue = ']'
			r.token.kind = tokenDelim
		case "{":
			r.token.delimValue = '}'
			r.token.kind = tokenDelim
		}
		r.addNonfatalError(&LexerError{
			Reason: fmt.Sprintf("expected %s", expected),
			Offset: r.start,
			Data:   string(r.Data[r.start:r.pos]),
		})
		return
	}

	// Fatal path: capture a truncated view of the offending token value.
	var str string
	if len(r.token.byteValue) <= maxErrorContextLen {
		str = string(r.token.byteValue)
	} else {
		str = string(r.token.byteValue[:maxErrorContextLen-3]) + "..."
	}
	r.fatalError = &LexerError{
		Reason: fmt.Sprintf("expected %s", expected),
		Offset: r.pos,
		Data:   str,
	}
}
// GetPos returns the current scanning offset into r.Data.
func (r *Lexer) GetPos() int {
	return r.pos
}
// Delim consumes a token and verifies that it is the given delimiter.
2024-05-14 13:07:09 +00:00
func (r *Lexer) Delim(c byte) {
2024-05-14 13:07:09 +00:00
if r.token.kind == tokenUndef && r.Ok() {
2024-05-14 13:07:09 +00:00
r.FetchToken()
2024-05-14 13:07:09 +00:00
}
if !r.Ok() || r.token.delimValue != c {
2024-05-14 13:07:09 +00:00
r.consume() // errInvalidToken can change token if UseMultipleErrors is enabled.
2024-05-14 13:07:09 +00:00
r.errInvalidToken(string([]byte{c}))
2024-05-14 13:07:09 +00:00
} else {
2024-05-14 13:07:09 +00:00
r.consume()
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
}
// IsDelim returns true if there was no scanning error and next token is the given delimiter.
2024-05-14 13:07:09 +00:00
func (r *Lexer) IsDelim(c byte) bool {
2024-05-14 13:07:09 +00:00
if r.token.kind == tokenUndef && r.Ok() {
2024-05-14 13:07:09 +00:00
r.FetchToken()
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return !r.Ok() || r.token.delimValue == c
2024-05-14 13:07:09 +00:00
}
// Null verifies that the next token is null and consumes it.
2024-05-14 13:07:09 +00:00
func (r *Lexer) Null() {
2024-05-14 13:07:09 +00:00
if r.token.kind == tokenUndef && r.Ok() {
2024-05-14 13:07:09 +00:00
r.FetchToken()
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
if !r.Ok() || r.token.kind != tokenNull {
2024-05-14 13:07:09 +00:00
r.errInvalidToken("null")
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
r.consume()
2024-05-14 13:07:09 +00:00
}
// IsNull returns true if the next token is a null keyword.
2024-05-14 13:07:09 +00:00
func (r *Lexer) IsNull() bool {
2024-05-14 13:07:09 +00:00
if r.token.kind == tokenUndef && r.Ok() {
2024-05-14 13:07:09 +00:00
r.FetchToken()
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return r.Ok() && r.token.kind == tokenNull
2024-05-14 13:07:09 +00:00
}
// Skip skips a single token (fetching one first if none is pending).
func (r *Lexer) Skip() {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	r.consume()
}
// SkipRecursive skips next array or object completely, or just skips a single token if not
// an array/object.
//
// The skipped array/object is checked as a whole with json.Valid, but is not
// tokenized by this lexer.
func (r *Lexer) SkipRecursive() {
	r.scanToken()
	var start, end byte
	startPos := r.start

	switch r.token.delimValue {
	case '{':
		start, end = '{', '}'
	case '[':
		start, end = '[', ']'
	default:
		// Not a composite value: skipping one token is enough.
		r.consume()
		return
	}

	r.consume()

	level := 1
	inQuotes := false
	wasEscape := false

	// Scan forward matching start/end delimiters while ignoring any that
	// occur inside string literals.
	for i, c := range r.Data[r.pos:] {
		switch {
		case c == start && !inQuotes:
			level++
		case c == end && !inQuotes:
			level--
			if level == 0 {
				r.pos += i + 1
				// Validate the skipped value so malformed data inside it
				// does not go unnoticed.
				if !json.Valid(r.Data[startPos:r.pos]) {
					r.pos = len(r.Data)
					r.fatalError = &LexerError{
						Reason: "skipped array/object json value is invalid",
						Offset: r.pos,
						Data:   string(r.Data[r.pos:]),
					}
				}
				return
			}
		case c == '\\' && inQuotes:
			// Toggle so that "\\\\" cancels itself out.
			wasEscape = !wasEscape
			continue
		case c == '"' && inQuotes:
			// A quote ends the string unless the previous char escaped it.
			inQuotes = wasEscape
		case c == '"':
			inQuotes = true
		}
		wasEscape = false
	}
	r.pos = len(r.Data)
	r.fatalError = &LexerError{
		Reason: "EOF reached while skipping array/object or token",
		Offset: r.pos,
		Data:   string(r.Data[r.pos:]),
	}
}
// Raw fetches the next item recursively as a data slice
func (r *Lexer) Raw() []byte {
	r.SkipRecursive()
	if !r.Ok() {
		return nil
	}
	// r.start still marks the beginning of the value just skipped.
	return r.Data[r.start:r.pos]
}
// IsStart returns whether the lexer is positioned at the start
// of an input string.
func (r *Lexer) IsStart() bool {
	return r.pos == 0
}
// Consumed reads all remaining bytes from the input, publishing an error if
2024-05-14 13:07:09 +00:00
// there is anything but whitespace remaining.
2024-05-14 13:07:09 +00:00
func (r *Lexer) Consumed() {
2024-05-14 13:07:09 +00:00
if r.pos > len(r.Data) || !r.Ok() {
2024-05-14 13:07:09 +00:00
return
2024-05-14 13:07:09 +00:00
}
for _, c := range r.Data[r.pos:] {
2024-05-14 13:07:09 +00:00
if c != ' ' && c != '\t' && c != '\r' && c != '\n' {
2024-05-14 13:07:09 +00:00
r.AddError(&LexerError{
2024-05-14 13:07:09 +00:00
Reason: "invalid character '" + string(c) + "' after top-level value",
2024-05-14 13:07:09 +00:00
Offset: r.pos,
Data: string(r.Data[r.pos:]),
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
return
2024-05-14 13:07:09 +00:00
}
r.pos++
2024-05-14 13:07:09 +00:00
r.start++
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) unsafeString(skipUnescape bool) (string, []byte) {
2024-05-14 13:07:09 +00:00
if r.token.kind == tokenUndef && r.Ok() {
2024-05-14 13:07:09 +00:00
r.FetchToken()
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
if !r.Ok() || r.token.kind != tokenString {
2024-05-14 13:07:09 +00:00
r.errInvalidToken("string")
2024-05-14 13:07:09 +00:00
return "", nil
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
if !skipUnescape {
2024-05-14 13:07:09 +00:00
if err := r.unescapeStringToken(); err != nil {
2024-05-14 13:07:09 +00:00
r.errInvalidToken("string")
2024-05-14 13:07:09 +00:00
return "", nil
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
}
bytes := r.token.byteValue
2024-05-14 13:07:09 +00:00
ret := bytesToStr(r.token.byteValue)
2024-05-14 13:07:09 +00:00
r.consume()
2024-05-14 13:07:09 +00:00
return ret, bytes
2024-05-14 13:07:09 +00:00
}
// UnsafeString returns the string value if the token is a string literal.
//
// Warning: returned string may point to the input buffer, so the string should not outlive
// the input buffer. Intended pattern of usage is as an argument to a switch statement.
func (r *Lexer) UnsafeString() string {
	ret, _ := r.unsafeString(false)
	return ret
}
// UnsafeBytes returns the byte slice if the token is a string literal.
// Like UnsafeString, the result may point into the input buffer.
func (r *Lexer) UnsafeBytes() []byte {
	_, ret := r.unsafeString(false)
	return ret
}
// UnsafeFieldName returns current member name string token.
// With skipUnescape=true the raw (still escaped) bytes are used, which is
// safe when the caller only compares against known unescaped names.
func (r *Lexer) UnsafeFieldName(skipUnescape bool) string {
	ret, _ := r.unsafeString(skipUnescape)
	return ret
}
// String reads a string literal.
2024-05-14 13:07:09 +00:00
func (r *Lexer) String() string {
2024-05-14 13:07:09 +00:00
if r.token.kind == tokenUndef && r.Ok() {
2024-05-14 13:07:09 +00:00
r.FetchToken()
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
if !r.Ok() || r.token.kind != tokenString {
2024-05-14 13:07:09 +00:00
r.errInvalidToken("string")
2024-05-14 13:07:09 +00:00
return ""
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
if err := r.unescapeStringToken(); err != nil {
2024-05-14 13:07:09 +00:00
r.errInvalidToken("string")
2024-05-14 13:07:09 +00:00
return ""
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
var ret string
2024-05-14 13:07:09 +00:00
if r.token.byteValueCloned {
2024-05-14 13:07:09 +00:00
ret = bytesToStr(r.token.byteValue)
2024-05-14 13:07:09 +00:00
} else {
2024-05-14 13:07:09 +00:00
ret = string(r.token.byteValue)
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
r.consume()
2024-05-14 13:07:09 +00:00
return ret
2024-05-14 13:07:09 +00:00
}
// StringIntern reads a string literal, and performs string interning on it.
2024-05-14 13:07:09 +00:00
func (r *Lexer) StringIntern() string {
2024-05-14 13:07:09 +00:00
if r.token.kind == tokenUndef && r.Ok() {
2024-05-14 13:07:09 +00:00
r.FetchToken()
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
if !r.Ok() || r.token.kind != tokenString {
2024-05-14 13:07:09 +00:00
r.errInvalidToken("string")
2024-05-14 13:07:09 +00:00
return ""
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
if err := r.unescapeStringToken(); err != nil {
2024-05-14 13:07:09 +00:00
r.errInvalidToken("string")
2024-05-14 13:07:09 +00:00
return ""
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
ret := intern.Bytes(r.token.byteValue)
2024-05-14 13:07:09 +00:00
r.consume()
2024-05-14 13:07:09 +00:00
return ret
2024-05-14 13:07:09 +00:00
}
// Bytes reads a string literal and base64 decodes it into a byte slice.
func (r *Lexer) Bytes() []byte {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenString {
		r.errInvalidToken("string")
		return nil
	}
	if err := r.unescapeStringToken(); err != nil {
		r.errInvalidToken("string")
		return nil
	}

	ret := make([]byte, base64.StdEncoding.DecodedLen(len(r.token.byteValue)))
	n, err := base64.StdEncoding.Decode(ret, r.token.byteValue)
	if err != nil {
		// NOTE(review): the decode error is fatal but carries no Offset/Data
		// context, and the token is not consumed here — confirm intended.
		r.fatalError = &LexerError{
			Reason: err.Error(),
		}
		return nil
	}

	r.consume()
	return ret[:n]
}
// Bool reads a true or false boolean keyword.
2024-05-14 13:07:09 +00:00
func (r *Lexer) Bool() bool {
2024-05-14 13:07:09 +00:00
if r.token.kind == tokenUndef && r.Ok() {
2024-05-14 13:07:09 +00:00
r.FetchToken()
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
if !r.Ok() || r.token.kind != tokenBool {
2024-05-14 13:07:09 +00:00
r.errInvalidToken("bool")
2024-05-14 13:07:09 +00:00
return false
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
ret := r.token.boolValue
2024-05-14 13:07:09 +00:00
r.consume()
2024-05-14 13:07:09 +00:00
return ret
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) number() string {
2024-05-14 13:07:09 +00:00
if r.token.kind == tokenUndef && r.Ok() {
2024-05-14 13:07:09 +00:00
r.FetchToken()
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
if !r.Ok() || r.token.kind != tokenNumber {
2024-05-14 13:07:09 +00:00
r.errInvalidToken("number")
2024-05-14 13:07:09 +00:00
return ""
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
ret := bytesToStr(r.token.byteValue)
2024-05-14 13:07:09 +00:00
r.consume()
2024-05-14 13:07:09 +00:00
return ret
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Uint8() uint8 {
2024-05-14 13:07:09 +00:00
s := r.number()
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
n, err := strconv.ParseUint(s, 10, 8)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: s,
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return uint8(n)
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Uint16() uint16 {
2024-05-14 13:07:09 +00:00
s := r.number()
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
n, err := strconv.ParseUint(s, 10, 16)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: s,
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return uint16(n)
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Uint32() uint32 {
2024-05-14 13:07:09 +00:00
s := r.number()
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
n, err := strconv.ParseUint(s, 10, 32)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: s,
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return uint32(n)
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Uint64() uint64 {
2024-05-14 13:07:09 +00:00
s := r.number()
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
n, err := strconv.ParseUint(s, 10, 64)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: s,
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return n
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Uint() uint {
2024-05-14 13:07:09 +00:00
return uint(r.Uint64())
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Int8() int8 {
2024-05-14 13:07:09 +00:00
s := r.number()
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
n, err := strconv.ParseInt(s, 10, 8)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: s,
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return int8(n)
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Int16() int16 {
2024-05-14 13:07:09 +00:00
s := r.number()
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
n, err := strconv.ParseInt(s, 10, 16)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: s,
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return int16(n)
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Int32() int32 {
2024-05-14 13:07:09 +00:00
s := r.number()
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
n, err := strconv.ParseInt(s, 10, 32)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: s,
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return int32(n)
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Int64() int64 {
2024-05-14 13:07:09 +00:00
s := r.number()
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
n, err := strconv.ParseInt(s, 10, 64)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: s,
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return n
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Int() int {
2024-05-14 13:07:09 +00:00
return int(r.Int64())
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Uint8Str() uint8 {
2024-05-14 13:07:09 +00:00
s, b := r.unsafeString(false)
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
n, err := strconv.ParseUint(s, 10, 8)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: string(b),
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return uint8(n)
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Uint16Str() uint16 {
2024-05-14 13:07:09 +00:00
s, b := r.unsafeString(false)
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
n, err := strconv.ParseUint(s, 10, 16)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: string(b),
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return uint16(n)
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Uint32Str() uint32 {
2024-05-14 13:07:09 +00:00
s, b := r.unsafeString(false)
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
n, err := strconv.ParseUint(s, 10, 32)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: string(b),
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return uint32(n)
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Uint64Str() uint64 {
2024-05-14 13:07:09 +00:00
s, b := r.unsafeString(false)
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
n, err := strconv.ParseUint(s, 10, 64)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: string(b),
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return n
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) UintStr() uint {
2024-05-14 13:07:09 +00:00
return uint(r.Uint64Str())
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) UintptrStr() uintptr {
2024-05-14 13:07:09 +00:00
return uintptr(r.Uint64Str())
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Int8Str() int8 {
2024-05-14 13:07:09 +00:00
s, b := r.unsafeString(false)
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
n, err := strconv.ParseInt(s, 10, 8)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: string(b),
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return int8(n)
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Int16Str() int16 {
2024-05-14 13:07:09 +00:00
s, b := r.unsafeString(false)
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
n, err := strconv.ParseInt(s, 10, 16)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: string(b),
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return int16(n)
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Int32Str() int32 {
2024-05-14 13:07:09 +00:00
s, b := r.unsafeString(false)
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
n, err := strconv.ParseInt(s, 10, 32)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: string(b),
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return int32(n)
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Int64Str() int64 {
2024-05-14 13:07:09 +00:00
s, b := r.unsafeString(false)
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
n, err := strconv.ParseInt(s, 10, 64)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: string(b),
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return n
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) IntStr() int {
2024-05-14 13:07:09 +00:00
return int(r.Int64Str())
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Float32() float32 {
2024-05-14 13:07:09 +00:00
s := r.number()
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
n, err := strconv.ParseFloat(s, 32)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: s,
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return float32(n)
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Float32Str() float32 {
2024-05-14 13:07:09 +00:00
s, b := r.unsafeString(false)
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
n, err := strconv.ParseFloat(s, 32)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: string(b),
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return float32(n)
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Float64() float64 {
2024-05-14 13:07:09 +00:00
s := r.number()
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
n, err := strconv.ParseFloat(s, 64)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: s,
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return n
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) Float64Str() float64 {
2024-05-14 13:07:09 +00:00
s, b := r.unsafeString(false)
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return 0
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
n, err := strconv.ParseFloat(s, 64)
2024-05-14 13:07:09 +00:00
if err != nil {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
2024-05-14 13:07:09 +00:00
Reason: err.Error(),
Data: string(b),
2024-05-14 13:07:09 +00:00
})
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
return n
2024-05-14 13:07:09 +00:00
}
// Error returns the lexer's fatal error, or nil if none has occurred.
func (r *Lexer) Error() error {
	return r.fatalError
}
func (r *Lexer) AddError(e error) {
2024-05-14 13:07:09 +00:00
if r.fatalError == nil {
2024-05-14 13:07:09 +00:00
r.fatalError = e
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) AddNonFatalError(e error) {
2024-05-14 13:07:09 +00:00
r.addNonfatalError(&LexerError{
2024-05-14 13:07:09 +00:00
Offset: r.start,
Data: string(r.Data[r.start:r.pos]),
2024-05-14 13:07:09 +00:00
Reason: e.Error(),
})
2024-05-14 13:07:09 +00:00
}
func (r *Lexer) addNonfatalError(err *LexerError) {
2024-05-14 13:07:09 +00:00
if r.UseMultipleErrors {
2024-05-14 13:07:09 +00:00
// We don't want to add errors with the same offset.
2024-05-14 13:07:09 +00:00
if len(r.multipleErrors) != 0 && r.multipleErrors[len(r.multipleErrors)-1].Offset == err.Offset {
2024-05-14 13:07:09 +00:00
return
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
r.multipleErrors = append(r.multipleErrors, err)
2024-05-14 13:07:09 +00:00
return
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
r.fatalError = err
2024-05-14 13:07:09 +00:00
}
// GetNonFatalErrors returns all non-fatal errors accumulated so far
// (populated only when UseMultipleErrors is set).
func (r *Lexer) GetNonFatalErrors() []*LexerError {
	return r.multipleErrors
}
// JsonNumber fetches and json.Number from 'encoding/json' package.
2024-05-14 13:07:09 +00:00
// Both int, float or string, contains them are valid values
2024-05-14 13:07:09 +00:00
func (r *Lexer) JsonNumber() json.Number {
2024-05-14 13:07:09 +00:00
if r.token.kind == tokenUndef && r.Ok() {
2024-05-14 13:07:09 +00:00
r.FetchToken()
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
if !r.Ok() {
2024-05-14 13:07:09 +00:00
r.errInvalidToken("json.Number")
2024-05-14 13:07:09 +00:00
return json.Number("")
2024-05-14 13:07:09 +00:00
}
switch r.token.kind {
2024-05-14 13:07:09 +00:00
case tokenString:
2024-05-14 13:07:09 +00:00
return json.Number(r.String())
2024-05-14 13:07:09 +00:00
case tokenNumber:
2024-05-14 13:07:09 +00:00
return json.Number(r.Raw())
2024-05-14 13:07:09 +00:00
case tokenNull:
2024-05-14 13:07:09 +00:00
r.Null()
2024-05-14 13:07:09 +00:00
return json.Number("")
2024-05-14 13:07:09 +00:00
default:
2024-05-14 13:07:09 +00:00
r.errSyntax()
2024-05-14 13:07:09 +00:00
return json.Number("")
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
}
// Interface fetches an interface{} analogous to the 'encoding/json' package.
2024-05-14 13:07:09 +00:00
func (r *Lexer) Interface() interface{} {
2024-05-14 13:07:09 +00:00
if r.token.kind == tokenUndef && r.Ok() {
2024-05-14 13:07:09 +00:00
r.FetchToken()
2024-05-14 13:07:09 +00:00
}
if !r.Ok() {
2024-05-14 13:07:09 +00:00
return nil
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
switch r.token.kind {
2024-05-14 13:07:09 +00:00
case tokenString:
2024-05-14 13:07:09 +00:00
return r.String()
2024-05-14 13:07:09 +00:00
case tokenNumber:
2024-05-14 13:07:09 +00:00
return r.Float64()
2024-05-14 13:07:09 +00:00
case tokenBool:
2024-05-14 13:07:09 +00:00
return r.Bool()
2024-05-14 13:07:09 +00:00
case tokenNull:
2024-05-14 13:07:09 +00:00
r.Null()
2024-05-14 13:07:09 +00:00
return nil
2024-05-14 13:07:09 +00:00
}
if r.token.delimValue == '{' {
2024-05-14 13:07:09 +00:00
r.consume()
ret := map[string]interface{}{}
2024-05-14 13:07:09 +00:00
for !r.IsDelim('}') {
2024-05-14 13:07:09 +00:00
key := r.String()
2024-05-14 13:07:09 +00:00
r.WantColon()
2024-05-14 13:07:09 +00:00
ret[key] = r.Interface()
2024-05-14 13:07:09 +00:00
r.WantComma()
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
r.Delim('}')
if r.Ok() {
2024-05-14 13:07:09 +00:00
return ret
2024-05-14 13:07:09 +00:00
} else {
2024-05-14 13:07:09 +00:00
return nil
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
} else if r.token.delimValue == '[' {
2024-05-14 13:07:09 +00:00
r.consume()
ret := []interface{}{}
2024-05-14 13:07:09 +00:00
for !r.IsDelim(']') {
2024-05-14 13:07:09 +00:00
ret = append(ret, r.Interface())
2024-05-14 13:07:09 +00:00
r.WantComma()
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
r.Delim(']')
if r.Ok() {
2024-05-14 13:07:09 +00:00
return ret
2024-05-14 13:07:09 +00:00
} else {
2024-05-14 13:07:09 +00:00
return nil
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
}
2024-05-14 13:07:09 +00:00
r.errSyntax()
2024-05-14 13:07:09 +00:00
return nil
2024-05-14 13:07:09 +00:00
}
// WantComma requires a comma to be present before fetching next token.
2024-05-14 13:07:09 +00:00
func (r *Lexer) WantComma() {
2024-05-14 13:07:09 +00:00
r.wantSep = ','
2024-05-14 13:07:09 +00:00
r.firstElement = false
2024-05-14 13:07:09 +00:00
}
// WantColon requires a colon to be present before fetching next token.
2024-05-14 13:07:09 +00:00
func (r *Lexer) WantColon() {
2024-05-14 13:07:09 +00:00
r.wantSep = ':'
2024-05-14 13:07:09 +00:00
r.firstElement = false
2024-05-14 13:07:09 +00:00
}