gohttp/vendor/github.com/ugorji/go/codec/decimal.go

492 lines
11 KiB
Go
Raw Normal View History

2023-12-21 18:36:51 +08:00
// Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved.
// Use of this source code is governed by a MIT license found in the LICENSE file.
package codec
import (
"math"
"strconv"
)
// Per go spec, floats are represented in memory as
// IEEE single or double precision floating point values.
//
// We also looked at the source for stdlib math/modf.go,
// reviewed https://github.com/chewxy/math32
// and read wikipedia documents describing the formats.
//
// It became clear that we could easily look at the bits to determine
// whether any fraction exists.
func parseFloat32(b []byte) (f float32, err error) {
return parseFloat32_custom(b)
}
func parseFloat64(b []byte) (f float64, err error) {
return parseFloat64_custom(b)
}
func parseFloat32_strconv(b []byte) (f float32, err error) {
f64, err := strconv.ParseFloat(stringView(b), 32)
f = float32(f64)
return
}
func parseFloat64_strconv(b []byte) (f float64, err error) {
return strconv.ParseFloat(stringView(b), 64)
}
// ------ parseFloat custom below --------
// JSON really supports decimal numbers in base 10 notation, with exponent support.
//
// We assume the following:
// - a lot of floating point numbers in json files will have defined precision
// (in terms of number of digits after decimal point), etc.
// - these (referenced above) can be written in exact format.
//
// strconv.ParseFloat has some unnecessary overhead which we can do without
// for the common case:
//
// - expensive char-by-char check to see if underscores are in right place
// - testing for and skipping underscores
// - check if the string matches ignorecase +/- inf, +/- infinity, nan
// - support for base 16 (0xFFFF...)
//
// The functions below will try a fast-path for floats which can be decoded
// without any loss of precision, meaning they:
//
// - fits within the significand bits of the 32-bits or 64-bits
// - exponent fits within the exponent value
// - there is no truncation (any extra numbers are all trailing zeros)
//
// To figure out what the values are for maxMantDigits, use this idea below:
//
// 2^23 = 838 8608 (between 10^ 6 and 10^ 7) (significand bits of uint32)
// 2^32 = 42 9496 7296 (between 10^ 9 and 10^10) (full uint32)
// 2^52 = 4503 5996 2737 0496 (between 10^15 and 10^16) (significand bits of uint64)
// 2^64 = 1844 6744 0737 0955 1616 (between 10^19 and 10^20) (full uint64)
//
// Note: we only allow for up to what can comfortably fit into the significand
// ignoring the exponent, and we only try to parse iff significand fits.
const (
fMaxMultiplierForExactPow10_64 = 1e15
fMaxMultiplierForExactPow10_32 = 1e7
fUint64Cutoff = (1<<64-1)/10 + 1
// fUint32Cutoff = (1<<32-1)/10 + 1
fBase = 10
)
const (
thousand = 1000
million = thousand * thousand
billion = thousand * million
trillion = thousand * billion
quadrillion = thousand * trillion
quintillion = thousand * quadrillion
)
// Exact powers of 10.
var uint64pow10 = [...]uint64{
1, 10, 100,
1 * thousand, 10 * thousand, 100 * thousand,
1 * million, 10 * million, 100 * million,
1 * billion, 10 * billion, 100 * billion,
1 * trillion, 10 * trillion, 100 * trillion,
1 * quadrillion, 10 * quadrillion, 100 * quadrillion,
1 * quintillion, 10 * quintillion,
}
var float64pow10 = [...]float64{
1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
1e20, 1e21, 1e22,
}
var float32pow10 = [...]float32{
1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10,
}
type floatinfo struct {
mantbits uint8
// expbits uint8 // (unused)
// bias int16 // (unused)
// is32bit bool // (unused)
exactPow10 int8 // Exact powers of ten are <= 10^N (32: 10, 64: 22)
exactInts int8 // Exact integers are <= 10^N (for non-float, set to 0)
// maxMantDigits int8 // 10^19 fits in uint64, while 10^9 fits in uint32
mantCutoffIsUint64Cutoff bool
mantCutoff uint64
}
var fi32 = floatinfo{23, 10, 7, false, 1<<23 - 1}
var fi64 = floatinfo{52, 22, 15, false, 1<<52 - 1}
var fi64u = floatinfo{0, 19, 0, true, fUint64Cutoff}
func noFrac64(fbits uint64) bool {
exp := uint64(fbits>>52)&0x7FF - 1023 // uint(x>>shift)&mask - bias
// clear top 12+e bits, the integer part; if the rest is 0, then no fraction.
return exp < 52 && fbits<<(12+exp) == 0 // means there's no fractional part
}
func noFrac32(fbits uint32) bool {
exp := uint32(fbits>>23)&0xFF - 127 // uint(x>>shift)&mask - bias
// clear top 9+e bits, the integer part; if the rest is 0, then no fraction.
return exp < 23 && fbits<<(9+exp) == 0 // means there's no fractional part
}
func strconvParseErr(b []byte, fn string) error {
return &strconv.NumError{
Func: fn,
Err: strconv.ErrSyntax,
Num: string(b),
}
}
func parseFloat32_reader(r readFloatResult) (f float32, fail bool) {
f = float32(r.mantissa)
if r.exp == 0 {
} else if r.exp < 0 { // int / 10^k
f /= float32pow10[uint8(-r.exp)]
} else { // exp > 0
if r.exp > fi32.exactPow10 {
f *= float32pow10[r.exp-fi32.exactPow10]
if f > fMaxMultiplierForExactPow10_32 { // exponent too large - outside range
fail = true
return // ok = false
}
f *= float32pow10[fi32.exactPow10]
} else {
f *= float32pow10[uint8(r.exp)]
}
}
if r.neg {
f = -f
}
return
}
func parseFloat32_custom(b []byte) (f float32, err error) {
r := readFloat(b, fi32)
if r.bad {
return 0, strconvParseErr(b, "ParseFloat")
}
if r.ok {
f, r.bad = parseFloat32_reader(r)
if !r.bad {
return
}
}
return parseFloat32_strconv(b)
}
func parseFloat64_reader(r readFloatResult) (f float64, fail bool) {
f = float64(r.mantissa)
if r.exp == 0 {
} else if r.exp < 0 { // int / 10^k
f /= float64pow10[-uint8(r.exp)]
} else { // exp > 0
if r.exp > fi64.exactPow10 {
f *= float64pow10[r.exp-fi64.exactPow10]
if f > fMaxMultiplierForExactPow10_64 { // exponent too large - outside range
fail = true
return
}
f *= float64pow10[fi64.exactPow10]
} else {
f *= float64pow10[uint8(r.exp)]
}
}
if r.neg {
f = -f
}
return
}
func parseFloat64_custom(b []byte) (f float64, err error) {
r := readFloat(b, fi64)
if r.bad {
return 0, strconvParseErr(b, "ParseFloat")
}
if r.ok {
f, r.bad = parseFloat64_reader(r)
if !r.bad {
return
}
}
return parseFloat64_strconv(b)
}
func parseUint64_simple(b []byte) (n uint64, ok bool) {
var i int
var n1 uint64
var c uint8
LOOP:
if i < len(b) {
c = b[i]
// unsigned integers don't overflow well on multiplication, so check cutoff here
// e.g. (maxUint64-5)*10 doesn't overflow well ...
// if n >= fUint64Cutoff || !isDigitChar(b[i]) { // if c < '0' || c > '9' {
if n >= fUint64Cutoff || c < '0' || c > '9' {
return
} else if c == '0' {
n *= fBase
} else {
n1 = n
n = n*fBase + uint64(c-'0')
if n < n1 {
return
}
}
i++
goto LOOP
}
ok = true
return
}
func parseUint64_reader(r readFloatResult) (f uint64, fail bool) {
f = r.mantissa
if r.exp == 0 {
} else if r.exp < 0 { // int / 10^k
if f%uint64pow10[uint8(-r.exp)] != 0 {
fail = true
} else {
f /= uint64pow10[uint8(-r.exp)]
}
} else { // exp > 0
f *= uint64pow10[uint8(r.exp)]
}
return
}
func parseInteger_bytes(b []byte) (u uint64, neg, ok bool) {
if len(b) == 0 {
ok = true
return
}
if b[0] == '-' {
if len(b) == 1 {
return
}
neg = true
b = b[1:]
}
u, ok = parseUint64_simple(b)
if ok {
return
}
r := readFloat(b, fi64u)
if r.ok {
var fail bool
u, fail = parseUint64_reader(r)
if fail {
f, err := parseFloat64(b)
if err != nil {
return
}
if !noFrac64(math.Float64bits(f)) {
return
}
u = uint64(f)
}
ok = true
return
}
return
}
// parseNumber will return an integer if only composed of [-]?[0-9]+
// Else it will return a float.
func parseNumber(b []byte, z *fauxUnion, preferSignedInt bool) (err error) {
var ok, neg bool
var f uint64
if len(b) == 0 {
return
}
if b[0] == '-' {
neg = true
f, ok = parseUint64_simple(b[1:])
} else {
f, ok = parseUint64_simple(b)
}
if ok {
if neg {
z.v = valueTypeInt
if chkOvf.Uint2Int(f, neg) {
return strconvParseErr(b, "ParseInt")
}
z.i = -int64(f)
} else if preferSignedInt {
z.v = valueTypeInt
if chkOvf.Uint2Int(f, neg) {
return strconvParseErr(b, "ParseInt")
}
z.i = int64(f)
} else {
z.v = valueTypeUint
z.u = f
}
return
}
z.v = valueTypeFloat
z.f, err = parseFloat64_custom(b)
return
}
type readFloatResult struct {
mantissa uint64
exp int8
neg bool
trunc bool
bad bool // bad decimal string
hardexp bool // exponent is hard to handle (> 2 digits, etc)
ok bool
// sawdot bool
// sawexp bool
//_ [2]bool // padding
}
func readFloat(s []byte, y floatinfo) (r readFloatResult) {
var i uint // uint, so that we eliminate bounds checking
var slen = uint(len(s))
if slen == 0 {
// read an empty string as the zero value
// r.bad = true
r.ok = true
return
}
if s[0] == '-' {
r.neg = true
i++
}
// we considered punting early if string has length > maxMantDigits, but this doesn't account
// for trailing 0's e.g. 700000000000000000000 can be encoded exactly as it is 7e20
var nd, ndMant, dp int8
var sawdot, sawexp bool
var xu uint64
LOOP:
for ; i < slen; i++ {
switch s[i] {
case '.':
if sawdot {
r.bad = true
return
}
sawdot = true
dp = nd
case 'e', 'E':
sawexp = true
break LOOP
case '0':
if nd == 0 {
dp--
continue LOOP
}
nd++
if r.mantissa < y.mantCutoff {
r.mantissa *= fBase
ndMant++
}
case '1', '2', '3', '4', '5', '6', '7', '8', '9':
nd++
if y.mantCutoffIsUint64Cutoff && r.mantissa < fUint64Cutoff {
r.mantissa *= fBase
xu = r.mantissa + uint64(s[i]-'0')
if xu < r.mantissa {
r.trunc = true
return
}
r.mantissa = xu
} else if r.mantissa < y.mantCutoff {
// mantissa = (mantissa << 1) + (mantissa << 3) + uint64(c-'0')
r.mantissa = r.mantissa*fBase + uint64(s[i]-'0')
} else {
r.trunc = true
return
}
ndMant++
default:
r.bad = true
return
}
}
if !sawdot {
dp = nd
}
if sawexp {
i++
if i < slen {
var eneg bool
if s[i] == '+' {
i++
} else if s[i] == '-' {
i++
eneg = true
}
if i < slen {
// for exact match, exponent is 1 or 2 digits (float64: -22 to 37, float32: -1 to 17).
// exit quick if exponent is more than 2 digits.
if i+2 < slen {
r.hardexp = true
return
}
var e int8
if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { //
r.bad = true
return
}
e = int8(s[i] - '0')
i++
if i < slen {
if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { //
r.bad = true
return
}
e = e*fBase + int8(s[i]-'0') // (e << 1) + (e << 3) + int8(s[i]-'0')
i++
}
if eneg {
dp -= e
} else {
dp += e
}
}
}
}
if r.mantissa != 0 {
r.exp = dp - ndMant
// do not set ok=true for cases we cannot handle
if r.exp < -y.exactPow10 ||
r.exp > y.exactInts+y.exactPow10 ||
(y.mantbits != 0 && r.mantissa>>y.mantbits != 0) {
r.hardexp = true
return
}
}
r.ok = true
return
}