1.2 IEEE 754
Single precision (=float format) : 32bit
- s: sign bit
Sign | Exponent | Fraction | Value | Comment |
---|---|---|---|---|
any | 1-254 | any | \((-1)^S \times 2^{E-127} \times 1.F\) | |
any | 0 | nonzero | \((-1)^S \times 2^{E-126} \times 0.F\) | Not implemented here |
0 | 0 | 0 | +0.0 | |
1 | 0 | 0 | -0.0 | |
0 | 255 | 0 | +Inf | |
1 | 255 | 0 | -Inf | |
any | 255 | nonzero | NaN |
- NaN: Not a Number
Double precision (=double format) : 64bit
- s: sign bit
Sign | Exponent | Fraction | Value | Comment |
---|---|---|---|---|
any | 1-2046 | any | \((-1)^S \times 2^{E-1023} \times 1.F\) | |
any | 0 | nonzero | \((-1)^S \times 2^{E-1022} \times 0.F\) | Not implemented here |
0 | 0 | 0 | +0.0 | |
1 | 0 | 0 | -0.0 | |
0 | 2047 | 0 | +Inf | |
1 | 2047 | 0 | -Inf | |
any | 2047 | nonzero | NaN |
- NaN: Not a Number
Bin2Dec function: practice transforming a binary-format number into a decimal number.
Bin2Dec
## function (b)
## {
## nBit = length(b)
## for (i in 1:nBit) {
## if (b[i] != 0 & b[i] != 1)
## return(NaN)
## }
## if (nBit == 32) {
## Index = c(2, 9, 10, 32, 127)
## }
## else if (nBit == 64) {
## Index = c(2, 12, 13, 64, 1023)
## }
## else {
## return(NaN)
## }
## S = 1
## if (b[1] == 1)
## S = -1
## E = 0
## for (i in Index[1]:Index[2]) {
## E = 2 * E
## E = E + b[i]
## }
## maxE = FALSE
## if ((nBit == 32 & E == 255) | (nBit == 64 & E == 2047))
## maxE = TRUE
## M = 0
## for (i in Index[4]:Index[3]) {
## M = M + b[i]
## M = M/2
## }
## if (M > 0 & maxE) {
## Expr = "NaN"
## Val = NaN
## }
## else if (M == 0 & maxE) {
## if (S == +1) {
## Expr = "+Inf"
## Val = +Inf
## }
## else {
## Expr = "-Inf"
## Val = -Inf
## }
## }
## else if (M == 0 & E == 0) {
## if (S == +1) {
## Expr = "+0.0"
## Val = +0
## }
## else {
## Expr = "-0.0"
## Val = -0
## }
## }
## else {
## M = 1 + M
## Expr = paste0(S, "*2^", E - Index[5], "*", M)
## Val = S * 2^(E - Index[5]) * M
## }
## attr(Val, "Expression") = Expr
## return(Val)
## }
## <bytecode: 0x000001e2abd11bc8>
## <environment: namespace:math>
Bin2Dec(rep(0, 32)) # +0.0
## [1] 0
## attr(,"Expression")
## [1] "+0.0"
Bin2Dec(c(1, rep(0, 31))) # -0.0
## [1] 0
## attr(,"Expression")
## [1] "-0.0"
Bin2Dec(c(0, rep(1, 8), rep(0, 23))) # +Inf
## [1] Inf
## attr(,"Expression")
## [1] "+Inf"
Bin2Dec(c(1, rep(1, 8), rep(0, 23))) # -Inf
## [1] -Inf
## attr(,"Expression")
## [1] "-Inf"
Bin2Dec(c(0, rep(1, 8), rep(1, 23))) # NaN
## [1] NaN
## attr(,"Expression")
## [1] "NaN"
Bin2Dec(c(1, rep(1, 8), rep(1, 23))) # NaN
## [1] NaN
## attr(,"Expression")
## [1] "NaN"
Bin2Dec(c(1, rep(1, 30), 0)) # NaN
## [1] NaN
## attr(,"Expression")
## [1] "NaN"
Bin2Dec(c(0, 0,1,1,1,1,1,1,1,1,1,1, 0,0,0,0, rep(0,48))) # +1
## [1] 1
## attr(,"Expression")
## [1] "1*2^0*1"
Bin2Dec(c(1, 0,1,1,1,1,1,1,1,1,1,1, 0,0,0,0, rep(0,48))) # -1
## [1] -1
## attr(,"Expression")
## [1] "-1*2^0*1"
Bin2Dec(c(0, 0,1,1,1,1,1,1,1,1,1,1, 1,0,0,0, rep(0,48))) # 1.5
## [1] 1.5
## attr(,"Expression")
## [1] "1*2^0*1.5"
Bin2Dec(c(0, 1,0,0,0,0,0,0,0,0,0,0, 0,0,0,0, rep(0,48))) # 2
## [1] 2
## attr(,"Expression")
## [1] "1*2^1*1"
Bin2Dec(c(0, 1,0,0,0,0,0,0,0,0,0,1, 1,0,1,0, rep(0,48))) # 6.5
## [1] 6.5
## attr(,"Expression")
## [1] "1*2^2*1.625"
Range Test
Bin2Dec(c(0, rep(1, 63))) # NaN with IEEE 754, but +Inf in R
## [1] NaN
## attr(,"Expression")
## [1] "NaN"
Bin2Dec(rep(1, 64)) # NaN with IEEE 754, but -Inf in R
## [1] NaN
## attr(,"Expression")
## [1] "NaN"
Bin2Dec(c(0, rep(1, 10), 0, rep(1, 52))) # .Machine$double.xmax
## [1] 1.798e+308
## attr(,"Expression")
## [1] "1*2^1023*2"
Bin2Dec(c(1, rep(1 ,10), 0, rep(1, 52))) # -1 x .Machine$double.xmax
## [1] -1.798e+308
## attr(,"Expression")
## [1] "-1*2^1023*2"
Bin2Dec(c(0, rep(1, 11), rep(0, 52))) # +Inf
## [1] Inf
## attr(,"Expression")
## [1] "+Inf"
Bin2Dec(c(1, rep(1, 11), rep(0, 52))) # -Inf
## [1] -Inf
## attr(,"Expression")
## [1] "-Inf"
Bin2Dec(c(rep(0, 63), 1)) # half of .Machine$double.xmin
## [1] 1.113e-308
## attr(,"Expression")
## [1] "1*2^-1023*1"
Bin2Dec(c(1, rep(0, 62), 1)) # -1 x half of .Machine$double.xmin
## [1] -1.113e-308
## attr(,"Expression")
## [1] "-1*2^-1023*1"
format(Bin2Dec(c(rep(0, 63), 1)), digits=22) # to see the longest form
## [1] "1.112536929253600691400e-308"
format(Bin2Dec(c(1, rep(0, 62), 1)), digits=22) # to see the longest form
## [1] "-1.112536929253600691400e-308"
# R environment limitation and numerical specification
.Machine
## $double.eps
## [1] 2.22e-16
##
## $double.neg.eps
## [1] 1.11e-16
##
## $double.xmin
## [1] 2.225e-308
##
## $double.xmax
## [1] 1.798e+308
##
## $double.base
## [1] 2
##
## $double.digits
## [1] 53
##
## $double.rounding
## [1] 5
##
## $double.guard
## [1] 0
##
## $double.ulp.digits
## [1] -52
##
## $double.neg.ulp.digits
## [1] -53
##
## $double.exponent
## [1] 11
##
## $double.min.exp
## [1] -1022
##
## $double.max.exp
## [1] 1024
##
## $integer.max
## [1] 2147483647
##
## $sizeof.long
## [1] 4
##
## $sizeof.longlong
## [1] 8
##
## $sizeof.longdouble
## [1] 16
##
## $sizeof.pointer
## [1] 8
##
## $longdouble.eps
## [1] 1.084e-19
##
## $longdouble.neg.eps
## [1] 5.421e-20
##
## $longdouble.digits
## [1] 64
##
## $longdouble.rounding
## [1] 5
##
## $longdouble.guard
## [1] 0
##
## $longdouble.ulp.digits
## [1] -63
##
## $longdouble.neg.ulp.digits
## [1] -64
##
## $longdouble.exponent
## [1] 15
##
## $longdouble.min.exp
## [1] -16382
##
## $longdouble.max.exp
## [1] 16384
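*Note:* .Machine$double.min.exp is -1022, not -1023, because the exponent field 0 is reserved for zero and denormalized numbers; the smallest normalized exponent is therefore 1 - 1023 = -1022, which agrees with IEEE 754.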
Overflow and underflow error
- Overflow error: Error occurring when the absolute value is too big to represent.
- Underflow error: Error occurring when the absolute value is too small to represent.
a1 = 1e200
b1 = 1e300
a1*b1 # Inf
## [1] Inf
c1 = 1e-200
b1/c1 # Inf
## [1] Inf
c1/b1 # 0
## [1] 0
a1*b1*c1 # Inf, but the correct answer is 1e300
## [1] Inf
a1*(b1*c1) # 1e+300
## [1] 1e+300
In R, overflow and underflow do not raise errors. R simply returns +Inf, -Inf, or 0.
Dec2Bin function: practice transforming a decimal number into binary format.
Dec2Bin
## function (x, Double = TRUE)
## {
## if (Double == TRUE) {
## Index = c(2, 12, 13, 64, 1023)
## a = rep(0, 64)
## }
## else {
## Index = c(2, 9, 10, 32, 127)
## a = rep(0, 32)
## }
## if (x < 0)
## a[1] = 1
## E0 = floor(log(abs(x), base = 2))
## E = E0 + Index[5]
## for (i in Index[2]:Index[1]) {
## a[i] = E%%2
## E = floor(E/2)
## }
## M = abs(x)/2^E0
## M = M - 1
## for (i in Index[3]:Index[4]) {
## a[i] = floor(2 * M)
## M = 2 * M - a[i]
## }
## return(a)
## }
## <bytecode: 0x000001e2ab9737c8>
## <environment: namespace:math>
Dec2Bin(1)
## [1] 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [42] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dec2Bin(-1)
## [1] 1 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [42] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dec2Bin(1.5)
## [1] 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [42] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dec2Bin(2)
## [1] 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [42] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dec2Bin(6.5)
## [1] 0 1 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [42] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dec2Bin(1, FALSE)
## [1] 0 0 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dec2Bin(-1, FALSE)
## [1] 1 0 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dec2Bin(1.5, FALSE)
## [1] 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dec2Bin(2, FALSE)
## [1] 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dec2Bin(6.5, FALSE)
## [1] 0 1 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Bin2Dec(Dec2Bin(1, FALSE))
## [1] 1
## attr(,"Expression")
## [1] "1*2^0*1"
Bin2Dec(Dec2Bin(-1, FALSE))
## [1] -1
## attr(,"Expression")
## [1] "-1*2^0*1"
Bin2Dec(Dec2Bin(1.5, FALSE))
## [1] 1.5
## attr(,"Expression")
## [1] "1*2^0*1.5"
Bin2Dec(Dec2Bin(2, FALSE))
## [1] 2
## attr(,"Expression")
## [1] "1*2^1*1"
Bin2Dec(Dec2Bin(6.5, FALSE))
## [1] 6.5
## attr(,"Expression")
## [1] "1*2^2*1.625"
Dec2Bin(1.1)
## [1] 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1
## [42] 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 1 0
Dec2Bin(1.2)
## [1] 0 0 1 1 1 1 1 1 1 1 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0
## [42] 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1
Dec2Bin(1.3)
## [1] 0 0 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1
## [42] 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 1
Dec2Bin(1.4)
## [1] 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0
## [42] 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0
Dec2Bin(1.5)
## [1] 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [42] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# Can all decimal numbers be represented with a finite number of binary digits? No.
format(Bin2Dec(Dec2Bin(1.1)), digits=22)
## [1] "1.100000000000000088818"
Bin2Dec(Dec2Bin(1.1, FALSE))
## [1] 1.1
## attr(,"Expression")
## [1] "1*2^0*1.09999990463257"
Bin2Dec(Dec2Bin(1.2, FALSE))
## [1] 1.2
## attr(,"Expression")
## [1] "1*2^0*1.19999992847443"
Bin2Dec(Dec2Bin(1.3, FALSE))
## [1] 1.3
## attr(,"Expression")
## [1] "1*2^0*1.29999995231628"
Bin2Dec(Dec2Bin(1.4, FALSE))
## [1] 1.4
## attr(,"Expression")
## [1] "1*2^0*1.39999997615814"
Bin2Dec(Dec2Bin(1.5, FALSE))
## [1] 1.5
## attr(,"Expression")
## [1] "1*2^0*1.5"
format(Bin2Dec(Dec2Bin(1.0, FALSE)), 22)
## [1] "1"
format(Bin2Dec(Dec2Bin(1.1, FALSE)), 22)
## [1] "1.1"
format(Bin2Dec(Dec2Bin(1.2, FALSE)), 22)
## [1] "1.2"
format(Bin2Dec(Dec2Bin(1.3, FALSE)), 22)
## [1] "1.3"
format(Bin2Dec(Dec2Bin(1.4, FALSE)), 22)
## [1] "1.4"
format(Bin2Dec(Dec2Bin(1.5, FALSE)), 22)
## [1] "1.5"