1.2 IEEE 754

Single precision (=float format) : 32bit

  • S: sign bit (1 bit)
  • E: biased exponent (8 bits, bias 127)
  • F: fraction (23 bits)
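TABLE 1.1: Single Format

| Sign | Exponent | Fraction | Value | Comment |
|------|----------|----------|-------|---------|
| any | 1-254 | any | \((-1)^S \times 2^{E-127} \times 1.F\) | |
| any | 0 | nonzero | \((-1)^S \times 2^{E-126} \times 0.F\) | Not implemented here |
| 0 | 0 | 0 | +0.0 | |
| 1 | 0 | 0 | -0.0 | |
| 0 | 255 | 0 | +Inf | |
| 1 | 255 | 0 | -Inf | |
| any | 255 | nonzero | NaN | |
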
  • NaN: Not a Number

Double precision (=double format) : 64bit

  • S: sign bit (1 bit)
  • E: biased exponent (11 bits, bias 1023)
  • F: fraction (52 bits)
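TABLE 1.2: Double Format

| Sign | Exponent | Fraction | Value | Comment |
|------|----------|----------|-------|---------|
| any | 1-2046 | any | \((-1)^S \times 2^{E-1023} \times 1.F\) | |
| any | 0 | nonzero | \((-1)^S \times 2^{E-1022} \times 0.F\) | Not implemented here |
| 0 | 0 | 0 | +0.0 | |
| 1 | 0 | 0 | -0.0 | |
| 0 | 2047 | 0 | +Inf | |
| 1 | 2047 | 0 | -Inf | |
| any | 2047 | nonzero | NaN | |
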
  • NaN: Not a Number

Bin2Dec function: practice converting a number in binary (IEEE 754) format to a decimal number.

Bin2Dec
## function (b) 
## {
##     nBit = length(b)
##     for (i in 1:nBit) {
##         if (b[i] != 0 & b[i] != 1) 
##             return(NaN)
##     }
##     if (nBit == 32) {
##         Index = c(2, 9, 10, 32, 127)
##     }
##     else if (nBit == 64) {
##         Index = c(2, 12, 13, 64, 1023)
##     }
##     else {
##         return(NaN)
##     }
##     S = 1
##     if (b[1] == 1) 
##         S = -1
##     E = 0
##     for (i in Index[1]:Index[2]) {
##         E = 2 * E
##         E = E + b[i]
##     }
##     maxE = FALSE
##     if ((nBit == 32 & E == 255) | (nBit == 64 & E == 2047)) 
##         maxE = TRUE
##     M = 0
##     for (i in Index[4]:Index[3]) {
##         M = M + b[i]
##         M = M/2
##     }
##     if (M > 0 & maxE) {
##         Expr = "NaN"
##         Val = NaN
##     }
##     else if (M == 0 & maxE) {
##         if (S == +1) {
##             Expr = "+Inf"
##             Val = +Inf
##         }
##         else {
##             Expr = "-Inf"
##             Val = -Inf
##         }
##     }
##     else if (M == 0 & E == 0) {
##         if (S == +1) {
##             Expr = "+0.0"
##             Val = +0
##         }
##         else {
##             Expr = "-0.0"
##             Val = -0
##         }
##     }
##     else {
##         M = 1 + M
##         Expr = paste0(S, "*2^", E - Index[5], "*", M)
##         Val = S * 2^(E - Index[5]) * M
##     }
##     attr(Val, "Expression") = Expr
##     return(Val)
## }
## <bytecode: 0x000001e2abd11bc8>
## <environment: namespace:math>
Bin2Dec(rep(0, 32))                  # +0.0
## [1] 0
## attr(,"Expression")
## [1] "+0.0"
Bin2Dec(c(1, rep(0, 31)))            # -0.0
## [1] 0
## attr(,"Expression")
## [1] "-0.0"
Bin2Dec(c(0, rep(1, 8), rep(0, 23))) # +Inf
## [1] Inf
## attr(,"Expression")
## [1] "+Inf"
Bin2Dec(c(1, rep(1, 8), rep(0, 23))) # -Inf
## [1] -Inf
## attr(,"Expression")
## [1] "-Inf"
Bin2Dec(c(0, rep(1, 8), rep(1, 23))) # NaN
## [1] NaN
## attr(,"Expression")
## [1] "NaN"
Bin2Dec(c(1, rep(1, 8), rep(1, 23))) # NaN
## [1] NaN
## attr(,"Expression")
## [1] "NaN"
Bin2Dec(c(1, rep(1, 30), 0))         # NaN
## [1] NaN
## attr(,"Expression")
## [1] "NaN"
Bin2Dec(c(0, 0,1,1,1,1,1,1,1,1,1,1, 0,0,0,0, rep(0,48))) # +1
## [1] 1
## attr(,"Expression")
## [1] "1*2^0*1"
Bin2Dec(c(1, 0,1,1,1,1,1,1,1,1,1,1, 0,0,0,0, rep(0,48))) # -1
## [1] -1
## attr(,"Expression")
## [1] "-1*2^0*1"
Bin2Dec(c(0, 0,1,1,1,1,1,1,1,1,1,1, 1,0,0,0, rep(0,48))) # 1.5
## [1] 1.5
## attr(,"Expression")
## [1] "1*2^0*1.5"
Bin2Dec(c(0, 1,0,0,0,0,0,0,0,0,0,0, 0,0,0,0, rep(0,48))) # 2
## [1] 2
## attr(,"Expression")
## [1] "1*2^1*1"
Bin2Dec(c(0, 1,0,0,0,0,0,0,0,0,0,1, 1,0,1,0, rep(0,48))) # 6.5
## [1] 6.5
## attr(,"Expression")
## [1] "1*2^2*1.625"

Range Test

Bin2Dec(c(0, rep(1, 63))) # NaN with IEEE 754, but +Inf in R
## [1] NaN
## attr(,"Expression")
## [1] "NaN"
Bin2Dec(rep(1, 64)) # NaN with IEEE 754, but -Inf in R
## [1] NaN
## attr(,"Expression")
## [1] "NaN"
Bin2Dec(c(0, rep(1, 10), 0, rep(1, 52))) # .Machine$double.xmax
## [1] 1.798e+308
## attr(,"Expression")
## [1] "1*2^1023*2"
Bin2Dec(c(1, rep(1 ,10), 0, rep(1, 52))) # -1 x .Machine$double.xmax
## [1] -1.798e+308
## attr(,"Expression")
## [1] "-1*2^1023*2"
Bin2Dec(c(0, rep(1, 11), rep(0, 52))) # +Inf
## [1] Inf
## attr(,"Expression")
## [1] "+Inf"
Bin2Dec(c(1, rep(1, 11), rep(0, 52))) # -Inf
## [1] -Inf
## attr(,"Expression")
## [1] "-Inf"
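Bin2Dec(c(rep(0, 63), 1))   # half of .Machine$double.xmin (Bin2Dec does not implement denormalized numbers; the true IEEE 754 value of this bit pattern is 2^-1074, about 4.94e-324)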
## [1] 1.113e-308
## attr(,"Expression")
## [1] "1*2^-1023*1"
Bin2Dec(c(1, rep(0, 62), 1)) # -1 x half of .Machine$double.xmin
## [1] -1.113e-308
## attr(,"Expression")
## [1] "-1*2^-1023*1"
format(Bin2Dec(c(rep(0, 63), 1)), digits=22)     # to see the longest form
## [1] "1.112536929253600691400e-308"
format(Bin2Dec(c(1, rep(0, 62), 1)), digits=22)  # to see the longest form
## [1] "-1.112536929253600691400e-308"
.Machine # R environment limitation and numerical specification
## $double.eps
## [1] 2.22e-16
## 
## $double.neg.eps
## [1] 1.11e-16
## 
## $double.xmin
## [1] 2.225e-308
## 
## $double.xmax
## [1] 1.798e+308
## 
## $double.base
## [1] 2
## 
## $double.digits
## [1] 53
## 
## $double.rounding
## [1] 5
## 
## $double.guard
## [1] 0
## 
## $double.ulp.digits
## [1] -52
## 
## $double.neg.ulp.digits
## [1] -53
## 
## $double.exponent
## [1] 11
## 
## $double.min.exp
## [1] -1022
## 
## $double.max.exp
## [1] 1024
## 
## $integer.max
## [1] 2147483647
## 
## $sizeof.long
## [1] 4
## 
## $sizeof.longlong
## [1] 8
## 
## $sizeof.longdouble
## [1] 16
## 
## $sizeof.pointer
## [1] 8
## 
## $longdouble.eps
## [1] 1.084e-19
## 
## $longdouble.neg.eps
## [1] 5.421e-20
## 
## $longdouble.digits
## [1] 64
## 
## $longdouble.rounding
## [1] 5
## 
## $longdouble.guard
## [1] 0
## 
## $longdouble.ulp.digits
## [1] -63
## 
## $longdouble.neg.ulp.digits
## [1] -64
## 
## $longdouble.exponent
## [1] 15
## 
## $longdouble.min.exp
## [1] -16382
## 
## $longdouble.max.exp
## [1] 16384

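*Note: .Machine$double.min.exp is -1022, not -1023. This agrees with IEEE 754: the biased exponent E = 0 (which would correspond to 2^-1023) is reserved for zero and denormalized numbers, so the smallest exponent of a normalized number is 1 - 1023 = -1022.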

Overflow and underflow error

  • Overflow error: an error that occurs when the absolute value of a number is too large to be represented.
  • Underflow error: an error that occurs when the absolute value of a number is too small to be represented.
a1 = 1e200
b1 = 1e300
a1*b1 # Inf
## [1] Inf
c1 = 1e-200
b1/c1 # Inf
## [1] Inf
c1/b1 # 0
## [1] 0
a1*b1*c1 # Inf, but the correct answer is 1e300
## [1] Inf
a1*(b1*c1) # 1e+300
## [1] 1e+300

In R, overflow and underflow do not raise errors; R simply returns +Inf, -Inf, or 0.

Dec2Bin function: practice converting a decimal number to binary (IEEE 754) format.

Dec2Bin
## function (x, Double = TRUE) 
## {
##     if (Double == TRUE) {
##         Index = c(2, 12, 13, 64, 1023)
##         a = rep(0, 64)
##     }
##     else {
##         Index = c(2, 9, 10, 32, 127)
##         a = rep(0, 32)
##     }
##     if (x < 0) 
##         a[1] = 1
##     E0 = floor(log(abs(x), base = 2))
##     E = E0 + Index[5]
##     for (i in Index[2]:Index[1]) {
##         a[i] = E%%2
##         E = floor(E/2)
##     }
##     M = abs(x)/2^E0
##     M = M - 1
##     for (i in Index[3]:Index[4]) {
##         a[i] = floor(2 * M)
##         M = 2 * M - a[i]
##     }
##     return(a)
## }
## <bytecode: 0x000001e2ab9737c8>
## <environment: namespace:math>
Dec2Bin(1)
##  [1] 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [42] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dec2Bin(-1)
##  [1] 1 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [42] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dec2Bin(1.5)
##  [1] 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [42] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dec2Bin(2)
##  [1] 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [42] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dec2Bin(6.5)
##  [1] 0 1 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [42] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dec2Bin(1, FALSE)
##  [1] 0 0 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dec2Bin(-1, FALSE)
##  [1] 1 0 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dec2Bin(1.5, FALSE)
##  [1] 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dec2Bin(2, FALSE)
##  [1] 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Dec2Bin(6.5, FALSE)
##  [1] 0 1 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Bin2Dec(Dec2Bin(1, FALSE))
## [1] 1
## attr(,"Expression")
## [1] "1*2^0*1"
Bin2Dec(Dec2Bin(-1, FALSE))
## [1] -1
## attr(,"Expression")
## [1] "-1*2^0*1"
Bin2Dec(Dec2Bin(1.5, FALSE))
## [1] 1.5
## attr(,"Expression")
## [1] "1*2^0*1.5"
Bin2Dec(Dec2Bin(2, FALSE))
## [1] 2
## attr(,"Expression")
## [1] "1*2^1*1"
Bin2Dec(Dec2Bin(6.5, FALSE))
## [1] 6.5
## attr(,"Expression")
## [1] "1*2^2*1.625"
Dec2Bin(1.1)
##  [1] 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1
## [42] 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 1 0
Dec2Bin(1.2)
##  [1] 0 0 1 1 1 1 1 1 1 1 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0
## [42] 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1
Dec2Bin(1.3)
##  [1] 0 0 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1
## [42] 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 1
Dec2Bin(1.4)
##  [1] 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0
## [42] 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0
Dec2Bin(1.5)
##  [1] 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [42] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# Can every decimal number be represented with a finite number of binary digits? No.

format(Bin2Dec(Dec2Bin(1.1)), digits=22)
## [1] "1.100000000000000088818"
Bin2Dec(Dec2Bin(1.1, FALSE))
## [1] 1.1
## attr(,"Expression")
## [1] "1*2^0*1.09999990463257"
Bin2Dec(Dec2Bin(1.2, FALSE))
## [1] 1.2
## attr(,"Expression")
## [1] "1*2^0*1.19999992847443"
Bin2Dec(Dec2Bin(1.3, FALSE))
## [1] 1.3
## attr(,"Expression")
## [1] "1*2^0*1.29999995231628"
Bin2Dec(Dec2Bin(1.4, FALSE))
## [1] 1.4
## attr(,"Expression")
## [1] "1*2^0*1.39999997615814"
Bin2Dec(Dec2Bin(1.5, FALSE))
## [1] 1.5
## attr(,"Expression")
## [1] "1*2^0*1.5"
format(Bin2Dec(Dec2Bin(1.0, FALSE)), digits=22)
## [1] "1"
format(Bin2Dec(Dec2Bin(1.1, FALSE)), digits=22)
## [1] "1.099999904632568359375"
format(Bin2Dec(Dec2Bin(1.2, FALSE)), digits=22)
## [1] "1.199999928474426269531"
format(Bin2Dec(Dec2Bin(1.3, FALSE)), digits=22)
## [1] "1.299999952316284179688"
format(Bin2Dec(Dec2Bin(1.4, FALSE)), digits=22)
## [1] "1.399999976158142089844"
format(Bin2Dec(Dec2Bin(1.5, FALSE)), digits=22)
## [1] "1.5"