3.10 Richardson extrapolation

fx = function(x) x*x       # any real scalar valued function
x = 1                      # gradient at this point
h = max(1e-4, 1e-4*abs(x)) # small h
a = vector(length=4)       # for approximate gradients

# four central differences with consecutively smaller h
a[1] = (fx(x + h/2^1) - fx(x - h/2^1))/(2*h/2^1)
a[2] = (fx(x + h/2^2) - fx(x - h/2^2))/(2*h/2^2)
a[3] = (fx(x + h/2^3) - fx(x - h/2^3))/(2*h/2^3)
a[4] = (fx(x + h/2^4) - fx(x - h/2^4))/(2*h/2^4)

# first round refinement
a[1] = (a[2]*4^1 - a[1])/(4^1 - 1)
a[2] = (a[3]*4^1 - a[2])/(4^1 - 1)
a[3] = (a[4]*4^1 - a[3])/(4^1 - 1)

# second round refinement
a[1] = (a[2]*4^2 - a[1])/(4^2 - 1)
a[2] = (a[3]*4^2 - a[2])/(4^2 - 1)

# third (final) refinement
a[1] = (a[2]*4^3 - a[1])/(4^3 - 1)
a[1] # Answer

## [1] 2

# with i and j for the looping
i = 1 ; a[i] = (fx(x + h/2^i) - fx(x - h/2^i))/(2*h/2^i)
i = 2 ; a[i] = (fx(x + h/2^i) - fx(x - h/2^i))/(2*h/2^i)
i = 3 ; a[i] = (fx(x + h/2^i) - fx(x - h/2^i))/(2*h/2^i)
i = 4 ; a[i] = (fx(x + h/2^i) - fx(x - h/2^i))/(2*h/2^i)

# new i iteration
i = 1 ; j = 1 ; a[j] = (a[j + 1]*4^i  - a[j])/(4^i - 1)
i = 1 ; j = 2 ; a[j] = (a[j + 1]*4^i  - a[j])/(4^i - 1)
i = 1 ; j = 3 ; a[j] = (a[j + 1]*4^i  - a[j])/(4^i - 1)

i = 2 ; j = 1 ; a[j] = (a[j + 1]*4^i - a[j])/(4^i - 1)
i = 2 ; j = 2 ; a[j] = (a[j + 1]*4^i - a[j])/(4^i - 1)

i = 3 ; j = 1 ; a[j] = (a[j + 1]*4^i - a[j])/(4^i - 1)
a[1]

## [1] 2

# with loop
for (i in 1:4) a[i] = (fx(x + h/2^i) - fx(x - h/2^i))/(2*h/2^i)
for (i in 1:4) for (j in 1:(4 - i)) a[j] = (a[j + 1]*4^i - a[j])/(4^i - 1)
a[1]

## [1] 2

require(math)
Deriv0

## function (fx, x) 
## {
##     if (length(x) != 1) 
##         return(NULL)
##     a = matrix(nrow = 4, ncol = 4)
##     h = max(1e-04, 1e-04 * abs(x))
##     for (i in 1:4) {
##         a[i, 1] = (fx(x + h/2^i) - fx(x - h/2^i))/(2 * h/2^i)
##     }
##     for (i in 2:4) {
##         for (j in 2:i) {
##             a[i, j] = (4^(j - 1) * a[i, j - 1] - a[i - 1, j - 
##                 1])/(4^(j - 1) - 1)
##         }
##     }
##     return(a[4, 4])
## }
## <bytecode: 0x000001e2ab9ec550>
## <environment: namespace:math>

Deriv1

## function (fx, x) 
## {
##     if (length(x) != 1) 
##         return(NULL)
##     a = vector(length = 4)
##     h = max(1e-04, 1e-04 * abs(x))
##     for (i in 1:4) {
##         a[i] = (fx(x + h/2^i) - fx(x - h/2^i))/(2 * h/2^i)
##     }
##     for (i in 1:3) {
##         for (j in 1:(4 - i)) {
##             a[j] = (4^i * a[j + 1] - a[j])/(4^i - 1)
##         }
##     }
##     return(a[1])
## }
## <bytecode: 0x000001e2ab949028>
## <environment: namespace:math>

fx0 = function(x) sum(x*x)

Deriv0(fx0, 1)

## [1] 2

Deriv1(fx0, 1)

## [1] 2

Grad

## function (func, x) 
## {
##     n = length(x)
##     x1 = vector(length = n)
##     x2 = vector(length = n)
##     ga = vector(length = 4)
##     gr = vector(length = n)
##     for (i in 2:n) x1[i] = x2[i] = x[i]
##     for (i in 1:n) {
##         axi = abs(x[i])
##         if (axi <= 1) 
##             hi = 1e-04
##         else hi = 1e-04 * axi
##         for (k in 1:4) {
##             x1[i] = x[i] - hi
##             x2[i] = x[i] + hi
##             ga[k] = (func(x2) - func(x1))/(2 * hi)
##             hi = hi/2
##         }
##         ga[1] = (ga[2] * 4 - ga[1])/3
##         ga[2] = (ga[3] * 4 - ga[2])/3
##         ga[3] = (ga[4] * 4 - ga[3])/3
##         ga[1] = (ga[2] * 16 - ga[1])/15
##         ga[2] = (ga[3] * 16 - ga[2])/15
##         gr[i] = (ga[2] * 64 - ga[1])/63
##         x1[i] = x2[i] = x[i]
##     }
##     return(gr)
## }
## <bytecode: 0x000001e2a47625b8>
## <environment: namespace:math>

Grad(fx0, 1:3)

## [1] 2 4 6

Explore and enhance the following R script for derivatives of multi-variable function

Deriv2

## function (fx, x) 
## {
##     n = length(x)
##     x1 = vector(length = n)
##     x2 = vector(length = n)
##     ga = vector(length = 4)
##     gr = vector(length = n)
##     if (n > 1) 
##         for (i in 2:n) x1[i] = x2[i] = x[i]
##     for (i in 1:n) {
##         axi = abs(x[i])
##         if (axi <= 1) 
##             hi = 1e-04
##         else hi = 1e-04 * axi
##         for (k in 1:4) {
##             x1[i] = x[i] - hi
##             x2[i] = x[i] + hi
##             ga[k] = (fx(x2) - fx(x1))/(2 * hi)
##             hi = hi/2
##         }
##         for (j in 1:3) {
##             for (k in 1:(4 - j)) {
##                 ga[k] = (4^j * ga[k + 1] - ga[k])/(4^j - 1)
##             }
##         }
##         gr[i] = ga[1]
##         x1[i] = x2[i] = x[i]
##     }
##     return(gr)
## }
## <bytecode: 0x000001e2ac777100>
## <environment: namespace:math>

fx2 = function(x) sum(x^2)
Deriv2(fx2, c(1,1))

## [1] 2 2

Similarly hessian can be calculated.

Hessian

## function (fx, x) 
## {
##     n = length(x)
##     h0 = vector(length = n)
##     x1 = vector(length = n)
##     x2 = vector(length = n)
##     ha = vector(length = 4)
##     H = matrix(NA, nrow = n, ncol = n)
##     f0 = fx(x)
##     for (i in 1:n) {
##         x1[i] = x2[i] = x[i]
##         axi = abs(x[i])
##         if (axi < 1) 
##             h0[i] = 1e-04
##         else h0[i] = 1e-04 * axi
##     }
##     for (i in 1:n) {
##         for (j in i:1) {
##             hi = h0[i]
##             if (i == j) {
##                 for (k in 1:4) {
##                   x1[i] = x[i] - hi
##                   x2[i] = x[i] + hi
##                   ha[k] = (fx(x1) - 2 * f0 + fx(x2))/(hi * hi)
##                   hi = hi/2
##                 }
##             }
##             else {
##                 hj = h0[j]
##                 for (k in 1:4) {
##                   x1[i] = x[i] - hi
##                   x1[j] = x[j] - hj
##                   x2[i] = x[i] + hi
##                   x2[j] = x[j] + hj
##                   ha[k] = (fx(x1) - 2 * f0 + fx(x2) - H[i, i] * 
##                     hi * hi - H[j, j] * hj * hj)/(2 * hi * hj)
##                   hi = hi/2
##                   hj = hj/2
##                 }
##             }
##             w = 4
##             for (m in 1:2) {
##                 for (k in 1:(4 - m)) ha[k] = (ha[k + 1] * w - 
##                   ha[k])/(w - 1)
##                 w = w * 4
##             }
##             H[i, j] = (ha[2] * 64 - ha[1])/63
##             if (i != j) 
##                 H[j, i] = H[i, j]
##             x1[j] = x2[j] = x[j]
##         }
##         x1[i] = x2[i] = x[i]
##     }
##     return(H)
## }
## <bytecode: 0x000001e2abea3f08>
## <environment: namespace:math>

A little more sophiscated version of gradient and hessian function can be found in the numDeriv package.

library("numDeriv") 

grad(sin, pi)

## [1] -1

grad(sin, (0:10)*2*pi/10)

##  [1]  1.000  0.809  0.309 -0.309 -0.809 -1.000 -0.809 -0.309  0.309  0.809  1.000

func0 = function(x){ sum(sin(x))  }
grad(func0 , (0:10)*2*pi/10)

##  [1]  1.000  0.809  0.309 -0.309 -0.809 -1.000 -0.809 -0.309  0.309  0.809  1.000

func1 = function(x){ sin(10*x) - exp(-x) }
curve(func1,from=0,to=5)

x = 2.04
numd1 = grad(func1, x)
exact = 10*cos(10*x) + exp(-x)
c(numd1, exact, (numd1 - exact)/exact)

## [1] 3.335e-01 3.335e-01 8.469e-12

x = c(1:10)
numd1 = grad(func1, x)
exact = 10*cos(10*x) + exp(-x)
cbind(numd1, exact, (numd1 - exact)/exact)

##        numd1  exact           
##  [1,] -8.023 -8.023  6.118e-12
##  [2,]  4.216  4.216  6.271e-12
##  [3,]  1.592  1.592 -3.507e-12
##  [4,] -6.651 -6.651  6.698e-12
##  [5,]  9.656  9.656 -7.892e-13
##  [6,] -9.522 -9.522 -4.610e-12
##  [7,]  6.334  6.334  1.243e-11
##  [8,] -1.104 -1.104  6.093e-12
##  [9,] -4.481 -4.481  1.524e-12
## [10,]  8.623  8.623 -8.120e-13

func2 = function(x) c(sin(x), cos(x))
x = (0:1)*2*pi
jacobian(func2, x)

##      [,1] [,2]
## [1,]    1    0
## [2,]    0    1
## [3,]    0    0
## [4,]    0    0

x = 0.25 * pi
hessian(sin, x)

##         [,1]
## [1,] -0.7071

fun1e = function(x) sum(exp(2*x))
x = c(1, 3, 5)
hessian(fun1e, x, method.args=list(d=0.01))

##           [,1]      [,2]      [,3]
## [1,] 2.956e+01 2.583e-15 1.620e-11
## [2,] 2.583e-15 1.614e+03 5.401e-12
## [3,] 1.620e-11 5.401e-12 8.811e+04

func = function(x){c(x[1], x[1], x[2]^2)}
z = genD(func, c(2,2,5))
z

## $D
##      [,1] [,2] [,3]      [,4]       [,5] [,6]      [,7]      [,8] [,9]
## [1,]    1    0    0 4.276e-08 -6.302e-25    0 2.101e-25 0.000e+00    0
## [2,]    1    0    0 4.276e-08 -6.302e-25    0 2.101e-25 0.000e+00    0
## [3,]    0    4    0 0.000e+00  1.377e-16    2 0.000e+00 5.506e-17    0
## 
## $p
## [1] 3
## 
## $f0
## [1] 2 2 4
## 
## $func
## function(x){c(x[1], x[1], x[2]^2)}
## <bytecode: 0x000001e2a4154010>
## 
## $x
## [1] 2 2 5
## 
## $d
## [1] 1e-04
## 
## $method
## [1] "Richardson"
## 
## $method.args
## $method.args$eps
## [1] 1e-04
## 
## $method.args$d
## [1] 1e-04
## 
## $method.args$zero.tol
## [1] 1.781e-05
## 
## $method.args$r
## [1] 4
## 
## $method.args$v
## [1] 2
## 
## 
## attr(,"class")
## [1] "Darray"