Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
masekj committed Jul 26, 2023
1 parent 91b61b1 commit 1a13766
Show file tree
Hide file tree
Showing 6 changed files with 1,027 additions and 0 deletions.
210 changes: 210 additions & 0 deletions Numerical_example.ipynb

Large diffs are not rendered by default.

144 changes: 144 additions & 0 deletions Numerical_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#!/usr/bin/env python
# coding: utf-8

# Copyright (C) 2023 Jan Mašek and Miroslav Vořechovský
# MIT licence https://en.wikipedia.org/wiki/MIT_License

from SampleTiler import strat_sample_by_tiling, get_LHS_median_sample, get_scrambled_Halton_sample
from Tools import eval_function
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import qmc


# A numerical integration example comparing the performance of selected sampling methods:
#
# ### Methods:
# Selected are couple of standard sampling methods:
# * Simple Random Sampling (Monte Carlo)
# * LHS with random coordinate pairing, points at strata median
# * Randomized Quasi Monte Carlo (RQMC) - Scrambled Halton Sequence
#
# To be compared with the tiled point sets. Selected are:
# * Crude tiling with a single SRS tile
# * Crude tiling with a single LHS tile
# * Scrambled tiling with a single LHS tile (small t-shifts only)
#
# Feel free to explore the properties of tiled point sets for various parameter values!
#
# ### Function:      $g_{\Pi\mathrm{exp}} (\mathbf{X}) = \prod_{v=1}^{N_{\mathrm{v}}} \exp(-X_v^2)$
#
#
# Following statistics are computed from the average of $n_r$ realizations of each point sample for a given point count.
# * Estimated mean value
# * Standard deviation
# * Variance
# * Root mean square error
#
#


# The number of points within each tile
ns = 8
# The dimension of the design domain
nv = 2
# Maximum sample size
Ns_max = 1000
# Number of realizations for each sample
nr = 500
# Tile counts
t_max = int((Ns_max / ns) ** (1 / nv)) + 1
t_values = np.arange(1, t_max+1)
# Total point counts
Ns_values = ns*t_values**nv

### Selected sampling methods
sampling_methods = []
# Simple Random Sampling (Monte Carlo)
sampling_methods.append({"name" : "SRS" , "color" : "blue" , "dash" : "solid"})
# LHS random pairing, points at strata median
sampling_methods.append({"name" : "LHS-M-R" , "color" : "red" , "dash" : "solid"})
# Scrambled Halton sequence
sampling_methods.append({"name" : "RQMC Halton", "color" : "black" , "dash" : "solid"})
# Crude tiling with a single SRS tile
sampling_methods.append({"name" : "TC/SRS" , "color" : "blue" , "dash" : "dashed"})
# Crude tiling with a single LHS tile
sampling_methods.append({"name" : "TC/LHS-M-R" , "color" : "red" , "dash" : "dashed"})
# Scrambled tiling with a single LHS tile (small t-shifts only)
sampling_methods.append({"name" : "TP/LHS-M-R" , "color" : "red" , "dash" : "dashdot"})

#Function exact solution
exact_solution = 1/np.sqrt(3**nv)




for method in sampling_methods:
method["results"] = np.zeros([Ns_values.shape[0],nr])
method["stats"] = np.zeros([Ns_values.shape[0],4])

for idx, t in enumerate (t_values):
for r in range(nr):
#Sample generation
if method["name"] == "SRS":
x = np.random.rand(Ns_values[idx],nv)
if method["name"] == "LHS-M-R":
x = get_LHS_median_sample (Ns_values[idx], nv)
if method["name"] == "RQMC Halton":
sampler = qmc.Halton(d=nv, scramble=True)
x = sampler.random(n=Ns_values[idx])
if method["name"] == "TC/SRS":
x = strat_sample_by_tiling(nv, ns, t, tile_type = 'SRS', median = True, my_tile = None, one_tile = True,\
var_perms = False, rand_revers = False, t_shifts = True, b_shifts = False,\
large_shifts = False)
if method["name"] == "TC/LHS-M-R":
x = strat_sample_by_tiling(nv, ns, t, tile_type = 'LH', median = True, my_tile = None, one_tile = True,\
var_perms = False, rand_revers = False, t_shifts = False, b_shifts = False,\
large_shifts = False)
if method["name"] == "TP/LHS-M-R":
x = strat_sample_by_tiling(nv, ns, t, tile_type = 'LH', median = True, my_tile = None, one_tile = True,\
var_perms = False, rand_revers = False, t_shifts = True, b_shifts = False,\
large_shifts = False)

#Function computation
method["results"][idx,r] = np.mean(eval_function(x))

#Statistical evaluation
#Mean
method["stats"][idx,0] = np.mean(method["results"][idx])
#Standard deviation
method["stats"][idx,1] = np.std(method["results"][idx])
#Variance
method["stats"][idx,2] = method["stats"][idx,1]**2
#Root mean square error
method["stats"][idx,3] = np.sqrt( np.mean( (method["results"][idx]-exact_solution)**2) )




fig, ( pMean, pVar, pRMSE ) = plt.subplots(nrows=3, ncols=1, figsize=(7, 16))

for m in sampling_methods:
pMean.semilogx(Ns_values, m["stats"][:,0], label=m["name"], color = m["color"], linestyle = m["dash"])
pMean.fill_between(Ns_values, m["stats"][:,0]+m["stats"][:,1], y2=m["stats"][:,0]-m["stats"][:,1], alpha=0.1)

pVar.loglog(Ns_values, m["stats"][:,2], label=m["name"], color = m["color"], linestyle = m["dash"])
pRMSE.loglog(Ns_values, m["stats"][:,3], label=m["name"], color = m["color"], linestyle = m["dash"])

pMean.axhline(y=exact_solution, color='gray', linestyle='--', linewidth=0.7, zorder=0)
pMean.set_ylim(exact_solution*0.8,exact_solution*1.2)
pMean.set_title("Estimated mean value ± std_dev")
pMean.legend()

pVar.set_title("Variance")
pVar.legend()

pRMSE.set_title("Root mean square error (RMSE) ")
pRMSE.legend()
plt.show()






211 changes: 211 additions & 0 deletions SampleTiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
# Copyright (C) 2023 Miroslav Vořechovský and Jan Mašek
# MIT licence https://en.wikipedia.org/wiki/MIT_License

import numpy as np
from scipy.stats import norm, rankdata, qmc
from scipy import stats
from IPython.display import FileLink, FileLinks

################## root sampling methods #####################
def get_LHS_median_sample(Ns, dim):
return (get_LHS_ranks(Ns,dim) + 0.5)/Ns

def get_scrambled_Halton_sample(Ns, dim):
sampler = qmc.Halton(d=dim, scramble=True)
return sampler.random(n=Ns)

################## tiling #####################
def strat_sample_by_tiling(nv, ns, t, tile_type = 'MC', median = True, my_tile = None, one_tile = True,\
var_perms = False, rand_revers = False,
t_shifts = False, b_shifts = False, large_shifts = False):
'''
Tile a point sample to generate a tiled point set.
Parameters:
nv (int): Dimension of the design domain.
ns (int): Number of points in each tile.
t (int): The number of strata (tiles) along each dimension.
my_tile (np.array): Provided point sample that will be used for tiling.
If my_tile is None, random sampling will be conducted according to tile_type.
Shape: (ns, nv)
tile_type (str): Sampling method used for generation of tiles.
Options: 'LH' (Latin Hypercube Sample with random pairing), 'SRS' (Simple Random Sampling).
median (bool): Triggers point positions in strata median for tile_type='LH'.
one_tile (bool): If True, tiling from an identical tile sample.
If False, each tile is generated randomly.
var_perms (bool): Triggers random variable permutations.
rand_revers (bool): Triggers random variable reversions.
t_shifts (bool): Triggers minor coordinate shifts to achieve regular 1D LH-like projections, see Eq. (5).
b_shifts (bool): Pillar shifting, see Sec. 3.1.
large_shifts (bool): Triggers large coordinate shifts for high dimension cluster removal, see Eq. (6).
Returns:
np.array: Tiled point set. 2D numpy array of Ns coordinates in nv dimensions. (Ns = ns * t ** nv)
'''

# Total number of strata (tiles)
T = t**nv

# prepare all T = t**nv tiles with ns point each.
# these tiles have edge length 1 and they are located in the unit hypercube starting at the origin
# shape: (T,ns,nv)
tiles = get_T_tiles_in_unit_hypercube(nv, ns, t, tile_type, median, my_tile, one_tile)

###################################################################################
# perform the desired scrambling operations
#
if rand_revers:
rev = np.random.choice([0,1], t**nv * nv, p=[0.5, 0.5]).reshape((t**nv, nv))
# tiles and directions with rev==0 are reversed (all nsim points in a tile)
# with rev==1 are kept untouched
tiles = (1-rev[:, None ,:])*(1-tiles) \
+ rev[:, None ,:] *( tiles)

if var_perms:
for r in range(T):
var = np.random.permutation(nv)
tiles[r] = np.take(tiles[r], var, axis=1) #reorder variables according to given permutation 'var'

# origins = 'bottom left' corners of all tiles in format (shape t**nv, nv)
o = strat_sample_corners(nv, t)
so = o.copy() #copy of the origins

if t_shifts:
ds = 1./(ns*t**nv) #the smallest distance between point projections along each variable
if tile_type == 'MC':
ds = 1./(ns*t)
# half of ds and half of original LH distance between two points along a line
shift_correction = 0.5*ds - 1/(2*t*ns) #corrects for true LHS for t even. Relieves the need of periodic shift

s = get_shifts_of_tiles_individual(t,nv,o)
#apply shifts to the origins
so = o + s*ds + shift_correction
if large_shifts: #additional shifts to break small clouds caused by tiny shifts
so += s*ds*ns*t

if b_shifts:
ds = 1./(ns*t**nv) #the smallest distance between point projections along each variable
# half of ds and half of original LH distance between two points along a line
shift_correction = 0.5*ds - 1/(2*t*ns) #corrects for true LHS for t even. Relieves the need of periodic shift
s = get_shifts_of_tiles_block(t,nv,o)
so = o + s*ds + shift_correction

# now compose the tiles to a design by adding them to the left corners (potentially shifted)
# and finally scale the tile down to edge length 1/t
x_3d = so[:, None , : ] + tiles/t #copy the single tile to all the strata

#periodic correction of the shift in each tile (compare to the original left bound xo+1/t)
if t_shifts:# and tile_type == 'MC':
remainders_after_periodic_shifts = (x_3d - o[:,None,:])%(1/t) #remainders to be added to tile left origins
x_3d = o[:,None,:] + remainders_after_periodic_shifts #add remainders to tile left origins
#x_3d = np.where(x_3d > o[:,None,:] + 1./t, x_3d - ((x_3d - o[:,None,:])//(1/t))/t, x_3d)
#x_3d = np.where(x_3d > o[:,None,:] + 1./t, x_3d - 1./t, x_3d)
#x_3d = np.where(x_3d < o[:,None,:] , x_3d + 1./t, x_3d)

if b_shifts:
x_3d = np.where(x_3d > 1, x_3d - 1., x_3d)

return np.reshape(x_3d,(ns * t**nv,nv))



################# auxiliary methods ###########################

def generate_permutations(t,nv, randomized = True):
rand_s = np.zeros([nv,t])

if randomized:
for v in range(nv):
rand_s[v,:] = np.random.permutation(np.arange(t))
else:
for v in range(nv):
rand_s[v,:] = np.arange(t)
return rand_s

def get_LHS_ranks(Ns,dim):
ranks = np.zeros((Ns, dim))
arr = np.arange(Ns)
for col in range (dim): #each column separately
perm = np.random.permutation(arr)
ranks[:,col] = perm

return ranks

def get_shifts_of_tiles_block(t,nv,o):
s = np.zeros((t**nv, nv)) #integer shift for every tile (rows) and along every direction (cols)
bounds = np.zeros( nv )
var_list = list(np.arange(0, nv, 1, dtype=int))
for v in range(nv): #index along individual directions to make a shift
shift_perm = np.arange(t**(nv-1))
shift_perm = np.random.permutation(shift_perm)
v_mask = (o[:,v]==0)
v_idx = np.where(v_mask)[0] #indices of rows in 's' matrix. fullfiles by t**(nv-1)
#cur_list = var_list[:v] + var_list[v+1:] #indices of directions except v
for index in range(len(v_idx)):
gmask = np.full((t**nv), True) #set all tiles to True
bounds = o[v_idx[index],:]
for dim in range(nv):
if dim != v:
gmask = np.logical_and(gmask , np.isclose(o[:,dim], bounds[dim], rtol=0.1/t) )# o[:,v]==r/t

x_idx = np.where(gmask)[0] #indices of rows in 's' matrix
s[x_idx,v] = shift_perm[index]
return s

def get_shifts_of_tiles_individual(t,nv,o):
s = np.zeros((t**nv, nv))
for v in range(nv): #index along individual directions to make a shift
for r in range(t): #index all (slices of) tiles along a given direction
shift_perm = np.arange(t**(nv-1))
shift_perm = np.random.permutation(shift_perm)
mask = np.isclose(o[:,v], r/t, rtol=0.1/t)# o[:,v]==r/t
x_idx = np.where(mask)[0] #indices of rows in 's' matrix
for index in range(len(x_idx)):
s[x_idx[index],v] = shift_perm[index]
return s

def get_one_tile_in_unit_hypercube(nv, ns, t, tile_type = 'MC', median = True):
u = 0.5 #for LHS-median
if tile_type == 'LH':
LHS_ranks = get_LHS_ranks(ns,nv)
if not median: #LHS-random selection from 1D strata
u = np.random.rand(ns, nv)
tile = (LHS_ranks+u)/ns
elif tile_type == 'Halton':
sampler = qmc.Halton(d=nv, scramble=True)
tile = sampler.random(n=ns) #a single Halton tile
else: #if tile_type == 'MC':
tile = np.random.rand(ns,nv) #a single MC tile
return tile

def get_T_tiles_in_unit_hypercube(nv, ns, t, tile_type = 'MC', median = True, my_tile = None, one_tile = True):
T = t**nv

if one_tile: # repeat a single tile
if (my_tile is None): #check whether the tile is provided
tile = get_one_tile_in_unit_hypercube(nv, ns, t, tile_type, median)

else:
if(my_tile.shape[0] != ns or my_tile.shape[1] != nv):
#check whether the size has a correct size
print(f'Wrong tile size. Either {my_tile.shape[0]} in not ns={ns}, or nv={my_tile.shape[1]} is not nv={nv}')
return None
tile = my_tile

#copy the tile into T individual tiles - obtain a 3D array (shape: T,ns,nv)
tiles = np.tile(tile,(T, 1,1))

else: # generate independent tiles
tiles=np.zeros((T,ns,nv))#space for T individual tiles, each ns*nv (shape: T,ns,nv)
for tile in range(T):
#write a single tile in format ns points (rows) and nv cols
tiles[tile,:,:] = get_one_tile_in_unit_hypercube(nv, ns, t, tile_type, median)

return tiles

def strat_sample_corners(nv, t):
pa = np.linspace( 0 , 1-1/t , t , endpoint=True) #left tile probability bounds
pa_list = [pa] * (nv)
p = np.meshgrid(*pa_list)#,indexing='ij')
x = np.array(p).reshape((nv, t**nv)).T#returns the standard "design format" (shape t**nv, nv)
return x#np.flip(x, axis=1)
Loading

0 comments on commit 1a13766

Please sign in to comment.