3.8. Solutions: Lecture 3#
3.8.1. Exercise 3.1#
ex_3.1.py
#!/usr/bin/python
'''
Create a one-dimensional histogram filled with 5 values and save the histogram image to a `png` file
'''
import matplotlib.pyplot as plt
import numpy as np
def main () :
'''
Funzione che implementa il programma principale
'''
# the sample to be represented as an histogram
sample = [1.3977702773941403, 0.74021968956204, 0.559129026598526, 0.034722468927208275, 1.472659179569878]
fig, ax = plt.subplots (nrows = 1, ncols = 1)
ax.hist (sample,
color = 'orange',
)
ax.set_title ('Histogram example', size=14)
ax.set_xlabel ('variable')
ax.set_ylabel ('event counts per bin')
plt.savefig ('ex_3.1.png')
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
if __name__ == "__main__":
main ()
3.8.2. Exercise 3.2#
ex_3.2.py
#!/usr/bin/python
'''
Read the text file [eventi_unif.txt](https://github.com/UnimibFisicaLaboratori/UnimibFisicaLabStatPython/blob/main/book/lectures/Lecture_03/exercises/eventi_unif.txt)
* Print the first 10 positive elements to the screen.
* Count the number of events contained in the file.
* Determine the minimum and maximum values among the numbers saved in the file.
'''
import sys
def main () :
'''
Funzione che implementa il programma principale
'''
if (len (sys.argv) < 2) :
print ('usage:', sys.argv[0], 'inputfile.txt')
sys.exit ()
# read the file
with open(sys.argv[1]) as f:
sample = [float (x) for x in f.readlines()]
for elem in sample[:10]:
print (elem)
print ('total number of events:', len (sample))
print ('sample minimum:', min (sample))
print ('sample maximum:', max (sample))
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
if __name__ == "__main__":
main ()
3.8.3. Exercise 3.3#
ex_3.3.py
#!/usr/bin/python
'''
Read the text file [```eventi_gauss.txt```](https://github.com/UnimibFisicaLaboratori/UnimibFisicaLabStatPython/blob/main/book/lectures/Lecture_03/exercises/eventi_gauss.txt):
* Fill a histogram with the first N numbers contained in the file,
where N is a command-line parameter during program execution.
* Choose the histogram's definition range and its bin number
based on the numbers to be represented.
'''
import sys
import matplotlib.pyplot as plt
import numpy as np
from math import ceil, floor
def sturges (N_events) :
return ceil (1 + np.log2 (N_events))
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
def main () :
'''
Funzione che implementa il programma principale
'''
if (len (sys.argv) < 3) :
print ('usage:', sys.argv[0], 'inputfile.txt Nmax')
sys.exit ()
N_max = int (sys.argv[2])
# read the file
with open(sys.argv[1]) as f:
sample = [float (x) for x in f.readlines()]
xMin = floor (min (sample[:N_max]))
xMax = ceil (max (sample[:N_max]))
N_bins = sturges (N_max)
bin_edges = np.linspace (xMin, xMax, N_bins)
fig, ax = plt.subplots (nrows = 1, ncols = 1)
ax.hist (sample[:N_max],
bins = bin_edges,
color = 'orange',
)
ax.set_title ('Histogram example', size=14)
ax.set_xlabel ('variable')
ax.set_ylabel ('event counts per bin')
plt.savefig ('ex_3.3.png')
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
if __name__ == "__main__":
main ()
3.8.4. Exercise 3.4#
ex_3.3.py
#!/usr/bin/python
'''
## Exercise 3.4
* Display the distributions of events from the two files of the previous exercises, overlaid,
finding the best visualization for the comparison between the two histograms.
'''
import sys
import matplotlib.pyplot as plt
import numpy as np
from math import ceil, floor
def sturges (N_events) :
return ceil (1 + np.log2 (N_events))
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
def main () :
'''
Funzione che implementa il programma principale
'''
if (len (sys.argv) < 3) :
print ('usage:', sys.argv[0], 'inputfile1.txt inputfile2.txt')
sys.exit ()
# read the files
with open(sys.argv[1]) as f:
sample_1 = [float (x) for x in f.readlines()]
with open(sys.argv[2]) as f:
sample_2 = [float (x) for x in f.readlines()]
xMin = floor (min (min (sample_1), min (sample_2)))
xMax = ceil (max (max (sample_1), max (sample_2)))
N_bins = sturges (min (len (sample_1), len (sample_2)))
bin_edges = np.linspace (xMin, xMax, N_bins)
fig, ax = plt.subplots (nrows = 1, ncols = 1)
ax.hist (sample_1,
bins = bin_edges,
color = 'orange',
)
# ax.hist (sample_2,
# bins = bin_edges,
# color = 'red',
# alpha = 0.5,
# )
ax.hist (sample_2,
bins = bin_edges,
color = 'red',
histtype='step',
)
ax.set_title ('Histogram example', size=14)
ax.set_xlabel ('variable')
ax.set_ylabel ('event counts per bin')
plt.savefig ('ex_3.4.png')
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
if __name__ == "__main__":
main ()
3.8.5. Exercise 3.5#
ex_3.5.py
#!/usr/bin/python
'''
## Exercise 3.5
Read the text file [```eventi_unif.txt```](https://raw.githubusercontent.com/UnimibFisicaLaboratori/UnimibFisicaLab2/master/Lezione_03/programmi/eventi_unif.txt):
* Calculate the mean of the numbers in the text file.
* Calculate the variance of the numbers in the text file.
* Calculate the standard deviation of the numbers in the text file.
* Calculate the standard deviation from the mean of the numbers in the text file.
'''
import sys
from stats import mean, variance, sigma, sigma_mean
def main () :
'''
Funzione che implementa il programma principale
'''
if (len (sys.argv) < 2) :
print ('usage:', sys.argv[0], 'inputfile.txt')
sys.exit ()
# read the file
with open(sys.argv[1]) as f:
sample = [float (x) for x in f.readlines()]
for elem in sample[:10]:
print (elem)
print ('total number of events:', len (sample))
print ('sample minimum:', min (sample))
print ('sample maximum:', max (sample))
print ('sample mean:', mean (sample))
print ('sample variance:', variance (sample))
print ('sample sigma:', sigma (sample))
print ('sample sigma of the mean:', sigma_mean (sample))
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
if __name__ == "__main__":
main ()
stats.py
from math import sqrt
def mean (sample) :
'''
calculates the mean of the sample present in the object
'''
summ = sum (sample)
N = len (sample)
return summ / N
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
def variance (sample, bessel = True) :
'''
calculates the variance of the sample present in the object
'''
summ = 0.
sum_sq = 0.
N = len (sample)
for elem in sample :
summ += elem
sum_sq += elem * elem
var = sum_sq / N - summ * summ / (N * N)
if bessel : var = N * var / (N - 1)
return var
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
def sigma (sample, bessel = True) :
'''
calculates the sigma of the sample present in the object
'''
return sqrt (variance (sample, bessel))
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
def sigma_mean (sample, bessel = True) :
'''
calculates the sigma of the sample present in the object
'''
N = len (sample)
return sqrt (variance (sample, bessel) / N)
3.8.6. Exercise 3.6#
ex_3.6.py
#!/usr/bin/python
'''
Write a ```python``` class in the form of a library which,
given the name of a text file containing a sample of events as input,
is able to store the sample internally,
calculate its mean, variance, standard deviation, standard deviation from the mean,
display the sample in a histogram
with an appropriately chosen definition range and bin number.
Write a test program for the created class.
'''
from my_histo import my_histo
import sys
def main () :
'''
Funzione che implementa il programma principale
'''
if (len (sys.argv) < 2) :
print ('usage:', sys.argv[0], 'inputfile.txt')
sys.exit ()
my_histo_test = my_histo (sys.argv[1])
my_histo_test.draw_histo ('ex_3.6.png')
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
if __name__ == "__main__":
main ()
my_histo.py
#!/usr/bin/python
from math import sqrt
import matplotlib.pyplot as plt
import numpy as np
from math import ceil, floor
def sturges (N_events) :
return ceil (1 + np.log2 (N_events))
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
class my_histo :
'''calculator for statistics of a list of numbers'''
summ = 0.
sumSq = 0.
N = 0
sample = []
def __init__ (self, sample_file_name) :
'''
reads as input the file containing the collection of events
and reads it
'''
with open (sample_file_name) as f:
self.sample = [float (x) for x in f.readlines ()]
self.summ = sum (self.sample)
self.sumSq = sum ([x*x for x in self.sample])
self.N = len (self.sample)
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
def mean (self) :
'''
calculates the mean of the sample present in the object
'''
return self.summ / self.N
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
def variance (self, bessel = True) :
'''
calculates the variance of the sample present in the object
'''
var = self.sumSq / self.N - self.mean () * self.mean ()
if bessel : var = self.N * var / (self.N - 1)
return var
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
def sigma (self, bessel = True) :
'''
calculates the sigma of the sample present in the object
'''
return sqrt (self.variance (bessel))
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
def sigma_mean (self, bessel = True) :
'''
calculates the sigma of the sample present in the object
'''
return sqrt (self.variance (bessel) / self.N)
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
def draw_histo (self, output_file_name) :
'''
draw the sample content into an histogram
'''
xMin = floor (min (self.sample))
xMax = ceil (max (self.sample))
N_bins = sturges (self.N)
bin_edges = np.linspace (xMin, xMax, N_bins)
fig, ax = plt.subplots (nrows = 1, ncols = 1)
ax.hist (self.sample,
bins = bin_edges,
color = 'orange',
)
ax.set_title ('Histogram example', size=14)
ax.set_xlabel ('variable')
ax.set_ylabel ('event counts per bin')
plt.savefig (output_file_name)
3.8.7. Exercise 3.7#
ex_3.7.py
#!/usr/bin/python
'''
Write a Python program to draw a Gaussian distribution and its cumulative function
'''
import matplotlib.pyplot as plt
from scipy.stats import norm
import numpy as np
def main () :
'''
Funzione che implementa il programma principale
'''
normal = norm (100., 10.)
x_axis = np.linspace (50., 150., 100)
plt.plot (x_axis, normal.pdf (x_axis), label="PDF")
plt.legend ()
plt.savefig ('ex_3.7_pdf.png')
plt.clf () # clear the figure
plt.plot (x_axis, normal.cdf (x_axis), label="CDF")
plt.legend ()
plt.savefig ('ex_3.7_cdf.png')
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
if __name__ == "__main__":
main ()
3.8.8. Exercise 3.8#
ex_3.8.py
#!/usr/bin/python
'''
Write a Python program to draw an exponential distribution and its cumulative function
'''
import matplotlib.pyplot as plt
from scipy.stats import expon
import numpy as np
def main () :
'''
Funzione che implementa il programma principale
'''
tau = 2.
expo = expon (0., tau)
x_axis = np.linspace (0., 8., 100)
plt.plot (x_axis, expo.pdf (x_axis), label="PDF")
plt.legend ()
plt.savefig ('ex_3.8_pdf.png')
plt.clf () # clear the figure
plt.plot (x_axis, expo.cdf (x_axis), label="CDF")
plt.legend ()
plt.savefig ('ex_3.8_cdf.png')
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
if __name__ == "__main__":
main ()
3.8.9. Exercise 3.9#
ex_3.9.py
#!/usr/bin/python
'''
Use the Python `scipy.stat.norm` object to determine the area of a normal distribution
of its tails outside the range included within an interval of 1, 2, 3, 4, and 5
standard deviations around its mean
'''
import matplotlib.pyplot as plt
from scipy.stats import norm
import numpy as np
def main () :
'''
Funzione che implementa il programma principale
'''
normal = norm (0., 1.)
for sigmas in range (1,6) :
tails_area = normal.cdf (0. - sigmas) + 1. - normal.cdf (0. + sigmas)
print ('outside ' + str (sigmas) + ' sigmas :\t'
+ str (tails_area))
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
if __name__ == "__main__":
main ()
3.8.10. Exercise 3.10#
ex_3.10.py
#!/usr/bin/python
'''
Write a Python program to draw a binomial distribution and its cumulative function
'''
import matplotlib.pyplot as plt
from scipy.stats import binom
import numpy as np
def main () :
'''
Funzione che implementa il programma principale
'''
success_p = 0.5
N = 8
binomial = binom (N, success_p)
x_axis = np.arange (N+1)
plt.scatter (x_axis, binomial.pmf (x_axis), label='PMF')
plt.title ("Binomial Distribution")
plt.ylabel ("Density")
plt.legend ()
plt.savefig ('ex_3.10_pdf.png')
plt.clf () # clear the figure
plt.scatter (x_axis, binomial.cdf (x_axis), label='CDF')
plt.legend ()
plt.savefig ('ex_3.10_cdf.png')
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
if __name__ == "__main__":
main ()
3.8.11. Exercise 3.11#
ex_3.11.py
#!/usr/bin/python
'''
Write a Python program to draw a Poisson distribution and its cumulative function
'''
import matplotlib.pyplot as plt
from scipy.stats import poisson
import numpy as np
def main () :
'''
Funzione che implementa il programma principale
'''
average = 4.
poiss = poisson (average)
x_axis = np.arange (3 * average)
plt.scatter (x_axis, poiss.pmf (x_axis), label='PMF')
plt.title ("Poisson Distribution")
plt.ylabel ("Density")
plt.legend ()
plt.savefig ('ex_3.11_pdf.png')
plt.clf () # clear the figure
plt.scatter (x_axis, poiss.cdf (x_axis), label='CDF')
plt.legend ()
plt.savefig ('ex_3.11_cdf.png')
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
if __name__ == "__main__":
main ()
3.8.12. Exercise 3.12#
ex_3.12.py
#!/usr/bin/python
'''
Write a Python program to draw a Poisson distribution
Show, by using the third and fourth central momenta calculations available in the `scipy.stat` library,
that the momenta of a Poisson distribution asymptotically tend to the ones of a Gaussian
'''
import matplotlib.pyplot as plt
from scipy.stats import poisson
import numpy as np
def main () :
'''
Funzione che implementa il programma principale
'''
skew_l = []
kurt_l = []
ave_l = range (100)
for average in ave_l :
poiss = poisson (average)
ave, var, skew, kurt = poiss.stats (moments='mvsk')
skew_l.append (skew)
kurt_l.append (kurt)
plt.scatter (ave_l, skew_l)
plt.title ('Poisson skewness')
plt.xlabel ('mean')
plt.ylabel ('skewness')
plt.savefig ('ex_3.12_skew.png')
plt.clf () # clear the figure
plt.scatter (ave_l, skew_l)
plt.title ('Poisson kurtosis')
plt.xlabel ('mean')
plt.ylabel ('kurtosis')
plt.savefig ('ex_3.12_kurt.png')
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
if __name__ == "__main__":
main ()
3.8.13. Exercise 3.13#
ex_3.13.py
#!/usr/bin/python
'''
What is the probability that ten measurements of the same quantity
expected to be Gaussian fall within an interval of 1 standard deviation width
around the mean?
'''
from scipy.stats import norm
import numpy as np
def main () :
'''
Funzione che implementa il programma principale
'''
normal = norm (0., 1.)
x_min = 0. - 0.5
x_max = 0. + 0.5
single_evt_prob = normal.cdf (x_max) - normal.cdf (x_min)
print ('single event probability: ', single_evt_prob)
print ('joint probability: ', single_evt_prob**10)
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
if __name__ == "__main__":
main ()
3.8.14. Exercise 3.14#
ex_3.14.py
#!/usr/bin/python
'''
What is the probability that ten measurements of the same counting experiment
expected to be Poisson distributed are all larger than the expected average number
of events?
'''
import matplotlib.pyplot as plt
from scipy.stats import poisson
import numpy as np
def main () :
'''
Funzione che implementa il programma principale
'''
average = 4.
poiss = poisson (average)
single_evt_prob = 1. - poiss.cdf (average)
print ('single event probability: ', single_evt_prob)
print ('joint probability: ', single_evt_prob**10)
# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
if __name__ == "__main__":
main ()