3.8. Solutions: Lecture 3#

3.8.1. Exercise 3.1#

  • ex_3.1.py

#!/usr/bin/python
'''
Create a one-dimensional histogram filled with 5 values and save the histogram image to a `png` file
'''

import matplotlib.pyplot as plt
import numpy as np


def main () :
    '''
    Funzione che implementa il programma principale
    '''

    # the sample to be represented as an histogram
    sample = [1.3977702773941403, 0.74021968956204, 0.559129026598526, 0.034722468927208275, 1.472659179569878]

    fig, ax = plt.subplots (nrows = 1, ncols = 1)
    ax.hist (sample,
             color = 'orange',
            )
    ax.set_title ('Histogram example', size=14)
    ax.set_xlabel ('variable')
    ax.set_ylabel ('event counts per bin')

    plt.savefig ('ex_3.1.png')
    

# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

    
if __name__ == "__main__":
    main ()

3.8.2. Exercise 3.2#

  • ex_3.2.py

#!/usr/bin/python
'''
Read the text file [eventi_unif.txt](https://github.com/UnimibFisicaLaboratori/UnimibFisicaLabStatPython/blob/main/book/lectures/Lecture_03/exercises/eventi_unif.txt)
  * Print the first 10 positive elements to the screen.
  * Count the number of events contained in the file.
  * Determine the minimum and maximum values among the numbers saved in the file.
'''

import sys

def main () :
    '''
    Funzione che implementa il programma principale
    '''

    if (len (sys.argv) < 2) :
        print ('usage:', sys.argv[0], 'inputfile.txt')
        sys.exit ()

    # read the file
    with open(sys.argv[1]) as f:
        sample = [float (x) for x in f.readlines()]

    for elem in sample[:10]:
        print (elem)
  
    print ('total number of events:', len (sample))
    print ('sample minimum:', min (sample))
    print ('sample maximum:', max (sample))
   

# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

    
if __name__ == "__main__":
    main ()

3.8.3. Exercise 3.3#

  • ex_3.3.py

#!/usr/bin/python
'''
Read the text file [```eventi_gauss.txt```](https://github.com/UnimibFisicaLaboratori/UnimibFisicaLabStatPython/blob/main/book/lectures/Lecture_03/exercises/eventi_gauss.txt):
  * Fill a histogram with the first N numbers contained in the file,
    where N is a command-line parameter during program execution.
  * Choose the histogram's definition range and its bin number
    based on the numbers to be represented.
'''

import sys
import matplotlib.pyplot as plt
import numpy as np
from math import ceil, floor


def sturges (N_events) :
    return ceil (1 + np.log2 (N_events))
    

# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

    
def main () :
    '''
    Funzione che implementa il programma principale
    '''

    if (len (sys.argv) < 3) :
        print ('usage:', sys.argv[0], 'inputfile.txt Nmax')
        sys.exit ()

    N_max = int (sys.argv[2])

    # read the file
    with open(sys.argv[1]) as f:
        sample = [float (x) for x in f.readlines()]

    xMin = floor (min (sample[:N_max]))
    xMax = ceil (max (sample[:N_max]))
    N_bins = sturges (N_max)

    bin_edges = np.linspace (xMin, xMax, N_bins)
    fig, ax = plt.subplots (nrows = 1, ncols = 1)
    ax.hist (sample[:N_max],
             bins = bin_edges,
             color = 'orange',
            )
    ax.set_title ('Histogram example', size=14)
    ax.set_xlabel ('variable')
    ax.set_ylabel ('event counts per bin')

    plt.savefig ('ex_3.3.png')


# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

    
if __name__ == "__main__":
    main ()

3.8.4. Exercise 3.4#

  • ex_3.3.py

#!/usr/bin/python
'''
## Exercise 3.4
  * Display the distributions of events from the two files of the previous exercises, overlaid,
    finding the best visualization for the comparison between the two histograms.
'''

import sys
import matplotlib.pyplot as plt
import numpy as np
from math import ceil, floor


def sturges (N_events) :
    return ceil (1 + np.log2 (N_events))
    

# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

    
def main () :
    '''
    Funzione che implementa il programma principale
    '''

    if (len (sys.argv) < 3) :
        print ('usage:', sys.argv[0], 'inputfile1.txt inputfile2.txt')
        sys.exit ()

    # read the files
    with open(sys.argv[1]) as f:
        sample_1 = [float (x) for x in f.readlines()]
    with open(sys.argv[2]) as f:
        sample_2 = [float (x) for x in f.readlines()]

    xMin = floor (min (min (sample_1), min (sample_2)))
    xMax = ceil (max (max (sample_1), max (sample_2)))
    N_bins = sturges (min (len (sample_1), len (sample_2)))

    bin_edges = np.linspace (xMin, xMax, N_bins)

    fig, ax = plt.subplots (nrows = 1, ncols = 1)
    ax.hist (sample_1,
             bins = bin_edges,
             color = 'orange',
            )
    # ax.hist (sample_2,
    #          bins = bin_edges,
    #          color = 'red',
    #          alpha = 0.5,
    #         )
    ax.hist (sample_2,
             bins = bin_edges,
             color = 'red',
             histtype='step',
            )
    ax.set_title ('Histogram example', size=14)
    ax.set_xlabel ('variable')
    ax.set_ylabel ('event counts per bin')

    plt.savefig ('ex_3.4.png')


# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

    
if __name__ == "__main__":
    main ()

3.8.5. Exercise 3.5#

  • ex_3.5.py

#!/usr/bin/python
'''
## Exercise 3.5
Read the text file [```eventi_unif.txt```](https://raw.githubusercontent.com/UnimibFisicaLaboratori/UnimibFisicaLab2/master/Lezione_03/programmi/eventi_unif.txt):
  * Calculate the mean of the numbers in the text file.
  * Calculate the variance of the numbers in the text file.
  * Calculate the standard deviation of the numbers in the text file.
  * Calculate the standard deviation from the mean of the numbers in the text file.
'''

import sys
from stats import mean, variance, sigma, sigma_mean

def main () :
    '''
    Funzione che implementa il programma principale
    '''

    if (len (sys.argv) < 2) :
        print ('usage:', sys.argv[0], 'inputfile.txt')
        sys.exit ()

    # read the file
    with open(sys.argv[1]) as f:
        sample = [float (x) for x in f.readlines()]

    for elem in sample[:10]:
        print (elem)
  
    print ('total number of events:', len (sample))
    print ('sample minimum:', min (sample))
    print ('sample maximum:', max (sample))
    print ('sample mean:', mean (sample))
    print ('sample variance:', variance (sample))
    print ('sample sigma:', sigma (sample))
    print ('sample sigma of the mean:', sigma_mean (sample))
   

# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

    
if __name__ == "__main__":
    main ()

  • stats.py

from math import sqrt 

def mean (sample) :
    '''
    calculates the mean of the sample present in the object
    '''
    summ = sum (sample)
    N = len (sample)
    return summ / N

# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

def variance (sample, bessel = True) :
    '''
    calculates the variance of the sample present in the object
    '''
    summ = 0.
    sum_sq = 0.
    N = len (sample)
    for elem in sample :
       summ += elem
       sum_sq += elem * elem
    var = sum_sq / N - summ * summ / (N * N)
    if bessel : var = N * var / (N - 1)
    return var

# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

def sigma (sample, bessel = True) :
    '''
    calculates the sigma of the sample present in the object
    '''
    return sqrt (variance (sample, bessel))

# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

def sigma_mean (sample, bessel = True) :
    '''
    calculates the sigma of the sample present in the object
    '''
    N = len (sample)
    return sqrt (variance (sample, bessel) / N)

3.8.6. Exercise 3.6#

  • ex_3.6.py

#!/usr/bin/python
'''
Write a ```python``` class in the form of a library which,
given the name of a text file containing a sample of events as input,
is able to store the sample internally,
calculate its mean, variance, standard deviation, standard deviation from the mean,
display the sample in a histogram
with an appropriately chosen definition range and bin number.
Write a test program for the created class.
'''

from my_histo import my_histo
import sys


def main () :
    '''
    Funzione che implementa il programma principale
    '''

    if (len (sys.argv) < 2) :
        print ('usage:', sys.argv[0], 'inputfile.txt')
        sys.exit ()

    my_histo_test = my_histo (sys.argv[1])
    my_histo_test.draw_histo ('ex_3.6.png')
    

# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

    
if __name__ == "__main__":
    main ()

  • my_histo.py

#!/usr/bin/python

from math import sqrt
import matplotlib.pyplot as plt
import numpy as np
from math import ceil, floor


def sturges (N_events) :
    return ceil (1 + np.log2 (N_events))        


# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 


class my_histo :
    '''calculator for statistics of a list of numbers'''

    summ = 0.
    sumSq = 0.
    N = 0
    sample = []

    def __init__ (self, sample_file_name) :
        '''
        reads as input the file containing the collection of events
        and reads it
        '''
        with open (sample_file_name) as f:
            self.sample = [float (x) for x in f.readlines ()]

        self.summ = sum (self.sample)
        self.sumSq = sum ([x*x for x in self.sample])
        self.N = len (self.sample)

    # ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

    def mean (self) :
        '''
        calculates the mean of the sample present in the object
        '''
        return self.summ / self.N

    # ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

    def variance (self, bessel = True) :
        '''
        calculates the variance of the sample present in the object
        '''
        var = self.sumSq / self.N - self.mean () * self.mean ()
        if bessel : var = self.N * var / (self.N - 1)
        return var

    # ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

    def sigma (self, bessel = True) :
        '''
        calculates the sigma of the sample present in the object
        '''
        return sqrt (self.variance (bessel))

    # ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

    def sigma_mean (self, bessel = True) :
        '''
        calculates the sigma of the sample present in the object
        '''
        return sqrt (self.variance (bessel) / self.N)
    
    # ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

    def draw_histo (self, output_file_name) :
        '''
        draw the sample content into an histogram
        '''
        xMin = floor (min (self.sample))
        xMax = ceil (max (self.sample))
        N_bins = sturges (self.N)

        bin_edges = np.linspace (xMin, xMax, N_bins)
        fig, ax = plt.subplots (nrows = 1, ncols = 1)
        ax.hist (self.sample,
                 bins = bin_edges,
                 color = 'orange',
                )
        ax.set_title ('Histogram example', size=14)
        ax.set_xlabel ('variable')
        ax.set_ylabel ('event counts per bin')

        plt.savefig (output_file_name)






3.8.7. Exercise 3.7#

  • ex_3.7.py

#!/usr/bin/python
'''
Write a Python program to draw a Gaussian distribution and its cumulative function
'''

import matplotlib.pyplot as plt
from scipy.stats import norm
import numpy as np

def main () :
    '''
    Funzione che implementa il programma principale
    '''
    normal = norm (100., 10.)
    x_axis = np.linspace (50., 150., 100)
    plt.plot (x_axis, normal.pdf (x_axis), label="PDF")
    plt.legend ()
    plt.savefig ('ex_3.7_pdf.png')
    
    plt.clf () # clear the figure

    plt.plot (x_axis, normal.cdf (x_axis), label="CDF")
    plt.legend ()
    plt.savefig ('ex_3.7_cdf.png')

# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

    
if __name__ == "__main__":
    main ()

3.8.8. Exercise 3.8#

  • ex_3.8.py

#!/usr/bin/python
'''
Write a Python program to draw an exponential distribution and its cumulative function
'''

import matplotlib.pyplot as plt
from scipy.stats import expon
import numpy as np

def main () :
    '''
    Funzione che implementa il programma principale
    '''

    tau = 2.
    expo = expon (0., tau)
    x_axis = np.linspace (0., 8., 100)
    plt.plot (x_axis, expo.pdf (x_axis), label="PDF")
    plt.legend ()
    plt.savefig ('ex_3.8_pdf.png')
    
    plt.clf () # clear the figure

    plt.plot (x_axis, expo.cdf (x_axis), label="CDF")
    plt.legend ()
    plt.savefig ('ex_3.8_cdf.png')

# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

    
if __name__ == "__main__":
    main ()

3.8.9. Exercise 3.9#

  • ex_3.9.py

#!/usr/bin/python
'''
Use the Python `scipy.stat.norm` object to determine the area of a normal distribution
of its tails outside the range included within an interval of 1, 2, 3, 4, and 5 
standard deviations around its mean
'''

import matplotlib.pyplot as plt
from scipy.stats import norm
import numpy as np

def main () :
    '''
    Funzione che implementa il programma principale
    '''
    normal = norm (0., 1.)
    for sigmas in range (1,6) :
        tails_area = normal.cdf (0. - sigmas) + 1. - normal.cdf (0. + sigmas)
        print ('outside ' + str (sigmas) + ' sigmas :\t' 
               + str (tails_area))


# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

    
if __name__ == "__main__":
    main ()

3.8.10. Exercise 3.10#

  • ex_3.10.py

#!/usr/bin/python
'''
Write a Python program to draw a binomial distribution and its cumulative function
'''

import matplotlib.pyplot as plt
from scipy.stats import binom
import numpy as np


def main () :
    '''
    Funzione che implementa il programma principale
    '''

    success_p = 0.5
    N = 8
    binomial = binom (N, success_p)
    x_axis = np.arange (N+1)
    plt.scatter (x_axis, binomial.pmf (x_axis), label='PMF')
    plt.title ("Binomial Distribution")
    plt.ylabel ("Density")
    plt.legend ()
    plt.savefig ('ex_3.10_pdf.png')

    plt.clf () # clear the figure

    plt.scatter (x_axis, binomial.cdf (x_axis), label='CDF')
    plt.legend ()
    plt.savefig ('ex_3.10_cdf.png')


# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 


if __name__ == "__main__":
    main ()

3.8.11. Exercise 3.11#

  • ex_3.11.py

#!/usr/bin/python
'''
Write a Python program to draw a Poisson distribution and its cumulative function
'''

import matplotlib.pyplot as plt
from scipy.stats import poisson
import numpy as np


def main () :
    '''
    Funzione che implementa il programma principale
    '''

    average = 4.
    poiss = poisson (average)
    x_axis = np.arange (3 * average)
    plt.scatter (x_axis, poiss.pmf (x_axis), label='PMF')
    plt.title ("Poisson Distribution")
    plt.ylabel ("Density")
    plt.legend ()
    plt.savefig ('ex_3.11_pdf.png')

    plt.clf () # clear the figure

    plt.scatter (x_axis, poiss.cdf (x_axis), label='CDF')
    plt.legend ()
    plt.savefig ('ex_3.11_cdf.png')


# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 


if __name__ == "__main__":
    main ()

3.8.12. Exercise 3.12#

  • ex_3.12.py

#!/usr/bin/python
'''
Write a Python program to draw a Poisson distribution
Show, by using the third and fourth central momenta calculations available in the `scipy.stat` library,
that the momenta of a Poisson distribution asymptotically tend to the ones of a Gaussian
'''

import matplotlib.pyplot as plt
from scipy.stats import poisson
import numpy as np


def main () :
    '''
    Funzione che implementa il programma principale
    '''

    skew_l = []
    kurt_l = []
    ave_l = range (100)
    for average in ave_l :
        poiss = poisson (average)
        ave, var, skew, kurt = poiss.stats (moments='mvsk')
        skew_l.append (skew)
        kurt_l.append (kurt)

    plt.scatter (ave_l, skew_l)
    plt.title ('Poisson skewness')
    plt.xlabel ('mean')
    plt.ylabel ('skewness')
    plt.savefig ('ex_3.12_skew.png')

    plt.clf () # clear the figure

    plt.scatter (ave_l, skew_l)
    plt.title ('Poisson kurtosis')
    plt.xlabel ('mean')
    plt.ylabel ('kurtosis')
    plt.savefig ('ex_3.12_kurt.png')


# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 


if __name__ == "__main__":
    main ()

3.8.13. Exercise 3.13#

  • ex_3.13.py

#!/usr/bin/python
'''
What is the probability that ten measurements of the same quantity
expected to be Gaussian fall within an interval of 1 standard deviation width 
around the mean?
'''

from scipy.stats import norm
import numpy as np

def main () :
    '''
    Funzione che implementa il programma principale
    '''

    normal = norm (0., 1.)
    x_min = 0. - 0.5
    x_max = 0. + 0.5
    single_evt_prob = normal.cdf (x_max) - normal.cdf (x_min)
    print ('single event probability: ', single_evt_prob)
    print ('joint probability: ', single_evt_prob**10)


# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 

    
if __name__ == "__main__":
    main ()

3.8.14. Exercise 3.14#

  • ex_3.14.py

#!/usr/bin/python
'''
What is the probability that ten measurements of the same counting experiment
expected to be Poisson distributed are all larger than the expected average number 
of events?
'''

import matplotlib.pyplot as plt
from scipy.stats import poisson
import numpy as np


def main () :
    '''
    Funzione che implementa il programma principale
    '''

    average = 4.
    poiss = poisson (average)
    single_evt_prob = 1. - poiss.cdf (average)
    print ('single event probability: ', single_evt_prob)
    print ('joint probability: ', single_evt_prob**10)


# ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 


if __name__ == "__main__":
    main ()