dct = nsfg.get_dct_dataframe(nsfg.data_dir/nsfg.dct_file)
dct.head()

%%time
dat = nsfg.get_dat_dataframe(nsfg.data_dir/nsfg.dat_file, dct)

Wall time: 2.28 s

df = dat[nsfg.columns_of_interest].copy()

df = nsfg.cleanup(df)

Effect size¶

col = "prglngth"
col1 = "birthord"
length_lim = (27, 46)

length_mask = (df[col] >= length_lim[0]) & (df[col]<=length_lim[1])
first_mask = df[col1]==1
other_mask = df[col1]!=1
m, v = df.loc[(first_mask & length_mask) , col].mean(), df.loc[(first_mask & length_mask) , col].var()
mo, vo = df.loc[(other_mask & length_mask) , col].mean(), df.loc[(other_mask & length_mask) , col].var()

m, v, mo, vo

(38.702079049577335, 5.966524469983441, 38.45389490901656, 5.876963865435888)

effect_size(m, mo, v, vo, (first_mask & length_mask).sum(), (other_mask & length_mask).sum(),)

0.10200942346625483

Probability functions¶

x = np.random.choice([1,2,3], p=[.1, .6, .3], size=10)
pmf = get_discrete_pmf(x)
pmf.xk, pmf.pk

(array([2, 3], dtype=int64), array([0.7, 0.3]))

ax = pd.DataFrame({"x":pmf.xk, "p":pmf.pk}).plot(kind="bar", x="x", y="p", label="pmf")
ax.plot(pmf.cdf(pmf.xk), '-r', label="cdf")
ax.legend()

<matplotlib.legend.Legend at 0x255195edbc8>

samples = np.random.normal(loc=1., scale=2., size=100)
kde = stats.gaussian_kde(samples)

fig, ax = plt.subplots()
x = np.linspace(-10,10,100)
ax.plot(x, kde.evaluate(x))
ax.hist(samples, bins=20, density=True)
plt.show()

k = 2
central_moment(x, k=k), moment(x, k=k), stats.norm.moment(k, scale=2, loc=1)

(34.00673400673401, 34.00673400673401, 5.0)

PearsonsMedianSkewness(x)

-3.655350246359612e-17

Sampling distributions¶

Computing and visualizing biased and unbiased statistics over synthesized data.

population_generator = stats.norm(loc=90., scale=5.)  # rv of the population to draw from
sample_size = 9  # number of samples to draw
n_iter = 200  # number of trials drawing `sample_size` samples

Setting up the telemetry storage

process_telemetry = {
    "mean": np.zeros(n_iter), 
    "std_biased": np.zeros(n_iter), 
    "std_unbiased": np.zeros(n_iter), 
    "sem": np.zeros(n_iter), 
}

Executing n_iter trials drawing sample_size samples each

%%time
for i in range(n_iter):
    population = population_generator.rvs(size=sample_size)
    process_telemetry["mean"][i] = np.mean(population) 
    process_telemetry["std_biased"][i] = np.std(population)
    process_telemetry["std_unbiased"][i] = np.std(population, ddof=1)
    process_telemetry["sem"][i] = stats.sem(population)

Wall time: 31 ms

Visualizing telemetry in serial form

fig, axs = plt.subplots(figsize=(10,7), ncols=1, nrows=4, constrained_layout=True,
                        sharex=True)

ax = axs[0]
ax.plot(process_telemetry["mean"], marker="o")
ax.set_title("Mean")
ax.set_ylim((population_generator.kwds["loc"]-10, population_generator.kwds["loc"]+10))

ax = axs[1]
ax.plot(process_telemetry["std_unbiased"], marker="o")
ax.set_title("std - unbiased")

ax = axs[2]
ax.plot(process_telemetry["std_biased"], marker="o")
ax.set_title("std - biased")

ax = axs[3]
ax.plot(process_telemetry["sem"], marker="o")
ax.set_title("standard error mean")

plt.show()

Visualizing telemetry using distributions

fig, axs = plt.subplots(figsize=(10,7), ncols=1, nrows=4, constrained_layout=True,)
bins = 50

plot_hist_with_kde(process_telemetry["mean"], left_ext=5, right_ext=5, plot_params={"title":"Mean"}, 
                   ax=axs[0])
plot_hist_with_kde(process_telemetry["std_unbiased"], plot_params={"title":"std - unbiased"}, 
                   ax=axs[1])
plot_hist_with_kde(process_telemetry["std_biased"], plot_params={"title":"std - biased"}, 
                   ax=axs[2])
plot_hist_with_kde(process_telemetry["sem"], plot_params={"title":"standard error mean"}, 
                   ax=axs[3])
plt.show()

	start	vtype	name	fstring	desc	end
0	1	str	caseid	%12s	RESPONDENT ID NUMBER	13
1	13	int	pregordr	%2f	PREGNANCY ORDER (NUMBER)	15
2	15	int	howpreg_n	%2f	BB-2 # OF WEEKS OR MONTHS CURRENTLY PREGNANT	17
3	17	int	howpreg_p	%1f	BB-2 CURRENT PREGNANCY LENGTH REPORTED IN MONT...	18
4	18	int	moscurrp	%1f	NUMBER OF MONTHS CURRENTLY PREGNANT	19

Basic Statistics

Effect size¶

`effect_size`[source]

Probability functions¶

`get_discrete_pmf`[source]

`central_moment`[source]

`moment`[source]

`PearsonsMedianSkewness`[source]

Sampling distributions¶

`plot_hist_with_kde`[source]

Basic Statistics

Effect size¶

effect_size[source]

Probability functions¶

get_discrete_pmf[source]

central_moment[source]

moment[source]

PearsonsMedianSkewness[source]

Sampling distributions¶

plot_hist_with_kde[source]

`effect_size`[source]

`get_discrete_pmf`[source]

`central_moment`[source]

`moment`[source]

`PearsonsMedianSkewness`[source]

`plot_hist_with_kde`[source]