#Basic python library which need to import
import pandas as pd
import numpy as np
#Date stuff
from datetime import datetime
from datetime import timedelta
#Library for Nice graphing
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as sn
%matplotlib inline
#Library for statistics operation
import scipy.stats as stats
# Date Time library
from datetime import datetime
#Machine learning Library
import statsmodels.api as sm
from sklearn import metrics
# sklearn.cross_validation was deprecated in 0.18 and removed in 0.20; the
# function now lives in sklearn.model_selection. Fall back to the old module
# only for very old scikit-learn installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error, mean_squared_error
# Ignore warnings so the notebook output stays readable.
import sys
import warnings
warnings.filterwarnings('ignore')
# Settings
# Show every DataFrame column instead of eliding the middle ones.
pd.set_option('display.max_columns', None)
# Print arrays in full, with 3 decimals. The threshold must be a real number:
# NumPy rejects NaN here and recommends sys.maxsize for untruncated output.
np.set_printoptions(threshold=sys.maxsize, precision=3)
# Plot styling: seaborn dark grid plus explicit axis/tick label font sizes.
sns.set(style="darkgrid")
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20. "This module will be removed in 0.20.", DeprecationWarning) C:\ProgramData\Anaconda3\lib\site-packages\sklearn\ensemble\weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release. from numpy.core.umath_tests import inner1d
# Read the four input CSV files from the working directory, in this order:
# training rows, test rows, store data and features data.
train, test, stores, feature = (
    pd.read_csv(csv_name)
    for csv_name in ("train.csv", "test.csv", "stores.csv", "features.csv")
)
# Merge plan:
#   - train + stores + features
#   - test + stores + features
# Training data: attach the store data first, then the features data.
# No `on=` is passed, so pandas joins on the columns the two frames share,
# using its default inner join.
train_bt = train.merge(stores)
train = train_bt.merge(feature)
# Test data: the same two-step merge.
test_bt = test.merge(stores)
test = test_bt.merge(feature)
# Preview the first two merged training rows.
train.head(2)
Store | Dept | Date | Weekly_Sales | IsHoliday | Type | Size | Temperature | Fuel_Price | MarkDown1 | MarkDown2 | MarkDown3 | MarkDown4 | MarkDown5 | CPI | Unemployment | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 1 | 2010-02-05 | 24924.50 | False | A | 151315 | 42.31 | 2.572 | NaN | NaN | NaN | NaN | NaN | 211.096358 | 8.106 |
1 | 1 | 2 | 2010-02-05 | 50605.27 | False | A | 151315 | 42.31 | 2.572 | NaN | NaN | NaN | NaN | NaN | 211.096358 | 8.106 |
# Preview the first two rows of the merged test set.
test.head(2)
Store | Dept | Date | IsHoliday | Type | Size | Temperature | Fuel_Price | MarkDown1 | MarkDown2 | MarkDown3 | MarkDown4 | MarkDown5 | CPI | Unemployment | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 1 | 2012-11-02 | False | A | 151315 | 55.32 | 3.386 | 6766.44 | 5147.7 | 50.82 | 3639.9 | 2737.42 | 223.462779 | 6.573 |
1 | 1 | 2 | 2012-11-02 | False | A | 151315 | 55.32 | 3.386 | 6766.44 | 5147.7 | 50.82 | 3639.9 | 2737.42 | 223.462779 | 6.573 |
# Show dtypes and non-null counts for both merged frames.
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() only appended a stray "None" line after each report.
train.info()
print("*****************************************")
test.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 421570 entries, 0 to 421569 Data columns (total 16 columns): Store 421570 non-null int64 Dept 421570 non-null int64 Date 421570 non-null object Weekly_Sales 421570 non-null float64 IsHoliday 421570 non-null bool Type 421570 non-null object Size 421570 non-null int64 Temperature 421570 non-null float64 Fuel_Price 421570 non-null float64 MarkDown1 150681 non-null float64 MarkDown2 111248 non-null float64 MarkDown3 137091 non-null float64 MarkDown4 134967 non-null float64 MarkDown5 151432 non-null float64 CPI 421570 non-null float64 Unemployment 421570 non-null float64 dtypes: bool(1), float64(10), int64(3), object(2) memory usage: 51.9+ MB None ***************************************** <class 'pandas.core.frame.DataFrame'> Int64Index: 115064 entries, 0 to 115063 Data columns (total 15 columns): Store 115064 non-null int64 Dept 115064 non-null int64 Date 115064 non-null object IsHoliday 115064 non-null bool Type 115064 non-null object Size 115064 non-null int64 Temperature 115064 non-null float64 Fuel_Price 115064 non-null float64 MarkDown1 114915 non-null float64 MarkDown2 86437 non-null float64 MarkDown3 105235 non-null float64 MarkDown4 102176 non-null float64 MarkDown5 115064 non-null float64 CPI 76902 non-null float64 Unemployment 76902 non-null float64 dtypes: bool(1), float64(9), int64(3), object(2) memory usage: 13.3+ MB None
# Take only those rows whose weekly sales are strictly positive.
train = train[train['Weekly_Sales'] > 0]
# Column names grouped by dtype. select_dtypes inspects the dtypes once,
# instead of rebuilding dict(train.dtypes) for every column.
# Columns of any other dtype (e.g. bool) end up in neither list.
numeric_var_train = train.select_dtypes(
    include=['float64', 'int64', 'float32', 'int32']).columns.tolist()
cat_var_train = train.select_dtypes(include=['object']).columns.tolist()
# Train numerical data
train_num = train[numeric_var_train]
# Train categorical data
train_cat = train[cat_var_train]
print(numeric_var_train)
print(cat_var_train)
['Store', 'Dept', 'Weekly_Sales', 'Size', 'Temperature', 'Fuel_Price', 'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5', 'CPI', 'Unemployment'] ['Date', 'Type']
# Use a general function that returns multiple values
def var_summary(x):
    """Return descriptive statistics for one numeric column.

    Parameters
    ----------
    x : pandas.Series
        Numeric values; missing entries are allowed.

    Returns
    -------
    pandas.Series
        Indexed by 'N' (non-null count), 'NMISS' (null count), 'SUM',
        'MEAN', 'MEDIAN', 'STD', 'VAR', 'MIN', the percentiles 'P1', 'P5',
        'P10', 'P25', 'P50', 'P75', 'P90', 'P95', 'P99', and 'MAX'.
    """
    # Drop missing values once and request all percentiles in a single call
    # rather than repeating dropna().quantile() for each percentile.
    observed = x.dropna()
    percentiles = observed.quantile(
        [0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99])
    values = [
        x.count(), x.isnull().sum(), x.sum(), x.mean(), x.median(),
        x.std(), x.var(), x.min(),
        *percentiles.tolist(),
        x.max(),
    ]
    return pd.Series(
        values,
        index=['N', 'NMISS', 'SUM', 'MEAN', 'MEDIAN', 'STD', 'VAR', 'MIN',
               'P1', 'P5', 'P10', 'P25', 'P50', 'P75', 'P90', 'P95', 'P99',
               'MAX'])
# Summarise every numeric training column with var_summary, then transpose
# so that each row is one variable and each column one statistic.
num_summary = train_num.apply(var_summary).T
num_summary
def cat_summary(x):
    """Return a short summary of one categorical column.

    Parameters
    ----------
    x : pandas.Series
        Column to summarise; missing entries are allowed.

    Returns
    -------
    pandas.Series
        Indexed by 'N' (non-null count), 'NMISS' (null count) and
        'ColumnsNames' (the value_counts() Series for the column, stored
        as a single element).
    """
    return pd.Series(
        [x.count(), x.isnull().sum(), x.value_counts()],
        index=['N', 'NMISS', 'ColumnsNames'])
# Apply cat_summary to every categorical training column and display the result.
# NOTE(review): the result is bound to the name `cat_summary`, so after this
# line the name refers to the result of apply() and no longer to the function.
cat_summary=train_cat.apply(lambda x: cat_summary(x))
cat_summary
Date | Type | |
---|---|---|
N | 420212 | 420212 |
NMISS | 0 | 0 |
ColumnsNames | 2011-12-23 3018 2011-11-25 3016 2011-12-... | A 214961 B 162787 C 42464 Name: Type... |
# Column names of the test set grouped by dtype. select_dtypes inspects the
# dtypes once, instead of rebuilding dict(test.dtypes) for every column.
# Columns of any other dtype (e.g. bool) end up in neither list.
numeric_var_test = test.select_dtypes(
    include=['float64', 'int64', 'float32', 'int32']).columns.tolist()
cat_var_test = test.select_dtypes(include=['object']).columns.tolist()
# Test numerical data
test_num = test[numeric_var_test]
# Test categorical data
test_cat = test[cat_var_test]
print(numeric_var_test)
print(cat_var_test)
['Store', 'Dept', 'Size', 'Temperature', 'Fuel_Price', 'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5', 'CPI', 'Unemployment'] ['Date', 'Type']
# Numerical summary report for the test set: one row per variable, one
# column per statistic; only the first rows are displayed.
num_summary = test_num.apply(var_summary).T
num_summary.head()
N | NMISS | SUM | MEAN | MEDIAN | STD | VAR | MIN | P1 | P5 | P10 | P25 | P50 | P75 | P90 | P95 | P99 | MAX | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Store | 115064.0 | 0.0 | 2.558817e+06 | 22.238207 | 22.000 | 12.809930 | 1.640943e+02 | 1.000 | 1.000 | 3.000 | 5.000 | 11.000 | 22.000 | 33.000 | 40.000 | 43.000 | 45.000 | 45.000 |
Dept | 115064.0 | 0.0 | 5.101883e+06 | 44.339524 | 37.000 | 30.656410 | 9.398155e+02 | 1.000 | 1.000 | 4.000 | 7.000 | 18.000 | 37.000 | 74.000 | 92.000 | 95.000 | 98.000 | 99.000 |
Size | 115064.0 | 0.0 | 1.570597e+10 | 136497.688921 | 140167.000 | 61106.926438 | 3.734056e+09 | 34875.000 | 34875.000 | 39690.000 | 39910.000 | 93638.000 | 140167.000 | 202505.000 | 204184.000 | 206302.000 | 219622.000 | 219622.000 |
Temperature | 115064.0 | 0.0 | 6.206760e+06 | 53.941804 | 54.470 | 18.724153 | 3.505939e+02 | -7.290 | 11.440 | 23.980 | 29.970 | 39.820 | 54.470 | 67.350 | 79.480 | 83.820 | 92.140 | 101.950 |
Fuel_Price | 115064.0 | 0.0 | 4.121070e+05 | 3.581546 | 3.606 | 0.239442 | 5.733244e-02 | 2.872 | 2.957 | 3.161 | 3.227 | 3.431 | 3.606 | 3.766 | 3.866 | 3.951 | 4.079 | 4.125 |
# Categorical data summary report
def cat_summary(x):
    """Return a short summary of one categorical column.

    Parameters
    ----------
    x : pandas.Series
        Column to summarise; missing entries are allowed.

    Returns
    -------
    pandas.Series
        Indexed by 'N' (non-null count), 'NMISS' (null count) and
        'ColumnsNames' (the value_counts() Series for the column, stored
        as a single element).
    """
    return pd.Series(
        [x.count(), x.isnull().sum(), x.value_counts()],
        index=['N', 'NMISS', 'ColumnsNames'])
# Apply cat_summary to every categorical test column and display the result.
# NOTE(review): the result is bound to the name `cat_summary`, so after this
# line the name refers to the result of apply() and no longer to the function.
cat_summary=test_cat.apply(lambda x: cat_summary(x))
cat_summary
Date | Type | |
---|---|---|
N | 115064 | 115064 |
NMISS | 0 | 0 |
ColumnsNames | 2012-12-21 3002 2012-12-07 2989 2012-12-... | A 58713 B 44500 C 11851 Name: Type, d... |
# Run pandas-profiling to see the overall report for the training data
import pandas_profiling
pandas_profiling.ProfileReport(train)
Dataset info
Number of variables | 16 |
---|---|
Number of observations | 421570 |
Total Missing (%) | 21.1% |
Total size in memory | 51.9 MiB |
Average record size in memory | 129.0 B |
Variables types
Numeric | 13 |
---|---|
Categorical | 2 |
Boolean | 1 |
Date | 0 |
Text (Unique) | 0 |
Rejected | 0 |
Unsupported | 0 |
Warnings
Date
has a high cardinality: 143 distinct values WarningMarkDown1
has 270889 / 64.3% missing values MissingMarkDown2
has 310322 / 73.6% missing values MissingMarkDown3
has 284479 / 67.5% missing values MissingMarkDown4
has 286603 / 68.0% missing values MissingMarkDown5
has 270138 / 64.1% missing values MissingCPI
Numeric
Distinct count | 2145 |
---|---|
Unique (%) | 0.5% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 171.2 |
---|---|
Minimum | 126.06 |
Maximum | 227.23 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 126.06 |
---|---|
5-th percentile | 126.5 |
Q1 | 132.02 |
Median | 182.32 |
Q3 | 212.42 |
95-th percentile | 221.94 |
Maximum | 227.23 |
Range | 101.17 |
Interquartile range | 80.394 |
Descriptive statistics
Standard deviation | 39.159 |
---|---|
Coef of variation | 0.22873 |
Kurtosis | -1.8297 |
Mean | 171.2 |
MAD | 38.066 |
Skewness | 0.085219 |
Sum | 72174000 |
Variance | 1533.4 |
Memory size | 6.4 MiB |
Value | Count | Frequency (%) | |
129.8555333 | 711 | 0.2% |
|
131.1083333 | 708 | 0.2% |
|
129.84596670000002 | 707 | 0.2% |
|
130.38490320000002 | 706 | 0.2% |
|
130.683 | 706 | 0.2% |
|
131.0756667 | 706 | 0.2% |
|
130.6457931 | 706 | 0.2% |
|
130.7196333 | 705 | 0.2% |
|
130.4546207 | 705 | 0.2% |
|
129.98454840000002 | 704 | 0.2% |
|
Other values (2135) | 414506 | 98.3% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
126.064 | 678 | 0.2% |
|
126.0766452 | 679 | 0.2% |
|
126.08545159999998 | 675 | 0.2% |
|
126.08929029999999 | 682 | 0.2% |
|
126.1019355 | 686 | 0.2% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
227.01841659999997 | 69 | 0.0% |
|
227.0369359 | 70 | 0.0% |
|
227.16939190000002 | 63 | 0.0% |
|
227.21428799999998 | 62 | 0.0% |
|
227.2328068 | 63 | 0.0% |
|
Date
Categorical
Distinct count | 143 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
2011-12-23 |
|
---|---|
2011-11-25 |
|
2011-12-16 |
|
Other values (140) |
412509
|
Value | Count | Frequency (%) | |
2011-12-23 | 3027 | 0.7% |
|
2011-11-25 | 3021 | 0.7% |
|
2011-12-16 | 3013 | 0.7% |
|
2011-12-09 | 3010 | 0.7% |
|
2012-02-17 | 3007 | 0.7% |
|
2011-12-30 | 3003 | 0.7% |
|
2012-02-10 | 3001 | 0.7% |
|
2011-12-02 | 2994 | 0.7% |
|
2012-03-02 | 2990 | 0.7% |
|
2012-10-12 | 2990 | 0.7% |
|
Other values (133) | 391514 | 92.9% |
|
Dept
Numeric
Distinct count | 81 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 44.26 |
---|---|
Minimum | 1 |
Maximum | 99 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 4 |
Q1 | 18 |
Median | 37 |
Q3 | 74 |
95-th percentile | 95 |
Maximum | 99 |
Range | 98 |
Interquartile range | 56 |
Descriptive statistics
Standard deviation | 30.492 |
---|---|
Coef of variation | 0.68893 |
Kurtosis | -1.2156 |
Mean | 44.26 |
MAD | 26.537 |
Skewness | 0.35822 |
Sum | 18658822 |
Variance | 929.77 |
Memory size | 6.4 MiB |
Value | Count | Frequency (%) | |
1 | 6435 | 1.5% |
|
10 | 6435 | 1.5% |
|
38 | 6435 | 1.5% |
|
21 | 6435 | 1.5% |
|
67 | 6435 | 1.5% |
|
16 | 6435 | 1.5% |
|
14 | 6435 | 1.5% |
|
13 | 6435 | 1.5% |
|
79 | 6435 | 1.5% |
|
81 | 6435 | 1.5% |
|
Other values (71) | 357220 | 84.7% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 6435 | 1.5% |
|
2 | 6435 | 1.5% |
|
3 | 6435 | 1.5% |
|
4 | 6435 | 1.5% |
|
5 | 6347 | 1.5% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
95 | 6435 | 1.5% |
|
96 | 4854 | 1.2% |
|
97 | 6278 | 1.5% |
|
98 | 5836 | 1.4% |
|
99 | 862 | 0.2% |
|
Fuel_Price
Numeric
Distinct count | 892 |
---|---|
Unique (%) | 0.2% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 3.361 |
---|---|
Minimum | 2.472 |
Maximum | 4.468 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 2.472 |
---|---|
5-th percentile | 2.653 |
Q1 | 2.933 |
Median | 3.452 |
Q3 | 3.738 |
95-th percentile | 4.029 |
Maximum | 4.468 |
Range | 1.996 |
Interquartile range | 0.805 |
Descriptive statistics
Standard deviation | 0.45851 |
---|---|
Coef of variation | 0.13642 |
Kurtosis | -1.1854 |
Mean | 3.361 |
MAD | 0.4032 |
Skewness | -0.1049 |
Sum | 1416900 |
Variance | 0.21024 |
Memory size | 6.4 MiB |
Value | Count | Frequency (%) | |
3.638 | 2548 | 0.6% |
|
3.63 | 2164 | 0.5% |
|
2.7710000000000004 | 1917 | 0.5% |
|
3.891 | 1856 | 0.4% |
|
3.594 | 1796 | 0.4% |
|
3.5239999999999996 | 1793 | 0.4% |
|
3.523 | 1792 | 0.4% |
|
2.72 | 1790 | 0.4% |
|
3.6660000000000004 | 1778 | 0.4% |
|
2.78 | 1656 | 0.4% |
|
Other values (882) | 402480 | 95.5% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
2.472 | 38 | 0.0% |
|
2.513 | 45 | 0.0% |
|
2.5140000000000002 | 906 | 0.2% |
|
2.52 | 39 | 0.0% |
|
2.533 | 42 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
4.294 | 363 | 0.1% |
|
4.301 | 360 | 0.1% |
|
4.308 | 168 | 0.0% |
|
4.449 | 358 | 0.1% |
|
4.468 | 368 | 0.1% |
|
IsHoliday
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.070358 |
---|
True |
|
---|---|
(Missing) |
391909
|
Value | Count | Frequency (%) | |
True | 29661 | 7.0% |
|
(Missing) | 391909 | 93.0% |
|
MarkDown1
Numeric
Distinct count | 2278 |
---|---|
Unique (%) | 0.5% |
Missing (%) | 64.3% |
Missing (n) | 270889 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 7246.4 |
---|---|
Minimum | 0.27 |
Maximum | 88647 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 0.27 |
---|---|
5-th percentile | 149.19 |
Q1 | 2240.3 |
Median | 5347.4 |
Q3 | 9210.9 |
95-th percentile | 21801 |
Maximum | 88647 |
Range | 88646 |
Interquartile range | 6970.6 |
Descriptive statistics
Standard deviation | 8291.2 |
---|---|
Coef of variation | 1.1442 |
Kurtosis | 17.606 |
Mean | 7246.4 |
MAD | 5262.8 |
Skewness | 3.3418 |
Sum | 1091900000 |
Variance | 68744000 |
Memory size | 6.4 MiB |
Value | Count | Frequency (%) | |
1.5 | 102 | 0.0% |
|
460.73 | 102 | 0.0% |
|
175.64 | 93 | 0.0% |
|
1282.42 | 75 | 0.0% |
|
9264.48 | 75 | 0.0% |
|
686.24 | 75 | 0.0% |
|
5924.71 | 75 | 0.0% |
|
1483.17 | 75 | 0.0% |
|
3242.59 | 74 | 0.0% |
|
10671.71 | 74 | 0.0% |
|
Other values (2267) | 149861 | 35.5% |
|
(Missing) | 270889 | 64.3% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0.27 | 51 | 0.0% |
|
0.5 | 49 | 0.0% |
|
1.5 | 102 | 0.0% |
|
1.94 | 50 | 0.0% |
|
2.12 | 52 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
62567.6 | 66 | 0.0% |
|
65021.23 | 73 | 0.0% |
|
75149.79 | 73 | 0.0% |
|
78124.5 | 70 | 0.0% |
|
88646.76 | 68 | 0.0% |
|
MarkDown2
Numeric
Distinct count | 1500 |
---|---|
Unique (%) | 0.4% |
Missing (%) | 73.6% |
Missing (n) | 310322 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 3334.6 |
---|---|
Minimum | -265.76 |
Maximum | 104520 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | -265.76 |
---|---|
5-th percentile | 1.95 |
Q1 | 41.6 |
Median | 192 |
Q3 | 1926.9 |
95-th percentile | 16497 |
Maximum | 104520 |
Range | 104790 |
Interquartile range | 1885.3 |
Descriptive statistics
Standard deviation | 9475.4 |
---|---|
Coef of variation | 2.8415 |
Kurtosis | 37.59 |
Mean | 3334.6 |
MAD | 4690.4 |
Skewness | 5.4413 |
Sum | 370970000 |
Variance | 89782000 |
Memory size | 6.4 MiB |
Value | Count | Frequency (%) | |
1.91 | 539 | 0.1% |
|
3.0 | 493 | 0.1% |
|
0.5 | 485 | 0.1% |
|
1.5 | 471 | 0.1% |
|
4.0 | 367 | 0.1% |
|
6.0 | 365 | 0.1% |
|
7.64 | 354 | 0.1% |
|
3.82 | 353 | 0.1% |
|
5.73 | 345 | 0.1% |
|
19.0 | 345 | 0.1% |
|
Other values (1489) | 107131 | 25.4% |
|
(Missing) | 310322 | 73.6% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
-265.76 | 71 | 0.0% |
|
-192.0 | 72 | 0.0% |
|
-20.0 | 72 | 0.0% |
|
-10.98 | 60 | 0.0% |
|
-10.5 | 143 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
82881.16 | 73 | 0.0% |
|
89121.94 | 74 | 0.0% |
|
92523.94 | 73 | 0.0% |
|
97740.99 | 73 | 0.0% |
|
104519.54 | 72 | 0.0% |
|
MarkDown3
Numeric
Distinct count | 1663 |
---|---|
Unique (%) | 0.4% |
Missing (%) | 67.5% |
Missing (n) | 284479 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 1439.4 |
---|---|
Minimum | -29.1 |
Maximum | 141630 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | -29.1 |
---|---|
5-th percentile | 0.65 |
Q1 | 5.08 |
Median | 24.6 |
Q3 | 103.99 |
95-th percentile | 1059.9 |
Maximum | 141630 |
Range | 141660 |
Interquartile range | 98.91 |
Descriptive statistics
Standard deviation | 9623.1 |
---|---|
Coef of variation | 6.6854 |
Kurtosis | 77.688 |
Mean | 1439.4 |
MAD | 2578.1 |
Skewness | 8.3995 |
Sum | 197330000 |
Variance | 92604000 |
Memory size | 6.4 MiB |
Value | Count | Frequency (%) | |
3.0 | 754 | 0.2% |
|
6.0 | 710 | 0.2% |
|
2.0 | 660 | 0.2% |
|
1.0 | 611 | 0.1% |
|
0.22 | 487 | 0.1% |
|
0.5 | 463 | 0.1% |
|
0.01 | 444 | 0.1% |
|
4.0 | 439 | 0.1% |
|
3.2 | 379 | 0.1% |
|
1.98 | 363 | 0.1% |
|
Other values (1652) | 131781 | 31.3% |
|
(Missing) | 284479 | 67.5% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
-29.1 | 72 | 0.0% |
|
-1.0 | 70 | 0.0% |
|
-0.87 | 46 | 0.0% |
|
-0.2 | 69 | 0.0% |
|
0.0 | 67 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
89402.64 | 71 | 0.0% |
|
101378.79 | 73 | 0.0% |
|
103991.94 | 72 | 0.0% |
|
109030.75 | 75 | 0.0% |
|
141630.61 | 74 | 0.0% |
|
MarkDown4
Numeric
Distinct count | 1945 |
---|---|
Unique (%) | 0.5% |
Missing (%) | 68.0% |
Missing (n) | 286603 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 3383.2 |
---|---|
Minimum | 0.22 |
Maximum | 67475 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 0.22 |
---|---|
5-th percentile | 28.76 |
Q1 | 504.22 |
Median | 1481.3 |
Q3 | 3595 |
95-th percentile | 12646 |
Maximum | 67475 |
Range | 67475 |
Interquartile range | 3090.8 |
Descriptive statistics
Standard deviation | 6292.4 |
---|---|
Coef of variation | 1.8599 |
Kurtosis | 29.997 |
Mean | 3383.2 |
MAD | 3329.7 |
Skewness | 4.8475 |
Sum | 456620000 |
Variance | 39594000 |
Memory size | 6.4 MiB |
Value | Count | Frequency (%) | |
9.0 | 280 | 0.1% |
|
4.0 | 200 | 0.0% |
|
2.0 | 197 | 0.0% |
|
3.0 | 146 | 0.0% |
|
47.0 | 143 | 0.0% |
|
67.72 | 142 | 0.0% |
|
17.0 | 141 | 0.0% |
|
657.56 | 141 | 0.0% |
|
8.0 | 140 | 0.0% |
|
1330.36 | 140 | 0.0% |
|
Other values (1934) | 133297 | 31.6% |
|
(Missing) | 286603 | 68.0% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0.22 | 57 | 0.0% |
|
0.41 | 52 | 0.0% |
|
0.46 | 48 | 0.0% |
|
0.78 | 52 | 0.0% |
|
0.87 | 49 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
52739.02 | 72 | 0.0% |
|
53603.99 | 72 | 0.0% |
|
57815.43 | 68 | 0.0% |
|
57817.56 | 74 | 0.0% |
|
67474.85 | 72 | 0.0% |
|
MarkDown5
Numeric
Distinct count | 2294 |
---|---|
Unique (%) | 0.5% |
Missing (%) | 64.1% |
Missing (n) | 270138 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 4629 |
---|---|
Minimum | 135.16 |
Maximum | 108520 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 135.16 |
---|---|
5-th percentile | 715.52 |
Q1 | 1878.4 |
Median | 3359.4 |
Q3 | 5563.8 |
95-th percentile | 11269 |
Maximum | 108520 |
Range | 108380 |
Interquartile range | 3685.4 |
Descriptive statistics
Standard deviation | 5962.9 |
---|---|
Coef of variation | 1.2882 |
Kurtosis | 107.85 |
Mean | 4629 |
MAD | 2989.8 |
Skewness | 8.1699 |
Sum | 700970000 |
Variance | 35556000 |
Memory size | 6.4 MiB |
Value | Count | Frequency (%) | |
2743.18 | 136 | 0.0% |
|
1064.56 | 120 | 0.0% |
|
9083.54 | 75 | 0.0% |
|
20371.02 | 75 | 0.0% |
|
3567.03 | 75 | 0.0% |
|
4180.29 | 75 | 0.0% |
|
3557.67 | 75 | 0.0% |
|
986.23 | 74 | 0.0% |
|
1773.53 | 74 | 0.0% |
|
14660.97 | 74 | 0.0% |
|
Other values (2283) | 150579 | 35.7% |
|
(Missing) | 270138 | 64.1% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
135.16 | 65 | 0.0% |
|
153.04 | 47 | 0.0% |
|
153.9 | 49 | 0.0% |
|
164.08 | 52 | 0.0% |
|
170.64 | 69 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
58068.14 | 69 | 0.0% |
|
63005.58 | 69 | 0.0% |
|
85851.87 | 68 | 0.0% |
|
105223.11 | 70 | 0.0% |
|
108519.28 | 68 | 0.0% |
|
Size
Numeric
Distinct count | 40 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 136730 |
---|---|
Minimum | 34875 |
Maximum | 219622 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 34875 |
---|---|
5-th percentile | 39690 |
Q1 | 93638 |
Median | 140170 |
Q3 | 202500 |
95-th percentile | 206300 |
Maximum | 219622 |
Range | 184747 |
Interquartile range | 108870 |
Descriptive statistics
Standard deviation | 60981 |
---|---|
Coef of variation | 0.446 |
Kurtosis | -1.2063 |
Mean | 136730 |
MAD | 52517 |
Skewness | -0.32585 |
Sum | 57640387438 |
Variance | 3718600000 |
Memory size | 6.4 MiB |
Value | Count | Frequency (%) | |
39690 | 20802 | 4.9% |
|
39910 | 20597 | 4.9% |
|
203819 | 20376 | 4.8% |
|
219622 | 10474 | 2.5% |
|
126512 | 10315 | 2.4% |
|
205863 | 10272 | 2.4% |
|
151315 | 10244 | 2.4% |
|
202307 | 10238 | 2.4% |
|
204184 | 10225 | 2.4% |
|
158114 | 10224 | 2.4% |
|
Other values (30) | 287803 | 68.3% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
34875 | 8999 | 2.1% |
|
37392 | 9036 | 2.1% |
|
39690 | 20802 | 4.9% |
|
39910 | 20597 | 4.9% |
|
41062 | 6751 | 1.6% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
204184 | 10225 | 2.4% |
|
205863 | 10272 | 2.4% |
|
206302 | 10113 | 2.4% |
|
207499 | 10062 | 2.4% |
|
219622 | 10474 | 2.5% |
|
Store
Numeric
Distinct count | 45 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 22.201 |
---|---|
Minimum | 1 |
Maximum | 45 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 3 |
Q1 | 11 |
Median | 22 |
Q3 | 33 |
95-th percentile | 43 |
Maximum | 45 |
Range | 44 |
Interquartile range | 22 |
Descriptive statistics
Standard deviation | 12.785 |
---|---|
Coef of variation | 0.5759 |
Kurtosis | -1.1465 |
Mean | 22.201 |
MAD | 10.996 |
Skewness | 0.077763 |
Sum | 9359084 |
Variance | 163.46 |
Memory size | 6.4 MiB |
Value | Count | Frequency (%) | |
13 | 10474 | 2.5% |
|
10 | 10315 | 2.4% |
|
4 | 10272 | 2.4% |
|
1 | 10244 | 2.4% |
|
2 | 10238 | 2.4% |
|
24 | 10228 | 2.4% |
|
27 | 10225 | 2.4% |
|
34 | 10224 | 2.4% |
|
20 | 10214 | 2.4% |
|
6 | 10211 | 2.4% |
|
Other values (35) | 318925 | 75.7% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 10244 | 2.4% |
|
2 | 10238 | 2.4% |
|
3 | 9036 | 2.1% |
|
4 | 10272 | 2.4% |
|
5 | 8999 | 2.1% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
41 | 10088 | 2.4% |
|
42 | 6953 | 1.6% |
|
43 | 6751 | 1.6% |
|
44 | 7169 | 1.7% |
|
45 | 9637 | 2.3% |
|
Temperature
Numeric
Distinct count | 3528 |
---|---|
Unique (%) | 0.8% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 60.09 |
---|---|
Minimum | -2.06 |
Maximum | 100.14 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | -2.06 |
---|---|
5-th percentile | 27.31 |
Q1 | 46.68 |
Median | 62.09 |
Q3 | 74.28 |
95-th percentile | 87.27 |
Maximum | 100.14 |
Range | 102.2 |
Interquartile range | 27.6 |
Descriptive statistics
Standard deviation | 18.448 |
---|---|
Coef of variation | 0.307 |
Kurtosis | -0.63592 |
Mean | 60.09 |
MAD | 15.377 |
Skewness | -0.3214 |
Sum | 25332000 |
Variance | 340.33 |
Memory size | 6.4 MiB |
Value | Count | Frequency (%) | |
50.43 | 709 | 0.2% |
|
67.87 | 646 | 0.2% |
|
72.62 | 594 | 0.1% |
|
76.67 | 583 | 0.1% |
|
70.28 | 563 | 0.1% |
|
76.03 | 555 | 0.1% |
|
50.56 | 544 | 0.1% |
|
64.05 | 542 | 0.1% |
|
64.21 | 519 | 0.1% |
|
50.81 | 487 | 0.1% |
|
Other values (3518) | 415828 | 98.6% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
-2.06 | 69 | 0.0% |
|
5.54 | 68 | 0.0% |
|
6.23 | 69 | 0.0% |
|
7.46 | 69 | 0.0% |
|
9.51 | 70 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
99.2 | 46 | 0.0% |
|
99.22 | 185 | 0.0% |
|
99.66 | 48 | 0.0% |
|
100.07 | 46 | 0.0% |
|
100.14 | 44 | 0.0% |
|
Type
Categorical
Distinct count | 3 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
A |
215478
|
---|---|
B |
163495
|
C |
|
Value | Count | Frequency (%) | |
A | 215478 | 51.1% |
|
B | 163495 | 38.8% |
|
C | 42597 | 10.1% |
|
Unemployment
Numeric
Distinct count | 349 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 7.9603 |
---|---|
Minimum | 3.879 |
Maximum | 14.313 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 3.879 |
---|---|
5-th percentile | 5.326 |
Q1 | 6.891 |
Median | 7.866 |
Q3 | 8.572 |
95-th percentile | 12.187 |
Maximum | 14.313 |
Range | 10.434 |
Interquartile range | 1.681 |
Descriptive statistics
Standard deviation | 1.8633 |
---|---|
Coef of variation | 0.23407 |
Kurtosis | 2.7312 |
Mean | 7.9603 |
MAD | 1.283 |
Skewness | 1.1837 |
Sum | 3355800 |
Variance | 3.4719 |
Memory size | 6.4 MiB |
Value | Count | Frequency (%) | |
8.099 | 5152 | 1.2% |
|
8.163 | 3636 | 0.9% |
|
7.852 | 3614 | 0.9% |
|
7.343 | 3416 | 0.8% |
|
7.057 | 3414 | 0.8% |
|
7.931 | 3400 | 0.8% |
|
7.441 | 3397 | 0.8% |
|
6.565 | 3370 | 0.8% |
|
8.2 | 3361 | 0.8% |
|
6.891 | 3360 | 0.8% |
|
Other values (339) | 385450 | 91.4% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
3.8789999999999996 | 287 | 0.1% |
|
4.077 | 938 | 0.2% |
|
4.125 | 1831 | 0.4% |
|
4.145 | 562 | 0.1% |
|
4.156000000000001 | 1815 | 0.4% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
13.975 | 1529 | 0.4% |
|
14.020999999999999 | 2263 | 0.5% |
|
14.099 | 2441 | 0.6% |
|
14.18 | 2423 | 0.6% |
|
14.312999999999999 | 2636 | 0.6% |
|
Weekly_Sales
Numeric
Distinct count | 359464 |
---|---|
Unique (%) | 85.3% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 15981 |
---|---|
Minimum | -4988.9 |
Maximum | 693100 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | -4988.9 |
---|---|
5-th percentile | 59.975 |
Q1 | 2079.7 |
Median | 7612 |
Q3 | 20206 |
95-th percentile | 61202 |
Maximum | 693100 |
Range | 698090 |
Interquartile range | 18126 |
Descriptive statistics
Standard deviation | 22711 |
---|---|
Coef of variation | 1.4211 |
Kurtosis | 21.491 |
Mean | 15981 |
MAD | 15161 |
Skewness | 3.262 |
Sum | 6737200000 |
Variance | 515800000 |
Memory size | 6.4 MiB |
Value | Count | Frequency (%) | |
10.0 | 353 | 0.1% |
|
5.0 | 289 | 0.1% |
|
20.0 | 232 | 0.1% |
|
15.0 | 215 | 0.1% |
|
12.0 | 175 | 0.0% |
|
1.0 | 169 | 0.0% |
|
10.47 | 167 | 0.0% |
|
11.97 | 154 | 0.0% |
|
2.0 | 148 | 0.0% |
|
7.0 | 146 | 0.0% |
|
Other values (359454) | 419522 | 99.5% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
-4988.94 | 1 | 0.0% |
|
-3924.0 | 1 | 0.0% |
|
-1750.0 | 1 | 0.0% |
|
-1699.0 | 1 | 0.0% |
|
-1321.48 | 1 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
474330.1 | 1 | 0.0% |
|
627962.93 | 1 | 0.0% |
|
630999.19 | 1 | 0.0% |
|
649770.18 | 1 | 0.0% |
|
693099.36 | 1 | 0.0% |
|
Store | Dept | Date | Weekly_Sales | IsHoliday | Type | Size | Temperature | Fuel_Price | MarkDown1 | MarkDown2 | MarkDown3 | MarkDown4 | MarkDown5 | CPI | Unemployment | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 1 | 2010-02-05 | 24924.50 | False | A | 151315 | 42.31 | 2.572 | NaN | NaN | NaN | NaN | NaN | 211.096358 | 8.106 |
1 | 1 | 2 | 2010-02-05 | 50605.27 | False | A | 151315 | 42.31 | 2.572 | NaN | NaN | NaN | NaN | NaN | 211.096358 | 8.106 |
2 | 1 | 3 | 2010-02-05 | 13740.12 | False | A | 151315 | 42.31 | 2.572 | NaN | NaN | NaN | NaN | NaN | 211.096358 | 8.106 |
3 | 1 | 4 | 2010-02-05 | 39954.04 | False | A | 151315 | 42.31 | 2.572 | NaN | NaN | NaN | NaN | NaN | 211.096358 | 8.106 |
4 | 1 | 5 | 2010-02-05 | 32229.38 | False | A | 151315 | 42.31 | 2.572 | NaN | NaN | NaN | NaN | NaN | 211.096358 | 8.106 |
# Build the same profiling report for the test data.
pandas_profiling.ProfileReport(test)
Dataset info
Number of variables | 15 |
---|---|
Number of observations | 115064 |
Total Missing (%) | 7.4% |
Total size in memory | 13.3 MiB |
Average record size in memory | 121.0 B |
Variables types
Numeric | 12 |
---|---|
Categorical | 2 |
Boolean | 1 |
Date | 0 |
Text (Unique) | 0 |
Rejected | 0 |
Unsupported | 0 |
Warnings
CPI
Numeric
Distinct count | 361 |
---|---|
Unique (%) | 0.3% |
Missing (%) | 33.2% |
Missing (n) | 38162 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 176.96 |
---|---|
Minimum | 131.24 |
Maximum | 228.98 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 131.24 |
---|---|
5-th percentile | 131.48 |
Q1 | 138.4 |
Median | 192.3 |
Q3 | 223.24 |
95-th percentile | 227.78 |
Maximum | 228.98 |
Range | 97.74 |
Interquartile range | 84.842 |
Descriptive statistics
Standard deviation | 41.24 |
---|---|
Coef of variation | 0.23305 |
Kurtosis | -1.8588 |
Mean | 176.96 |
MAD | 40.222 |
Skewness | 0.071448 |
Sum | 13609000 |
Variance | 1700.7 |
Memory size | 1.8 MiB |
Value | Count | Frequency (%) | |
132.71609679999997 | 2080 | 1.8% |
|
139.1226129 | 1664 | 1.4% |
|
201.0705712 | 825 | 0.7% |
|
224.80253140000002 | 783 | 0.7% |
|
131.537 | 704 | 0.6% |
|
132.2725714 | 703 | 0.6% |
|
131.2793548 | 702 | 0.6% |
|
131.642 | 702 | 0.6% |
|
131.4784 | 701 | 0.6% |
|
132.65377420000002 | 698 | 0.6% |
|
Other values (350) | 67340 | 58.5% |
|
(Missing) | 38162 | 33.2% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
131.2362258 | 695 | 0.6% |
|
131.2793548 | 702 | 0.6% |
|
131.3258 | 696 | 0.6% |
|
131.37666670000002 | 695 | 0.6% |
|
131.4275333 | 693 | 0.6% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
228.72986380000003 | 401 | 0.3% |
|
228.7796682 | 208 | 0.2% |
|
228.8020401 | 60 | 0.1% |
|
228.8892482 | 60 | 0.1% |
|
228.9764563 | 186 | 0.2% |
|
Date
Categorical
Distinct count | 39 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
2012-12-21 |
|
---|---|
2012-12-07 |
|
2012-12-28 |
|
Other values (36) |
106085
|
Value | Count | Frequency (%) | |
2012-12-21 | 3002 | 2.6% |
|
2012-12-07 | 2989 | 2.6% |
|
2012-12-28 | 2988 | 2.6% |
|
2012-12-14 | 2986 | 2.6% |
|
2013-02-15 | 2984 | 2.6% |
|
2012-11-23 | 2976 | 2.6% |
|
2012-11-09 | 2971 | 2.6% |
|
2013-01-04 | 2964 | 2.6% |
|
2013-02-08 | 2964 | 2.6% |
|
2012-11-30 | 2962 | 2.6% |
|
Other values (29) | 85278 | 74.1% |
|
Dept
Numeric
Distinct count | 81 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 44.34 |
---|---|
Minimum | 1 |
Maximum | 99 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 4 |
Q1 | 18 |
Median | 37 |
Q3 | 74 |
95-th percentile | 95 |
Maximum | 99 |
Range | 98 |
Interquartile range | 56 |
Descriptive statistics
Standard deviation | 30.656 |
---|---|
Coef of variation | 0.6914 |
Kurtosis | -1.2242 |
Mean | 44.34 |
MAD | 26.74 |
Skewness | 0.36242 |
Sum | 5101883 |
Variance | 939.82 |
Memory size | 1.8 MiB |
Value | Count | Frequency (%) | |
1 | 1755 | 1.5% |
|
13 | 1755 | 1.5% |
|
91 | 1755 | 1.5% |
|
90 | 1755 | 1.5% |
|
21 | 1755 | 1.5% |
|
38 | 1755 | 1.5% |
|
82 | 1755 | 1.5% |
|
40 | 1755 | 1.5% |
|
81 | 1755 | 1.5% |
|
16 | 1755 | 1.5% |
|
Other values (71) | 97514 | 84.7% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 1755 | 1.5% |
|
2 | 1755 | 1.5% |
|
3 | 1755 | 1.5% |
|
4 | 1755 | 1.5% |
|
5 | 1738 | 1.5% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
95 | 1755 | 1.5% |
|
96 | 1350 | 1.2% |
|
97 | 1716 | 1.5% |
|
98 | 1632 | 1.4% |
|
99 | 613 | 0.5% |
|
Fuel_Price
Numeric
Distinct count | 297 |
---|---|
Unique (%) | 0.3% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 3.5815 |
---|---|
Minimum | 2.872 |
Maximum | 4.125 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 2.872 |
---|---|
5-th percentile | 3.161 |
Q1 | 3.431 |
Median | 3.606 |
Q3 | 3.766 |
95-th percentile | 3.951 |
Maximum | 4.125 |
Range | 1.253 |
Interquartile range | 0.335 |
Descriptive statistics
Standard deviation | 0.23944 |
---|---|
Coef of variation | 0.066854 |
Kurtosis | -0.1176 |
Mean | 3.5815 |
MAD | 0.18861 |
Skewness | -0.39128 |
Sum | 412110 |
Variance | 0.057332 |
Memory size | 1.8 MiB |
Value | Count | Frequency (%) | |
3.417 | 1853 | 1.6% |
|
3.583 | 1851 | 1.6% |
|
3.386 | 1793 | 1.6% |
|
3.611 | 1374 | 1.2% |
|
3.108 | 1201 | 1.0% |
|
3.4789999999999996 | 1169 | 1.0% |
|
3.597 | 1071 | 0.9% |
|
3.451 | 1043 | 0.9% |
|
3.227 | 1040 | 0.9% |
|
3.614 | 1028 | 0.9% |
|
Other values (287) | 101641 | 88.3% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
2.872 | 276 | 0.2% |
|
2.889 | 276 | 0.2% |
|
2.9139999999999997 | 193 | 0.2% |
|
2.927 | 194 | 0.2% |
|
2.957 | 279 | 0.2% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
4.079 | 282 | 0.2% |
|
4.099 | 355 | 0.3% |
|
4.104 | 186 | 0.2% |
|
4.109 | 189 | 0.2% |
|
4.125 | 166 | 0.1% |
|
IsHoliday
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.077592 |
---|
True |
|
---|---|
(Missing) |
106136
|
Value | Count | Frequency (%) | |
True | 8928 | 7.8% |
|
(Missing) | 106136 | 92.2% |
|
MarkDown1
Numeric
Distinct count | 1753 |
---|---|
Unique (%) | 1.5% |
Missing (%) | 0.1% |
Missing (n) | 149 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 7689.2 |
---|---|
Minimum | -2781.4 |
Maximum | 103180 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | -2781.4 |
---|---|
5-th percentile | 189.49 |
Q1 | 1966.5 |
Median | 4842.3 |
Q3 | 9439.1 |
95-th percentile | 23141 |
Maximum | 103180 |
Range | 105970 |
Interquartile range | 7472.7 |
Descriptive statistics
Standard deviation | 10699 |
---|---|
Coef of variation | 1.3914 |
Kurtosis | 22.871 |
Mean | 7689.2 |
MAD | 6160.2 |
Skewness | 4.1727 |
Sum | 883610000 |
Variance | 114460000 |
Memory size | 1.8 MiB |
Value | Count | Frequency (%) | |
4655.55 | 74 | 0.1% |
|
13357.31 | 74 | 0.1% |
|
22673.11 | 74 | 0.1% |
|
13613.52 | 74 | 0.1% |
|
5692.66 | 74 | 0.1% |
|
10755.57 | 74 | 0.1% |
|
9753.88 | 74 | 0.1% |
|
20297.6 | 74 | 0.1% |
|
5813.45 | 73 | 0.1% |
|
7701.72 | 73 | 0.1% |
|
Other values (1742) | 114177 | 99.2% |
|
(Missing) | 149 | 0.1% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
-2781.45 | 50 | 0.0% |
|
-772.21 | 43 | 0.0% |
|
-563.9 | 70 |