{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Python\n",
    "\n",
    "using the [PyCall.jl](https://github.com/JuliaPy/PyCall.jl) package."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "CovNWFn"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "using Printf, DelimitedFiles, LinearAlgebra, Statistics\n",
    "\n",
    "include(\"jlFiles/printmat.jl\")\n",
    "include(\"jlFiles/OlsNW.jl\")         #functions for OLS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sample size:    (388,)\n"
     ]
    }
   ],
   "source": [
    "x = readdlm(\"Data/FFmFactorsPs.csv\",',',skipstart=1)\n",
    "\n",
    "                #yearmonth, market, small minus big, high minus low\n",
    "(ym,Rme,RSMB,RHML) = (x[:,1],x[:,2]/100,x[:,3]/100,x[:,4]/100) \n",
    "x = nothing\n",
    "\n",
    "printlnPs(\"Sample size:\",size(Rme))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Do OLS (in Julia)\n",
    "\n",
    "Use the functions in the file OlsNW.jl to do OLS. Report point estimates and standard errors."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[34m\u001b[1mOLS Results (assuming iid residuals):\u001b[22m\u001b[39m\n",
      "\n",
      "            b   std_iid\n",
      "c       0.007     0.002\n",
      "SMB     0.217     0.073\n",
      "HML    -0.429     0.074\n",
      "\n"
     ]
    }
   ],
   "source": [
    "Y = Rme\n",
    "T = size(Y,1)\n",
    "X = [ones(T) RSMB RHML]\n",
    "\n",
    "(b,u,Yhat,V,R2) = OlsGMFn(Y,X)\n",
    "std_iid = sqrt.(diag(V))\n",
    "\n",
    "printblue(\"OLS Results (assuming iid residuals):\\n\")\n",
    "xNames = [\"c\",\"SMB\",\"HML\"]\n",
    "printmat([b std_iid],colNames=[\"b\",\"std_iid\"],rowNames=xNames)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Getting Started with PyCall"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "using PyCall\n",
    "sm = pyimport(\"statsmodels.api\");          #activate this package"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PyObject <class 'statsmodels.iolib.summary.Summary'>\n",
      "\"\"\"\n",
      "                            OLS Regression Results                            \n",
      "==============================================================================\n",
      "Dep. Variable:                      y   R-squared:                       0.134\n",
      "Model:                            OLS   Adj. R-squared:                  0.130\n",
      "Method:                 Least Squares   F-statistic:                     29.85\n",
      "Date:                Thu, 09 Dec 2021   Prob (F-statistic):           8.88e-13\n",
      "Time:                        10:00:28   Log-Likelihood:                 672.28\n",
      "No. Observations:                 388   AIC:                            -1339.\n",
      "Df Residuals:                     385   BIC:                            -1327.\n",
      "Df Model:                           2                                         \n",
      "Covariance Type:            nonrobust                                         \n",
      "==============================================================================\n",
      "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "const          0.0070      0.002      3.167      0.002       0.003       0.011\n",
      "x1             0.2170      0.074      2.949      0.003       0.072       0.362\n",
      "x2            -0.4291      0.074     -5.821      0.000      -0.574      -0.284\n",
      "==============================================================================\n",
      "Omnibus:                       58.863   Durbin-Watson:                   1.849\n",
      "Prob(Omnibus):                  0.000   Jarque-Bera (JB):              146.539\n",
      "Skew:                          -0.749   Prob(JB):                     1.51e-32\n",
      "Kurtosis:                       5.612   Cond. No.                         38.8\n",
      "==============================================================================\n",
      "\n",
      "Notes:\n",
      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
      "\"\"\"\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "┌ Warning: `vendor()` is deprecated, use `BLAS.get_config()` and inspect the output instead\n",
      "│   caller = npyinitialize() at numpy.jl:67\n",
      "└ @ PyCall C:\\Users\\psoderlind\\.julia\\packages\\PyCall\\3fwVL\\src\\numpy.jl:67\n"
     ]
    }
   ],
   "source": [
    "resultsP = sm.OLS(Y, X).fit()        #can use Python functions directly\n",
    "\n",
    "println(resultsP.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[:HC0_se, :HC1_se, :HC2_se, :HC3_se, :_HCCM, :__class__, :__delattr__, :__dict__, :__dir__, :__doc__, :__eq__, :__format__, :__ge__, :__getattribute__, :__gt__, :__hash__, :__init__, :__init_subclass__, :__le__, :__lt__, :__module__, :__ne__, :__new__, :__reduce__, :__reduce_ex__, :__repr__, :__setattr__, :__sizeof__, :__str__, :__subclasshook__, :__weakref__, :_abat_diagonal, :_cache, :_data_attr, :_data_in_cache, :_get_robustcov_results, :_is_nested, :_use_t, :_wexog_singular_values, :aic, :bic, :bse, :centered_tss, :compare_f_test, :compare_lm_test, :compare_lr_test, :condition_number, :conf_int, :conf_int_el, :cov_HC0, :cov_HC1, :cov_HC2, :cov_HC3, :cov_kwds, :cov_params, :cov_type, :df_model, :df_resid, :diagn, :eigenvals, :el_test, :ess, :f_pvalue, :f_test, :fittedvalues, :fvalue, :get_influence, :get_prediction, :get_robustcov_results, :info_criteria, :initialize, :k_constant, :llf, :load, :model, :mse_model, :mse_resid, :mse_total, :nobs, :normalized_cov_params, :outlier_test, :params, :predict, :pvalues, :remove_data, :resid, :resid_pearson, :rsquared, :rsquared_adj, :save, :scale, :ssr, :summary, :summary2, :t_test, :t_test_pairwise, :tvalues, :uncentered_tss, :use_t, :wald_test, :wald_test_terms, :wresid]\n"
     ]
    }
   ],
   "source": [
    "println(keys(resultsP))              #print all keys (field names)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Task 1\n",
    "\n",
    "Print the Julia and Python estimates (of the coefficients) in a table so we can compare directly."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Comparing the estimates in Julia and Python\n",
      "     0.007     0.007\n",
      "     0.217     0.217\n",
      "    -0.429    -0.429\n",
      "\n"
     ]
    }
   ],
   "source": [
    "b_P = resultsP.params                #the numerical results are now a Julia vector\n",
    "\n",
    "println(\"Comparing the estimates in Julia and Python\")\n",
    "printmat([b b_P])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Task 2\n",
    "\n",
    "Print the smallest and largest values of the difference between the residuals according to Julia and those according to Python."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(-2.7755575615628914e-17, 4.163336342344337e-17)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "printmat(extrema(resultsP.resid - u))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# OLS (in Julia) with Robust Standard Errors\n",
    "\n",
    "Use standard errors that are robust to heteroskedasticity and autocorrelation (2 lags)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[34m\u001b[1mOLS Results (robust std):\u001b[22m\u001b[39m\n",
      "\n",
      "            b    std_nw\n",
      "c       0.007     0.002\n",
      "SMB     0.217     0.129\n",
      "HML    -0.429     0.118\n",
      "\n"
     ]
    }
   ],
   "source": [
    "(b,u,Yhat,V,R2) = OlsNWFn(Y,X,2)\n",
    "std_nw = sqrt.(diag(V))\n",
    "\n",
    "printblue(\"OLS Results (robust std):\\n\")\n",
    "xNames = [\"c\",\"SMB\",\"HML\"]\n",
    "printmat([b std_nw],colNames=[\"b\",\"std_nw\"],rowNames=xNames)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Task 3 \n",
    "\n",
    "Now redo the Python estimation with the same sort of robust standard errors. Hint: `resultsP.get_robustcov_results()`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PyObject <class 'statsmodels.iolib.summary.Summary'>\n",
      "\"\"\"\n",
      "                            OLS Regression Results                            \n",
      "==============================================================================\n",
      "Dep. Variable:                      y   R-squared:                       0.134\n",
      "Model:                            OLS   Adj. R-squared:                  0.130\n",
      "Method:                 Least Squares   F-statistic:                     11.87\n",
      "Date:                Thu, 09 Dec 2021   Prob (F-statistic):           9.94e-06\n",
      "Time:                        10:00:32   Log-Likelihood:                 672.28\n",
      "No. Observations:                 388   AIC:                            -1339.\n",
      "Df Residuals:                     385   BIC:                            -1327.\n",
      "Df Model:                           2                                         \n",
      "Covariance Type:                  HAC                                         \n",
      "==============================================================================\n",
      "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "const          0.0070      0.002      2.850      0.005       0.002       0.012\n",
      "x1             0.2170      0.129      1.688      0.092      -0.036       0.470\n",
      "x2            -0.4291      0.118     -3.649      0.000      -0.660      -0.198\n",
      "==============================================================================\n",
      "Omnibus:                       58.863   Durbin-Watson:                   1.849\n",
      "Prob(Omnibus):                  0.000   Jarque-Bera (JB):              146.539\n",
      "Skew:                          -0.749   Prob(JB):                     1.51e-32\n",
      "Kurtosis:                       5.612   Cond. No.                         38.8\n",
      "==============================================================================\n",
      "\n",
      "Notes:\n",
      "[1] Standard Errors are heteroscedasticity and autocorrelation robust (HAC) using 2 lags and without small sample correction\n",
      "\"\"\"\n"
     ]
    }
   ],
   "source": [
    "resultsP2 = resultsP.get_robustcov_results(cov_type=\"HAC\",maxlags=2)\n",
    "\n",
    "println(resultsP2.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "@webio": {
   "lastCommId": null,
   "lastKernelId": null
  },
  "kernelspec": {
   "display_name": "Julia 1.7.0",
   "language": "julia",
   "name": "julia-1.7"
  },
  "language_info": {
   "file_extension": ".jl",
   "mimetype": "application/julia",
   "name": "julia",
   "version": "1.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}