{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Python\n", "\n", "using the [PyCall.jl](https://github.com/JuliaPy/PyCall.jl) package." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "CovNWFn" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "using Printf, DelimitedFiles, LinearAlgebra, Statistics\n", "\n", "include(\"jlFiles/printmat.jl\")\n", "include(\"jlFiles/OlsNW.jl\") #functions for OLS" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Sample size: (388,)\n" ] } ], "source": [ "x = readdlm(\"Data/FFmFactorsPs.csv\",',',skipstart=1)\n", "\n", " #yearmonth, market, small minus big, high minus low\n", "(ym,Rme,RSMB,RHML) = (x[:,1],x[:,2]/100,x[:,3]/100,x[:,4]/100) \n", "x = nothing\n", "\n", "printlnPs(\"Sample size:\",size(Rme))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Do OLS (in Julia)\n", "\n", "Use the functions in the file OlsNW.jl to do OLS. Report point estimates and standard errors."
] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[34m\u001b[1mOLS Results (assuming iid residuals):\u001b[22m\u001b[39m\n", "\n", " b std_iid\n", "c 0.007 0.002\n", "SMB 0.217 0.073\n", "HML -0.429 0.074\n", "\n" ] } ], "source": [ "Y = Rme\n", "T = size(Y,1)\n", "X = [ones(T) RSMB RHML]\n", "\n", "(b,u,Yhat,V,R2) = OlsGMFn(Y,X)\n", "std_iid = sqrt.(diag(V))\n", "\n", "printblue(\"OLS Results (assuming iid residuals):\\n\")\n", "xNames = [\"c\",\"SMB\",\"HML\"]\n", "printmat([b std_iid],colNames=[\"b\",\"std_iid\"],rowNames=xNames)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Getting Started with PyCall" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "using PyCall\n", "sm = pyimport(\"statsmodels.api\"); #activate this package" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PyObject \n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: y R-squared: 0.134\n", "Model: OLS Adj. R-squared: 0.130\n", "Method: Least Squares F-statistic: 29.85\n", "Date: Thu, 09 Dec 2021 Prob (F-statistic): 8.88e-13\n", "Time: 10:00:28 Log-Likelihood: 672.28\n", "No. 
Observations: 388 AIC: -1339.\n", "Df Residuals: 385 BIC: -1327.\n", "Df Model: 2 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "const 0.0070 0.002 3.167 0.002 0.003 0.011\n", "x1 0.2170 0.074 2.949 0.003 0.072 0.362\n", "x2 -0.4291 0.074 -5.821 0.000 -0.574 -0.284\n", "==============================================================================\n", "Omnibus: 58.863 Durbin-Watson: 1.849\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 146.539\n", "Skew: -0.749 Prob(JB): 1.51e-32\n", "Kurtosis: 5.612 Cond. No. 38.8\n", "==============================================================================\n", "\n", "Notes:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "\"\"\"\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "┌ Warning: `vendor()` is deprecated, use `BLAS.get_config()` and inspect the output instead\n", "│ caller = npyinitialize() at numpy.jl:67\n", "└ @ PyCall C:\\Users\\psoderlind\\.julia\\packages\\PyCall\\3fwVL\\src\\numpy.jl:67\n" ] } ], "source": [ "resultsP = sm.OLS(Y, X).fit() #can use Python functions directly\n", "\n", "println(resultsP.summary())" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[:HC0_se, :HC1_se, :HC2_se, :HC3_se, :_HCCM, :__class__, :__delattr__, :__dict__, :__dir__, :__doc__, :__eq__, :__format__, :__ge__, :__getattribute__, :__gt__, :__hash__, :__init__, :__init_subclass__, :__le__, :__lt__, :__module__, :__ne__, :__new__, :__reduce__, :__reduce_ex__, :__repr__, :__setattr__, :__sizeof__, :__str__, :__subclasshook__, :__weakref__, :_abat_diagonal, :_cache, :_data_attr, :_data_in_cache, :_get_robustcov_results, :_is_nested, :_use_t, :_wexog_singular_values, :aic, :bic, :bse, 
:centered_tss, :compare_f_test, :compare_lm_test, :compare_lr_test, :condition_number, :conf_int, :conf_int_el, :cov_HC0, :cov_HC1, :cov_HC2, :cov_HC3, :cov_kwds, :cov_params, :cov_type, :df_model, :df_resid, :diagn, :eigenvals, :el_test, :ess, :f_pvalue, :f_test, :fittedvalues, :fvalue, :get_influence, :get_prediction, :get_robustcov_results, :info_criteria, :initialize, :k_constant, :llf, :load, :model, :mse_model, :mse_resid, :mse_total, :nobs, :normalized_cov_params, :outlier_test, :params, :predict, :pvalues, :remove_data, :resid, :resid_pearson, :rsquared, :rsquared_adj, :save, :scale, :ssr, :summary, :summary2, :t_test, :t_test_pairwise, :tvalues, :uncentered_tss, :use_t, :wald_test, :wald_test_terms, :wresid]\n" ] } ], "source": [ "println(keys(resultsP)) #print all keys (field names)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Task 1\n", "\n", "Print the Julia and Python estimates (of the coefficients) in a table so we can compare directly." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Comparing the estimates in Julia and Python\n", " 0.007 0.007\n", " 0.217 0.217\n", " -0.429 -0.429\n", "\n" ] } ], "source": [ "b_P = resultsP.params #the numerical results are now a Julia vector\n", "\n", "println(\"Comparing the estimates in Julia and Python\")\n", "printmat([b b_P])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Task 2\n", "\n", "Print the smallest and largest values of the difference between the residuals according to Julia and those according to Python." 
] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(-2.7755575615628914e-17, 4.163336342344337e-17)\n", "\n" ] } ], "source": [ "printmat(extrema(resultsP.resid - u))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# OLS (in Julia) with Robust Standard Errors\n", "\n", "Use standard errors that are robust to heteroskedasticity and autocorrelation (2 lags)." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[34m\u001b[1mOLS Results (robust std):\u001b[22m\u001b[39m\n", "\n", " b std_nw\n", "c 0.007 0.002\n", "SMB 0.217 0.129\n", "HML -0.429 0.118\n", "\n" ] } ], "source": [ "(b,u,Yhat,V,R2) = OlsNWFn(Y,X,2)\n", "std_nw = sqrt.(diag(V))\n", "\n", "printblue(\"OLS Results (robust std):\\n\")\n", "xNames = [\"c\",\"SMB\",\"HML\"]\n", "printmat([b std_nw],colNames=[\"b\",\"std_nw\"],rowNames=xNames)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Task 3 \n", "\n", "Now redo the Python estimation with the same sort of robust standard errors. Hint: `resultsP.get_robustcov_results()`" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PyObject \n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: y R-squared: 0.134\n", "Model: OLS Adj. R-squared: 0.130\n", "Method: Least Squares F-statistic: 11.87\n", "Date: Thu, 09 Dec 2021 Prob (F-statistic): 9.94e-06\n", "Time: 10:00:32 Log-Likelihood: 672.28\n", "No.
Observations: 388 AIC: -1339.\n", "Df Residuals: 385 BIC: -1327.\n", "Df Model: 2 \n", "Covariance Type: HAC \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "const 0.0070 0.002 2.850 0.005 0.002 0.012\n", "x1 0.2170 0.129 1.688 0.092 -0.036 0.470\n", "x2 -0.4291 0.118 -3.649 0.000 -0.660 -0.198\n", "==============================================================================\n", "Omnibus: 58.863 Durbin-Watson: 1.849\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 146.539\n", "Skew: -0.749 Prob(JB): 1.51e-32\n", "Kurtosis: 5.612 Cond. No. 38.8\n", "==============================================================================\n", "\n", "Notes:\n", "[1] Standard Errors are heteroscedasticity and autocorrelation robust (HAC) using 2 lags and without small sample correction\n", "\"\"\"\n" ] } ], "source": [ "resultsP2 = resultsP.get_robustcov_results(cov_type=\"HAC\",maxlags=2)\n", "\n", "println(resultsP2.summary())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "@webio": { "lastCommId": null, "lastKernelId": null }, "kernelspec": { "display_name": "Julia 1.7.0", "language": "julia", "name": "julia-1.7" }, "language_info": { "file_extension": ".jl", "mimetype": "application/julia", "name": "julia", "version": "1.7.0" } }, "nbformat": 4, "nbformat_minor": 4 }