## Match SIC industry portfolios to Fama-French industry portfolios.

In most of my panel data analyses, I use the Fama-French industry portfolios to construct industry-by-year fixed effects. Each industry code in the Fama-French industry portfolios corresponds to a set of 4-digit SIC codes, but the databases available at WRDS do not provide information on the Fama-French industry portfolios. Judson Caskey has created a Stata package called “ffind” that produces 5, 10, 12, 17, 30, 38, 48, or 49 Fama-French industry portfolios based on the 4-digit SIC codes. Ekaterina Volkova has created an R function called “match_FF” that produces 5, 10, 12, 17, 30, 38, 48, or 49 Fama-French industry portfolios based on the 4-digit SIC codes.

## Panel data regressions.

### 1. Clustered standard errors

# Notes:
# 1. `foo` is the dataset/frame containing the required data.
# 2. `firm` is the unique firm identifier, e.g., tic, cik, or gvkey.
# 3. `time` is the year identifier, e.g., fyear or datadate.

# Load linear group fixed effects package.
# library() is used instead of require() so that a missing package stops the
# script immediately rather than returning FALSE and failing later.
library(lfe)

# Perform panel data OLS with non-robust standard errors.
m1 <- felm(y ~ x, data = foo)

# Perform panel data OLS with heteroscedasticity-robust (i.e., White) standard
# errors. The robust standard errors are requested when summarising the fitted
# model, not in the felm() call itself.
m2 <- felm(y ~ x, data = foo)
summary(m2, robust = TRUE)

# Perform panel data OLS with standard errors estimated in the presence of a
# firm effect (refer to Petersen, 2009).
# The four parts of the formula are:
#   outcome ~ covariates | fixed effects | instruments | cluster variables
m3 <- felm(y ~ x | 0 | 0 | firm, data = foo)

# Perform panel data OLS with standard errors estimated in the presence of a
# time effect (refer to Petersen, 2009).
m4 <- felm(y ~ x | 0 | 0 | time, data = foo)

# Perform panel data OLS with standard errors estimated in the presence of
# both a firm, and a time effect (refer to Petersen, 2009).
m5 <- felm(y ~ x | 0 | 0 | firm + time, data = foo)

### 2. Fixed effects

# Notes:
# 1. `foo` is the dataset/frame containing the required data.
# 2. `firm` is the unique firm identifier, e.g., tic, cik, or gvkey.
# 3. `time` is the year identifier, e.g., fyear, or datadate.
# 4. `industry` is the industry identifier, e.g., sic4, or naics.
# 5. `reghdfe` refers to higher dimensional fixed effects, in the spirit of
#    Cameron and Miller (2016), and Correia (2016).

# Load linear group fixed effects package, plus the packages that provide the
# pipe, the grouping verbs, and the data.table conversion used below.
# library() is used instead of require() so that a missing package stops the
# script immediately rather than returning FALSE and failing later.
library(lfe)
library(dplyr)
library(data.table)

# Perform panel data OLS with firm and time effects, and non-robust standard
# errors.
m1 <- felm(y ~ x | firm + time | 0 | 0, cmethod = "reghdfe", data = foo)

# Perform panel data OLS with firm and time effects, and standard errors
# estimated in the presence of both a firm, and a time effect (refer to
# Petersen, 2009).
m2 <- felm(
  y ~ x | firm + time | 0 | firm + time,
  cmethod = "reghdfe",
  data = foo
)

# Perform panel data OLS with firm and industry-by-time effects, and standard
# errors estimated in the presence of both a firm, and a time effect (refer to
# Petersen, 2009).
# `i_t` is one integer id per (industry, time) combination; the grouping is
# dropped again before the frame is converted so it does not leak into later
# steps.
foo <- foo %>%
  group_by(industry, time) %>%
  mutate(i_t = cur_group_id()) %>%
  ungroup() %>%
  as.data.table()

m3 <- felm(
  y ~ x | firm + i_t | 0 | firm + time,
  cmethod = "reghdfe",
  data = foo
)

## Functions

### 1. Vars_Sum

# Notes:
# 1. `Vars_Sum` is a function that creates descriptive statistics for the
#    variable(s) of interest. It is also flexible in creating grouped
#    descriptive statistics.
# 2. `foo` is the dataset/frame containing the required data (passed as the
#    `data` argument).
# 3. `var` is the variable(s) of interest. In the case of multiple variables,
#    var refers to the variables vector.
# 4. `...` is the vector of groups. `time` is the year identifier, e.g.,
#    fyear, or datadate. `industry` is the industry identifier, e.g., sic4,
#    or naics.

# Load dplyr for the pipe and the grouping/summary verbs used below.
library(dplyr)

# Returns one row per group (a single row when no groups are supplied) with
# the columns n, mean, sd, Median, IQR, min and max of `var`; missing values
# are removed before each statistic is computed.
Vars_Sum <- function(data, var, ...) {
  data %>%
    # Grouping columns are forwarded as-is; with no `...` the data stay
    # ungrouped. This replaces the deprecated group_by_()/lazyeval route so
    # the whole function uses one tidy-evaluation mechanism.
    group_by(...) %>%
    summarise(
      n = n(),
      mean = mean({{ var }}, na.rm = TRUE),
      sd = sd({{ var }}, na.rm = TRUE),
      Median = median({{ var }}, na.rm = TRUE),
      IQR = IQR({{ var }}, na.rm = TRUE),
      min = min({{ var }}, na.rm = TRUE),
      max = max({{ var }}, na.rm = TRUE),
      .groups = "drop"
    )
}

# HOW TO USE:
# For ungrouped descriptive statistics, simply apply the function as follows:
Vars_Sum(foo, var)

# For grouped descriptive statistics, e.g., by industry and year, simply apply
# the function as follows:
Vars_Sum(foo, var, industry, time)

### 2. Correl_Matrix

# Notes:
# 1. `Correl_Matrix` is a function that creates correlations matrices for a
#    vector of variables.
# 2. The purpose of the function is to merge two different sets of correlation
#    matrices in one table, e.g., spearman correlation matrix above the
#    diagonal, and pearson correlation matrix below the diagonal.
# 3. `foo` is the dataset/frame containing the required data.

# Hmisc provides rcorr(); xtable and dplyr are needed by the usage example
# below. library() is used instead of require() so that a missing package
# stops the script immediately.
library(Hmisc)
library(xtable)
library(dplyr)

# Arguments:
#   foo            Numeric matrix (or object coercible with as.matrix()) whose
#                  columns are the variables to correlate.
#   method         "pearson" (default) or "spearman".
#   removeTriangle "upper" (default) or "lower": the triangle, including the
#                  diagonal, that is blanked out. Any other value leaves the
#                  full table untouched.
#   result         Currently unused; kept so existing calls keep working.
# Returns a data frame of character cells holding the rounded correlation
# followed by its significance stars.
Correl_Matrix <- function(foo,
                          method = c("pearson", "spearman"),
                          removeTriangle = c("upper", "lower"),
                          result = c("text", "html", "latex")) {
  # Compute correlations matrix.
  foo <- as.matrix(foo)
  # Only the first element is used, so the default vector selects "pearson";
  # an unknown method fails here with a clear message.
  method <- match.arg(method[1], c("pearson", "spearman"))
  MATRIX <- Hmisc::rcorr(foo, type = method)
  R <- MATRIX$r # Matrix of correlation coefficients.
  p <- MATRIX$P # Matrix of correlation coefficients p-values.

  ## Define notions for significance levels; spacing is important.
  ## *** p < .01, ** p < .05, * p < .1.
  mystars <- ifelse(
    p < .01, "*** ",
    ifelse(p < .05, "** ", ifelse(p < .1, "* ", " "))
  )

  ## Round the correlations matrix to two decimals. The temporary leading
  ## column of -1.11 is formatted together with the coefficients and then
  ## dropped, so every cell is padded to the width of a signed two-decimal
  ## number.
  R <- format(round(cbind(rep(-1.11, ncol(foo)), R), 2))[, -1]

  ## Build a new matrix that includes the correlations with their appropriate
  ## stars. The diagonal has no p-value, so it is rewritten without stars.
  Rnew <- matrix(paste0(R, mystars), ncol = ncol(foo))
  diag(Rnew) <- paste0(diag(R), " ")
  rownames(Rnew) <- colnames(foo)
  colnames(Rnew) <- paste0(colnames(foo), "")

  if (removeTriangle[1] == "upper") {
    ## Remove upper triangle of correlation matrix.
    Rnew[upper.tri(Rnew, diag = TRUE)] <- ""
  } else if (removeTriangle[1] == "lower") {
    ## Remove lower triangle of correlation matrix.
    Rnew[lower.tri(Rnew, diag = TRUE)] <- ""
  }

  # Return the table explicitly so the result never depends on which branch
  # happened to run last.
  as.data.frame(Rnew)
}

# HOW TO USE:
# Correl_Matrix requires a matrix with the variables of interest as input.
# Replace `...` with the variables of interest.
Correlation <- foo %>%
  select(...) %>%
  data.matrix()

# `Lower` has its lower triangle removed (Spearman kept above the diagonal);
# `Upper` has its upper triangle removed (Pearson kept below the diagonal).
Lower <- Correl_Matrix(
  Correlation,
  removeTriangle = "lower",
  method = "spearman"
)
Upper <- Correl_Matrix(
  Correlation,
  removeTriangle = "upper",
  method = "pearson"
)

# Work on plain matrices: replacing a diagonal needs a matrix, not a data
# frame.
Matrix <- as.matrix(Upper)
diag(Matrix) <- 1
Matrix[upper.tri(Matrix)] <- as.matrix(Lower)[upper.tri(Lower)]

# `type` is either "html" or "latex"; replace "File name" with the output
# path.
print(xtable(Matrix), type = "html", file = "File name")