Match SIC codes to Fama-French industry portfolios.

In most of my panel data analyses, I use the Fama-French industry portfolios to construct industry-by-year fixed effects. Each industry code in the Fama-French industry portfolios corresponds to a set of 4-digit SIC codes, but the databases available at WRDS do not provide information on the Fama-French industry portfolios. Judson Caskey has created a Stata package called “ffind” that produces 5, 10, 12, 17, 30, 38, 48, or 49 Fama-French industry portfolios based on the 4-digit SIC codes. Ekaterina Volkova has created an R function called “match_FF” that produces 5, 10, 12, 17, 30, 38, 48, or 49 Fama-French industry portfolios based on the 4-digit SIC codes.

Panel data regressions.

1. Clustered standard errors

# Notes:
# 1. foo is the dataset/frame containing the required data.
# 2. firm is the unique firm identifier, e.g., tic, cik, or gvkey.
# 3. time is the year identifier, e.g., fyear or datadate.

# Load linear group fixed effects package.
# library() stops with an error if lfe is not installed, whereas require()
# only returns FALSE and lets the felm() calls below fail later.
library(lfe)

# Perform panel data OLS with non-robust standard errors.
m1 <- felm(y ~ x, data = foo)

# Perform panel data OLS with heteroscedasticity-robust (i.e., White) standard errors.
# The model is fitted as usual; the robust standard errors are requested when
# the fit is summarised, not in the felm() call itself.
m2 <- felm(y ~ x, data = foo)
summary(m2, robust = TRUE)

# The felm() formula has four parts: y ~ covariates | fixed effects | IV | clusters.
# A 0 marks a part that is not used.

# Perform panel data OLS with standard errors estimated in the presence of a firm effect (refer to Petersen, 2009).
m3 <- felm(y ~ x | 0 | 0 | firm, data = foo)

# Perform panel data OLS with standard errors estimated in the presence of a time effect (refer to Petersen, 2009).
m4 <- felm(y ~ x | 0 | 0 | time, data = foo)

# Perform panel data OLS with standard errors estimated in the presence of both a firm, and a time effect (refer to Petersen, 2009).
m5 <- felm(y ~ x | 0 | 0 | firm + time, data = foo)

2. Fixed effects

# Notes: 
# 1. foo is the dataset/frame containing the required data.
# 2. firm is the unique firm identifier, e.g., tic, cik, or gvkey.
# 3. time is the year identifier, e.g., fyear, or datadate.
# 4. industry is the industry identifier, e.g., sic4, or naics.
# 5. reghdfe refers to higher dimensional fixed effects, in the spirit of Cameron and Miller (2016) and Correia (2016).

# Load linear group fixed effects package.
# library() stops with an error if lfe is not installed, whereas require()
# only returns FALSE and lets the felm() calls below fail later.
library(lfe)

# Perform panel data OLS with firm and time effects, and non-robust standard errors.
m1 <- felm(y ~ x | firm + time | 0 | 0, cmethod = "reghdfe", data = foo)

# Perform panel data OLS with firm and time effects, and standard errors estimated in the presence of both a firm, and a time effect (refer to Petersen, 2009).
m2 <- felm(y ~ x | firm + time | 0 | firm + time, cmethod = "reghdfe", data = foo)

# Perform panel data OLS with firm and industry-by-time effects, and standard errors estimated in the presence of both a firm, and a time effect (refer to Petersen, 2009).
# i_t is an integer id that is unique for every industry-time combination, so
# it can enter felm() as a single fixed-effect factor.
# NOTE: this step uses dplyr and data.table, which are not loaded in this
# snippet; they are presumably attached elsewhere in the session.
foo <- foo %>%
  group_by(industry, time) %>%
  mutate(i_t = cur_group_id()) %>%
  ungroup() %>% # drop the grouping explicitly before converting
  as.data.table()
m3 <- felm(y ~ x | firm + i_t | 0 | firm + time, cmethod = "reghdfe", data = foo)

Functions

1. Vars_Sum

# Notes:
# 1. Vars_Sum is a function that creates descriptive statistics for the variable(s) of interest. It is also flexible in creating grouped descriptive statistics.
# 2. data is the dataset/frame containing the required data (called foo in the usage examples).
# 3. var is the variable(s) of interest. In the case of multiple variables, var refers to the variables vector.
# 4. ... is the vector of groups. time is the year identifier, e.g., fyear, or datadate. industry is the industry identifier, e.g., sic4, or naics.

# Vars_Sum: descriptive statistics for one variable, optionally by group.
#
# Arguments:
#   data  Data frame containing the required data.
#   var   Unquoted name of the variable to summarise.
#   ...   Optional unquoted grouping variables, e.g., industry, time.
#         With no grouping variables the whole data set is summarised.
#
# Returns one row per group with the columns n, mean, sd, Median, IQR, min
# and max (preceded by the grouping columns, if any). n counts all rows in the
# group, including rows where var is NA; every other statistic drops NAs.
# The grouping is dropped from the result (.groups = "drop").
Vars_Sum <- function(data, var, ...) {
  data %>%
    # The grouping variables are forwarded directly; this replaces the
    # deprecated group_by_() / lazyeval::lazy_dots() combination and matches
    # the tidy-eval style already used for var below.
    group_by(...) %>%
    summarise(
      n = n(),
      mean = mean({{ var }}, na.rm = TRUE),
      sd = sd({{ var }}, na.rm = TRUE),
      Median = median({{ var }}, na.rm = TRUE),
      IQR = IQR({{ var }}, na.rm = TRUE),
      min = min({{ var }}, na.rm = TRUE),
      max = max({{ var }}, na.rm = TRUE),
      .groups = "drop"
    )
}

# HOW TO USE:
# For ungrouped descriptive statistics, simply apply the function as follows: Vars_Sum(foo, var)
# For grouped descriptive statistics, e.g., by industry and year, simply apply the function as follows: Vars_Sum(foo, var, industry, time).

2. Correl_Matrix

# Notes: 
# 1. Correl_Matrix is a function that creates correlation matrices for a vector of variables.
# 2. The purpose of the function is to merge two different sets of correlation matrices in one table, e.g., spearman correlation matrix above the diagonal, and pearson correlation matrix below the diagonal. 
# 3. foo is the dataset/frame containing the required data.

# Correl_Matrix: formatted correlation table with significance stars.
#
# Arguments:
#   foo             Matrix (or object accepted by as.matrix()) with one column
#                   per variable.
#   method          "pearson" (default) or "spearman"; only the first element
#                   is used and it is passed to rcorr() as type.
#   removeTriangle  "upper" (default) or "lower": the triangle, diagonal
#                   included, that is blanked out. Only the first element is
#                   used; any other value leaves the table complete.
#   result          Currently unused; kept so existing calls keep working.
#
# Returns a data frame of character cells: each correlation rounded to two
# decimals followed by its stars ("***" p < .01, "**" p < .05, "*" p < .1).
# Row and column names are the column names of foo.
Correl_Matrix <- function(foo, method = c("pearson", "spearman"), removeTriangle = c("upper", "lower"),
                          result = c("text", "html", "latex")) {
  # rcorr() comes from Hmisc. Stop with a clear message when the package is
  # missing instead of failing later with "could not find function".
  if (!require(Hmisc)) {
    stop("Package 'Hmisc' is required by Correl_Matrix().", call. = FALSE)
  }
  # xtable is not used inside this function; it is still attached because the
  # usage example calls xtable() on the returned table.
  require(xtable)

  # Validate the requested method; an unknown method is an error.
  method <- match.arg(method[1], c("pearson", "spearman"))

  # Compute correlations matrix.
  foo <- as.matrix(foo)
  MATRIX <- rcorr(foo, type = method)
  R <- MATRIX$r # Matrix of correlation coefficients.
  p <- MATRIX$P # Matrix of correlation coefficients p-values.

  ## Define notions for significance levels; spacing is important.
  mystars <- ifelse(p < .01, "*** ", ifelse(p < .05, "**  ", ifelse(p < .1, "*   ", "    ")))

  ## Round the correlations matrix to two decimals. The temporary -1.11 column
  ## makes format() reserve room for a minus sign and two decimals; it is
  ## dropped again straight away.
  R <- format(round(cbind(rep(-1.11, ncol(foo)), R), 2))[, -1]

  ## Build a new matrix that includes the correlations with their appropriate stars.
  Rnew <- matrix(paste0(R, mystars), ncol = ncol(foo))
  ## The diagonal carries no stars, so it is overwritten.
  diag(Rnew) <- paste0(diag(R), " ")
  rownames(Rnew) <- colnames(foo)
  colnames(Rnew) <- colnames(foo)

  ## Blank out the requested triangle, diagonal included.
  if (removeTriangle[1] == "upper") {
    Rnew[upper.tri(Rnew, diag = TRUE)] <- ""
  } else if (removeTriangle[1] == "lower") {
    Rnew[lower.tri(Rnew, diag = TRUE)] <- ""
  }

  ## Return the table explicitly; previously the result was only the invisible
  ## value of the last if/else branch, i.e. NULL when neither branch ran.
  as.data.frame(Rnew)
}

# HOW TO USE:
# Correl_Matrix requires a matrix with the variables of interest as input.
# This is a template: the ... in select() stands for the variables of interest
# and has to be replaced before the code can run.
Correlation <- foo %>%
  select(...) %>%
  data.matrix()
# Lower has its lower triangle blanked out, so it keeps the Spearman
# correlations above the diagonal.
Lower <- Correl_Matrix(Correlation, removeTriangle = "lower", method = "spearman")
# Upper has its upper triangle blanked out, so it keeps the Pearson
# correlations below the diagonal.
Upper <- Correl_Matrix(Correlation, removeTriangle = "upper", method = "pearson")
# Combine both: start from the Pearson half, set the diagonal to 1, and copy
# the Spearman half into the cells above the diagonal.
Matrix <- Upper
diag(Matrix) <- 1
Matrix[upper.tri(Matrix)] <- Lower[upper.tri(Lower)]
# NOTE(review): the type and file strings look like placeholders; choose one
# output type and a real file name, and confirm against the xtable
# documentation which type values print() accepts.
print(xtable(Matrix), type = "html/text/latex", file = "File name")