I work with sales datasets from retail companies. It is important for me to know if a day was a holiday, and to estimate the effect of that holiday on sales.
But also the days before and after a holiday may be informative, that is why I wrote this little routine: It generates an R dataset for the specified years with all the holidays in Austria. It can be easily adjusted to other nations by adopting the “national” vector.
Demetra+ Manual http://www.cros-portal.eu/sites/default/files//Demetra%2B%20User%20Manual%20November%202012.pdf was the source for the holidays. Demetra+ is a free software for seasonal adjustment, but its finest aggregate is the month – which is to coarse for my use case.
makeholidaydataset=function(daysbefore=30,daysafter=7,years=2001:2013){
#this function generates Austrian holidays dataset usable for regressions; adjust the fixed holidays in fh
# to adapt for other countries add line to national
library(data.table)
library(foreign)
#fixed holidays
fh=matrix(c(
"NewYear",1,1,
"MayDay" ,5,1,
"Halloween",10,31,
"AllSaintsDay",11,1,
"Christmas",12,25),ncol=3,byrow=T)
national=matrix(c(
"Nationalholiday",10,26,
"StMary",8,15,
"MariaEmpfaegnis",12,8),ncol=3,byrow=T)
fixedholidays=rbind(fh,national)
fixedholidays=data.table(holiday=fixedholidays[,1],month=as.integer(fixedholidays[,2]),day=as.integer(fixedholidays[,3]))
library(plyr)
gendates=function(year){
dates=as.IDate(ISOdate(year=year,month=fixedholidays$month, day=fixedholidays$day))
data.table(date=dates,holiday=fixedholidays$holiday)
}
DTholidays=data.table(ldply(years,gendates))
#variable holidays: easter related
#easter=read.csv(file="c:/git/da/data/holiday/easter.csv",row.names=NULL)
easter=c("1990-04-15","1991-03-31","1992-04-19","1993-04-11","1994-04-03","1995-04-16","1996-04-07","1997-03-30","1998-04-12","1999-04-04",
"2000-04-23","2001-04-15","2002-03-31","2003-04-20","2004-04-11","2005-03-27","2006-04-16","2007-04-08",
"2008-03-23","2009-04-12","2010-04-04","2011-04-24","2012-04-08","2013-03-31","2014-04-20","2015-04-05",
"2016-03-27","2017-04-16","2018-04-01","2019-04-21")
easterdates=as.IDate(easter)
eastermonday=easterdates+1
ashwednesday=easterdates-46
ascensionday=easterdates+40
pentecost=easterdates+50
whitmonday=pentecost+1
maundythursday=easterdates-3
goodfriday=easterdates-2
variableholidays= rbind(
data.table(holiday="eastersunday",date=easterdates),
data.table(holiday="eastermonday",date=eastermonday),
data.table(holiday="ashwednesday",date=ashwednesday),
data.table(holiday="ascensionday",date=ascensionday),
data.table(holiday="pentecost",date=pentecost),
data.table(holiday="whitmonday",date=whitmonday),
data.table(holiday="maundythursday",date=maundythursday),
data.table(holiday="goodfriday",date=goodfriday)
)
allholidays=rbind(DTholidays,variableholidays,use.names=T)
allholidays$value=1
#generate before and after window
library(reshape2)
A=acast(allholidays,date~holiday,fun.aggregate=sum)
DATES=as.IDate(rownames(A))
offsets=-daysbefore:daysafter
returnoffseteddswithdate=function(offset){
d=DATES+offset
class(d)=c("IDate" ,"Date" )
B=copy(A)
rownames(B)=as.character(d)
B[B==1]=offset
B=as.data.frame(B)
B$date=as.IDate(rownames(B))
B
}
all=data.table(ldply(offsets,returnoffseteddswithdate))
m=melt(all,id.vars="date")
m=data.table(m)
m=m[,list(value=sum(value)),by=list(date,variable)]
all=data.table(dcast(m,date~variable,fun.aggregate=sum))
all=all[order(date)]
all
}
DTholiday=makeholidaydataset()
save(DTholiday,file="holidays.rdata")
easter=DTholiday[eastersunday!=0,list(date,eastersunday)]
setkey(easter,date)
DTisholiday=makeholidaydataset(daysbefore=0,daysafter=0)[,list(date,isholiday=1)]
setkey(DTisholiday,date)
myholidays=merge(DTisholiday,easter,all=T)