Commit 372ab10a authored by Knut Wenzig's avatar Knut Wenzig
Browse files

neue Version: komplettes SOEP Survey Paper

parent 0652099a
File added
This diff is collapsed.
......@@ -3,10 +3,13 @@
# https://gitlab.soep.de/kwenzig/dortools/wikis/home
#
# - R-Paket rmarkdown
#
# bei: Error: pandoc version 1.12.3 or higher is required and was not found.
# > Sys.setenv(RSTUDIO_PANDOC="C:/Program Files/RStudio/bin/pandoc")
#
# - aktuelles Repository additional metadata
# https://gitlab.soep.de/kwenzig/additionalmetadata
# library(xlsx)
library(xlsx)
library(foreign)
library(dortools)
# Pfade
......@@ -16,15 +19,28 @@ library(dortools)
# Ordner mit codebook.csv
codebookcsvpath <- "D:/lokal/core-doku/v31/datasets/health/"
codebookcsvpath <- "D:/lokal/dokumentation_fams/datasets/bep_mig/"
codebookcsvpath <- "D:/lokal/piaac-l-2014/Metadata/datasets/ZA5989_Household_14/"
codebookcsvpath <- paste0("D:/lokal/core-doku/v31/datasets/", dtan, "/")
codebookcsvpath <- "D:/lokal/core-doku/v31/datasets/bepgen/"
# codebookcsvpath <- paste0("D:/lokal/core-doku/v31/datasets/", dtan, "/")
codebookcsvpath <- "D:/lokal/dokumentation_fams/datasets/bep_mig/"
codebookcsvpath <- "D:/lokal/dokumentation_fams/datasets/bioagel/"
codebookcsvpath <- "D:/lokal/piaac-l-2014/Metadata/datasets/ZA5989_Persons_14/"
codebookcsvpath <- "D:/lokal/dokumentation_fams/datasets/biosoc/"
codebookcsvpath <- "D:/lokal/dokumentation_fams/datasets/bekind/"
codebookcsvpath <- "D:/lokal/piaac-l-2014/Metadata/datasets/ZA5989_Registry_M/"
codebookcsvpath <- "D:/lokal/piaac-l-2014/Metadata/datasets/ZA5989_Registry/"
codebookcsvpath <- "D:/lokal/piaac-l-2014/Metadata/datasets/ZA5989_Weights_14/"
codebookcsvpath <- "D:/lokal/piaac-l-2014/Metadata/datasets/ZA5989_Persons_14_M/"
codebookcsvpath <- "D:/lokal/piaac-l-2014/Metadata/datasets/ZA5989_Household_14_M/"
codebookcsvpath <- "D:/lokal/piaac-l-2014/Metadata/datasets/ZA5989_Household_14/"
codebookcsvpath <- "D:/lokal/piaac-l-2014/Metadata/datasets/ZA5989_Weights_14/"
codebookcsvpath <- "D:/lokal/core-doku/v31/datasets/bepbrutto/"
codebookcsvpath <- "D:/lokal/piaac-l-2014/Metadata/datasets/ZA5989_Persons_14/"
codebookcsvpath <- "D:/lokal/isdatadoku/datasets/bioparen/"
codebookcsvpath <- paste0("D:/lokal/isdatadoku/datasets/",dataset.name,"/")
codebookcsvpath <- "D:/lokal/core-doku/v31/datasets/health/"
codebookcsvpath <- "D:/lokal/isdatadoku/datasets/inno/"
codebookcsvpath <- "D:/lokal/core-doku/v31/datasets/behgen/"
distribution <- "2014"
distribution <- "v31.1"
language <- "en"
allow.markdown <- TRUE
collapse.variable.groups <- TRUE
......@@ -36,21 +52,29 @@ display.question.id <- FALSE
do.xelatex <- TRUE # LaTeX-Lauf starten
rename.pdf <- TRUE # pdf>questionnaire umbenennen do.xelatex=T
running.title.only <- FALSE # Kolumnentitel: with.title muss TRUE sein
with.footer <- TRUE # Fußzeile (questionnaire und Seite)
first.page.number <- 1
is.surveypaper <- FALSE
has.schmutztitel <- TRUE
schmutztitel.has.DIWSOEPLogo <- TRUE
running.title.only <- FALSE # Kolumnentitel (depricated, ergibt sich aus is.surveypaper/has.schmutztitel)
with.footer <- TRUE # Fußzeile (questionnaire und Seite) (depricated, für SOEP-Papers)
first.page.number <- 1 # depricated
toc.on.page <- -1 # -1 wählt gute defaults, z.B. 2 bei SOEP Survey-Papers
# Pfade
# Ordner, in dem das Stata-12-File liegt (saveold!)
datapath <- "D:/lokal/min_wage/datasets/Bus2015_Mindestlohn/"
datapath <- "D:/lokal/dortest/t1/datasets/auto/"
datapath <- "//hume/soep-data/DATA/soep31_en/stata/"
datapath <- "//hume/soep-data/DATA/soep31_de/stata/"
datapath <- "S:/MA/kwenzig/piaac_l/"
datapath <- paste0("//hume/soep-data/DATA2/SOEP-IS/SOEP-IS 2014 Generierung HiWi/Data/finaldata/",
toupper(language), "/")
datapath <- "//hume/soep-data/DATA/soep31_en/stata/"
# Ordner des repositories (enthaelt Ordner LaTex und R)
path <- "D:/lokal/codebooker/"
titletexfile <- paste0(path,"LaTeX/titlegen.tex")
covertexfile <- paste0(path,"LaTeX/covergen.tex")
texfile <- paste0(path,"LaTeX/codebookgen.tex")
addpath <- "D:/lokal/additionalmetadata/" # additional metadata
......@@ -223,13 +247,6 @@ logical_variables.csv <- multiCSVopen(type="logical_variables",
questionnaires.csv <- read.csv(paste0(meta.path, "questionnaires.csv"),
colClasses="character",encoding="UTF-8")
# titles.csv ist ein selbst administrierter Datensatz mit Informationen
# über die Titelseite (eine Zeile pro Datensatz)
titles.csv <- read.csv(paste0(addpath, "titles.csv"),
colClasses="character",encoding="UTF-8")
# Export titel-file für Weitergabe
# write.xlsx(titles.csv, paste0(addpath, "titles_export.xls"))
# Einschränkung auf benötigte Informationen
variable_categories.csv <- variable_categories.csv[variable_categories.csv$
......@@ -366,7 +383,7 @@ if(language=="de"){
codebook.csv <- SwitchLanguageDE(codebook.csv, "label")
}
TitleLaTeX <- function(study, distribution, dataset, version) {
TitleLaTeX <- function(study, distribution, dataset, version, language) {
# Für den Datensatz dataset wird ein LaTeX-Code für die Titelseite des
# Codebooks erstellt.
#
......@@ -379,11 +396,12 @@ TitleLaTeX <- function(study, distribution, dataset, version) {
titles.csv$dataset==dataset.name & titles.csv$version==version) ==
TRUE))
title <- titles.csv$Title[title.row]
series <- TeXifyStr(titles.csv$Series[title.row])
seriesno <- TeXifyStr(titles.csv$No[title.row])
if(nchar(seriesno)>0){
seriesno <- paste0("SOEP Survey Paper ", seriesno)
}
date <- TeXifyStr(titles.csv$date[title.row])
date <- TeXifyStr(titles.csv$Erscheinungsjahr[title.row])
author <- titles.csv$Autor[title.row]
publishers <- TeXifyStr(titles.csv$publishers[title.row])
doi <- titles.csv$zuDOI[title.row]
......@@ -493,7 +511,7 @@ VariableLaTeX <- function(variable) {
# - Absatz mit Prosa
# - Absatz mit references
cat(variable) # debug
# cat(variable) # debug
# variable <- "k_cost" # debug
# variablenname als LaTeX-String
var.latex <- TeXifyStr(codebook.csv$variable.print[codebook.csv$variable==variable])
......@@ -512,7 +530,7 @@ VariableLaTeX <- function(variable) {
# References mit Formatierung it
ref.latex <- paste0("\\itshape ",
ref.orig,
"\\normalfont\n\n")
"\\upshape\n\n")
# Originalinhalt von contact
contact.orig <- TeXifyStr(codebook.csv$contact[codebook.csv$variable==variable])
......@@ -575,7 +593,7 @@ VariableLaTeX <- function(variable) {
c("value", "label", "frequency")]
if(optimize.tables){
cat.rows <- nrow(cat.latex)
cat(cat.rows)
#cat(cat.rows)
if(cat.rows>31){
rows.omitted <- cat.rows-30
frequency.omitted <- as.character(sum(as.numeric(cat.latex[16:(cat.rows-15),
......@@ -717,11 +735,69 @@ if(collapse.variable.groups) {
" [", l10n("generische.Information",language), "]")
}
# titles.csv is a manually maintained table with title-page information
# (one row per dataset). It is reduced here to the single row that matches
# the current study / distribution / dataset / version; the scalar values
# extracted below are taken from that row.
titles.csv <- read.csv(paste0(addpath, "titles.csv"),
                       colClasses="character", encoding="UTF-8")
title.row <- which((titles.csv$study==study & titles.csv$distribution==distribution &
                      titles.csv$dataset==dataset.name & titles.csv$version==version) ==
                     TRUE)
# Keep only the first matching row. Without any match the subsetting yields
# an all-NA row, which is replaced by empty strings so that the nchar() /
# paste0() calls below operate on "" instead of NA.
titles.csv <- titles.csv[title.row,][1,]
if(length(title.row)==0){
  titles.csv[1, ] <- ""
}

title <- titles.csv$Title
series <- TeXifyStr(titles.csv$Series)
paperno <- TeXifyStr(titles.csv$No)
year <- TeXifyStr(titles.csv$Erscheinungsjahr)
author <- titles.csv$Autor
doi <- titles.csv$zuDOI
dataset.group <- titles.csv$dataset.group

# Footnote naming the DOI of the data collection; empty when no DOI is known.
# isTRUE() keeps the conditions scalar and FALSE for NA or a missing column,
# where the bare comparison would make if() fail.
if(isTRUE(nchar(doi)>0)){
  if(isTRUE(dataset.group!="")){
    the.this.file.s <- paste0("The files ", TeXifyStr(dataset.group), " are")
  } else {
    the.this.file.s <- paste0("The file ", TeXifyStr(dataset.name), " is")
  }
  dataset.footnote <- paste0(the.this.file.s,
                             " part of a collection, which is released with \\hyperref{http://dx.doi.org/",
                             doi, "}{}{}{doi:", TeXifyStr(doi), "}.")
} else {
  dataset.footnote <- ""
}

# Label of the current study from studies.csv; falls back to the study
# identifier when the label column is absent, no row matches (NA) or the
# label is empty. Scalar || short-circuits, so nchar() is never evaluated
# on NULL or NA (the elementwise | produced logical(0) / NA and if() failed).
studies.csv <- read.csv(paste0(meta.path,"studies.csv"),
                        colClasses="character", encoding="UTF-8")
studies.csv <- studies.csv[studies.csv$study==study, ]
study.label <- studies.csv$label[1]
if(is.null(study.label) || is.na(study.label) || nchar(study.label)==0){
  study.label <- study
}
# Export titel-file für Weitergabe
write.xlsx(titles.csv, paste0(addpath, "titles_export.xls"))
# Ausgabe des Titelsnippets in Datei
cat(TitleLaTeX(study, distribution, dataset.name, version),
cat(TitleLaTeX(study, distribution, dataset.name, version, language),
file=(con <- file(titletexfile, "w", encoding = "UTF-8")),
sep="", fill=FALSE, labels=NULL, append=FALSE)
close(con)
# Ausgabe des Coversnippets in Datei
cover.infos <- CoverLaTeX(language=language,
study=study, study.label=study.label, distribution=distribution,
dataset.name=dataset.name, version=version,
title=title, author=author, date=year, papertitle.footnote=dataset.footnote,
is.surveypaper = is.surveypaper, series = series, paperno=paperno,
has.schmutztitel=has.schmutztitel,
schmutztitel.has.DIWSOEPLogo = schmutztitel.has.DIWSOEPLogo,
toc.on.page = toc.on.page)
cat(cover.infos,
file=(con <- file(covertexfile, "w", encoding = "UTF-8")),
sep="", fill=FALSE, labels=NULL, append=FALSE)
close(con)
# Ausgabe des Datensatzsnippets in Datei
cat(DatasetLaTeX(codebook.csv),
......@@ -730,23 +806,29 @@ cat(DatasetLaTeX(codebook.csv),
close(con)
if(do.xelatex){
if(is.surveypaper | has.schmutztitel){
tex.file.name <- "spp_codebook"
} else {
tex.file.name <- "codebook"
}
if(is.surveypaper){
rename.to.filename <- paste0("diw_ssp", substr("0000", 1, 4-nchar(paperno)),
paperno, ".pdf")
} else {
rename.to.filename <- paste0("c_", study, "_", distribution, "_",
dataset.name, "_", version,"_", language, ".pdf")
}
xelatexpath <- paste0(path,"LaTeX/")
xelatexcmd <- paste0("xelatex --aux-directory=", xelatexpath,
" --include-directory=", xelatexpath,
" --output-directory=", xelatexpath,
" ", xelatexpath, "codebook.tex")
" ", xelatexpath, tex.file.name, ".tex")
log <- shell(xelatexcmd, invisible=FALSE, intern=TRUE)
log <- shell(xelatexcmd, invisible=FALSE, intern=TRUE)
log <- shell(xelatexcmd, invisible=FALSE, intern=TRUE)
if(rename.pdf){
file.copy(from=paste0(xelatexpath,"codebook.pdf"),
to=paste0(xelatexpath,
"c_",
study, "_",
distribution, "_",
dataset.name, "_",
version,"_",
language,".pdf"),
file.copy(from=paste0(xelatexpath, tex.file.name, ".pdf"),
to=paste0(xelatexpath, rename.to.filename),
overwrite = TRUE, recursive = FALSE,
copy.mode = FALSE, copy.date = TRUE)
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment