The aim of this document is to automate acoustic data acquisition in R without having to directly use Praat scripting. Rather, these functions will interact with Praat for you. You will need to segment files in Praat manually. Given the absolute directory to a folder, the functions will loop through a folder that has a set of .WAV files and their associated segmented .TextGrid files. The extension can be .wav or .WAV.
This file includes two functions. The first is create_praat_files, which interacts with Praat via the package PraatR, and creates the relevant files on your computer from the Praat environment. The second is get_acoustic_data, which can utilize these files to get time-dynamic acoustic data (with any number of time points you wish, including 1, which will get the midpoint value). Any combination of F0, intensity, or formant data can be collected. The output of get_acoustic_data is ready for use in analyses such as GAMM, with minimal further data processing required.
More information on using these functions is given below. The packages PraatR, rPraat, and the tidyverse should be installed on your computer. You also need to have Praat installed on your computer.
# Install any missing packages. requireNamespace() checks for the package
# itself; matching a name against the whole installed.packages() matrix would
# also match other cells of that matrix.
if (!requireNamespace("PraatR", quietly = TRUE)) {
  # PraatR is installed from GitHub, which needs devtools.
  if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools")
  }
  devtools::install_github("usagi5886/PraatR")
}
if (!requireNamespace("rPraat", quietly = TRUE)) {
  install.packages("rPraat")
}
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
# Create the Praat analysis files for every .wav/.WAV file in a folder.
#
# For each sound file <name>.wav the following files are written next to it,
# depending on the switches:
#   pitch     -> <name>.Pitch and <name>.PitchTier
#   intensity -> <name>.Intensity and <name>.IntensityTier
#   formant   -> <name>.Formant and <name>.FormantTable
#
# Arguments:
#   absdir     Absolute path of the folder holding the sound files.
#   pitch      If TRUE, create the pitch files.
#   formant    If TRUE, create the formant files (including the table).
#   intensity  If TRUE, create the intensity files.
#
# Returns NULL; the function is called for the files it writes.
# Stops with an error if the folder contains no .wav files.
create_praat_files <- function(absdir, pitch = TRUE, formant = TRUE,
                               intensity = TRUE) {
  stopifnot(
    is.character(absdir), length(absdir) == 1L, !is.na(absdir), nzchar(absdir)
  )
  # Paths are built by concatenation, so the folder must end in a separator.
  if (!grepl("[/\\\\]$", absdir)) {
    absdir <- paste0(absdir, "/")
  }

  # `pattern` is a regular expression: anchor it to the file extension.
  wav_names <- list.files(absdir, pattern = "\\.wav$", ignore.case = TRUE)
  if (length(wav_names) == 0L) {
    # paste0(absdir, character(0)) would return absdir itself, which would
    # then be handed to Praat as if it were a sound file.
    stop("No .wav files found in ", absdir, call. = FALSE)
  }

  library(PraatR)

  wav_paths <- paste0(absdir, wav_names)
  # Replace only the trailing extension, never a ".wav" inside a folder name.
  swap_ext <- function(ext) sub("\\.wav$", ext, wav_paths, ignore.case = TRUE)
  pitch_paths <- swap_ext(".Pitch")
  pitchtier_paths <- swap_ext(".PitchTier")
  formant_paths <- swap_ext(".Formant")
  intensity_paths <- swap_ext(".Intensity")
  intensitytier_paths <- swap_ext(".IntensityTier")
  formanttable_paths <- swap_ext(".FormantTable")

  # Sound: To Pitch...
  pitch_args <- list(
    0.001, # time step
    75,    # pitch floor
    350    # pitch ceiling
  )
  # Sound: To Intensity...
  intensity_args <- list(
    100, # minimum pitch
    0    # time step
  )
  # Sound: To Formant (burg)...
  formant_args <- list(
    0.001, # time step
    5,     # maximum number of formants
    5500,  # maximum formant
    0.025, # window length
    50     # pre-emphasis from
  )
  # Formant: Down to Table...
  formant_table_args <- list(
    "no",  # include frame number
    "yes", # include time
    6,     # number of time decimals
    "no",  # include intensity
    3,     # number of intensity decimals
    "yes", # include number of formants
    3,     # number of frequency decimals
    "yes"  # include bandwidths
  )

  # Run one Praat command on every file; extra arguments go to praat().
  run_step <- function(command, inputs, outputs, ...) {
    for (i in seq_along(inputs)) {
      praat(command, input = inputs[i], output = outputs[i],
            overwrite = TRUE, ...)
    }
  }

  if (pitch) {
    print("Creating all Pitch files, please wait patiently.")
    run_step("To Pitch...", wav_paths, pitch_paths, arguments = pitch_args)
    print("Creating all PitchTier files, please wait patiently.")
    run_step("Down to PitchTier", pitch_paths, pitchtier_paths,
             filetype = "headerless spreadsheet")
  }

  if (intensity) {
    print("Creating all Intensity files, please wait patiently.")
    run_step("To Intensity...", wav_paths, intensity_paths,
             arguments = intensity_args)
    print("Creating all IntensityTier files, please wait patiently.")
    run_step("Down to IntensityTier", intensity_paths, intensitytier_paths,
             filetype = "text")
  }

  if (formant) {
    print("Creating all Formant files, please wait patiently.")
    run_step("To Formant (burg)...", wav_paths, formant_paths,
             arguments = formant_args)
    # The table is derived from the .Formant files, so it is only built when
    # formant analysis was requested.
    print("Creating all FormantTable files, please wait patiently.")
    run_step("Down to Table...", formant_paths, formanttable_paths,
             arguments = formant_table_args, filetype = "comma-separated")
  }

  NULL
}
# Collect time-normalised acoustic measurements for every labelled interval
# of one TextGrid tier, for all .wav/.WAV files in a folder.
#
# The function reads <name>.TextGrid plus, depending on the switches,
# <name>.PitchTier, <name>.IntensityTier and <name>.FormantTable, and looks up
# the sample closest in time to each requested time point.
#
# Arguments:
#   absdir        Absolute path of the folder holding the files.
#   tiername      Name of the interval tier to read; intervals with an empty
#                 label are dropped.
#   numpoints     Number of time points per interval (whole number >= 1).
#                 1 gives a single midpoint measurement.
#   pitch         If TRUE, add column F0.
#   formant       If TRUE, add columns F1, B1, ..., F5, B5.
#   intensity     If TRUE, add column Intensity.
#   remove_files  If TRUE, delete the Praat files of each requested measure
#                 after they have been read.
#
# Returns one data frame with a row per interval and time point: filename,
# tmin, tmax, label, RepNo (only when numpoints > 1), normtime, acttimenorm
# and the requested measures. The result is not grouped.
# Stops with an error if the folder has no .wav files or a TextGrid lacks
# the requested tier.
get_acoustic_data <- function(absdir, tiername = "segment", numpoints = 10,
                              pitch = TRUE, formant = TRUE, intensity = TRUE,
                              remove_files = FALSE) {
  stopifnot(
    is.character(absdir), length(absdir) == 1L, !is.na(absdir), nzchar(absdir),
    is.numeric(numpoints), length(numpoints) == 1L, !is.na(numpoints),
    numpoints >= 1, numpoints == round(numpoints)
  )
  # Paths are built by concatenation, so the folder must end in a separator.
  if (!grepl("[/\\\\]$", absdir)) {
    absdir <- paste0(absdir, "/")
  }

  # `pattern` is a regular expression: anchor it to the file extension.
  wav_names <- list.files(absdir, pattern = "\\.wav$", ignore.case = TRUE)
  if (length(wav_names) == 0L) {
    stop("No .wav files found in ", absdir, call. = FALSE)
  }

  library(rPraat)
  library(tidyverse)

  # Replace only the trailing extension, never a ".wav" earlier in the path.
  swap_ext <- function(path, ext) sub("\\.wav$", ext, path, ignore.case = TRUE)

  # Index of the sample in `grid` closest to each element of `times`;
  # NA when `grid` has no usable sample.
  nearest_index <- function(times, grid) {
    vapply(times, function(x) {
      hit <- which.min(abs(x - grid))
      if (length(hit) == 0L) NA_integer_ else hit
    }, integer(1))
  }

  # Values at the given indices as a numeric vector; all NA when the source
  # column does not exist.
  sample_at <- function(values, idx) {
    if (is.null(values)) {
      return(rep(NA_real_, length(idx)))
    }
    as.numeric(values[idx])
  }

  # Output order of the formant columns: F1, B1, F2, B2, ...
  formant_cols <- as.vector(rbind(paste0("F", 1:5), paste0("B", 1:5)))

  per_file <- vector("list", length(wav_names))
  for (j in seq_along(wav_names)) {
    print(paste("Currently processing file", wav_names[j], "... Please wait patiently."))
    curwav <- paste0(absdir, wav_names[j])
    curtext <- swap_ext(curwav, ".TextGrid")
    curpitch <- swap_ext(curwav, ".Pitch")
    curpitchtier <- swap_ext(curwav, ".PitchTier")
    curint <- swap_ext(curwav, ".Intensity")
    curinttier <- swap_ext(curwav, ".IntensityTier")
    curformant <- swap_ext(curwav, ".Formant")
    curformanttable <- swap_ext(curwav, ".FormantTable")

    tier <- tg.read(curtext)[[tiername]]
    if (is.null(tier)) {
      stop("Tier '", tiername, "' not found in ", curtext, call. = FALSE)
    }
    cur <- data.frame(
      filename = wav_names[j],
      tmin = tier$t1,
      tmax = tier$t2,
      label = tier$label
    ) %>%
      filter(label != "")

    if (numpoints == 1) {
      cur <- cur %>%
        mutate(normtime = 0.5, acttimenorm = (tmin + tmax) / 2)
    } else {
      cur <- cur %>%
        mutate(RepNo = as.numeric(as.factor(tmin)))
      n_seg <- nrow(cur)
      # One row per interval and time point, intervals kept in file order.
      cur <- as_tibble(cur[rep(seq_len(n_seg), each = numpoints), , drop = FALSE])
      # NOTE(review): normtime runs from 0.1 to 1 while the measured times run
      # from tmin to tmax, so normtime is a label for the point rather than
      # the exact proportion of the interval. Kept as is for compatibility.
      cur$normtime <- rep(seq(0.1, 1.0, length.out = numpoints), times = n_seg)
      # Built with length.out so both columns always have numpoints values.
      cur$acttimenorm <- cur$tmin + (cur$tmax - cur$tmin) *
        rep(seq(0, 1, length.out = numpoints), times = n_seg)
    }

    if (pitch) {
      pitch_tier <- pt.read(curpitchtier)
      pitch_idx <- nearest_index(cur$acttimenorm, pitch_tier$t)
      cur$F0 <- sample_at(pitch_tier$f, pitch_idx)
      if (remove_files) file.remove(curpitch, curpitchtier)
    }

    if (intensity) {
      intensity_tier <- it.read(curinttier)
      intensity_idx <- nearest_index(cur$acttimenorm, intensity_tier$t)
      cur$Intensity <- sample_at(intensity_tier$i, intensity_idx)
      if (remove_files) file.remove(curint, curinttier)
    }

    if (formant) {
      # Treat "--undefined--" as NA at read time and parse every column as
      # numeric, so no later comparison of numbers against a string is needed.
      formant_table <- suppressMessages(read_delim(
        curformanttable,
        delim = ",",
        na = c("", "NA", "--undefined--"),
        col_types = cols(.default = col_double())
      ))
      formant_idx <- nearest_index(cur$acttimenorm, formant_table[["time(s)"]])
      for (col in formant_cols) {
        cur[[col]] <- sample_at(formant_table[[paste0(col, "(Hz)")]], formant_idx)
      }
      if (remove_files) file.remove(curformant, curformanttable)
    }

    per_file[[j]] <- cur
  }

  bind_rows(per_file)
}
In order to use these functions, you need the following arguments:
For create_praat_files, which creates the actual .Pitch, .Intensity, .Formant, etc. files on your computer, you need to specify the absolute directory (starting with /Users… on a Mac, and C:/ on a Windows computer) and ending with a trailing slash, as in the example below. You also need to decide if you are going to analyze Pitch, Intensity, or Formants. The script will allow you to do all three, but it will take some time. For each of these three measurements, if you would like the files created, leave the values at their default (TRUE); if you would not like them created, change them to FALSE. Note that this will take some time to run (depending on how many measures you are running, how long your files are, how many files there are, and the power of your computer), after which the output in your R console will say NULL.
For get_acoustic_data, you need the same absolute directory given above, the name of the tier you have segmented for which you want data, the number of normalized time points (numpoints, default is 10), and then which acoustic variable values you would like (TRUE/FALSE). The last argument is remove_files, which is by default set to FALSE. If you set this to TRUE, it will delete the files off of your computer permanently - this is only for the acoustic measures set to TRUE. (For example, if you create Pitch and Intensity files with create_praat_files, but only run get_acoustic_data for Pitch, it will only delete the pitch-related files.) If you want to redo an analysis for that acoustic measure, you would have to rerun create_praat_files.
The output for get_acoustic_data is a dataframe which will have the following information:
# Folder holding the .WAV files and their .TextGrid files; keep the trailing
# slash, because file paths are built by appending file names to this string.
absdir <- "/Users/marissabarlaz/Desktop/FemaleBP/"
# Pitch-only alternative:
#create_praat_files(absdir, pitch = TRUE, formant = FALSE, intensity = FALSE)
create_praat_files(
  absdir,
  pitch = TRUE,
  formant = TRUE,
  intensity = TRUE
)
## [1] "Creating all Pitch files, please wait patiently."
## [1] "Creating all PitchTier files, please wait patiently."
## [1] "Creating all Intensity files, please wait patiently."
## [1] "Creating all IntensityTier files, please wait patiently."
## [1] "Creating all Formant files, please wait patiently."
## [1] "Creating all FormantTable files, please wait patiently."
## NULL
# Pitch-only alternative that also deletes the pitch files afterwards:
#acoustic_pitch_output = get_acoustic_data(absdir, tiername = "segment", numpoints = 10, pitch = TRUE, formant = FALSE, intensity = FALSE, remove_files = TRUE)
acoustic_all_output <- get_acoustic_data(
  absdir,
  tiername = "segment",
  numpoints = 10,
  pitch = TRUE,
  formant = TRUE,
  intensity = TRUE,
  remove_files = FALSE
)
## [1] "Currently processing file BP17-L-babado.WAV ... Please wait patiently."
## [1] "Currently processing file BP17-L-cabido.WAV ... Please wait patiently."
## [1] "Currently processing file BP17-L-tributo.WAV ... Please wait patiently."
## [1] "Currently processing file BP18-L-babado.WAV ... Please wait patiently."
## [1] "Currently processing file BP18-L-cabido.WAV ... Please wait patiently."
## [1] "Currently processing file BP18-L-tributo.WAV ... Please wait patiently."
## [1] "Currently processing file BP19-L-babado.WAV ... Please wait patiently."
## [1] "Currently processing file BP19-L-cabido.WAV ... Please wait patiently."
## [1] "Currently processing file BP19-L-tributo.WAV ... Please wait patiently."
## [1] "Currently processing file BP20-L-babado.WAV ... Please wait patiently."
## [1] "Currently processing file BP20-L-cabido.WAV ... Please wait patiently."
## [1] "Currently processing file BP20-L-tributo.WAV ... Please wait patiently."
## [1] "Currently processing file BP21-L-babado.WAV ... Please wait patiently."
## [1] "Currently processing file BP21-L-cabido.WAV ... Please wait patiently."
## [1] "Currently processing file BP21-L-tributo.WAV ... Please wait patiently."
# Show the first rows of the combined result.
head(acoustic_all_output)
## # A tibble: 6 x 19
## # Groups: tmin [1]
## filename tmin tmax label RepNo normtime acttimenorm F0 Intensity F1
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 BP17-L-… 0.369 0.566 ao 1 0.1 0.369 204. 54.9 678.
## 2 BP17-L-… 0.369 0.566 ao 1 0.2 0.391 210. 56.7 801.
## 3 BP17-L-… 0.369 0.566 ao 1 0.3 0.412 211. 57.1 826.
## 4 BP17-L-… 0.369 0.566 ao 1 0.4 0.434 209. 56.0 852.
## 5 BP17-L-… 0.369 0.566 ao 1 0.5 0.456 207. 55.2 871.
## 6 BP17-L-… 0.369 0.566 ao 1 0.6 0.478 205. 55.0 877.
## # … with 9 more variables: B1 <dbl>, F2 <dbl>, B2 <dbl>, F3 <dbl>, B3 <dbl>,
## # F4 <dbl>, B4 <dbl>, F5 <dbl>, B5 <dbl>