Phonetics Tools in R - Supplementary code

Data processing
- PraatR
Dygraphs
- Create shorter examples from long audio
- Create dygraphs
Getting information from two tiers

Data processing

library(tidyverse)

## ── Attaching packages ────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0

## ── Conflicts ───────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(PraatR)
library(rPraat)
library(phonR)
library(dygraphs)
library(phonTools)

## 
## Attaching package: 'phonTools'

## The following object is masked from 'package:dplyr':
## 
##     slice

library(audio)

## 
## Attaching package: 'audio'

## The following object is masked from 'package:phonTools':
## 
##     play

library(tuneR)

## 
## Attaching package: 'tuneR'

## The following object is masked from 'package:audio':
## 
##     play

## The following objects are masked from 'package:phonTools':
## 
##     normalize, play

library(gss)

absdir = "/Users/marissabarlaz/Desktop/FemaleBP/"
mydir = list.files(absdir, "*.WAV")
mydir

##  [1] "BP17-L-babado.WAV"  "BP17-L-cabido.WAV"  "BP17-L-tributo.WAV"
##  [4] "BP18-L-babado.WAV"  "BP18-L-cabido.WAV"  "BP18-L-tributo.WAV"
##  [7] "BP19-L-babado.WAV"  "BP19-L-cabido.WAV"  "BP19-L-tributo.WAV"
## [10] "BP20-L-babado.WAV"  "BP20-L-cabido.WAV"  "BP20-L-tributo.WAV"
## [13] "BP21-L-babado.WAV"  "BP21-L-cabido.WAV"  "BP21-L-tributo.WAV"

testwav = paste0(absdir, mydir[1])
testwav

## [1] "/Users/marissabarlaz/Desktop/FemaleBP/BP17-L-babado.WAV"

testtext = str_replace(testwav, ".WAV", ".TextGrid")
testformant = str_replace(testwav, ".WAV", ".Formant")
testpitch = str_replace(testwav, ".WAV", ".Pitch")
testpitchtier = str_replace(testwav, ".WAV", ".PitchTier")
testint = str_replace(testwav, ".WAV", ".Intensity")
testinttier = str_replace(testwav, ".WAV", ".IntensityTier")
testtable = str_replace(testwav, ".WAV", ".Table")

PraatR

praat( "To Pitch...", arguments=list( 0.001, 75,350), input=testwav, output=testpitch, overwrite=TRUE )
praat( "Down to PitchTier", input=testpitch, output=testpitchtier, overwrite=TRUE, filetype="headerless spreadsheet" )

praat( "To Intensity...", arguments = list(100, 0), input=testwav, output=testint, overwrite=TRUE )
praat( "Down to IntensityTier", input=testint, output=testinttier, overwrite=TRUE, filetype="text" )

praat( "To Formant (burg)...", arguments = list(0.01, 4, 5000, 0.05, 50), input=testwav, output=testformant, overwrite=TRUE )

Dygraphs

Create shorter examples from long audio

#form.test = formant.read(testformant)
tg.test = tg.read(testtext)
pt.test = pt.read(testpitchtier)
it.test = it.read(testinttier)

tg.testmini = tg.cut0(tg.test, tStart = 0, tEnd = 5)
pt.testmini = pt.cut0(pt.test, tStart = 0, tEnd = 5)
it.testmini = it.cut0(it.test, tStart = 0, tEnd = 5)
#mylength = length(form.test$t[form.test$t<=5])
# form.testmini = form.test
# form.testmini$t = form.testmini$t[1:mylength]
# form.testmini$xmax = form.testmini$t[mylength]
# form.testmini$frame = form.test$frame[1:mylength] 
# form.testmini$nx = mylength

# formlist = data.frame(form.test$t, do.call("rbind", form.test$frame)) %>% mutate(frequency = str_replace_all(frequency, "(^[^[:digit:]]+|\\)$)", ""), bandwidth = str_replace_all(bandwidth, "(^[^[:digit:]]+|\\)$)", "")) %>% separate(frequency, into =c("f1", "f2", "f3", "f4", "f5"), sep = ",") %>% separate(bandwidth, into =c("f1_bw", "f2_bw", "f3_bw", "f4_bw", "f5_bw"), sep = ",") %>% mutate_at(.vars = c(4:13), .funs  = "as.numeric")

Create dygraphs

#tg.plot(tg.test, group = "testplot")
#pt.plot(pt.test, group = "testplot")
#it.plot(it.test, group = "testplot")
# formant.plot(form.test, group = "testplot", scaleIntensity = TRUE, drawBandwidth = TRUE)
# tg.plot(tg.testmini, formant = form.testmini)
# tg.plot(tg.testmini, pt = pt.testmini, it = it.testmini)

tg.plot(tg.testmini, group = "testplot2")

pt.plot(pt.testmini, group = "testplot2")

it.plot(it.testmini, group = "testplot2")

# formant.plot(form.testmini, group = "testplot2", scaleIntensity = TRUE, drawBandwidth = TRUE)
#tg.plot(tg.testmini, formant = form.testmini)
tg.plot(tg.testmini, pt = pt.testmini, it = it.testmini)

#plot sound
sndWav <- readWave(testwav)
fs <- sndWav@samp.rate
snd <- sndWav@left / (2^(sndWav@bit-1))
t <- seqM(0, (length(snd)-1)/fs, by = 1/fs)

snddf = data.frame(t, snd) %>% filter(t <=5)
dygraph(snddf, xlab = "Time (sec)", group = "testplot")%>% dyRangeSelector(height = 20)

#tg.plot(tg.testmini, formant = form.testmini, group = "testplot")

Getting information from two tiers

Sometimes you will want to include information from two tiers - for example, if you have a phone tier and a syllable tier, you might want to know what syllable each phone is in. I give an example using the data.table package here. The exmaple is given using the sample data that comes from the rPraat package, sicne my dissertation data does not include two annotated tiers.

library(data.table)

## 
## Attaching package: 'data.table'

## The following objects are masked from 'package:dplyr':
## 
##     between, first, last

## The following object is masked from 'package:purrr':
## 
##     transpose

#get sample data
tgtg = tg.sample()
ptsamp = pt.sample()

#create data frames for phone and syllable tiers
tg1 = data.frame(t11 =tgtg$phone$t1, t21 = tgtg$phone$t2, label1 = tgtg$phone$label)
tg2 = data.frame(t12 =tgtg$syllable$t1, t22 = tgtg$syllable$t2, label2= tgtg$syllable$label)

head(tg1)

##          t11        t21 label1
## 1 0.00800000 0.09657247       
## 2 0.09657247 0.14520627      j
## 3 0.14520627 0.21598818     a:
## 4 0.21598818 0.29489410      c
## 5 0.29489410 0.35215657      i
## 6 0.35215657 0.39789757    P\\

head(tg2)

##          t12        t22 label2
## 1 0.00000000 0.09657247       
## 2 0.09657247 0.21598818    ja:
## 3 0.21598818 0.35215657     ci
## 4 0.35215657 0.53570333  P\\ek
## 5 0.53570333 0.63220031     nu
## 6 0.63220031 0.76000949   t_so

#this is getting five points of f0 throughout each phoneme. 
#first, get times for calculating f0 and saving them into the tg data frame
tg1= tg1 %>% mutate(RepNo = as.numeric(as.factor(t11))) %>% group_by(t11) %>% 
  mutate(normtime = list(seq(0.2,1.0,.2)),
         acttimenorm= list(seq(from = t11, to = t21, by = (t21-t11)/4))) %>%
  unnest() %>% mutate(f0=0)

#now extracting f0 from the pitch tier data
for (i in 1:length(tg1$t11)){
  mycurpoint = which.min(abs(tg1$acttimenorm[i] - ptsamp$t))
  tg1$f0[i] = ptsamp$f[mycurpoint]
}

head(tg1)

## # A tibble: 6 x 7
## # Groups:   t11 [2]
##      t11    t21 label1 RepNo normtime acttimenorm    f0
##    <dbl>  <dbl> <chr>  <dbl>    <dbl>       <dbl> <dbl>
## 1 0.008  0.0966 ""         1      0.2      0.008   210.
## 2 0.008  0.0966 ""         1      0.4      0.0301  210.
## 3 0.008  0.0966 ""         1      0.6      0.0523  210.
## 4 0.008  0.0966 ""         1      0.8      0.0744  210.
## 5 0.008  0.0966 ""         1      1        0.0966  210.
## 6 0.0966 0.145  "j"        2      0.2      0.0966  210.

#create data tables and set keys
tg1 = data.table(tg1)
tg2 = data.table(tg2)
setkey(tg1)
setkey(tg2)
#combine syllable level with phone level tgs
tgall = foverlaps(tg1, tg2, by.x  = c("t11", "t21"), by.y = c("t12", "t22"), type = "within")
tgall = data.frame(tgall)
head(tgall, 15)

##           t12        t22 label2        t11        t21 label1 RepNo normtime
## 1  0.00000000 0.09657247        0.00800000 0.09657247            1      0.2
## 2  0.00000000 0.09657247        0.00800000 0.09657247            1      0.4
## 3  0.00000000 0.09657247        0.00800000 0.09657247            1      0.6
## 4  0.00000000 0.09657247        0.00800000 0.09657247            1      0.8
## 5  0.00000000 0.09657247        0.00800000 0.09657247            1      1.0
## 6  0.09657247 0.21598818    ja: 0.09657247 0.14520627      j     2      0.2
## 7  0.09657247 0.21598818    ja: 0.09657247 0.14520627      j     2      0.4
## 8  0.09657247 0.21598818    ja: 0.09657247 0.14520627      j     2      0.6
## 9  0.09657247 0.21598818    ja: 0.09657247 0.14520627      j     2      0.8
## 10 0.09657247 0.21598818    ja: 0.09657247 0.14520627      j     2      1.0
## 11 0.09657247 0.21598818    ja: 0.14520627 0.21598818     a:     3      0.2
## 12 0.09657247 0.21598818    ja: 0.14520627 0.21598818     a:     3      0.4
## 13 0.09657247 0.21598818    ja: 0.14520627 0.21598818     a:     3      0.6
## 14 0.09657247 0.21598818    ja: 0.14520627 0.21598818     a:     3      0.8
## 15 0.09657247 0.21598818    ja: 0.14520627 0.21598818     a:     3      1.0
##    acttimenorm       f0
## 1   0.00800000 210.0627
## 2   0.03014312 210.0627
## 3   0.05228623 210.0627
## 4   0.07442935 210.0627
## 5   0.09657247 210.0627
## 6   0.09657247 210.0627
## 7   0.10873092 219.4931
## 8   0.12088937 221.2859
## 9   0.13304782 224.3650
## 10  0.14520627 230.2947
## 11  0.14520627 230.2947
## 12  0.16290175 203.8227
## 13  0.18059722 189.5803
## 14  0.19829270 192.0104
## 15  0.21598818 195.8509

Marissa Barlaz, PhD