diff --git a/film/get_annotation_lengths.R b/film/get_annotation_lengths.R
new file mode 100644
index 0000000..6e313ff
--- /dev/null
+++ b/film/get_annotation_lengths.R
@@ -0,0 +1,72 @@
+#!/usr/bin/env Rscript
+
+# In order to make this script run it is necessary to install following packages:
+
+# install.packages('tidyverse')
+
+args = commandArgs(trailingOnly=TRUE)
+
+# test if there is one argument: if not, return an error
+if (length(args)!=1) {
+        stop("One argument that specifies the input file must be given.\nFor example: Rscript get_annotation_lengths.R elan_file.eaf", call.=FALSE)
+}
+
+library(methods)
+
+suppressPackageStartupMessages(library(tidyverse))
+library(xml2)
+
+retrieve_tier <- function(eaf_xml, tier_prefix){
+        eaf_xml %>% 
+                xml_find_all(paste0("//TIER[starts-with(@TIER_ID,'", tier_prefix, "')]")) %>%
+                map_df(., ~ data_frame(content = .x %>% 
+                                    xml_find_all('ANNOTATION/*/ANNOTATION_VALUE') %>% 
+                                    xml_text,
+                            participant = .x %>% xml_attr('PARTICIPANT'),
+                            a_id = .x %>%
+                                    xml_find_all('ANNOTATION/*') %>%
+                                    xml_attr('ANNOTATION_ID'),
+                            ref_id = .x %>%
+                                    xml_find_all('ANNOTATION/*') %>%
+                                    xml_attr('ANNOTATION_REF'),
+                            ts_1 = .x %>% xml_find_all('ANNOTATION/*') %>%
+                                    xml_attr('TIME_SLOT_REF1'),
+                            ts_2 = .x %>% xml_find_all('ANNOTATION/*') %>%
+                                    xml_attr('TIME_SLOT_REF2')))
+}
+
+eaf_xml <- read_xml(args[1])
+
+eaf <- eaf_xml %>% 
+        retrieve_tier('ref') %>% 
+        select(-ref_id) %>%
+        dplyr::rename(ref_id = a_id,
+                      reference = content) %>%
+        split(.$ref_id) %>%
+        map(., ~ .x %>% 
+                    mutate(time_start = eaf_xml %>%
+                    xml_find_first(paste0("//TIME_SLOT[@TIME_SLOT_ID='", .x$ts_1[1] ,"']")) %>%
+                    xml_attr('TIME_VALUE') %>% as.numeric())) %>%
+        map(., ~ .x %>% 
+                    mutate(time_end = eaf_xml %>%
+                    xml_find_first(paste0("//TIME_SLOT[@TIME_SLOT_ID='", .x$ts_2[1] ,"']")) %>%
+                    xml_attr('TIME_VALUE') %>% as.numeric())) %>%
+        bind_rows %>%
+        left_join(
+                retrieve_tier(eaf_xml, 'orth') %>%
+                        dplyr::rename(orth_id = a_id,
+                                      utterance = content) %>%
+                        select(-ts_1, -ts_2),
+                by = c("participant", "ref_id")) %>%
+        left_join(
+                retrieve_tier(eaf_xml, 'ft-en') %>%
+                        dplyr::rename(orth_id = ref_id,
+                                      ft_eng = content) %>%
+                        select(-ts_1, -ts_2, -a_id),
+                by = c("participant", "orth_id")
+        ) %>% mutate(utterance_length = time_end - time_start)
+
+paste('Segments with annotations:', lubridate::seconds_to_period(sum(eaf %>% .$utterance_length) / 1000))
+paste('Segments with English translations:', lubridate::seconds_to_period(sum(eaf %>% filter(ft_eng != '') %>% .$utterance_length) / 1000))
+paste('Segments with no Cyrillic characters:', lubridate::seconds_to_period(sum(eaf %>% filter(! stringr::str_detect(utterance, '[А-Яа-яӧі]')) %>% .$utterance_length) / 1000))
+paste('Segments which contain Cyrillic characters:', lubridate::seconds_to_period(sum(eaf %>% filter(stringr::str_detect(utterance, '[А-Яа-яӧі]')) %>% .$utterance_length) / 1000))