Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
lecords authored May 29, 2023
1 parent 3d5f4d1 commit fa8b0b4
Show file tree
Hide file tree
Showing 46 changed files with 56,934 additions and 0 deletions.
230 changes: 230 additions & 0 deletions 00_merge_area-size.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
---
title: "R Notebook"
output: html_notebook
---

```{r}
# Fix the RNG seed up front so any random step run after this chunk is
# reproducible.
set.seed(101100)
# All paths are built relative to the parent of the current working directory.
wd <- dirname(getwd())
# Folder holding the clinical data tables.
data_folder <- file.path(wd, "clinical_data")
# Folder for plots produced from the merged data.
plot_folder <- file.path(wd, "results", "merged")
```


```{r}
# Read the per-TMA area measurement tables, stack them, and build `area`: one
# row per image (keyed by TMA_ImageID) with the stroma / tumour / whole-core
# areas in pixels (Area_px_*) and divided by 1e6 (Area_mm_*).
# NOTE(review): the Area_mm_* columns are simply px / 1e6; presumably this is
# mm^2 at 1 um^2 per pixel - confirm the pixel size.
wd <- dirname(getwd())
area_dir <- file.path(wd, "clinical_data")

# TMA identifiers, in the order in which their tables are stacked.
tma_ids <- c(
  "86A", "86B", "86C", "87A", "87B", "87C", "88A", "88B", "88C",
  "175A", "175B", "175C", "176A", "176B", "176C", "178A", "178B", "178C"
)

# One table per TMA, each tagged with the TMA it was read from.
tma_tables <- lapply(tma_ids, function(tma_id) {
  tma_file <- file.path(
    area_dir,
    paste0("Resized_AreaMeasurements_Image_", tma_id, ".csv")
  )
  tma_table <- read.csv(file = tma_file)
  tma_table$TMA <- tma_id
  tma_table
})
t.area <- do.call(rbind, tma_tables)

# Keep only the three area measurements plus the columns that identify the
# image, then fuse TMA and image id into the join key TMA_ImageID.
t.area$ImageID <- t.area$Metadata_acid
area_source_cols <- c(
  "AreaOccupied_AreaOccupied_StromaBinary",
  "AreaOccupied_AreaOccupied_TumourBinary",
  "AreaOccupied_AreaOccupied_TumourStromaBinary"
)
t.area <- t.area[, c(area_source_cols, "TMA", "ImageID")]
t.area$TMA_ImageID <- paste(t.area$TMA, t.area$ImageID, sep = "_")
t.area$TMA <- NULL
t.area$ImageID <- NULL
colnames(t.area) <- c(
  "Area_px_Stroma", "Area_px_Tumour", "Area_px_Core", "TMA_ImageID"
)

# Notebook output: distribution of the core area (px / 1e6).
range(t.area$Area_px_Core) / 1000000
hist(t.area$Area_px_Core / 1000000)
summary(t.area$Area_px_Core / 1000000)

# Same table with the three area columns divided by 1e6. Plain data-frame
# arithmetic replaces the deprecated mutate_at() + funs() combination.
area_px_cols <- c("Area_px_Stroma", "Area_px_Tumour", "Area_px_Core")
t.area.mm <- t.area
t.area.mm[area_px_cols] <- t.area[area_px_cols] / 1000000
# Notebook output: cores whose scaled area exceeds 1.
t.area.mm[t.area.mm$Area_px_Core > 1, ]
colnames(t.area.mm) <- c(
  "Area_mm_Stroma", "Area_mm_Tumour", "Area_mm_Core", "TMA_ImageID"
)

# Pixel and scaled areas side by side, one row per TMA_ImageID.
# dplyr is called by namespace because this notebook never attaches it.
area <- dplyr::left_join(t.area, t.area.mm, by = "TMA_ImageID")
write.csv(area, file = file.path(data_folder, "area.csv"))
```


```{r}
# Attach the per-image area measurements in `area` to the cell metadata of a
# SingleCellExperiment-like object and return the updated object.
#
# sce           object whose colData() contains TMA and ImageID columns
# area          data frame with a TMA_ImageID key column plus area columns
# rename_cells  if TRUE, cell names are rebuilt as
#               TmaID_TmaBlock_ImageID_CellNumber; otherwise the existing
#               cell names are kept
#
# The join is a left join, so the rows stay in the original cell order and
# cells without an area record get NA. (merge() would sort by the key and
# drop unmatched cells, so the metadata would no longer line up with the
# cells.) Key and area columns left over from an earlier run are removed
# first, which makes re-running the chunk safe instead of producing ".x" /
# ".y" duplicates.
add_area_to_coldata <- function(sce, area, rename_cells = FALSE) {
  cell_data <- as.data.frame(colData(sce))
  area_cols <- setdiff(colnames(area), "TMA_ImageID")
  keep_cols <- setdiff(colnames(cell_data), c("TMA_ImageID", area_cols))
  cell_data <- cell_data[, keep_cols, drop = FALSE]

  cell_data <- tidyr::unite(
    cell_data, TMA_ImageID, c(TMA, ImageID),
    sep = "_", remove = FALSE
  )
  cell_data <- dplyr::left_join(cell_data, area, by = "TMA_ImageID")
  if (nrow(cell_data) != ncol(sce)) {
    # Only possible when `area` has duplicated TMA_ImageID keys.
    stop(
      "joining `area` changed the number of cells; ",
      "check `area` for duplicated TMA_ImageID values",
      call. = FALSE
    )
  }

  cell_data <- DataFrame(cell_data)
  if (rename_cells) {
    rownames(cell_data) <- paste(
      cell_data$TmaID, cell_data$TmaBlock,
      cell_data$ImageID, cell_data$CellNumber,
      sep = "_"
    )
  } else if (!is.null(colnames(sce))) {
    # left_join() does not keep row names; restore the cell names so that
    # assigning colData does not wipe them.
    rownames(cell_data) <- colnames(sce)
  }
  colData(sce) <- cell_data
  if (rename_cells) {
    colnames(sce) <- rownames(cell_data)
  }
  sce
}

# ROI / patient level objects: add the area columns once each.
roi.sce <- add_area_to_coldata(roi.sce, area)
pat.sce <- add_area_to_coldata(pat.sce, area)
roi.pat.sce <- add_area_to_coldata(roi.pat.sce, area)
immune.u1 <- add_area_to_coldata(immune.u1, area)
tumour.final <- add_area_to_coldata(tumour.final, area)
vessel.sce <- add_area_to_coldata(vessel.sce, area)
vessel.vessel_pat.sce <- add_area_to_coldata(vessel.vessel_pat.sce, area)

#immune
immune.o <- immune.final  # backup of the object before the merge
immune.final <- add_area_to_coldata(immune.final, area, rename_cells = TRUE)
colData(immune.final)
colnames(colData(immune.final))

#tcells
tcells.o <- tcells.final  # backup of the object before the merge
tcells.final <- add_area_to_coldata(tcells.final, area, rename_cells = TRUE)
colData(tcells.final)
colnames(colData(tcells.final))

#tumour
tumour <- add_area_to_coldata(tumour, area, rename_cells = TRUE)
colData(tumour)
colnames(colData(tumour))

#fibro
fibro.final.new <- add_area_to_coldata(
  fibro.final.new, area,
  rename_cells = TRUE
)
colData(fibro.final.new)
colnames(colData(fibro.final.new))

#vessel
vessel <- add_area_to_coldata(vessel, area, rename_cells = TRUE)
colData(vessel)
colnames(colData(vessel))

# Exploratory checks, kept as written.
# NOTE(review): these filter on TMA == "88_A", while the keys in `area` are
# built from ids such as "88A" - confirm the TMA naming matches, otherwise the
# join above finds no area rows. `fibro` is not assigned in this chunk.
length(unique(tumour[,tumour$TMA=="88_A"]$ImageID))
order(unique(tumour[,tumour$TMA=="88_A"]$ImageID))
fibro$Area_mm
```

```{r}
# Write the per-cell-type objects to <parent of working dir>/TMA_all/SCE.
# NOTE(review): the objects saved here are `immune`, `tcells` and `fibro`,
# whereas the area columns are merged into `immune.final`, `tcells.final` and
# `fibro.final.new` in the chunk above - confirm the intended objects are
# being saved.
wd <- dirname(getwd())
data_folder <- file.path(wd, "TMA_all", "SCE")
set.seed(101100)

# Full output path for one .rds file inside data_folder.
sce_path <- function(file_name) {
  file.path(data_folder, file_name)
}

saveRDS(tumour, file = sce_path("tumour_sce_merge_minus_ctrl_area.rds"))
saveRDS(immune, file = sce_path("immune_sce_merge_minus_ctrl_area.rds"))
saveRDS(tcells, file = sce_path("tcells_sce_merge_minus_ctrl_area.rds"))
saveRDS(fibro, file = sce_path("fibro_sce_merge_minus_ctrl_area.rds"))
saveRDS(vessel, file = sce_path("vessel_sce_merge_minus_ctrl_area.rds"))
```
139 changes: 139 additions & 0 deletions 00_prepare_clinicaldata.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
---
title: "Prepare clinical data and position data"
output: html_notebook
---
```{r install needed packages}
#install.packages("zoo")
library("zoo")
```

```{r Set file structure}
# The clinical and position tables are read from the "clinical_data" folder
# that sits next to the current working directory.
wd <- getwd()
project_root <- dirname(wd)
data_folder <- file.path(project_root, "clinical_data")
```


```{r Read in clinical data}
# Load the clinical table (the file name suggests it covers TMAs 86, 87, 88,
# 175, 176 and 178).
clinical_file <- file.path(data_folder, "TMA_86_87_88_175_176_178_USZ.csv")
clinical.data <- read.csv(file = clinical_file)
```

```{r Read in position files}
# Read one TMA position table from data_folder. Empty cells and the literal
# string "NA" are both read as missing values.
read_position_table <- function(file_name) {
  read.csv(
    file = file.path(data_folder, file_name),
    header = TRUE,
    na.strings = c("", "NA")
  )
}

p86 <- read_position_table("zTMA_86.csv")
p87 <- read_position_table("zTMA_87.csv")
p88 <- read_position_table("zTMA_88.csv")
p175 <- read_position_table("zTMA_175.csv")
p176 <- read_position_table("zTMA_176.csv")
p178 <- read_position_table("zTMA_178.csv")
```

```{r Add missing patient numbers and ctrls}
# Fill empty Patient_Nr cells with the last value seen above them
# (last observation carried forward).
# na.rm = FALSE keeps leading NAs in place. With the default (na.rm = TRUE)
# na.locf() drops them and returns a shorter vector, which is then either
# silently recycled into the column or rejected by the assignment.
p86$Patient_Nr <- na.locf(p86$Patient_Nr, na.rm = FALSE)
p87$Patient_Nr <- na.locf(p87$Patient_Nr, na.rm = FALSE)
p88$Patient_Nr <- na.locf(p88$Patient_Nr, na.rm = FALSE)
p175$Patient_Nr <- na.locf(p175$Patient_Nr, na.rm = FALSE)
p176$Patient_Nr <- na.locf(p176$Patient_Nr, na.rm = FALSE)
p178$Patient_Nr <- na.locf(p178$Patient_Nr, na.rm = FALSE)
```

```{r Add TMA column to position data}
# Tag every row of each position table with the TMA it belongs to. The id is
# stored as a character string.
p86$TMA <- "86"
p87$TMA <- "87"
p88$TMA <- "88"
p175$TMA <- "175"
p176$TMA <- "176"
p178$TMA <- "178"
```

```{r merge clinical data and position data}
# Stack the six position tables row-wise into a single table.
p_combined <- rbind(p86, p87, p88, p175, p176, p178)

# Work on a copy of the clinical table in which the "Nr." column is renamed
# to Patient_Nr, so that both tables share their key column names.
clinical.data_pos <- clinical.data
is_nr_column <- colnames(clinical.data_pos) == "Nr."
colnames(clinical.data_pos)[is_nr_column] <- "Patient_Nr"

# Keep the rows whose (Patient_Nr, TMA) pair occurs in both tables.
join_keys <- c("Patient_Nr", "TMA")
clinical.data_pos <- merge(p_combined, clinical.data_pos, by = join_keys)
```

```{r save combined clinical and position data as csv file}
# Re-load the combined clinical + position table, harmonise the diagnosis
# names, convert the coded columns to factors, derive ROI / patient ids and
# summary flags, and write the result to clinical_data.csv.
# NOTE(review): the write.csv() below is commented out, so
# combined_clinical_position_data.csv must already exist in data_folder.
#write.csv(clinical.data_pos, file=file.path(data_folder,paste("combined_clinical_position_data.csv")))
clinical.data <- read.csv(
  file = file.path(data_folder, "combined_clinical_position_data.csv")
)

# Harmonise free-text diagnosis names. Values not listed are left unchanged.
table(clinical.data$DX.name)
dx_recode <- c(
  "Adeno-Ca" = "Adenocarcinoma",
  "Adeno-Ca " = "Adenocarcinoma",
  "PE-Ca" = "Squamous cell carcinoma",
  "adenosquam. Ca" = "Adeno squamous cell carcinoma",
  "ASQ" = "Adeno squamous cell carcinoma",
  "Adenosquamöses CA" = "Adeno squamous cell carcinoma",
  "Adeno/squamous carcinoma" = "Adeno squamous cell carcinoma",
  "LC" = "Large cell carcinoma",
  "SCC" = "Squamous cell carcinoma",
  "LCNEC" = "Large cell neuroendocrine carcinoma"
)
dx_raw <- as.character(clinical.data$DX.name)
needs_recode <- dx_raw %in% names(dx_recode)
clinical.data$DX.name[needs_recode] <- unname(dx_recode[dx_raw[needs_recode]])
unique(clinical.data$DX.name)

# Codebook: label = numeric code for each coded clinical column.
# NOTE(review): this used to be written as `levels(col) <- list(...)` before
# the columns were converted with as.factor(). On non-factor columns that only
# sets an attribute which as.factor() discards, so the labels were never
# applied and the columns keep their raw codes. That effective behaviour is
# preserved here because the LN.Met / Dist.Met / NeoAdj flags below compare
# against the raw codes. The "Post relapse" yes/no labels were assigned to
# Chemo3 a second time; they are listed under Chemo5 here, which is presumably
# what was meant - confirm.
clinical.codebook <- list(
  Gender = c("male" = 1, "female" = 2),
  Grade = c("missing" = 0, "Grade 1" = 1, "Grade 2" = 2, "Grade 3" = 3),
  Vessel = c("negative" = 0, "positive" = 1),
  Pleura = c("negative" = 0, "positive" = 1),
  R = c("R0" = 0, "R1" = 1),
  Relapse = c("no" = 0, "yes" = 1),
  Ev.O = c("alive" = 0, "dead" = 1),
  T.new = c("1a" = 1, "1b" = 2, "2a" = 3, "2b" = 4, "3" = 5, "4" = 6),
  M.new = c("no" = 0, "1a" = 1, "1b" = 2),
  N = c("N0" = 0, "N1" = 1, "N2" = 2, "N3" = 3),
  Stage = c(
    "1a" = 1, "1b" = 2, "2a" = 3, "2b" = 4, "3a" = 5, "3b" = 6, "4" = 7
  ),
  Chemo = c("Neoadjuvant no" = 0, "Neoadjuvant yes" = 1),
  Radio = c("Neoadjuvant no" = 0, "Neoadjuvant yes" = 1),
  Chemo3 = c("Adjuvant no" = 0, "Adjuvant yes" = 1),
  Radio4 = c("Adjuvant no" = 0, "Adjuvant loc" = 1, "Other" = 2, "Both" = 3),
  Chemo5 = c("Post relapse no" = 0, "Post relapse yes" = 1),
  Radio6 = c(
    "Post relapse no" = 0, "Post relapse loc" = 1,
    "Post relapse other" = 2, "Post relapse both" = 3
  ),
  Smok = c("no" = 0, "currently" = 1, "former" = 2, "unknown" = 3)
)

# Convert the coded columns to factors; the levels are the raw codes.
cols <- c("TMA","Gender","Typ","Grade","Vessel","Pleura","T.new","N","M.new","Stage","R","Chemo","Radio","Chemo3","Radio4","Relapse","Chemo5","Radio6","DFS","Ev.O" ,"Smok" )
clinical.data[cols] <- lapply(clinical.data[cols], as.factor)
head(clinical.data)

# ROI identifier = grid label directly followed by the x/y localisation.
clinical.data$ROI <- paste0(clinical.data$Grid, clinical.data$x.y.localisation)
head(clinical.data$ROI)
clinical.data$RoiID <- paste(clinical.data$TMA, clinical.data$ROI, sep = "_")
clinical.data$Patient_ID <- paste(
  clinical.data$TMA, clinical.data$Patient_Nr,
  sep = "_"
)

# Summary flags derived from the raw codes (code 0 = absent / none).
clinical.data$LN.Met <- ifelse(
  clinical.data$N == 0, "No LN Metastases", "LN Metastases"
)
clinical.data$Dist.Met <- ifelse(
  clinical.data$M.new == 0, "No Dist. Metastases", "Dist. Metastases"
)
clinical.data$NeoAdj <- ifelse(
  clinical.data$Radio == 1 | clinical.data$Chemo == 1,
  "NeoAdjuvantTherapy", "NoNeoAdjuvantTherapy"
)

# Drop helper columns; Grid is kept under the name TmaBlock.
clinical.data$X <- NULL
clinical.data$TmaBlock <- clinical.data$Grid
clinical.data$Grid <- NULL
clinical.data$x.localisation <- NULL
clinical.data$y.localisation <- NULL

# NOTE(review): from here on data_folder points at a hard-coded absolute path
# instead of the project-relative folder set at the top of the notebook.
wd <- "/mnt"
data_folder <- file.path(wd, "lena_processed2", "NSCLC_NEW", "clinical_data")
write.csv(clinical.data, file = file.path(data_folder, "clinical_data.csv"))
```


```{r save position files}
# Write one position table into data_folder under the given file name.
# NOTE(review): the file names are the same as the ones the tables were read
# from, and write.csv() adds a row-name column; if data_folder is the input
# folder, this overwrites the input files - confirm that is intended.
write_position_table <- function(position_table, file_name) {
  write.csv(position_table, file = file.path(data_folder, file_name))
}

write_position_table(p86, "zTMA_86.csv")
write_position_table(p87, "zTMA_87.csv")
write_position_table(p88, "zTMA_88.csv")
write_position_table(p175, "zTMA_175.csv")
write_position_table(p176, "zTMA_176.csv")
write_position_table(p178, "zTMA_178.csv")
```

Loading

0 comments on commit fa8b0b4

Please sign in to comment.