I have a list of samples annotated for mutations in signalling pathways, as shown below; `value` is 1 if the sample is mutated in that pathway and 0 if it is not.

  > dput(pathway)
structure(list(sample = c("LP6005334.DNA_H01", "LP6005334.DNA_H01", 
"LP6005334.DNA_H01", "LP6005334.DNA_H01", "LP6005334.DNA_H01", 
"LP6005334.DNA_H01", "LP6005334.DNA_H01", "LP6005334.DNA_H01", 
"LP6005334.DNA_H01", "LP6005334.DNA_H01", "LP6005334.DNA_H01", 
"LP6005334.DNA_H01", "LP6005500.DNA_D03", "LP6005500.DNA_D03", 
"LP6005500.DNA_D03", "LP6005500.DNA_D03", "LP6005500.DNA_D03", 
"LP6005500.DNA_D03", "LP6005500.DNA_D03", "LP6005500.DNA_D03", 
"LP6005500.DNA_D03", "LP6005500.DNA_D03", "LP6005500.DNA_D03", 
"LP6005500.DNA_D03", "LP6005500.DNA_D03", "LP6007600", "LP6007600", 
"LP6007600", "LP6007600", "LP6007600", "LP6007600", "LP6007600", 
"LP6007600", "LP6007600", "LP6007600", "LP6007600", "LP6007600", 
"LP6008202.DNA_B03", "LP6008202.DNA_B03", "LP6008202.DNA_B03", 
"LP6008202.DNA_B03", "LP6008202.DNA_B03", "LP6008202.DNA_B03", 
"LP6008202.DNA_B03", "LP6008202.DNA_B03", "LP6008202.DNA_B03", 
"LP6008202.DNA_B03", "LP6008202.DNA_B03", "LP6008202.DNA_B03", 
"LP6008334.DNA_A03", "LP6008334.DNA_A03", "LP6008334.DNA_A03", 
"LP6008334.DNA_A03", "LP6008334.DNA_A03", "LP6008334.DNA_A03", 
"LP6008334.DNA_A03", "LP6008334.DNA_A03", "LP6008334.DNA_A03", 
"LP6008334.DNA_A03", "LP6008334.DNA_A03", "LP6008334.DNA_A03", 
"LP6008334.DNA_A04", "LP6008334.DNA_A04", "LP6008334.DNA_A04", 
"LP6008334.DNA_A04", "LP6008334.DNA_A04", "LP6008334.DNA_A04", 
"LP6008334.DNA_A04", "LP6008334.DNA_A04", "LP6008334.DNA_A04", 
"LP6008334.DNA_A04", "LP6008334.DNA_A04", "LP6008334.DNA_A04", 
"LP6008334.DNA_B02", "LP6008334.DNA_B02", "LP6008334.DNA_B02", 
"LP6008334.DNA_B02", "LP6008334.DNA_B02", "LP6008334.DNA_B02", 
"LP6008334.DNA_B02", "LP6008334.DNA_B02", "LP6008334.DNA_B02", 
"LP6008334.DNA_B02", "LP6008334.DNA_B02", "LP6008334.DNA_B02", 
"LP6008334.DNA_C02", "LP6008334.DNA_C02", "LP6008334.DNA_C02", 
"LP6008334.DNA_C02", "LP6008334.DNA_C02", "LP6008334.DNA_C02", 
"LP6008334.DNA_C02", "LP6008334.DNA_C02", "LP6008334.DNA_C02", 
"LP6008334.DNA_C02", "LP6008334.DNA_C02", "LP6008334.DNA_C02", 
"LP6008334.DNA_D02", "LP6008334.DNA_D02", "LP6008334.DNA_D02", 
"LP6008334.DNA_D02", "LP6008334.DNA_D02", "LP6008334.DNA_D02", 
"LP6008334.DNA_D02", "LP6008334.DNA_D02", "LP6008334.DNA_D02", 
"LP6008334.DNA_D02", "LP6008334.DNA_D02", "LP6008334.DNA_D02", 
"LP6008336.DNA_F02", "LP6008336.DNA_F02", "LP6008336.DNA_F02", 
"LP6008336.DNA_F02", "LP6008336.DNA_F02", "LP6008336.DNA_F02", 
"LP6008336.DNA_F02", "LP6008336.DNA_F02", "LP6008336.DNA_F02", 
"LP6008336.DNA_F02", "LP6008336.DNA_F02", "LP6008336.DNA_F02", 
"LP6008336.DNA_G01", "LP6008336.DNA_G01", "LP6008336.DNA_G01", 
"LP6008336.DNA_G01", "LP6008336.DNA_G01", "LP6008336.DNA_G01", 
"LP6008336.DNA_G01", "LP6008336.DNA_G01", "LP6008336.DNA_G01", 
"LP6008336.DNA_G01", "LP6008336.DNA_G01", "LP6008336.DNA_G01", 
"LP6008336.DNA_H01", "LP6008336.DNA_H01", "LP6008336.DNA_H01", 
"LP6008336.DNA_H01", "LP6008336.DNA_H01", "LP6008336.DNA_H01", 
"LP6008336.DNA_H01", "LP6008336.DNA_H01", "LP6008336.DNA_H01", 
"LP6008336.DNA_H01", "LP6008336.DNA_H01", "LP6008336.DNA_H01", 
"LP6008337.DNA_A07", "LP6008337.DNA_A07", "LP6008337.DNA_A07", 
"LP6008337.DNA_A07", "LP6008337.DNA_A07", "LP6008337.DNA_A07", 
"LP6008337.DNA_A07", "LP6008337.DNA_A07", "LP6008337.DNA_A07", 
"LP6008337.DNA_A07", "LP6008337.DNA_A07", "LP6008337.DNA_A07", 
"LP6008337.DNA_H06", "LP6008337.DNA_H06", "LP6008337.DNA_H06", 
"LP6008337.DNA_H06", "LP6008337.DNA_H06", "LP6008337.DNA_H06", 
"LP6008337.DNA_H06", "LP6008337.DNA_H06", "LP6008337.DNA_H06", 
"LP6008337.DNA_H06", "LP6008337.DNA_H06", "LP6008337.DNA_H06", 
"LP6008460.DNA_A04", "LP6008460.DNA_A04", "LP6008460.DNA_A04", 
"LP6008460.DNA_A04", "LP6008460.DNA_A04", "LP6008460.DNA_A04", 
"LP6008460.DNA_A04", "LP6008460.DNA_A04", "LP6008460.DNA_A04", 
"LP6008460.DNA_A04", "LP6008460.DNA_A04", "LP6008460.DNA_A04", 
"LP6008460.DNA_D01", "LP6008460.DNA_D01", "LP6008460.DNA_D01", 
"LP6008460.DNA_D01", "LP6008460.DNA_D01", "LP6008460.DNA_D01", 
"LP6008460.DNA_D01", "LP6008460.DNA_D01", "LP6008460.DNA_D01", 
"LP6008460.DNA_D01", "LP6008460.DNA_D01", "LP6008460.DNA_D01", 
"LP6008460.DNA_F02", "LP6008460.DNA_F02", "LP6008460.DNA_F02", 
"LP6008460.DNA_F02", "LP6008460.DNA_F02", "LP6008460.DNA_F02", 
"LP6008460.DNA_F02", "LP6008460.DNA_F02", "LP6008460.DNA_F02", 
"LP6008460.DNA_F02", "LP6008460.DNA_F02", "LP6008460.DNA_F02", 
"LP6008460.DNA_G03", "LP6008460.DNA_G03", "LP6008460.DNA_G03", 
"LP6008460.DNA_G03", "LP6008460.DNA_G03", "LP6008460.DNA_G03", 
"LP6008460.DNA_G03", "LP6008460.DNA_G03", "LP6008460.DNA_G03", 
"LP6008460.DNA_G03", "LP6008460.DNA_G03", "LP6008460.DNA_G03", 
"s15", "s15", "s15", "s15", "s15", "s15", "s15", "s15", "s15", 
"s15", "s15", "s15", "s15", "s15", "s18", "s18", "s18", "s18", 
"s18", "s18", "s18", "s18", "s18", "s18", "s18", "s18", "s18", 
"s18", "s18", "s18", "s24", "s24", "s24", "s24", "s24", "s24", 
"s24", "s24", "s24", "s24", "s24", "s24", "s24", "s24", "s24", 
"s30", "s30", "s30", "s30", "s30", "s30", "s30", "s30", "s30", 
"s30", "s30", "s30", "s30", "s30", "s59", "s59", "s59", "s59", 
"s59", "s59", "s59", "s59", "s59", "s59", "s59", "s59", "s67", 
"s67", "s67", "s67", "s67", "s67", "s67", "s67", "s67", "s67", 
"s67", "s67", "s67", "s67", "s80", "s80", "s80", "s80", "s80", 
"s80", "s80", "s80", "s80", "s80", "s80", "s80", "s80", "s80", 
"s80", "s80", "s86", "s86", "s86", "s86", "s86", "s86", "s86", 
"s86", "s86", "s86", "s86", "s86", "s86", "s86", "s94", "s94", 
"s94", "s94", "s94", "s94", "s94", "s94", "s94", "s94", "s94", 
"s94", "s94", "s94"), Pathway = c("PI3K", "Cell_Cycle", "RTK-RAS", 
"WNT", "TGF-Beta", "CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", 
"NRF2", "PI3K", "Cell_Cycle", "WNT", "TGF-Beta", "RTK-RAS", "WNT", 
"CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", "NRF2", "TGF-Beta", 
"PI3K", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH", 
"Hippo", "MYC", "NRF2", "PI3K", "TGF-Beta", "RTK-RAS", "WNT", 
"CR", "CF", "TP53", "Cell_Cycle", "NOTCH", "Hippo", "MYC", "NRF2", 
"Cell_Cycle", "TGF-Beta", "PI3K", "RTK-RAS", "WNT", "CR", "CF", 
"TP53", "NOTCH", "Hippo", "MYC", "NRF2", "RTK-RAS", "TGF-Beta", 
"PI3K", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH", "Hippo", 
"MYC", "NRF2", "Cell_Cycle", "TGF-Beta", "PI3K", "RTK-RAS", "WNT", 
"CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", "NRF2", "PI3K", 
"Cell_Cycle", "RTK-RAS", "TGF-Beta", "WNT", "CR", "CF", "TP53", 
"NOTCH", "Hippo", "MYC", "NRF2", "PI3K", "Cell_Cycle", "TGF-Beta", 
"RTK-RAS", "WNT", "CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", 
"NRF2", "Cell_Cycle", "TGF-Beta", "PI3K", "RTK-RAS", "WNT", "CR", 
"CF", "TP53", "NOTCH", "Hippo", "MYC", "NRF2", "TGF-Beta", "PI3K", 
"RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH", 
"Hippo", "MYC", "NRF2", "PI3K", "Cell_Cycle", "WNT", "TGF-Beta", 
"RTK-RAS", "CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", "NRF2", 
"PI3K", "TGF-Beta", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", 
"NOTCH", "Hippo", "MYC", "NRF2", "Cell_Cycle", "RTK-RAS", "TGF-Beta", 
"PI3K", "WNT", "CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", "NRF2", 
"Cell_Cycle", "TGF-Beta", "PI3K", "RTK-RAS", "WNT", "CR", "CF", 
"TP53", "NOTCH", "Hippo", "MYC", "NRF2", "Cell_Cycle", "CF", 
"CR", "Hippo", "MYC", "NOTCH", "NRF2", "PI3K", "RTK-RAS", "TGF-Beta", 
"TP53", "WNT", "Cell_Cycle", "RTK-RAS", "TGF-Beta", "PI3K", "WNT", 
"CR", "CF", "TP53", "NOTCH", "Hippo", "MYC", "NRF2", "RTK-RAS", 
"TGF-Beta", "PI3K", "WNT", "CR", "CF", "TP53", "Cell_Cycle", 
"NOTCH", "Hippo", "MYC", "NRF2", "Cell_Cycle", "RTK-RAS", "TGF-Beta", 
"PI3K", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH", 
"Hippo", "MYC", "NRF2", "PI3K", "Cell_Cycle", "RTK-RAS", "WNT", 
"TGF-Beta", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", 
"NOTCH", "Hippo", "MYC", "NRF2", "PI3K", "PI3K", "Cell_Cycle", 
"RTK-RAS", "TGF-Beta", "RTK-RAS", "WNT", "CR", "CF", "TP53", 
"Cell_Cycle", "NOTCH", "Hippo", "MYC", "NRF2", "PI3K", "Cell_Cycle", 
"WNT", "TGF-Beta", "PI3K", "RTK-RAS", "WNT", "CR", "CF", "TP53", 
"Cell_Cycle", "NOTCH", "Hippo", "MYC", "NRF2", "TGF-Beta", "PI3K", 
"RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH", 
"Hippo", "MYC", "NRF2", "Cell_Cycle", "WNT", "TGF-Beta", "PI3K", 
"RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH", 
"Hippo", "MYC", "NRF2", "PI3K", "Cell_Cycle", "RTK-RAS", "WNT", 
"TGF-Beta", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", 
"NOTCH", "Hippo", "MYC", "NRF2", "PI3K", "Cell_Cycle", "WNT", 
"TGF-Beta", "PI3K", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", 
"NOTCH", "Hippo", "MYC", "NRF2", "Cell_Cycle", "RTK-RAS", "TGF-Beta", 
"PI3K", "RTK-RAS", "WNT", "CR", "CF", "TP53", "Cell_Cycle", "NOTCH", 
"Hippo", "MYC", "NRF2"), value = c(1L, 1L, 1L, 1L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L)), class = "data.frame", row.names = c(NA, -346L))

I want to convert this to a boolean matrix (wide format) in which pathways are in columns and samples are in rows

Each sample should get 1 if it is mutated for a pathway and 0 if it is not, like this:

> head(p)
             sample value Cell_Cycle Hippo MYC NOTCH NRF2 PI3K TGF-Beta RTK-RAS TP53
1 LP6008334.DNA_C02     0          0     0   0     0    0    0        0       0    0
2 LP6008334.DNA_A03     0          0     0   0     0    0    0        0       0    0
3 LP6005334.DNA_H01     0          0     0   0     0    0    0        0       0    0
4 LP6008337.DNA_H06     0          0     0   0     0    0    0        0       0    0
5               s15     0          0     0   0     0    0    0        0       0    0
6 LP6008460.DNA_D01     0          0     0   0     0    0    0        0       0    0
  WNT CF CR
1   0  0  0
2   0  0  0
3   0  0  0
4   0  0  0
5   0  0  0
6   0  0  0
>

I have tried this, but all I get is zeros:

# For every pathway name found in p$Pathway that is not already a column of
# `p`, append one new column named after that pathway.
# NOTE(review): the loop variable `pathway` has the same name as the data
# frame printed by dput(pathway); if this runs in the same environment, the
# loop rebinds that name to a single pathway string -- confirm.
for (pathway in setdiff(unique(p$Pathway), colnames(p))) {
  # array(0, nrow(p)) is a block of zeros, one per row of `p`; nothing in
  # this loop reads the 0/1 mutation flags, so every new column is all zeros.
  p <- cbind(p,array(0,nrow(p)))
  # Name the column that was just appended (it is the last one).
  colnames(p)[ncol(p)] <- pathway
}

I have also tried:

# Spread the long table to wide format: one row per `sample` and one
# `value.<Pathway>` column per distinct `Pathway`.
# NOTE(review): the data contains repeated (sample, Pathway) pairs -- e.g.
# "WNT" is listed twice for LP6005500.DNA_D03, once with 1 and once with 0.
# reshape() is documented to keep only the first matching row per pair, which
# is presumably related to the unexpected 0/1 values -- confirm.
reshape(pathway, idvar = "sample",   timevar = "Pathway",  direction = "wide")

But this gives the wrong 0 and 1 values.

Please help me



Source link