乡下人产国偷v产偷v自拍,国产午夜片在线观看,婷婷成人亚洲综合国产麻豆,久久综合给合久久狠狠狠9

  • <output id="e9wm2"></output>
    <s id="e9wm2"><nobr id="e9wm2"><ins id="e9wm2"></ins></nobr></s>

    • 分享

      GWAS數(shù)據(jù)沒有提供eaf,如何是好……

       昵稱69125444 2023-09-12

      事情是這樣的,好不容易從文獻中找到一手SNPs數(shù)據(jù),喜大普奔……

      圖片

      定睛一看,沒有eaf值啊,這可咋整,后續(xù)需要用到read_outcome_data函數(shù),eaf值是必須的呢!

      在這里停滯了好久,準備放棄這部分數(shù)據(jù)了,但是又覺得很可惜,數(shù)次徘徊……

      于是開始搜搜搜,然后B站還真的給我推了,柳暗花明又一村啊——

      1方法1:snp_add_eaf

      [孟德爾隨機化之代碼生成eaf_嗶哩嗶哩_bilibili https://www.bilibili.com/video/BV1kR4y1f7Pz/?spm_id_from=333.788.recommend_more_video.-1&vd_source=e5e36ce10569a7a5d647d18bdb42e4b5

      更驚喜的事情是,原作者閃現(xiàn)評論區(qū)提供了代碼出處:

      圖片
      圖片

      [(https://github.com/linjf15/MR_tricks/tree/main/GWAS_preprocessing)

      snp_add_eaf <- function(dat, build = '37', pop = 'EUR')
      {
        stopifnot(build %in% c('37','38'))
        stopifnot('SNP' %in% names(dat))
        
        # Create and get a url
        server <- ifelse(build == '37','http://grch37.rest.','http://rest.')
        pop <- paste0('1000GENOMES:phase_3:',pop)
        
        snp_reverse_base <- function(x)
        {
          x <- stringr::str_to_upper(x)
          stopifnot(x %in% c('A','T','C','G'))
          switch(x,'A'='T','T'='A','C'='G','G'='C')
        }
        
        res_tab <- lapply(1:nrow(dat), function(i)
        {
          print(paste0('seaching for No.', i, ' SNP'))
          dat_i <- dat[i,]
          
          ext <- paste0('/variation/Homo_sapiens/',dat_i$SNP, '?content-type=application/json;pops=1')
          url <- paste(server, ext, sep = '')
          res <- httr::GET(url)
          
          # Converts http errors to R errors or warnings
          httr::stop_for_status(res)
          
          # Convert R objects from JSON
          res <- httr::content(res)
          res_pop <- jsonlite::fromJSON(jsonlite::toJSON(res))$populations
          
          # Filter query results based on population set
          res_pop <- try(res_pop[res_pop$population == pop,])
          if('try-error' %in% class(res_pop))
          {
            print(paste0('There is not information for population ',pop))
            queried_effect_allele <- 'NR'
            queried_other_allele <- 'NR'
            queried_eaf <- -1
          }
          else
          {
            if(nrow(res_pop)==0)
            {
              print(paste0('There is not information for population ',pop))
              queried_effect_allele <- 'NR'
              queried_other_allele <- 'NR'
              queried_eaf <- -1
            }
            else
            {
              queried_effect_allele <- res_pop[1,'allele'][[1]]
              queried_other_allele <- res_pop[2,'allele'][[1]]
              queried_eaf <- res_pop[1,'frequency'][[1]]    
            }
          }
          
          effect_allele <- ifelse('effect_allele.exposure' %in% names(dat),
                                  dat_i$effect_allele.exposure,
                                  dat_i$effect_allele)
          
          other_allele <- ifelse('effect_allele.exposure' %in% names(dat),
                                  dat_i$other_allele.exposure,
                                  dat_i$other_allele)
          
          if('effect_allele.exposure' %in% names(dat))
          {
            name_output <- unique(c(names(dat), 'eaf.exposure','reliability.exposure'))
          }
          else
          {
            name_output <- unique(c(names(dat), 'eaf','reliability.exposure'))
          }
            
          len_effect_allele <- nchar(effect_allele)
          len_other_allele <- nchar(other_allele)
          
          if(len_effect_allele==1&len_other_allele==1)
          {
            if((queried_effect_allele==effect_allele & queried_other_allele==other_allele)|
               (queried_effect_allele==other_allele & queried_other_allele==effect_allele))
            {
              dat_i$eaf.exposure <- ifelse(effect_allele == queried_effect_allele,
                                           queried_eaf,
                                           1-queried_eaf)
              dat_i$eaf <- dat_i$eaf.exposure 
              dat_i$reliability.exposure <- 'high'
            }
            else
            {
              r_queried_effect_allele <- snp_reverse_base(queried_effect_allele)
              r_queried_other_allele <- snp_reverse_base(queried_other_allele)
              if((r_queried_effect_allele==effect_allele & r_queried_other_allele==other_allele)|
                 (r_queried_effect_allele==other_allele & r_queried_other_allele==effect_allele))
              {
                dat_i$eaf.exposure <- ifelse(effect_allele == r_queried_effect_allele,
                                             queried_eaf,
                                             1-queried_eaf)
                dat_i$eaf <- dat_i$eaf.exposure 
                dat_i$reliability.exposure <- 'high'
              }
              else
              {
                dat_i$eaf.exposure <- ifelse(effect_allele == queried_effect_allele,
                                             queried_eaf,
                                             1-queried_eaf)
                dat_i$eaf <- dat_i$eaf.exposure 
                dat_i$reliability.exposure <- 'low'
              }
            }
          }
          
          else
          {
            # To identify the potential DEL/ INS
            short_allele <- ifelse(len_effect_allele==1,
                                   effect_allele,
                                   other_allele)
            short_allele_eaf <- ifelse(short_allele == queried_effect_allele, 
                                       queried_eaf, 
                                       1-queried_eaf)
            dat_i$eaf.exposure <- ifelse(effect_allele == short_allele,
                                         short_allele_eaf,
                                         1-short_allele_eaf)
            dat_i$eaf <- dat_i$eaf.exposure 
            dat_i$reliability.exposure <- 'low'
          }
          
          dat_i[name_output]
        })
        
        return(do.call(rbind, res_tab))
      }
      圖片

      如果能確定數(shù)據(jù)的參考基因組版本,還可以自定義哦~

      2方法2:get_eaf_from_1000G

      [孟德爾隨機化, 補eaf, 本地與在線_嗶哩嗶哩_bilibili]

      [(https://www.bilibili.com/video/BV1Dh411j7yP/?spm_id_from=333.999.0.0&vd_source=e5e36ce10569a7a5d647d18bdb42e4b5)

      這個up主介紹了兩種方法,第一種如上所述,第二種來自于另外一個團隊【代碼作者:廣州醫(yī)科大學第一臨床學院周浩彬 ,第二臨床學院謝治鑫】,用起來都非常方便。

      說明文檔:[Get_MR/Get_MR1.0 help.md at main · HaobinZhou/Get_MR · GitHub]

      [(https://github.com/HaobinZhou/Get_MR/blob/main/Get_MR1.0 help.md)

      圖片
      get_eaf_from_1000G<-function(dat,path,type='exposure'){
        
        corrected_eaf_expo<-function(data_MAF){
          effect=data_MAF$effect_allele.exposure
          other=data_MAF$other_allele.exposure
          A1=data_MAF$A1
          A2=data_MAF$A2
          MAF_num=data_MAF$MAF
          EAF_num=1-MAF_num
          
          harna<-is.na(data_MAF$A1)
          harna<-data_MAF$SNP[which(harna==T)]
          
          cor1<-which(data_MAF$effect_allele.exposure !=data_MAF$A1)
          
          
          data_MAF$eaf.exposure=data_MAF$MAF
          data_MAF$type='raw'
          data_MAF$eaf.exposure[cor1]=EAF_num[cor1]
          data_MAF$type[cor1]='corrected'
          cor2<-which(data_MAF$other_allele.exposure ==data_MAF$A1)
          cor21<-setdiff(cor2,cor1)
          cor12<-setdiff(cor1,cor2)
          error<-c(cor12,cor21)
          data_MAF$eaf.exposure[error]=NA
          data_MAF$type[error]='error'
          
          data_MAF<-list(data_MAF=data_MAF,cor1=cor1,harna=harna,error=error)
          
          return(data_MAF)
          
        }
        
        corrected_eaf_out<-function(data_MAF){
          effect=data_MAF$effect_allele.outcome
          other=data_MAF$other_allele.outcome
          A1=data_MAF$A1
          A2=data_MAF$A2
          MAF_num=data_MAF$MAF
          EAF_num=1-MAF_num
          
          harna<-is.na(data_MAF$A1)
          harna<-data_MAF$SNP[which(harna==T)]
          
          cor1<-which(data_MAF$effect_allele.outcome !=data_MAF$A1)
          
          
          data_MAF$eaf.outcome=data_MAF$MAF
          data_MAF$type='raw'
          data_MAF$eaf.outcome[cor1]=EAF_num[cor1]
          data_MAF$type[cor1]='corrected'
          cor2<-which(data_MAF$other_allele.outcome ==data_MAF$A1)
          cor21<-setdiff(cor2,cor1)
          cor12<-setdiff(cor1,cor2)
          error<-c(cor12,cor21)
          data_MAF$eaf.outcome[error]=NA
          data_MAF$type[error]='error'
          
          data_MAF<-list(data_MAF=data_MAF,cor1=cor1,harna=harna,error=error)
          
          return(data_MAF)
          
        }
        
        if(type=='exposure' && (is.na(dat$eaf.exposure[1])==T || is.null(dat$eaf.exposure)==T)){
          r<-nrow(dat)
          
          setwd(path)
          MAF<-fread('fileFrequency.frq',header = T)
          
          dat<-merge(dat,MAF,by.x = 'SNP',by.y = 'SNP',all.x = T)
          
          dat<-corrected_eaf_expo(dat)
          
          cor1<-dat$cor1
          
          harna<-dat$harna
          
          error<-dat$error
          
          dat<-dat$data_MAF
          
          print(paste0('一共有',(r-length(harna)-length(error)),'個SNP成功匹配EAF,占比',(r-length(harna)-length(error))/r*100,'%'))
          
          print(paste0('一共有',length(cor1),'個SNP是major allele,EAF被計算為1-MAF,在成功匹配數(shù)目中占比',length(cor1)/(r-length(harna)-length(error))*100,'%'))
          
          print(paste0('一共有',length(harna),'個SNP在1000G中找不到,占比',length(harna)/r*100,'%'))
          
          print(paste0('一共有',length(error),'個SNP在輸入數(shù)據(jù)與1000G中效應列與參照列,將剔除eaf,占比',length(error)/r*100,'%'))
          
          print('輸出數(shù)據(jù)中的type列說明:')
          print('raw:EAF直接等于1000G里的MAF數(shù)值,因為效應列是minor allele')
          print('corrected:EAF等于1000G中1-MAF,因為效應列是major allele')
          print('error:輸入數(shù)據(jù)與1000G里面提供的數(shù)據(jù)完全不一致,比如這個SNP輸入的效應列是C,參照列是G,但是1000G提供的是A-T,這種情況下,EAF會被清空(NA),當成匹配失敗')
          
          return(dat)
        }
        
        if(type=='outcome' && (is.na(dat$eaf.outcome[1])==T || is.null(dat$eaf.outcome)==T)){
          r<-nrow(dat)
          
          setwd(path)
          MAF<-fread('fileFrequency.frq',header = T)
          
          dat<-merge(dat,MAF,by.x = 'SNP',by.y = 'SNP',all.x = T)
          
          dat<-corrected_eaf_out(dat)
          
          cor1<-dat$cor1
          
          harna<-dat$harna
          
          error<-dat$error
          
          dat<-dat$data_MAF
          
          print(paste0('一共有',(r-length(harna)-length(error)),'個SNP成功匹配EAF,占比',(r-length(harna)-length(error))/r*100,'%'))
          
          print(paste0('一共有',length(cor1),'個SNP是major allele,EAF被計算為1-MAF,在成功匹配數(shù)目中占比',length(cor1)/(r-length(harna)-length(error))*100,'%'))
          
          print(paste0('一共有',length(harna),'個SNP在1000G找不到,占比',length(harna)/r*100,'%'))
          
          print(paste0('一共有',length(error),'個SNP在輸入數(shù)據(jù)與1000G中效應列與參照列,將剔除eaf,占比',length(error)/r*100,'%'))
          
          print('輸出數(shù)據(jù)中的type列說明:')
          print('raw:EAF直接等于1000G里的MAF數(shù)值,因為效應列是minor allele')
          print('corrected:EAF等于1000G中1-MAF,因為效應列是major allele')
          print('error:輸入數(shù)據(jù)與1000G里面提供的數(shù)據(jù)完全不一致,比如這個SNP輸入的效應列是C,參照列是G,但是1000G提供的是A-T,這種情況下,EAF會被清空(NA),當成匹配失敗')
          
          return(dat)
        }
        else{return(dat)}
      }

      運行這個函數(shù)需要注意,如果你的數(shù)據(jù)是自己整理的本地數(shù)據(jù),那就要提前將數(shù)據(jù)整理一下:

      exp_dat <- read_exposure_data(
        filename = './Mediators/newcytokines_exposure.txt'##你的數(shù)據(jù)
        clump = FALSE,
        sep= '\t',
        snp_col = 'rsid',
        beta_col = 'BETA_exposure',
        se_col = 'SE_exposure',
        effect_allele_col ='EA_exposure',
        other_allele_col = 'NEA_exposure',
        eaf_col = 'eaf.exposure',
        pval_col = 'P'
      )
      head(exp_dat)

      # 從1000G的MAF文件中提取EAF并將其與輸入數(shù)據(jù)匹配
      newdat <- get_eaf_from_1000G(exp_dat, 'D:/09-MY_try/MR_Aanalyses/', type = 'exposure'##這里的路徑是包含fileFrequency.frq這個文件的文件夾!

      fileFrequency.frq文件來源:

        本站是提供個人知識管理的網(wǎng)絡存儲空間,所有內(nèi)容均由用戶發(fā)布,不代表本站觀點。請注意甄別內(nèi)容中的聯(lián)系方式、誘導購買等信息,謹防詐騙。如發(fā)現(xiàn)有害或侵權內(nèi)容,請點擊一鍵舉報。
        轉藏 分享 獻花(0

        0條評論

        發(fā)表

        請遵守用戶 評論公約

        類似文章 更多