\RequirePackage{color}
%\documentclass[pstcol,pdf,baltica,slideColor,colorBG,gyom]{prosper}
\documentclass[pstcol,pdf,baltica,slideColor,colorBG,thomasd]{prosper}
%\documentclass[pstcol,pdf,baltica,slideColor,colorBG,azure]{prosper}
\usepackage{pstricks,pst-node,pst-text,pst-3d}
\usepackage{graphics}
\usepackage{amsmath, amssymb, amsfonts}
\usepackage{multirow}
\usepackage{slashbox}
\usepackage{fancyheadings}
\usepackage{multicol}
\usepackage{colortbl}
\usepackage{ascmac}
\usepackage{latexsym}
\usepackage{fancybox}
\usepackage[ps2pdf]{hyperref}
%% Definition of the \BibTeX logo macro: the letters are kerned together,
%% "ib" is set in small caps and the "E" is lowered by .7ex.
\def\BibTeX{{\rm B\kern-.05em{\sc i\kern-.025em b}\kern-.08em
T\kern-.1667em\lower.7ex\hbox{E}\kern-.125emX}}
%% BibTeX reference-list heading. Original note: switch the Japanese heading
%% to "Reference" (when writing in English, uncomment the line below).
%% NOTE(review): as written, the active line sets \refname to an empty string,
%% i.e. the heading text is blank -- confirm this is intended.
\renewcommand{\refname}{}
%% Redefine \scriptsize as 8pt type (\@viiipt) with a nominal baseline skip
%% of 1; the slides below wrap their R listings in \scriptsize.
%% \@setfontsize and \@viiipt contain `@', so the redefinition must sit between
%% \makeatletter and \makeatother. Without that, TeX reads `\@' followed by the
%% plain letters "setfontsize" and then \scriptsize again, so the macro would
%% call itself without end.
%% Earlier variant kept for reference:
%%\renewcommand{\scriptsize}{\@setfontsize\scriptsize{\@vipt}{2\@Q}}
\makeatletter
\renewcommand\scriptsize{\@setfontsize\scriptsize\@viiipt{1}}
\makeatother
%% Default kanji font family is set to `gt'
%% (presumably the Gothic family under pLaTeX -- TODO confirm).
\renewcommand{\kanjifamilydefault}{gt}
%% Default slide transition is Split; every slide below passes [Wipe] explicitly.
\DefaultTransition{Split}
%% \bibfont is defined as \scriptsize.
%% NOTE(review): not referenced elsewhere in this file; presumably read by the
%% bibliography style or class -- confirm.
\newcommand{\bibfont}{\scriptsize}
%% Title-slide metadata: title (the Rlogo.eps image at 2em width followed by
%% the Japanese title text), date subtitle, author, e-mail and institution.
\title{
\includegraphics[width=2em]{Rlogo.eps}
によるDDBJのアクセス方法}
\subtitle{- 2007-06-18 -}
\author{Ei-ji Nakama}
\email{nakama at com-one.com}
\institution{COM-ONE Ltd.}
\usepackage{/usr/share/R/share/texmf/Sweave}
\begin{document}
\bf\mathversion{bold}\maketitle
%%\bibliographystyle{junsrt}
\bibliographystyle{plain}
%% ========================== Sweave setting
%% Colour coding of the Sweave verbatim environments.
%% Sinput (R input lines): black text, no frame.
\DefineVerbatimEnvironment{Sinput}{Verbatim}{
formatcom={\color{black}},
frame=none
}
%% Scode (R code chunks): gray text, no frame.
%% The colour name was previously misspelled `glay', which would raise an
%% "undefined color" error the first time an Scode environment is typeset.
%% NOTE(review): `gray' is assumed to be provided by the pstricks/color setup
%% loaded in the preamble -- confirm on the target TeX installation.
\DefineVerbatimEnvironment{Scode}{Verbatim}{
formatcom={\color{gray}},
frame=none
}
%% Soutput (R output): slanted shape, red text, no frame.
\DefineVerbatimEnvironment{Soutput}{Verbatim}{
fontshape=sl,
formatcom={\color{red}},
frame=none
}
%% Tighten vertical spacing: set \topsep to 0pt in the list parameters
%% applied to the verbatim environments.
\fvset{listparameters={\setlength{\topsep}{0pt}}}
%% ========================= 初期化start ===================
%% ========================= 初期化end ===================
\begin{slide}[Wipe]{RとXML}
\begin{itemize}
\item R\cite{R}でXMLを扱うにはXML\cite{XML}パッケージを利用します.
\item RのXML\cite{XML}パッケージはlibxml(\url{http://xmlsoft.org/})を使います.
\item RのXMLの利用例は少ないですが, libxmlの利用例が参考になります.
\item XML-1.9.0以降ではencodingもサポートしました.
\item Omegahat\footnote{\url{http://www.omegahat.org/}}にありましたが,現在はCRANにも登録されています.
\end{itemize}
\end{slide}
\begin{slide}[Wipe]{RとXML 例-1}
\scriptsize{
\begin{Schunk}
\begin{Sinput}
> library(XML)
> f <- system.file("exampleData", "mathmlMatrix.xml",
+ package = "XML")
> doc <- xmlTreeParse(f,useInternalNodes=T)
> # 内部形式はXPATHが使えます
> unlist(xpathApply(doc,"//*",xmlName))
\end{Sinput}
\begin{Soutput}
[1] "mrow" "reln" "eq" "apply" "times"
[ reached getOption("max.print") -- omitted 15 entries ]]
\end{Soutput}
\begin{Sinput}
> sapply(getNodeSet(doc, "/mrow/reln/apply/vector"),
+ saveXML)
\end{Sinput}
\begin{Soutput}
[1] " 1 2 \n "
[2] " 2 1 \n "
\end{Soutput}
\begin{Sinput}
> free(doc)
\end{Sinput}
\begin{Soutput}
attr(,"class")
[1] "XMLInternalDocument"
\end{Soutput}
\end{Schunk}
}
\end{slide}
\begin{slide}[Wipe]{RとXML 例-2}
\scriptsize{
\begin{Schunk}
\begin{Sinput}
> f <- system.file("exampleData", "mathmlMatrix.xml",
+ package = "XML")
> doc <- xmlTreeParse(f,useInternalNodes=T)
> # XPATHでの値の取りだし
> path <- "/mrow/reln/apply/matrix/matrixrow/cn"
> unlist(xpathApply(doc, path, xmlValue))
\end{Sinput}
\begin{Soutput}
[1] " 0 " " 1 " " 1 " " 0 "
\end{Soutput}
\begin{Sinput}
> free(doc)
\end{Sinput}
\begin{Soutput}
attr(,"class")
[1] "XMLInternalDocument"
\end{Soutput}
\end{Schunk}
}
\end{slide}
\begin{slide}[Wipe]{RとXML 例-3}
\scriptsize{
\begin{Schunk}
\begin{Sinput}
> doc<-xmlTreeParse(f,useInternalNodes=T)
> # 内部形式をXMLに戻します
> sapply(getNodeSet(doc, "/"), saveXML)
\end{Sinput}
\begin{Soutput}
[1] "\n\n\n \n \n 1 2 \n \n\n \n 0 1 \n 1 0 \n \n \n\n \n 2 1 \n \n \n \n\n"
\end{Soutput}
\begin{Sinput}
> free(doc)
\end{Sinput}
\begin{Soutput}
attr(,"class")
[1] "XMLInternalDocument"
\end{Soutput}
\end{Schunk}
}
\end{slide}
\begin{slide}[Wipe]{RとXML 例-4}
\scriptsize{
\begin{Schunk}
\begin{Sinput}
> doc <- xmlTreeParse(system.file("exampleData", "mtcars.xml",
+ package="XML"))
> # 内部形式では無い場合
> names(xmlRoot(doc))
\end{Sinput}
\begin{Soutput}
[1] "variables" "record" "record" "record"
[5] "record"
[ reached getOption("max.print") -- omitted 28 entries ]]
\end{Soutput}
\begin{Sinput}
> r <- xmlRoot(doc)
> r[names(r) == "variables"]
\end{Sinput}
\begin{Soutput}
$variables
mpg
cyl
disp
hp
drat
wt
qsec
vs
am
gear
carb
\end{Soutput}
\end{Schunk}
}
\end{slide}
\begin{slide}[Wipe]{RとHTTP}
\begin{itemize}
\item RのRCurl\cite{RCurl}パッケージはlibcurl(\url{http://curl.haxx.se/})を使います.
\item RCurl\cite{RCurl}は標準のsocketモジュールが対応していないSSL等も扱えます.
\item XML\cite{XML}やSSOAP\cite{SSOAP}と連携して高度なHTTP通信も行えます.
\item RCurl\cite{RCurl}はOmegahatから,BioConductor\cite{Biobase}のExtraに供給されています.
\end{itemize}
\end{slide}
\begin{slide}[Wipe]{RとHTTP PROXY設定}
RでPROXYを設定する場合はモジュール毎に対応が必要です.
\includegraphics{RdeDDBJ2-proxyoverview}
\\
例えば、http\_proxy=http://prs.ism.ac.jp:3128/
\end{slide}
\begin{slide}[Wipe]{RとSOAP}
\begin{itemize}
\item R\cite{R}でSOAPを扱うにはSSOAP\cite{SSOAP}パッケージを利用します.
\item SSOAP\cite{SSOAP}はXML\cite{XML}とRCurl\cite{RCurl}を必要とします.
\item SSOAP\cite{SSOAP}もOmegahatから, BioConductor\cite{Biobase}
のExtraに供給されています.
\end{itemize}
\end{slide}
\begin{slide}[Wipe]{RとDDBJ}
R\cite{R}でDDBJにアクセスするには,今までにあげた,XML\cite{XML},
RCurl\cite{RCurl},SSOAP\cite{SSOAP}が必要です. \\
DDBJのチュートリアル\footnote{\url{http://www.xml.nig.ac.jp/tutorial/index_jp.html}}
\cite{tutorial}
に倣って, R版を作成しました.
\end{slide}
\begin{slide}[Wipe]{RとDDBJ - エントリーの取得1}
\scriptsize{
\begin{Schunk}
\begin{Sinput}
> # 1. SSOAP のロード
> library(SSOAP)
> # 2. WSDLの指定
> url <- "http://xml.nig.ac.jp/wsdl/GetEntry.wsdl"
> GetEntry <- processWSDL (url)
> iface <- genSOAPClientInterface(def = GetEntry)
> # 3. WEBサービスの呼び出し
> result<-iface@functions$getXML_DDBJEntry("AB000003")
> # 4. 視覚的な確認
> xmlRoot(xmlTreeParse(result))
\end{Sinput}
\begin{Soutput}
AB000003
641
linear
PLN
12-SEP-2002
Rhizoctonia solani genes for 18S rRNA, 5.8S rRNA, 28S rRNA,partial and complete sequence, isolate:1556.
AB000003
AB000003.1
.
Thanatephorus cucumeris
Eukaryota; Fungi; Basidiomycota; Hymenomycetes;Heterobasidiomycetes; Heterobasidiomycetidae; Ceratobasidiales;Ceratobasidiaceae; Thanatephorus.
Kuninaga,S.
Direct Submission
Submitted (19-DEC-1996) to the DDBJ/EMBL/GenBank databases. ShiroKuninaga, Health Sciences University of Hokkaido, GeneralEducation; 1757 Kanazawa, Tohbetsu, Hokkaido 061-02, Japan(E-mail:kuninaga@hoku-iryo-u.ac.jp, Tel:81-1332-3-1211,Fax:81-1332-3-1276)
Kuninaga,S., Natsuaki,T. and Takeuchi,T.
Sequence variation of the rDNA ITS regions within and betweenanastomosis groups in Rhizoctonia solani
Curr. Genet. 32, 237-243 (1997)
<1..5
18S rRNA
208..362
5.8S rRNA
637..>641
28S rRNA
aattaaatgtggagttggttgtagctggccttgattcattttctaggcatgtgcacaccttctctttcatccacacacacctgtgcacctgtgagacagttagggtgggaggactttattggactctctccctctctgtctactcaatttttacacaaactcaatttactttaaaatgaatgtcattgatgtaacgcatctaatactaagtttcaacaacggatctcttggctctcgcatcgatgaagaacgcagcgaaatgcgataagtaatgtgaattgcagaattcagtgaatcatcgaatctttgaacgcaccttgcgctccttggtattccttggagcatgcctgtttgagtatcatgaaatcttcaaagtgaaatcttttgttaattcaatcggttctactttggtatttggaggtcttttgcagcttcacacctgctcctctttgtgcattagctggatctcagtgttatgcttggttccactcagcgtgataagttatctatcgctgaggacactgtaaaaaggtggccaaggtaaatacagatgaaccgcttctaatagtccattgacttggacaaatccatttatgatctgatctcaaatcaggtaggactacccgctgaacttaagcata
\end{Soutput}
\end{Schunk}
}
\end{slide}
\begin{slide}[Wipe]{RとDDBJ - エントリーの取得2}
\scriptsize{
\begin{Schunk}
\begin{Sinput}
> # ※前の頁の1-2は実行済み
> # 3. アクセッション番号AB000002-AB000005を一度の実行で取得
> acsession <- c("AB000002","AB000003","AB000004","AB000005")
> # 4. WEBサービスの呼び出し
> result <- sapply(acsession,
+ iface@functions$getFASTA_DDBJEntry)
> # 5. 視覚的な確認
> print(result)
\end{Sinput}
\begin{Soutput}
AB000002
">AB000002|Rhizoctonia solani genes for 18S rRNA, 5.8S rRNA, "
AB000003
">AB000003|Rhizoctonia solani genes for 18S rRNA, 5.8S rRNA, "
AB000004
">AB000004|Rhizoctonia solani genes for 18S rRNA, 5.8S rRNA, "
AB000005
">AB000005|Rhizoctonia solani genes for 18S rRNA, 5.8S rRNA, "
\end{Soutput}
\end{Schunk}
}
\end{slide}
\begin{slide}[Wipe]{SRSとGetEntry、Blast連携1}
キーワードとして `prion', Human Division, Molecule`mRNA'を指定します.
\scriptsize{
\begin{Schunk}
\begin{Sinput}
> # SSOAP のロード
> library(SSOAP)
> # WSDLの指定
> SRS <- processWSDL("http://xml.nig.ac.jp/wsdl/SRS.wsdl")
> SRSiface<-genSOAPClientInterface(def=SRS)
> # WEBサービスの呼び出し (条件 & は &amp; にすること)
> result<-SRSiface@functions$searchSimple(
+ "[ddbj-AllText:prion*] &amp;
+ [ddbj-Division:hum] &amp; [ddbj-Molecule:mrna]")
> print(result)
\end{Sinput}
\begin{Soutput}
[1] "DDBJ:AF187843\nDDBJ:AF187844\nDDBJ:AK090575\nDDBJ:AY008282\nDDBJ"
\end{Soutput}
\end{Schunk}
}
\end{slide}
\begin{slide}[Wipe]{SRSとGetEntry、Blast連携2}
その結果からコーディング領域のアミノ酸配列を抜き出します(GetEntry).
\scriptsize{
\begin{Schunk}
\begin{Sinput}
> # 検索結果を改行で分割し, 配列に格納
> id <- unlist(strsplit(result,"\n"))
> url <- "http://xml.nig.ac.jp/wsdl/GetEntry.wsdl"
> getentry<-genSOAPClientInterface(def = processWSDL(url))
> # アクセッションNo(substringでDDBJ:をサプレス)
> # sapplyでアクセッションNoを引数にして
> # DADエントリーをFASTA形式で取得
> result<-sapply(substring(id,6),
+ getentry@functions$getFASTA_DADEntry)
> print(result)
\end{Sinput}
\begin{Soutput}
[1] ">AF187843-1|AAG43448.1|148|Homo sapiens doppel protein\nMRKHL"
[2] ">AF187844-1|AAG43449.1|176|Homo sapiens prion-like protein\nM"
[3] "character(0)"
[4] ">AY008282-1|AAG21693.1|253|Homo sapiens prion protein\nMANLGC"
[5] ">AY569456-1|AAS80162.1|253|Homo sapiens prion protein\nMANLGC"
[ reached getOption("max.print") -- omitted 14 entries ]]
\end{Soutput}
\end{Schunk}
}
\end{slide}
\begin{slide}[Wipe]{SRSとGetEntry、Blast連携3}
相同性検索の一つである Blastp を 比較対象 Swiss-Prot のデータベースに指定して実行
\scriptsize{
\begin{Schunk}
\begin{Sinput}
> blastiface<-genSOAPClientInterface(def=processWSDL(
+ "http://xml.nig.ac.jp/wsdl/Blast.wsdl"))
> # blastサービスを呼び出す
> result<-blastiface@functions$searchParam("blastp",
+ "SWISS",
+ unlist(result),
+ "-m 8")
> print(result)
\end{Sinput}
\begin{Soutput}
[1] "AF187843-1|AAG43448.1|148|Homo\tsp|Q9UKY0|PRND_HUMAN\t99.32\t14"
\end{Soutput}
\end{Schunk}
}
\end{slide}
\begin{slide}[Wipe]{SRSとGetEntry、Blast連携4}
取得したデータを加工する
\scriptsize{
\begin{Schunk}
\begin{Sinput}
> # 結果を改行コードで分割する
> blastline<-unlist(strsplit(result,"\n"))
> print(blastline[1:3]) # 多いので三つだけ表示
\end{Sinput}
\begin{Soutput}
[1] "AF187843-1|AAG43448.1|148|Homo\tsp|Q9UKY0|PRND_HUMAN\t99.32\t14"
[2] "AF187843-1|AAG43448.1|148|Homo\tsp|Q9GJY2|PRND_SHEEP\t77.33\t15"
[3] "AF187843-1|AAG43448.1|148|Homo\tsp|Q9GK16|PRND_BOVIN\t76.67\t15"
\end{Soutput}
\begin{Sinput}
> # IDを抜き出す
> id <- as.vector(sapply(blastline,function(x){
+ unlist(strsplit(x,"\\|"))[5]
+ }))
> print(id)
\end{Sinput}
\begin{Soutput}
[1] "Q9UKY0" "Q9GJY2" "Q9GK16" "Q9QUG3" "O46501"
[ reached getOption("max.print") -- omitted 71 entries ]]
\end{Soutput}
\end{Schunk}
}
\end{slide}
\begin{slide}[Wipe]{SRSとGetEntry、Blast連携5}
アノテーションをGetEntryで取得し, アクセッション番号, プロテインID,
Swiss-Prot ID, プロテインシンボル, プロテインの定義 を表示する.
\scriptsize{
\begin{Schunk}
\begin{Sinput}
> # IDからGetUNIPROTEntry取得
> entry<-sapply(id, getentry@functions$getUNIPROTEntry)
> # 空白を圧縮
> entry<-gsub("\\s{2,}"," ",entry)
> # 改行で分割
> entry<-strsplit(entry,"\n")
> print(entry)
\end{Sinput}
\begin{Soutput}
[1] "c(\"ID PRND_HUMAN Reviewed; 176 AA.\", \"AC Q9UKY0; Q9H311; Q9H"
[2] "c(\"ID PRND_SHEEP Reviewed; 178 AA.\", \"AC Q9GJY2; Q9MYU2;\", \""
[3] "c(\"ID PRND_BOVIN Reviewed; 178 AA.\", \"AC Q9GK16; A2VDR5; Q27"
[4] "c(\"ID PRND_MOUSE Reviewed; 179 AA.\", \"AC Q9QUG3; Q9QZT5;\", \""
[5] "c(\"ID PRIO_CANFA Reviewed; 255 AA.\", \"AC O46501;\", \"DT 15-JU"
[ reached getOption("max.print") -- omitted 71 entries ]]
\end{Soutput}
\end{Schunk}
}
\end{slide}
\begin{slide}[Wipe]{SRSとGetEntry、Blast連携6}
取得データの編集
\scriptsize{
\begin{Schunk}
\begin{Sinput}
> # IDの編集
> ID<-sapply(entry, function(x)
+ paste(substring(x[grep("^ID",x)],4),collapse=" "))
> # DEの編集
> DE<-sapply(entry, function(x)
+ paste(substring(x[grep("^DE",x)],4),collapse=" "))
> ENTRY<-cbind(ID,DE)
> print(ENTRY)
\end{Sinput}
\begin{Soutput}
ID
Q9UKY0 "PRND_HUMAN Reviewed; 176 AA."
Q9GJY2 "PRND_SHEEP Reviewed; 178 AA."
DE
Q9UKY0 "Prion-like protein doppel precursor (PrPLP) (Prion protein 2"
Q9GJY2 "Prion-like protein doppel precursor (PrPLP)."
[getOption("max.print") を越えました -- 末尾 74 行を省略します]]
\end{Soutput}
\end{Schunk}
}
\end{slide}
\begin{slide}[Wipe]{参考文献}
\tiny{
\bibliography{RdeDDBJ2,DDBJ}
}
\end{slide}
\end{document}